From 0a6e526ae529a2c50122008c828319b2031bda10 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 4 Jan 2011 17:20:54 +1030 Subject: [PATCH] --- yaml --- r: 224108 b: refs/heads/master c: d50d8fe192428090790e7178e9507e981e0b005b h: refs/heads/master v: v3 --- [refs] | 2 +- trunk/CREDITS | 2 + trunk/Documentation/RCU/trace.txt | 144 +--- trunk/Documentation/dontdiff | 26 - trunk/Documentation/filesystems/Locking | 212 +++--- trunk/Documentation/kernel-docs.txt | 27 +- trunk/Documentation/kernel-parameters.txt | 23 +- trunk/Documentation/trace/events-power.txt | 90 --- trunk/Documentation/x86/boot.txt | 1 - trunk/MAINTAINERS | 35 +- trunk/Makefile | 2 +- trunk/arch/Kconfig | 3 - trunk/arch/alpha/include/asm/perf_event.h | 6 + trunk/arch/alpha/kernel/irq_alpha.c | 2 + trunk/arch/alpha/kernel/perf_event.c | 11 +- trunk/arch/arm/include/asm/hardware/it8152.h | 1 - trunk/arch/arm/include/asm/highmem.h | 3 + trunk/arch/arm/include/asm/sizes.h | 6 +- trunk/arch/arm/include/asm/system.h | 1 - trunk/arch/arm/kernel/entry-common.S | 6 - trunk/arch/arm/kernel/perf_event.c | 4 +- trunk/arch/arm/kernel/smp.c | 1 + trunk/arch/arm/mach-pxa/Kconfig | 1 - trunk/arch/arm/mach-pxa/sleep.S | 4 +- trunk/arch/arm/mm/cache-feroceon-l2.c | 37 +- trunk/arch/arm/mm/cache-xsc3l2.c | 57 +- trunk/arch/arm/mm/dma-mapping.c | 7 +- trunk/arch/arm/mm/flush.c | 7 +- trunk/arch/arm/mm/highmem.c | 87 +++ trunk/arch/mips/kernel/perf_event_mipsxx.c | 2 +- trunk/arch/mn10300/kernel/irq.c | 2 +- trunk/arch/powerpc/kernel/e500-pmu.c | 2 +- trunk/arch/powerpc/kernel/mpc7450-pmu.c | 2 +- trunk/arch/powerpc/kernel/perf_event.c | 2 +- .../arch/powerpc/kernel/perf_event_fsl_emb.c | 2 +- trunk/arch/powerpc/kernel/power4-pmu.c | 2 +- trunk/arch/powerpc/kernel/power5+-pmu.c | 2 +- trunk/arch/powerpc/kernel/power5-pmu.c | 2 +- trunk/arch/powerpc/kernel/power6-pmu.c | 2 +- trunk/arch/powerpc/kernel/power7-pmu.c | 2 +- trunk/arch/powerpc/kernel/ppc970-pmu.c | 2 +- trunk/arch/s390/Kconfig | 1 - trunk/arch/s390/include/asm/mutex.h | 2 - trunk/arch/sh/kernel/cpu/sh4/perf_event.c | 2 +- trunk/arch/sh/kernel/cpu/sh4a/perf_event.c | 2 +- trunk/arch/sh/kernel/perf_event.c | 2 +- trunk/arch/sparc/include/asm/perf_event.h | 4 + trunk/arch/sparc/kernel/nmi.c | 2 + trunk/arch/sparc/kernel/perf_event.c | 9 +- trunk/arch/x86/Kconfig | 41 +- trunk/arch/x86/Kconfig.debug | 11 - trunk/arch/x86/boot/compressed/head_64.S | 2 +- trunk/arch/x86/include/asm/alternative.h | 8 - trunk/arch/x86/include/asm/amd_nb.h | 49 +- trunk/arch/x86/include/asm/apic.h | 1 - trunk/arch/x86/include/asm/apicdef.h | 1 - trunk/arch/x86/include/asm/bootparam.h | 1 - trunk/arch/x86/include/asm/fixmap.h | 4 - trunk/arch/x86/include/asm/i387.h | 24 - trunk/arch/x86/include/asm/io_apic.h | 8 +- trunk/arch/x86/include/asm/irq.h | 4 + trunk/arch/x86/include/asm/kdebug.h | 2 +- trunk/arch/x86/include/asm/mce.h | 3 - trunk/arch/x86/include/asm/microcode.h | 6 - trunk/arch/x86/include/asm/mpspec.h | 31 +- trunk/arch/x86/include/asm/mpspec_def.h | 7 + trunk/arch/x86/include/asm/mrst-vrtc.h | 9 - trunk/arch/x86/include/asm/mrst.h | 14 +- trunk/arch/x86/include/asm/msr-index.h | 16 - trunk/arch/x86/include/asm/nmi.h | 53 +- trunk/arch/x86/include/asm/paravirt.h | 2 +- trunk/arch/x86/include/asm/pci.h | 1 - trunk/arch/x86/include/asm/perf_event.h | 2 + trunk/arch/x86/include/asm/perf_event_p4.h | 63 +- trunk/arch/x86/include/asm/setup.h | 6 - trunk/arch/x86/include/asm/smpboot_hooks.h | 1 + trunk/arch/x86/include/asm/stacktrace.h | 33 +- trunk/arch/x86/include/asm/timer.h | 6 + trunk/arch/x86/include/asm/uv/uv_bau.h | 9 +- trunk/arch/x86/kernel/Makefile | 1 + trunk/arch/x86/kernel/acpi/boot.c | 11 +- trunk/arch/x86/kernel/alternative.c | 52 +- trunk/arch/x86/kernel/amd_nb.c | 135 ++-- trunk/arch/x86/kernel/apb_timer.c | 1 - trunk/arch/x86/kernel/aperture_64.c | 10 +- trunk/arch/x86/kernel/apic/Makefile | 5 +- trunk/arch/x86/kernel/apic/apic.c | 119 ++-- trunk/arch/x86/kernel/apic/hw_nmi.c | 36 +- trunk/arch/x86/kernel/apic/io_apic.c | 83 ++- trunk/arch/x86/kernel/apic/nmi.c | 567 +++++++++++++++ trunk/arch/x86/kernel/apic/x2apic_uv_x.c | 61 +- trunk/arch/x86/kernel/cpu/common.c | 1 + trunk/arch/x86/kernel/cpu/intel_cacheinfo.c | 147 ++-- trunk/arch/x86/kernel/cpu/mcheck/mce_amd.c | 135 ++-- .../arch/x86/kernel/cpu/mcheck/therm_throt.c | 40 -- trunk/arch/x86/kernel/cpu/perf_event.c | 74 +- trunk/arch/x86/kernel/cpu/perf_event_amd.c | 16 +- trunk/arch/x86/kernel/cpu/perf_event_intel.c | 26 - trunk/arch/x86/kernel/cpu/perfctr-watchdog.c | 644 +++++++++++++++++- trunk/arch/x86/kernel/dumpstack.c | 12 +- trunk/arch/x86/kernel/dumpstack_32.c | 25 +- trunk/arch/x86/kernel/dumpstack_64.c | 24 +- trunk/arch/x86/kernel/early_printk.c | 3 +- .../mrst => kernel}/early_printk_mrst.c | 0 trunk/arch/x86/kernel/ftrace.c | 3 - trunk/arch/x86/kernel/head32.c | 3 - trunk/arch/x86/kernel/head_32.S | 83 ++- trunk/arch/x86/kernel/kprobes.c | 113 +-- trunk/arch/x86/kernel/microcode_amd.c | 34 +- trunk/arch/x86/kernel/pci-gart_64.c | 34 +- trunk/arch/x86/kernel/process.c | 10 +- trunk/arch/x86/kernel/process_32.c | 2 +- trunk/arch/x86/kernel/process_64.c | 2 - trunk/arch/x86/kernel/reboot_fixups_32.c | 16 - trunk/arch/x86/kernel/setup.c | 13 +- trunk/arch/x86/kernel/smpboot.c | 36 +- trunk/arch/x86/kernel/stacktrace.c | 8 +- trunk/arch/x86/kernel/time.c | 18 + trunk/arch/x86/kernel/trampoline_64.S | 2 +- trunk/arch/x86/kernel/traps.c | 35 +- trunk/arch/x86/kernel/tsc.c | 96 +-- .../kernel/{verify_cpu.S => verify_cpu_64.S} | 49 +- trunk/arch/x86/kernel/vmlinux.lds.S | 8 +- trunk/arch/x86/kvm/i8259.c | 2 - trunk/arch/x86/kvm/mmu.c | 3 +- trunk/arch/x86/lguest/i386_head.S | 105 --- trunk/arch/x86/mm/Makefile | 2 +- trunk/arch/x86/mm/init.c | 3 +- trunk/arch/x86/mm/init_32.c | 20 +- .../mm/{amdtopology_64.c => k8topology_64.c} | 12 +- trunk/arch/x86/mm/kmemcheck/error.c | 2 +- trunk/arch/x86/mm/numa_64.c | 22 +- trunk/arch/x86/mm/pageattr.c | 33 +- trunk/arch/x86/mm/setup_nx.c | 2 +- trunk/arch/x86/mm/srat_32.c | 1 - trunk/arch/x86/mm/srat_64.c | 10 - trunk/arch/x86/oprofile/backtrace.c | 2 +- trunk/arch/x86/oprofile/nmi_int.c | 3 - trunk/arch/x86/oprofile/nmi_timer_int.c | 3 + trunk/arch/x86/oprofile/op_model_amd.c | 79 +-- trunk/arch/x86/oprofile/op_model_p4.c | 2 +- trunk/arch/x86/pci/Makefile | 1 - trunk/arch/x86/pci/ce4100.c | 315 --------- trunk/arch/x86/pci/pcbios.c | 23 - trunk/arch/x86/platform/Makefile | 2 - trunk/arch/x86/platform/ce4100/Makefile | 1 - trunk/arch/x86/platform/ce4100/ce4100.c | 132 ---- trunk/arch/x86/platform/iris/Makefile | 1 - trunk/arch/x86/platform/iris/iris.c | 91 --- trunk/arch/x86/platform/mrst/Makefile | 2 - trunk/arch/x86/platform/mrst/mrst.c | 546 +-------------- trunk/arch/x86/platform/mrst/vrtc.c | 165 ----- trunk/arch/x86/platform/sfi/sfi.c | 4 +- trunk/arch/x86/platform/uv/tlb_uv.c | 22 +- trunk/arch/x86/platform/visws/visws_quirks.c | 2 +- trunk/drivers/acpi/acpica/nsinit.c | 2 + trunk/drivers/acpi/numa.c | 14 +- trunk/drivers/atm/atmtcp.c | 5 +- trunk/drivers/char/agp/amd64-agp.c | 33 +- trunk/drivers/cpufreq/cpufreq.c | 1 - trunk/drivers/cpuidle/cpuidle.c | 1 - trunk/drivers/dma/mv_xor.c | 2 +- trunk/drivers/edac/amd64_edac.c | 4 +- trunk/drivers/gpu/drm/i915/dvo_ch7017.c | 2 +- trunk/drivers/gpu/drm/i915/i915_dma.c | 23 - trunk/drivers/gpu/drm/i915/i915_reg.h | 10 - trunk/drivers/gpu/drm/i915/intel_bios.c | 2 +- trunk/drivers/gpu/drm/i915/intel_display.c | 21 +- trunk/drivers/gpu/drm/i915/intel_sdvo.c | 3 +- trunk/drivers/hwmon/s3c-hwmon.c | 2 + trunk/drivers/idle/intel_idle.c | 3 +- trunk/drivers/isdn/gigaset/capi.c | 1 - .../media/video/cx25840/cx25840-core.c | 19 +- trunk/drivers/media/video/cx88/cx88-alsa.c | 99 ++- trunk/drivers/media/video/cx88/cx88-cards.c | 7 - trunk/drivers/media/video/cx88/cx88-video.c | 27 +- trunk/drivers/media/video/cx88/cx88.h | 6 +- .../drivers/media/video/em28xx/em28xx-video.c | 2 +- trunk/drivers/media/video/wm8775.c | 104 +-- trunk/drivers/net/atlx/atl1.c | 10 - trunk/drivers/net/cnic.c | 10 +- trunk/drivers/net/ehea/ehea_ethtool.c | 7 - trunk/drivers/net/ppp_generic.c | 9 +- trunk/drivers/net/skfp/skfddi.c | 2 +- trunk/drivers/net/starfire.c | 2 +- trunk/drivers/net/tg3.c | 2 +- trunk/drivers/platform/x86/intel_ips.c | 36 +- trunk/drivers/platform/x86/intel_ips.h | 21 - trunk/drivers/platform/x86/intel_scu_ipc.c | 5 - trunk/drivers/rtc/Kconfig | 12 - trunk/drivers/rtc/Makefile | 1 - trunk/drivers/rtc/rtc-mrst.c | 582 ---------------- trunk/drivers/spi/coldfire_qspi.c | 2 +- trunk/drivers/spi/omap2_mcspi.c | 39 -- trunk/drivers/staging/zram/zram_drv.c | 6 + trunk/drivers/usb/atm/ueagle-atm.c | 22 +- trunk/drivers/watchdog/hpwdt.c | 11 +- trunk/fs/gfs2/bmap.c | 11 +- trunk/fs/gfs2/glock.c | 71 +- trunk/fs/gfs2/glock.h | 28 +- trunk/fs/gfs2/glops.c | 1 + trunk/fs/gfs2/incore.h | 12 +- trunk/fs/gfs2/inode.c | 9 + trunk/fs/gfs2/lock_dlm.c | 15 +- trunk/fs/gfs2/ops_inode.c | 18 +- trunk/fs/gfs2/quota.c | 13 +- trunk/fs/gfs2/rgrp.c | 57 +- trunk/fs/gfs2/rgrp.h | 1 - trunk/fs/gfs2/xattr.c | 23 +- trunk/fs/proc/base.c | 79 --- trunk/include/linux/completion.h | 8 +- trunk/include/linux/dmaengine.h | 13 +- trunk/include/linux/ftrace_event.h | 14 - trunk/include/linux/hrtimer.h | 33 +- trunk/include/linux/init_task.h | 18 +- trunk/include/linux/interrupt.h | 6 +- trunk/include/linux/kprobes.h | 4 +- trunk/include/linux/module.h | 11 +- trunk/include/linux/mutex.h | 4 - trunk/include/linux/nmi.h | 10 +- trunk/include/linux/perf_event.h | 26 +- trunk/include/linux/rculist.h | 5 + trunk/include/linux/rcupdate.h | 4 +- trunk/include/linux/rcutiny.h | 13 +- trunk/include/linux/rcutree.h | 2 - trunk/include/linux/sched.h | 51 +- trunk/include/linux/sfi.h | 8 +- trunk/include/linux/stacktrace.h | 4 +- trunk/include/linux/syscalls.h | 10 +- trunk/include/linux/timer.h | 32 +- trunk/include/linux/timerqueue.h | 50 -- trunk/include/linux/tracepoint.h | 33 +- trunk/include/linux/workqueue.h | 8 - trunk/include/media/wm8775.h | 3 + trunk/include/trace/define_trace.h | 15 - trunk/include/trace/events/power.h | 98 +-- trunk/include/trace/events/syscalls.h | 4 - trunk/include/trace/ftrace.h | 21 +- trunk/init/Kconfig | 68 +- trunk/init/do_mounts.c | 2 +- trunk/init/main.c | 5 +- trunk/kernel/cpu.c | 29 +- trunk/kernel/fork.c | 7 +- trunk/kernel/futex.c | 235 ++++--- trunk/kernel/hrtimer.c | 83 ++- trunk/kernel/hw_breakpoint.c | 2 +- trunk/kernel/irq/manage.c | 4 +- trunk/kernel/kprobes.c | 565 +++++---------- trunk/kernel/kthread.c | 2 +- trunk/kernel/lockdep_proc.c | 16 +- trunk/kernel/module.c | 171 +---- trunk/kernel/mutex.c | 2 +- trunk/kernel/perf_event.c | 573 ++++------------ trunk/kernel/posix-timers.c | 10 +- trunk/kernel/power/suspend.c | 3 - trunk/kernel/printk.c | 8 +- trunk/kernel/rcutiny.c | 105 +-- trunk/kernel/rcutiny_plugin.h | 433 +----------- trunk/kernel/rcutorture.c | 270 +------- trunk/kernel/rcutree.c | 156 +++-- trunk/kernel/rcutree.h | 61 +- trunk/kernel/rcutree_plugin.h | 135 +--- trunk/kernel/rcutree_trace.c | 12 +- trunk/kernel/sched.c | 640 +++++++++++++---- trunk/kernel/sched_autogroup.c | 238 ------- trunk/kernel/sched_autogroup.h | 32 - trunk/kernel/sched_clock.c | 2 +- trunk/kernel/sched_debug.c | 91 +-- trunk/kernel/sched_fair.c | 322 ++------- trunk/kernel/sched_features.h | 2 + trunk/kernel/sched_rt.c | 24 - trunk/kernel/softirq.c | 4 +- trunk/kernel/srcu.c | 8 +- trunk/kernel/sys.c | 4 +- trunk/kernel/sysctl.c | 53 +- trunk/kernel/sysctl_binary.c | 1 + trunk/kernel/time/timecompare.c | 5 +- trunk/kernel/time/timekeeping.c | 9 +- trunk/kernel/time/timer_list.c | 8 +- trunk/kernel/timer.c | 50 +- trunk/kernel/trace/Kconfig | 15 - trunk/kernel/trace/power-traces.c | 5 +- trunk/kernel/trace/trace_event_perf.c | 31 +- trunk/kernel/trace/trace_events.c | 6 - trunk/kernel/trace/trace_export.c | 14 +- trunk/kernel/trace/trace_selftest.c | 2 +- trunk/kernel/user.c | 1 - trunk/kernel/watchdog.c | 14 +- trunk/lib/Kconfig.debug | 3 +- trunk/lib/Makefile | 2 +- trunk/lib/timerqueue.c | 107 --- trunk/mm/memcontrol.c | 19 +- trunk/net/bridge/br_multicast.c | 28 +- trunk/net/bridge/br_stp_bpdu.c | 2 - trunk/net/can/bcm.c | 4 +- trunk/net/ipv4/route.c | 8 +- trunk/scripts/Makefile.build | 13 +- trunk/scripts/kconfig/menu.c | 14 - trunk/scripts/kernel-doc | 102 ++- trunk/security/integrity/ima/ima_policy.c | 2 - trunk/sound/oss/soundcard.c | 4 +- trunk/sound/pci/hda/hda_intel.c | 1 - trunk/sound/soc/codecs/max98088.c | 10 +- trunk/sound/soc/codecs/wm8523.c | 9 +- trunk/sound/soc/codecs/wm8741.c | 10 +- trunk/sound/soc/codecs/wm8753.c | 226 +++--- trunk/sound/soc/codecs/wm8904.c | 37 +- trunk/sound/soc/codecs/wm8940.c | 1 - trunk/sound/soc/codecs/wm8955.c | 31 +- trunk/sound/soc/codecs/wm8960.c | 1 - trunk/sound/soc/codecs/wm8962.c | 45 +- trunk/sound/soc/codecs/wm8971.c | 1 - trunk/sound/soc/codecs/wm9081.c | 1 - trunk/sound/soc/codecs/wm9090.c | 18 +- .../perf/Documentation/perf-annotate.txt | 37 +- .../perf/Documentation/perf-buildid-list.txt | 3 - trunk/tools/perf/Documentation/perf-diff.txt | 21 +- trunk/tools/perf/Documentation/perf-kvm.txt | 8 +- trunk/tools/perf/Documentation/perf-lock.txt | 15 - trunk/tools/perf/Documentation/perf-probe.txt | 4 +- .../tools/perf/Documentation/perf-record.txt | 22 +- .../tools/perf/Documentation/perf-report.txt | 55 +- trunk/tools/perf/Documentation/perf-sched.txt | 18 +- trunk/tools/perf/Documentation/perf-stat.txt | 44 +- trunk/tools/perf/Documentation/perf-test.txt | 2 +- .../perf/Documentation/perf-timechart.txt | 2 - trunk/tools/perf/Documentation/perf-top.txt | 28 +- ...rf-script-perl.txt => perf-trace-perl.txt} | 28 +- ...cript-python.txt => perf-trace-python.txt} | 88 +-- .../{perf-script.txt => perf-trace.txt} | 61 +- trunk/tools/perf/MANIFEST | 1 - trunk/tools/perf/Makefile | 23 +- trunk/tools/perf/bench/mem-memcpy-arch.h | 12 - .../perf/bench/mem-memcpy-x86-64-asm-def.h | 4 - .../tools/perf/bench/mem-memcpy-x86-64-asm.S | 2 - trunk/tools/perf/bench/mem-memcpy.c | 219 ++---- trunk/tools/perf/builtin-annotate.c | 10 +- trunk/tools/perf/builtin-buildid-list.c | 3 +- trunk/tools/perf/builtin-diff.c | 21 +- trunk/tools/perf/builtin-inject.c | 41 +- trunk/tools/perf/builtin-kmem.c | 27 +- trunk/tools/perf/builtin-lock.c | 23 +- trunk/tools/perf/builtin-record.c | 210 +++--- trunk/tools/perf/builtin-report.c | 23 +- trunk/tools/perf/builtin-sched.c | 33 +- trunk/tools/perf/builtin-stat.c | 531 ++++++--------- trunk/tools/perf/builtin-test.c | 110 +-- trunk/tools/perf/builtin-timechart.c | 132 +--- trunk/tools/perf/builtin-top.c | 237 +++---- .../{builtin-script.c => builtin-trace.c} | 135 ++-- trunk/tools/perf/builtin.h | 2 +- trunk/tools/perf/command-list.txt | 2 +- trunk/tools/perf/feature-tests.mak | 4 +- trunk/tools/perf/perf.c | 4 +- .../scripts/perl/Perf-Trace-Util/Context.c | 2 +- .../scripts/perl/Perf-Trace-Util/Context.xs | 4 +- .../perf/scripts/perl/Perf-Trace-Util/README | 4 +- .../Perf-Trace-Util/lib/Perf/Trace/Context.pm | 2 +- .../Perf-Trace-Util/lib/Perf/Trace/Core.pm | 4 +- .../Perf-Trace-Util/lib/Perf/Trace/Util.pm | 4 +- .../scripts/perl/bin/failed-syscalls-report | 2 +- .../perf/scripts/perl/bin/rw-by-file-report | 5 +- .../perf/scripts/perl/bin/rw-by-pid-report | 5 +- .../tools/perf/scripts/perl/bin/rwtop-report | 5 +- .../scripts/perl/bin/wakeup-latency-report | 5 +- .../scripts/perl/bin/workqueue-stats-report | 6 +- .../perf/scripts/perl/check-perf-trace.pl | 2 +- trunk/tools/perf/scripts/perl/rw-by-file.pl | 2 +- .../perf/scripts/perl/workqueue-stats.pl | 2 +- .../scripts/python/Perf-Trace-Util/Context.c | 2 +- .../Perf-Trace-Util/lib/Perf/Trace/Core.py | 2 +- .../lib/Perf/Trace/SchedGui.py | 2 +- .../Perf-Trace-Util/lib/Perf/Trace/Util.py | 2 +- .../python/bin/failed-syscalls-by-pid-report | 2 +- .../python/bin/futex-contention-report | 2 +- .../scripts/python/bin/netdev-times-report | 2 +- .../scripts/python/bin/sched-migration-report | 2 +- .../perf/scripts/python/bin/sctop-report | 2 +- .../python/bin/syscall-counts-by-pid-report | 2 +- .../scripts/python/bin/syscall-counts-report | 2 +- .../perf/scripts/python/check-perf-trace.py | 2 +- .../scripts/python/failed-syscalls-by-pid.py | 2 +- .../perf/scripts/python/sched-migration.py | 2 +- trunk/tools/perf/scripts/python/sctop.py | 2 +- .../scripts/python/syscall-counts-by-pid.py | 2 +- .../perf/scripts/python/syscall-counts.py | 2 +- trunk/tools/perf/util/build-id.c | 7 +- trunk/tools/perf/util/cpumap.c | 123 +--- trunk/tools/perf/util/cpumap.h | 10 +- trunk/tools/perf/util/debug.c | 42 +- trunk/tools/perf/util/debug.h | 2 - trunk/tools/perf/util/event.c | 358 ++++------ trunk/tools/perf/util/event.h | 30 +- trunk/tools/perf/util/evsel.c | 186 ----- trunk/tools/perf/util/evsel.h | 115 ---- trunk/tools/perf/util/header.c | 58 +- trunk/tools/perf/util/header.h | 5 +- trunk/tools/perf/util/hist.c | 25 +- trunk/tools/perf/util/hist.h | 2 - .../tools/perf/util/include/asm/cpufeature.h | 9 - trunk/tools/perf/util/include/asm/dwarf2.h | 11 - trunk/tools/perf/util/include/linux/bitops.h | 5 - trunk/tools/perf/util/include/linux/linkage.h | 13 - trunk/tools/perf/util/parse-events.c | 111 +-- trunk/tools/perf/util/parse-events.h | 19 +- trunk/tools/perf/util/parse-options.h | 4 - trunk/tools/perf/util/probe-event.c | 234 +++---- trunk/tools/perf/util/probe-finder.c | 42 +- trunk/tools/perf/util/probe-finder.h | 6 +- .../util/scripting-engines/trace-event-perl.c | 6 +- .../scripting-engines/trace-event-python.c | 4 +- trunk/tools/perf/util/session.c | 579 +++++----------- trunk/tools/perf/util/session.h | 26 +- trunk/tools/perf/util/sort.c | 6 +- trunk/tools/perf/util/symbol.c | 139 +--- trunk/tools/perf/util/symbol.h | 4 +- trunk/tools/perf/util/thread.c | 43 +- trunk/tools/perf/util/thread.h | 15 +- trunk/tools/perf/util/trace-event-info.c | 30 +- trunk/tools/perf/util/trace-event.h | 5 +- trunk/tools/perf/util/ui/util.c | 16 +- trunk/tools/perf/util/util.c | 17 - trunk/tools/perf/util/util.h | 1 - trunk/tools/perf/util/xyarray.c | 20 - trunk/tools/perf/util/xyarray.h | 20 - 425 files changed, 6130 insertions(+), 11486 deletions(-) delete mode 100644 trunk/Documentation/trace/events-power.txt delete mode 100644 trunk/arch/x86/include/asm/mrst-vrtc.h create mode 100644 trunk/arch/x86/kernel/apic/nmi.c rename trunk/arch/x86/{platform/mrst => kernel}/early_printk_mrst.c (100%) rename trunk/arch/x86/kernel/{verify_cpu.S => verify_cpu_64.S} (65%) rename trunk/arch/x86/mm/{amdtopology_64.c => k8topology_64.c} (94%) delete mode 100644 trunk/arch/x86/pci/ce4100.c delete mode 100644 trunk/arch/x86/platform/ce4100/Makefile delete mode 100644 trunk/arch/x86/platform/ce4100/ce4100.c delete mode 100644 trunk/arch/x86/platform/iris/Makefile delete mode 100644 trunk/arch/x86/platform/iris/iris.c delete mode 100644 trunk/arch/x86/platform/mrst/vrtc.c delete mode 100644 trunk/drivers/platform/x86/intel_ips.h delete mode 100644 trunk/drivers/rtc/rtc-mrst.c delete mode 100644 trunk/include/linux/timerqueue.h delete mode 100644 trunk/kernel/sched_autogroup.c delete mode 100644 trunk/kernel/sched_autogroup.h delete mode 100644 trunk/lib/timerqueue.c rename trunk/tools/perf/Documentation/{perf-script-perl.txt => perf-trace-perl.txt} (90%) rename trunk/tools/perf/Documentation/{perf-script-python.txt => perf-trace-python.txt} (89%) rename trunk/tools/perf/Documentation/{perf-script.txt => perf-trace.txt} (62%) delete mode 100644 trunk/tools/perf/bench/mem-memcpy-arch.h delete mode 100644 trunk/tools/perf/bench/mem-memcpy-x86-64-asm-def.h delete mode 100644 trunk/tools/perf/bench/mem-memcpy-x86-64-asm.S rename trunk/tools/perf/{builtin-script.c => builtin-trace.c} (83%) delete mode 100644 trunk/tools/perf/util/evsel.c delete mode 100644 trunk/tools/perf/util/evsel.h delete mode 100644 trunk/tools/perf/util/include/asm/cpufeature.h delete mode 100644 trunk/tools/perf/util/include/asm/dwarf2.h delete mode 100644 trunk/tools/perf/util/include/linux/linkage.h delete mode 100644 trunk/tools/perf/util/xyarray.c delete mode 100644 trunk/tools/perf/util/xyarray.h diff --git a/[refs] b/[refs] index fd1ffad7a878..50c81a242aca 100644 --- a/[refs] +++ b/[refs] @@ -1,2 +1,2 @@ --- -refs/heads/master: 47935a731b7b850a4c6c0e55ed0741e3dd25d889 +refs/heads/master: d50d8fe192428090790e7178e9507e981e0b005b diff --git a/trunk/CREDITS b/trunk/CREDITS index 494b6e4746d7..41d8e63d5165 100644 --- a/trunk/CREDITS +++ b/trunk/CREDITS @@ -2365,6 +2365,8 @@ E: acme@redhat.com W: http://oops.ghostprotocols.net:81/blog/ P: 1024D/9224DF01 D5DF E3BB E3C8 BCBB F8AD 841A B6AB 4681 9224 DF01 D: IPX, LLC, DCCP, cyc2x, wl3501_cs, net/ hacks +S: R. Brasílio Itiberê, 4270/1010 - Água Verde +S: 80240-060 - Curitiba - Paraná S: Brazil N: Karsten Merker diff --git a/trunk/Documentation/RCU/trace.txt b/trunk/Documentation/RCU/trace.txt index 6a8c73f55b80..a851118775d8 100644 --- a/trunk/Documentation/RCU/trace.txt +++ b/trunk/Documentation/RCU/trace.txt @@ -1,22 +1,18 @@ CONFIG_RCU_TRACE debugfs Files and Formats -The rcutree and rcutiny implementations of RCU provide debugfs trace -output that summarizes counters and state. This information is useful for -debugging RCU itself, and can sometimes also help to debug abuses of RCU. -The following sections describe the debugfs files and formats, first -for rcutree and next for rcutiny. +The rcutree implementation of RCU provides debugfs trace output that +summarizes counters and state. This information is useful for debugging +RCU itself, and can sometimes also help to debug abuses of RCU. +The following sections describe the debugfs files and formats. -CONFIG_TREE_RCU and CONFIG_TREE_PREEMPT_RCU debugfs Files and Formats +Hierarchical RCU debugfs Files and Formats -These implementations of RCU provides five debugfs files under the +This implementation of RCU provides three debugfs files under the top-level directory RCU: rcu/rcudata (which displays fields in struct -rcu_data), rcu/rcudata.csv (which is a .csv spreadsheet version of -rcu/rcudata), rcu/rcugp (which displays grace-period counters), -rcu/rcuhier (which displays the struct rcu_node hierarchy), and -rcu/rcu_pending (which displays counts of the reasons that the -rcu_pending() function decided that there was core RCU work to do). +rcu_data), rcu/rcugp (which displays grace-period counters), and +rcu/rcuhier (which displays the struct rcu_node hierarchy). The output of "cat rcu/rcudata" looks as follows: @@ -134,8 +130,7 @@ o "ci" is the number of RCU callbacks that have been invoked for been registered in absence of CPU-hotplug activity. o "co" is the number of RCU callbacks that have been orphaned due to - this CPU going offline. These orphaned callbacks have been moved - to an arbitrarily chosen online CPU. + this CPU going offline. o "ca" is the number of RCU callbacks that have been adopted due to other CPUs going offline. Note that ci+co-ca+ql is the number of @@ -173,12 +168,12 @@ o "gpnum" is the number of grace periods that have started. It is The output of "cat rcu/rcuhier" looks as follows, with very long lines: -c=6902 g=6903 s=2 jfq=3 j=72c7 nfqs=13142/nfqsng=0(13142) fqlh=6 +c=6902 g=6903 s=2 jfq=3 j=72c7 nfqs=13142/nfqsng=0(13142) fqlh=6 oqlen=0 1/1 .>. 0:127 ^0 3/3 .>. 0:35 ^0 0/0 .>. 36:71 ^1 0/0 .>. 72:107 ^2 0/0 .>. 108:127 ^3 3/3f .>. 0:5 ^0 2/3 .>. 6:11 ^1 0/0 .>. 12:17 ^2 0/0 .>. 18:23 ^3 0/0 .>. 24:29 ^4 0/0 .>. 30:35 ^5 0/0 .>. 36:41 ^0 0/0 .>. 42:47 ^1 0/0 .>. 48:53 ^2 0/0 .>. 54:59 ^3 0/0 .>. 60:65 ^4 0/0 .>. 66:71 ^5 0/0 .>. 72:77 ^0 0/0 .>. 78:83 ^1 0/0 .>. 84:89 ^2 0/0 .>. 90:95 ^3 0/0 .>. 96:101 ^4 0/0 .>. 102:107 ^5 0/0 .>. 108:113 ^0 0/0 .>. 114:119 ^1 0/0 .>. 120:125 ^2 0/0 .>. 126:127 ^3 rcu_bh: -c=-226 g=-226 s=1 jfq=-5701 j=72c7 nfqs=88/nfqsng=0(88) fqlh=0 +c=-226 g=-226 s=1 jfq=-5701 j=72c7 nfqs=88/nfqsng=0(88) fqlh=0 oqlen=0 0/1 .>. 0:127 ^0 0/3 .>. 0:35 ^0 0/0 .>. 36:71 ^1 0/0 .>. 72:107 ^2 0/0 .>. 108:127 ^3 0/3f .>. 0:5 ^0 0/3 .>. 6:11 ^1 0/0 .>. 12:17 ^2 0/0 .>. 18:23 ^3 0/0 .>. 24:29 ^4 0/0 .>. 30:35 ^5 0/0 .>. 36:41 ^0 0/0 .>. 42:47 ^1 0/0 .>. 48:53 ^2 0/0 .>. 54:59 ^3 0/0 .>. 60:65 ^4 0/0 .>. 66:71 ^5 0/0 .>. 72:77 ^0 0/0 .>. 78:83 ^1 0/0 .>. 84:89 ^2 0/0 .>. 90:95 ^3 0/0 .>. 96:101 ^4 0/0 .>. 102:107 ^5 0/0 .>. 108:113 ^0 0/0 .>. 114:119 ^1 0/0 .>. 120:125 ^2 0/0 .>. 126:127 ^3 @@ -217,6 +212,11 @@ o "fqlh" is the number of calls to force_quiescent_state() that exited immediately (without even being counted in nfqs above) due to contention on ->fqslock. +o "oqlen" is the number of callbacks on the "orphan" callback + list. RCU callbacks are placed on this list by CPUs going + offline, and are "adopted" either by the CPU helping the outgoing + CPU or by the next rcu_barrier*() call, whichever comes first. + o Each element of the form "1/1 0:127 ^0" represents one struct rcu_node. Each line represents one level of the hierarchy, from root to leaves. It is best to think of the rcu_data structures @@ -326,115 +326,3 @@ o "nn" is the number of times that this CPU needed nothing. Alert readers will note that the rcu "nn" number for a given CPU very closely matches the rcu_bh "np" number for that same CPU. This is due to short-circuit evaluation in rcu_pending(). - - -CONFIG_TINY_RCU and CONFIG_TINY_PREEMPT_RCU debugfs Files and Formats - -These implementations of RCU provides a single debugfs file under the -top-level directory RCU, namely rcu/rcudata, which displays fields in -rcu_bh_ctrlblk, rcu_sched_ctrlblk and, for CONFIG_TINY_PREEMPT_RCU, -rcu_preempt_ctrlblk. - -The output of "cat rcu/rcudata" is as follows: - -rcu_preempt: qlen=24 gp=1097669 g197/p197/c197 tasks=... - ttb=. btg=no ntb=184 neb=0 nnb=183 j=01f7 bt=0274 - normal balk: nt=1097669 gt=0 bt=371 b=0 ny=25073378 nos=0 - exp balk: bt=0 nos=0 -rcu_sched: qlen: 0 -rcu_bh: qlen: 0 - -This is split into rcu_preempt, rcu_sched, and rcu_bh sections, with the -rcu_preempt section appearing only in CONFIG_TINY_PREEMPT_RCU builds. -The last three lines of the rcu_preempt section appear only in -CONFIG_RCU_BOOST kernel builds. The fields are as follows: - -o "qlen" is the number of RCU callbacks currently waiting either - for an RCU grace period or waiting to be invoked. This is the - only field present for rcu_sched and rcu_bh, due to the - short-circuiting of grace period in those two cases. - -o "gp" is the number of grace periods that have completed. - -o "g197/p197/c197" displays the grace-period state, with the - "g" number being the number of grace periods that have started - (mod 256), the "p" number being the number of grace periods - that the CPU has responded to (also mod 256), and the "c" - number being the number of grace periods that have completed - (once again mode 256). - - Why have both "gp" and "g"? Because the data flowing into - "gp" is only present in a CONFIG_RCU_TRACE kernel. - -o "tasks" is a set of bits. The first bit is "T" if there are - currently tasks that have recently blocked within an RCU - read-side critical section, the second bit is "N" if any of the - aforementioned tasks are blocking the current RCU grace period, - and the third bit is "E" if any of the aforementioned tasks are - blocking the current expedited grace period. Each bit is "." - if the corresponding condition does not hold. - -o "ttb" is a single bit. It is "B" if any of the blocked tasks - need to be priority boosted and "." otherwise. - -o "btg" indicates whether boosting has been carried out during - the current grace period, with "exp" indicating that boosting - is in progress for an expedited grace period, "no" indicating - that boosting has not yet started for a normal grace period, - "begun" indicating that boosting has bebug for a normal grace - period, and "done" indicating that boosting has completed for - a normal grace period. - -o "ntb" is the total number of tasks subjected to RCU priority boosting - periods since boot. - -o "neb" is the number of expedited grace periods that have had - to resort to RCU priority boosting since boot. - -o "nnb" is the number of normal grace periods that have had - to resort to RCU priority boosting since boot. - -o "j" is the low-order 12 bits of the jiffies counter in hexadecimal. - -o "bt" is the low-order 12 bits of the value that the jiffies counter - will have at the next time that boosting is scheduled to begin. - -o In the line beginning with "normal balk", the fields are as follows: - - o "nt" is the number of times that the system balked from - boosting because there were no blocked tasks to boost. - Note that the system will balk from boosting even if the - grace period is overdue when the currently running task - is looping within an RCU read-side critical section. - There is no point in boosting in this case, because - boosting a running task won't make it run any faster. - - o "gt" is the number of times that the system balked - from boosting because, although there were blocked tasks, - none of them were preventing the current grace period - from completing. - - o "bt" is the number of times that the system balked - from boosting because boosting was already in progress. - - o "b" is the number of times that the system balked from - boosting because boosting had already completed for - the grace period in question. - - o "ny" is the number of times that the system balked from - boosting because it was not yet time to start boosting - the grace period in question. - - o "nos" is the number of times that the system balked from - boosting for inexplicable ("not otherwise specified") - reasons. This can actually happen due to races involving - increments of the jiffies counter. - -o In the line beginning with "exp balk", the fields are as follows: - - o "bt" is the number of times that the system balked from - boosting because there were no blocked tasks to boost. - - o "nos" is the number of times that the system balked from - boosting for inexplicable ("not otherwise specified") - reasons. diff --git a/trunk/Documentation/dontdiff b/trunk/Documentation/dontdiff index 470d3dba1a69..d9bcffd59433 100644 --- a/trunk/Documentation/dontdiff +++ b/trunk/Documentation/dontdiff @@ -62,10 +62,6 @@ aic7*reg_print.c* aic7*seq.h* aicasm aicdb.h* -altivec1.c -altivec2.c -altivec4.c -altivec8.c asm-offsets.h asm_offsets.h autoconf.h* @@ -80,7 +76,6 @@ btfixupprep build bvmlinux bzImage* -capflags.c classlist.h* comp*.log compile.h* @@ -99,7 +94,6 @@ devlist.h* docproc elf2ecoff elfconfig.h* -evergreen_reg_safe.h fixdep flask.h fore200e_mkfirm @@ -114,16 +108,9 @@ genksyms *_gray256.c ihex2fw ikconfig.h* -inat-tables.c initramfs_data.cpio initramfs_data.cpio.gz initramfs_list -int16.c -int1.c -int2.c -int32.c -int4.c -int8.c kallsyms kconfig keywords.c @@ -153,7 +140,6 @@ mkprep mktables mktree modpost -modules.builtin modules.order modversions.h* ncscope.* @@ -167,23 +153,14 @@ pca200e.bin pca200e_ecd.bin2 piggy.gz piggyback -piggy.S pnmtologo ppc_defs.h* pss_boot.h qconf -r100_reg_safe.h -r200_reg_safe.h -r300_reg_safe.h -r420_reg_safe.h -r600_reg_safe.h raid6altivec*.c raid6int*.c raid6tables.c relocs -rn50_reg_safe.h -rs600_reg_safe.h -rv515_reg_safe.h series setup setup.bin @@ -192,7 +169,6 @@ sImage sm_tbl* split-include syscalltab.h -tables.c tags tftpboot.img timeconst.h @@ -214,7 +190,6 @@ vmlinux vmlinux-* vmlinux.aout vmlinux.lds -voffset.h vsyscall.lds vsyscall_32.lds wanxlfw.inc @@ -225,4 +200,3 @@ wakeup.elf wakeup.lds zImage* zconf.hash.c -zoffset.h diff --git a/trunk/Documentation/filesystems/Locking b/trunk/Documentation/filesystems/Locking index 33fa3e5d38fd..b6426f15b4ae 100644 --- a/trunk/Documentation/filesystems/Locking +++ b/trunk/Documentation/filesystems/Locking @@ -18,6 +18,7 @@ prototypes: char *(*d_dname)((struct dentry *dentry, char *buffer, int buflen); locking rules: + none have BKL dcache_lock rename_lock ->d_lock may block d_revalidate: no no no yes d_hash no no no yes @@ -41,23 +42,18 @@ ata *); int (*rename) (struct inode *, struct dentry *, struct inode *, struct dentry *); int (*readlink) (struct dentry *, char __user *,int); - void * (*follow_link) (struct dentry *, struct nameidata *); - void (*put_link) (struct dentry *, struct nameidata *, void *); + int (*follow_link) (struct dentry *, struct nameidata *); void (*truncate) (struct inode *); int (*permission) (struct inode *, int, struct nameidata *); - int (*check_acl)(struct inode *, int); int (*setattr) (struct dentry *, struct iattr *); int (*getattr) (struct vfsmount *, struct dentry *, struct kstat *); int (*setxattr) (struct dentry *, const char *,const void *,size_t,int); ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t); ssize_t (*listxattr) (struct dentry *, char *, size_t); int (*removexattr) (struct dentry *, const char *); - void (*truncate_range)(struct inode *, loff_t, loff_t); - long (*fallocate)(struct inode *inode, int mode, loff_t offset, loff_t len); - int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, u64 len); locking rules: - all may block + all may block, none have BKL i_mutex(inode) lookup: yes create: yes @@ -70,24 +66,19 @@ rmdir: yes (both) (see below) rename: yes (all) (see below) readlink: no follow_link: no -put_link: no truncate: yes (see below) setattr: yes permission: no -check_acl: no getattr: no setxattr: yes getxattr: no listxattr: no removexattr: yes -truncate_range: yes -fallocate: no -fiemap: no Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_mutex on victim. cross-directory ->rename() has (per-superblock) ->s_vfs_rename_sem. ->truncate() is never called directly - it's a callback, not a -method. It's called by vmtruncate() - deprecated library function used by +method. It's called by vmtruncate() - library function normally used by ->setattr(). Locking information above applies to that call (i.e. is inherited from ->setattr() - vmtruncate() is used when ATTR_SIZE had been passed). @@ -100,7 +91,7 @@ prototypes: struct inode *(*alloc_inode)(struct super_block *sb); void (*destroy_inode)(struct inode *); void (*dirty_inode) (struct inode *); - int (*write_inode) (struct inode *, struct writeback_control *wbc); + int (*write_inode) (struct inode *, int); int (*drop_inode) (struct inode *); void (*evict_inode) (struct inode *); void (*put_super) (struct super_block *); @@ -114,10 +105,10 @@ prototypes: int (*show_options)(struct seq_file *, struct vfsmount *); ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t); ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t); - int (*bdev_try_to_free_page)(struct super_block*, struct page*, gfp_t); locking rules: All may block [not true, see below] + None have BKL s_umount alloc_inode: destroy_inode: @@ -136,7 +127,6 @@ umount_begin: no show_options: no (namespace_sem) quota_read: no (see below) quota_write: no (see below) -bdev_try_to_free_page: no (see below) ->statfs() has s_umount (shared) when called by ustat(2) (native or compat), but that's an accident of bad API; s_umount is used to pin @@ -149,25 +139,19 @@ be the only ones operating on the quota file by the quota code (via dqio_sem) (unless an admin really wants to screw up something and writes to quota files with quotas on). For other details about locking see also dquot_operations section. -->bdev_try_to_free_page is called from the ->releasepage handler of -the block device inode. See there for more details. --------------------------- file_system_type --------------------------- prototypes: int (*get_sb) (struct file_system_type *, int, const char *, void *, struct vfsmount *); - struct dentry *(*mount) (struct file_system_type *, int, - const char *, void *); void (*kill_sb) (struct super_block *); locking rules: - may block -get_sb yes -mount yes -kill_sb yes + may block BKL +get_sb yes no +kill_sb yes no ->get_sb() returns error or 0 with locked superblock attached to the vfsmount (exclusive on ->s_umount). -->mount() returns ERR_PTR or the root dentry. ->kill_sb() takes a write-locked superblock, does all shutdown work on it, unlocks and drops the reference. @@ -192,35 +176,27 @@ prototypes: void (*freepage)(struct page *); int (*direct_IO)(int, struct kiocb *, const struct iovec *iov, loff_t offset, unsigned long nr_segs); - int (*get_xip_mem)(struct address_space *, pgoff_t, int, void **, - unsigned long *); - int (*migratepage)(struct address_space *, struct page *, struct page *); - int (*launder_page)(struct page *); - int (*is_partially_uptodate)(struct page *, read_descriptor_t *, unsigned long); - int (*error_remove_page)(struct address_space *, struct page *); + int (*launder_page) (struct page *); locking rules: All except set_page_dirty and freepage may block - PageLocked(page) i_mutex -writepage: yes, unlocks (see below) -readpage: yes, unlocks -sync_page: maybe -writepages: -set_page_dirty no -readpages: -write_begin: locks the page yes -write_end: yes, unlocks yes -bmap: -invalidatepage: yes -releasepage: yes -freepage: yes -direct_IO: -get_xip_mem: maybe -migratepage: yes (both) -launder_page: yes -is_partially_uptodate: yes -error_remove_page: yes + BKL PageLocked(page) i_mutex +writepage: no yes, unlocks (see below) +readpage: no yes, unlocks +sync_page: no maybe +writepages: no +set_page_dirty no no +readpages: no +write_begin: no locks the page yes +write_end: no yes, unlocks yes +perform_write: no n/a yes +bmap: no +invalidatepage: no yes +releasepage: no yes +freepage: no yes +direct_IO: no +launder_page: no yes ->write_begin(), ->write_end(), ->sync_page() and ->readpage() may be called from the request handler (/dev/loop). @@ -300,8 +276,9 @@ under spinlock (it cannot block) and is sometimes called with the page not locked. ->bmap() is currently used by legacy ioctl() (FIBMAP) provided by some -filesystems and by the swapper. The latter will eventually go away. Please, -keep it that way and don't breed new callers. +filesystems and by the swapper. The latter will eventually go away. All +instances do not actually need the BKL. Please, keep it that way and don't +breed new callers. ->invalidatepage() is called when the filesystem must attempt to drop some or all of the buffers from the page when it is being truncated. It @@ -322,37 +299,47 @@ cleaned, or an error value if not. Note that in order to prevent the page getting mapped back in and redirtied, it needs to be kept locked across the entire operation. + Note: currently almost all instances of address_space methods are +using BKL for internal serialization and that's one of the worst sources +of contention. Normally they are calling library functions (in fs/buffer.c) +and pass foo_get_block() as a callback (on local block-based filesystems, +indeed). BKL is not needed for library stuff and is usually taken by +foo_get_block(). It's an overkill, since block bitmaps can be protected by +internal fs locking and real critical areas are much smaller than the areas +filesystems protect now. + ----------------------- file_lock_operations ------------------------------ prototypes: + void (*fl_insert)(struct file_lock *); /* lock insertion callback */ + void (*fl_remove)(struct file_lock *); /* lock removal callback */ void (*fl_copy_lock)(struct file_lock *, struct file_lock *); void (*fl_release_private)(struct file_lock *); locking rules: - file_lock_lock may block -fl_copy_lock: yes no -fl_release_private: maybe no + BKL may block +fl_insert: yes no +fl_remove: yes no +fl_copy_lock: yes no +fl_release_private: yes yes ----------------------- lock_manager_operations --------------------------- prototypes: int (*fl_compare_owner)(struct file_lock *, struct file_lock *); void (*fl_notify)(struct file_lock *); /* unblock callback */ - int (*fl_grant)(struct file_lock *, struct file_lock *, int); void (*fl_release_private)(struct file_lock *); void (*fl_break)(struct file_lock *); /* break_lease callback */ - int (*fl_mylease)(struct file_lock *, struct file_lock *); - int (*fl_change)(struct file_lock **, int); locking rules: - file_lock_lock may block -fl_compare_owner: yes no -fl_notify: yes no -fl_grant: no no -fl_release_private: maybe no -fl_break: yes no -fl_mylease: yes no -fl_change yes no - + BKL may block +fl_compare_owner: yes no +fl_notify: yes no +fl_release_private: yes yes +fl_break: yes no + + Currently only NFSD and NLM provide instances of this class. None of the +them block. If you have out-of-tree instances - please, show up. Locking +in that area will change. --------------------------- buffer_head ----------------------------------- prototypes: void (*b_end_io)(struct buffer_head *bh, int uptodate); @@ -377,17 +364,17 @@ prototypes: void (*swap_slot_free_notify) (struct block_device *, unsigned long); locking rules: - bd_mutex -open: yes -release: yes -ioctl: no -compat_ioctl: no -direct_access: no -media_changed: no -unlock_native_capacity: no -revalidate_disk: no -getgeo: no -swap_slot_free_notify: no (see below) + BKL bd_mutex +open: no yes +release: no yes +ioctl: no no +compat_ioctl: no no +direct_access: no no +media_changed: no no +unlock_native_capacity: no no +revalidate_disk: no no +getgeo: no no +swap_slot_free_notify: no no (see below) media_changed, unlock_native_capacity and revalidate_disk are called only from check_disk_change(). @@ -426,21 +413,34 @@ prototypes: unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long); int (*check_flags)(int); - int (*flock) (struct file *, int, struct file_lock *); - ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, - size_t, unsigned int); - ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, - size_t, unsigned int); - int (*setlease)(struct file *, long, struct file_lock **); }; locking rules: - All may block except for ->setlease. - No VFS locks held on entry except for ->fsync and ->setlease. - -->fsync() has i_mutex on inode. - -->setlease has the file_list_lock held and must not sleep. + All may block. + BKL +llseek: no (see below) +read: no +aio_read: no +write: no +aio_write: no +readdir: no +poll: no +unlocked_ioctl: no +compat_ioctl: no +mmap: no +open: no +flush: no +release: no +fsync: no (see below) +aio_fsync: no +fasync: no +lock: yes +readv: no +writev: no +sendfile: no +sendpage: no +get_unmapped_area: no +check_flags: no ->llseek() locking has moved from llseek to the individual llseek implementations. If your fs is not using generic_file_llseek, you @@ -450,10 +450,17 @@ mutex or just to use i_size_read() instead. Note: this does not protect the file->f_pos against concurrent modifications since this is something the userspace has to take care about. -->fasync() is responsible for maintaining the FASYNC bit in filp->f_flags. -Most instances call fasync_helper(), which does that maintenance, so it's -not normally something one needs to worry about. Return values > 0 will be -mapped to zero in the VFS layer. +Note: ext2_release() was *the* source of contention on fs-intensive +loads and dropping BKL on ->release() helps to get rid of that (we still +grab BKL for cases when we close a file that had been opened r/w, but that +can and should be done using the internal locking with smaller critical areas). +Current worst offender is ext2_get_block()... + +->fasync() is called without BKL protection, and is responsible for +maintaining the FASYNC bit in filp->f_flags. Most instances call +fasync_helper(), which does that maintenance, so it's not normally +something one needs to worry about. Return values > 0 will be mapped to +zero in the VFS layer. ->readdir() and ->ioctl() on directories must be changed. Ideally we would move ->readdir() to inode_operations and use a separate method for directory @@ -464,6 +471,8 @@ components. And there are other reasons why the current interface is a mess... ->read on directories probably must go away - we should just enforce -EISDIR in sys_read() and friends. +->fsync() has i_mutex on inode. + --------------------------- dquot_operations ------------------------------- prototypes: int (*write_dquot) (struct dquot *); @@ -498,12 +507,12 @@ prototypes: int (*access)(struct vm_area_struct *, unsigned long, void*, int, int); locking rules: - mmap_sem PageLocked(page) -open: yes -close: yes -fault: yes can return with page locked -page_mkwrite: yes can return with page locked -access: yes + BKL mmap_sem PageLocked(page) +open: no yes +close: no yes +fault: no yes can return with page locked +page_mkwrite: no yes can return with page locked +access: no yes ->fault() is called when a previously not present pte is about to be faulted in. The filesystem must find and return the page associated @@ -530,3 +539,6 @@ VM_IO | VM_PFNMAP VMAs. (if you break something or notice that it is broken and do not fix it yourself - at least put it here) + +ipc/shm.c::shm_delete() - may need BKL. +->read() and ->write() in many drivers are (probably) missing BKL. diff --git a/trunk/Documentation/kernel-docs.txt b/trunk/Documentation/kernel-docs.txt index 9a8674629a07..715eaaf1519d 100644 --- a/trunk/Documentation/kernel-docs.txt +++ b/trunk/Documentation/kernel-docs.txt @@ -537,7 +537,7 @@ Notes: Further information in http://www.oreilly.com/catalog/linuxdrive2/ - * Title: "Linux Device Drivers, 3rd Edition" + * Title: "Linux Device Drivers, 3nd Edition" Authors: Jonathan Corbet, Alessandro Rubini, and Greg Kroah-Hartman Publisher: O'Reilly & Associates. Date: 2005. @@ -592,6 +592,14 @@ Pages: 600. ISBN: 0-13-101908-2 + * Title: "The Design and Implementation of the 4.4 BSD UNIX + Operating System" + Author: Marshall Kirk McKusick, Keith Bostic, Michael J. Karels, + John S. Quarterman. + Publisher: Addison-Wesley. + Date: 1996. + ISBN: 0-201-54979-4 + * Title: "Programming for the real world - POSIX.4" Author: Bill O. Gallmeister. Publisher: O'Reilly & Associates, Inc.. @@ -602,13 +610,28 @@ POSIX. Good reference. * Title: "UNIX Systems for Modern Architectures: Symmetric - Multiprocessing and Caching for Kernel Programmers" + Multiprocesssing and Caching for Kernel Programmers" Author: Curt Schimmel. Publisher: Addison Wesley. Date: June, 1994. Pages: 432. ISBN: 0-201-63338-8 + * Title: "The Design and Implementation of the 4.3 BSD UNIX + Operating System" + Author: Samuel J. Leffler, Marshall Kirk McKusick, Michael J. + Karels, John S. Quarterman. + Publisher: Addison-Wesley. + Date: 1989 (reprinted with corrections on October, 1990). + ISBN: 0-201-06196-1 + + * Title: "The Design of the UNIX Operating System" + Author: Maurice J. Bach. + Publisher: Prentice Hall. + Date: 1986. + Pages: 471. + ISBN: 0-13-201757-1 + MISCELLANEOUS: * Name: linux/Documentation diff --git a/trunk/Documentation/kernel-parameters.txt b/trunk/Documentation/kernel-parameters.txt index f3dc951e949f..8b61c9360999 100644 --- a/trunk/Documentation/kernel-parameters.txt +++ b/trunk/Documentation/kernel-parameters.txt @@ -1579,12 +1579,20 @@ and is between 256 and 4096 characters. It is defined in the file nmi_watchdog= [KNL,BUGS=X86] Debugging features for SMP kernels Format: [panic,][num] - Valid num: 0 + Valid num: 0,1,2 0 - turn nmi_watchdog off + 1 - use the IO-APIC timer for the NMI watchdog + 2 - use the local APIC for the NMI watchdog using + a performance counter. Note: This will use one + performance counter and the local APIC's performance + vector. When panic is specified, panic when an NMI watchdog timeout occurs. This is useful when you use a panic=... timeout and need the box quickly up again. + Instead of 1 and 2 it is possible to use the following + symbolic names: lapic and ioapic + Example: nmi_watchdog=2 or nmi_watchdog=panic,lapic netpoll.carrier_timeout= [NET] Specifies amount of time (in seconds) that @@ -1614,8 +1622,6 @@ and is between 256 and 4096 characters. It is defined in the file noapic [SMP,APIC] Tells the kernel to not make use of any IOAPICs that may be present in the system. - noautogroup Disable scheduler automatic task group creation. - nobats [PPC] Do not use BATs for mapping kernel lowmem on "Classic" PPC cores. @@ -1753,7 +1759,7 @@ and is between 256 and 4096 characters. It is defined in the file nousb [USB] Disable the USB subsystem - nowatchdog [KNL] Disable the lockup detector (NMI watchdog). + nowatchdog [KNL] Disable the lockup detector. nowb [ARM] @@ -2461,13 +2467,12 @@ and is between 256 and 4096 characters. It is defined in the file to facilitate early boot debugging. See also Documentation/trace/events.txt - tsc= Disable clocksource stability checks for TSC. + tsc= Disable clocksource-must-verify flag for TSC. Format: [x86] reliable: mark tsc clocksource as reliable, this - disables clocksource verification at runtime, as well - as the stability checks done at bootup. Used to enable - high-resolution timer mode on older hardware, and in - virtualized environment. + disables clocksource verification at runtime. + Used to enable high-resolution timer mode on older + hardware, and in virtualized environment. [x86] noirqtime: Do not use TSC to do irq accounting. Used to run time disable IRQ_TIME_ACCOUNTING on any platforms where RDTSC is slow and this accounting diff --git a/trunk/Documentation/trace/events-power.txt b/trunk/Documentation/trace/events-power.txt deleted file mode 100644 index 96d87b67fe37..000000000000 --- a/trunk/Documentation/trace/events-power.txt +++ /dev/null @@ -1,90 +0,0 @@ - - Subsystem Trace Points: power - -The power tracing system captures events related to power transitions -within the kernel. Broadly speaking there are three major subheadings: - - o Power state switch which reports events related to suspend (S-states), - cpuidle (C-states) and cpufreq (P-states) - o System clock related changes - o Power domains related changes and transitions - -This document describes what each of the tracepoints is and why they -might be useful. - -Cf. include/trace/events/power.h for the events definitions. - -1. Power state switch events -============================ - -1.1 New trace API ------------------ - -A 'cpu' event class gathers the CPU-related events: cpuidle and -cpufreq. - -cpu_idle "state=%lu cpu_id=%lu" -cpu_frequency "state=%lu cpu_id=%lu" - -A suspend event is used to indicate the system going in and out of the -suspend mode: - -machine_suspend "state=%lu" - - -Note: the value of '-1' or '4294967295' for state means an exit from the current state, -i.e. trace_cpu_idle(4, smp_processor_id()) means that the system -enters the idle state 4, while trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id()) -means that the system exits the previous idle state. - -The event which has 'state=4294967295' in the trace is very important to the user -space tools which are using it to detect the end of the current state, and so to -correctly draw the states diagrams and to calculate accurate statistics etc. - -1.2 DEPRECATED trace API ------------------------- - -A new Kconfig option CONFIG_EVENT_POWER_TRACING_DEPRECATED with the default value of -'y' has been created. This allows the legacy trace power API to be used conjointly -with the new trace API. -The Kconfig option, the old trace API (in include/trace/events/power.h) and the -old trace points will disappear in a future release (namely 2.6.41). - -power_start "type=%lu state=%lu cpu_id=%lu" -power_frequency "type=%lu state=%lu cpu_id=%lu" -power_end "cpu_id=%lu" - -The 'type' parameter takes one of those macros: - . POWER_NONE = 0, - . POWER_CSTATE = 1, /* C-State */ - . POWER_PSTATE = 2, /* Fequency change or DVFS */ - -The 'state' parameter is set depending on the type: - . Target C-state for type=POWER_CSTATE, - . Target frequency for type=POWER_PSTATE, - -power_end is used to indicate the exit of a state, corresponding to the latest -power_start event. - -2. Clocks events -================ -The clock events are used for clock enable/disable and for -clock rate change. - -clock_enable "%s state=%lu cpu_id=%lu" -clock_disable "%s state=%lu cpu_id=%lu" -clock_set_rate "%s state=%lu cpu_id=%lu" - -The first parameter gives the clock name (e.g. "gpio1_iclk"). -The second parameter is '1' for enable, '0' for disable, the target -clock rate for set_rate. - -3. Power domains events -======================= -The power domain events are used for power domains transitions - -power_domain_target "%s state=%lu cpu_id=%lu" - -The first parameter gives the power domain name (e.g. "mpu_pwrdm"). -The second parameter is the power domain target state. - diff --git a/trunk/Documentation/x86/boot.txt b/trunk/Documentation/x86/boot.txt index bdeb81ccb5f6..30b43e1b2697 100644 --- a/trunk/Documentation/x86/boot.txt +++ b/trunk/Documentation/x86/boot.txt @@ -600,7 +600,6 @@ Protocol: 2.07+ 0x00000001 lguest 0x00000002 Xen 0x00000003 Moorestown MID - 0x00000004 CE4100 TV Platform Field name: hardware_subarch_data Type: write (subarch-dependent) diff --git a/trunk/MAINTAINERS b/trunk/MAINTAINERS index c5c7292daba0..71e40f9118df 100644 --- a/trunk/MAINTAINERS +++ b/trunk/MAINTAINERS @@ -792,14 +792,11 @@ S: Maintained ARM/NOMADIK ARCHITECTURE M: Alessandro Rubini -M: Linus Walleij M: STEricsson L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained F: arch/arm/mach-nomadik/ F: arch/arm/plat-nomadik/ -F: drivers/i2c/busses/i2c-nomadik.c -T: git git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-stericsson.git ARM/OPENMOKO NEO FREERUNNER (GTA02) MACHINE SUPPORT M: Nelson Castillo @@ -1001,24 +998,12 @@ F: drivers/i2c/busses/i2c-stu300.c F: drivers/rtc/rtc-coh901331.c F: drivers/watchdog/coh901327_wdt.c F: drivers/dma/coh901318* -F: drivers/mfd/ab3100* -F: drivers/rtc/rtc-ab3100.c -F: drivers/rtc/rtc-coh901331.c -T: git git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-stericsson.git -ARM/Ux500 ARM ARCHITECTURE +ARM/U8500 ARM ARCHITECTURE M: Srinidhi Kasagar -M: Linus Walleij L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained F: arch/arm/mach-ux500/ -F: drivers/dma/ste_dma40* -F: drivers/mfd/ab3550* -F: drivers/mfd/abx500* -F: drivers/mfd/ab8500* -F: drivers/mfd/stmpe* -F: drivers/rtc/rtc-ab8500.c -T: git git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-stericsson.git ARM/VFP SUPPORT M: Russell King @@ -2812,10 +2797,6 @@ M: Thomas Gleixner S: Maintained F: Documentation/timers/ F: kernel/hrtimer.c -F: kernel/time/clockevents.c -F: kernel/time/tick*.* -F: kernel/time/timer_*.c -F include/linux/clockevents.h F: include/linux/hrtimer.h HIGH-SPEED SCC DRIVER FOR AX.25 @@ -4631,7 +4612,7 @@ PERFORMANCE EVENTS SUBSYSTEM M: Peter Zijlstra M: Paul Mackerras M: Ingo Molnar -M: Arnaldo Carvalho de Melo +M: Arnaldo Carvalho de Melo S: Supported F: kernel/perf_event*.c F: include/linux/perf_event.h @@ -5146,18 +5127,6 @@ L: alsa-devel@alsa-project.org (moderated for non-subscribers) S: Supported F: sound/soc/s3c24xx -TIMEKEEPING, NTP -M: John Stultz -M: Thomas Gleixner -S: Supported -F: include/linux/clocksource.h -F: include/linux/time.h -F: include/linux/timex.h -F: include/linux/timekeeping.h -F: kernel/time/clocksource.c -F: kernel/time/time*.c -F: kernel/time/ntp.c - TLG2300 VIDEO4LINUX-2 DRIVER M: Huang Shijie M: Kang Yong diff --git a/trunk/Makefile b/trunk/Makefile index 74b25559f831..e7c41f1344e5 100644 --- a/trunk/Makefile +++ b/trunk/Makefile @@ -1,7 +1,7 @@ VERSION = 2 PATCHLEVEL = 6 SUBLEVEL = 37 -EXTRAVERSION = +EXTRAVERSION = -rc8 NAME = Flesh-Eating Bats with Fangs # *DOCUMENTATION* diff --git a/trunk/arch/Kconfig b/trunk/arch/Kconfig index f78c2be4242b..8bf0fa652eb6 100644 --- a/trunk/arch/Kconfig +++ b/trunk/arch/Kconfig @@ -175,7 +175,4 @@ config HAVE_PERF_EVENTS_NMI config HAVE_ARCH_JUMP_LABEL bool -config HAVE_ARCH_MUTEX_CPU_RELAX - bool - source "kernel/gcov/Kconfig" diff --git a/trunk/arch/alpha/include/asm/perf_event.h b/trunk/arch/alpha/include/asm/perf_event.h index 5996e7a6757e..fe792ca818f6 100644 --- a/trunk/arch/alpha/include/asm/perf_event.h +++ b/trunk/arch/alpha/include/asm/perf_event.h @@ -1,4 +1,10 @@ #ifndef __ASM_ALPHA_PERF_EVENT_H #define __ASM_ALPHA_PERF_EVENT_H +#ifdef CONFIG_PERF_EVENTS +extern void init_hw_perf_events(void); +#else +static inline void init_hw_perf_events(void) { } +#endif + #endif /* __ASM_ALPHA_PERF_EVENT_H */ diff --git a/trunk/arch/alpha/kernel/irq_alpha.c b/trunk/arch/alpha/kernel/irq_alpha.c index 4c8bb374eb0a..5f77afb88e89 100644 --- a/trunk/arch/alpha/kernel/irq_alpha.c +++ b/trunk/arch/alpha/kernel/irq_alpha.c @@ -112,6 +112,8 @@ init_IRQ(void) wrent(entInt, 0); alpha_mv.init_irq(); + + init_hw_perf_events(); } /* diff --git a/trunk/arch/alpha/kernel/perf_event.c b/trunk/arch/alpha/kernel/perf_event.c index 90561c45e7d8..1cc49683fb69 100644 --- a/trunk/arch/alpha/kernel/perf_event.c +++ b/trunk/arch/alpha/kernel/perf_event.c @@ -14,7 +14,6 @@ #include #include #include -#include #include #include @@ -864,13 +863,13 @@ static void alpha_perf_event_irq_handler(unsigned long la_ptr, /* * Init call to initialise performance events at kernel startup. */ -int __init init_hw_perf_events(void) +void __init init_hw_perf_events(void) { pr_info("Performance events: "); if (!supported_cpu()) { pr_cont("No support for your CPU.\n"); - return 0; + return; } pr_cont("Supported CPU type!\n"); @@ -882,8 +881,6 @@ int __init init_hw_perf_events(void) /* And set up PMU specification */ alpha_pmu = &ev67_pmu; - perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW); - - return 0; + perf_pmu_register(&pmu); } -early_initcall(init_hw_perf_events); + diff --git a/trunk/arch/arm/include/asm/hardware/it8152.h b/trunk/arch/arm/include/asm/hardware/it8152.h index b2f95c72287c..21fa272301f8 100644 --- a/trunk/arch/arm/include/asm/hardware/it8152.h +++ b/trunk/arch/arm/include/asm/hardware/it8152.h @@ -76,7 +76,6 @@ extern unsigned long it8152_base_address; IT8152_PD_IRQ(0) Audio controller (ACR) */ #define IT8152_IRQ(x) (IRQ_BOARD_START + (x)) -#define IT8152_LAST_IRQ (IRQ_BOARD_START + 40) /* IRQ-sources in 3 groups - local devices, LPC (serial), and external PCI */ #define IT8152_LD_IRQ_COUNT 9 diff --git a/trunk/arch/arm/include/asm/highmem.h b/trunk/arch/arm/include/asm/highmem.h index 7080e2c8fa62..1fc684e70ab6 100644 --- a/trunk/arch/arm/include/asm/highmem.h +++ b/trunk/arch/arm/include/asm/highmem.h @@ -25,6 +25,9 @@ extern void *kmap_high(struct page *page); extern void *kmap_high_get(struct page *page); extern void kunmap_high(struct page *page); +extern void *kmap_high_l1_vipt(struct page *page, pte_t *saved_pte); +extern void kunmap_high_l1_vipt(struct page *page, pte_t saved_pte); + /* * The following functions are already defined by * when CONFIG_HIGHMEM is not set. diff --git a/trunk/arch/arm/include/asm/sizes.h b/trunk/arch/arm/include/asm/sizes.h index 316bb2b2be3d..4fc1565e4f93 100644 --- a/trunk/arch/arm/include/asm/sizes.h +++ b/trunk/arch/arm/include/asm/sizes.h @@ -13,6 +13,9 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +/* DO NOT EDIT!! - this file automatically generated + * from .s file by awk -f s2h.awk + */ /* Size definitions * Copyright (C) ARM Limited 1998. All rights reserved. */ @@ -22,9 +25,6 @@ /* handy sizes */ #define SZ_16 0x00000010 -#define SZ_32 0x00000020 -#define SZ_64 0x00000040 -#define SZ_128 0x00000080 #define SZ_256 0x00000100 #define SZ_512 0x00000200 diff --git a/trunk/arch/arm/include/asm/system.h b/trunk/arch/arm/include/asm/system.h index 80025948b8ad..1120f18a6b17 100644 --- a/trunk/arch/arm/include/asm/system.h +++ b/trunk/arch/arm/include/asm/system.h @@ -150,7 +150,6 @@ extern unsigned int user_debug; #define rmb() dmb() #define wmb() mb() #else -#include #define mb() do { if (arch_is_coherent()) dmb(); else barrier(); } while (0) #define rmb() do { if (arch_is_coherent()) dmb(); else barrier(); } while (0) #define wmb() do { if (arch_is_coherent()) dmb(); else barrier(); } while (0) diff --git a/trunk/arch/arm/kernel/entry-common.S b/trunk/arch/arm/kernel/entry-common.S index 80bf8cd88d7c..8bfa98757cd2 100644 --- a/trunk/arch/arm/kernel/entry-common.S +++ b/trunk/arch/arm/kernel/entry-common.S @@ -29,9 +29,6 @@ ret_fast_syscall: ldr r1, [tsk, #TI_FLAGS] tst r1, #_TIF_WORK_MASK bne fast_work_pending -#if defined(CONFIG_IRQSOFF_TRACER) - asm_trace_hardirqs_on -#endif /* perform architecture specific actions before user return */ arch_ret_to_user r1, lr @@ -68,9 +65,6 @@ ret_slow_syscall: tst r1, #_TIF_WORK_MASK bne work_pending no_work_pending: -#if defined(CONFIG_IRQSOFF_TRACER) - asm_trace_hardirqs_on -#endif /* perform architecture specific actions before user return */ arch_ret_to_user r1, lr diff --git a/trunk/arch/arm/kernel/perf_event.c b/trunk/arch/arm/kernel/perf_event.c index fdfa4976b0bf..07a50357492a 100644 --- a/trunk/arch/arm/kernel/perf_event.c +++ b/trunk/arch/arm/kernel/perf_event.c @@ -3034,11 +3034,11 @@ init_hw_perf_events(void) pr_info("no hardware support available\n"); } - perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW); + perf_pmu_register(&pmu); return 0; } -early_initcall(init_hw_perf_events); +arch_initcall(init_hw_perf_events); /* * Callchain handling code. diff --git a/trunk/arch/arm/kernel/smp.c b/trunk/arch/arm/kernel/smp.c index 9066473c0ebc..8c1959590252 100644 --- a/trunk/arch/arm/kernel/smp.c +++ b/trunk/arch/arm/kernel/smp.c @@ -310,6 +310,7 @@ asmlinkage void __cpuinit secondary_start_kernel(void) * All kernel threads share the same mm context; grab a * reference and switch to it. */ + atomic_inc(&mm->mm_users); atomic_inc(&mm->mm_count); current->active_mm = mm; cpumask_set_cpu(cpu, mm_cpumask(mm)); diff --git a/trunk/arch/arm/mach-pxa/Kconfig b/trunk/arch/arm/mach-pxa/Kconfig index c93e73d54dd1..dd235ecc9d6c 100644 --- a/trunk/arch/arm/mach-pxa/Kconfig +++ b/trunk/arch/arm/mach-pxa/Kconfig @@ -540,7 +540,6 @@ config MACH_ICONTROL config ARCH_PXA_ESERIES bool "PXA based Toshiba e-series PDAs" select PXA25x - select FB_W100 config MACH_E330 bool "Toshiba e330" diff --git a/trunk/arch/arm/mach-pxa/sleep.S b/trunk/arch/arm/mach-pxa/sleep.S index ae008110db4e..52c30b01a671 100644 --- a/trunk/arch/arm/mach-pxa/sleep.S +++ b/trunk/arch/arm/mach-pxa/sleep.S @@ -353,8 +353,8 @@ resume_turn_on_mmu: @ Let us ensure we jump to resume_after_mmu only when the mcr above @ actually took effect. They call it the "cpwait" operation. - mrc p15, 0, r0, c2, c0, 0 @ queue a dependency on CP15 - sub pc, r2, r0, lsr #32 @ jump to virtual addr + mrc p15, 0, r1, c2, c0, 0 @ queue a dependency on CP15 + sub pc, r2, r1, lsr #32 @ jump to virtual addr nop nop nop diff --git a/trunk/arch/arm/mm/cache-feroceon-l2.c b/trunk/arch/arm/mm/cache-feroceon-l2.c index e0b0e7a4ec68..6e77c042d8e9 100644 --- a/trunk/arch/arm/mm/cache-feroceon-l2.c +++ b/trunk/arch/arm/mm/cache-feroceon-l2.c @@ -13,9 +13,13 @@ */ #include -#include #include +#include +#include +#include +#include #include +#include "mm.h" /* * Low-level cache maintenance operations. @@ -35,30 +39,27 @@ * between which we don't want to be preempted. */ -static inline unsigned long l2_get_va(unsigned long paddr) +static inline unsigned long l2_start_va(unsigned long paddr) { #ifdef CONFIG_HIGHMEM /* + * Let's do our own fixmap stuff in a minimal way here. * Because range ops can't be done on physical addresses, * we simply install a virtual mapping for it only for the * TLB lookup to occur, hence no need to flush the untouched - * memory mapping afterwards (note: a cache flush may happen - * in some circumstances depending on the path taken in kunmap_atomic). + * memory mapping. This is protected with the disabling of + * interrupts by the caller. */ - void *vaddr = kmap_atomic_pfn(paddr >> PAGE_SHIFT); - return (unsigned long)vaddr + (paddr & ~PAGE_MASK); + unsigned long idx = KM_L2_CACHE + KM_TYPE_NR * smp_processor_id(); + unsigned long vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); + set_pte_ext(TOP_PTE(vaddr), pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL), 0); + local_flush_tlb_kernel_page(vaddr); + return vaddr + (paddr & ~PAGE_MASK); #else return __phys_to_virt(paddr); #endif } -static inline void l2_put_va(unsigned long vaddr) -{ -#ifdef CONFIG_HIGHMEM - kunmap_atomic((void *)vaddr); -#endif -} - static inline void l2_clean_pa(unsigned long addr) { __asm__("mcr p15, 1, %0, c15, c9, 3" : : "r" (addr)); @@ -75,14 +76,13 @@ static inline void l2_clean_pa_range(unsigned long start, unsigned long end) */ BUG_ON((start ^ end) >> PAGE_SHIFT); - va_start = l2_get_va(start); - va_end = va_start + (end - start); raw_local_irq_save(flags); + va_start = l2_start_va(start); + va_end = va_start + (end - start); __asm__("mcr p15, 1, %0, c15, c9, 4\n\t" "mcr p15, 1, %1, c15, c9, 5" : : "r" (va_start), "r" (va_end)); raw_local_irq_restore(flags); - l2_put_va(va_start); } static inline void l2_clean_inv_pa(unsigned long addr) @@ -106,14 +106,13 @@ static inline void l2_inv_pa_range(unsigned long start, unsigned long end) */ BUG_ON((start ^ end) >> PAGE_SHIFT); - va_start = l2_get_va(start); - va_end = va_start + (end - start); raw_local_irq_save(flags); + va_start = l2_start_va(start); + va_end = va_start + (end - start); __asm__("mcr p15, 1, %0, c15, c11, 4\n\t" "mcr p15, 1, %1, c15, c11, 5" : : "r" (va_start), "r" (va_end)); raw_local_irq_restore(flags); - l2_put_va(va_start); } static inline void l2_inv_all(void) diff --git a/trunk/arch/arm/mm/cache-xsc3l2.c b/trunk/arch/arm/mm/cache-xsc3l2.c index 5a32020471e3..c3154928bccd 100644 --- a/trunk/arch/arm/mm/cache-xsc3l2.c +++ b/trunk/arch/arm/mm/cache-xsc3l2.c @@ -17,10 +17,14 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include -#include #include #include #include +#include +#include +#include +#include +#include "mm.h" #define CR_L2 (1 << 26) @@ -67,15 +71,16 @@ static inline void xsc3_l2_inv_all(void) dsb(); } -static inline void l2_unmap_va(unsigned long va) -{ #ifdef CONFIG_HIGHMEM - if (va != -1) - kunmap_atomic((void *)va); +#define l2_map_save_flags(x) raw_local_save_flags(x) +#define l2_map_restore_flags(x) raw_local_irq_restore(x) +#else +#define l2_map_save_flags(x) ((x) = 0) +#define l2_map_restore_flags(x) ((void)(x)) #endif -} -static inline unsigned long l2_map_va(unsigned long pa, unsigned long prev_va) +static inline unsigned long l2_map_va(unsigned long pa, unsigned long prev_va, + unsigned long flags) { #ifdef CONFIG_HIGHMEM unsigned long va = prev_va & PAGE_MASK; @@ -84,10 +89,17 @@ static inline unsigned long l2_map_va(unsigned long pa, unsigned long prev_va) /* * Switching to a new page. Because cache ops are * using virtual addresses only, we must put a mapping - * in place for it. + * in place for it. We also enable interrupts for a + * short while and disable them again to protect this + * mapping. */ - l2_unmap_va(prev_va); - va = (unsigned long)kmap_atomic_pfn(pa >> PAGE_SHIFT); + unsigned long idx; + raw_local_irq_restore(flags); + idx = KM_L2_CACHE + KM_TYPE_NR * smp_processor_id(); + va = __fix_to_virt(FIX_KMAP_BEGIN + idx); + raw_local_irq_restore(flags | PSR_I_BIT); + set_pte_ext(TOP_PTE(va), pfn_pte(pa >> PAGE_SHIFT, PAGE_KERNEL), 0); + local_flush_tlb_kernel_page(va); } return va + (pa_offset >> (32 - PAGE_SHIFT)); #else @@ -97,7 +109,7 @@ static inline unsigned long l2_map_va(unsigned long pa, unsigned long prev_va) static void xsc3_l2_inv_range(unsigned long start, unsigned long end) { - unsigned long vaddr; + unsigned long vaddr, flags; if (start == 0 && end == -1ul) { xsc3_l2_inv_all(); @@ -105,12 +117,13 @@ static void xsc3_l2_inv_range(unsigned long start, unsigned long end) } vaddr = -1; /* to force the first mapping */ + l2_map_save_flags(flags); /* * Clean and invalidate partial first cache line. */ if (start & (CACHE_LINE_SIZE - 1)) { - vaddr = l2_map_va(start & ~(CACHE_LINE_SIZE - 1), vaddr); + vaddr = l2_map_va(start & ~(CACHE_LINE_SIZE - 1), vaddr, flags); xsc3_l2_clean_mva(vaddr); xsc3_l2_inv_mva(vaddr); start = (start | (CACHE_LINE_SIZE - 1)) + 1; @@ -120,7 +133,7 @@ static void xsc3_l2_inv_range(unsigned long start, unsigned long end) * Invalidate all full cache lines between 'start' and 'end'. */ while (start < (end & ~(CACHE_LINE_SIZE - 1))) { - vaddr = l2_map_va(start, vaddr); + vaddr = l2_map_va(start, vaddr, flags); xsc3_l2_inv_mva(vaddr); start += CACHE_LINE_SIZE; } @@ -129,30 +142,31 @@ static void xsc3_l2_inv_range(unsigned long start, unsigned long end) * Clean and invalidate partial last cache line. */ if (start < end) { - vaddr = l2_map_va(start, vaddr); + vaddr = l2_map_va(start, vaddr, flags); xsc3_l2_clean_mva(vaddr); xsc3_l2_inv_mva(vaddr); } - l2_unmap_va(vaddr); + l2_map_restore_flags(flags); dsb(); } static void xsc3_l2_clean_range(unsigned long start, unsigned long end) { - unsigned long vaddr; + unsigned long vaddr, flags; vaddr = -1; /* to force the first mapping */ + l2_map_save_flags(flags); start &= ~(CACHE_LINE_SIZE - 1); while (start < end) { - vaddr = l2_map_va(start, vaddr); + vaddr = l2_map_va(start, vaddr, flags); xsc3_l2_clean_mva(vaddr); start += CACHE_LINE_SIZE; } - l2_unmap_va(vaddr); + l2_map_restore_flags(flags); dsb(); } @@ -179,7 +193,7 @@ static inline void xsc3_l2_flush_all(void) static void xsc3_l2_flush_range(unsigned long start, unsigned long end) { - unsigned long vaddr; + unsigned long vaddr, flags; if (start == 0 && end == -1ul) { xsc3_l2_flush_all(); @@ -187,16 +201,17 @@ static void xsc3_l2_flush_range(unsigned long start, unsigned long end) } vaddr = -1; /* to force the first mapping */ + l2_map_save_flags(flags); start &= ~(CACHE_LINE_SIZE - 1); while (start < end) { - vaddr = l2_map_va(start, vaddr); + vaddr = l2_map_va(start, vaddr, flags); xsc3_l2_clean_mva(vaddr); xsc3_l2_inv_mva(vaddr); start += CACHE_LINE_SIZE; } - l2_unmap_va(vaddr); + l2_map_restore_flags(flags); dsb(); } diff --git a/trunk/arch/arm/mm/dma-mapping.c b/trunk/arch/arm/mm/dma-mapping.c index 809f1bf9fa29..ac6a36142fcd 100644 --- a/trunk/arch/arm/mm/dma-mapping.c +++ b/trunk/arch/arm/mm/dma-mapping.c @@ -17,7 +17,6 @@ #include #include #include -#include #include #include @@ -481,10 +480,10 @@ static void dma_cache_maint_page(struct page *page, unsigned long offset, op(vaddr, len, dir); kunmap_high(page); } else if (cache_is_vipt()) { - /* unmapped pages might still be cached */ - vaddr = kmap_atomic(page); + pte_t saved_pte; + vaddr = kmap_high_l1_vipt(page, &saved_pte); op(vaddr + offset, len, dir); - kunmap_atomic(vaddr); + kunmap_high_l1_vipt(page, saved_pte); } } else { vaddr = page_address(page) + offset; diff --git a/trunk/arch/arm/mm/flush.c b/trunk/arch/arm/mm/flush.c index c29f2839f1d2..391ffae75098 100644 --- a/trunk/arch/arm/mm/flush.c +++ b/trunk/arch/arm/mm/flush.c @@ -10,7 +10,6 @@ #include #include #include -#include #include #include @@ -181,10 +180,10 @@ void __flush_dcache_page(struct address_space *mapping, struct page *page) __cpuc_flush_dcache_area(addr, PAGE_SIZE); kunmap_high(page); } else if (cache_is_vipt()) { - /* unmapped pages might still be cached */ - addr = kmap_atomic(page); + pte_t saved_pte; + addr = kmap_high_l1_vipt(page, &saved_pte); __cpuc_flush_dcache_area(addr, PAGE_SIZE); - kunmap_atomic(addr); + kunmap_high_l1_vipt(page, saved_pte); } } diff --git a/trunk/arch/arm/mm/highmem.c b/trunk/arch/arm/mm/highmem.c index 807c0573abbe..c435fd9e1da9 100644 --- a/trunk/arch/arm/mm/highmem.c +++ b/trunk/arch/arm/mm/highmem.c @@ -140,3 +140,90 @@ struct page *kmap_atomic_to_page(const void *ptr) pte = TOP_PTE(vaddr); return pte_page(*pte); } + +#ifdef CONFIG_CPU_CACHE_VIPT + +#include + +/* + * The VIVT cache of a highmem page is always flushed before the page + * is unmapped. Hence unmapped highmem pages need no cache maintenance + * in that case. + * + * However unmapped pages may still be cached with a VIPT cache, and + * it is not possible to perform cache maintenance on them using physical + * addresses unfortunately. So we have no choice but to set up a temporary + * virtual mapping for that purpose. + * + * Yet this VIPT cache maintenance may be triggered from DMA support + * functions which are possibly called from interrupt context. As we don't + * want to keep interrupt disabled all the time when such maintenance is + * taking place, we therefore allow for some reentrancy by preserving and + * restoring the previous fixmap entry before the interrupted context is + * resumed. If the reentrancy depth is 0 then there is no need to restore + * the previous fixmap, and leaving the current one in place allow it to + * be reused the next time without a TLB flush (common with DMA). + */ + +static DEFINE_PER_CPU(int, kmap_high_l1_vipt_depth); + +void *kmap_high_l1_vipt(struct page *page, pte_t *saved_pte) +{ + unsigned int idx, cpu; + int *depth; + unsigned long vaddr, flags; + pte_t pte, *ptep; + + if (!in_interrupt()) + preempt_disable(); + + cpu = smp_processor_id(); + depth = &per_cpu(kmap_high_l1_vipt_depth, cpu); + + idx = KM_L1_CACHE + KM_TYPE_NR * cpu; + vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); + ptep = TOP_PTE(vaddr); + pte = mk_pte(page, kmap_prot); + + raw_local_irq_save(flags); + (*depth)++; + if (pte_val(*ptep) == pte_val(pte)) { + *saved_pte = pte; + } else { + *saved_pte = *ptep; + set_pte_ext(ptep, pte, 0); + local_flush_tlb_kernel_page(vaddr); + } + raw_local_irq_restore(flags); + + return (void *)vaddr; +} + +void kunmap_high_l1_vipt(struct page *page, pte_t saved_pte) +{ + unsigned int idx, cpu = smp_processor_id(); + int *depth = &per_cpu(kmap_high_l1_vipt_depth, cpu); + unsigned long vaddr, flags; + pte_t pte, *ptep; + + idx = KM_L1_CACHE + KM_TYPE_NR * cpu; + vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); + ptep = TOP_PTE(vaddr); + pte = mk_pte(page, kmap_prot); + + BUG_ON(pte_val(*ptep) != pte_val(pte)); + BUG_ON(*depth <= 0); + + raw_local_irq_save(flags); + (*depth)--; + if (*depth != 0 && pte_val(pte) != pte_val(saved_pte)) { + set_pte_ext(ptep, saved_pte, 0); + local_flush_tlb_kernel_page(vaddr); + } + raw_local_irq_restore(flags); + + if (!in_interrupt()) + preempt_enable(); +} + +#endif /* CONFIG_CPU_CACHE_VIPT */ diff --git a/trunk/arch/mips/kernel/perf_event_mipsxx.c b/trunk/arch/mips/kernel/perf_event_mipsxx.c index 183e0d226669..5c7c6fc07565 100644 --- a/trunk/arch/mips/kernel/perf_event_mipsxx.c +++ b/trunk/arch/mips/kernel/perf_event_mipsxx.c @@ -1047,6 +1047,6 @@ init_hw_perf_events(void) return 0; } -early_initcall(init_hw_perf_events); +arch_initcall(init_hw_perf_events); #endif /* defined(CONFIG_CPU_MIPS32)... */ diff --git a/trunk/arch/mn10300/kernel/irq.c b/trunk/arch/mn10300/kernel/irq.c index ac11754ecec5..c2e44597c22b 100644 --- a/trunk/arch/mn10300/kernel/irq.c +++ b/trunk/arch/mn10300/kernel/irq.c @@ -459,7 +459,7 @@ void migrate_irqs(void) tmp = CROSS_GxICR(irq, new); x &= GxICR_LEVEL | GxICR_ENABLE; - if (GxICR(irq) & GxICR_REQUEST) + if (GxICR(irq) & GxICR_REQUEST) { x |= GxICR_REQUEST | GxICR_DETECT; CROSS_GxICR(irq, new) = x; tmp = CROSS_GxICR(irq, new); diff --git a/trunk/arch/powerpc/kernel/e500-pmu.c b/trunk/arch/powerpc/kernel/e500-pmu.c index b150b510510f..7c07de0d8943 100644 --- a/trunk/arch/powerpc/kernel/e500-pmu.c +++ b/trunk/arch/powerpc/kernel/e500-pmu.c @@ -126,4 +126,4 @@ static int init_e500_pmu(void) return register_fsl_emb_pmu(&e500_pmu); } -early_initcall(init_e500_pmu); +arch_initcall(init_e500_pmu); diff --git a/trunk/arch/powerpc/kernel/mpc7450-pmu.c b/trunk/arch/powerpc/kernel/mpc7450-pmu.c index 2cc5e0301d0b..09d72028f317 100644 --- a/trunk/arch/powerpc/kernel/mpc7450-pmu.c +++ b/trunk/arch/powerpc/kernel/mpc7450-pmu.c @@ -414,4 +414,4 @@ static int init_mpc7450_pmu(void) return register_power_pmu(&mpc7450_pmu); } -early_initcall(init_mpc7450_pmu); +arch_initcall(init_mpc7450_pmu); diff --git a/trunk/arch/powerpc/kernel/perf_event.c b/trunk/arch/powerpc/kernel/perf_event.c index 567480705789..3129c855933c 100644 --- a/trunk/arch/powerpc/kernel/perf_event.c +++ b/trunk/arch/powerpc/kernel/perf_event.c @@ -1379,7 +1379,7 @@ int register_power_pmu(struct power_pmu *pmu) freeze_events_kernel = MMCR0_FCHV; #endif /* CONFIG_PPC64 */ - perf_pmu_register(&power_pmu, "cpu", PERF_TYPE_RAW); + perf_pmu_register(&power_pmu); perf_cpu_notifier(power_pmu_notifier); return 0; diff --git a/trunk/arch/powerpc/kernel/perf_event_fsl_emb.c b/trunk/arch/powerpc/kernel/perf_event_fsl_emb.c index 4dcf5f831e9d..7ecca59ddf77 100644 --- a/trunk/arch/powerpc/kernel/perf_event_fsl_emb.c +++ b/trunk/arch/powerpc/kernel/perf_event_fsl_emb.c @@ -681,7 +681,7 @@ int register_fsl_emb_pmu(struct fsl_emb_pmu *pmu) pr_info("%s performance monitor hardware support registered\n", pmu->name); - perf_pmu_register(&fsl_emb_pmu, "cpu", PERF_TYPE_RAW); + perf_pmu_register(&fsl_emb_pmu); return 0; } diff --git a/trunk/arch/powerpc/kernel/power4-pmu.c b/trunk/arch/powerpc/kernel/power4-pmu.c index ead8b3c2649e..2a361cdda635 100644 --- a/trunk/arch/powerpc/kernel/power4-pmu.c +++ b/trunk/arch/powerpc/kernel/power4-pmu.c @@ -613,4 +613,4 @@ static int init_power4_pmu(void) return register_power_pmu(&power4_pmu); } -early_initcall(init_power4_pmu); +arch_initcall(init_power4_pmu); diff --git a/trunk/arch/powerpc/kernel/power5+-pmu.c b/trunk/arch/powerpc/kernel/power5+-pmu.c index eca0ac595cb6..199de527d411 100644 --- a/trunk/arch/powerpc/kernel/power5+-pmu.c +++ b/trunk/arch/powerpc/kernel/power5+-pmu.c @@ -682,4 +682,4 @@ static int init_power5p_pmu(void) return register_power_pmu(&power5p_pmu); } -early_initcall(init_power5p_pmu); +arch_initcall(init_power5p_pmu); diff --git a/trunk/arch/powerpc/kernel/power5-pmu.c b/trunk/arch/powerpc/kernel/power5-pmu.c index d5ff0f64a5e6..98b6a729a9dd 100644 --- a/trunk/arch/powerpc/kernel/power5-pmu.c +++ b/trunk/arch/powerpc/kernel/power5-pmu.c @@ -621,4 +621,4 @@ static int init_power5_pmu(void) return register_power_pmu(&power5_pmu); } -early_initcall(init_power5_pmu); +arch_initcall(init_power5_pmu); diff --git a/trunk/arch/powerpc/kernel/power6-pmu.c b/trunk/arch/powerpc/kernel/power6-pmu.c index 31603927e376..84a607bda8fb 100644 --- a/trunk/arch/powerpc/kernel/power6-pmu.c +++ b/trunk/arch/powerpc/kernel/power6-pmu.c @@ -544,4 +544,4 @@ static int init_power6_pmu(void) return register_power_pmu(&power6_pmu); } -early_initcall(init_power6_pmu); +arch_initcall(init_power6_pmu); diff --git a/trunk/arch/powerpc/kernel/power7-pmu.c b/trunk/arch/powerpc/kernel/power7-pmu.c index 593740fcb799..852f7b7f6b40 100644 --- a/trunk/arch/powerpc/kernel/power7-pmu.c +++ b/trunk/arch/powerpc/kernel/power7-pmu.c @@ -369,4 +369,4 @@ static int init_power7_pmu(void) return register_power_pmu(&power7_pmu); } -early_initcall(init_power7_pmu); +arch_initcall(init_power7_pmu); diff --git a/trunk/arch/powerpc/kernel/ppc970-pmu.c b/trunk/arch/powerpc/kernel/ppc970-pmu.c index 9a6e093858fe..3fee685de4df 100644 --- a/trunk/arch/powerpc/kernel/ppc970-pmu.c +++ b/trunk/arch/powerpc/kernel/ppc970-pmu.c @@ -494,4 +494,4 @@ static int init_ppc970_pmu(void) return register_power_pmu(&ppc970_pmu); } -early_initcall(init_ppc970_pmu); +arch_initcall(init_ppc970_pmu); diff --git a/trunk/arch/s390/Kconfig b/trunk/arch/s390/Kconfig index 6c6d7b339aae..e0b98e71ff47 100644 --- a/trunk/arch/s390/Kconfig +++ b/trunk/arch/s390/Kconfig @@ -99,7 +99,6 @@ config S390 select HAVE_KERNEL_LZMA select HAVE_KERNEL_LZO select HAVE_GET_USER_PAGES_FAST - select HAVE_ARCH_MUTEX_CPU_RELAX select ARCH_INLINE_SPIN_TRYLOCK select ARCH_INLINE_SPIN_TRYLOCK_BH select ARCH_INLINE_SPIN_LOCK diff --git a/trunk/arch/s390/include/asm/mutex.h b/trunk/arch/s390/include/asm/mutex.h index 688271f5f2e4..458c1f7fbc18 100644 --- a/trunk/arch/s390/include/asm/mutex.h +++ b/trunk/arch/s390/include/asm/mutex.h @@ -7,5 +7,3 @@ */ #include - -#define arch_mutex_cpu_relax() barrier() diff --git a/trunk/arch/sh/kernel/cpu/sh4/perf_event.c b/trunk/arch/sh/kernel/cpu/sh4/perf_event.c index 748955df018d..dbf3b4bb71fe 100644 --- a/trunk/arch/sh/kernel/cpu/sh4/perf_event.c +++ b/trunk/arch/sh/kernel/cpu/sh4/perf_event.c @@ -250,4 +250,4 @@ static int __init sh7750_pmu_init(void) return register_sh_pmu(&sh7750_pmu); } -early_initcall(sh7750_pmu_init); +arch_initcall(sh7750_pmu_init); diff --git a/trunk/arch/sh/kernel/cpu/sh4a/perf_event.c b/trunk/arch/sh/kernel/cpu/sh4a/perf_event.c index 17e6bebfede0..580276525731 100644 --- a/trunk/arch/sh/kernel/cpu/sh4a/perf_event.c +++ b/trunk/arch/sh/kernel/cpu/sh4a/perf_event.c @@ -284,4 +284,4 @@ static int __init sh4a_pmu_init(void) return register_sh_pmu(&sh4a_pmu); } -early_initcall(sh4a_pmu_init); +arch_initcall(sh4a_pmu_init); diff --git a/trunk/arch/sh/kernel/perf_event.c b/trunk/arch/sh/kernel/perf_event.c index 2ee21a47b5af..5a4b33435650 100644 --- a/trunk/arch/sh/kernel/perf_event.c +++ b/trunk/arch/sh/kernel/perf_event.c @@ -389,7 +389,7 @@ int __cpuinit register_sh_pmu(struct sh_pmu *_pmu) WARN_ON(_pmu->num_events > MAX_HWEVENTS); - perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW); + perf_pmu_register(&pmu); perf_cpu_notifier(sh_pmu_notifier); return 0; } diff --git a/trunk/arch/sparc/include/asm/perf_event.h b/trunk/arch/sparc/include/asm/perf_event.h index 4d3dbe3703e9..6e8bfa1786da 100644 --- a/trunk/arch/sparc/include/asm/perf_event.h +++ b/trunk/arch/sparc/include/asm/perf_event.h @@ -4,6 +4,8 @@ #ifdef CONFIG_PERF_EVENTS #include +extern void init_hw_perf_events(void); + #define perf_arch_fetch_caller_regs(regs, ip) \ do { \ unsigned long _pstate, _asi, _pil, _i7, _fp; \ @@ -24,6 +26,8 @@ do { \ (regs)->u_regs[UREG_I6] = _fp; \ (regs)->u_regs[UREG_I7] = _i7; \ } while (0) +#else +static inline void init_hw_perf_events(void) { } #endif #endif diff --git a/trunk/arch/sparc/kernel/nmi.c b/trunk/arch/sparc/kernel/nmi.c index 300f810142f5..a4bd7ba74c89 100644 --- a/trunk/arch/sparc/kernel/nmi.c +++ b/trunk/arch/sparc/kernel/nmi.c @@ -270,6 +270,8 @@ int __init nmi_init(void) atomic_set(&nmi_active, -1); } } + if (!err) + init_hw_perf_events(); return err; } diff --git a/trunk/arch/sparc/kernel/perf_event.c b/trunk/arch/sparc/kernel/perf_event.c index 760578687e7c..0d6deb55a2ae 100644 --- a/trunk/arch/sparc/kernel/perf_event.c +++ b/trunk/arch/sparc/kernel/perf_event.c @@ -1307,23 +1307,20 @@ static bool __init supported_pmu(void) return false; } -int __init init_hw_perf_events(void) +void __init init_hw_perf_events(void) { pr_info("Performance events: "); if (!supported_pmu()) { pr_cont("No support for PMU type '%s'\n", sparc_pmu_type); - return 0; + return; } pr_cont("Supported PMU type is '%s'\n", sparc_pmu_type); - perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW); + perf_pmu_register(&pmu); register_die_notifier(&perf_event_nmi_notifier); - - return 0; } -early_initcall(init_hw_perf_events); void perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs) diff --git a/trunk/arch/x86/Kconfig b/trunk/arch/x86/Kconfig index b6fccb07123e..e330da21b84f 100644 --- a/trunk/arch/x86/Kconfig +++ b/trunk/arch/x86/Kconfig @@ -377,18 +377,6 @@ config X86_ELAN If unsure, choose "PC-compatible" instead. -config X86_INTEL_CE - bool "CE4100 TV platform" - depends on PCI - depends on PCI_GODIRECT - depends on X86_32 - depends on X86_EXTENDED_PLATFORM - select X86_REBOOTFIXUPS - ---help--- - Select for the Intel CE media processor (CE4100) SOC. - This option compiles in support for the CE4100 SOC for settop - boxes and media devices. - config X86_MRST bool "Moorestown MID platform" depends on PCI @@ -397,10 +385,6 @@ config X86_MRST depends on X86_EXTENDED_PLATFORM depends on X86_IO_APIC select APB_TIMER - select I2C - select SPI - select INTEL_SCU_IPC - select X86_PLATFORM_DEVICES ---help--- Moorestown is Intel's Low Power Intel Architecture (LPIA) based Moblin Internet Device(MID) platform. Moorestown consists of two chips: @@ -482,19 +466,6 @@ config X86_ES7000 Support for Unisys ES7000 systems. Say 'Y' here if this kernel is supposed to run on an IA32-based Unisys ES7000 system. -config X86_32_IRIS - tristate "Eurobraille/Iris poweroff module" - depends on X86_32 - ---help--- - The Iris machines from EuroBraille do not have APM or ACPI support - to shut themselves down properly. A special I/O sequence is - needed to do so, which is what this module does at - kernel shutdown. - - This is only for Iris machines from EuroBraille. - - If unused, say N. - config SCHED_OMIT_FRAME_POINTER def_bool y prompt "Single-depth WCHAN output" @@ -1170,16 +1141,16 @@ config NUMA comment "NUMA (Summit) requires SMP, 64GB highmem support, ACPI" depends on X86_32 && X86_SUMMIT && (!HIGHMEM64G || !ACPI) -config AMD_NUMA +config K8_NUMA def_bool y prompt "Old style AMD Opteron NUMA detection" depends on X86_64 && NUMA && PCI ---help--- - Enable AMD NUMA node topology detection. You should say Y here if - you have a multi processor AMD system. This uses an old method to - read the NUMA configuration directly from the builtin Northbridge - of Opteron. It is recommended to use X86_64_ACPI_NUMA instead, - which also takes priority if both are compiled in. + Enable K8 NUMA node topology detection. You should say Y here if + you have a multi processor AMD K8 system. This uses an old + method to read the NUMA configuration directly from the builtin + Northbridge of Opteron. It is recommended to use X86_64_ACPI_NUMA + instead, which also takes priority if both are compiled in. config X86_64_ACPI_NUMA def_bool y diff --git a/trunk/arch/x86/Kconfig.debug b/trunk/arch/x86/Kconfig.debug index 45143bbcfe5e..b59ee765414e 100644 --- a/trunk/arch/x86/Kconfig.debug +++ b/trunk/arch/x86/Kconfig.debug @@ -117,17 +117,6 @@ config DEBUG_RODATA_TEST feature as well as for the change_page_attr() infrastructure. If in doubt, say "N" -config DEBUG_SET_MODULE_RONX - bool "Set loadable kernel module data as NX and text as RO" - depends on MODULES - ---help--- - This option helps catch unintended modifications to loadable - kernel module's text and read-only data. It also prevents execution - of module data. Such protection may interfere with run-time code - patching and dynamic kernel tracing - and they might also protect - against certain classes of kernel exploits. - If in doubt, say "N". - config DEBUG_NX_TEST tristate "Testcase for the NX non-executable stack feature" depends on DEBUG_KERNEL && m diff --git a/trunk/arch/x86/boot/compressed/head_64.S b/trunk/arch/x86/boot/compressed/head_64.S index 35af09d13dc1..52f85a196fa0 100644 --- a/trunk/arch/x86/boot/compressed/head_64.S +++ b/trunk/arch/x86/boot/compressed/head_64.S @@ -182,7 +182,7 @@ no_longmode: hlt jmp 1b -#include "../../kernel/verify_cpu.S" +#include "../../kernel/verify_cpu_64.S" /* * Be careful here startup_64 needs to be at a predictable diff --git a/trunk/arch/x86/include/asm/alternative.h b/trunk/arch/x86/include/asm/alternative.h index 13009d1af99a..76561d20ea2f 100644 --- a/trunk/arch/x86/include/asm/alternative.h +++ b/trunk/arch/x86/include/asm/alternative.h @@ -66,7 +66,6 @@ extern void alternatives_smp_module_add(struct module *mod, char *name, extern void alternatives_smp_module_del(struct module *mod); extern void alternatives_smp_switch(int smp); extern int alternatives_text_reserved(void *start, void *end); -extern bool skip_smp_alternatives; #else static inline void alternatives_smp_module_add(struct module *mod, char *name, void *locks, void *locks_end, @@ -181,15 +180,8 @@ extern void *text_poke_early(void *addr, const void *opcode, size_t len); * On the local CPU you need to be protected again NMI or MCE handlers seeing an * inconsistent instruction while you patch. */ -struct text_poke_param { - void *addr; - const void *opcode; - size_t len; -}; - extern void *text_poke(void *addr, const void *opcode, size_t len); extern void *text_poke_smp(void *addr, const void *opcode, size_t len); -extern void text_poke_smp_batch(struct text_poke_param *params, int n); #if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL) #define IDEAL_NOP_SIZE_5 5 diff --git a/trunk/arch/x86/include/asm/amd_nb.h b/trunk/arch/x86/include/asm/amd_nb.h index 6aee50d655d1..c8517f81b21e 100644 --- a/trunk/arch/x86/include/asm/amd_nb.h +++ b/trunk/arch/x86/include/asm/amd_nb.h @@ -3,53 +3,36 @@ #include -extern struct pci_device_id amd_nb_misc_ids[]; +extern struct pci_device_id k8_nb_ids[]; struct bootnode; -extern int early_is_amd_nb(u32 value); -extern int amd_cache_northbridges(void); -extern void amd_flush_garts(void); -extern int amd_get_nodes(struct bootnode *nodes); -extern int amd_numa_init(unsigned long start_pfn, unsigned long end_pfn); -extern int amd_scan_nodes(void); +extern int early_is_k8_nb(u32 value); +extern int cache_k8_northbridges(void); +extern void k8_flush_garts(void); +extern int k8_get_nodes(struct bootnode *nodes); +extern int k8_numa_init(unsigned long start_pfn, unsigned long end_pfn); +extern int k8_scan_nodes(void); -struct amd_northbridge { - struct pci_dev *misc; -}; - -struct amd_northbridge_info { +struct k8_northbridge_info { u16 num; - u64 flags; - struct amd_northbridge *nb; + u8 gart_supported; + struct pci_dev **nb_misc; }; -extern struct amd_northbridge_info amd_northbridges; - -#define AMD_NB_GART 0x1 -#define AMD_NB_L3_INDEX_DISABLE 0x2 +extern struct k8_northbridge_info k8_northbridges; #ifdef CONFIG_AMD_NB -static inline int amd_nb_num(void) +static inline struct pci_dev *node_to_k8_nb_misc(int node) { - return amd_northbridges.num; + return (node < k8_northbridges.num) ? k8_northbridges.nb_misc[node] : NULL; } -static inline int amd_nb_has_feature(int feature) -{ - return ((amd_northbridges.flags & feature) == feature); -} +#else -static inline struct amd_northbridge *node_to_amd_nb(int node) +static inline struct pci_dev *node_to_k8_nb_misc(int node) { - return (node < amd_northbridges.num) ? &amd_northbridges.nb[node] : NULL; + return NULL; } - -#else - -#define amd_nb_num(x) 0 -#define amd_nb_has_feature(x) false -#define node_to_amd_nb(x) NULL - #endif diff --git a/trunk/arch/x86/include/asm/apic.h b/trunk/arch/x86/include/asm/apic.h index cf12007796db..f6ce0bda3b98 100644 --- a/trunk/arch/x86/include/asm/apic.h +++ b/trunk/arch/x86/include/asm/apic.h @@ -238,7 +238,6 @@ extern void setup_boot_APIC_clock(void); extern void setup_secondary_APIC_clock(void); extern int APIC_init_uniprocessor(void); extern void enable_NMI_through_LVT0(void); -extern int apic_force_enable(void); /* * On 32bit this is mach-xxx local diff --git a/trunk/arch/x86/include/asm/apicdef.h b/trunk/arch/x86/include/asm/apicdef.h index 47a30ff8e517..a859ca461fb0 100644 --- a/trunk/arch/x86/include/asm/apicdef.h +++ b/trunk/arch/x86/include/asm/apicdef.h @@ -145,7 +145,6 @@ #ifdef CONFIG_X86_32 # define MAX_IO_APICS 64 -# define MAX_LOCAL_APIC 256 #else # define MAX_IO_APICS 128 # define MAX_LOCAL_APIC 32768 diff --git a/trunk/arch/x86/include/asm/bootparam.h b/trunk/arch/x86/include/asm/bootparam.h index c8bfe63a06de..8e6218550e77 100644 --- a/trunk/arch/x86/include/asm/bootparam.h +++ b/trunk/arch/x86/include/asm/bootparam.h @@ -124,7 +124,6 @@ enum { X86_SUBARCH_LGUEST, X86_SUBARCH_XEN, X86_SUBARCH_MRST, - X86_SUBARCH_CE4100, X86_NR_SUBARCHS, }; diff --git a/trunk/arch/x86/include/asm/fixmap.h b/trunk/arch/x86/include/asm/fixmap.h index 0141b234406f..9479a037419f 100644 --- a/trunk/arch/x86/include/asm/fixmap.h +++ b/trunk/arch/x86/include/asm/fixmap.h @@ -117,10 +117,6 @@ enum fixed_addresses { FIX_TEXT_POKE1, /* reserve 2 pages for text_poke() */ FIX_TEXT_POKE0, /* first page is last, because allocation is backward */ __end_of_permanent_fixed_addresses, - -#ifdef CONFIG_X86_MRST - FIX_LNW_VRTC, -#endif /* * 256 temporary boot-time mappings, used by early_ioremap(), * before ioremap() is functional. diff --git a/trunk/arch/x86/include/asm/i387.h b/trunk/arch/x86/include/asm/i387.h index ef328901c802..4aa2bb3b242a 100644 --- a/trunk/arch/x86/include/asm/i387.h +++ b/trunk/arch/x86/include/asm/i387.h @@ -93,17 +93,6 @@ static inline int fxrstor_checking(struct i387_fxsave_struct *fx) int err; /* See comment in fxsave() below. */ -#ifdef CONFIG_AS_FXSAVEQ - asm volatile("1: fxrstorq %[fx]\n\t" - "2:\n" - ".section .fixup,\"ax\"\n" - "3: movl $-1,%[err]\n" - " jmp 2b\n" - ".previous\n" - _ASM_EXTABLE(1b, 3b) - : [err] "=r" (err) - : [fx] "m" (*fx), "0" (0)); -#else asm volatile("1: rex64/fxrstor (%[fx])\n\t" "2:\n" ".section .fixup,\"ax\"\n" @@ -113,7 +102,6 @@ static inline int fxrstor_checking(struct i387_fxsave_struct *fx) _ASM_EXTABLE(1b, 3b) : [err] "=r" (err) : [fx] "R" (fx), "m" (*fx), "0" (0)); -#endif return err; } @@ -131,17 +119,6 @@ static inline int fxsave_user(struct i387_fxsave_struct __user *fx) return -EFAULT; /* See comment in fxsave() below. */ -#ifdef CONFIG_AS_FXSAVEQ - asm volatile("1: fxsaveq %[fx]\n\t" - "2:\n" - ".section .fixup,\"ax\"\n" - "3: movl $-1,%[err]\n" - " jmp 2b\n" - ".previous\n" - _ASM_EXTABLE(1b, 3b) - : [err] "=r" (err), [fx] "=m" (*fx) - : "0" (0)); -#else asm volatile("1: rex64/fxsave (%[fx])\n\t" "2:\n" ".section .fixup,\"ax\"\n" @@ -151,7 +128,6 @@ static inline int fxsave_user(struct i387_fxsave_struct __user *fx) _ASM_EXTABLE(1b, 3b) : [err] "=r" (err), "=m" (*fx) : [fx] "R" (fx), "0" (0)); -#endif if (unlikely(err) && __clear_user(fx, sizeof(struct i387_fxsave_struct))) err = -EFAULT; diff --git a/trunk/arch/x86/include/asm/io_apic.h b/trunk/arch/x86/include/asm/io_apic.h index 0c5ca4e30d7b..a6b28d017c2f 100644 --- a/trunk/arch/x86/include/asm/io_apic.h +++ b/trunk/arch/x86/include/asm/io_apic.h @@ -159,7 +159,7 @@ struct io_apic_irq_attr; extern int io_apic_set_pci_routing(struct device *dev, int irq, struct io_apic_irq_attr *irq_attr); void setup_IO_APIC_irq_extra(u32 gsi); -extern void ioapic_and_gsi_init(void); +extern void ioapic_init_mappings(void); extern void ioapic_insert_resources(void); extern struct IO_APIC_route_entry **alloc_ioapic_entries(void); @@ -168,9 +168,10 @@ extern int save_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries); extern void mask_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries); extern int restore_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries); +extern void probe_nr_irqs_gsi(void); extern int get_nr_irqs_gsi(void); + extern void setup_ioapic_ids_from_mpc(void); -extern void setup_ioapic_ids_from_mpc_nocheck(void); struct mp_ioapic_gsi{ u32 gsi_base; @@ -188,8 +189,9 @@ extern void __init pre_init_apic_IRQ0(void); #define io_apic_assign_pci_irqs 0 #define setup_ioapic_ids_from_mpc x86_init_noop static const int timer_through_8259 = 0; -static inline void ioapic_and_gsi_init(void) { } +static inline void ioapic_init_mappings(void) { } static inline void ioapic_insert_resources(void) { } +static inline void probe_nr_irqs_gsi(void) { } #define gsi_top (NR_IRQS_LEGACY) static inline int mp_find_ioapic(u32 gsi) { return 0; } diff --git a/trunk/arch/x86/include/asm/irq.h b/trunk/arch/x86/include/asm/irq.h index ba870bb6dd8e..13b0ebaa512f 100644 --- a/trunk/arch/x86/include/asm/irq.h +++ b/trunk/arch/x86/include/asm/irq.h @@ -15,6 +15,10 @@ static inline int irq_canonicalize(int irq) return ((irq == 2) ? 9 : irq); } +#ifdef CONFIG_X86_LOCAL_APIC +# define ARCH_HAS_NMI_WATCHDOG +#endif + #ifdef CONFIG_X86_32 extern void irq_ctx_init(int cpu); #else diff --git a/trunk/arch/x86/include/asm/kdebug.h b/trunk/arch/x86/include/asm/kdebug.h index f23eb2528464..5bdfca86581b 100644 --- a/trunk/arch/x86/include/asm/kdebug.h +++ b/trunk/arch/x86/include/asm/kdebug.h @@ -28,7 +28,7 @@ extern void die(const char *, struct pt_regs *,long); extern int __must_check __die(const char *, struct pt_regs *, long); extern void show_registers(struct pt_regs *regs); extern void show_trace(struct task_struct *t, struct pt_regs *regs, - unsigned long *sp); + unsigned long *sp, unsigned long bp); extern void __show_regs(struct pt_regs *regs, int all); extern void show_regs(struct pt_regs *regs); extern unsigned long oops_begin(void); diff --git a/trunk/arch/x86/include/asm/mce.h b/trunk/arch/x86/include/asm/mce.h index eb16e94ae04f..c62c13cb9788 100644 --- a/trunk/arch/x86/include/asm/mce.h +++ b/trunk/arch/x86/include/asm/mce.h @@ -223,9 +223,6 @@ void intel_init_thermal(struct cpuinfo_x86 *c); void mce_log_therm_throt_event(__u64 status); -/* Interrupt Handler for core thermal thresholds */ -extern int (*platform_thermal_notify)(__u64 msr_val); - #ifdef CONFIG_X86_THERMAL_VECTOR extern void mcheck_intel_therm_init(void); #else diff --git a/trunk/arch/x86/include/asm/microcode.h b/trunk/arch/x86/include/asm/microcode.h index 24215072d0e1..ef51b501e22a 100644 --- a/trunk/arch/x86/include/asm/microcode.h +++ b/trunk/arch/x86/include/asm/microcode.h @@ -48,12 +48,6 @@ static inline struct microcode_ops * __init init_intel_microcode(void) #ifdef CONFIG_MICROCODE_AMD extern struct microcode_ops * __init init_amd_microcode(void); - -static inline void get_ucode_data(void *to, const u8 *from, size_t n) -{ - memcpy(to, from, n); -} - #else static inline struct microcode_ops * __init init_amd_microcode(void) { diff --git a/trunk/arch/x86/include/asm/mpspec.h b/trunk/arch/x86/include/asm/mpspec.h index 0c90dd9f0505..c82868e9f905 100644 --- a/trunk/arch/x86/include/asm/mpspec.h +++ b/trunk/arch/x86/include/asm/mpspec.h @@ -5,9 +5,8 @@ #include #include -#include -extern int apic_version[]; +extern int apic_version[MAX_APICS]; extern int pic_mode; #ifdef CONFIG_X86_32 @@ -108,7 +107,7 @@ extern int mp_register_gsi(struct device *dev, u32 gsi, int edge_level, int active_high_low); #endif /* CONFIG_ACPI */ -#define PHYSID_ARRAY_SIZE BITS_TO_LONGS(MAX_LOCAL_APIC) +#define PHYSID_ARRAY_SIZE BITS_TO_LONGS(MAX_APICS) struct physid_mask { unsigned long mask[PHYSID_ARRAY_SIZE]; @@ -123,31 +122,31 @@ typedef struct physid_mask physid_mask_t; test_and_set_bit(physid, (map).mask) #define physids_and(dst, src1, src2) \ - bitmap_and((dst).mask, (src1).mask, (src2).mask, MAX_LOCAL_APIC) + bitmap_and((dst).mask, (src1).mask, (src2).mask, MAX_APICS) #define physids_or(dst, src1, src2) \ - bitmap_or((dst).mask, (src1).mask, (src2).mask, MAX_LOCAL_APIC) + bitmap_or((dst).mask, (src1).mask, (src2).mask, MAX_APICS) #define physids_clear(map) \ - bitmap_zero((map).mask, MAX_LOCAL_APIC) + bitmap_zero((map).mask, MAX_APICS) #define physids_complement(dst, src) \ - bitmap_complement((dst).mask, (src).mask, MAX_LOCAL_APIC) + bitmap_complement((dst).mask, (src).mask, MAX_APICS) #define physids_empty(map) \ - bitmap_empty((map).mask, MAX_LOCAL_APIC) + bitmap_empty((map).mask, MAX_APICS) #define physids_equal(map1, map2) \ - bitmap_equal((map1).mask, (map2).mask, MAX_LOCAL_APIC) + bitmap_equal((map1).mask, (map2).mask, MAX_APICS) #define physids_weight(map) \ - bitmap_weight((map).mask, MAX_LOCAL_APIC) + bitmap_weight((map).mask, MAX_APICS) #define physids_shift_right(d, s, n) \ - bitmap_shift_right((d).mask, (s).mask, n, MAX_LOCAL_APIC) + bitmap_shift_right((d).mask, (s).mask, n, MAX_APICS) #define physids_shift_left(d, s, n) \ - bitmap_shift_left((d).mask, (s).mask, n, MAX_LOCAL_APIC) + bitmap_shift_left((d).mask, (s).mask, n, MAX_APICS) static inline unsigned long physids_coerce(physid_mask_t *map) { @@ -160,6 +159,14 @@ static inline void physids_promote(unsigned long physids, physid_mask_t *map) map->mask[0] = physids; } +/* Note: will create very large stack frames if physid_mask_t is big */ +#define physid_mask_of_physid(physid) \ + ({ \ + physid_mask_t __physid_mask = PHYSID_MASK_NONE; \ + physid_set(physid, __physid_mask); \ + __physid_mask; \ + }) + static inline void physid_set_mask_of_physid(int physid, physid_mask_t *map) { physids_clear(*map); diff --git a/trunk/arch/x86/include/asm/mpspec_def.h b/trunk/arch/x86/include/asm/mpspec_def.h index c0a955a9a087..4a7f96d7c188 100644 --- a/trunk/arch/x86/include/asm/mpspec_def.h +++ b/trunk/arch/x86/include/asm/mpspec_def.h @@ -15,6 +15,13 @@ #ifdef CONFIG_X86_32 # define MAX_MPC_ENTRY 1024 +# define MAX_APICS 256 +#else +# if NR_CPUS <= 255 +# define MAX_APICS 255 +# else +# define MAX_APICS 32768 +# endif #endif /* Intel MP Floating Pointer Structure */ diff --git a/trunk/arch/x86/include/asm/mrst-vrtc.h b/trunk/arch/x86/include/asm/mrst-vrtc.h deleted file mode 100644 index 73668abdbedf..000000000000 --- a/trunk/arch/x86/include/asm/mrst-vrtc.h +++ /dev/null @@ -1,9 +0,0 @@ -#ifndef _MRST_VRTC_H -#define _MRST_VRTC_H - -extern unsigned char vrtc_cmos_read(unsigned char reg); -extern void vrtc_cmos_write(unsigned char val, unsigned char reg); -extern unsigned long vrtc_get_time(void); -extern int vrtc_set_mmss(unsigned long nowtime); - -#endif diff --git a/trunk/arch/x86/include/asm/mrst.h b/trunk/arch/x86/include/asm/mrst.h index 719f00b28ff5..4a711a684b17 100644 --- a/trunk/arch/x86/include/asm/mrst.h +++ b/trunk/arch/x86/include/asm/mrst.h @@ -14,9 +14,7 @@ #include extern int pci_mrst_init(void); -extern int __init sfi_parse_mrtc(struct sfi_table_header *table); -extern int sfi_mrtc_num; -extern struct sfi_rtc_table_entry sfi_mrtc_array[]; +int __init sfi_parse_mrtc(struct sfi_table_header *table); /* * Medfield is the follow-up of Moorestown, it combines two chip solution into @@ -52,14 +50,4 @@ extern void mrst_early_console_init(void); extern struct console early_hsu_console; extern void hsu_early_console_init(void); - -extern void intel_scu_devices_create(void); -extern void intel_scu_devices_destroy(void); - -/* VRTC timer */ -#define MRST_VRTC_MAP_SZ (1024) -/*#define MRST_VRTC_PGOFFSET (0xc00) */ - -extern void mrst_rtc_init(void); - #endif /* _ASM_X86_MRST_H */ diff --git a/trunk/arch/x86/include/asm/msr-index.h b/trunk/arch/x86/include/asm/msr-index.h index 4d0dfa0d998e..6b89f5e86021 100644 --- a/trunk/arch/x86/include/asm/msr-index.h +++ b/trunk/arch/x86/include/asm/msr-index.h @@ -123,10 +123,6 @@ #define MSR_AMD64_IBSCTL 0xc001103a #define MSR_AMD64_IBSBRTARGET 0xc001103b -/* Fam 15h MSRs */ -#define MSR_F15H_PERF_CTL 0xc0010200 -#define MSR_F15H_PERF_CTR 0xc0010201 - /* Fam 10h MSRs */ #define MSR_FAM10H_MMIO_CONF_BASE 0xc0010058 #define FAM10H_MMIO_CONF_ENABLE (1<<0) @@ -257,18 +253,6 @@ #define PACKAGE_THERM_INT_LOW_ENABLE (1 << 1) #define PACKAGE_THERM_INT_PLN_ENABLE (1 << 24) -/* Thermal Thresholds Support */ -#define THERM_INT_THRESHOLD0_ENABLE (1 << 15) -#define THERM_SHIFT_THRESHOLD0 8 -#define THERM_MASK_THRESHOLD0 (0x7f << THERM_SHIFT_THRESHOLD0) -#define THERM_INT_THRESHOLD1_ENABLE (1 << 23) -#define THERM_SHIFT_THRESHOLD1 16 -#define THERM_MASK_THRESHOLD1 (0x7f << THERM_SHIFT_THRESHOLD1) -#define THERM_STATUS_THRESHOLD0 (1 << 6) -#define THERM_LOG_THRESHOLD0 (1 << 7) -#define THERM_STATUS_THRESHOLD1 (1 << 8) -#define THERM_LOG_THRESHOLD1 (1 << 9) - /* MISC_ENABLE bits: architectural */ #define MSR_IA32_MISC_ENABLE_FAST_STRING (1ULL << 0) #define MSR_IA32_MISC_ENABLE_TCC (1ULL << 1) diff --git a/trunk/arch/x86/include/asm/nmi.h b/trunk/arch/x86/include/asm/nmi.h index c4021b953510..932f0f86b4b7 100644 --- a/trunk/arch/x86/include/asm/nmi.h +++ b/trunk/arch/x86/include/asm/nmi.h @@ -5,15 +5,41 @@ #include #include -#ifdef CONFIG_X86_LOCAL_APIC +#ifdef ARCH_HAS_NMI_WATCHDOG + +/** + * do_nmi_callback + * + * Check to see if a callback exists and execute it. Return 1 + * if the handler exists and was handled successfully. + */ +int do_nmi_callback(struct pt_regs *regs, int cpu); extern void die_nmi(char *str, struct pt_regs *regs, int do_panic); +extern int check_nmi_watchdog(void); +#if !defined(CONFIG_LOCKUP_DETECTOR) +extern int nmi_watchdog_enabled; +#endif extern int avail_to_resrv_perfctr_nmi_bit(unsigned int); extern int reserve_perfctr_nmi(unsigned int); extern void release_perfctr_nmi(unsigned int); extern int reserve_evntsel_nmi(unsigned int); extern void release_evntsel_nmi(unsigned int); +extern void setup_apic_nmi_watchdog(void *); +extern void stop_apic_nmi_watchdog(void *); +extern void disable_timer_nmi_watchdog(void); +extern void enable_timer_nmi_watchdog(void); +extern int nmi_watchdog_tick(struct pt_regs *regs, unsigned reason); +extern void cpu_nmi_set_wd_enabled(void); + +extern atomic_t nmi_active; +extern unsigned int nmi_watchdog; +#define NMI_NONE 0 +#define NMI_IO_APIC 1 +#define NMI_LOCAL_APIC 2 +#define NMI_INVALID 3 + struct ctl_table; extern int proc_nmi_enabled(struct ctl_table *, int , void __user *, size_t *, loff_t *); @@ -21,8 +47,33 @@ extern int unknown_nmi_panic; void arch_trigger_all_cpu_backtrace(void); #define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace + +static inline void localise_nmi_watchdog(void) +{ + if (nmi_watchdog == NMI_IO_APIC) + nmi_watchdog = NMI_LOCAL_APIC; +} + +/* check if nmi_watchdog is active (ie was specified at boot) */ +static inline int nmi_watchdog_active(void) +{ + /* + * actually it should be: + * return (nmi_watchdog == NMI_LOCAL_APIC || + * nmi_watchdog == NMI_IO_APIC) + * but since they are power of two we could use a + * cheaper way --cvg + */ + return nmi_watchdog & (NMI_LOCAL_APIC | NMI_IO_APIC); +} #endif +void lapic_watchdog_stop(void); +int lapic_watchdog_init(unsigned nmi_hz); +int lapic_wd_event(unsigned nmi_hz); +unsigned lapic_adjust_nmi_hz(unsigned hz); +void disable_lapic_nmi_watchdog(void); +void enable_lapic_nmi_watchdog(void); void stop_nmi(void); void restart_nmi(void); diff --git a/trunk/arch/x86/include/asm/paravirt.h b/trunk/arch/x86/include/asm/paravirt.h index 7709c12431b8..ef9975812c77 100644 --- a/trunk/arch/x86/include/asm/paravirt.h +++ b/trunk/arch/x86/include/asm/paravirt.h @@ -112,7 +112,7 @@ static inline void arch_safe_halt(void) static inline void halt(void) { - PVOP_VCALL0(pv_irq_ops.halt); + PVOP_VCALL0(pv_irq_ops.safe_halt); } static inline void wbinvd(void) diff --git a/trunk/arch/x86/include/asm/pci.h b/trunk/arch/x86/include/asm/pci.h index 676129229630..ca0437c714b2 100644 --- a/trunk/arch/x86/include/asm/pci.h +++ b/trunk/arch/x86/include/asm/pci.h @@ -65,7 +65,6 @@ extern unsigned long pci_mem_start; #define PCIBIOS_MIN_CARDBUS_IO 0x4000 -extern int pcibios_enabled; void pcibios_config_init(void); struct pci_bus *pcibios_scan_root(int bus); diff --git a/trunk/arch/x86/include/asm/perf_event.h b/trunk/arch/x86/include/asm/perf_event.h index d9d4dae305f6..550e26b1dbb3 100644 --- a/trunk/arch/x86/include/asm/perf_event.h +++ b/trunk/arch/x86/include/asm/perf_event.h @@ -125,6 +125,7 @@ union cpuid10_edx { #define IBS_OP_MAX_CNT_EXT 0x007FFFFFULL /* not a register bit mask */ #ifdef CONFIG_PERF_EVENTS +extern void init_hw_perf_events(void); extern void perf_events_lapic_init(void); #define PERF_EVENT_INDEX_OFFSET 0 @@ -155,6 +156,7 @@ extern unsigned long perf_misc_flags(struct pt_regs *regs); } #else +static inline void init_hw_perf_events(void) { } static inline void perf_events_lapic_init(void) { } #endif diff --git a/trunk/arch/x86/include/asm/perf_event_p4.h b/trunk/arch/x86/include/asm/perf_event_p4.h index 295e2ff18a6a..a70cd216be5d 100644 --- a/trunk/arch/x86/include/asm/perf_event_p4.h +++ b/trunk/arch/x86/include/asm/perf_event_p4.h @@ -744,6 +744,14 @@ enum P4_ESCR_EMASKS { }; /* + * P4 PEBS specifics (Replay Event only) + * + * Format (bits): + * 0-6: metric from P4_PEBS_METRIC enum + * 7 : reserved + * 8 : reserved + * 9-11 : reserved + * * Note we have UOP and PEBS bits reserved for now * just in case if we will need them once */ @@ -780,60 +788,5 @@ enum P4_PEBS_METRIC { P4_PEBS_METRIC__max }; -/* - * Notes on internal configuration of ESCR+CCCR tuples - * - * Since P4 has quite the different architecture of - * performance registers in compare with "architectural" - * once and we have on 64 bits to keep configuration - * of performance event, the following trick is used. - * - * 1) Since both ESCR and CCCR registers have only low - * 32 bits valuable, we pack them into a single 64 bit - * configuration. Low 32 bits of such config correspond - * to low 32 bits of CCCR register and high 32 bits - * correspond to low 32 bits of ESCR register. - * - * 2) The meaning of every bit of such config field can - * be found in Intel SDM but it should be noted that - * we "borrow" some reserved bits for own usage and - * clean them or set to a proper value when we do - * a real write to hardware registers. - * - * 3) The format of bits of config is the following - * and should be either 0 or set to some predefined - * values: - * - * Low 32 bits - * ----------- - * 0-6: P4_PEBS_METRIC enum - * 7-11: reserved - * 12: reserved (Enable) - * 13-15: reserved (ESCR select) - * 16-17: Active Thread - * 18: Compare - * 19: Complement - * 20-23: Threshold - * 24: Edge - * 25: reserved (FORCE_OVF) - * 26: reserved (OVF_PMI_T0) - * 27: reserved (OVF_PMI_T1) - * 28-29: reserved - * 30: reserved (Cascade) - * 31: reserved (OVF) - * - * High 32 bits - * ------------ - * 0: reserved (T1_USR) - * 1: reserved (T1_OS) - * 2: reserved (T0_USR) - * 3: reserved (T0_OS) - * 4: Tag Enable - * 5-8: Tag Value - * 9-24: Event Mask (may use P4_ESCR_EMASK_BIT helper) - * 25-30: enum P4_EVENTS - * 31: reserved (HT thread) - */ - #endif /* PERF_EVENT_P4_H */ diff --git a/trunk/arch/x86/include/asm/setup.h b/trunk/arch/x86/include/asm/setup.h index db8aa19a08a2..d6763b139a84 100644 --- a/trunk/arch/x86/include/asm/setup.h +++ b/trunk/arch/x86/include/asm/setup.h @@ -53,12 +53,6 @@ extern void x86_mrst_early_setup(void); static inline void x86_mrst_early_setup(void) { } #endif -#ifdef CONFIG_X86_INTEL_CE -extern void x86_ce4100_early_setup(void); -#else -static inline void x86_ce4100_early_setup(void) { } -#endif - #ifndef _SETUP /* diff --git a/trunk/arch/x86/include/asm/smpboot_hooks.h b/trunk/arch/x86/include/asm/smpboot_hooks.h index 6c22bf353f26..1def60114906 100644 --- a/trunk/arch/x86/include/asm/smpboot_hooks.h +++ b/trunk/arch/x86/include/asm/smpboot_hooks.h @@ -48,6 +48,7 @@ static inline void __init smpboot_setup_io_apic(void) setup_IO_APIC(); else { nr_ioapics = 0; + localise_nmi_watchdog(); } #endif } diff --git a/trunk/arch/x86/include/asm/stacktrace.h b/trunk/arch/x86/include/asm/stacktrace.h index 52b5c7ed3608..2b16a2ad23dc 100644 --- a/trunk/arch/x86/include/asm/stacktrace.h +++ b/trunk/arch/x86/include/asm/stacktrace.h @@ -7,7 +7,6 @@ #define _ASM_X86_STACKTRACE_H #include -#include extern int kstack_depth_to_print; @@ -47,7 +46,7 @@ struct stacktrace_ops { }; void dump_trace(struct task_struct *tsk, struct pt_regs *regs, - unsigned long *stack, + unsigned long *stack, unsigned long bp, const struct stacktrace_ops *ops, void *data); #ifdef CONFIG_X86_32 @@ -58,39 +57,13 @@ void dump_trace(struct task_struct *tsk, struct pt_regs *regs, #define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :) #endif -#ifdef CONFIG_FRAME_POINTER -static inline unsigned long -stack_frame(struct task_struct *task, struct pt_regs *regs) -{ - unsigned long bp; - - if (regs) - return regs->bp; - - if (task == current) { - /* Grab bp right from our regs */ - get_bp(bp); - return bp; - } - - /* bp is the last reg pushed by switch_to */ - return *(unsigned long *)task->thread.sp; -} -#else -static inline unsigned long -stack_frame(struct task_struct *task, struct pt_regs *regs) -{ - return 0; -} -#endif - extern void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, - unsigned long *stack, char *log_lvl); + unsigned long *stack, unsigned long bp, char *log_lvl); extern void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, - unsigned long *sp, char *log_lvl); + unsigned long *sp, unsigned long bp, char *log_lvl); extern unsigned int code_bytes; diff --git a/trunk/arch/x86/include/asm/timer.h b/trunk/arch/x86/include/asm/timer.h index fa7b9176b76c..5469630b27f5 100644 --- a/trunk/arch/x86/include/asm/timer.h +++ b/trunk/arch/x86/include/asm/timer.h @@ -10,6 +10,12 @@ unsigned long long native_sched_clock(void); extern int recalibrate_cpu_khz(void); +#if defined(CONFIG_X86_32) && defined(CONFIG_X86_IO_APIC) +extern int timer_ack; +#else +# define timer_ack (0) +#endif + extern int no_timer_check; /* Accelerators for sched_clock() diff --git a/trunk/arch/x86/include/asm/uv/uv_bau.h b/trunk/arch/x86/include/asm/uv/uv_bau.h index ce1d54c8a433..42d412fd8b02 100644 --- a/trunk/arch/x86/include/asm/uv/uv_bau.h +++ b/trunk/arch/x86/include/asm/uv/uv_bau.h @@ -26,22 +26,20 @@ * BAU_SB_DESCRIPTOR_BASE register, set 1 is located at BASE + 512, * set 2 is at BASE + 2*512, set 3 at BASE + 3*512, and so on. * - * We will use one set for sending BAU messages from each of the + * We will use 31 sets, one for sending BAU messages from each of the 32 * cpu's on the uvhub. * * TLB shootdown will use the first of the 8 descriptors of each set. * Each of the descriptors is 64 bytes in size (8*64 = 512 bytes in a set). */ -#define MAX_CPUS_PER_UVHUB 64 -#define MAX_CPUS_PER_SOCKET 32 -#define UV_ADP_SIZE 64 /* hardware-provided max. */ -#define UV_CPUS_PER_ACT_STATUS 32 /* hardware-provided max. */ #define UV_ITEMS_PER_DESCRIPTOR 8 /* the 'throttle' to prevent the hardware stay-busy bug */ #define MAX_BAU_CONCURRENT 3 +#define UV_CPUS_PER_ACT_STATUS 32 #define UV_ACT_STATUS_MASK 0x3 #define UV_ACT_STATUS_SIZE 2 +#define UV_ADP_SIZE 32 #define UV_DISTRIBUTION_SIZE 256 #define UV_SW_ACK_NPENDING 8 #define UV_NET_ENDPOINT_INTD 0x38 @@ -102,6 +100,7 @@ * number of destination side software ack resources */ #define DEST_NUM_RESOURCES 8 +#define MAX_CPUS_PER_NODE 32 /* * completion statuses for sending a TLB flush message */ diff --git a/trunk/arch/x86/kernel/Makefile b/trunk/arch/x86/kernel/Makefile index 34244b2cd880..1e994754d323 100644 --- a/trunk/arch/x86/kernel/Makefile +++ b/trunk/arch/x86/kernel/Makefile @@ -85,6 +85,7 @@ obj-$(CONFIG_DOUBLEFAULT) += doublefault_32.o obj-$(CONFIG_KGDB) += kgdb.o obj-$(CONFIG_VM86) += vm86_32.o obj-$(CONFIG_EARLY_PRINTK) += early_printk.o +obj-$(CONFIG_EARLY_PRINTK_MRST) += early_printk_mrst.o obj-$(CONFIG_HPET_TIMER) += hpet.o obj-$(CONFIG_APB_TIMER) += apb_timer.o diff --git a/trunk/arch/x86/kernel/acpi/boot.c b/trunk/arch/x86/kernel/acpi/boot.c index 17c8090fabd4..71232b941b6c 100644 --- a/trunk/arch/x86/kernel/acpi/boot.c +++ b/trunk/arch/x86/kernel/acpi/boot.c @@ -198,11 +198,6 @@ static void __cpuinit acpi_register_lapic(int id, u8 enabled) { unsigned int ver = 0; - if (id >= (MAX_LOCAL_APIC-1)) { - printk(KERN_INFO PREFIX "skipped apicid that is too big\n"); - return; - } - if (!enabled) { ++disabled_cpus; return; @@ -915,13 +910,13 @@ static int __init acpi_parse_madt_lapic_entries(void) acpi_register_lapic_address(acpi_lapic_addr); count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_SAPIC, - acpi_parse_sapic, MAX_LOCAL_APIC); + acpi_parse_sapic, MAX_APICS); if (!count) { x2count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_X2APIC, - acpi_parse_x2apic, MAX_LOCAL_APIC); + acpi_parse_x2apic, MAX_APICS); count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC, - acpi_parse_lapic, MAX_LOCAL_APIC); + acpi_parse_lapic, MAX_APICS); } if (!count && !x2count) { printk(KERN_ERR PREFIX "No LAPIC entries present\n"); diff --git a/trunk/arch/x86/kernel/alternative.c b/trunk/arch/x86/kernel/alternative.c index 123608531c8f..5079f24c955a 100644 --- a/trunk/arch/x86/kernel/alternative.c +++ b/trunk/arch/x86/kernel/alternative.c @@ -353,7 +353,6 @@ void __init_or_module alternatives_smp_module_del(struct module *mod) mutex_unlock(&smp_alt); } -bool skip_smp_alternatives; void alternatives_smp_switch(int smp) { struct smp_alt_module *mod; @@ -369,7 +368,7 @@ void alternatives_smp_switch(int smp) printk("lockdep: fixing up alternatives.\n"); #endif - if (noreplace_smp || smp_alt_once || skip_smp_alternatives) + if (noreplace_smp || smp_alt_once) return; BUG_ON(!smp && (num_online_cpus() > 1)); @@ -592,21 +591,17 @@ static atomic_t stop_machine_first; static int wrote_text; struct text_poke_params { - struct text_poke_param *params; - int nparams; + void *addr; + const void *opcode; + size_t len; }; static int __kprobes stop_machine_text_poke(void *data) { struct text_poke_params *tpp = data; - struct text_poke_param *p; - int i; if (atomic_dec_and_test(&stop_machine_first)) { - for (i = 0; i < tpp->nparams; i++) { - p = &tpp->params[i]; - text_poke(p->addr, p->opcode, p->len); - } + text_poke(tpp->addr, tpp->opcode, tpp->len); smp_wmb(); /* Make sure other cpus see that this has run */ wrote_text = 1; } else { @@ -615,12 +610,8 @@ static int __kprobes stop_machine_text_poke(void *data) smp_mb(); /* Load wrote_text before following execution */ } - for (i = 0; i < tpp->nparams; i++) { - p = &tpp->params[i]; - flush_icache_range((unsigned long)p->addr, - (unsigned long)p->addr + p->len); - } - + flush_icache_range((unsigned long)tpp->addr, + (unsigned long)tpp->addr + tpp->len); return 0; } @@ -640,13 +631,10 @@ static int __kprobes stop_machine_text_poke(void *data) void *__kprobes text_poke_smp(void *addr, const void *opcode, size_t len) { struct text_poke_params tpp; - struct text_poke_param p; - p.addr = addr; - p.opcode = opcode; - p.len = len; - tpp.params = &p; - tpp.nparams = 1; + tpp.addr = addr; + tpp.opcode = opcode; + tpp.len = len; atomic_set(&stop_machine_first, 1); wrote_text = 0; /* Use __stop_machine() because the caller already got online_cpus. */ @@ -654,26 +642,6 @@ void *__kprobes text_poke_smp(void *addr, const void *opcode, size_t len) return addr; } -/** - * text_poke_smp_batch - Update instructions on a live kernel on SMP - * @params: an array of text_poke parameters - * @n: the number of elements in params. - * - * Modify multi-byte instruction by using stop_machine() on SMP. Since the - * stop_machine() is heavy task, it is better to aggregate text_poke requests - * and do it once if possible. - * - * Note: Must be called under get_online_cpus() and text_mutex. - */ -void __kprobes text_poke_smp_batch(struct text_poke_param *params, int n) -{ - struct text_poke_params tpp = {.params = params, .nparams = n}; - - atomic_set(&stop_machine_first, 1); - wrote_text = 0; - stop_machine(stop_machine_text_poke, (void *)&tpp, NULL); -} - #if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL) #ifdef CONFIG_X86_64 diff --git a/trunk/arch/x86/kernel/amd_nb.c b/trunk/arch/x86/kernel/amd_nb.c index affacb5e0065..8f6463d8ed0d 100644 --- a/trunk/arch/x86/kernel/amd_nb.c +++ b/trunk/arch/x86/kernel/amd_nb.c @@ -12,116 +12,95 @@ static u32 *flush_words; -struct pci_device_id amd_nb_misc_ids[] = { +struct pci_device_id k8_nb_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_MISC) }, {} }; -EXPORT_SYMBOL(amd_nb_misc_ids); +EXPORT_SYMBOL(k8_nb_ids); -struct amd_northbridge_info amd_northbridges; -EXPORT_SYMBOL(amd_northbridges); +struct k8_northbridge_info k8_northbridges; +EXPORT_SYMBOL(k8_northbridges); -static struct pci_dev *next_northbridge(struct pci_dev *dev, - struct pci_device_id *ids) +static struct pci_dev *next_k8_northbridge(struct pci_dev *dev) { do { dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev); if (!dev) break; - } while (!pci_match_id(ids, dev)); + } while (!pci_match_id(&k8_nb_ids[0], dev)); return dev; } -int amd_cache_northbridges(void) +int cache_k8_northbridges(void) { - int i = 0; - struct amd_northbridge *nb; - struct pci_dev *misc; + int i; + struct pci_dev *dev; - if (amd_nb_num()) + if (k8_northbridges.num) return 0; - misc = NULL; - while ((misc = next_northbridge(misc, amd_nb_misc_ids)) != NULL) - i++; + dev = NULL; + while ((dev = next_k8_northbridge(dev)) != NULL) + k8_northbridges.num++; - if (i == 0) - return 0; + /* some CPU families (e.g. family 0x11) do not support GART */ + if (boot_cpu_data.x86 == 0xf || boot_cpu_data.x86 == 0x10 || + boot_cpu_data.x86 == 0x15) + k8_northbridges.gart_supported = 1; - nb = kzalloc(i * sizeof(struct amd_northbridge), GFP_KERNEL); - if (!nb) + k8_northbridges.nb_misc = kmalloc((k8_northbridges.num + 1) * + sizeof(void *), GFP_KERNEL); + if (!k8_northbridges.nb_misc) return -ENOMEM; - amd_northbridges.nb = nb; - amd_northbridges.num = i; - - misc = NULL; - for (i = 0; i != amd_nb_num(); i++) { - node_to_amd_nb(i)->misc = misc = - next_northbridge(misc, amd_nb_misc_ids); - } + if (!k8_northbridges.num) { + k8_northbridges.nb_misc[0] = NULL; + return 0; + } - /* some CPU families (e.g. family 0x11) do not support GART */ - if (boot_cpu_data.x86 == 0xf || boot_cpu_data.x86 == 0x10 || - boot_cpu_data.x86 == 0x15) - amd_northbridges.flags |= AMD_NB_GART; - - /* - * Some CPU families support L3 Cache Index Disable. There are some - * limitations because of E382 and E388 on family 0x10. - */ - if (boot_cpu_data.x86 == 0x10 && - boot_cpu_data.x86_model >= 0x8 && - (boot_cpu_data.x86_model > 0x9 || - boot_cpu_data.x86_mask >= 0x1)) - amd_northbridges.flags |= AMD_NB_L3_INDEX_DISABLE; + if (k8_northbridges.gart_supported) { + flush_words = kmalloc(k8_northbridges.num * sizeof(u32), + GFP_KERNEL); + if (!flush_words) { + kfree(k8_northbridges.nb_misc); + return -ENOMEM; + } + } + dev = NULL; + i = 0; + while ((dev = next_k8_northbridge(dev)) != NULL) { + k8_northbridges.nb_misc[i] = dev; + if (k8_northbridges.gart_supported) + pci_read_config_dword(dev, 0x9c, &flush_words[i++]); + } + k8_northbridges.nb_misc[i] = NULL; return 0; } -EXPORT_SYMBOL_GPL(amd_cache_northbridges); +EXPORT_SYMBOL_GPL(cache_k8_northbridges); /* Ignores subdevice/subvendor but as far as I can figure out they're useless anyways */ -int __init early_is_amd_nb(u32 device) +int __init early_is_k8_nb(u32 device) { struct pci_device_id *id; u32 vendor = device & 0xffff; device >>= 16; - for (id = amd_nb_misc_ids; id->vendor; id++) + for (id = k8_nb_ids; id->vendor; id++) if (vendor == id->vendor && device == id->device) return 1; return 0; } -int amd_cache_gart(void) -{ - int i; - - if (!amd_nb_has_feature(AMD_NB_GART)) - return 0; - - flush_words = kmalloc(amd_nb_num() * sizeof(u32), GFP_KERNEL); - if (!flush_words) { - amd_northbridges.flags &= ~AMD_NB_GART; - return -ENOMEM; - } - - for (i = 0; i != amd_nb_num(); i++) - pci_read_config_dword(node_to_amd_nb(i)->misc, 0x9c, - &flush_words[i]); - - return 0; -} - -void amd_flush_garts(void) +void k8_flush_garts(void) { int flushed, i; unsigned long flags; static DEFINE_SPINLOCK(gart_lock); - if (!amd_nb_has_feature(AMD_NB_GART)) + if (!k8_northbridges.gart_supported) return; /* Avoid races between AGP and IOMMU. In theory it's not needed @@ -130,16 +109,16 @@ void amd_flush_garts(void) that it doesn't matter to serialize more. -AK */ spin_lock_irqsave(&gart_lock, flags); flushed = 0; - for (i = 0; i < amd_nb_num(); i++) { - pci_write_config_dword(node_to_amd_nb(i)->misc, 0x9c, - flush_words[i] | 1); + for (i = 0; i < k8_northbridges.num; i++) { + pci_write_config_dword(k8_northbridges.nb_misc[i], 0x9c, + flush_words[i]|1); flushed++; } - for (i = 0; i < amd_nb_num(); i++) { + for (i = 0; i < k8_northbridges.num; i++) { u32 w; /* Make sure the hardware actually executed the flush*/ for (;;) { - pci_read_config_dword(node_to_amd_nb(i)->misc, + pci_read_config_dword(k8_northbridges.nb_misc[i], 0x9c, &w); if (!(w & 1)) break; @@ -150,23 +129,19 @@ void amd_flush_garts(void) if (!flushed) printk("nothing to flush?\n"); } -EXPORT_SYMBOL_GPL(amd_flush_garts); +EXPORT_SYMBOL_GPL(k8_flush_garts); -static __init int init_amd_nbs(void) +static __init int init_k8_nbs(void) { int err = 0; - err = amd_cache_northbridges(); + err = cache_k8_northbridges(); if (err < 0) - printk(KERN_NOTICE "AMD NB: Cannot enumerate AMD northbridges.\n"); - - if (amd_cache_gart() < 0) - printk(KERN_NOTICE "AMD NB: Cannot initialize GART flush words, " - "GART support disabled.\n"); + printk(KERN_NOTICE "K8 NB: Cannot enumerate AMD northbridges.\n"); return err; } /* This has to go after the PCI subsystem */ -fs_initcall(init_amd_nbs); +fs_initcall(init_k8_nbs); diff --git a/trunk/arch/x86/kernel/apb_timer.c b/trunk/arch/x86/kernel/apb_timer.c index 7c9ab59653e8..92543c73cf8e 100644 --- a/trunk/arch/x86/kernel/apb_timer.c +++ b/trunk/arch/x86/kernel/apb_timer.c @@ -315,7 +315,6 @@ static void apbt_setup_irq(struct apbt_dev *adev) if (system_state == SYSTEM_BOOTING) { irq_modify_status(adev->irq, 0, IRQ_MOVE_PCNTXT); - irq_set_affinity(adev->irq, cpumask_of(adev->cpu)); /* APB timer irqs are set up as mp_irqs, timer is edge type */ __set_irq_handler(adev->irq, handle_edge_irq, 0, "edge"); if (request_irq(adev->irq, apbt_interrupt_handler, diff --git a/trunk/arch/x86/kernel/aperture_64.c b/trunk/arch/x86/kernel/aperture_64.c index dcd7c83e1659..b3a16e8f0703 100644 --- a/trunk/arch/x86/kernel/aperture_64.c +++ b/trunk/arch/x86/kernel/aperture_64.c @@ -206,7 +206,7 @@ static u32 __init read_agp(int bus, int slot, int func, int cap, u32 *order) * Do an PCI bus scan by hand because we're running before the PCI * subsystem. * - * All AMD AGP bridges are AGPv3 compliant, so we can do this scan + * All K8 AGP bridges are AGPv3 compliant, so we can do this scan * generically. It's probably overkill to always scan all slots because * the AGP bridges should be always an own bus on the HT hierarchy, * but do it here for future safety. @@ -303,7 +303,7 @@ void __init early_gart_iommu_check(void) dev_limit = bus_dev_ranges[i].dev_limit; for (slot = dev_base; slot < dev_limit; slot++) { - if (!early_is_amd_nb(read_pci_config(bus, slot, 3, 0x00))) + if (!early_is_k8_nb(read_pci_config(bus, slot, 3, 0x00))) continue; ctl = read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL); @@ -358,7 +358,7 @@ void __init early_gart_iommu_check(void) dev_limit = bus_dev_ranges[i].dev_limit; for (slot = dev_base; slot < dev_limit; slot++) { - if (!early_is_amd_nb(read_pci_config(bus, slot, 3, 0x00))) + if (!early_is_k8_nb(read_pci_config(bus, slot, 3, 0x00))) continue; ctl = read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL); @@ -400,7 +400,7 @@ int __init gart_iommu_hole_init(void) dev_limit = bus_dev_ranges[i].dev_limit; for (slot = dev_base; slot < dev_limit; slot++) { - if (!early_is_amd_nb(read_pci_config(bus, slot, 3, 0x00))) + if (!early_is_k8_nb(read_pci_config(bus, slot, 3, 0x00))) continue; iommu_detected = 1; @@ -518,7 +518,7 @@ int __init gart_iommu_hole_init(void) dev_base = bus_dev_ranges[i].dev_base; dev_limit = bus_dev_ranges[i].dev_limit; for (slot = dev_base; slot < dev_limit; slot++) { - if (!early_is_amd_nb(read_pci_config(bus, slot, 3, 0x00))) + if (!early_is_k8_nb(read_pci_config(bus, slot, 3, 0x00))) continue; write_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL, ctl); diff --git a/trunk/arch/x86/kernel/apic/Makefile b/trunk/arch/x86/kernel/apic/Makefile index 3966b564ea47..910f20b457c4 100644 --- a/trunk/arch/x86/kernel/apic/Makefile +++ b/trunk/arch/x86/kernel/apic/Makefile @@ -3,7 +3,10 @@ # obj-$(CONFIG_X86_LOCAL_APIC) += apic.o apic_noop.o probe_$(BITS).o ipi.o -obj-y += hw_nmi.o +ifneq ($(CONFIG_HARDLOCKUP_DETECTOR),y) +obj-$(CONFIG_X86_LOCAL_APIC) += nmi.o +endif +obj-$(CONFIG_HARDLOCKUP_DETECTOR) += hw_nmi.o obj-$(CONFIG_X86_IO_APIC) += io_apic.o obj-$(CONFIG_SMP) += ipi.o diff --git a/trunk/arch/x86/kernel/apic/apic.c b/trunk/arch/x86/kernel/apic/apic.c index 879999a5230f..78218135b48e 100644 --- a/trunk/arch/x86/kernel/apic/apic.c +++ b/trunk/arch/x86/kernel/apic/apic.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include @@ -431,18 +432,17 @@ int setup_APIC_eilvt(u8 offset, u8 vector, u8 msg_type, u8 mask) reserved = reserve_eilvt_offset(offset, new); if (reserved != new) { - pr_err(FW_BUG "cpu %d, try to use APIC%lX (LVT offset %d) for " - "vector 0x%x, but the register is already in use for " - "vector 0x%x on another cpu\n", - smp_processor_id(), reg, offset, new, reserved); + pr_err(FW_BUG "cpu %d, try to setup vector 0x%x, but " + "vector 0x%x was already reserved by another core, " + "APIC%lX=0x%x\n", + smp_processor_id(), new, reserved, reg, old); return -EINVAL; } if (!eilvt_entry_is_changeable(old, new)) { - pr_err(FW_BUG "cpu %d, try to use APIC%lX (LVT offset %d) for " - "vector 0x%x, but the register is already in use for " - "vector 0x%x on this cpu\n", - smp_processor_id(), reg, offset, new, old); + pr_err(FW_BUG "cpu %d, try to setup vector 0x%x but " + "register already in use, APIC%lX=0x%x\n", + smp_processor_id(), new, reg, old); return -EBUSY; } @@ -799,7 +799,11 @@ void __init setup_boot_APIC_clock(void) * PIT/HPET going. Otherwise register lapic as a dummy * device. */ - lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY; + if (nmi_watchdog != NMI_IO_APIC) + lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY; + else + pr_warning("APIC timer registered as dummy," + " due to nmi_watchdog=%d!\n", nmi_watchdog); /* Setup the lapic or request the broadcast */ setup_APIC_timer(); @@ -1383,6 +1387,7 @@ void __cpuinit end_local_APIC_setup(void) } #endif + setup_apic_nmi_watchdog(NULL); apic_pm_activate(); /* @@ -1533,60 +1538,13 @@ static int __init detect_init_APIC(void) return 0; } #else - -static int apic_verify(void) -{ - u32 features, h, l; - - /* - * The APIC feature bit should now be enabled - * in `cpuid' - */ - features = cpuid_edx(1); - if (!(features & (1 << X86_FEATURE_APIC))) { - pr_warning("Could not enable APIC!\n"); - return -1; - } - set_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); - mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; - - /* The BIOS may have set up the APIC at some other address */ - rdmsr(MSR_IA32_APICBASE, l, h); - if (l & MSR_IA32_APICBASE_ENABLE) - mp_lapic_addr = l & MSR_IA32_APICBASE_BASE; - - pr_info("Found and enabled local APIC!\n"); - return 0; -} - -int apic_force_enable(void) -{ - u32 h, l; - - if (disable_apic) - return -1; - - /* - * Some BIOSes disable the local APIC in the APIC_BASE - * MSR. This can only be done in software for Intel P6 or later - * and AMD K7 (Model > 1) or later. - */ - rdmsr(MSR_IA32_APICBASE, l, h); - if (!(l & MSR_IA32_APICBASE_ENABLE)) { - pr_info("Local APIC disabled by BIOS -- reenabling.\n"); - l &= ~MSR_IA32_APICBASE_BASE; - l |= MSR_IA32_APICBASE_ENABLE | APIC_DEFAULT_PHYS_BASE; - wrmsr(MSR_IA32_APICBASE, l, h); - enabled_via_apicbase = 1; - } - return apic_verify(); -} - /* * Detect and initialize APIC */ static int __init detect_init_APIC(void) { + u32 h, l, features; + /* Disabled by kernel option? */ if (disable_apic) return -1; @@ -1616,12 +1574,38 @@ static int __init detect_init_APIC(void) "you can enable it with \"lapic\"\n"); return -1; } - if (apic_force_enable()) - return -1; - } else { - if (apic_verify()) - return -1; + /* + * Some BIOSes disable the local APIC in the APIC_BASE + * MSR. This can only be done in software for Intel P6 or later + * and AMD K7 (Model > 1) or later. + */ + rdmsr(MSR_IA32_APICBASE, l, h); + if (!(l & MSR_IA32_APICBASE_ENABLE)) { + pr_info("Local APIC disabled by BIOS -- reenabling.\n"); + l &= ~MSR_IA32_APICBASE_BASE; + l |= MSR_IA32_APICBASE_ENABLE | APIC_DEFAULT_PHYS_BASE; + wrmsr(MSR_IA32_APICBASE, l, h); + enabled_via_apicbase = 1; + } + } + /* + * The APIC feature bit should now be enabled + * in `cpuid' + */ + features = cpuid_edx(1); + if (!(features & (1 << X86_FEATURE_APIC))) { + pr_warning("Could not enable APIC!\n"); + return -1; } + set_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); + mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; + + /* The BIOS may have set up the APIC at some other address */ + rdmsr(MSR_IA32_APICBASE, l, h); + if (l & MSR_IA32_APICBASE_ENABLE) + mp_lapic_addr = l & MSR_IA32_APICBASE_BASE; + + pr_info("Found and enabled local APIC!\n"); apic_pm_activate(); @@ -1709,7 +1693,7 @@ void __init init_apic_mappings(void) * This initializes the IO-APIC and APIC hardware if this is * a UP kernel. */ -int apic_version[MAX_LOCAL_APIC]; +int apic_version[MAX_APICS]; int __init APIC_init_uniprocessor(void) { @@ -1774,10 +1758,17 @@ int __init APIC_init_uniprocessor(void) setup_IO_APIC(); else { nr_ioapics = 0; + localise_nmi_watchdog(); } +#else + localise_nmi_watchdog(); #endif x86_init.timers.setup_percpu_clockev(); +#ifdef CONFIG_X86_64 + check_nmi_watchdog(); +#endif + return 0; } diff --git a/trunk/arch/x86/kernel/apic/hw_nmi.c b/trunk/arch/x86/kernel/apic/hw_nmi.c index 72ec29e1ae06..62f6e1e55b90 100644 --- a/trunk/arch/x86/kernel/apic/hw_nmi.c +++ b/trunk/arch/x86/kernel/apic/hw_nmi.c @@ -17,31 +17,20 @@ #include #include -#ifdef CONFIG_HARDLOCKUP_DETECTOR u64 hw_nmi_get_sample_period(void) { return (u64)(cpu_khz) * 1000 * 60; } -#endif -#ifdef arch_trigger_all_cpu_backtrace +#ifdef ARCH_HAS_NMI_WATCHDOG + /* For reliability, we're prepared to waste bits here. */ static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly; -/* "in progress" flag of arch_trigger_all_cpu_backtrace */ -static unsigned long backtrace_flag; - void arch_trigger_all_cpu_backtrace(void) { int i; - if (test_and_set_bit(0, &backtrace_flag)) - /* - * If there is already a trigger_all_cpu_backtrace() in progress - * (backtrace_flag == 1), don't output double cpu dump infos. - */ - return; - cpumask_copy(to_cpumask(backtrace_mask), cpu_online_mask); printk(KERN_INFO "sending NMI to all CPUs:\n"); @@ -53,9 +42,6 @@ void arch_trigger_all_cpu_backtrace(void) break; mdelay(1); } - - clear_bit(0, &backtrace_flag); - smp_mb__after_clear_bit(); } static int __kprobes @@ -64,7 +50,7 @@ arch_trigger_all_cpu_backtrace_handler(struct notifier_block *self, { struct die_args *args = __args; struct pt_regs *regs; - int cpu; + int cpu = smp_processor_id(); switch (cmd) { case DIE_NMI: @@ -76,7 +62,6 @@ arch_trigger_all_cpu_backtrace_handler(struct notifier_block *self, } regs = args->regs; - cpu = smp_processor_id(); if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) { static arch_spinlock_t lock = __ARCH_SPIN_LOCK_UNLOCKED; @@ -106,3 +91,18 @@ static int __init register_trigger_all_cpu_backtrace(void) } early_initcall(register_trigger_all_cpu_backtrace); #endif + +/* STUB calls to mimic old nmi_watchdog behaviour */ +#if defined(CONFIG_X86_LOCAL_APIC) +unsigned int nmi_watchdog = NMI_NONE; +EXPORT_SYMBOL(nmi_watchdog); +void acpi_nmi_enable(void) { return; } +void acpi_nmi_disable(void) { return; } +#endif +atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */ +EXPORT_SYMBOL(nmi_active); +int unknown_nmi_panic; +void cpu_nmi_set_wd_enabled(void) { return; } +void stop_apic_nmi_watchdog(void *unused) { return; } +void setup_apic_nmi_watchdog(void *unused) { return; } +int __init check_nmi_watchdog(void) { return 0; } diff --git a/trunk/arch/x86/kernel/apic/io_apic.c b/trunk/arch/x86/kernel/apic/io_apic.c index f6cd5b410770..fadcd743a74f 100644 --- a/trunk/arch/x86/kernel/apic/io_apic.c +++ b/trunk/arch/x86/kernel/apic/io_apic.c @@ -54,6 +54,7 @@ #include #include #include +#include #include #include #include @@ -1933,7 +1934,8 @@ void disable_IO_APIC(void) * * by Matt Domsch Tue Dec 21 12:25:05 CST 1999 */ -void __init setup_ioapic_ids_from_mpc_nocheck(void) + +void __init setup_ioapic_ids_from_mpc(void) { union IO_APIC_reg_00 reg_00; physid_mask_t phys_id_present_map; @@ -1942,6 +1944,15 @@ void __init setup_ioapic_ids_from_mpc_nocheck(void) unsigned char old_id; unsigned long flags; + if (acpi_ioapic) + return; + /* + * Don't check I/O APIC IDs for xAPIC systems. They have + * no meaning without the serial APIC bus. + */ + if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) + || APIC_XAPIC(apic_version[boot_cpu_physical_apicid])) + return; /* * This is broken; anything with a real cpu count has to * circumvent this idiocy regardless. @@ -1995,6 +2006,7 @@ void __init setup_ioapic_ids_from_mpc_nocheck(void) physids_or(phys_id_present_map, phys_id_present_map, tmp); } + /* * We need to adjust the IRQ routing table * if the ID changed. @@ -2030,21 +2042,6 @@ void __init setup_ioapic_ids_from_mpc_nocheck(void) apic_printk(APIC_VERBOSE, " ok.\n"); } } - -void __init setup_ioapic_ids_from_mpc(void) -{ - - if (acpi_ioapic) - return; - /* - * Don't check I/O APIC IDs for xAPIC systems. They have - * no meaning without the serial APIC bus. - */ - if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) - || APIC_XAPIC(apic_version[boot_cpu_physical_apicid])) - return; - setup_ioapic_ids_from_mpc_nocheck(); -} #endif int no_timer_check __initdata; @@ -2645,6 +2642,24 @@ static void lapic_register_intr(int irq) "edge"); } +static void __init setup_nmi(void) +{ + /* + * Dirty trick to enable the NMI watchdog ... + * We put the 8259A master into AEOI mode and + * unmask on all local APICs LVT0 as NMI. + * + * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire') + * is from Maciej W. Rozycki - so we do not have to EOI from + * the NMI handler or the timer interrupt. + */ + apic_printk(APIC_VERBOSE, KERN_INFO "activating NMI Watchdog ..."); + + enable_NMI_through_LVT0(); + + apic_printk(APIC_VERBOSE, " done.\n"); +} + /* * This looks a bit hackish but it's about the only one way of sending * a few INTA cycles to 8259As and any associated glue logic. ICR does @@ -2750,6 +2765,15 @@ static inline void __init check_timer(void) */ apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); legacy_pic->init(1); +#ifdef CONFIG_X86_32 + { + unsigned int ver; + + ver = apic_read(APIC_LVR); + ver = GET_APIC_VERSION(ver); + timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver)); + } +#endif pin1 = find_isa_irq_pin(0, mp_INT); apic1 = find_isa_irq_apic(0, mp_INT); @@ -2797,6 +2821,10 @@ static inline void __init check_timer(void) unmask_ioapic(cfg); } if (timer_irq_works()) { + if (nmi_watchdog == NMI_IO_APIC) { + setup_nmi(); + legacy_pic->unmask(0); + } if (disable_timer_pin_1 > 0) clear_IO_APIC_pin(0, pin1); goto out; @@ -2822,6 +2850,11 @@ static inline void __init check_timer(void) if (timer_irq_works()) { apic_printk(APIC_QUIET, KERN_INFO "....... works.\n"); timer_through_8259 = 1; + if (nmi_watchdog == NMI_IO_APIC) { + legacy_pic->mask(0); + setup_nmi(); + legacy_pic->unmask(0); + } goto out; } /* @@ -2833,6 +2866,15 @@ static inline void __init check_timer(void) apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n"); } + if (nmi_watchdog == NMI_IO_APIC) { + apic_printk(APIC_QUIET, KERN_WARNING "timer doesn't work " + "through the IO-APIC - disabling NMI Watchdog!\n"); + nmi_watchdog = NMI_NONE; + } +#ifdef CONFIG_X86_32 + timer_ack = 0; +#endif + apic_printk(APIC_QUIET, KERN_INFO "...trying to set up timer as Virtual Wire IRQ...\n"); @@ -3597,7 +3639,7 @@ int __init io_apic_get_redir_entries (int ioapic) return reg_01.bits.entries + 1; } -static void __init probe_nr_irqs_gsi(void) +void __init probe_nr_irqs_gsi(void) { int nr; @@ -3914,7 +3956,7 @@ static struct resource * __init ioapic_setup_resources(int nr_ioapics) return res; } -void __init ioapic_and_gsi_init(void) +void __init ioapic_init_mappings(void) { unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0; struct resource *ioapic_res; @@ -3952,8 +3994,6 @@ void __init ioapic_and_gsi_init(void) ioapic_res->end = ioapic_phys + IO_APIC_SLOT_SIZE - 1; ioapic_res++; } - - probe_nr_irqs_gsi(); } void __init ioapic_insert_resources(void) @@ -4063,8 +4103,7 @@ void __init pre_init_apic_IRQ0(void) printk(KERN_INFO "Early APIC setup for system timer0\n"); #ifndef CONFIG_SMP - physid_set_mask_of_physid(boot_cpu_physical_apicid, - &phys_cpu_present_map); + phys_cpu_present_map = physid_mask_of_physid(boot_cpu_physical_apicid); #endif /* Make sure the irq descriptor is set up */ cfg = alloc_irq_and_cfg_at(0, 0); diff --git a/trunk/arch/x86/kernel/apic/nmi.c b/trunk/arch/x86/kernel/apic/nmi.c new file mode 100644 index 000000000000..c90041ccb742 --- /dev/null +++ b/trunk/arch/x86/kernel/apic/nmi.c @@ -0,0 +1,567 @@ +/* + * NMI watchdog support on APIC systems + * + * Started by Ingo Molnar + * + * Fixes: + * Mikael Pettersson : AMD K7 support for local APIC NMI watchdog. + * Mikael Pettersson : Power Management for local APIC NMI watchdog. + * Mikael Pettersson : Pentium 4 support for local APIC NMI watchdog. + * Pavel Machek and + * Mikael Pettersson : PM converted to driver model. Disable/enable API. + */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include + +#include + +int unknown_nmi_panic; +int nmi_watchdog_enabled; + +/* For reliability, we're prepared to waste bits here. */ +static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly; + +/* nmi_active: + * >0: the lapic NMI watchdog is active, but can be disabled + * <0: the lapic NMI watchdog has not been set up, and cannot + * be enabled + * 0: the lapic NMI watchdog is disabled, but can be enabled + */ +atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */ +EXPORT_SYMBOL(nmi_active); + +unsigned int nmi_watchdog = NMI_NONE; +EXPORT_SYMBOL(nmi_watchdog); + +static int panic_on_timeout; + +static unsigned int nmi_hz = HZ; +static DEFINE_PER_CPU(short, wd_enabled); +static int endflag __initdata; + +static inline unsigned int get_nmi_count(int cpu) +{ + return per_cpu(irq_stat, cpu).__nmi_count; +} + +static inline int mce_in_progress(void) +{ +#if defined(CONFIG_X86_MCE) + return atomic_read(&mce_entry) > 0; +#endif + return 0; +} + +/* + * Take the local apic timer and PIT/HPET into account. We don't + * know which one is active, when we have highres/dyntick on + */ +static inline unsigned int get_timer_irqs(int cpu) +{ + return per_cpu(irq_stat, cpu).apic_timer_irqs + + per_cpu(irq_stat, cpu).irq0_irqs; +} + +#ifdef CONFIG_SMP +/* + * The performance counters used by NMI_LOCAL_APIC don't trigger when + * the CPU is idle. To make sure the NMI watchdog really ticks on all + * CPUs during the test make them busy. + */ +static __init void nmi_cpu_busy(void *data) +{ + local_irq_enable_in_hardirq(); + /* + * Intentionally don't use cpu_relax here. This is + * to make sure that the performance counter really ticks, + * even if there is a simulator or similar that catches the + * pause instruction. On a real HT machine this is fine because + * all other CPUs are busy with "useless" delay loops and don't + * care if they get somewhat less cycles. + */ + while (endflag == 0) + mb(); +} +#endif + +static void report_broken_nmi(int cpu, unsigned int *prev_nmi_count) +{ + printk(KERN_CONT "\n"); + + printk(KERN_WARNING + "WARNING: CPU#%d: NMI appears to be stuck (%d->%d)!\n", + cpu, prev_nmi_count[cpu], get_nmi_count(cpu)); + + printk(KERN_WARNING + "Please report this to bugzilla.kernel.org,\n"); + printk(KERN_WARNING + "and attach the output of the 'dmesg' command.\n"); + + per_cpu(wd_enabled, cpu) = 0; + atomic_dec(&nmi_active); +} + +static void __acpi_nmi_disable(void *__unused) +{ + apic_write(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED); +} + +int __init check_nmi_watchdog(void) +{ + unsigned int *prev_nmi_count; + int cpu; + + if (!nmi_watchdog_active() || !atomic_read(&nmi_active)) + return 0; + + prev_nmi_count = kmalloc(nr_cpu_ids * sizeof(int), GFP_KERNEL); + if (!prev_nmi_count) + goto error; + + printk(KERN_INFO "Testing NMI watchdog ... "); + +#ifdef CONFIG_SMP + if (nmi_watchdog == NMI_LOCAL_APIC) + smp_call_function(nmi_cpu_busy, (void *)&endflag, 0); +#endif + + for_each_possible_cpu(cpu) + prev_nmi_count[cpu] = get_nmi_count(cpu); + local_irq_enable(); + mdelay((20 * 1000) / nmi_hz); /* wait 20 ticks */ + + for_each_online_cpu(cpu) { + if (!per_cpu(wd_enabled, cpu)) + continue; + if (get_nmi_count(cpu) - prev_nmi_count[cpu] <= 5) + report_broken_nmi(cpu, prev_nmi_count); + } + endflag = 1; + if (!atomic_read(&nmi_active)) { + kfree(prev_nmi_count); + atomic_set(&nmi_active, -1); + goto error; + } + printk("OK.\n"); + + /* + * now that we know it works we can reduce NMI frequency to + * something more reasonable; makes a difference in some configs + */ + if (nmi_watchdog == NMI_LOCAL_APIC) + nmi_hz = lapic_adjust_nmi_hz(1); + + kfree(prev_nmi_count); + return 0; +error: + if (nmi_watchdog == NMI_IO_APIC) { + if (!timer_through_8259) + legacy_pic->mask(0); + on_each_cpu(__acpi_nmi_disable, NULL, 1); + } + +#ifdef CONFIG_X86_32 + timer_ack = 0; +#endif + return -1; +} + +static int __init setup_nmi_watchdog(char *str) +{ + unsigned int nmi; + + if (!strncmp(str, "panic", 5)) { + panic_on_timeout = 1; + str = strchr(str, ','); + if (!str) + return 1; + ++str; + } + + if (!strncmp(str, "lapic", 5)) + nmi_watchdog = NMI_LOCAL_APIC; + else if (!strncmp(str, "ioapic", 6)) + nmi_watchdog = NMI_IO_APIC; + else { + get_option(&str, &nmi); + if (nmi >= NMI_INVALID) + return 0; + nmi_watchdog = nmi; + } + + return 1; +} +__setup("nmi_watchdog=", setup_nmi_watchdog); + +/* + * Suspend/resume support + */ +#ifdef CONFIG_PM + +static int nmi_pm_active; /* nmi_active before suspend */ + +static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state) +{ + /* only CPU0 goes here, other CPUs should be offline */ + nmi_pm_active = atomic_read(&nmi_active); + stop_apic_nmi_watchdog(NULL); + BUG_ON(atomic_read(&nmi_active) != 0); + return 0; +} + +static int lapic_nmi_resume(struct sys_device *dev) +{ + /* only CPU0 goes here, other CPUs should be offline */ + if (nmi_pm_active > 0) { + setup_apic_nmi_watchdog(NULL); + touch_nmi_watchdog(); + } + return 0; +} + +static struct sysdev_class nmi_sysclass = { + .name = "lapic_nmi", + .resume = lapic_nmi_resume, + .suspend = lapic_nmi_suspend, +}; + +static struct sys_device device_lapic_nmi = { + .id = 0, + .cls = &nmi_sysclass, +}; + +static int __init init_lapic_nmi_sysfs(void) +{ + int error; + + /* + * should really be a BUG_ON but b/c this is an + * init call, it just doesn't work. -dcz + */ + if (nmi_watchdog != NMI_LOCAL_APIC) + return 0; + + if (atomic_read(&nmi_active) < 0) + return 0; + + error = sysdev_class_register(&nmi_sysclass); + if (!error) + error = sysdev_register(&device_lapic_nmi); + return error; +} + +/* must come after the local APIC's device_initcall() */ +late_initcall(init_lapic_nmi_sysfs); + +#endif /* CONFIG_PM */ + +static void __acpi_nmi_enable(void *__unused) +{ + apic_write(APIC_LVT0, APIC_DM_NMI); +} + +/* + * Enable timer based NMIs on all CPUs: + */ +void acpi_nmi_enable(void) +{ + if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC) + on_each_cpu(__acpi_nmi_enable, NULL, 1); +} + +/* + * Disable timer based NMIs on all CPUs: + */ +void acpi_nmi_disable(void) +{ + if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC) + on_each_cpu(__acpi_nmi_disable, NULL, 1); +} + +/* + * This function is called as soon the LAPIC NMI watchdog driver has everything + * in place and it's ready to check if the NMIs belong to the NMI watchdog + */ +void cpu_nmi_set_wd_enabled(void) +{ + __get_cpu_var(wd_enabled) = 1; +} + +void setup_apic_nmi_watchdog(void *unused) +{ + if (__get_cpu_var(wd_enabled)) + return; + + /* cheap hack to support suspend/resume */ + /* if cpu0 is not active neither should the other cpus */ + if (smp_processor_id() != 0 && atomic_read(&nmi_active) <= 0) + return; + + switch (nmi_watchdog) { + case NMI_LOCAL_APIC: + if (lapic_watchdog_init(nmi_hz) < 0) { + __get_cpu_var(wd_enabled) = 0; + return; + } + /* FALL THROUGH */ + case NMI_IO_APIC: + __get_cpu_var(wd_enabled) = 1; + atomic_inc(&nmi_active); + } +} + +void stop_apic_nmi_watchdog(void *unused) +{ + /* only support LOCAL and IO APICs for now */ + if (!nmi_watchdog_active()) + return; + if (__get_cpu_var(wd_enabled) == 0) + return; + if (nmi_watchdog == NMI_LOCAL_APIC) + lapic_watchdog_stop(); + else + __acpi_nmi_disable(NULL); + __get_cpu_var(wd_enabled) = 0; + atomic_dec(&nmi_active); +} + +/* + * the best way to detect whether a CPU has a 'hard lockup' problem + * is to check it's local APIC timer IRQ counts. If they are not + * changing then that CPU has some problem. + * + * as these watchdog NMI IRQs are generated on every CPU, we only + * have to check the current processor. + * + * since NMIs don't listen to _any_ locks, we have to be extremely + * careful not to rely on unsafe variables. The printk might lock + * up though, so we have to break up any console locks first ... + * [when there will be more tty-related locks, break them up here too!] + */ + +static DEFINE_PER_CPU(unsigned, last_irq_sum); +static DEFINE_PER_CPU(long, alert_counter); +static DEFINE_PER_CPU(int, nmi_touch); + +void touch_nmi_watchdog(void) +{ + if (nmi_watchdog_active()) { + unsigned cpu; + + /* + * Tell other CPUs to reset their alert counters. We cannot + * do it ourselves because the alert count increase is not + * atomic. + */ + for_each_present_cpu(cpu) { + if (per_cpu(nmi_touch, cpu) != 1) + per_cpu(nmi_touch, cpu) = 1; + } + } + + /* + * Tickle the softlockup detector too: + */ + touch_softlockup_watchdog(); +} +EXPORT_SYMBOL(touch_nmi_watchdog); + +notrace __kprobes int +nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) +{ + /* + * Since current_thread_info()-> is always on the stack, and we + * always switch the stack NMI-atomically, it's safe to use + * smp_processor_id(). + */ + unsigned int sum; + int touched = 0; + int cpu = smp_processor_id(); + int rc = 0; + + sum = get_timer_irqs(cpu); + + if (__get_cpu_var(nmi_touch)) { + __get_cpu_var(nmi_touch) = 0; + touched = 1; + } + + /* We can be called before check_nmi_watchdog, hence NULL check. */ + if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) { + static DEFINE_RAW_SPINLOCK(lock); /* Serialise the printks */ + + raw_spin_lock(&lock); + printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu); + show_regs(regs); + dump_stack(); + raw_spin_unlock(&lock); + cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask)); + + rc = 1; + } + + /* Could check oops_in_progress here too, but it's safer not to */ + if (mce_in_progress()) + touched = 1; + + /* if the none of the timers isn't firing, this cpu isn't doing much */ + if (!touched && __get_cpu_var(last_irq_sum) == sum) { + /* + * Ayiee, looks like this CPU is stuck ... + * wait a few IRQs (5 seconds) before doing the oops ... + */ + __this_cpu_inc(alert_counter); + if (__this_cpu_read(alert_counter) == 5 * nmi_hz) + /* + * die_nmi will return ONLY if NOTIFY_STOP happens.. + */ + die_nmi("BUG: NMI Watchdog detected LOCKUP", + regs, panic_on_timeout); + } else { + __get_cpu_var(last_irq_sum) = sum; + __this_cpu_write(alert_counter, 0); + } + + /* see if the nmi watchdog went off */ + if (!__get_cpu_var(wd_enabled)) + return rc; + switch (nmi_watchdog) { + case NMI_LOCAL_APIC: + rc |= lapic_wd_event(nmi_hz); + break; + case NMI_IO_APIC: + /* + * don't know how to accurately check for this. + * just assume it was a watchdog timer interrupt + * This matches the old behaviour. + */ + rc = 1; + break; + } + return rc; +} + +#ifdef CONFIG_SYSCTL + +static void enable_ioapic_nmi_watchdog_single(void *unused) +{ + __get_cpu_var(wd_enabled) = 1; + atomic_inc(&nmi_active); + __acpi_nmi_enable(NULL); +} + +static void enable_ioapic_nmi_watchdog(void) +{ + on_each_cpu(enable_ioapic_nmi_watchdog_single, NULL, 1); + touch_nmi_watchdog(); +} + +static void disable_ioapic_nmi_watchdog(void) +{ + on_each_cpu(stop_apic_nmi_watchdog, NULL, 1); +} + +static int __init setup_unknown_nmi_panic(char *str) +{ + unknown_nmi_panic = 1; + return 1; +} +__setup("unknown_nmi_panic", setup_unknown_nmi_panic); + +static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu) +{ + unsigned char reason = get_nmi_reason(); + char buf[64]; + + sprintf(buf, "NMI received for unknown reason %02x\n", reason); + die_nmi(buf, regs, 1); /* Always panic here */ + return 0; +} + +/* + * proc handler for /proc/sys/kernel/nmi + */ +int proc_nmi_enabled(struct ctl_table *table, int write, + void __user *buffer, size_t *length, loff_t *ppos) +{ + int old_state; + + nmi_watchdog_enabled = (atomic_read(&nmi_active) > 0) ? 1 : 0; + old_state = nmi_watchdog_enabled; + proc_dointvec(table, write, buffer, length, ppos); + if (!!old_state == !!nmi_watchdog_enabled) + return 0; + + if (atomic_read(&nmi_active) < 0 || !nmi_watchdog_active()) { + printk(KERN_WARNING + "NMI watchdog is permanently disabled\n"); + return -EIO; + } + + if (nmi_watchdog == NMI_LOCAL_APIC) { + if (nmi_watchdog_enabled) + enable_lapic_nmi_watchdog(); + else + disable_lapic_nmi_watchdog(); + } else if (nmi_watchdog == NMI_IO_APIC) { + if (nmi_watchdog_enabled) + enable_ioapic_nmi_watchdog(); + else + disable_ioapic_nmi_watchdog(); + } else { + printk(KERN_WARNING + "NMI watchdog doesn't know what hardware to touch\n"); + return -EIO; + } + return 0; +} + +#endif /* CONFIG_SYSCTL */ + +int do_nmi_callback(struct pt_regs *regs, int cpu) +{ +#ifdef CONFIG_SYSCTL + if (unknown_nmi_panic) + return unknown_nmi_panic_callback(regs, cpu); +#endif + return 0; +} + +void arch_trigger_all_cpu_backtrace(void) +{ + int i; + + cpumask_copy(to_cpumask(backtrace_mask), cpu_online_mask); + + printk(KERN_INFO "sending NMI to all CPUs:\n"); + apic->send_IPI_all(NMI_VECTOR); + + /* Wait for up to 10 seconds for all CPUs to do the backtrace */ + for (i = 0; i < 10 * 1000; i++) { + if (cpumask_empty(to_cpumask(backtrace_mask))) + break; + mdelay(1); + } +} diff --git a/trunk/arch/x86/kernel/apic/x2apic_uv_x.c b/trunk/arch/x86/kernel/apic/x2apic_uv_x.c index 2a3f2a7db243..c1c52c341f40 100644 --- a/trunk/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/trunk/arch/x86/kernel/apic/x2apic_uv_x.c @@ -48,16 +48,6 @@ unsigned int uv_apicid_hibits; EXPORT_SYMBOL_GPL(uv_apicid_hibits); static DEFINE_SPINLOCK(uv_nmi_lock); -static unsigned long __init uv_early_read_mmr(unsigned long addr) -{ - unsigned long val, *mmr; - - mmr = early_ioremap(UV_LOCAL_MMR_BASE | addr, sizeof(*mmr)); - val = *mmr; - early_iounmap(mmr, sizeof(*mmr)); - return val; -} - static inline bool is_GRU_range(u64 start, u64 end) { return start >= gru_start_paddr && end <= gru_end_paddr; @@ -68,24 +58,28 @@ static bool uv_is_untracked_pat_range(u64 start, u64 end) return is_ISA_range(start, end) || is_GRU_range(start, end); } -static int __init early_get_pnodeid(void) +static int early_get_nodeid(void) { union uvh_node_id_u node_id; - union uvh_rh_gam_config_mmr_u m_n_config; - int pnode; + unsigned long *mmr; + + mmr = early_ioremap(UV_LOCAL_MMR_BASE | UVH_NODE_ID, sizeof(*mmr)); + node_id.v = *mmr; + early_iounmap(mmr, sizeof(*mmr)); /* Currently, all blades have same revision number */ - node_id.v = uv_early_read_mmr(UVH_NODE_ID); - m_n_config.v = uv_early_read_mmr(UVH_RH_GAM_CONFIG_MMR); uv_min_hub_revision_id = node_id.s.revision; - pnode = (node_id.s.node_id >> 1) & ((1 << m_n_config.s.n_skt) - 1); - return pnode; + return node_id.s.node_id; } static void __init early_get_apic_pnode_shift(void) { - uvh_apicid.v = uv_early_read_mmr(UVH_APICID); + unsigned long *mmr; + + mmr = early_ioremap(UV_LOCAL_MMR_BASE | UVH_APICID, sizeof(*mmr)); + uvh_apicid.v = *mmr; + early_iounmap(mmr, sizeof(*mmr)); if (!uvh_apicid.v) /* * Old bios, use default value @@ -101,17 +95,21 @@ static void __init early_get_apic_pnode_shift(void) static void __init uv_set_apicid_hibit(void) { union uvh_lb_target_physical_apic_id_mask_u apicid_mask; + unsigned long *mmr; - apicid_mask.v = uv_early_read_mmr(UVH_LB_TARGET_PHYSICAL_APIC_ID_MASK); + mmr = early_ioremap(UV_LOCAL_MMR_BASE | + UVH_LB_TARGET_PHYSICAL_APIC_ID_MASK, sizeof(*mmr)); + apicid_mask.v = *mmr; + early_iounmap(mmr, sizeof(*mmr)); uv_apicid_hibits = apicid_mask.s.bit_enables & UV_APICID_HIBIT_MASK; } static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) { - int pnodeid; + int nodeid; if (!strcmp(oem_id, "SGI")) { - pnodeid = early_get_pnodeid(); + nodeid = early_get_nodeid(); early_get_apic_pnode_shift(); x86_platform.is_untracked_pat_range = uv_is_untracked_pat_range; x86_platform.nmi_init = uv_nmi_init; @@ -121,7 +119,7 @@ static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) uv_system_type = UV_X2APIC; else if (!strcmp(oem_table_id, "UVH")) { __get_cpu_var(x2apic_extra_bits) = - pnodeid << uvh_apicid.s.pnode_shift; + nodeid << (uvh_apicid.s.pnode_shift - 1); uv_system_type = UV_NON_UNIQUE_APIC; uv_set_apicid_hibit(); return 1; @@ -684,32 +682,27 @@ void uv_nmi_init(void) void __init uv_system_init(void) { union uvh_rh_gam_config_mmr_u m_n_config; - union uvh_rh_gam_mmioh_overlay_config_mmr_u mmioh; union uvh_node_id_u node_id; unsigned long gnode_upper, lowmem_redir_base, lowmem_redir_size; - int bytes, nid, cpu, lcpu, pnode, blade, i, j, m_val, n_val, n_io; + int bytes, nid, cpu, lcpu, pnode, blade, i, j, m_val, n_val; int gnode_extra, max_pnode = 0; unsigned long mmr_base, present, paddr; - unsigned short pnode_mask, pnode_io_mask; + unsigned short pnode_mask; map_low_mmrs(); m_n_config.v = uv_read_local_mmr(UVH_RH_GAM_CONFIG_MMR ); m_val = m_n_config.s.m_skt; n_val = m_n_config.s.n_skt; - mmioh.v = uv_read_local_mmr(UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR); - n_io = mmioh.s.n_io; mmr_base = uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR) & ~UV_MMR_ENABLE; pnode_mask = (1 << n_val) - 1; - pnode_io_mask = (1 << n_io) - 1; - node_id.v = uv_read_local_mmr(UVH_NODE_ID); gnode_extra = (node_id.s.node_id & ~((1 << n_val) - 1)) >> 1; gnode_upper = ((unsigned long)gnode_extra << m_val); - printk(KERN_INFO "UV: N %d, M %d, N_IO: %d, gnode_upper 0x%lx, gnode_extra 0x%x, pnode_mask 0x%x, pnode_io_mask 0x%x\n", - n_val, m_val, n_io, gnode_upper, gnode_extra, pnode_mask, pnode_io_mask); + printk(KERN_DEBUG "UV: N %d, M %d, gnode_upper 0x%lx, gnode_extra 0x%x\n", + n_val, m_val, gnode_upper, gnode_extra); printk(KERN_DEBUG "UV: global MMR base 0x%lx\n", mmr_base); @@ -742,7 +735,7 @@ void __init uv_system_init(void) for (j = 0; j < 64; j++) { if (!test_bit(j, &present)) continue; - pnode = (i * 64 + j) & pnode_mask; + pnode = (i * 64 + j); uv_blade_info[blade].pnode = pnode; uv_blade_info[blade].nr_possible_cpus = 0; uv_blade_info[blade].nr_online_cpus = 0; @@ -763,7 +756,6 @@ void __init uv_system_init(void) /* * apic_pnode_shift must be set before calling uv_apicid_to_pnode(); */ - uv_cpu_hub_info(cpu)->pnode_mask = pnode_mask; uv_cpu_hub_info(cpu)->apic_pnode_shift = uvh_apicid.s.pnode_shift; pnode = uv_apicid_to_pnode(apicid); blade = boot_pnode_to_blade(pnode); @@ -780,6 +772,7 @@ void __init uv_system_init(void) uv_cpu_hub_info(cpu)->numa_blade_id = blade; uv_cpu_hub_info(cpu)->blade_processor_id = lcpu; uv_cpu_hub_info(cpu)->pnode = pnode; + uv_cpu_hub_info(cpu)->pnode_mask = pnode_mask; uv_cpu_hub_info(cpu)->gpa_mask = (1UL << (m_val + n_val)) - 1; uv_cpu_hub_info(cpu)->gnode_upper = gnode_upper; uv_cpu_hub_info(cpu)->gnode_extra = gnode_extra; @@ -803,7 +796,7 @@ void __init uv_system_init(void) map_gru_high(max_pnode); map_mmr_high(max_pnode); - map_mmioh_high(max_pnode & pnode_io_mask); + map_mmioh_high(max_pnode); uv_cpu_init(); uv_scir_register_cpu_notifier(); diff --git a/trunk/arch/x86/kernel/cpu/common.c b/trunk/arch/x86/kernel/cpu/common.c index 1d59834396bd..4b68bda30938 100644 --- a/trunk/arch/x86/kernel/cpu/common.c +++ b/trunk/arch/x86/kernel/cpu/common.c @@ -894,6 +894,7 @@ void __init identify_boot_cpu(void) #else vgetcpu_set_mode(); #endif + init_hw_perf_events(); } void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) diff --git a/trunk/arch/x86/kernel/cpu/intel_cacheinfo.c b/trunk/arch/x86/kernel/cpu/intel_cacheinfo.c index 9ecf81f9b90f..17ad03366211 100644 --- a/trunk/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/trunk/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -149,7 +149,8 @@ union _cpuid4_leaf_ecx { }; struct amd_l3_cache { - struct amd_northbridge *nb; + struct pci_dev *dev; + bool can_disable; unsigned indices; u8 subcaches[4]; }; @@ -310,12 +311,14 @@ struct _cache_attr { /* * L3 cache descriptors */ +static struct amd_l3_cache **__cpuinitdata l3_caches; + static void __cpuinit amd_calc_l3_indices(struct amd_l3_cache *l3) { unsigned int sc0, sc1, sc2, sc3; u32 val = 0; - pci_read_config_dword(l3->nb->misc, 0x1C4, &val); + pci_read_config_dword(l3->dev, 0x1C4, &val); /* calculate subcache sizes */ l3->subcaches[0] = sc0 = !(val & BIT(0)); @@ -327,14 +330,47 @@ static void __cpuinit amd_calc_l3_indices(struct amd_l3_cache *l3) l3->indices = (max(max3(sc0, sc1, sc2), sc3) << 10) - 1; } -static void __cpuinit amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf, - int index) +static struct amd_l3_cache * __cpuinit amd_init_l3_cache(int node) +{ + struct amd_l3_cache *l3; + struct pci_dev *dev = node_to_k8_nb_misc(node); + + l3 = kzalloc(sizeof(struct amd_l3_cache), GFP_ATOMIC); + if (!l3) { + printk(KERN_WARNING "Error allocating L3 struct\n"); + return NULL; + } + + l3->dev = dev; + + amd_calc_l3_indices(l3); + + return l3; +} + +static void __cpuinit amd_check_l3_disable(struct _cpuid4_info_regs *this_leaf, + int index) { - static struct amd_l3_cache *__cpuinitdata l3_caches; int node; - /* only for L3, and not in virtualized environments */ - if (index < 3 || amd_nb_num() == 0) + if (boot_cpu_data.x86 != 0x10) + return; + + if (index < 3) + return; + + /* see errata #382 and #388 */ + if (boot_cpu_data.x86_model < 0x8) + return; + + if ((boot_cpu_data.x86_model == 0x8 || + boot_cpu_data.x86_model == 0x9) + && + boot_cpu_data.x86_mask < 0x1) + return; + + /* not in virtualized environments */ + if (k8_northbridges.num == 0) return; /* @@ -342,7 +378,7 @@ static void __cpuinit amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf, * never freed but this is done only on shutdown so it doesn't matter. */ if (!l3_caches) { - int size = amd_nb_num() * sizeof(struct amd_l3_cache); + int size = k8_northbridges.num * sizeof(struct amd_l3_cache *); l3_caches = kzalloc(size, GFP_ATOMIC); if (!l3_caches) @@ -351,12 +387,14 @@ static void __cpuinit amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf, node = amd_get_nb_id(smp_processor_id()); - if (!l3_caches[node].nb) { - l3_caches[node].nb = node_to_amd_nb(node); - amd_calc_l3_indices(&l3_caches[node]); + if (!l3_caches[node]) { + l3_caches[node] = amd_init_l3_cache(node); + l3_caches[node]->can_disable = true; } - this_leaf->l3 = &l3_caches[node]; + WARN_ON(!l3_caches[node]); + + this_leaf->l3 = l3_caches[node]; } /* @@ -370,7 +408,7 @@ int amd_get_l3_disable_slot(struct amd_l3_cache *l3, unsigned slot) { unsigned int reg = 0; - pci_read_config_dword(l3->nb->misc, 0x1BC + slot * 4, ®); + pci_read_config_dword(l3->dev, 0x1BC + slot * 4, ®); /* check whether this slot is activated already */ if (reg & (3UL << 30)) @@ -384,8 +422,7 @@ static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf, { int index; - if (!this_leaf->l3 || - !amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) + if (!this_leaf->l3 || !this_leaf->l3->can_disable) return -EINVAL; index = amd_get_l3_disable_slot(this_leaf->l3, slot); @@ -420,7 +457,7 @@ static void amd_l3_disable_index(struct amd_l3_cache *l3, int cpu, if (!l3->subcaches[i]) continue; - pci_write_config_dword(l3->nb->misc, 0x1BC + slot * 4, reg); + pci_write_config_dword(l3->dev, 0x1BC + slot * 4, reg); /* * We need to WBINVD on a core on the node containing the L3 @@ -430,7 +467,7 @@ static void amd_l3_disable_index(struct amd_l3_cache *l3, int cpu, wbinvd_on_cpu(cpu); reg |= BIT(31); - pci_write_config_dword(l3->nb->misc, 0x1BC + slot * 4, reg); + pci_write_config_dword(l3->dev, 0x1BC + slot * 4, reg); } } @@ -487,8 +524,7 @@ static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf, if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (!this_leaf->l3 || - !amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) + if (!this_leaf->l3 || !this_leaf->l3->can_disable) return -EINVAL; cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map)); @@ -509,7 +545,7 @@ static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf, #define STORE_CACHE_DISABLE(slot) \ static ssize_t \ store_cache_disable_##slot(struct _cpuid4_info *this_leaf, \ - const char *buf, size_t count) \ + const char *buf, size_t count) \ { \ return store_cache_disable(this_leaf, buf, count, slot); \ } @@ -522,7 +558,10 @@ static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644, show_cache_disable_1, store_cache_disable_1); #else /* CONFIG_AMD_NB */ -#define amd_init_l3_cache(x, y) +static void __cpuinit +amd_check_l3_disable(struct _cpuid4_info_regs *this_leaf, int index) +{ +}; #endif /* CONFIG_AMD_NB */ static int @@ -536,7 +575,7 @@ __cpuinit cpuid4_cache_lookup_regs(int index, if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) { amd_cpuid4(index, &eax, &ebx, &ecx); - amd_init_l3_cache(this_leaf, index); + amd_check_l3_disable(this_leaf, index); } else { cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx); } @@ -944,48 +983,30 @@ define_one_ro(size); define_one_ro(shared_cpu_map); define_one_ro(shared_cpu_list); +#define DEFAULT_SYSFS_CACHE_ATTRS \ + &type.attr, \ + &level.attr, \ + &coherency_line_size.attr, \ + &physical_line_partition.attr, \ + &ways_of_associativity.attr, \ + &number_of_sets.attr, \ + &size.attr, \ + &shared_cpu_map.attr, \ + &shared_cpu_list.attr + static struct attribute *default_attrs[] = { - &type.attr, - &level.attr, - &coherency_line_size.attr, - &physical_line_partition.attr, - &ways_of_associativity.attr, - &number_of_sets.attr, - &size.attr, - &shared_cpu_map.attr, - &shared_cpu_list.attr, + DEFAULT_SYSFS_CACHE_ATTRS, NULL }; +static struct attribute *default_l3_attrs[] = { + DEFAULT_SYSFS_CACHE_ATTRS, #ifdef CONFIG_AMD_NB -static struct attribute ** __cpuinit amd_l3_attrs(void) -{ - static struct attribute **attrs; - int n; - - if (attrs) - return attrs; - - n = sizeof (default_attrs) / sizeof (struct attribute *); - - if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) - n += 2; - - attrs = kzalloc(n * sizeof (struct attribute *), GFP_KERNEL); - if (attrs == NULL) - return attrs = default_attrs; - - for (n = 0; default_attrs[n]; n++) - attrs[n] = default_attrs[n]; - - if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) { - attrs[n++] = &cache_disable_0.attr; - attrs[n++] = &cache_disable_1.attr; - } - - return attrs; -} + &cache_disable_0.attr, + &cache_disable_1.attr, #endif + NULL +}; static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf) { @@ -1096,11 +1117,11 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev) this_leaf = CPUID4_INFO_IDX(cpu, i); - ktype_cache.default_attrs = default_attrs; -#ifdef CONFIG_AMD_NB - if (this_leaf->l3) - ktype_cache.default_attrs = amd_l3_attrs(); -#endif + if (this_leaf->l3 && this_leaf->l3->can_disable) + ktype_cache.default_attrs = default_l3_attrs; + else + ktype_cache.default_attrs = default_attrs; + retval = kobject_init_and_add(&(this_object->kobj), &ktype_cache, per_cpu(ici_cache_kobject, cpu), diff --git a/trunk/arch/x86/kernel/cpu/mcheck/mce_amd.c b/trunk/arch/x86/kernel/cpu/mcheck/mce_amd.c index 5bf2fac52aca..80c482382d5c 100644 --- a/trunk/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/trunk/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -31,6 +31,8 @@ #include #include +#define PFX "mce_threshold: " +#define VERSION "version 1.1.1" #define NR_BANKS 6 #define NR_BLOCKS 9 #define THRESHOLD_MAX 0xFFF @@ -57,6 +59,12 @@ struct threshold_block { struct list_head miscj; }; +/* defaults used early on boot */ +static struct threshold_block threshold_defaults = { + .interrupt_enable = 0, + .threshold_limit = THRESHOLD_MAX, +}; + struct threshold_bank { struct kobject *kobj; struct threshold_block *blocks; @@ -81,101 +89,50 @@ static void amd_threshold_interrupt(void); struct thresh_restart { struct threshold_block *b; int reset; - int set_lvt_off; - int lvt_off; u16 old_limit; }; -static int lvt_off_valid(struct threshold_block *b, int apic, u32 lo, u32 hi) -{ - int msr = (hi & MASK_LVTOFF_HI) >> 20; - - if (apic < 0) { - pr_err(FW_BUG "cpu %d, failed to setup threshold interrupt " - "for bank %d, block %d (MSR%08X=0x%x%08x)\n", b->cpu, - b->bank, b->block, b->address, hi, lo); - return 0; - } - - if (apic != msr) { - pr_err(FW_BUG "cpu %d, invalid threshold interrupt offset %d " - "for bank %d, block %d (MSR%08X=0x%x%08x)\n", - b->cpu, apic, b->bank, b->block, b->address, hi, lo); - return 0; - } - - return 1; -}; - /* must be called with correct cpu affinity */ /* Called via smp_call_function_single() */ static void threshold_restart_bank(void *_tr) { struct thresh_restart *tr = _tr; - u32 hi, lo; + u32 mci_misc_hi, mci_misc_lo; - rdmsr(tr->b->address, lo, hi); + rdmsr(tr->b->address, mci_misc_lo, mci_misc_hi); - if (tr->b->threshold_limit < (hi & THRESHOLD_MAX)) + if (tr->b->threshold_limit < (mci_misc_hi & THRESHOLD_MAX)) tr->reset = 1; /* limit cannot be lower than err count */ if (tr->reset) { /* reset err count and overflow bit */ - hi = - (hi & ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI)) | + mci_misc_hi = + (mci_misc_hi & ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI)) | (THRESHOLD_MAX - tr->b->threshold_limit); } else if (tr->old_limit) { /* change limit w/o reset */ - int new_count = (hi & THRESHOLD_MAX) + + int new_count = (mci_misc_hi & THRESHOLD_MAX) + (tr->old_limit - tr->b->threshold_limit); - hi = (hi & ~MASK_ERR_COUNT_HI) | + mci_misc_hi = (mci_misc_hi & ~MASK_ERR_COUNT_HI) | (new_count & THRESHOLD_MAX); } - if (tr->set_lvt_off) { - if (lvt_off_valid(tr->b, tr->lvt_off, lo, hi)) { - /* set new lvt offset */ - hi &= ~MASK_LVTOFF_HI; - hi |= tr->lvt_off << 20; - } - } - tr->b->interrupt_enable ? - (hi = (hi & ~MASK_INT_TYPE_HI) | INT_TYPE_APIC) : - (hi &= ~MASK_INT_TYPE_HI); + (mci_misc_hi = (mci_misc_hi & ~MASK_INT_TYPE_HI) | INT_TYPE_APIC) : + (mci_misc_hi &= ~MASK_INT_TYPE_HI); - hi |= MASK_COUNT_EN_HI; - wrmsr(tr->b->address, lo, hi); -} - -static void mce_threshold_block_init(struct threshold_block *b, int offset) -{ - struct thresh_restart tr = { - .b = b, - .set_lvt_off = 1, - .lvt_off = offset, - }; - - b->threshold_limit = THRESHOLD_MAX; - threshold_restart_bank(&tr); -}; - -static int setup_APIC_mce(int reserved, int new) -{ - if (reserved < 0 && !setup_APIC_eilvt(new, THRESHOLD_APIC_VECTOR, - APIC_EILVT_MSG_FIX, 0)) - return new; - - return reserved; + mci_misc_hi |= MASK_COUNT_EN_HI; + wrmsr(tr->b->address, mci_misc_lo, mci_misc_hi); } /* cpu init entry point, called from mce.c with preempt off */ void mce_amd_feature_init(struct cpuinfo_x86 *c) { - struct threshold_block b; unsigned int cpu = smp_processor_id(); u32 low = 0, high = 0, address = 0; unsigned int bank, block; - int offset = -1; + struct thresh_restart tr; + int lvt_off = -1; + u8 offset; for (bank = 0; bank < NR_BANKS; ++bank) { for (block = 0; block < NR_BLOCKS; ++block) { @@ -206,16 +163,39 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c) if (shared_bank[bank] && c->cpu_core_id) break; #endif - offset = setup_APIC_mce(offset, - (high & MASK_LVTOFF_HI) >> 20); + offset = (high & MASK_LVTOFF_HI) >> 20; + if (lvt_off < 0) { + if (setup_APIC_eilvt(offset, + THRESHOLD_APIC_VECTOR, + APIC_EILVT_MSG_FIX, 0)) { + pr_err(FW_BUG "cpu %d, failed to " + "setup threshold interrupt " + "for bank %d, block %d " + "(MSR%08X=0x%x%08x)", + smp_processor_id(), bank, block, + address, high, low); + continue; + } + lvt_off = offset; + } else if (lvt_off != offset) { + pr_err(FW_BUG "cpu %d, invalid threshold " + "interrupt offset %d for bank %d," + "block %d (MSR%08X=0x%x%08x)", + smp_processor_id(), lvt_off, bank, + block, address, high, low); + continue; + } + + high &= ~MASK_LVTOFF_HI; + high |= lvt_off << 20; + wrmsr(address, low, high); - memset(&b, 0, sizeof(b)); - b.cpu = cpu; - b.bank = bank; - b.block = block; - b.address = address; + threshold_defaults.address = address; + tr.b = &threshold_defaults; + tr.reset = 0; + tr.old_limit = 0; + threshold_restart_bank(&tr); - mce_threshold_block_init(&b, offset); mce_threshold_vector = amd_threshold_interrupt; } } @@ -318,8 +298,9 @@ store_interrupt_enable(struct threshold_block *b, const char *buf, size_t size) b->interrupt_enable = !!new; - memset(&tr, 0, sizeof(tr)); tr.b = b; + tr.reset = 0; + tr.old_limit = 0; smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1); @@ -340,10 +321,10 @@ store_threshold_limit(struct threshold_block *b, const char *buf, size_t size) if (new < 1) new = 1; - memset(&tr, 0, sizeof(tr)); tr.old_limit = b->threshold_limit; b->threshold_limit = new; tr.b = b; + tr.reset = 0; smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1); @@ -622,9 +603,9 @@ static __cpuinit int threshold_create_device(unsigned int cpu) continue; err = threshold_create_bank(cpu, bank); if (err) - return err; + goto out; } - +out: return err; } diff --git a/trunk/arch/x86/kernel/cpu/mcheck/therm_throt.c b/trunk/arch/x86/kernel/cpu/mcheck/therm_throt.c index e12246ff5aa6..4b683267eca5 100644 --- a/trunk/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/trunk/arch/x86/kernel/cpu/mcheck/therm_throt.c @@ -53,13 +53,8 @@ struct thermal_state { struct _thermal_state core_power_limit; struct _thermal_state package_throttle; struct _thermal_state package_power_limit; - struct _thermal_state core_thresh0; - struct _thermal_state core_thresh1; }; -/* Callback to handle core threshold interrupts */ -int (*platform_thermal_notify)(__u64 msr_val); - static DEFINE_PER_CPU(struct thermal_state, thermal_state); static atomic_t therm_throt_en = ATOMIC_INIT(0); @@ -205,22 +200,6 @@ static int therm_throt_process(bool new_event, int event, int level) return 0; } -static int thresh_event_valid(int event) -{ - struct _thermal_state *state; - unsigned int this_cpu = smp_processor_id(); - struct thermal_state *pstate = &per_cpu(thermal_state, this_cpu); - u64 now = get_jiffies_64(); - - state = (event == 0) ? &pstate->core_thresh0 : &pstate->core_thresh1; - - if (time_before64(now, state->next_check)) - return 0; - - state->next_check = now + CHECK_INTERVAL; - return 1; -} - #ifdef CONFIG_SYSFS /* Add/Remove thermal_throttle interface for CPU device: */ static __cpuinit int thermal_throttle_add_dev(struct sys_device *sys_dev, @@ -334,22 +313,6 @@ device_initcall(thermal_throttle_init_device); #define PACKAGE_THROTTLED ((__u64)2 << 62) #define PACKAGE_POWER_LIMIT ((__u64)3 << 62) -static void notify_thresholds(__u64 msr_val) -{ - /* check whether the interrupt handler is defined; - * otherwise simply return - */ - if (!platform_thermal_notify) - return; - - /* lower threshold reached */ - if ((msr_val & THERM_LOG_THRESHOLD0) && thresh_event_valid(0)) - platform_thermal_notify(msr_val); - /* higher threshold reached */ - if ((msr_val & THERM_LOG_THRESHOLD1) && thresh_event_valid(1)) - platform_thermal_notify(msr_val); -} - /* Thermal transition interrupt handler */ static void intel_thermal_interrupt(void) { @@ -358,9 +321,6 @@ static void intel_thermal_interrupt(void) rdmsrl(MSR_IA32_THERM_STATUS, msr_val); - /* Check for violation of core thermal thresholds*/ - notify_thresholds(msr_val); - if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT, THERMAL_THROTTLING_EVENT, CORE_LEVEL) != 0) diff --git a/trunk/arch/x86/kernel/cpu/perf_event.c b/trunk/arch/x86/kernel/cpu/perf_event.c index 0a360d146596..6d75b9145b13 100644 --- a/trunk/arch/x86/kernel/cpu/perf_event.c +++ b/trunk/arch/x86/kernel/cpu/perf_event.c @@ -330,6 +330,9 @@ static bool reserve_pmc_hardware(void) { int i; + if (nmi_watchdog == NMI_LOCAL_APIC) + disable_lapic_nmi_watchdog(); + for (i = 0; i < x86_pmu.num_counters; i++) { if (!reserve_perfctr_nmi(x86_pmu.perfctr + i)) goto perfctr_fail; @@ -352,6 +355,9 @@ static bool reserve_pmc_hardware(void) for (i--; i >= 0; i--) release_perfctr_nmi(x86_pmu.perfctr + i); + if (nmi_watchdog == NMI_LOCAL_APIC) + enable_lapic_nmi_watchdog(); + return false; } @@ -363,6 +369,9 @@ static void release_pmc_hardware(void) release_perfctr_nmi(x86_pmu.perfctr + i); release_evntsel_nmi(x86_pmu.eventsel + i); } + + if (nmi_watchdog == NMI_LOCAL_APIC) + enable_lapic_nmi_watchdog(); } #else @@ -375,53 +384,15 @@ static void release_pmc_hardware(void) {} static bool check_hw_exists(void) { u64 val, val_new = 0; - int i, reg, ret = 0; - - /* - * Check to see if the BIOS enabled any of the counters, if so - * complain and bail. - */ - for (i = 0; i < x86_pmu.num_counters; i++) { - reg = x86_pmu.eventsel + i; - ret = rdmsrl_safe(reg, &val); - if (ret) - goto msr_fail; - if (val & ARCH_PERFMON_EVENTSEL_ENABLE) - goto bios_fail; - } - - if (x86_pmu.num_counters_fixed) { - reg = MSR_ARCH_PERFMON_FIXED_CTR_CTRL; - ret = rdmsrl_safe(reg, &val); - if (ret) - goto msr_fail; - for (i = 0; i < x86_pmu.num_counters_fixed; i++) { - if (val & (0x03 << i*4)) - goto bios_fail; - } - } + int ret = 0; - /* - * Now write a value and read it back to see if it matches, - * this is needed to detect certain hardware emulators (qemu/kvm) - * that don't trap on the MSR access and always return 0s. - */ val = 0xabcdUL; - ret = checking_wrmsrl(x86_pmu.perfctr, val); + ret |= checking_wrmsrl(x86_pmu.perfctr, val); ret |= rdmsrl_safe(x86_pmu.perfctr, &val_new); if (ret || val != val_new) - goto msr_fail; + return false; return true; - -bios_fail: - printk(KERN_CONT "Broken BIOS detected, using software events only.\n"); - printk(KERN_ERR FW_BUG "the BIOS has corrupted hw-PMU resources (MSR %x is %Lx)\n", reg, val); - return false; - -msr_fail: - printk(KERN_CONT "Broken PMU hardware detected, using software events only.\n"); - return false; } static void reserve_ds_buffers(void); @@ -480,7 +451,7 @@ static int x86_setup_perfctr(struct perf_event *event) struct hw_perf_event *hwc = &event->hw; u64 config; - if (!is_sampling_event(event)) { + if (!hwc->sample_period) { hwc->sample_period = x86_pmu.max_period; hwc->last_period = hwc->sample_period; local64_set(&hwc->period_left, hwc->sample_period); @@ -1391,7 +1362,7 @@ static void __init pmu_check_apic(void) pr_info("no hardware sampling interrupt available.\n"); } -int __init init_hw_perf_events(void) +void __init init_hw_perf_events(void) { struct event_constraint *c; int err; @@ -1406,18 +1377,20 @@ int __init init_hw_perf_events(void) err = amd_pmu_init(); break; default: - return 0; + return; } if (err != 0) { pr_cont("no PMU driver, software events only.\n"); - return 0; + return; } pmu_check_apic(); /* sanity check that the hardware exists or is emulated */ - if (!check_hw_exists()) - return 0; + if (!check_hw_exists()) { + pr_cont("Broken PMU hardware detected, software events only.\n"); + return; + } pr_cont("%s PMU driver.\n", x86_pmu.name); @@ -1465,12 +1438,9 @@ int __init init_hw_perf_events(void) pr_info("... fixed-purpose events: %d\n", x86_pmu.num_counters_fixed); pr_info("... event mask: %016Lx\n", x86_pmu.intel_ctrl); - perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW); + perf_pmu_register(&pmu); perf_cpu_notifier(x86_pmu_notifier); - - return 0; } -early_initcall(init_hw_perf_events); static inline void x86_pmu_read(struct perf_event *event) { @@ -1716,7 +1686,7 @@ perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs) perf_callchain_store(entry, regs->ip); - dump_trace(NULL, regs, NULL, &backtrace_ops, entry); + dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry); } #ifdef CONFIG_COMPAT diff --git a/trunk/arch/x86/kernel/cpu/perf_event_amd.c b/trunk/arch/x86/kernel/cpu/perf_event_amd.c index 67e2202a6039..e421b8cd6944 100644 --- a/trunk/arch/x86/kernel/cpu/perf_event_amd.c +++ b/trunk/arch/x86/kernel/cpu/perf_event_amd.c @@ -1,5 +1,7 @@ #ifdef CONFIG_CPU_SUP_AMD +static DEFINE_RAW_SPINLOCK(amd_nb_lock); + static __initconst const u64 amd_hw_cache_event_ids [PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] @@ -273,7 +275,7 @@ amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) return &emptyconstraint; } -static struct amd_nb *amd_alloc_nb(int cpu) +static struct amd_nb *amd_alloc_nb(int cpu, int nb_id) { struct amd_nb *nb; int i; @@ -283,7 +285,7 @@ static struct amd_nb *amd_alloc_nb(int cpu) if (!nb) return NULL; - nb->nb_id = -1; + nb->nb_id = nb_id; /* * initialize all possible NB constraints @@ -304,7 +306,7 @@ static int amd_pmu_cpu_prepare(int cpu) if (boot_cpu_data.x86_max_cores < 2) return NOTIFY_OK; - cpuc->amd_nb = amd_alloc_nb(cpu); + cpuc->amd_nb = amd_alloc_nb(cpu, -1); if (!cpuc->amd_nb) return NOTIFY_BAD; @@ -323,6 +325,8 @@ static void amd_pmu_cpu_starting(int cpu) nb_id = amd_get_nb_id(cpu); WARN_ON_ONCE(nb_id == BAD_APICID); + raw_spin_lock(&amd_nb_lock); + for_each_online_cpu(i) { nb = per_cpu(cpu_hw_events, i).amd_nb; if (WARN_ON_ONCE(!nb)) @@ -337,6 +341,8 @@ static void amd_pmu_cpu_starting(int cpu) cpuc->amd_nb->nb_id = nb_id; cpuc->amd_nb->refcnt++; + + raw_spin_unlock(&amd_nb_lock); } static void amd_pmu_cpu_dead(int cpu) @@ -348,6 +354,8 @@ static void amd_pmu_cpu_dead(int cpu) cpuhw = &per_cpu(cpu_hw_events, cpu); + raw_spin_lock(&amd_nb_lock); + if (cpuhw->amd_nb) { struct amd_nb *nb = cpuhw->amd_nb; @@ -356,6 +364,8 @@ static void amd_pmu_cpu_dead(int cpu) cpuhw->amd_nb = NULL; } + + raw_spin_unlock(&amd_nb_lock); } static __initconst const struct x86_pmu amd_pmu = { diff --git a/trunk/arch/x86/kernel/cpu/perf_event_intel.c b/trunk/arch/x86/kernel/cpu/perf_event_intel.c index 24e390e40f2e..c8f5c088cad1 100644 --- a/trunk/arch/x86/kernel/cpu/perf_event_intel.c +++ b/trunk/arch/x86/kernel/cpu/perf_event_intel.c @@ -816,32 +816,6 @@ static int intel_pmu_hw_config(struct perf_event *event) if (ret) return ret; - if (event->attr.precise_ip && - (event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) { - /* - * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P - * (0x003c) so that we can use it with PEBS. - * - * The regular CPU_CLK_UNHALTED.THREAD_P event (0x003c) isn't - * PEBS capable. However we can use INST_RETIRED.ANY_P - * (0x00c0), which is a PEBS capable event, to get the same - * count. - * - * INST_RETIRED.ANY_P counts the number of cycles that retires - * CNTMASK instructions. By setting CNTMASK to a value (16) - * larger than the maximum number of instructions that can be - * retired per cycle (4) and then inverting the condition, we - * count all cycles that retire 16 or less instructions, which - * is every cycle. - * - * Thereby we gain a PEBS capable cycle counter. - */ - u64 alt_config = 0x108000c0; /* INST_RETIRED.TOTAL_CYCLES */ - - alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK); - event->hw.config = alt_config; - } - if (event->attr.type != PERF_TYPE_RAW) return 0; diff --git a/trunk/arch/x86/kernel/cpu/perfctr-watchdog.c b/trunk/arch/x86/kernel/cpu/perfctr-watchdog.c index d5a236615501..d9f4ff8fcd69 100644 --- a/trunk/arch/x86/kernel/cpu/perfctr-watchdog.c +++ b/trunk/arch/x86/kernel/cpu/perfctr-watchdog.c @@ -16,12 +16,32 @@ #include #include #include -#include +#include #include #include #include +struct nmi_watchdog_ctlblk { + unsigned int cccr_msr; + unsigned int perfctr_msr; /* the MSR to reset in NMI handler */ + unsigned int evntsel_msr; /* the MSR to select the events to handle */ +}; + +/* Interface defining a CPU specific perfctr watchdog */ +struct wd_ops { + int (*reserve)(void); + void (*unreserve)(void); + int (*setup)(unsigned nmi_hz); + void (*rearm)(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz); + void (*stop)(void); + unsigned perfctr; + unsigned evntsel; + u64 checkbit; +}; + +static const struct wd_ops *wd_ops; + /* * this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and it's * offset from MSR_P4_BSU_ESCR0. @@ -40,6 +60,8 @@ static DECLARE_BITMAP(perfctr_nmi_owner, NMI_MAX_COUNTER_BITS); static DECLARE_BITMAP(evntsel_nmi_owner, NMI_MAX_COUNTER_BITS); +static DEFINE_PER_CPU(struct nmi_watchdog_ctlblk, nmi_watchdog_ctlblk); + /* converts an msr to an appropriate reservation bit */ static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr) { @@ -150,3 +172,623 @@ void release_evntsel_nmi(unsigned int msr) clear_bit(counter, evntsel_nmi_owner); } EXPORT_SYMBOL(release_evntsel_nmi); + +void disable_lapic_nmi_watchdog(void) +{ + BUG_ON(nmi_watchdog != NMI_LOCAL_APIC); + + if (atomic_read(&nmi_active) <= 0) + return; + + on_each_cpu(stop_apic_nmi_watchdog, NULL, 1); + + if (wd_ops) + wd_ops->unreserve(); + + BUG_ON(atomic_read(&nmi_active) != 0); +} + +void enable_lapic_nmi_watchdog(void) +{ + BUG_ON(nmi_watchdog != NMI_LOCAL_APIC); + + /* are we already enabled */ + if (atomic_read(&nmi_active) != 0) + return; + + /* are we lapic aware */ + if (!wd_ops) + return; + if (!wd_ops->reserve()) { + printk(KERN_ERR "NMI watchdog: cannot reserve perfctrs\n"); + return; + } + + on_each_cpu(setup_apic_nmi_watchdog, NULL, 1); + touch_nmi_watchdog(); +} + +/* + * Activate the NMI watchdog via the local APIC. + */ + +static unsigned int adjust_for_32bit_ctr(unsigned int hz) +{ + u64 counter_val; + unsigned int retval = hz; + + /* + * On Intel CPUs with P6/ARCH_PERFMON only 32 bits in the counter + * are writable, with higher bits sign extending from bit 31. + * So, we can only program the counter with 31 bit values and + * 32nd bit should be 1, for 33.. to be 1. + * Find the appropriate nmi_hz + */ + counter_val = (u64)cpu_khz * 1000; + do_div(counter_val, retval); + if (counter_val > 0x7fffffffULL) { + u64 count = (u64)cpu_khz * 1000; + do_div(count, 0x7fffffffUL); + retval = count + 1; + } + return retval; +} + +static void write_watchdog_counter(unsigned int perfctr_msr, + const char *descr, unsigned nmi_hz) +{ + u64 count = (u64)cpu_khz * 1000; + + do_div(count, nmi_hz); + if (descr) + pr_debug("setting %s to -0x%08Lx\n", descr, count); + wrmsrl(perfctr_msr, 0 - count); +} + +static void write_watchdog_counter32(unsigned int perfctr_msr, + const char *descr, unsigned nmi_hz) +{ + u64 count = (u64)cpu_khz * 1000; + + do_div(count, nmi_hz); + if (descr) + pr_debug("setting %s to -0x%08Lx\n", descr, count); + wrmsr(perfctr_msr, (u32)(-count), 0); +} + +/* + * AMD K7/K8/Family10h/Family11h support. + * AMD keeps this interface nicely stable so there is not much variety + */ +#define K7_EVNTSEL_ENABLE (1 << 22) +#define K7_EVNTSEL_INT (1 << 20) +#define K7_EVNTSEL_OS (1 << 17) +#define K7_EVNTSEL_USR (1 << 16) +#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76 +#define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING + +static int setup_k7_watchdog(unsigned nmi_hz) +{ + unsigned int perfctr_msr, evntsel_msr; + unsigned int evntsel; + struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); + + perfctr_msr = wd_ops->perfctr; + evntsel_msr = wd_ops->evntsel; + + wrmsrl(perfctr_msr, 0UL); + + evntsel = K7_EVNTSEL_INT + | K7_EVNTSEL_OS + | K7_EVNTSEL_USR + | K7_NMI_EVENT; + + /* setup the timer */ + wrmsr(evntsel_msr, evntsel, 0); + write_watchdog_counter(perfctr_msr, "K7_PERFCTR0", nmi_hz); + + /* initialize the wd struct before enabling */ + wd->perfctr_msr = perfctr_msr; + wd->evntsel_msr = evntsel_msr; + wd->cccr_msr = 0; /* unused */ + + /* ok, everything is initialized, announce that we're set */ + cpu_nmi_set_wd_enabled(); + + apic_write(APIC_LVTPC, APIC_DM_NMI); + evntsel |= K7_EVNTSEL_ENABLE; + wrmsr(evntsel_msr, evntsel, 0); + + return 1; +} + +static void single_msr_stop_watchdog(void) +{ + struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); + + wrmsr(wd->evntsel_msr, 0, 0); +} + +static int single_msr_reserve(void) +{ + if (!reserve_perfctr_nmi(wd_ops->perfctr)) + return 0; + + if (!reserve_evntsel_nmi(wd_ops->evntsel)) { + release_perfctr_nmi(wd_ops->perfctr); + return 0; + } + return 1; +} + +static void single_msr_unreserve(void) +{ + release_evntsel_nmi(wd_ops->evntsel); + release_perfctr_nmi(wd_ops->perfctr); +} + +static void __kprobes +single_msr_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz) +{ + /* start the cycle over again */ + write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz); +} + +static const struct wd_ops k7_wd_ops = { + .reserve = single_msr_reserve, + .unreserve = single_msr_unreserve, + .setup = setup_k7_watchdog, + .rearm = single_msr_rearm, + .stop = single_msr_stop_watchdog, + .perfctr = MSR_K7_PERFCTR0, + .evntsel = MSR_K7_EVNTSEL0, + .checkbit = 1ULL << 47, +}; + +/* + * Intel Model 6 (PPro+,P2,P3,P-M,Core1) + */ +#define P6_EVNTSEL0_ENABLE (1 << 22) +#define P6_EVNTSEL_INT (1 << 20) +#define P6_EVNTSEL_OS (1 << 17) +#define P6_EVNTSEL_USR (1 << 16) +#define P6_EVENT_CPU_CLOCKS_NOT_HALTED 0x79 +#define P6_NMI_EVENT P6_EVENT_CPU_CLOCKS_NOT_HALTED + +static int setup_p6_watchdog(unsigned nmi_hz) +{ + unsigned int perfctr_msr, evntsel_msr; + unsigned int evntsel; + struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); + + perfctr_msr = wd_ops->perfctr; + evntsel_msr = wd_ops->evntsel; + + /* KVM doesn't implement this MSR */ + if (wrmsr_safe(perfctr_msr, 0, 0) < 0) + return 0; + + evntsel = P6_EVNTSEL_INT + | P6_EVNTSEL_OS + | P6_EVNTSEL_USR + | P6_NMI_EVENT; + + /* setup the timer */ + wrmsr(evntsel_msr, evntsel, 0); + nmi_hz = adjust_for_32bit_ctr(nmi_hz); + write_watchdog_counter32(perfctr_msr, "P6_PERFCTR0", nmi_hz); + + /* initialize the wd struct before enabling */ + wd->perfctr_msr = perfctr_msr; + wd->evntsel_msr = evntsel_msr; + wd->cccr_msr = 0; /* unused */ + + /* ok, everything is initialized, announce that we're set */ + cpu_nmi_set_wd_enabled(); + + apic_write(APIC_LVTPC, APIC_DM_NMI); + evntsel |= P6_EVNTSEL0_ENABLE; + wrmsr(evntsel_msr, evntsel, 0); + + return 1; +} + +static void __kprobes p6_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz) +{ + /* + * P6 based Pentium M need to re-unmask + * the apic vector but it doesn't hurt + * other P6 variant. + * ArchPerfom/Core Duo also needs this + */ + apic_write(APIC_LVTPC, APIC_DM_NMI); + + /* P6/ARCH_PERFMON has 32 bit counter write */ + write_watchdog_counter32(wd->perfctr_msr, NULL, nmi_hz); +} + +static const struct wd_ops p6_wd_ops = { + .reserve = single_msr_reserve, + .unreserve = single_msr_unreserve, + .setup = setup_p6_watchdog, + .rearm = p6_rearm, + .stop = single_msr_stop_watchdog, + .perfctr = MSR_P6_PERFCTR0, + .evntsel = MSR_P6_EVNTSEL0, + .checkbit = 1ULL << 39, +}; + +/* + * Intel P4 performance counters. + * By far the most complicated of all. + */ +#define MSR_P4_MISC_ENABLE_PERF_AVAIL (1 << 7) +#define P4_ESCR_EVENT_SELECT(N) ((N) << 25) +#define P4_ESCR_OS (1 << 3) +#define P4_ESCR_USR (1 << 2) +#define P4_CCCR_OVF_PMI0 (1 << 26) +#define P4_CCCR_OVF_PMI1 (1 << 27) +#define P4_CCCR_THRESHOLD(N) ((N) << 20) +#define P4_CCCR_COMPLEMENT (1 << 19) +#define P4_CCCR_COMPARE (1 << 18) +#define P4_CCCR_REQUIRED (3 << 16) +#define P4_CCCR_ESCR_SELECT(N) ((N) << 13) +#define P4_CCCR_ENABLE (1 << 12) +#define P4_CCCR_OVF (1 << 31) + +#define P4_CONTROLS 18 +static unsigned int p4_controls[18] = { + MSR_P4_BPU_CCCR0, + MSR_P4_BPU_CCCR1, + MSR_P4_BPU_CCCR2, + MSR_P4_BPU_CCCR3, + MSR_P4_MS_CCCR0, + MSR_P4_MS_CCCR1, + MSR_P4_MS_CCCR2, + MSR_P4_MS_CCCR3, + MSR_P4_FLAME_CCCR0, + MSR_P4_FLAME_CCCR1, + MSR_P4_FLAME_CCCR2, + MSR_P4_FLAME_CCCR3, + MSR_P4_IQ_CCCR0, + MSR_P4_IQ_CCCR1, + MSR_P4_IQ_CCCR2, + MSR_P4_IQ_CCCR3, + MSR_P4_IQ_CCCR4, + MSR_P4_IQ_CCCR5, +}; +/* + * Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter + * CRU_ESCR0 (with any non-null event selector) through a complemented + * max threshold. [IA32-Vol3, Section 14.9.9] + */ +static int setup_p4_watchdog(unsigned nmi_hz) +{ + unsigned int perfctr_msr, evntsel_msr, cccr_msr; + unsigned int evntsel, cccr_val; + unsigned int misc_enable, dummy; + unsigned int ht_num; + struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); + + rdmsr(MSR_IA32_MISC_ENABLE, misc_enable, dummy); + if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL)) + return 0; + +#ifdef CONFIG_SMP + /* detect which hyperthread we are on */ + if (smp_num_siblings == 2) { + unsigned int ebx, apicid; + + ebx = cpuid_ebx(1); + apicid = (ebx >> 24) & 0xff; + ht_num = apicid & 1; + } else +#endif + ht_num = 0; + + /* + * performance counters are shared resources + * assign each hyperthread its own set + * (re-use the ESCR0 register, seems safe + * and keeps the cccr_val the same) + */ + if (!ht_num) { + /* logical cpu 0 */ + perfctr_msr = MSR_P4_IQ_PERFCTR0; + evntsel_msr = MSR_P4_CRU_ESCR0; + cccr_msr = MSR_P4_IQ_CCCR0; + cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4); + + /* + * If we're on the kdump kernel or other situation, we may + * still have other performance counter registers set to + * interrupt and they'll keep interrupting forever because + * of the P4_CCCR_OVF quirk. So we need to ACK all the + * pending interrupts and disable all the registers here, + * before reenabling the NMI delivery. Refer to p4_rearm() + * about the P4_CCCR_OVF quirk. + */ + if (reset_devices) { + unsigned int low, high; + int i; + + for (i = 0; i < P4_CONTROLS; i++) { + rdmsr(p4_controls[i], low, high); + low &= ~(P4_CCCR_ENABLE | P4_CCCR_OVF); + wrmsr(p4_controls[i], low, high); + } + } + } else { + /* logical cpu 1 */ + perfctr_msr = MSR_P4_IQ_PERFCTR1; + evntsel_msr = MSR_P4_CRU_ESCR0; + cccr_msr = MSR_P4_IQ_CCCR1; + + /* Pentium 4 D processors don't support P4_CCCR_OVF_PMI1 */ + if (boot_cpu_data.x86_model == 4 && boot_cpu_data.x86_mask == 4) + cccr_val = P4_CCCR_OVF_PMI0; + else + cccr_val = P4_CCCR_OVF_PMI1; + cccr_val |= P4_CCCR_ESCR_SELECT(4); + } + + evntsel = P4_ESCR_EVENT_SELECT(0x3F) + | P4_ESCR_OS + | P4_ESCR_USR; + + cccr_val |= P4_CCCR_THRESHOLD(15) + | P4_CCCR_COMPLEMENT + | P4_CCCR_COMPARE + | P4_CCCR_REQUIRED; + + wrmsr(evntsel_msr, evntsel, 0); + wrmsr(cccr_msr, cccr_val, 0); + write_watchdog_counter(perfctr_msr, "P4_IQ_COUNTER0", nmi_hz); + + wd->perfctr_msr = perfctr_msr; + wd->evntsel_msr = evntsel_msr; + wd->cccr_msr = cccr_msr; + + /* ok, everything is initialized, announce that we're set */ + cpu_nmi_set_wd_enabled(); + + apic_write(APIC_LVTPC, APIC_DM_NMI); + cccr_val |= P4_CCCR_ENABLE; + wrmsr(cccr_msr, cccr_val, 0); + return 1; +} + +static void stop_p4_watchdog(void) +{ + struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); + wrmsr(wd->cccr_msr, 0, 0); + wrmsr(wd->evntsel_msr, 0, 0); +} + +static int p4_reserve(void) +{ + if (!reserve_perfctr_nmi(MSR_P4_IQ_PERFCTR0)) + return 0; +#ifdef CONFIG_SMP + if (smp_num_siblings > 1 && !reserve_perfctr_nmi(MSR_P4_IQ_PERFCTR1)) + goto fail1; +#endif + if (!reserve_evntsel_nmi(MSR_P4_CRU_ESCR0)) + goto fail2; + /* RED-PEN why is ESCR1 not reserved here? */ + return 1; + fail2: +#ifdef CONFIG_SMP + if (smp_num_siblings > 1) + release_perfctr_nmi(MSR_P4_IQ_PERFCTR1); + fail1: +#endif + release_perfctr_nmi(MSR_P4_IQ_PERFCTR0); + return 0; +} + +static void p4_unreserve(void) +{ +#ifdef CONFIG_SMP + if (smp_num_siblings > 1) + release_perfctr_nmi(MSR_P4_IQ_PERFCTR1); +#endif + release_evntsel_nmi(MSR_P4_CRU_ESCR0); + release_perfctr_nmi(MSR_P4_IQ_PERFCTR0); +} + +static void __kprobes p4_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz) +{ + unsigned dummy; + /* + * P4 quirks: + * - An overflown perfctr will assert its interrupt + * until the OVF flag in its CCCR is cleared. + * - LVTPC is masked on interrupt and must be + * unmasked by the LVTPC handler. + */ + rdmsrl(wd->cccr_msr, dummy); + dummy &= ~P4_CCCR_OVF; + wrmsrl(wd->cccr_msr, dummy); + apic_write(APIC_LVTPC, APIC_DM_NMI); + /* start the cycle over again */ + write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz); +} + +static const struct wd_ops p4_wd_ops = { + .reserve = p4_reserve, + .unreserve = p4_unreserve, + .setup = setup_p4_watchdog, + .rearm = p4_rearm, + .stop = stop_p4_watchdog, + /* RED-PEN this is wrong for the other sibling */ + .perfctr = MSR_P4_BPU_PERFCTR0, + .evntsel = MSR_P4_BSU_ESCR0, + .checkbit = 1ULL << 39, +}; + +/* + * Watchdog using the Intel architected PerfMon. + * Used for Core2 and hopefully all future Intel CPUs. + */ +#define ARCH_PERFMON_NMI_EVENT_SEL ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL +#define ARCH_PERFMON_NMI_EVENT_UMASK ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK + +static struct wd_ops intel_arch_wd_ops; + +static int setup_intel_arch_watchdog(unsigned nmi_hz) +{ + unsigned int ebx; + union cpuid10_eax eax; + unsigned int unused; + unsigned int perfctr_msr, evntsel_msr; + unsigned int evntsel; + struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); + + /* + * Check whether the Architectural PerfMon supports + * Unhalted Core Cycles Event or not. + * NOTE: Corresponding bit = 0 in ebx indicates event present. + */ + cpuid(10, &(eax.full), &ebx, &unused, &unused); + if ((eax.split.mask_length < + (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) || + (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT)) + return 0; + + perfctr_msr = wd_ops->perfctr; + evntsel_msr = wd_ops->evntsel; + + wrmsrl(perfctr_msr, 0UL); + + evntsel = ARCH_PERFMON_EVENTSEL_INT + | ARCH_PERFMON_EVENTSEL_OS + | ARCH_PERFMON_EVENTSEL_USR + | ARCH_PERFMON_NMI_EVENT_SEL + | ARCH_PERFMON_NMI_EVENT_UMASK; + + /* setup the timer */ + wrmsr(evntsel_msr, evntsel, 0); + nmi_hz = adjust_for_32bit_ctr(nmi_hz); + write_watchdog_counter32(perfctr_msr, "INTEL_ARCH_PERFCTR0", nmi_hz); + + wd->perfctr_msr = perfctr_msr; + wd->evntsel_msr = evntsel_msr; + wd->cccr_msr = 0; /* unused */ + + /* ok, everything is initialized, announce that we're set */ + cpu_nmi_set_wd_enabled(); + + apic_write(APIC_LVTPC, APIC_DM_NMI); + evntsel |= ARCH_PERFMON_EVENTSEL_ENABLE; + wrmsr(evntsel_msr, evntsel, 0); + intel_arch_wd_ops.checkbit = 1ULL << (eax.split.bit_width - 1); + return 1; +} + +static struct wd_ops intel_arch_wd_ops __read_mostly = { + .reserve = single_msr_reserve, + .unreserve = single_msr_unreserve, + .setup = setup_intel_arch_watchdog, + .rearm = p6_rearm, + .stop = single_msr_stop_watchdog, + .perfctr = MSR_ARCH_PERFMON_PERFCTR1, + .evntsel = MSR_ARCH_PERFMON_EVENTSEL1, +}; + +static void probe_nmi_watchdog(void) +{ + switch (boot_cpu_data.x86_vendor) { + case X86_VENDOR_AMD: + if (boot_cpu_data.x86 == 6 || + (boot_cpu_data.x86 >= 0xf && boot_cpu_data.x86 <= 0x15)) + wd_ops = &k7_wd_ops; + return; + case X86_VENDOR_INTEL: + /* Work around where perfctr1 doesn't have a working enable + * bit as described in the following errata: + * AE49 Core Duo and Intel Core Solo 65 nm + * AN49 Intel Pentium Dual-Core + * AF49 Dual-Core Intel Xeon Processor LV + */ + if ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 14) || + ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 15 && + boot_cpu_data.x86_mask == 4))) { + intel_arch_wd_ops.perfctr = MSR_ARCH_PERFMON_PERFCTR0; + intel_arch_wd_ops.evntsel = MSR_ARCH_PERFMON_EVENTSEL0; + } + if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { + wd_ops = &intel_arch_wd_ops; + break; + } + switch (boot_cpu_data.x86) { + case 6: + if (boot_cpu_data.x86_model > 13) + return; + + wd_ops = &p6_wd_ops; + break; + case 15: + wd_ops = &p4_wd_ops; + break; + default: + return; + } + break; + } +} + +/* Interface to nmi.c */ + +int lapic_watchdog_init(unsigned nmi_hz) +{ + if (!wd_ops) { + probe_nmi_watchdog(); + if (!wd_ops) { + printk(KERN_INFO "NMI watchdog: CPU not supported\n"); + return -1; + } + + if (!wd_ops->reserve()) { + printk(KERN_ERR + "NMI watchdog: cannot reserve perfctrs\n"); + return -1; + } + } + + if (!(wd_ops->setup(nmi_hz))) { + printk(KERN_ERR "Cannot setup NMI watchdog on CPU %d\n", + raw_smp_processor_id()); + return -1; + } + + return 0; +} + +void lapic_watchdog_stop(void) +{ + if (wd_ops) + wd_ops->stop(); +} + +unsigned lapic_adjust_nmi_hz(unsigned hz) +{ + struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); + if (wd->perfctr_msr == MSR_P6_PERFCTR0 || + wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR1) + hz = adjust_for_32bit_ctr(hz); + return hz; +} + +int __kprobes lapic_wd_event(unsigned nmi_hz) +{ + struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); + u64 ctr; + + rdmsrl(wd->perfctr_msr, ctr); + if (ctr & wd_ops->checkbit) /* perfctr still running? */ + return 0; + + wd_ops->rearm(wd, nmi_hz); + return 1; +} diff --git a/trunk/arch/x86/kernel/dumpstack.c b/trunk/arch/x86/kernel/dumpstack.c index 8474c998cbd4..6e8752c1bd52 100644 --- a/trunk/arch/x86/kernel/dumpstack.c +++ b/trunk/arch/x86/kernel/dumpstack.c @@ -175,21 +175,21 @@ static const struct stacktrace_ops print_trace_ops = { void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, - unsigned long *stack, char *log_lvl) + unsigned long *stack, unsigned long bp, char *log_lvl) { printk("%sCall Trace:\n", log_lvl); - dump_trace(task, regs, stack, &print_trace_ops, log_lvl); + dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl); } void show_trace(struct task_struct *task, struct pt_regs *regs, - unsigned long *stack) + unsigned long *stack, unsigned long bp) { - show_trace_log_lvl(task, regs, stack, ""); + show_trace_log_lvl(task, regs, stack, bp, ""); } void show_stack(struct task_struct *task, unsigned long *sp) { - show_stack_log_lvl(task, NULL, sp, ""); + show_stack_log_lvl(task, NULL, sp, 0, ""); } /* @@ -210,7 +210,7 @@ void dump_stack(void) init_utsname()->release, (int)strcspn(init_utsname()->version, " "), init_utsname()->version); - show_trace(NULL, NULL, &stack); + show_trace(NULL, NULL, &stack, bp); } EXPORT_SYMBOL(dump_stack); diff --git a/trunk/arch/x86/kernel/dumpstack_32.c b/trunk/arch/x86/kernel/dumpstack_32.c index 74cc1eda384b..1bc7f75a5bda 100644 --- a/trunk/arch/x86/kernel/dumpstack_32.c +++ b/trunk/arch/x86/kernel/dumpstack_32.c @@ -17,12 +17,11 @@ #include -void dump_trace(struct task_struct *task, - struct pt_regs *regs, unsigned long *stack, +void dump_trace(struct task_struct *task, struct pt_regs *regs, + unsigned long *stack, unsigned long bp, const struct stacktrace_ops *ops, void *data) { int graph = 0; - unsigned long bp; if (!task) task = current; @@ -35,7 +34,18 @@ void dump_trace(struct task_struct *task, stack = (unsigned long *)task->thread.sp; } - bp = stack_frame(task, regs); +#ifdef CONFIG_FRAME_POINTER + if (!bp) { + if (task == current) { + /* Grab bp right from our regs */ + get_bp(bp); + } else { + /* bp is the last reg pushed by switch_to */ + bp = *(unsigned long *) task->thread.sp; + } + } +#endif + for (;;) { struct thread_info *context; @@ -55,7 +65,7 @@ EXPORT_SYMBOL(dump_trace); void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, - unsigned long *sp, char *log_lvl) + unsigned long *sp, unsigned long bp, char *log_lvl) { unsigned long *stack; int i; @@ -77,7 +87,7 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, touch_nmi_watchdog(); } printk(KERN_CONT "\n"); - show_trace_log_lvl(task, regs, sp, log_lvl); + show_trace_log_lvl(task, regs, sp, bp, log_lvl); } @@ -102,7 +112,8 @@ void show_registers(struct pt_regs *regs) u8 *ip; printk(KERN_EMERG "Stack:\n"); - show_stack_log_lvl(NULL, regs, ®s->sp, KERN_EMERG); + show_stack_log_lvl(NULL, regs, ®s->sp, + 0, KERN_EMERG); printk(KERN_EMERG "Code: "); diff --git a/trunk/arch/x86/kernel/dumpstack_64.c b/trunk/arch/x86/kernel/dumpstack_64.c index 64101335de19..6a340485249a 100644 --- a/trunk/arch/x86/kernel/dumpstack_64.c +++ b/trunk/arch/x86/kernel/dumpstack_64.c @@ -139,8 +139,8 @@ fixup_bp_irq_link(unsigned long bp, unsigned long *stack, * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack */ -void dump_trace(struct task_struct *task, - struct pt_regs *regs, unsigned long *stack, +void dump_trace(struct task_struct *task, struct pt_regs *regs, + unsigned long *stack, unsigned long bp, const struct stacktrace_ops *ops, void *data) { const unsigned cpu = get_cpu(); @@ -149,7 +149,6 @@ void dump_trace(struct task_struct *task, unsigned used = 0; struct thread_info *tinfo; int graph = 0; - unsigned long bp; if (!task) task = current; @@ -161,7 +160,18 @@ void dump_trace(struct task_struct *task, stack = (unsigned long *)task->thread.sp; } - bp = stack_frame(task, regs); +#ifdef CONFIG_FRAME_POINTER + if (!bp) { + if (task == current) { + /* Grab bp right from our regs */ + get_bp(bp); + } else { + /* bp is the last reg pushed by switch_to */ + bp = *(unsigned long *) task->thread.sp; + } + } +#endif + /* * Print function call entries in all stacks, starting at the * current stack address. If the stacks consist of nested @@ -225,7 +235,7 @@ EXPORT_SYMBOL(dump_trace); void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, - unsigned long *sp, char *log_lvl) + unsigned long *sp, unsigned long bp, char *log_lvl) { unsigned long *irq_stack_end; unsigned long *irq_stack; @@ -269,7 +279,7 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, preempt_enable(); printk(KERN_CONT "\n"); - show_trace_log_lvl(task, regs, sp, log_lvl); + show_trace_log_lvl(task, regs, sp, bp, log_lvl); } void show_registers(struct pt_regs *regs) @@ -298,7 +308,7 @@ void show_registers(struct pt_regs *regs) printk(KERN_EMERG "Stack:\n"); show_stack_log_lvl(NULL, regs, (unsigned long *)sp, - KERN_EMERG); + regs->bp, KERN_EMERG); printk(KERN_EMERG "Code: "); diff --git a/trunk/arch/x86/kernel/early_printk.c b/trunk/arch/x86/kernel/early_printk.c index cd28a350f7f9..4572f25f9325 100644 --- a/trunk/arch/x86/kernel/early_printk.c +++ b/trunk/arch/x86/kernel/early_printk.c @@ -240,7 +240,7 @@ static int __init setup_early_printk(char *buf) if (!strncmp(buf, "xen", 3)) early_console_register(&xenboot_console, keep); #endif -#ifdef CONFIG_EARLY_PRINTK_MRST +#ifdef CONFIG_X86_MRST_EARLY_PRINTK if (!strncmp(buf, "mrst", 4)) { mrst_early_console_init(); early_console_register(&early_mrst_console, keep); @@ -250,6 +250,7 @@ static int __init setup_early_printk(char *buf) hsu_early_console_init(); early_console_register(&early_hsu_console, keep); } + #endif buf++; } diff --git a/trunk/arch/x86/platform/mrst/early_printk_mrst.c b/trunk/arch/x86/kernel/early_printk_mrst.c similarity index 100% rename from trunk/arch/x86/platform/mrst/early_printk_mrst.c rename to trunk/arch/x86/kernel/early_printk_mrst.c diff --git a/trunk/arch/x86/kernel/ftrace.c b/trunk/arch/x86/kernel/ftrace.c index 298448656b60..3afb33f14d2d 100644 --- a/trunk/arch/x86/kernel/ftrace.c +++ b/trunk/arch/x86/kernel/ftrace.c @@ -19,7 +19,6 @@ #include #include #include -#include #include @@ -50,7 +49,6 @@ static DEFINE_PER_CPU(int, save_modifying_code); int ftrace_arch_code_modify_prepare(void) { set_kernel_text_rw(); - set_all_modules_text_rw(); modifying_code = 1; return 0; } @@ -58,7 +56,6 @@ int ftrace_arch_code_modify_prepare(void) int ftrace_arch_code_modify_post_process(void) { modifying_code = 0; - set_all_modules_text_ro(); set_kernel_text_ro(); return 0; } diff --git a/trunk/arch/x86/kernel/head32.c b/trunk/arch/x86/kernel/head32.c index 7f138b3c3c52..763310165fa0 100644 --- a/trunk/arch/x86/kernel/head32.c +++ b/trunk/arch/x86/kernel/head32.c @@ -61,9 +61,6 @@ void __init i386_start_kernel(void) case X86_SUBARCH_MRST: x86_mrst_early_setup(); break; - case X86_SUBARCH_CE4100: - x86_ce4100_early_setup(); - break; default: i386_default_early_setup(); break; diff --git a/trunk/arch/x86/kernel/head_32.S b/trunk/arch/x86/kernel/head_32.S index 5707fc8a7a4b..e3a8bb91e168 100644 --- a/trunk/arch/x86/kernel/head_32.S +++ b/trunk/arch/x86/kernel/head_32.S @@ -139,39 +139,6 @@ ENTRY(startup_32) movl %eax, pa(olpc_ofw_pgd) #endif -#ifdef CONFIG_PARAVIRT - /* This is can only trip for a broken bootloader... */ - cmpw $0x207, pa(boot_params + BP_version) - jb default_entry - - /* Paravirt-compatible boot parameters. Look to see what architecture - we're booting under. */ - movl pa(boot_params + BP_hardware_subarch), %eax - cmpl $num_subarch_entries, %eax - jae bad_subarch - - movl pa(subarch_entries)(,%eax,4), %eax - subl $__PAGE_OFFSET, %eax - jmp *%eax - -bad_subarch: -WEAK(lguest_entry) -WEAK(xen_entry) - /* Unknown implementation; there's really - nothing we can do at this point. */ - ud2a - - __INITDATA - -subarch_entries: - .long default_entry /* normal x86/PC */ - .long lguest_entry /* lguest hypervisor */ - .long xen_entry /* Xen hypervisor */ - .long default_entry /* Moorestown MID */ -num_subarch_entries = (. - subarch_entries) / 4 -.previous -#endif /* CONFIG_PARAVIRT */ - /* * Initialize page tables. This creates a PDE and a set of page * tables, which are located immediately beyond __brk_base. The variable @@ -181,7 +148,6 @@ num_subarch_entries = (. - subarch_entries) / 4 * * Note that the stack is not yet set up! */ -default_entry: #ifdef CONFIG_X86_PAE /* @@ -261,7 +227,42 @@ page_pde_offset = (__PAGE_OFFSET >> 20); movl $pa(initial_pg_fixmap)+PDE_IDENT_ATTR,%eax movl %eax,pa(initial_page_table+0xffc) #endif - jmp 3f + +#ifdef CONFIG_PARAVIRT + /* This is can only trip for a broken bootloader... */ + cmpw $0x207, pa(boot_params + BP_version) + jb default_entry + + /* Paravirt-compatible boot parameters. Look to see what architecture + we're booting under. */ + movl pa(boot_params + BP_hardware_subarch), %eax + cmpl $num_subarch_entries, %eax + jae bad_subarch + + movl pa(subarch_entries)(,%eax,4), %eax + subl $__PAGE_OFFSET, %eax + jmp *%eax + +bad_subarch: +WEAK(lguest_entry) +WEAK(xen_entry) + /* Unknown implementation; there's really + nothing we can do at this point. */ + ud2a + + __INITDATA + +subarch_entries: + .long default_entry /* normal x86/PC */ + .long lguest_entry /* lguest hypervisor */ + .long xen_entry /* Xen hypervisor */ + .long default_entry /* Moorestown MID */ +num_subarch_entries = (. - subarch_entries) / 4 +.previous +#else + jmp default_entry +#endif /* CONFIG_PARAVIRT */ + /* * Non-boot CPU entry point; entered from trampoline.S * We can't lgdt here, because lgdt itself uses a data segment, but @@ -282,7 +283,7 @@ ENTRY(startup_32_smp) movl %eax,%fs movl %eax,%gs #endif /* CONFIG_SMP */ -3: +default_entry: /* * New page tables may be in 4Mbyte page mode and may @@ -316,10 +317,6 @@ ENTRY(startup_32_smp) subl $0x80000001, %eax cmpl $(0x8000ffff-0x80000001), %eax ja 6f - - /* Clear bogus XD_DISABLE bits */ - call verify_cpu - mov $0x80000001, %eax cpuid /* Execute Disable bit supported? */ @@ -615,8 +612,6 @@ ignore_int: #endif iret -#include "verify_cpu.S" - __REFDATA .align 4 ENTRY(initial_code) @@ -628,13 +623,13 @@ ENTRY(initial_code) __PAGE_ALIGNED_BSS .align PAGE_SIZE_asm #ifdef CONFIG_X86_PAE -ENTRY(initial_pg_pmd) +initial_pg_pmd: .fill 1024*KPMDS,4,0 #else ENTRY(initial_page_table) .fill 1024,4,0 #endif -ENTRY(initial_pg_fixmap) +initial_pg_fixmap: .fill 1024,4,0 ENTRY(empty_zero_page) .fill 4096,1,0 diff --git a/trunk/arch/x86/kernel/kprobes.c b/trunk/arch/x86/kernel/kprobes.c index 5940282bd2f9..1cbd54c0df99 100644 --- a/trunk/arch/x86/kernel/kprobes.c +++ b/trunk/arch/x86/kernel/kprobes.c @@ -1184,10 +1184,6 @@ static void __kprobes optimized_callback(struct optimized_kprobe *op, { struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); - /* This is possible if op is under delayed unoptimizing */ - if (kprobe_disabled(&op->kp)) - return; - preempt_disable(); if (kprobe_running()) { kprobes_inc_nmissed_count(&op->kp); @@ -1405,16 +1401,10 @@ int __kprobes arch_prepare_optimized_kprobe(struct optimized_kprobe *op) return 0; } -#define MAX_OPTIMIZE_PROBES 256 -static struct text_poke_param *jump_poke_params; -static struct jump_poke_buffer { - u8 buf[RELATIVEJUMP_SIZE]; -} *jump_poke_bufs; - -static void __kprobes setup_optimize_kprobe(struct text_poke_param *tprm, - u8 *insn_buf, - struct optimized_kprobe *op) +/* Replace a breakpoint (int3) with a relative jump. */ +int __kprobes arch_optimize_kprobe(struct optimized_kprobe *op) { + unsigned char jmp_code[RELATIVEJUMP_SIZE]; s32 rel = (s32)((long)op->optinsn.insn - ((long)op->kp.addr + RELATIVEJUMP_SIZE)); @@ -1422,79 +1412,16 @@ static void __kprobes setup_optimize_kprobe(struct text_poke_param *tprm, memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE, RELATIVE_ADDR_SIZE); - insn_buf[0] = RELATIVEJUMP_OPCODE; - *(s32 *)(&insn_buf[1]) = rel; - - tprm->addr = op->kp.addr; - tprm->opcode = insn_buf; - tprm->len = RELATIVEJUMP_SIZE; -} - -/* - * Replace breakpoints (int3) with relative jumps. - * Caller must call with locking kprobe_mutex and text_mutex. - */ -void __kprobes arch_optimize_kprobes(struct list_head *oplist) -{ - struct optimized_kprobe *op, *tmp; - int c = 0; - - list_for_each_entry_safe(op, tmp, oplist, list) { - WARN_ON(kprobe_disabled(&op->kp)); - /* Setup param */ - setup_optimize_kprobe(&jump_poke_params[c], - jump_poke_bufs[c].buf, op); - list_del_init(&op->list); - if (++c >= MAX_OPTIMIZE_PROBES) - break; - } - - /* - * text_poke_smp doesn't support NMI/MCE code modifying. - * However, since kprobes itself also doesn't support NMI/MCE - * code probing, it's not a problem. - */ - text_poke_smp_batch(jump_poke_params, c); -} - -static void __kprobes setup_unoptimize_kprobe(struct text_poke_param *tprm, - u8 *insn_buf, - struct optimized_kprobe *op) -{ - /* Set int3 to first byte for kprobes */ - insn_buf[0] = BREAKPOINT_INSTRUCTION; - memcpy(insn_buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE); - - tprm->addr = op->kp.addr; - tprm->opcode = insn_buf; - tprm->len = RELATIVEJUMP_SIZE; -} - -/* - * Recover original instructions and breakpoints from relative jumps. - * Caller must call with locking kprobe_mutex. - */ -extern void arch_unoptimize_kprobes(struct list_head *oplist, - struct list_head *done_list) -{ - struct optimized_kprobe *op, *tmp; - int c = 0; - - list_for_each_entry_safe(op, tmp, oplist, list) { - /* Setup param */ - setup_unoptimize_kprobe(&jump_poke_params[c], - jump_poke_bufs[c].buf, op); - list_move(&op->list, done_list); - if (++c >= MAX_OPTIMIZE_PROBES) - break; - } + jmp_code[0] = RELATIVEJUMP_OPCODE; + *(s32 *)(&jmp_code[1]) = rel; /* * text_poke_smp doesn't support NMI/MCE code modifying. * However, since kprobes itself also doesn't support NMI/MCE * code probing, it's not a problem. */ - text_poke_smp_batch(jump_poke_params, c); + text_poke_smp(op->kp.addr, jmp_code, RELATIVEJUMP_SIZE); + return 0; } /* Replace a relative jump with a breakpoint (int3). */ @@ -1526,35 +1453,11 @@ static int __kprobes setup_detour_execution(struct kprobe *p, } return 0; } - -static int __kprobes init_poke_params(void) -{ - /* Allocate code buffer and parameter array */ - jump_poke_bufs = kmalloc(sizeof(struct jump_poke_buffer) * - MAX_OPTIMIZE_PROBES, GFP_KERNEL); - if (!jump_poke_bufs) - return -ENOMEM; - - jump_poke_params = kmalloc(sizeof(struct text_poke_param) * - MAX_OPTIMIZE_PROBES, GFP_KERNEL); - if (!jump_poke_params) { - kfree(jump_poke_bufs); - jump_poke_bufs = NULL; - return -ENOMEM; - } - - return 0; -} -#else /* !CONFIG_OPTPROBES */ -static int __kprobes init_poke_params(void) -{ - return 0; -} #endif int __init arch_init_kprobes(void) { - return init_poke_params(); + return 0; } int __kprobes arch_trampoline_kprobe(struct kprobe *p) diff --git a/trunk/arch/x86/kernel/microcode_amd.c b/trunk/arch/x86/kernel/microcode_amd.c index 0fe6d1a66c38..ce0cb4721c9a 100644 --- a/trunk/arch/x86/kernel/microcode_amd.c +++ b/trunk/arch/x86/kernel/microcode_amd.c @@ -155,6 +155,12 @@ static int apply_microcode_amd(int cpu) return 0; } +static int get_ucode_data(void *to, const u8 *from, size_t n) +{ + memcpy(to, from, n); + return 0; +} + static void * get_next_ucode(const u8 *buf, unsigned int size, unsigned int *mc_size) { @@ -162,7 +168,8 @@ get_next_ucode(const u8 *buf, unsigned int size, unsigned int *mc_size) u8 section_hdr[UCODE_CONTAINER_SECTION_HDR]; void *mc; - get_ucode_data(section_hdr, buf, UCODE_CONTAINER_SECTION_HDR); + if (get_ucode_data(section_hdr, buf, UCODE_CONTAINER_SECTION_HDR)) + return NULL; if (section_hdr[0] != UCODE_UCODE_TYPE) { pr_err("error: invalid type field in container file section header\n"); @@ -176,13 +183,16 @@ get_next_ucode(const u8 *buf, unsigned int size, unsigned int *mc_size) return NULL; } - mc = vzalloc(UCODE_MAX_SIZE); - if (!mc) - return NULL; - - get_ucode_data(mc, buf + UCODE_CONTAINER_SECTION_HDR, total_size); - *mc_size = total_size + UCODE_CONTAINER_SECTION_HDR; - + mc = vmalloc(UCODE_MAX_SIZE); + if (mc) { + memset(mc, 0, UCODE_MAX_SIZE); + if (get_ucode_data(mc, buf + UCODE_CONTAINER_SECTION_HDR, + total_size)) { + vfree(mc); + mc = NULL; + } else + *mc_size = total_size + UCODE_CONTAINER_SECTION_HDR; + } return mc; } @@ -192,7 +202,8 @@ static int install_equiv_cpu_table(const u8 *buf) unsigned int *buf_pos = (unsigned int *)container_hdr; unsigned long size; - get_ucode_data(&container_hdr, buf, UCODE_CONTAINER_HEADER_SIZE); + if (get_ucode_data(&container_hdr, buf, UCODE_CONTAINER_HEADER_SIZE)) + return 0; size = buf_pos[2]; @@ -208,7 +219,10 @@ static int install_equiv_cpu_table(const u8 *buf) } buf += UCODE_CONTAINER_HEADER_SIZE; - get_ucode_data(equiv_cpu_table, buf, size); + if (get_ucode_data(equiv_cpu_table, buf, size)) { + vfree(equiv_cpu_table); + return 0; + } return size + UCODE_CONTAINER_HEADER_SIZE; /* add header length */ } diff --git a/trunk/arch/x86/kernel/pci-gart_64.c b/trunk/arch/x86/kernel/pci-gart_64.c index c01ffa5b9b87..ba0f0ca9f280 100644 --- a/trunk/arch/x86/kernel/pci-gart_64.c +++ b/trunk/arch/x86/kernel/pci-gart_64.c @@ -143,7 +143,7 @@ static void flush_gart(void) spin_lock_irqsave(&iommu_bitmap_lock, flags); if (need_flush) { - amd_flush_garts(); + k8_flush_garts(); need_flush = false; } spin_unlock_irqrestore(&iommu_bitmap_lock, flags); @@ -561,17 +561,17 @@ static void enable_gart_translations(void) { int i; - if (!amd_nb_has_feature(AMD_NB_GART)) + if (!k8_northbridges.gart_supported) return; - for (i = 0; i < amd_nb_num(); i++) { - struct pci_dev *dev = node_to_amd_nb(i)->misc; + for (i = 0; i < k8_northbridges.num; i++) { + struct pci_dev *dev = k8_northbridges.nb_misc[i]; enable_gart_translation(dev, __pa(agp_gatt_table)); } /* Flush the GART-TLB to remove stale entries */ - amd_flush_garts(); + k8_flush_garts(); } /* @@ -596,13 +596,13 @@ static void gart_fixup_northbridges(struct sys_device *dev) if (!fix_up_north_bridges) return; - if (!amd_nb_has_feature(AMD_NB_GART)) + if (!k8_northbridges.gart_supported) return; pr_info("PCI-DMA: Restoring GART aperture settings\n"); - for (i = 0; i < amd_nb_num(); i++) { - struct pci_dev *dev = node_to_amd_nb(i)->misc; + for (i = 0; i < k8_northbridges.num; i++) { + struct pci_dev *dev = k8_northbridges.nb_misc[i]; /* * Don't enable translations just yet. That is the next @@ -644,7 +644,7 @@ static struct sys_device device_gart = { * Private Northbridge GATT initialization in case we cannot use the * AGP driver for some reason. */ -static __init int init_amd_gatt(struct agp_kern_info *info) +static __init int init_k8_gatt(struct agp_kern_info *info) { unsigned aper_size, gatt_size, new_aper_size; unsigned aper_base, new_aper_base; @@ -656,8 +656,8 @@ static __init int init_amd_gatt(struct agp_kern_info *info) aper_size = aper_base = info->aper_size = 0; dev = NULL; - for (i = 0; i < amd_nb_num(); i++) { - dev = node_to_amd_nb(i)->misc; + for (i = 0; i < k8_northbridges.num; i++) { + dev = k8_northbridges.nb_misc[i]; new_aper_base = read_aperture(dev, &new_aper_size); if (!new_aper_base) goto nommu; @@ -725,13 +725,13 @@ static void gart_iommu_shutdown(void) if (!no_agp) return; - if (!amd_nb_has_feature(AMD_NB_GART)) + if (!k8_northbridges.gart_supported) return; - for (i = 0; i < amd_nb_num(); i++) { + for (i = 0; i < k8_northbridges.num; i++) { u32 ctl; - dev = node_to_amd_nb(i)->misc; + dev = k8_northbridges.nb_misc[i]; pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &ctl); ctl &= ~GARTEN; @@ -749,14 +749,14 @@ int __init gart_iommu_init(void) unsigned long scratch; long i; - if (!amd_nb_has_feature(AMD_NB_GART)) + if (!k8_northbridges.gart_supported) return 0; #ifndef CONFIG_AGP_AMD64 no_agp = 1; #else /* Makefile puts PCI initialization via subsys_initcall first. */ - /* Add other AMD AGP bridge drivers here */ + /* Add other K8 AGP bridge drivers here */ no_agp = no_agp || (agp_amd64_init() < 0) || (agp_copy_info(agp_bridge, &info) < 0); @@ -765,7 +765,7 @@ int __init gart_iommu_init(void) if (no_iommu || (!force_iommu && max_pfn <= MAX_DMA32_PFN) || !gart_iommu_aperture || - (no_agp && init_amd_gatt(&info) < 0)) { + (no_agp && init_k8_gatt(&info) < 0)) { if (max_pfn > MAX_DMA32_PFN) { pr_warning("More than 4GB of memory but GART IOMMU not available.\n"); pr_warning("falling back to iommu=soft.\n"); diff --git a/trunk/arch/x86/kernel/process.c b/trunk/arch/x86/kernel/process.c index c852041bfc3d..57d1868a86aa 100644 --- a/trunk/arch/x86/kernel/process.c +++ b/trunk/arch/x86/kernel/process.c @@ -91,7 +91,8 @@ void exit_thread(void) void show_regs(struct pt_regs *regs) { show_registers(regs); - show_trace(NULL, regs, (unsigned long *)kernel_stack_pointer(regs)); + show_trace(NULL, regs, (unsigned long *)kernel_stack_pointer(regs), + regs->bp); } void show_regs_common(void) @@ -373,7 +374,6 @@ void default_idle(void) { if (hlt_use_halt()) { trace_power_start(POWER_CSTATE, 1, smp_processor_id()); - trace_cpu_idle(1, smp_processor_id()); current_thread_info()->status &= ~TS_POLLING; /* * TS_POLLING-cleared state must be visible before we @@ -444,7 +444,6 @@ EXPORT_SYMBOL_GPL(cpu_idle_wait); void mwait_idle_with_hints(unsigned long ax, unsigned long cx) { trace_power_start(POWER_CSTATE, (ax>>4)+1, smp_processor_id()); - trace_cpu_idle((ax>>4)+1, smp_processor_id()); if (!need_resched()) { if (cpu_has(¤t_cpu_data, X86_FEATURE_CLFLUSH_MONITOR)) clflush((void *)¤t_thread_info()->flags); @@ -461,7 +460,6 @@ static void mwait_idle(void) { if (!need_resched()) { trace_power_start(POWER_CSTATE, 1, smp_processor_id()); - trace_cpu_idle(1, smp_processor_id()); if (cpu_has(¤t_cpu_data, X86_FEATURE_CLFLUSH_MONITOR)) clflush((void *)¤t_thread_info()->flags); @@ -483,12 +481,10 @@ static void mwait_idle(void) static void poll_idle(void) { trace_power_start(POWER_CSTATE, 0, smp_processor_id()); - trace_cpu_idle(0, smp_processor_id()); local_irq_enable(); while (!need_resched()) cpu_relax(); - trace_power_end(smp_processor_id()); - trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id()); + trace_power_end(0); } /* diff --git a/trunk/arch/x86/kernel/process_32.c b/trunk/arch/x86/kernel/process_32.c index 4b9befa0e347..96586c3cbbbf 100644 --- a/trunk/arch/x86/kernel/process_32.c +++ b/trunk/arch/x86/kernel/process_32.c @@ -113,8 +113,8 @@ void cpu_idle(void) stop_critical_timings(); pm_idle(); start_critical_timings(); + trace_power_end(smp_processor_id()); - trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id()); } tick_nohz_restart_sched_tick(); preempt_enable_no_resched(); diff --git a/trunk/arch/x86/kernel/process_64.c b/trunk/arch/x86/kernel/process_64.c index 4c818a738396..b3d7a3a04f38 100644 --- a/trunk/arch/x86/kernel/process_64.c +++ b/trunk/arch/x86/kernel/process_64.c @@ -142,8 +142,6 @@ void cpu_idle(void) start_critical_timings(); trace_power_end(smp_processor_id()); - trace_cpu_idle(PWR_EVENT_EXIT, - smp_processor_id()); /* In many cases the interrupt that ended idle has already called exit_idle. But some idle diff --git a/trunk/arch/x86/kernel/reboot_fixups_32.c b/trunk/arch/x86/kernel/reboot_fixups_32.c index c8e41e90f59c..fda313ebbb03 100644 --- a/trunk/arch/x86/kernel/reboot_fixups_32.c +++ b/trunk/arch/x86/kernel/reboot_fixups_32.c @@ -43,33 +43,17 @@ static void rdc321x_reset(struct pci_dev *dev) outb(1, 0x92); } -static void ce4100_reset(struct pci_dev *dev) -{ - int i; - - for (i = 0; i < 10; i++) { - outb(0x2, 0xcf9); - udelay(50); - } -} - struct device_fixup { unsigned int vendor; unsigned int device; void (*reboot_fixup)(struct pci_dev *); }; -/* - * PCI ids solely used for fixups_table go here - */ -#define PCI_DEVICE_ID_INTEL_CE4100 0x0708 - static const struct device_fixup fixups_table[] = { { PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5530_LEGACY, cs5530a_warm_reset }, { PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CS5536_ISA, cs5536_warm_reset }, { PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_SC1100_BRIDGE, cs5530a_warm_reset }, { PCI_VENDOR_ID_RDC, PCI_DEVICE_ID_RDC_R6030, rdc321x_reset }, -{ PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CE4100, ce4100_reset }, }; /* diff --git a/trunk/arch/x86/kernel/setup.c b/trunk/arch/x86/kernel/setup.c index d3cfe26c0252..a0f52af256a0 100644 --- a/trunk/arch/x86/kernel/setup.c +++ b/trunk/arch/x86/kernel/setup.c @@ -705,7 +705,7 @@ static u64 __init get_max_mapped(void) void __init setup_arch(char **cmdline_p) { int acpi = 0; - int amd = 0; + int k8 = 0; unsigned long flags; #ifdef CONFIG_X86_32 @@ -991,12 +991,12 @@ void __init setup_arch(char **cmdline_p) acpi = acpi_numa_init(); #endif -#ifdef CONFIG_AMD_NUMA +#ifdef CONFIG_K8_NUMA if (!acpi) - amd = !amd_numa_init(0, max_pfn); + k8 = !k8_numa_init(0, max_pfn); #endif - initmem_init(0, max_pfn, acpi, amd); + initmem_init(0, max_pfn, acpi, k8); memblock_find_dma_reserve(); dma32_reserve_bootmem(); @@ -1045,7 +1045,10 @@ void __init setup_arch(char **cmdline_p) #endif init_apic_mappings(); - ioapic_and_gsi_init(); + ioapic_init_mappings(); + + /* need to wait for io_apic is mapped */ + probe_nr_irqs_gsi(); kvm_guest_init(); diff --git a/trunk/arch/x86/kernel/smpboot.c b/trunk/arch/x86/kernel/smpboot.c index ee886fe10ef4..083e99d1b7df 100644 --- a/trunk/arch/x86/kernel/smpboot.c +++ b/trunk/arch/x86/kernel/smpboot.c @@ -281,13 +281,6 @@ static void __cpuinit smp_callin(void) */ smp_store_cpu_info(cpuid); - /* - * This must be done before setting cpu_online_mask - * or calling notify_cpu_starting. - */ - set_cpu_sibling_map(raw_smp_processor_id()); - wmb(); - notify_cpu_starting(cpuid); /* @@ -323,6 +316,16 @@ notrace static void __cpuinit start_secondary(void *unused) */ check_tsc_sync_target(); + if (nmi_watchdog == NMI_IO_APIC) { + legacy_pic->mask(0); + enable_NMI_through_LVT0(); + legacy_pic->unmask(0); + } + + /* This must be done before setting cpu_online_mask */ + set_cpu_sibling_map(raw_smp_processor_id()); + wmb(); + /* * We need to hold call_lock, so there is no inconsistency * between the time smp_call_function() determines number of @@ -1058,6 +1061,8 @@ static int __init smp_sanity_check(unsigned max_cpus) printk(KERN_INFO "SMP mode deactivated.\n"); smpboot_clear_io_apic(); + localise_nmi_watchdog(); + connect_bsp_APIC(); setup_local_APIC(); end_local_APIC_setup(); @@ -1161,20 +1166,6 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) preempt_enable(); } -void arch_disable_nonboot_cpus_begin(void) -{ - /* - * Avoid the smp alternatives switch during the disable_nonboot_cpus(). - * In the suspend path, we will be back in the SMP mode shortly anyways. - */ - skip_smp_alternatives = true; -} - -void arch_disable_nonboot_cpus_end(void) -{ - skip_smp_alternatives = false; -} - void arch_enable_nonboot_cpus_begin(void) { set_mtrr_aps_delayed_init(); @@ -1205,6 +1196,7 @@ void __init native_smp_cpus_done(unsigned int max_cpus) #ifdef CONFIG_X86_IO_APIC setup_ioapic_dest(); #endif + check_nmi_watchdog(); mtrr_aps_init(); } @@ -1349,6 +1341,8 @@ int native_cpu_disable(void) if (cpu == 0) return -EBUSY; + if (nmi_watchdog == NMI_LOCAL_APIC) + stop_apic_nmi_watchdog(NULL); clear_local_APIC(); cpu_disable_common(); diff --git a/trunk/arch/x86/kernel/stacktrace.c b/trunk/arch/x86/kernel/stacktrace.c index 938c8e10a19a..b53c525368a7 100644 --- a/trunk/arch/x86/kernel/stacktrace.c +++ b/trunk/arch/x86/kernel/stacktrace.c @@ -73,22 +73,22 @@ static const struct stacktrace_ops save_stack_ops_nosched = { */ void save_stack_trace(struct stack_trace *trace) { - dump_trace(current, NULL, NULL, &save_stack_ops, trace); + dump_trace(current, NULL, NULL, 0, &save_stack_ops, trace); if (trace->nr_entries < trace->max_entries) trace->entries[trace->nr_entries++] = ULONG_MAX; } EXPORT_SYMBOL_GPL(save_stack_trace); -void save_stack_trace_regs(struct stack_trace *trace, struct pt_regs *regs) +void save_stack_trace_bp(struct stack_trace *trace, unsigned long bp) { - dump_trace(current, regs, NULL, &save_stack_ops, trace); + dump_trace(current, NULL, NULL, bp, &save_stack_ops, trace); if (trace->nr_entries < trace->max_entries) trace->entries[trace->nr_entries++] = ULONG_MAX; } void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) { - dump_trace(tsk, NULL, NULL, &save_stack_ops_nosched, trace); + dump_trace(tsk, NULL, NULL, 0, &save_stack_ops_nosched, trace); if (trace->nr_entries < trace->max_entries) trace->entries[trace->nr_entries++] = ULONG_MAX; } diff --git a/trunk/arch/x86/kernel/time.c b/trunk/arch/x86/kernel/time.c index 25a28a245937..fb5cc5e14cfa 100644 --- a/trunk/arch/x86/kernel/time.c +++ b/trunk/arch/x86/kernel/time.c @@ -22,6 +22,10 @@ #include #include +#if defined(CONFIG_X86_32) && defined(CONFIG_X86_IO_APIC) +int timer_ack; +#endif + #ifdef CONFIG_X86_64 volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES; #endif @@ -59,6 +63,20 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id) /* Keep nmi watchdog up to date */ inc_irq_stat(irq0_irqs); + /* Optimized out for !IO_APIC and x86_64 */ + if (timer_ack) { + /* + * Subtle, when I/O APICs are used we have to ack timer IRQ + * manually to deassert NMI lines for the watchdog if run + * on an 82489DX-based system. + */ + raw_spin_lock(&i8259A_lock); + outb(0x0c, PIC_MASTER_OCW3); + /* Ack the IRQ; AEOI will end it automatically. */ + inb(PIC_MASTER_POLL); + raw_spin_unlock(&i8259A_lock); + } + global_clock_event->event_handler(global_clock_event); /* MCA bus quirk: Acknowledge irq0 by setting bit 7 in port 0x61 */ diff --git a/trunk/arch/x86/kernel/trampoline_64.S b/trunk/arch/x86/kernel/trampoline_64.S index 075d130efcf9..3af2dff58b21 100644 --- a/trunk/arch/x86/kernel/trampoline_64.S +++ b/trunk/arch/x86/kernel/trampoline_64.S @@ -127,7 +127,7 @@ startup_64: no_longmode: hlt jmp no_longmode -#include "verify_cpu.S" +#include "verify_cpu_64.S" # Careful these need to be in the same 64K segment as the above; tidt: diff --git a/trunk/arch/x86/kernel/traps.c b/trunk/arch/x86/kernel/traps.c index c76aaca5694d..cb838ca42c96 100644 --- a/trunk/arch/x86/kernel/traps.c +++ b/trunk/arch/x86/kernel/traps.c @@ -83,8 +83,6 @@ EXPORT_SYMBOL_GPL(used_vectors); static int ignore_nmis; -int unknown_nmi_panic; - static inline void conditional_sti(struct pt_regs *regs) { if (regs->flags & X86_EFLAGS_IF) @@ -302,13 +300,6 @@ do_general_protection(struct pt_regs *regs, long error_code) die("general protection fault", regs, error_code); } -static int __init setup_unknown_nmi_panic(char *str) -{ - unknown_nmi_panic = 1; - return 1; -} -__setup("unknown_nmi_panic", setup_unknown_nmi_panic); - static notrace __kprobes void mem_parity_error(unsigned char reason, struct pt_regs *regs) { @@ -351,11 +342,9 @@ io_check_error(unsigned char reason, struct pt_regs *regs) reason = (reason & 0xf) | 8; outb(reason, 0x61); - i = 20000; - while (--i) { - touch_nmi_watchdog(); - udelay(100); - } + i = 2000; + while (--i) + udelay(1000); reason &= ~8; outb(reason, 0x61); @@ -382,7 +371,7 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs) reason, smp_processor_id()); printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n"); - if (unknown_nmi_panic || panic_on_unrecovered_nmi) + if (panic_on_unrecovered_nmi) panic("NMI: Not continuing"); printk(KERN_EMERG "Dazed and confused, but trying to continue\n"); @@ -408,8 +397,20 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs) if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP) return; -#endif + +#ifndef CONFIG_LOCKUP_DETECTOR + /* + * Ok, so this is none of the documented NMI sources, + * so it must be the NMI watchdog. + */ + if (nmi_watchdog_tick(regs, reason)) + return; + if (!do_nmi_callback(regs, cpu)) +#endif /* !CONFIG_LOCKUP_DETECTOR */ + unknown_nmi_error(reason, regs); +#else unknown_nmi_error(reason, regs); +#endif return; } @@ -445,12 +446,14 @@ do_nmi(struct pt_regs *regs, long error_code) void stop_nmi(void) { + acpi_nmi_disable(); ignore_nmis++; } void restart_nmi(void) { ignore_nmis--; + acpi_nmi_enable(); } /* May run on IST stack. */ diff --git a/trunk/arch/x86/kernel/tsc.c b/trunk/arch/x86/kernel/tsc.c index 356a0d455cf9..0c40d8b72416 100644 --- a/trunk/arch/x86/kernel/tsc.c +++ b/trunk/arch/x86/kernel/tsc.c @@ -872,9 +872,6 @@ __cpuinit int unsynchronized_tsc(void) if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) return 0; - - if (tsc_clocksource_reliable) - return 0; /* * Intel systems are normally all synchronized. * Exceptions must mark TSC as unstable: @@ -882,92 +879,14 @@ __cpuinit int unsynchronized_tsc(void) if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) { /* assume multi socket systems are not synchronized: */ if (num_possible_cpus() > 1) - return 1; - } - - return 0; -} - - -static void tsc_refine_calibration_work(struct work_struct *work); -static DECLARE_DELAYED_WORK(tsc_irqwork, tsc_refine_calibration_work); -/** - * tsc_refine_calibration_work - Further refine tsc freq calibration - * @work - ignored. - * - * This functions uses delayed work over a period of a - * second to further refine the TSC freq value. Since this is - * timer based, instead of loop based, we don't block the boot - * process while this longer calibration is done. - * - * If there are any calibration anomolies (too many SMIs, etc), - * or the refined calibration is off by 1% of the fast early - * calibration, we throw out the new calibration and use the - * early calibration. - */ -static void tsc_refine_calibration_work(struct work_struct *work) -{ - static u64 tsc_start = -1, ref_start; - static int hpet; - u64 tsc_stop, ref_stop, delta; - unsigned long freq; - - /* Don't bother refining TSC on unstable systems */ - if (check_tsc_unstable()) - goto out; - - /* - * Since the work is started early in boot, we may be - * delayed the first time we expire. So set the workqueue - * again once we know timers are working. - */ - if (tsc_start == -1) { - /* - * Only set hpet once, to avoid mixing hardware - * if the hpet becomes enabled later. - */ - hpet = is_hpet_enabled(); - schedule_delayed_work(&tsc_irqwork, HZ); - tsc_start = tsc_read_refs(&ref_start, hpet); - return; + tsc_unstable = 1; } - tsc_stop = tsc_read_refs(&ref_stop, hpet); - - /* hpet or pmtimer available ? */ - if (!hpet && !ref_start && !ref_stop) - goto out; - - /* Check, whether the sampling was disturbed by an SMI */ - if (tsc_start == ULLONG_MAX || tsc_stop == ULLONG_MAX) - goto out; - - delta = tsc_stop - tsc_start; - delta *= 1000000LL; - if (hpet) - freq = calc_hpet_ref(delta, ref_start, ref_stop); - else - freq = calc_pmtimer_ref(delta, ref_start, ref_stop); - - /* Make sure we're within 1% */ - if (abs(tsc_khz - freq) > tsc_khz/100) - goto out; - - tsc_khz = freq; - printk(KERN_INFO "Refined TSC clocksource calibration: " - "%lu.%03lu MHz.\n", (unsigned long)tsc_khz / 1000, - (unsigned long)tsc_khz % 1000); - -out: - clocksource_register_khz(&clocksource_tsc, tsc_khz); + return tsc_unstable; } - -static int __init init_tsc_clocksource(void) +static void __init init_tsc_clocksource(void) { - if (!cpu_has_tsc || tsc_disabled > 0) - return 0; - if (tsc_clocksource_reliable) clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY; /* lower the rating if we already know its unstable: */ @@ -975,14 +894,8 @@ static int __init init_tsc_clocksource(void) clocksource_tsc.rating = 0; clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS; } - schedule_delayed_work(&tsc_irqwork, 0); - return 0; + clocksource_register_khz(&clocksource_tsc, tsc_khz); } -/* - * We use device_initcall here, to ensure we run after the hpet - * is fully initialized, which may occur at fs_initcall time. - */ -device_initcall(init_tsc_clocksource); void __init tsc_init(void) { @@ -1036,5 +949,6 @@ void __init tsc_init(void) mark_tsc_unstable("TSCs unsynchronized"); check_system_tsc_reliable(); + init_tsc_clocksource(); } diff --git a/trunk/arch/x86/kernel/verify_cpu.S b/trunk/arch/x86/kernel/verify_cpu_64.S similarity index 65% rename from trunk/arch/x86/kernel/verify_cpu.S rename to trunk/arch/x86/kernel/verify_cpu_64.S index 0edefc19a113..56a8c2a867d9 100644 --- a/trunk/arch/x86/kernel/verify_cpu.S +++ b/trunk/arch/x86/kernel/verify_cpu_64.S @@ -7,7 +7,6 @@ * Copyright (c) 2007 Andi Kleen (ak@suse.de) * Copyright (c) 2007 Eric Biederman (ebiederm@xmission.com) * Copyright (c) 2007 Vivek Goyal (vgoyal@in.ibm.com) - * Copyright (c) 2010 Kees Cook (kees.cook@canonical.com) * * This source code is licensed under the GNU General Public License, * Version 2. See the file COPYING for more details. @@ -15,16 +14,17 @@ * This is a common code for verification whether CPU supports * long mode and SSE or not. It is not called directly instead this * file is included at various places and compiled in that context. - * This file is expected to run in 32bit code. Currently: + * Following are the current usage. * - * arch/x86/boot/compressed/head_64.S: Boot cpu verification - * arch/x86/kernel/trampoline_64.S: secondary processor verfication - * arch/x86/kernel/head_32.S: processor startup + * This file is included by both 16bit and 32bit code. * - * verify_cpu, returns the status of longmode and SSE in register %eax. - * 0: Success 1: Failure + * arch/x86_64/boot/setup.S : Boot cpu verification (16bit) + * arch/x86_64/boot/compressed/head.S: Boot cpu verification (32bit) + * arch/x86_64/kernel/trampoline.S: secondary processor verfication (16bit) + * arch/x86_64/kernel/acpi/wakeup.S:Verfication at resume (16bit) * - * On Intel, the XD_DISABLE flag will be cleared as a side-effect. + * verify_cpu, returns the status of cpu check in register %eax. + * 0: Success 1: Failure * * The caller needs to check for the error code and take the action * appropriately. Either display a message or halt. @@ -62,41 +62,8 @@ verify_cpu: cmpl $0x444d4163,%ecx jnz verify_cpu_noamd mov $1,%di # cpu is from AMD - jmp verify_cpu_check verify_cpu_noamd: - cmpl $0x756e6547,%ebx # GenuineIntel? - jnz verify_cpu_check - cmpl $0x49656e69,%edx - jnz verify_cpu_check - cmpl $0x6c65746e,%ecx - jnz verify_cpu_check - - # only call IA32_MISC_ENABLE when: - # family > 6 || (family == 6 && model >= 0xd) - movl $0x1, %eax # check CPU family and model - cpuid - movl %eax, %ecx - - andl $0x0ff00f00, %eax # mask family and extended family - shrl $8, %eax - cmpl $6, %eax - ja verify_cpu_clear_xd # family > 6, ok - jb verify_cpu_check # family < 6, skip - - andl $0x000f00f0, %ecx # mask model and extended model - shrl $4, %ecx - cmpl $0xd, %ecx - jb verify_cpu_check # family == 6, model < 0xd, skip - -verify_cpu_clear_xd: - movl $MSR_IA32_MISC_ENABLE, %ecx - rdmsr - btrl $2, %edx # clear MSR_IA32_MISC_ENABLE_XD_DISABLE - jnc verify_cpu_check # only write MSR if bit was changed - wrmsr - -verify_cpu_check: movl $0x1,%eax # Does the cpu have what it takes cpuid andl $REQUIRED_MASK0,%edx diff --git a/trunk/arch/x86/kernel/vmlinux.lds.S b/trunk/arch/x86/kernel/vmlinux.lds.S index bf4700755184..e03530aebfd0 100644 --- a/trunk/arch/x86/kernel/vmlinux.lds.S +++ b/trunk/arch/x86/kernel/vmlinux.lds.S @@ -69,7 +69,7 @@ jiffies_64 = jiffies; PHDRS { text PT_LOAD FLAGS(5); /* R_E */ - data PT_LOAD FLAGS(6); /* RW_ */ + data PT_LOAD FLAGS(7); /* RWE */ #ifdef CONFIG_X86_64 user PT_LOAD FLAGS(5); /* R_E */ #ifdef CONFIG_SMP @@ -116,10 +116,6 @@ SECTIONS EXCEPTION_TABLE(16) :text = 0x9090 -#if defined(CONFIG_DEBUG_RODATA) - /* .text should occupy whole number of pages */ - . = ALIGN(PAGE_SIZE); -#endif X64_ALIGN_DEBUG_RODATA_BEGIN RO_DATA(PAGE_SIZE) X64_ALIGN_DEBUG_RODATA_END @@ -339,7 +335,7 @@ SECTIONS __bss_start = .; *(.bss..page_aligned) *(.bss) - . = ALIGN(PAGE_SIZE); + . = ALIGN(4); __bss_stop = .; } diff --git a/trunk/arch/x86/kvm/i8259.c b/trunk/arch/x86/kvm/i8259.c index 3cece05e4ac4..f628234fbeca 100644 --- a/trunk/arch/x86/kvm/i8259.c +++ b/trunk/arch/x86/kvm/i8259.c @@ -575,8 +575,6 @@ struct kvm_pic *kvm_create_pic(struct kvm *kvm) s->pics[1].elcr_mask = 0xde; s->pics[0].pics_state = s; s->pics[1].pics_state = s; - s->pics[0].isr_ack = 0xff; - s->pics[1].isr_ack = 0xff; /* * Initialize PIO device diff --git a/trunk/arch/x86/kvm/mmu.c b/trunk/arch/x86/kvm/mmu.c index fbb04aee8301..fb8b376bf28c 100644 --- a/trunk/arch/x86/kvm/mmu.c +++ b/trunk/arch/x86/kvm/mmu.c @@ -2394,8 +2394,7 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu) ASSERT(!VALID_PAGE(root)); spin_lock(&vcpu->kvm->mmu_lock); kvm_mmu_free_some_pages(vcpu); - sp = kvm_mmu_get_page(vcpu, i << (30 - PAGE_SHIFT), - i << 30, + sp = kvm_mmu_get_page(vcpu, i << 30, i << 30, PT32_ROOT_LEVEL, 1, ACC_ALL, NULL); root = __pa(sp->spt); diff --git a/trunk/arch/x86/lguest/i386_head.S b/trunk/arch/x86/lguest/i386_head.S index e7d5382ef263..4f420c2f2d55 100644 --- a/trunk/arch/x86/lguest/i386_head.S +++ b/trunk/arch/x86/lguest/i386_head.S @@ -4,7 +4,6 @@ #include #include #include -#include /*G:020 * Our story starts with the kernel booting into startup_32 in @@ -38,113 +37,9 @@ ENTRY(lguest_entry) /* Set up the initial stack so we can run C code. */ movl $(init_thread_union+THREAD_SIZE),%esp - call init_pagetables - /* Jumps are relative: we're running __PAGE_OFFSET too low. */ jmp lguest_init+__PAGE_OFFSET -/* - * Initialize page tables. This creates a PDE and a set of page - * tables, which are located immediately beyond __brk_base. The variable - * _brk_end is set up to point to the first "safe" location. - * Mappings are created both at virtual address 0 (identity mapping) - * and PAGE_OFFSET for up to _end. - * - * FIXME: This code is taken verbatim from arch/x86/kernel/head_32.S: they - * don't have a stack at this point, so we can't just use call and ret. - */ -init_pagetables: -#if PTRS_PER_PMD > 1 -#define PAGE_TABLE_SIZE(pages) (((pages) / PTRS_PER_PMD) + PTRS_PER_PGD) -#else -#define PAGE_TABLE_SIZE(pages) ((pages) / PTRS_PER_PGD) -#endif -#define pa(X) ((X) - __PAGE_OFFSET) - -/* Enough space to fit pagetables for the low memory linear map */ -MAPPING_BEYOND_END = \ - PAGE_TABLE_SIZE(((1<<32) - __PAGE_OFFSET) >> PAGE_SHIFT) << PAGE_SHIFT -#ifdef CONFIG_X86_PAE - - /* - * In PAE mode initial_page_table is statically defined to contain - * enough entries to cover the VMSPLIT option (that is the top 1, 2 or 3 - * entries). The identity mapping is handled by pointing two PGD entries - * to the first kernel PMD. - * - * Note the upper half of each PMD or PTE are always zero at this stage. - */ - -#define KPMDS (((-__PAGE_OFFSET) >> 30) & 3) /* Number of kernel PMDs */ - - xorl %ebx,%ebx /* %ebx is kept at zero */ - - movl $pa(__brk_base), %edi - movl $pa(initial_pg_pmd), %edx - movl $PTE_IDENT_ATTR, %eax -10: - leal PDE_IDENT_ATTR(%edi),%ecx /* Create PMD entry */ - movl %ecx,(%edx) /* Store PMD entry */ - /* Upper half already zero */ - addl $8,%edx - movl $512,%ecx -11: - stosl - xchgl %eax,%ebx - stosl - xchgl %eax,%ebx - addl $0x1000,%eax - loop 11b - - /* - * End condition: we must map up to the end + MAPPING_BEYOND_END. - */ - movl $pa(_end) + MAPPING_BEYOND_END + PTE_IDENT_ATTR, %ebp - cmpl %ebp,%eax - jb 10b -1: - addl $__PAGE_OFFSET, %edi - movl %edi, pa(_brk_end) - shrl $12, %eax - movl %eax, pa(max_pfn_mapped) - - /* Do early initialization of the fixmap area */ - movl $pa(initial_pg_fixmap)+PDE_IDENT_ATTR,%eax - movl %eax,pa(initial_pg_pmd+0x1000*KPMDS-8) -#else /* Not PAE */ - -page_pde_offset = (__PAGE_OFFSET >> 20); - - movl $pa(__brk_base), %edi - movl $pa(initial_page_table), %edx - movl $PTE_IDENT_ATTR, %eax -10: - leal PDE_IDENT_ATTR(%edi),%ecx /* Create PDE entry */ - movl %ecx,(%edx) /* Store identity PDE entry */ - movl %ecx,page_pde_offset(%edx) /* Store kernel PDE entry */ - addl $4,%edx - movl $1024, %ecx -11: - stosl - addl $0x1000,%eax - loop 11b - /* - * End condition: we must map up to the end + MAPPING_BEYOND_END. - */ - movl $pa(_end) + MAPPING_BEYOND_END + PTE_IDENT_ATTR, %ebp - cmpl %ebp,%eax - jb 10b - addl $__PAGE_OFFSET, %edi - movl %edi, pa(_brk_end) - shrl $12, %eax - movl %eax, pa(max_pfn_mapped) - - /* Do early initialization of the fixmap area */ - movl $pa(initial_pg_fixmap)+PDE_IDENT_ATTR,%eax - movl %eax,pa(initial_page_table+0xffc) -#endif - ret - /*G:055 * We create a macro which puts the assembler code between lgstart_ and lgend_ * markers. These templates are put in the .text section: they can't be diff --git a/trunk/arch/x86/mm/Makefile b/trunk/arch/x86/mm/Makefile index 09df2f9a3d69..55543397a8a7 100644 --- a/trunk/arch/x86/mm/Makefile +++ b/trunk/arch/x86/mm/Makefile @@ -23,7 +23,7 @@ mmiotrace-y := kmmio.o pf_in.o mmio-mod.o obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o obj-$(CONFIG_NUMA) += numa.o numa_$(BITS).o -obj-$(CONFIG_AMD_NUMA) += amdtopology_64.o +obj-$(CONFIG_K8_NUMA) += k8topology_64.o obj-$(CONFIG_ACPI_NUMA) += srat_$(BITS).o obj-$(CONFIG_HAVE_MEMBLOCK) += memblock.o diff --git a/trunk/arch/x86/mm/init.c b/trunk/arch/x86/mm/init.c index 947f42abe820..c0e28a13de7d 100644 --- a/trunk/arch/x86/mm/init.c +++ b/trunk/arch/x86/mm/init.c @@ -364,9 +364,8 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end) /* * We just marked the kernel text read only above, now that * we are going to free part of that, we need to make that - * writeable and non-executable first. + * writeable first. */ - set_memory_nx(begin, (end - begin) >> PAGE_SHIFT); set_memory_rw(begin, (end - begin) >> PAGE_SHIFT); printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10); diff --git a/trunk/arch/x86/mm/init_32.c b/trunk/arch/x86/mm/init_32.c index f89b5bb4e93f..0e969f9f401b 100644 --- a/trunk/arch/x86/mm/init_32.c +++ b/trunk/arch/x86/mm/init_32.c @@ -226,7 +226,7 @@ page_table_range_init(unsigned long start, unsigned long end, pgd_t *pgd_base) static inline int is_kernel_text(unsigned long addr) { - if (addr >= (unsigned long)_text && addr <= (unsigned long)__init_end) + if (addr >= PAGE_OFFSET && addr <= (unsigned long)__init_end) return 1; return 0; } @@ -912,23 +912,6 @@ void set_kernel_text_ro(void) set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT); } -static void mark_nxdata_nx(void) -{ - /* - * When this called, init has already been executed and released, - * so everything past _etext sould be NX. - */ - unsigned long start = PFN_ALIGN(_etext); - /* - * This comes from is_kernel_text upper limit. Also HPAGE where used: - */ - unsigned long size = (((unsigned long)__init_end + HPAGE_SIZE) & HPAGE_MASK) - start; - - if (__supported_pte_mask & _PAGE_NX) - printk(KERN_INFO "NX-protecting the kernel data: %luk\n", size >> 10); - set_pages_nx(virt_to_page(start), size >> PAGE_SHIFT); -} - void mark_rodata_ro(void) { unsigned long start = PFN_ALIGN(_text); @@ -963,7 +946,6 @@ void mark_rodata_ro(void) printk(KERN_INFO "Testing CPA: write protecting again\n"); set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT); #endif - mark_nxdata_nx(); } #endif diff --git a/trunk/arch/x86/mm/amdtopology_64.c b/trunk/arch/x86/mm/k8topology_64.c similarity index 94% rename from trunk/arch/x86/mm/amdtopology_64.c rename to trunk/arch/x86/mm/k8topology_64.c index 51fae9cfdecb..804a3b6c6e14 100644 --- a/trunk/arch/x86/mm/amdtopology_64.c +++ b/trunk/arch/x86/mm/k8topology_64.c @@ -1,8 +1,8 @@ /* - * AMD NUMA support. + * AMD K8 NUMA support. * Discover the memory map and associated nodes. * - * This version reads it directly from the AMD northbridge. + * This version reads it directly from the K8 northbridge. * * Copyright 2002,2003 Andi Kleen, SuSE Labs. */ @@ -57,7 +57,7 @@ static __init void early_get_boot_cpu_id(void) { /* * need to get the APIC ID of the BSP so can use that to - * create apicid_to_node in amd_scan_nodes() + * create apicid_to_node in k8_scan_nodes() */ #ifdef CONFIG_X86_MPPARSE /* @@ -69,7 +69,7 @@ static __init void early_get_boot_cpu_id(void) early_init_lapic_mapping(); } -int __init amd_get_nodes(struct bootnode *physnodes) +int __init k8_get_nodes(struct bootnode *physnodes) { int i; int ret = 0; @@ -82,7 +82,7 @@ int __init amd_get_nodes(struct bootnode *physnodes) return ret; } -int __init amd_numa_init(unsigned long start_pfn, unsigned long end_pfn) +int __init k8_numa_init(unsigned long start_pfn, unsigned long end_pfn) { unsigned long start = PFN_PHYS(start_pfn); unsigned long end = PFN_PHYS(end_pfn); @@ -194,7 +194,7 @@ int __init amd_numa_init(unsigned long start_pfn, unsigned long end_pfn) return 0; } -int __init amd_scan_nodes(void) +int __init k8_scan_nodes(void) { unsigned int bits; unsigned int cores; diff --git a/trunk/arch/x86/mm/kmemcheck/error.c b/trunk/arch/x86/mm/kmemcheck/error.c index 704a37cedddb..af3b6c8a436f 100644 --- a/trunk/arch/x86/mm/kmemcheck/error.c +++ b/trunk/arch/x86/mm/kmemcheck/error.c @@ -185,7 +185,7 @@ void kmemcheck_error_save(enum kmemcheck_shadow state, e->trace.entries = e->trace_entries; e->trace.max_entries = ARRAY_SIZE(e->trace_entries); e->trace.skip = 0; - save_stack_trace_regs(&e->trace, regs); + save_stack_trace_bp(&e->trace, regs->bp); /* Round address down to nearest 16 bytes */ shadow_copy = kmemcheck_shadow_lookup(address diff --git a/trunk/arch/x86/mm/numa_64.c b/trunk/arch/x86/mm/numa_64.c index 7762a517d69d..7ffc9b727efd 100644 --- a/trunk/arch/x86/mm/numa_64.c +++ b/trunk/arch/x86/mm/numa_64.c @@ -264,7 +264,7 @@ static struct bootnode physnodes[MAX_NUMNODES] __initdata; static char *cmdline __initdata; static int __init setup_physnodes(unsigned long start, unsigned long end, - int acpi, int amd) + int acpi, int k8) { int nr_nodes = 0; int ret = 0; @@ -274,13 +274,13 @@ static int __init setup_physnodes(unsigned long start, unsigned long end, if (acpi) nr_nodes = acpi_get_nodes(physnodes); #endif -#ifdef CONFIG_AMD_NUMA - if (amd) - nr_nodes = amd_get_nodes(physnodes); +#ifdef CONFIG_K8_NUMA + if (k8) + nr_nodes = k8_get_nodes(physnodes); #endif /* * Basic sanity checking on the physical node map: there may be errors - * if the SRAT or AMD code incorrectly reported the topology or the mem= + * if the SRAT or K8 incorrectly reported the topology or the mem= * kernel parameter is used. */ for (i = 0; i < nr_nodes; i++) { @@ -549,7 +549,7 @@ static int __init split_nodes_size_interleave(u64 addr, u64 max_addr, u64 size) * numa=fake command-line option. */ static int __init numa_emulation(unsigned long start_pfn, - unsigned long last_pfn, int acpi, int amd) + unsigned long last_pfn, int acpi, int k8) { u64 addr = start_pfn << PAGE_SHIFT; u64 max_addr = last_pfn << PAGE_SHIFT; @@ -557,7 +557,7 @@ static int __init numa_emulation(unsigned long start_pfn, int num_nodes; int i; - num_phys_nodes = setup_physnodes(addr, max_addr, acpi, amd); + num_phys_nodes = setup_physnodes(addr, max_addr, acpi, k8); /* * If the numa=fake command-line contains a 'M' or 'G', it represents * the fixed node size. Otherwise, if it is just a single number N, @@ -602,7 +602,7 @@ static int __init numa_emulation(unsigned long start_pfn, #endif /* CONFIG_NUMA_EMU */ void __init initmem_init(unsigned long start_pfn, unsigned long last_pfn, - int acpi, int amd) + int acpi, int k8) { int i; @@ -610,7 +610,7 @@ void __init initmem_init(unsigned long start_pfn, unsigned long last_pfn, nodes_clear(node_online_map); #ifdef CONFIG_NUMA_EMU - if (cmdline && !numa_emulation(start_pfn, last_pfn, acpi, amd)) + if (cmdline && !numa_emulation(start_pfn, last_pfn, acpi, k8)) return; nodes_clear(node_possible_map); nodes_clear(node_online_map); @@ -624,8 +624,8 @@ void __init initmem_init(unsigned long start_pfn, unsigned long last_pfn, nodes_clear(node_online_map); #endif -#ifdef CONFIG_AMD_NUMA - if (!numa_off && amd && !amd_scan_nodes()) +#ifdef CONFIG_K8_NUMA + if (!numa_off && k8 && !k8_scan_nodes()) return; nodes_clear(node_possible_map); nodes_clear(node_online_map); diff --git a/trunk/arch/x86/mm/pageattr.c b/trunk/arch/x86/mm/pageattr.c index 8b830ca14ac4..532e7933d606 100644 --- a/trunk/arch/x86/mm/pageattr.c +++ b/trunk/arch/x86/mm/pageattr.c @@ -13,7 +13,6 @@ #include #include #include -#include #include #include @@ -256,16 +255,13 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address, unsigned long pfn) { pgprot_t forbidden = __pgprot(0); - pgprot_t required = __pgprot(0); /* * The BIOS area between 640k and 1Mb needs to be executable for * PCI BIOS based config access (CONFIG_PCI_GOBIOS) support. */ -#ifdef CONFIG_PCI_BIOS - if (pcibios_enabled && within(pfn, BIOS_BEGIN >> PAGE_SHIFT, BIOS_END >> PAGE_SHIFT)) + if (within(pfn, BIOS_BEGIN >> PAGE_SHIFT, BIOS_END >> PAGE_SHIFT)) pgprot_val(forbidden) |= _PAGE_NX; -#endif /* * The kernel text needs to be executable for obvious reasons @@ -282,12 +278,6 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address, if (within(pfn, __pa((unsigned long)__start_rodata) >> PAGE_SHIFT, __pa((unsigned long)__end_rodata) >> PAGE_SHIFT)) pgprot_val(forbidden) |= _PAGE_RW; - /* - * .data and .bss should always be writable. - */ - if (within(address, (unsigned long)_sdata, (unsigned long)_edata) || - within(address, (unsigned long)__bss_start, (unsigned long)__bss_stop)) - pgprot_val(required) |= _PAGE_RW; #if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA) /* @@ -327,7 +317,6 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address, #endif prot = __pgprot(pgprot_val(prot) & ~pgprot_val(forbidden)); - prot = __pgprot(pgprot_val(prot) | pgprot_val(required)); return prot; } @@ -404,7 +393,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address, { unsigned long nextpage_addr, numpages, pmask, psize, flags, addr, pfn; pte_t new_pte, old_pte, *tmp; - pgprot_t old_prot, new_prot, req_prot; + pgprot_t old_prot, new_prot; int i, do_split = 1; unsigned int level; @@ -449,10 +438,10 @@ try_preserve_large_page(pte_t *kpte, unsigned long address, * We are safe now. Check whether the new pgprot is the same: */ old_pte = *kpte; - old_prot = new_prot = req_prot = pte_pgprot(old_pte); + old_prot = new_prot = pte_pgprot(old_pte); - pgprot_val(req_prot) &= ~pgprot_val(cpa->mask_clr); - pgprot_val(req_prot) |= pgprot_val(cpa->mask_set); + pgprot_val(new_prot) &= ~pgprot_val(cpa->mask_clr); + pgprot_val(new_prot) |= pgprot_val(cpa->mask_set); /* * old_pte points to the large page base address. So we need @@ -461,17 +450,17 @@ try_preserve_large_page(pte_t *kpte, unsigned long address, pfn = pte_pfn(old_pte) + ((address & (psize - 1)) >> PAGE_SHIFT); cpa->pfn = pfn; - new_prot = static_protections(req_prot, address, pfn); + new_prot = static_protections(new_prot, address, pfn); /* * We need to check the full range, whether * static_protection() requires a different pgprot for one of * the pages in the range we try to preserve: */ - addr = address & pmask; - pfn = pte_pfn(old_pte); - for (i = 0; i < (psize >> PAGE_SHIFT); i++, addr += PAGE_SIZE, pfn++) { - pgprot_t chk_prot = static_protections(req_prot, addr, pfn); + addr = address + PAGE_SIZE; + pfn++; + for (i = 1; i < cpa->numpages; i++, addr += PAGE_SIZE, pfn++) { + pgprot_t chk_prot = static_protections(new_prot, addr, pfn); if (pgprot_val(chk_prot) != pgprot_val(new_prot)) goto out_unlock; @@ -494,7 +483,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address, * that we limited the number of possible pages already to * the number of pages in the large page. */ - if (address == (address & pmask) && cpa->numpages == (psize >> PAGE_SHIFT)) { + if (address == (nextpage_addr - psize) && cpa->numpages == numpages) { /* * The address is aligned and the number of pages * covers the full page. diff --git a/trunk/arch/x86/mm/setup_nx.c b/trunk/arch/x86/mm/setup_nx.c index 410531d3c292..a3250aa34086 100644 --- a/trunk/arch/x86/mm/setup_nx.c +++ b/trunk/arch/x86/mm/setup_nx.c @@ -41,7 +41,7 @@ void __init x86_report_nx(void) { if (!cpu_has_nx) { printk(KERN_NOTICE "Notice: NX (Execute Disable) protection " - "missing in CPU!\n"); + "missing in CPU or disabled in BIOS!\n"); } else { #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) if (disable_nx) { diff --git a/trunk/arch/x86/mm/srat_32.c b/trunk/arch/x86/mm/srat_32.c index f16434568a51..a17dffd136c1 100644 --- a/trunk/arch/x86/mm/srat_32.c +++ b/trunk/arch/x86/mm/srat_32.c @@ -92,7 +92,6 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *cpu_affinity) /* mark this node as "seen" in node bitmap */ BMAP_SET(pxm_bitmap, cpu_affinity->proximity_domain_lo); - /* don't need to check apic_id here, because it is always 8 bits */ apicid_to_pxm[cpu_affinity->apic_id] = cpu_affinity->proximity_domain_lo; printk(KERN_DEBUG "CPU %02x in proximity domain %02x\n", diff --git a/trunk/arch/x86/mm/srat_64.c b/trunk/arch/x86/mm/srat_64.c index 171a0aacb99a..a35cb9d8b060 100644 --- a/trunk/arch/x86/mm/srat_64.c +++ b/trunk/arch/x86/mm/srat_64.c @@ -134,10 +134,6 @@ acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa) } apic_id = pa->apic_id; - if (apic_id >= MAX_LOCAL_APIC) { - printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u skipped apicid that is too big\n", pxm, apic_id, node); - return; - } apicid_to_node[apic_id] = node; node_set(node, cpu_nodes_parsed); acpi_numa = 1; @@ -172,12 +168,6 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa) apic_id = (pa->apic_id << 8) | pa->local_sapic_eid; else apic_id = pa->apic_id; - - if (apic_id >= MAX_LOCAL_APIC) { - printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%02x -> Node %u skipped apicid that is too big\n", pxm, apic_id, node); - return; - } - apicid_to_node[apic_id] = node; node_set(node, cpu_nodes_parsed); acpi_numa = 1; diff --git a/trunk/arch/x86/oprofile/backtrace.c b/trunk/arch/x86/oprofile/backtrace.c index 72cbec14d783..2d49d4e19a36 100644 --- a/trunk/arch/x86/oprofile/backtrace.c +++ b/trunk/arch/x86/oprofile/backtrace.c @@ -126,7 +126,7 @@ x86_backtrace(struct pt_regs * const regs, unsigned int depth) if (!user_mode_vm(regs)) { unsigned long stack = kernel_stack_pointer(regs); if (depth) - dump_trace(NULL, regs, (unsigned long *)stack, + dump_trace(NULL, regs, (unsigned long *)stack, 0, &backtrace_ops, &depth); return; } diff --git a/trunk/arch/x86/oprofile/nmi_int.c b/trunk/arch/x86/oprofile/nmi_int.c index 358c8b9c96a7..4e8baad36d37 100644 --- a/trunk/arch/x86/oprofile/nmi_int.c +++ b/trunk/arch/x86/oprofile/nmi_int.c @@ -732,9 +732,6 @@ int __init op_nmi_init(struct oprofile_operations *ops) case 0x14: cpu_type = "x86-64/family14h"; break; - case 0x15: - cpu_type = "x86-64/family15h"; - break; default: return -ENODEV; } diff --git a/trunk/arch/x86/oprofile/nmi_timer_int.c b/trunk/arch/x86/oprofile/nmi_timer_int.c index 0636dd93cef8..e3ecb71b5790 100644 --- a/trunk/arch/x86/oprofile/nmi_timer_int.c +++ b/trunk/arch/x86/oprofile/nmi_timer_int.c @@ -58,6 +58,9 @@ static void timer_stop(void) int __init op_nmi_timer_init(struct oprofile_operations *ops) { + if ((nmi_watchdog != NMI_IO_APIC) || (atomic_read(&nmi_active) <= 0)) + return -ENODEV; + ops->start = timer_start; ops->stop = timer_stop; ops->cpu_type = "timer"; diff --git a/trunk/arch/x86/oprofile/op_model_amd.c b/trunk/arch/x86/oprofile/op_model_amd.c index c3b8e24f2b16..a011bcc0f943 100644 --- a/trunk/arch/x86/oprofile/op_model_amd.c +++ b/trunk/arch/x86/oprofile/op_model_amd.c @@ -29,12 +29,11 @@ #include "op_x86_model.h" #include "op_counter.h" -#define NUM_COUNTERS 4 -#define NUM_COUNTERS_F15H 6 +#define NUM_COUNTERS 4 #ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX -#define NUM_VIRT_COUNTERS 32 +#define NUM_VIRT_COUNTERS 32 #else -#define NUM_VIRT_COUNTERS 0 +#define NUM_VIRT_COUNTERS NUM_COUNTERS #endif #define OP_EVENT_MASK 0x0FFF @@ -42,8 +41,7 @@ #define MSR_AMD_EVENTSEL_RESERVED ((0xFFFFFCF0ULL<<32)|(1ULL<<21)) -static int num_counters; -static unsigned long reset_value[OP_MAX_COUNTER]; +static unsigned long reset_value[NUM_VIRT_COUNTERS]; #define IBS_FETCH_SIZE 6 #define IBS_OP_SIZE 12 @@ -389,7 +387,7 @@ static void op_mux_switch_ctrl(struct op_x86_model_spec const *model, int i; /* enable active counters */ - for (i = 0; i < num_counters; ++i) { + for (i = 0; i < NUM_COUNTERS; ++i) { int virt = op_x86_phys_to_virt(i); if (!reset_value[virt]) continue; @@ -408,7 +406,7 @@ static void op_amd_shutdown(struct op_msrs const * const msrs) { int i; - for (i = 0; i < num_counters; ++i) { + for (i = 0; i < NUM_COUNTERS; ++i) { if (!msrs->counters[i].addr) continue; release_perfctr_nmi(MSR_K7_PERFCTR0 + i); @@ -420,7 +418,7 @@ static int op_amd_fill_in_addresses(struct op_msrs * const msrs) { int i; - for (i = 0; i < num_counters; i++) { + for (i = 0; i < NUM_COUNTERS; i++) { if (!reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i)) goto fail; if (!reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i)) { @@ -428,13 +426,8 @@ static int op_amd_fill_in_addresses(struct op_msrs * const msrs) goto fail; } /* both registers must be reserved */ - if (num_counters == NUM_COUNTERS_F15H) { - msrs->counters[i].addr = MSR_F15H_PERF_CTR + (i << 1); - msrs->controls[i].addr = MSR_F15H_PERF_CTL + (i << 1); - } else { - msrs->controls[i].addr = MSR_K7_EVNTSEL0 + i; - msrs->counters[i].addr = MSR_K7_PERFCTR0 + i; - } + msrs->counters[i].addr = MSR_K7_PERFCTR0 + i; + msrs->controls[i].addr = MSR_K7_EVNTSEL0 + i; continue; fail: if (!counter_config[i].enabled) @@ -454,7 +447,7 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model, int i; /* setup reset_value */ - for (i = 0; i < OP_MAX_COUNTER; ++i) { + for (i = 0; i < NUM_VIRT_COUNTERS; ++i) { if (counter_config[i].enabled && msrs->counters[op_x86_virt_to_phys(i)].addr) reset_value[i] = counter_config[i].count; @@ -463,7 +456,7 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model, } /* clear all counters */ - for (i = 0; i < num_counters; ++i) { + for (i = 0; i < NUM_COUNTERS; ++i) { if (!msrs->controls[i].addr) continue; rdmsrl(msrs->controls[i].addr, val); @@ -479,7 +472,7 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model, } /* enable active counters */ - for (i = 0; i < num_counters; ++i) { + for (i = 0; i < NUM_COUNTERS; ++i) { int virt = op_x86_phys_to_virt(i); if (!reset_value[virt]) continue; @@ -510,7 +503,7 @@ static int op_amd_check_ctrs(struct pt_regs * const regs, u64 val; int i; - for (i = 0; i < num_counters; ++i) { + for (i = 0; i < NUM_COUNTERS; ++i) { int virt = op_x86_phys_to_virt(i); if (!reset_value[virt]) continue; @@ -533,7 +526,7 @@ static void op_amd_start(struct op_msrs const * const msrs) u64 val; int i; - for (i = 0; i < num_counters; ++i) { + for (i = 0; i < NUM_COUNTERS; ++i) { if (!reset_value[op_x86_phys_to_virt(i)]) continue; rdmsrl(msrs->controls[i].addr, val); @@ -553,7 +546,7 @@ static void op_amd_stop(struct op_msrs const * const msrs) * Subtle: stop on all counters to avoid race with setting our * pm callback */ - for (i = 0; i < num_counters; ++i) { + for (i = 0; i < NUM_COUNTERS; ++i) { if (!reset_value[op_x86_phys_to_virt(i)]) continue; rdmsrl(msrs->controls[i].addr, val); @@ -610,7 +603,6 @@ static int force_ibs_eilvt_setup(void) ret = setup_ibs_ctl(i); if (ret) return ret; - pr_err(FW_BUG "using offset %d for IBS interrupts\n", i); return 0; } @@ -638,29 +630,21 @@ static int __init_ibs_nmi(void) return 0; } -/* - * check and reserve APIC extended interrupt LVT offset for IBS if - * available - * - * init_ibs() preforms implicitly cpu-local operations, so pin this - * thread to its current CPU - */ - +/* initialize the APIC for the IBS interrupts if available */ static void init_ibs(void) { - preempt_disable(); - ibs_caps = get_ibs_caps(); + if (!ibs_caps) - goto out; + return; - if (__init_ibs_nmi() < 0) + if (__init_ibs_nmi()) { ibs_caps = 0; - else - printk(KERN_INFO "oprofile: AMD IBS detected (0x%08x)\n", ibs_caps); + return; + } -out: - preempt_enable(); + printk(KERN_INFO "oprofile: AMD IBS detected (0x%08x)\n", + (unsigned)ibs_caps); } static int (*create_arch_files)(struct super_block *sb, struct dentry *root); @@ -714,29 +698,18 @@ static int setup_ibs_files(struct super_block *sb, struct dentry *root) return 0; } -struct op_x86_model_spec op_amd_spec; - static int op_amd_init(struct oprofile_operations *ops) { init_ibs(); create_arch_files = ops->create_files; ops->create_files = setup_ibs_files; - - if (boot_cpu_data.x86 == 0x15) { - num_counters = NUM_COUNTERS_F15H; - } else { - num_counters = NUM_COUNTERS; - } - - op_amd_spec.num_counters = num_counters; - op_amd_spec.num_controls = num_counters; - op_amd_spec.num_virt_counters = max(num_counters, NUM_VIRT_COUNTERS); - return 0; } struct op_x86_model_spec op_amd_spec = { - /* num_counters/num_controls filled in at runtime */ + .num_counters = NUM_COUNTERS, + .num_controls = NUM_COUNTERS, + .num_virt_counters = NUM_VIRT_COUNTERS, .reserved = MSR_AMD_EVENTSEL_RESERVED, .event_mask = OP_EVENT_MASK, .init = op_amd_init, diff --git a/trunk/arch/x86/oprofile/op_model_p4.c b/trunk/arch/x86/oprofile/op_model_p4.c index 9fadec074142..182558dd5515 100644 --- a/trunk/arch/x86/oprofile/op_model_p4.c +++ b/trunk/arch/x86/oprofile/op_model_p4.c @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/trunk/arch/x86/pci/Makefile b/trunk/arch/x86/pci/Makefile index 6b8759f7634e..effd96e33f16 100644 --- a/trunk/arch/x86/pci/Makefile +++ b/trunk/arch/x86/pci/Makefile @@ -7,7 +7,6 @@ obj-$(CONFIG_PCI_OLPC) += olpc.o obj-$(CONFIG_PCI_XEN) += xen.o obj-y += fixup.o -obj-$(CONFIG_X86_INTEL_CE) += ce4100.o obj-$(CONFIG_ACPI) += acpi.o obj-y += legacy.o irq.o diff --git a/trunk/arch/x86/pci/ce4100.c b/trunk/arch/x86/pci/ce4100.c deleted file mode 100644 index 85b68ef5e809..000000000000 --- a/trunk/arch/x86/pci/ce4100.c +++ /dev/null @@ -1,315 +0,0 @@ -/* - * GPL LICENSE SUMMARY - * - * Copyright(c) 2010 Intel Corporation. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - * The full GNU General Public License is included in this distribution - * in the file called LICENSE.GPL. - * - * Contact Information: - * Intel Corporation - * 2200 Mission College Blvd. - * Santa Clara, CA 97052 - * - * This provides access methods for PCI registers that mis-behave on - * the CE4100. Each register can be assigned a private init, read and - * write routine. The exception to this is the bridge device. The - * bridge device is the only device on bus zero (0) that requires any - * fixup so it is a special case ATM - */ - -#include -#include -#include - -#include - -struct sim_reg { - u32 value; - u32 mask; -}; - -struct sim_dev_reg { - int dev_func; - int reg; - void (*init)(struct sim_dev_reg *reg); - void (*read)(struct sim_dev_reg *reg, u32 *value); - void (*write)(struct sim_dev_reg *reg, u32 value); - struct sim_reg sim_reg; -}; - -struct sim_reg_op { - void (*init)(struct sim_dev_reg *reg); - void (*read)(struct sim_dev_reg *reg, u32 value); - void (*write)(struct sim_dev_reg *reg, u32 value); -}; - -#define MB (1024 * 1024) -#define KB (1024) -#define SIZE_TO_MASK(size) (~(size - 1)) - -#define DEFINE_REG(device, func, offset, size, init_op, read_op, write_op)\ -{ PCI_DEVFN(device, func), offset, init_op, read_op, write_op,\ - {0, SIZE_TO_MASK(size)} }, - -static void reg_init(struct sim_dev_reg *reg) -{ - pci_direct_conf1.read(0, 1, reg->dev_func, reg->reg, 4, - ®->sim_reg.value); -} - -static void reg_read(struct sim_dev_reg *reg, u32 *value) -{ - unsigned long flags; - - raw_spin_lock_irqsave(&pci_config_lock, flags); - *value = reg->sim_reg.value; - raw_spin_unlock_irqrestore(&pci_config_lock, flags); -} - -static void reg_write(struct sim_dev_reg *reg, u32 value) -{ - unsigned long flags; - - raw_spin_lock_irqsave(&pci_config_lock, flags); - reg->sim_reg.value = (value & reg->sim_reg.mask) | - (reg->sim_reg.value & ~reg->sim_reg.mask); - raw_spin_unlock_irqrestore(&pci_config_lock, flags); -} - -static void sata_reg_init(struct sim_dev_reg *reg) -{ - pci_direct_conf1.read(0, 1, PCI_DEVFN(14, 0), 0x10, 4, - ®->sim_reg.value); - reg->sim_reg.value += 0x400; -} - -static void ehci_reg_read(struct sim_dev_reg *reg, u32 *value) -{ - reg_read(reg, value); - if (*value != reg->sim_reg.mask) - *value |= 0x100; -} - -void sata_revid_init(struct sim_dev_reg *reg) -{ - reg->sim_reg.value = 0x01060100; - reg->sim_reg.mask = 0; -} - -static void sata_revid_read(struct sim_dev_reg *reg, u32 *value) -{ - reg_read(reg, value); -} - -static struct sim_dev_reg bus1_fixups[] = { - DEFINE_REG(2, 0, 0x10, (16*MB), reg_init, reg_read, reg_write) - DEFINE_REG(2, 0, 0x14, (256), reg_init, reg_read, reg_write) - DEFINE_REG(2, 1, 0x10, (64*KB), reg_init, reg_read, reg_write) - DEFINE_REG(3, 0, 0x10, (64*KB), reg_init, reg_read, reg_write) - DEFINE_REG(4, 0, 0x10, (128*KB), reg_init, reg_read, reg_write) - DEFINE_REG(4, 1, 0x10, (128*KB), reg_init, reg_read, reg_write) - DEFINE_REG(6, 0, 0x10, (512*KB), reg_init, reg_read, reg_write) - DEFINE_REG(6, 1, 0x10, (512*KB), reg_init, reg_read, reg_write) - DEFINE_REG(6, 2, 0x10, (64*KB), reg_init, reg_read, reg_write) - DEFINE_REG(8, 0, 0x10, (1*MB), reg_init, reg_read, reg_write) - DEFINE_REG(8, 1, 0x10, (64*KB), reg_init, reg_read, reg_write) - DEFINE_REG(8, 2, 0x10, (64*KB), reg_init, reg_read, reg_write) - DEFINE_REG(9, 0, 0x10 , (1*MB), reg_init, reg_read, reg_write) - DEFINE_REG(9, 0, 0x14, (64*KB), reg_init, reg_read, reg_write) - DEFINE_REG(10, 0, 0x10, (256), reg_init, reg_read, reg_write) - DEFINE_REG(10, 0, 0x14, (256*MB), reg_init, reg_read, reg_write) - DEFINE_REG(11, 0, 0x10, (256), reg_init, reg_read, reg_write) - DEFINE_REG(11, 0, 0x14, (256), reg_init, reg_read, reg_write) - DEFINE_REG(11, 1, 0x10, (256), reg_init, reg_read, reg_write) - DEFINE_REG(11, 2, 0x10, (256), reg_init, reg_read, reg_write) - DEFINE_REG(11, 2, 0x14, (256), reg_init, reg_read, reg_write) - DEFINE_REG(11, 2, 0x18, (256), reg_init, reg_read, reg_write) - DEFINE_REG(11, 3, 0x10, (256), reg_init, reg_read, reg_write) - DEFINE_REG(11, 3, 0x14, (256), reg_init, reg_read, reg_write) - DEFINE_REG(11, 4, 0x10, (256), reg_init, reg_read, reg_write) - DEFINE_REG(11, 5, 0x10, (64*KB), reg_init, reg_read, reg_write) - DEFINE_REG(11, 6, 0x10, (256), reg_init, reg_read, reg_write) - DEFINE_REG(11, 7, 0x10, (64*KB), reg_init, reg_read, reg_write) - DEFINE_REG(12, 0, 0x10, (128*KB), reg_init, reg_read, reg_write) - DEFINE_REG(12, 0, 0x14, (256), reg_init, reg_read, reg_write) - DEFINE_REG(12, 1, 0x10, (1024), reg_init, reg_read, reg_write) - DEFINE_REG(13, 0, 0x10, (32*KB), reg_init, ehci_reg_read, reg_write) - DEFINE_REG(13, 1, 0x10, (32*KB), reg_init, ehci_reg_read, reg_write) - DEFINE_REG(14, 0, 0x8, 0, sata_revid_init, sata_revid_read, 0) - DEFINE_REG(14, 0, 0x10, 0, reg_init, reg_read, reg_write) - DEFINE_REG(14, 0, 0x14, 0, reg_init, reg_read, reg_write) - DEFINE_REG(14, 0, 0x18, 0, reg_init, reg_read, reg_write) - DEFINE_REG(14, 0, 0x1C, 0, reg_init, reg_read, reg_write) - DEFINE_REG(14, 0, 0x20, 0, reg_init, reg_read, reg_write) - DEFINE_REG(14, 0, 0x24, (0x200), sata_reg_init, reg_read, reg_write) - DEFINE_REG(15, 0, 0x10, (64*KB), reg_init, reg_read, reg_write) - DEFINE_REG(15, 0, 0x14, (64*KB), reg_init, reg_read, reg_write) - DEFINE_REG(16, 0, 0x10, (64*KB), reg_init, reg_read, reg_write) - DEFINE_REG(16, 0, 0x14, (64*MB), reg_init, reg_read, reg_write) - DEFINE_REG(16, 0, 0x18, (64*MB), reg_init, reg_read, reg_write) - DEFINE_REG(17, 0, 0x10, (128*KB), reg_init, reg_read, reg_write) - DEFINE_REG(18, 0, 0x10, (1*KB), reg_init, reg_read, reg_write) -}; - -static void __init init_sim_regs(void) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(bus1_fixups); i++) { - if (bus1_fixups[i].init) - bus1_fixups[i].init(&bus1_fixups[i]); - } -} - -static inline void extract_bytes(u32 *value, int reg, int len) -{ - uint32_t mask; - - *value >>= ((reg & 3) * 8); - mask = 0xFFFFFFFF >> ((4 - len) * 8); - *value &= mask; -} - -int bridge_read(unsigned int devfn, int reg, int len, u32 *value) -{ - u32 av_bridge_base, av_bridge_limit; - int retval = 0; - - switch (reg) { - /* Make BARs appear to not request any memory. */ - case PCI_BASE_ADDRESS_0: - case PCI_BASE_ADDRESS_0 + 1: - case PCI_BASE_ADDRESS_0 + 2: - case PCI_BASE_ADDRESS_0 + 3: - *value = 0; - break; - - /* Since subordinate bus number register is hardwired - * to zero and read only, so do the simulation. - */ - case PCI_PRIMARY_BUS: - if (len == 4) - *value = 0x00010100; - break; - - case PCI_SUBORDINATE_BUS: - *value = 1; - break; - - case PCI_MEMORY_BASE: - case PCI_MEMORY_LIMIT: - /* Get the A/V bridge base address. */ - pci_direct_conf1.read(0, 0, devfn, - PCI_BASE_ADDRESS_0, 4, &av_bridge_base); - - av_bridge_limit = av_bridge_base + (512*MB - 1); - av_bridge_limit >>= 16; - av_bridge_limit &= 0xFFF0; - - av_bridge_base >>= 16; - av_bridge_base &= 0xFFF0; - - if (reg == PCI_MEMORY_LIMIT) - *value = av_bridge_limit; - else if (len == 2) - *value = av_bridge_base; - else - *value = (av_bridge_limit << 16) | av_bridge_base; - break; - /* Make prefetchable memory limit smaller than prefetchable - * memory base, so not claim prefetchable memory space. - */ - case PCI_PREF_MEMORY_BASE: - *value = 0xFFF0; - break; - case PCI_PREF_MEMORY_LIMIT: - *value = 0x0; - break; - /* Make IO limit smaller than IO base, so not claim IO space. */ - case PCI_IO_BASE: - *value = 0xF0; - break; - case PCI_IO_LIMIT: - *value = 0; - break; - default: - retval = 1; - } - return retval; -} - -static int ce4100_conf_read(unsigned int seg, unsigned int bus, - unsigned int devfn, int reg, int len, u32 *value) -{ - int i, retval = 1; - - if (bus == 1) { - for (i = 0; i < ARRAY_SIZE(bus1_fixups); i++) { - if (bus1_fixups[i].dev_func == devfn && - bus1_fixups[i].reg == (reg & ~3) && - bus1_fixups[i].read) { - bus1_fixups[i].read(&(bus1_fixups[i]), - value); - extract_bytes(value, reg, len); - return 0; - } - } - } - - if (bus == 0 && (PCI_DEVFN(1, 0) == devfn) && - !bridge_read(devfn, reg, len, value)) - return 0; - - return pci_direct_conf1.read(seg, bus, devfn, reg, len, value); -} - -static int ce4100_conf_write(unsigned int seg, unsigned int bus, - unsigned int devfn, int reg, int len, u32 value) -{ - int i; - - if (bus == 1) { - for (i = 0; i < ARRAY_SIZE(bus1_fixups); i++) { - if (bus1_fixups[i].dev_func == devfn && - bus1_fixups[i].reg == (reg & ~3) && - bus1_fixups[i].write) { - bus1_fixups[i].write(&(bus1_fixups[i]), - value); - return 0; - } - } - } - - /* Discard writes to A/V bridge BAR. */ - if (bus == 0 && PCI_DEVFN(1, 0) == devfn && - ((reg & ~3) == PCI_BASE_ADDRESS_0)) - return 0; - - return pci_direct_conf1.write(seg, bus, devfn, reg, len, value); -} - -struct pci_raw_ops ce4100_pci_conf = { - .read = ce4100_conf_read, - .write = ce4100_conf_write, -}; - -static int __init ce4100_pci_init(void) -{ - init_sim_regs(); - raw_pci_ops = &ce4100_pci_conf; - return 0; -} -subsys_initcall(ce4100_pci_init); diff --git a/trunk/arch/x86/pci/pcbios.c b/trunk/arch/x86/pci/pcbios.c index a5f7d0d63de0..2492d165096a 100644 --- a/trunk/arch/x86/pci/pcbios.c +++ b/trunk/arch/x86/pci/pcbios.c @@ -9,7 +9,6 @@ #include #include #include -#include /* BIOS32 signature: "_32_" */ #define BIOS32_SIGNATURE (('_' << 0) + ('3' << 8) + ('2' << 16) + ('_' << 24)) @@ -26,27 +25,6 @@ #define PCIBIOS_HW_TYPE1_SPEC 0x10 #define PCIBIOS_HW_TYPE2_SPEC 0x20 -int pcibios_enabled; - -/* According to the BIOS specification at: - * http://members.datafast.net.au/dft0802/specs/bios21.pdf, we could - * restrict the x zone to some pages and make it ro. But this may be - * broken on some bios, complex to handle with static_protections. - * We could make the 0xe0000-0x100000 range rox, but this can break - * some ISA mapping. - * - * So we let's an rw and x hole when pcibios is used. This shouldn't - * happen for modern system with mmconfig, and if you don't want it - * you could disable pcibios... - */ -static inline void set_bios_x(void) -{ - pcibios_enabled = 1; - set_memory_x(PAGE_OFFSET + BIOS_BEGIN, (BIOS_END - BIOS_BEGIN) >> PAGE_SHIFT); - if (__supported_pte_mask & _PAGE_NX) - printk(KERN_INFO "PCI : PCI BIOS aera is rw and x. Use pci=nobios if you want it NX.\n"); -} - /* * This is the standard structure used to identify the entry point * to the BIOS32 Service Directory, as documented in @@ -354,7 +332,6 @@ static struct pci_raw_ops * __devinit pci_find_bios(void) DBG("PCI: BIOS32 Service Directory entry at 0x%lx\n", bios32_entry); bios32_indirect.address = bios32_entry + PAGE_OFFSET; - set_bios_x(); if (check_pcibios()) return &pci_bios_access; } diff --git a/trunk/arch/x86/platform/Makefile b/trunk/arch/x86/platform/Makefile index 021eee91c056..7bf70b812fa2 100644 --- a/trunk/arch/x86/platform/Makefile +++ b/trunk/arch/x86/platform/Makefile @@ -1,7 +1,5 @@ # Platform specific code goes here -obj-y += ce4100/ obj-y += efi/ -obj-y += iris/ obj-y += mrst/ obj-y += olpc/ obj-y += scx200/ diff --git a/trunk/arch/x86/platform/ce4100/Makefile b/trunk/arch/x86/platform/ce4100/Makefile deleted file mode 100644 index 91fc92971d94..000000000000 --- a/trunk/arch/x86/platform/ce4100/Makefile +++ /dev/null @@ -1 +0,0 @@ -obj-$(CONFIG_X86_INTEL_CE) += ce4100.o diff --git a/trunk/arch/x86/platform/ce4100/ce4100.c b/trunk/arch/x86/platform/ce4100/ce4100.c deleted file mode 100644 index d2c0d51a7178..000000000000 --- a/trunk/arch/x86/platform/ce4100/ce4100.c +++ /dev/null @@ -1,132 +0,0 @@ -/* - * Intel CE4100 platform specific setup code - * - * (C) Copyright 2010 Intel Corporation - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; version 2 - * of the License. - */ -#include -#include -#include -#include -#include -#include - -#include -#include - -static int ce4100_i8042_detect(void) -{ - return 0; -} - -static void __init sdv_find_smp_config(void) -{ -} - -#ifdef CONFIG_SERIAL_8250 - - -static unsigned int mem_serial_in(struct uart_port *p, int offset) -{ - offset = offset << p->regshift; - return readl(p->membase + offset); -} - -/* - * The UART Tx interrupts are not set under some conditions and therefore serial - * transmission hangs. This is a silicon issue and has not been root caused. The - * workaround for this silicon issue checks UART_LSR_THRE bit and UART_LSR_TEMT - * bit of LSR register in interrupt handler to see whether at least one of these - * two bits is set, if so then process the transmit request. If this workaround - * is not applied, then the serial transmission may hang. This workaround is for - * errata number 9 in Errata - B step. -*/ - -static unsigned int ce4100_mem_serial_in(struct uart_port *p, int offset) -{ - unsigned int ret, ier, lsr; - - if (offset == UART_IIR) { - offset = offset << p->regshift; - ret = readl(p->membase + offset); - if (ret & UART_IIR_NO_INT) { - /* see if the TX interrupt should have really set */ - ier = mem_serial_in(p, UART_IER); - /* see if the UART's XMIT interrupt is enabled */ - if (ier & UART_IER_THRI) { - lsr = mem_serial_in(p, UART_LSR); - /* now check to see if the UART should be - generating an interrupt (but isn't) */ - if (lsr & (UART_LSR_THRE | UART_LSR_TEMT)) - ret &= ~UART_IIR_NO_INT; - } - } - } else - ret = mem_serial_in(p, offset); - return ret; -} - -static void ce4100_mem_serial_out(struct uart_port *p, int offset, int value) -{ - offset = offset << p->regshift; - writel(value, p->membase + offset); -} - -static void ce4100_serial_fixup(int port, struct uart_port *up, - unsigned short *capabilites) -{ -#ifdef CONFIG_EARLY_PRINTK - /* - * Over ride the legacy port configuration that comes from - * asm/serial.h. Using the ioport driver then switching to the - * PCI memmaped driver hangs the IOAPIC - */ - if (up->iotype != UPIO_MEM32) { - up->uartclk = 14745600; - up->mapbase = 0xdffe0200; - set_fixmap_nocache(FIX_EARLYCON_MEM_BASE, - up->mapbase & PAGE_MASK); - up->membase = - (void __iomem *)__fix_to_virt(FIX_EARLYCON_MEM_BASE); - up->membase += up->mapbase & ~PAGE_MASK; - up->iotype = UPIO_MEM32; - up->regshift = 2; - } -#endif - up->iobase = 0; - up->serial_in = ce4100_mem_serial_in; - up->serial_out = ce4100_mem_serial_out; - - *capabilites |= (1 << 12); -} - -static __init void sdv_serial_fixup(void) -{ - serial8250_set_isa_configurator(ce4100_serial_fixup); -} - -#else -static inline void sdv_serial_fixup(void); -#endif - -static void __init sdv_arch_setup(void) -{ - sdv_serial_fixup(); -} - -/* - * CE4100 specific x86_init function overrides and early setup - * calls. - */ -void __init x86_ce4100_early_setup(void) -{ - x86_init.oem.arch_setup = sdv_arch_setup; - x86_platform.i8042_detect = ce4100_i8042_detect; - x86_init.resources.probe_roms = x86_init_noop; - x86_init.mpparse.get_smp_config = x86_init_uint_noop; - x86_init.mpparse.find_smp_config = sdv_find_smp_config; -} diff --git a/trunk/arch/x86/platform/iris/Makefile b/trunk/arch/x86/platform/iris/Makefile deleted file mode 100644 index db921983a102..000000000000 --- a/trunk/arch/x86/platform/iris/Makefile +++ /dev/null @@ -1 +0,0 @@ -obj-$(CONFIG_X86_32_IRIS) += iris.o diff --git a/trunk/arch/x86/platform/iris/iris.c b/trunk/arch/x86/platform/iris/iris.c deleted file mode 100644 index 1ba7f5ed8c9b..000000000000 --- a/trunk/arch/x86/platform/iris/iris.c +++ /dev/null @@ -1,91 +0,0 @@ -/* - * Eurobraille/Iris power off support. - * - * Eurobraille's Iris machine is a PC with no APM or ACPI support. - * It is shutdown by a special I/O sequence which this module provides. - * - * Copyright (C) Shérab - * - * This program is free software ; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation ; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY ; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with the program ; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#define IRIS_GIO_BASE 0x340 -#define IRIS_GIO_INPUT IRIS_GIO_BASE -#define IRIS_GIO_OUTPUT (IRIS_GIO_BASE + 1) -#define IRIS_GIO_PULSE 0x80 /* First byte to send */ -#define IRIS_GIO_REST 0x00 /* Second byte to send */ -#define IRIS_GIO_NODEV 0xff /* Likely not an Iris */ - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Sébastien Hinderer "); -MODULE_DESCRIPTION("A power_off handler for Iris devices from EuroBraille"); -MODULE_SUPPORTED_DEVICE("Eurobraille/Iris"); - -static int force; - -module_param(force, bool, 0); -MODULE_PARM_DESC(force, "Set to one to force poweroff handler installation."); - -static void (*old_pm_power_off)(void); - -static void iris_power_off(void) -{ - outb(IRIS_GIO_PULSE, IRIS_GIO_OUTPUT); - msleep(850); - outb(IRIS_GIO_REST, IRIS_GIO_OUTPUT); -} - -/* - * Before installing the power_off handler, try to make sure the OS is - * running on an Iris. Since Iris does not support DMI, this is done - * by reading its input port and seeing whether the read value is - * meaningful. - */ -static int iris_init(void) -{ - unsigned char status; - if (force != 1) { - printk(KERN_ERR "The force parameter has not been set to 1 so the Iris poweroff handler will not be installed.\n"); - return -ENODEV; - } - status = inb(IRIS_GIO_INPUT); - if (status == IRIS_GIO_NODEV) { - printk(KERN_ERR "This machine does not seem to be an Iris. Power_off handler not installed.\n"); - return -ENODEV; - } - old_pm_power_off = pm_power_off; - pm_power_off = &iris_power_off; - printk(KERN_INFO "Iris power_off handler installed.\n"); - - return 0; -} - -static void iris_exit(void) -{ - pm_power_off = old_pm_power_off; - printk(KERN_INFO "Iris power_off handler uninstalled.\n"); -} - -module_init(iris_init); -module_exit(iris_exit); diff --git a/trunk/arch/x86/platform/mrst/Makefile b/trunk/arch/x86/platform/mrst/Makefile index f61ccdd49341..efbbc552fa95 100644 --- a/trunk/arch/x86/platform/mrst/Makefile +++ b/trunk/arch/x86/platform/mrst/Makefile @@ -1,3 +1 @@ obj-$(CONFIG_X86_MRST) += mrst.o -obj-$(CONFIG_X86_MRST) += vrtc.o -obj-$(CONFIG_EARLY_PRINTK_MRST) += early_printk_mrst.o diff --git a/trunk/arch/x86/platform/mrst/mrst.c b/trunk/arch/x86/platform/mrst/mrst.c index fee0b4914e07..79ae68154e87 100644 --- a/trunk/arch/x86/platform/mrst/mrst.c +++ b/trunk/arch/x86/platform/mrst/mrst.c @@ -9,19 +9,9 @@ * as published by the Free Software Foundation; version 2 * of the License. */ - -#define pr_fmt(fmt) "mrst: " fmt - #include #include #include -#include -#include -#include -#include -#include -#include -#include #include #include @@ -33,9 +23,7 @@ #include #include #include -#include #include -#include /* * the clockevent devices on Moorestown/Medfield can be APBT or LAPIC clock, @@ -114,10 +102,10 @@ static int __init sfi_parse_mtmr(struct sfi_table_header *table) memcpy(sfi_mtimer_array, pentry, totallen); } - pr_debug("SFI MTIMER info (num = %d):\n", sfi_mtimer_num); + printk(KERN_INFO "SFI: MTIMER info (num = %d):\n", sfi_mtimer_num); pentry = sfi_mtimer_array; for (totallen = 0; totallen < sfi_mtimer_num; totallen++, pentry++) { - pr_debug("timer[%d]: paddr = 0x%08x, freq = %dHz," + printk(KERN_INFO "timer[%d]: paddr = 0x%08x, freq = %dHz," " irq = %d\n", totallen, (u32)pentry->phys_addr, pentry->freq_hz, pentry->irq); if (!pentry->irq) @@ -188,14 +176,14 @@ int __init sfi_parse_mrtc(struct sfi_table_header *table) memcpy(sfi_mrtc_array, pentry, totallen); } - pr_debug("SFI RTC info (num = %d):\n", sfi_mrtc_num); + printk(KERN_INFO "SFI: RTC info (num = %d):\n", sfi_mrtc_num); pentry = sfi_mrtc_array; for (totallen = 0; totallen < sfi_mrtc_num; totallen++, pentry++) { - pr_debug("RTC[%d]: paddr = 0x%08x, irq = %d\n", + printk(KERN_INFO "RTC[%d]: paddr = 0x%08x, irq = %d\n", totallen, (u32)pentry->phys_addr, pentry->irq); mp_irq.type = MP_IOAPIC; mp_irq.irqtype = mp_INT; - mp_irq.irqflag = 0xf; /* level trigger and active low */ + mp_irq.irqflag = 0; mp_irq.srcbus = 0; mp_irq.srcbusirq = pentry->irq; /* IRQ */ mp_irq.dstapic = MP_APIC_ALL; @@ -221,7 +209,6 @@ static unsigned long __init mrst_calibrate_tsc(void) void __init mrst_time_init(void) { - sfi_table_parse(SFI_SIG_MTMR, NULL, NULL, sfi_parse_mtmr); switch (mrst_timer_options) { case MRST_TIMER_APBT_ONLY: break; @@ -237,10 +224,16 @@ void __init mrst_time_init(void) return; } /* we need at least one APB timer */ + sfi_table_parse(SFI_SIG_MTMR, NULL, NULL, sfi_parse_mtmr); pre_init_apic_IRQ0(); apbt_time_init(); } +void __init mrst_rtc_init(void) +{ + sfi_table_parse(SFI_SIG_MRTC, NULL, NULL, sfi_parse_mrtc); +} + void __cpuinit mrst_arch_setup(void) { if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 0x27) @@ -263,17 +256,6 @@ static int mrst_i8042_detect(void) return 0; } -/* Reboot and power off are handled by the SCU on a MID device */ -static void mrst_power_off(void) -{ - intel_scu_ipc_simple_command(0xf1, 1); -} - -static void mrst_reboot(void) -{ - intel_scu_ipc_simple_command(0xf1, 0); -} - /* * Moorestown specific x86_init function overrides and early setup * calls. @@ -299,10 +281,6 @@ void __init x86_mrst_early_setup(void) legacy_pic = &null_legacy_pic; - /* Moorestown specific power_off/restart method */ - pm_power_off = mrst_power_off; - machine_ops.emergency_restart = mrst_reboot; - /* Avoid searching for BIOS MP tables */ x86_init.mpparse.find_smp_config = x86_init_noop; x86_init.mpparse.get_smp_config = x86_init_uint_noop; @@ -331,505 +309,3 @@ static inline int __init setup_x86_mrst_timer(char *arg) return 0; } __setup("x86_mrst_timer=", setup_x86_mrst_timer); - -/* - * Parsing GPIO table first, since the DEVS table will need this table - * to map the pin name to the actual pin. - */ -static struct sfi_gpio_table_entry *gpio_table; -static int gpio_num_entry; - -static int __init sfi_parse_gpio(struct sfi_table_header *table) -{ - struct sfi_table_simple *sb; - struct sfi_gpio_table_entry *pentry; - int num, i; - - if (gpio_table) - return 0; - sb = (struct sfi_table_simple *)table; - num = SFI_GET_NUM_ENTRIES(sb, struct sfi_gpio_table_entry); - pentry = (struct sfi_gpio_table_entry *)sb->pentry; - - gpio_table = (struct sfi_gpio_table_entry *) - kmalloc(num * sizeof(*pentry), GFP_KERNEL); - if (!gpio_table) - return -1; - memcpy(gpio_table, pentry, num * sizeof(*pentry)); - gpio_num_entry = num; - - pr_debug("GPIO pin info:\n"); - for (i = 0; i < num; i++, pentry++) - pr_debug("info[%2d]: controller = %16.16s, pin_name = %16.16s," - " pin = %d\n", i, - pentry->controller_name, - pentry->pin_name, - pentry->pin_no); - return 0; -} - -static int get_gpio_by_name(const char *name) -{ - struct sfi_gpio_table_entry *pentry = gpio_table; - int i; - - if (!pentry) - return -1; - for (i = 0; i < gpio_num_entry; i++, pentry++) { - if (!strncmp(name, pentry->pin_name, SFI_NAME_LEN)) - return pentry->pin_no; - } - return -1; -} - -/* - * Here defines the array of devices platform data that IAFW would export - * through SFI "DEVS" table, we use name and type to match the device and - * its platform data. - */ -struct devs_id { - char name[SFI_NAME_LEN + 1]; - u8 type; - u8 delay; - void *(*get_platform_data)(void *info); -}; - -/* the offset for the mapping of global gpio pin to irq */ -#define MRST_IRQ_OFFSET 0x100 - -static void __init *pmic_gpio_platform_data(void *info) -{ - static struct intel_pmic_gpio_platform_data pmic_gpio_pdata; - int gpio_base = get_gpio_by_name("pmic_gpio_base"); - - if (gpio_base == -1) - gpio_base = 64; - pmic_gpio_pdata.gpio_base = gpio_base; - pmic_gpio_pdata.irq_base = gpio_base + MRST_IRQ_OFFSET; - pmic_gpio_pdata.gpiointr = 0xffffeff8; - - return &pmic_gpio_pdata; -} - -static void __init *max3111_platform_data(void *info) -{ - struct spi_board_info *spi_info = info; - int intr = get_gpio_by_name("max3111_int"); - - if (intr == -1) - return NULL; - spi_info->irq = intr + MRST_IRQ_OFFSET; - return NULL; -} - -/* we have multiple max7315 on the board ... */ -#define MAX7315_NUM 2 -static void __init *max7315_platform_data(void *info) -{ - static struct pca953x_platform_data max7315_pdata[MAX7315_NUM]; - static int nr; - struct pca953x_platform_data *max7315 = &max7315_pdata[nr]; - struct i2c_board_info *i2c_info = info; - int gpio_base, intr; - char base_pin_name[SFI_NAME_LEN + 1]; - char intr_pin_name[SFI_NAME_LEN + 1]; - - if (nr == MAX7315_NUM) { - pr_err("too many max7315s, we only support %d\n", - MAX7315_NUM); - return NULL; - } - /* we have several max7315 on the board, we only need load several - * instances of the same pca953x driver to cover them - */ - strcpy(i2c_info->type, "max7315"); - if (nr++) { - sprintf(base_pin_name, "max7315_%d_base", nr); - sprintf(intr_pin_name, "max7315_%d_int", nr); - } else { - strcpy(base_pin_name, "max7315_base"); - strcpy(intr_pin_name, "max7315_int"); - } - - gpio_base = get_gpio_by_name(base_pin_name); - intr = get_gpio_by_name(intr_pin_name); - - if (gpio_base == -1) - return NULL; - max7315->gpio_base = gpio_base; - if (intr != -1) { - i2c_info->irq = intr + MRST_IRQ_OFFSET; - max7315->irq_base = gpio_base + MRST_IRQ_OFFSET; - } else { - i2c_info->irq = -1; - max7315->irq_base = -1; - } - return max7315; -} - -static void __init *emc1403_platform_data(void *info) -{ - static short intr2nd_pdata; - struct i2c_board_info *i2c_info = info; - int intr = get_gpio_by_name("thermal_int"); - int intr2nd = get_gpio_by_name("thermal_alert"); - - if (intr == -1 || intr2nd == -1) - return NULL; - - i2c_info->irq = intr + MRST_IRQ_OFFSET; - intr2nd_pdata = intr2nd + MRST_IRQ_OFFSET; - - return &intr2nd_pdata; -} - -static void __init *lis331dl_platform_data(void *info) -{ - static short intr2nd_pdata; - struct i2c_board_info *i2c_info = info; - int intr = get_gpio_by_name("accel_int"); - int intr2nd = get_gpio_by_name("accel_2"); - - if (intr == -1 || intr2nd == -1) - return NULL; - - i2c_info->irq = intr + MRST_IRQ_OFFSET; - intr2nd_pdata = intr2nd + MRST_IRQ_OFFSET; - - return &intr2nd_pdata; -} - -static void __init *no_platform_data(void *info) -{ - return NULL; -} - -static const struct devs_id __initconst device_ids[] = { - {"pmic_gpio", SFI_DEV_TYPE_SPI, 1, &pmic_gpio_platform_data}, - {"spi_max3111", SFI_DEV_TYPE_SPI, 0, &max3111_platform_data}, - {"i2c_max7315", SFI_DEV_TYPE_I2C, 1, &max7315_platform_data}, - {"i2c_max7315_2", SFI_DEV_TYPE_I2C, 1, &max7315_platform_data}, - {"emc1403", SFI_DEV_TYPE_I2C, 1, &emc1403_platform_data}, - {"i2c_accel", SFI_DEV_TYPE_I2C, 0, &lis331dl_platform_data}, - {"pmic_audio", SFI_DEV_TYPE_IPC, 1, &no_platform_data}, - {"msic_audio", SFI_DEV_TYPE_IPC, 1, &no_platform_data}, - {}, -}; - -#define MAX_IPCDEVS 24 -static struct platform_device *ipc_devs[MAX_IPCDEVS]; -static int ipc_next_dev; - -#define MAX_SCU_SPI 24 -static struct spi_board_info *spi_devs[MAX_SCU_SPI]; -static int spi_next_dev; - -#define MAX_SCU_I2C 24 -static struct i2c_board_info *i2c_devs[MAX_SCU_I2C]; -static int i2c_bus[MAX_SCU_I2C]; -static int i2c_next_dev; - -static void __init intel_scu_device_register(struct platform_device *pdev) -{ - if(ipc_next_dev == MAX_IPCDEVS) - pr_err("too many SCU IPC devices"); - else - ipc_devs[ipc_next_dev++] = pdev; -} - -static void __init intel_scu_spi_device_register(struct spi_board_info *sdev) -{ - struct spi_board_info *new_dev; - - if (spi_next_dev == MAX_SCU_SPI) { - pr_err("too many SCU SPI devices"); - return; - } - - new_dev = kzalloc(sizeof(*sdev), GFP_KERNEL); - if (!new_dev) { - pr_err("failed to alloc mem for delayed spi dev %s\n", - sdev->modalias); - return; - } - memcpy(new_dev, sdev, sizeof(*sdev)); - - spi_devs[spi_next_dev++] = new_dev; -} - -static void __init intel_scu_i2c_device_register(int bus, - struct i2c_board_info *idev) -{ - struct i2c_board_info *new_dev; - - if (i2c_next_dev == MAX_SCU_I2C) { - pr_err("too many SCU I2C devices"); - return; - } - - new_dev = kzalloc(sizeof(*idev), GFP_KERNEL); - if (!new_dev) { - pr_err("failed to alloc mem for delayed i2c dev %s\n", - idev->type); - return; - } - memcpy(new_dev, idev, sizeof(*idev)); - - i2c_bus[i2c_next_dev] = bus; - i2c_devs[i2c_next_dev++] = new_dev; -} - -/* Called by IPC driver */ -void intel_scu_devices_create(void) -{ - int i; - - for (i = 0; i < ipc_next_dev; i++) - platform_device_add(ipc_devs[i]); - - for (i = 0; i < spi_next_dev; i++) - spi_register_board_info(spi_devs[i], 1); - - for (i = 0; i < i2c_next_dev; i++) { - struct i2c_adapter *adapter; - struct i2c_client *client; - - adapter = i2c_get_adapter(i2c_bus[i]); - if (adapter) { - client = i2c_new_device(adapter, i2c_devs[i]); - if (!client) - pr_err("can't create i2c device %s\n", - i2c_devs[i]->type); - } else - i2c_register_board_info(i2c_bus[i], i2c_devs[i], 1); - } -} -EXPORT_SYMBOL_GPL(intel_scu_devices_create); - -/* Called by IPC driver */ -void intel_scu_devices_destroy(void) -{ - int i; - - for (i = 0; i < ipc_next_dev; i++) - platform_device_del(ipc_devs[i]); -} -EXPORT_SYMBOL_GPL(intel_scu_devices_destroy); - -static void __init install_irq_resource(struct platform_device *pdev, int irq) -{ - /* Single threaded */ - static struct resource __initdata res = { - .name = "IRQ", - .flags = IORESOURCE_IRQ, - }; - res.start = irq; - platform_device_add_resources(pdev, &res, 1); -} - -static void __init sfi_handle_ipc_dev(struct platform_device *pdev) -{ - const struct devs_id *dev = device_ids; - void *pdata = NULL; - - while (dev->name[0]) { - if (dev->type == SFI_DEV_TYPE_IPC && - !strncmp(dev->name, pdev->name, SFI_NAME_LEN)) { - pdata = dev->get_platform_data(pdev); - break; - } - dev++; - } - pdev->dev.platform_data = pdata; - intel_scu_device_register(pdev); -} - -static void __init sfi_handle_spi_dev(struct spi_board_info *spi_info) -{ - const struct devs_id *dev = device_ids; - void *pdata = NULL; - - while (dev->name[0]) { - if (dev->type == SFI_DEV_TYPE_SPI && - !strncmp(dev->name, spi_info->modalias, SFI_NAME_LEN)) { - pdata = dev->get_platform_data(spi_info); - break; - } - dev++; - } - spi_info->platform_data = pdata; - if (dev->delay) - intel_scu_spi_device_register(spi_info); - else - spi_register_board_info(spi_info, 1); -} - -static void __init sfi_handle_i2c_dev(int bus, struct i2c_board_info *i2c_info) -{ - const struct devs_id *dev = device_ids; - void *pdata = NULL; - - while (dev->name[0]) { - if (dev->type == SFI_DEV_TYPE_I2C && - !strncmp(dev->name, i2c_info->type, SFI_NAME_LEN)) { - pdata = dev->get_platform_data(i2c_info); - break; - } - dev++; - } - i2c_info->platform_data = pdata; - - if (dev->delay) - intel_scu_i2c_device_register(bus, i2c_info); - else - i2c_register_board_info(bus, i2c_info, 1); - } - - -static int __init sfi_parse_devs(struct sfi_table_header *table) -{ - struct sfi_table_simple *sb; - struct sfi_device_table_entry *pentry; - struct spi_board_info spi_info; - struct i2c_board_info i2c_info; - struct platform_device *pdev; - int num, i, bus; - int ioapic; - struct io_apic_irq_attr irq_attr; - - sb = (struct sfi_table_simple *)table; - num = SFI_GET_NUM_ENTRIES(sb, struct sfi_device_table_entry); - pentry = (struct sfi_device_table_entry *)sb->pentry; - - for (i = 0; i < num; i++, pentry++) { - if (pentry->irq != (u8)0xff) { /* native RTE case */ - /* these SPI2 devices are not exposed to system as PCI - * devices, but they have separate RTE entry in IOAPIC - * so we have to enable them one by one here - */ - ioapic = mp_find_ioapic(pentry->irq); - irq_attr.ioapic = ioapic; - irq_attr.ioapic_pin = pentry->irq; - irq_attr.trigger = 1; - irq_attr.polarity = 1; - io_apic_set_pci_routing(NULL, pentry->irq, &irq_attr); - } - switch (pentry->type) { - case SFI_DEV_TYPE_IPC: - /* ID as IRQ is a hack that will go away */ - pdev = platform_device_alloc(pentry->name, pentry->irq); - if (pdev == NULL) { - pr_err("out of memory for SFI platform device '%s'.\n", - pentry->name); - continue; - } - install_irq_resource(pdev, pentry->irq); - pr_debug("info[%2d]: IPC bus, name = %16.16s, " - "irq = 0x%2x\n", i, pentry->name, pentry->irq); - sfi_handle_ipc_dev(pdev); - break; - case SFI_DEV_TYPE_SPI: - memset(&spi_info, 0, sizeof(spi_info)); - strncpy(spi_info.modalias, pentry->name, SFI_NAME_LEN); - spi_info.irq = pentry->irq; - spi_info.bus_num = pentry->host_num; - spi_info.chip_select = pentry->addr; - spi_info.max_speed_hz = pentry->max_freq; - pr_debug("info[%2d]: SPI bus = %d, name = %16.16s, " - "irq = 0x%2x, max_freq = %d, cs = %d\n", i, - spi_info.bus_num, - spi_info.modalias, - spi_info.irq, - spi_info.max_speed_hz, - spi_info.chip_select); - sfi_handle_spi_dev(&spi_info); - break; - case SFI_DEV_TYPE_I2C: - memset(&i2c_info, 0, sizeof(i2c_info)); - bus = pentry->host_num; - strncpy(i2c_info.type, pentry->name, SFI_NAME_LEN); - i2c_info.irq = pentry->irq; - i2c_info.addr = pentry->addr; - pr_debug("info[%2d]: I2C bus = %d, name = %16.16s, " - "irq = 0x%2x, addr = 0x%x\n", i, bus, - i2c_info.type, - i2c_info.irq, - i2c_info.addr); - sfi_handle_i2c_dev(bus, &i2c_info); - break; - case SFI_DEV_TYPE_UART: - case SFI_DEV_TYPE_HSI: - default: - ; - } - } - return 0; -} - -static int __init mrst_platform_init(void) -{ - sfi_table_parse(SFI_SIG_GPIO, NULL, NULL, sfi_parse_gpio); - sfi_table_parse(SFI_SIG_DEVS, NULL, NULL, sfi_parse_devs); - return 0; -} -arch_initcall(mrst_platform_init); - -/* - * we will search these buttons in SFI GPIO table (by name) - * and register them dynamically. Please add all possible - * buttons here, we will shrink them if no GPIO found. - */ -static struct gpio_keys_button gpio_button[] = { - {KEY_POWER, -1, 1, "power_btn", EV_KEY, 0, 3000}, - {KEY_PROG1, -1, 1, "prog_btn1", EV_KEY, 0, 20}, - {KEY_PROG2, -1, 1, "prog_btn2", EV_KEY, 0, 20}, - {SW_LID, -1, 1, "lid_switch", EV_SW, 0, 20}, - {KEY_VOLUMEUP, -1, 1, "vol_up", EV_KEY, 0, 20}, - {KEY_VOLUMEDOWN, -1, 1, "vol_down", EV_KEY, 0, 20}, - {KEY_CAMERA, -1, 1, "camera_full", EV_KEY, 0, 20}, - {KEY_CAMERA_FOCUS, -1, 1, "camera_half", EV_KEY, 0, 20}, - {SW_KEYPAD_SLIDE, -1, 1, "MagSw1", EV_SW, 0, 20}, - {SW_KEYPAD_SLIDE, -1, 1, "MagSw2", EV_SW, 0, 20}, -}; - -static struct gpio_keys_platform_data mrst_gpio_keys = { - .buttons = gpio_button, - .rep = 1, - .nbuttons = -1, /* will fill it after search */ -}; - -static struct platform_device pb_device = { - .name = "gpio-keys", - .id = -1, - .dev = { - .platform_data = &mrst_gpio_keys, - }, -}; - -/* - * Shrink the non-existent buttons, register the gpio button - * device if there is some - */ -static int __init pb_keys_init(void) -{ - struct gpio_keys_button *gb = gpio_button; - int i, num, good = 0; - - num = sizeof(gpio_button) / sizeof(struct gpio_keys_button); - for (i = 0; i < num; i++) { - gb[i].gpio = get_gpio_by_name(gb[i].desc); - if (gb[i].gpio == -1) - continue; - - if (i != good) - gb[good] = gb[i]; - good++; - } - - if (good) { - mrst_gpio_keys.nbuttons = good; - return platform_device_register(&pb_device); - } - return 0; -} -late_initcall(pb_keys_init); diff --git a/trunk/arch/x86/platform/mrst/vrtc.c b/trunk/arch/x86/platform/mrst/vrtc.c deleted file mode 100644 index 32cd7edd71a0..000000000000 --- a/trunk/arch/x86/platform/mrst/vrtc.c +++ /dev/null @@ -1,165 +0,0 @@ -/* - * vrtc.c: Driver for virtual RTC device on Intel MID platform - * - * (C) Copyright 2009 Intel Corporation - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; version 2 - * of the License. - * - * Note: - * VRTC is emulated by system controller firmware, the real HW - * RTC is located in the PMIC device. SCU FW shadows PMIC RTC - * in a memory mapped IO space that is visible to the host IA - * processor. - * - * This driver is based on RTC CMOS driver. - */ - -#include -#include -#include -#include - -#include -#include -#include -#include - -static unsigned char __iomem *vrtc_virt_base; - -unsigned char vrtc_cmos_read(unsigned char reg) -{ - unsigned char retval; - - /* vRTC's registers range from 0x0 to 0xD */ - if (reg > 0xd || !vrtc_virt_base) - return 0xff; - - lock_cmos_prefix(reg); - retval = __raw_readb(vrtc_virt_base + (reg << 2)); - lock_cmos_suffix(reg); - return retval; -} -EXPORT_SYMBOL_GPL(vrtc_cmos_read); - -void vrtc_cmos_write(unsigned char val, unsigned char reg) -{ - if (reg > 0xd || !vrtc_virt_base) - return; - - lock_cmos_prefix(reg); - __raw_writeb(val, vrtc_virt_base + (reg << 2)); - lock_cmos_suffix(reg); -} -EXPORT_SYMBOL_GPL(vrtc_cmos_write); - -unsigned long vrtc_get_time(void) -{ - u8 sec, min, hour, mday, mon; - u32 year; - - while ((vrtc_cmos_read(RTC_FREQ_SELECT) & RTC_UIP)) - cpu_relax(); - - sec = vrtc_cmos_read(RTC_SECONDS); - min = vrtc_cmos_read(RTC_MINUTES); - hour = vrtc_cmos_read(RTC_HOURS); - mday = vrtc_cmos_read(RTC_DAY_OF_MONTH); - mon = vrtc_cmos_read(RTC_MONTH); - year = vrtc_cmos_read(RTC_YEAR); - - /* vRTC YEAR reg contains the offset to 1960 */ - year += 1960; - - printk(KERN_INFO "vRTC: sec: %d min: %d hour: %d day: %d " - "mon: %d year: %d\n", sec, min, hour, mday, mon, year); - - return mktime(year, mon, mday, hour, min, sec); -} - -/* Only care about the minutes and seconds */ -int vrtc_set_mmss(unsigned long nowtime) -{ - int real_sec, real_min; - int vrtc_min; - - vrtc_min = vrtc_cmos_read(RTC_MINUTES); - - real_sec = nowtime % 60; - real_min = nowtime / 60; - if (((abs(real_min - vrtc_min) + 15)/30) & 1) - real_min += 30; - real_min %= 60; - - vrtc_cmos_write(real_sec, RTC_SECONDS); - vrtc_cmos_write(real_min, RTC_MINUTES); - return 0; -} - -void __init mrst_rtc_init(void) -{ - unsigned long rtc_paddr; - void __iomem *virt_base; - - sfi_table_parse(SFI_SIG_MRTC, NULL, NULL, sfi_parse_mrtc); - if (!sfi_mrtc_num) - return; - - rtc_paddr = sfi_mrtc_array[0].phys_addr; - - /* vRTC's register address may not be page aligned */ - set_fixmap_nocache(FIX_LNW_VRTC, rtc_paddr); - - virt_base = (void __iomem *)__fix_to_virt(FIX_LNW_VRTC); - virt_base += rtc_paddr & ~PAGE_MASK; - vrtc_virt_base = virt_base; - - x86_platform.get_wallclock = vrtc_get_time; - x86_platform.set_wallclock = vrtc_set_mmss; -} - -/* - * The Moorestown platform has a memory mapped virtual RTC device that emulates - * the programming interface of the RTC. - */ - -static struct resource vrtc_resources[] = { - [0] = { - .flags = IORESOURCE_MEM, - }, - [1] = { - .flags = IORESOURCE_IRQ, - } -}; - -static struct platform_device vrtc_device = { - .name = "rtc_mrst", - .id = -1, - .resource = vrtc_resources, - .num_resources = ARRAY_SIZE(vrtc_resources), -}; - -/* Register the RTC device if appropriate */ -static int __init mrst_device_create(void) -{ - /* No Moorestown, no device */ - if (!mrst_identify_cpu()) - return -ENODEV; - /* No timer, no device */ - if (!sfi_mrtc_num) - return -ENODEV; - - /* iomem resource */ - vrtc_resources[0].start = sfi_mrtc_array[0].phys_addr; - vrtc_resources[0].end = sfi_mrtc_array[0].phys_addr + - MRST_VRTC_MAP_SZ; - /* irq resource */ - vrtc_resources[1].start = sfi_mrtc_array[0].irq; - vrtc_resources[1].end = sfi_mrtc_array[0].irq; - - return platform_device_register(&vrtc_device); -} - -module_init(mrst_device_create); diff --git a/trunk/arch/x86/platform/sfi/sfi.c b/trunk/arch/x86/platform/sfi/sfi.c index ca54875ac795..dd4c281ffe57 100644 --- a/trunk/arch/x86/platform/sfi/sfi.c +++ b/trunk/arch/x86/platform/sfi/sfi.c @@ -48,9 +48,9 @@ static void __init mp_sfi_register_lapic_address(unsigned long address) /* All CPUs enumerated by SFI must be present and enabled */ static void __cpuinit mp_sfi_register_lapic(u8 id) { - if (MAX_LOCAL_APIC - id <= 0) { + if (MAX_APICS - id <= 0) { pr_warning("Processor #%d invalid (max %d)\n", - id, MAX_LOCAL_APIC); + id, MAX_APICS); return; } diff --git a/trunk/arch/x86/platform/uv/tlb_uv.c b/trunk/arch/x86/platform/uv/tlb_uv.c index df58e9cad96a..ba9caa808a9c 100644 --- a/trunk/arch/x86/platform/uv/tlb_uv.c +++ b/trunk/arch/x86/platform/uv/tlb_uv.c @@ -1341,7 +1341,7 @@ uv_activation_descriptor_init(int node, int pnode) /* * each bau_desc is 64 bytes; there are 8 (UV_ITEMS_PER_DESCRIPTOR) - * per cpu; and one per cpu on the uvhub (UV_ADP_SIZE) + * per cpu; and up to 32 (UV_ADP_SIZE) cpu's per uvhub */ bau_desc = kmalloc_node(sizeof(struct bau_desc) * UV_ADP_SIZE * UV_ITEMS_PER_DESCRIPTOR, GFP_KERNEL, node); @@ -1490,7 +1490,7 @@ calculate_destination_timeout(void) /* * initialize the bau_control structure for each cpu */ -static int __init uv_init_per_cpu(int nuvhubs) +static void __init uv_init_per_cpu(int nuvhubs) { int i; int cpu; @@ -1507,7 +1507,7 @@ static int __init uv_init_per_cpu(int nuvhubs) struct bau_control *smaster = NULL; struct socket_desc { short num_cpus; - short cpu_number[MAX_CPUS_PER_SOCKET]; + short cpu_number[16]; }; struct uvhub_desc { unsigned short socket_mask; @@ -1540,10 +1540,6 @@ static int __init uv_init_per_cpu(int nuvhubs) sdp = &bdp->socket[socket]; sdp->cpu_number[sdp->num_cpus] = cpu; sdp->num_cpus++; - if (sdp->num_cpus > MAX_CPUS_PER_SOCKET) { - printk(KERN_EMERG "%d cpus per socket invalid\n", sdp->num_cpus); - return 1; - } } for (uvhub = 0; uvhub < nuvhubs; uvhub++) { if (!(*(uvhub_mask + (uvhub/8)) & (1 << (uvhub%8)))) @@ -1574,12 +1570,6 @@ static int __init uv_init_per_cpu(int nuvhubs) bcp->uvhub_master = hmaster; bcp->uvhub_cpu = uv_cpu_hub_info(cpu)-> blade_processor_id; - if (bcp->uvhub_cpu >= MAX_CPUS_PER_UVHUB) { - printk(KERN_EMERG - "%d cpus per uvhub invalid\n", - bcp->uvhub_cpu); - return 1; - } } nextsocket: socket++; @@ -1605,7 +1595,6 @@ static int __init uv_init_per_cpu(int nuvhubs) bcp->congested_reps = congested_reps; bcp->congested_period = congested_period; } - return 0; } /* @@ -1636,10 +1625,7 @@ static int __init uv_bau_init(void) spin_lock_init(&disable_lock); congested_cycles = microsec_2_cycles(congested_response_us); - if (uv_init_per_cpu(nuvhubs)) { - nobau = 1; - return 0; - } + uv_init_per_cpu(nuvhubs); uv_partition_base_pnode = 0x7fffffff; for (uvhub = 0; uvhub < nuvhubs; uvhub++) diff --git a/trunk/arch/x86/platform/visws/visws_quirks.c b/trunk/arch/x86/platform/visws/visws_quirks.c index 632037671746..3371bd053b89 100644 --- a/trunk/arch/x86/platform/visws/visws_quirks.c +++ b/trunk/arch/x86/platform/visws/visws_quirks.c @@ -171,7 +171,7 @@ static void __init MP_processor_info(struct mpc_cpu *m) ver = m->apicver; if ((ver >= 0x14 && m->apicid >= 0xff) || m->apicid >= 0xf) { printk(KERN_ERR "Processor #%d INVALID. (Max ID: %d).\n", - m->apicid, MAX_LOCAL_APIC); + m->apicid, MAX_APICS); return; } diff --git a/trunk/drivers/acpi/acpica/nsinit.c b/trunk/drivers/acpi/acpica/nsinit.c index 0cac7ec0d2ec..660a2728908d 100644 --- a/trunk/drivers/acpi/acpica/nsinit.c +++ b/trunk/drivers/acpi/acpica/nsinit.c @@ -577,7 +577,9 @@ acpi_ns_init_one_device(acpi_handle obj_handle, * as possible (without an NMI being received in the middle of * this) - so disable NMIs and initialize the device: */ + acpi_nmi_disable(); status = acpi_ns_evaluate(info); + acpi_nmi_enable(); if (ACPI_SUCCESS(status)) { walk_info->num_INI++; diff --git a/trunk/drivers/acpi/numa.c b/trunk/drivers/acpi/numa.c index d9926afec110..5718566e00f9 100644 --- a/trunk/drivers/acpi/numa.c +++ b/trunk/drivers/acpi/numa.c @@ -275,23 +275,13 @@ acpi_table_parse_srat(enum acpi_srat_type id, int __init acpi_numa_init(void) { int ret = 0; - int nr_cpu_entries = nr_cpu_ids; - -#ifdef CONFIG_X86 - /* - * Should not limit number with cpu num that is from NR_CPUS or nr_cpus= - * SRAT cpu entries could have different order with that in MADT. - * So go over all cpu entries in SRAT to get apicid to node mapping. - */ - nr_cpu_entries = MAX_LOCAL_APIC; -#endif /* SRAT: Static Resource Affinity Table */ if (!acpi_table_parse(ACPI_SIG_SRAT, acpi_parse_srat)) { acpi_table_parse_srat(ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY, - acpi_parse_x2apic_affinity, nr_cpu_entries); + acpi_parse_x2apic_affinity, nr_cpu_ids); acpi_table_parse_srat(ACPI_SRAT_TYPE_CPU_AFFINITY, - acpi_parse_processor_affinity, nr_cpu_entries); + acpi_parse_processor_affinity, nr_cpu_ids); ret = acpi_table_parse_srat(ACPI_SRAT_TYPE_MEMORY_AFFINITY, acpi_parse_memory_affinity, NR_NODE_MEMBLKS); diff --git a/trunk/drivers/atm/atmtcp.c b/trunk/drivers/atm/atmtcp.c index 0b0625054a87..2b464b631f22 100644 --- a/trunk/drivers/atm/atmtcp.c +++ b/trunk/drivers/atm/atmtcp.c @@ -392,10 +392,7 @@ static int atmtcp_attach(struct atm_vcc *vcc,int itf) atm_dev_put(dev); return -EMEDIUMTYPE; } - if (PRIV(dev)->vcc) { - atm_dev_put(dev); - return -EBUSY; - } + if (PRIV(dev)->vcc) return -EBUSY; } else { int error; diff --git a/trunk/drivers/char/agp/amd64-agp.c b/trunk/drivers/char/agp/amd64-agp.c index 9252e85706ef..42396df55556 100644 --- a/trunk/drivers/char/agp/amd64-agp.c +++ b/trunk/drivers/char/agp/amd64-agp.c @@ -38,7 +38,7 @@ static int agp_bridges_found; static void amd64_tlbflush(struct agp_memory *temp) { - amd_flush_garts(); + k8_flush_garts(); } static int amd64_insert_memory(struct agp_memory *mem, off_t pg_start, int type) @@ -124,7 +124,7 @@ static int amd64_fetch_size(void) u32 temp; struct aper_size_info_32 *values; - dev = node_to_amd_nb(0)->misc; + dev = k8_northbridges.nb_misc[0]; if (dev==NULL) return 0; @@ -181,15 +181,16 @@ static int amd_8151_configure(void) unsigned long gatt_bus = virt_to_phys(agp_bridge->gatt_table_real); int i; - if (!amd_nb_has_feature(AMD_NB_GART)) + if (!k8_northbridges.gart_supported) return 0; /* Configure AGP regs in each x86-64 host bridge. */ - for (i = 0; i < amd_nb_num(); i++) { + for (i = 0; i < k8_northbridges.num; i++) { agp_bridge->gart_bus_addr = - amd64_configure(node_to_amd_nb(i)->misc, gatt_bus); + amd64_configure(k8_northbridges.nb_misc[i], + gatt_bus); } - amd_flush_garts(); + k8_flush_garts(); return 0; } @@ -199,11 +200,11 @@ static void amd64_cleanup(void) u32 tmp; int i; - if (!amd_nb_has_feature(AMD_NB_GART)) + if (!k8_northbridges.gart_supported) return; - for (i = 0; i < amd_nb_num(); i++) { - struct pci_dev *dev = node_to_amd_nb(i)->misc; + for (i = 0; i < k8_northbridges.num; i++) { + struct pci_dev *dev = k8_northbridges.nb_misc[i]; /* disable gart translation */ pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &tmp); tmp &= ~GARTEN; @@ -330,15 +331,15 @@ static __devinit int cache_nbs(struct pci_dev *pdev, u32 cap_ptr) { int i; - if (amd_cache_northbridges() < 0) + if (cache_k8_northbridges() < 0) return -ENODEV; - if (!amd_nb_has_feature(AMD_NB_GART)) + if (!k8_northbridges.gart_supported) return -ENODEV; i = 0; - for (i = 0; i < amd_nb_num(); i++) { - struct pci_dev *dev = node_to_amd_nb(i)->misc; + for (i = 0; i < k8_northbridges.num; i++) { + struct pci_dev *dev = k8_northbridges.nb_misc[i]; if (fix_northbridge(dev, pdev, cap_ptr) < 0) { dev_err(&dev->dev, "no usable aperture found\n"); #ifdef __x86_64__ @@ -415,7 +416,7 @@ static int __devinit uli_agp_init(struct pci_dev *pdev) } /* shadow x86-64 registers into ULi registers */ - pci_read_config_dword (node_to_amd_nb(0)->misc, AMD64_GARTAPERTUREBASE, + pci_read_config_dword (k8_northbridges.nb_misc[0], AMD64_GARTAPERTUREBASE, &httfea); /* if x86-64 aperture base is beyond 4G, exit here */ @@ -483,7 +484,7 @@ static int nforce3_agp_init(struct pci_dev *pdev) pci_write_config_dword(dev1, NVIDIA_X86_64_1_APSIZE, tmp); /* shadow x86-64 registers into NVIDIA registers */ - pci_read_config_dword (node_to_amd_nb(0)->misc, AMD64_GARTAPERTUREBASE, + pci_read_config_dword (k8_northbridges.nb_misc[0], AMD64_GARTAPERTUREBASE, &apbase); /* if x86-64 aperture base is beyond 4G, exit here */ @@ -777,7 +778,7 @@ int __init agp_amd64_init(void) } /* First check that we have at least one AMD64 NB */ - if (!pci_dev_present(amd_nb_misc_ids)) + if (!pci_dev_present(k8_nb_ids)) return -ENODEV; /* Look for any AGP bridge */ diff --git a/trunk/drivers/cpufreq/cpufreq.c b/trunk/drivers/cpufreq/cpufreq.c index 1109f6848a43..c63a43823744 100644 --- a/trunk/drivers/cpufreq/cpufreq.c +++ b/trunk/drivers/cpufreq/cpufreq.c @@ -355,7 +355,6 @@ void cpufreq_notify_transition(struct cpufreq_freqs *freqs, unsigned int state) dprintk("FREQ: %lu - CPU: %lu", (unsigned long)freqs->new, (unsigned long)freqs->cpu); trace_power_frequency(POWER_PSTATE, freqs->new, freqs->cpu); - trace_cpu_frequency(freqs->new, freqs->cpu); srcu_notifier_call_chain(&cpufreq_transition_notifier_list, CPUFREQ_POSTCHANGE, freqs); if (likely(policy) && likely(policy->cpu == freqs->cpu)) diff --git a/trunk/drivers/cpuidle/cpuidle.c b/trunk/drivers/cpuidle/cpuidle.c index 08d5f05378d9..a50710843378 100644 --- a/trunk/drivers/cpuidle/cpuidle.c +++ b/trunk/drivers/cpuidle/cpuidle.c @@ -107,7 +107,6 @@ static void cpuidle_idle_call(void) if (cpuidle_curr_governor->reflect) cpuidle_curr_governor->reflect(dev); trace_power_end(smp_processor_id()); - trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id()); } /** diff --git a/trunk/drivers/dma/mv_xor.c b/trunk/drivers/dma/mv_xor.c index a25f5f61e0e0..411d5bf50fc4 100644 --- a/trunk/drivers/dma/mv_xor.c +++ b/trunk/drivers/dma/mv_xor.c @@ -449,7 +449,7 @@ mv_xor_slot_cleanup(struct mv_xor_chan *mv_chan) static void mv_xor_tasklet(unsigned long data) { struct mv_xor_chan *chan = (struct mv_xor_chan *) data; - mv_xor_slot_cleanup(chan); + __mv_xor_slot_cleanup(chan); } static struct mv_xor_desc_slot * diff --git a/trunk/drivers/edac/amd64_edac.c b/trunk/drivers/edac/amd64_edac.c index df211181fca4..eca9ba193e94 100644 --- a/trunk/drivers/edac/amd64_edac.c +++ b/trunk/drivers/edac/amd64_edac.c @@ -2917,7 +2917,7 @@ static int __init amd64_edac_init(void) opstate_init(); - if (amd_cache_northbridges() < 0) + if (cache_k8_northbridges() < 0) goto err_ret; msrs = msrs_alloc(); @@ -2934,7 +2934,7 @@ static int __init amd64_edac_init(void) * to finish initialization of the MC instances. */ err = -ENODEV; - for (nb = 0; nb < amd_nb_num(); nb++) { + for (nb = 0; nb < k8_northbridges.num; nb++) { if (!pvt_lookup[nb]) continue; diff --git a/trunk/drivers/gpu/drm/i915/dvo_ch7017.c b/trunk/drivers/gpu/drm/i915/dvo_ch7017.c index d3e8c540f778..af70337567ce 100644 --- a/trunk/drivers/gpu/drm/i915/dvo_ch7017.c +++ b/trunk/drivers/gpu/drm/i915/dvo_ch7017.c @@ -242,7 +242,7 @@ static bool ch7017_init(struct intel_dvo_device *dvo, static enum drm_connector_status ch7017_detect(struct intel_dvo_device *dvo) { - return connector_status_connected; + return connector_status_unknown; } static enum drm_mode_status ch7017_mode_valid(struct intel_dvo_device *dvo, diff --git a/trunk/drivers/gpu/drm/i915/i915_dma.c b/trunk/drivers/gpu/drm/i915/i915_dma.c index cb900dc83d95..e6800819bca8 100644 --- a/trunk/drivers/gpu/drm/i915/i915_dma.c +++ b/trunk/drivers/gpu/drm/i915/i915_dma.c @@ -34,7 +34,6 @@ #include "i915_drm.h" #include "i915_drv.h" #include "i915_trace.h" -#include "../../../platform/x86/intel_ips.h" #include #include #include @@ -1871,26 +1870,6 @@ bool i915_gpu_turbo_disable(void) } EXPORT_SYMBOL_GPL(i915_gpu_turbo_disable); -/** - * Tells the intel_ips driver that the i915 driver is now loaded, if - * IPS got loaded first. - * - * This awkward dance is so that neither module has to depend on the - * other in order for IPS to do the appropriate communication of - * GPU turbo limits to i915. - */ -static void -ips_ping_for_i915_load(void) -{ - void (*link)(void); - - link = symbol_get(ips_link_to_i915_driver); - if (link) { - link(); - symbol_put(ips_link_to_i915_driver); - } -} - /** * i915_driver_load - setup chip and create an initial config * @dev: DRM device @@ -2096,8 +2075,6 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) dev_priv->mchdev_lock = &mchdev_lock; spin_unlock(&mchdev_lock); - ips_ping_for_i915_load(); - return 0; out_workqueue_free: diff --git a/trunk/drivers/gpu/drm/i915/i915_reg.h b/trunk/drivers/gpu/drm/i915/i915_reg.h index cb8f43429279..878fc766a12c 100644 --- a/trunk/drivers/gpu/drm/i915/i915_reg.h +++ b/trunk/drivers/gpu/drm/i915/i915_reg.h @@ -2471,9 +2471,6 @@ # define MARIUNIT_CLOCK_GATE_DISABLE (1 << 18) # define SVSMUNIT_CLOCK_GATE_DISABLE (1 << 1) -#define PCH_3DCGDIS1 0x46024 -# define VFMUNIT_CLOCK_GATE_DISABLE (1 << 11) - #define FDI_PLL_FREQ_CTL 0x46030 #define FDI_PLL_FREQ_CHANGE_REQUEST (1<<24) #define FDI_PLL_FREQ_LOCK_LIMIT_MASK 0xfff00 @@ -2591,13 +2588,6 @@ #define ILK_DISPLAY_CHICKEN2 0x42004 #define ILK_DPARB_GATE (1<<22) #define ILK_VSDPFD_FULL (1<<21) -#define ILK_DISPLAY_CHICKEN_FUSES 0x42014 -#define ILK_INTERNAL_GRAPHICS_DISABLE (1<<31) -#define ILK_INTERNAL_DISPLAY_DISABLE (1<<30) -#define ILK_DISPLAY_DEBUG_DISABLE (1<<29) -#define ILK_HDCP_DISABLE (1<<25) -#define ILK_eDP_A_DISABLE (1<<24) -#define ILK_DESKTOP (1<<23) #define ILK_DSPCLK_GATE 0x42020 #define ILK_DPARB_CLK_GATE (1<<5) /* According to spec this bit 7/8/9 of 0x42020 should be set to enable FBC */ diff --git a/trunk/drivers/gpu/drm/i915/intel_bios.c b/trunk/drivers/gpu/drm/i915/intel_bios.c index b0b1200ed650..2b2078695d2a 100644 --- a/trunk/drivers/gpu/drm/i915/intel_bios.c +++ b/trunk/drivers/gpu/drm/i915/intel_bios.c @@ -270,7 +270,7 @@ parse_general_features(struct drm_i915_private *dev_priv, general->ssc_freq ? 66 : 48; else if (IS_GEN5(dev) || IS_GEN6(dev)) dev_priv->lvds_ssc_freq = - general->ssc_freq ? 100 : 120; + general->ssc_freq ? 120 : 100; else dev_priv->lvds_ssc_freq = general->ssc_freq ? 100 : 96; diff --git a/trunk/drivers/gpu/drm/i915/intel_display.c b/trunk/drivers/gpu/drm/i915/intel_display.c index fca523288aca..d9b7092439ef 100644 --- a/trunk/drivers/gpu/drm/i915/intel_display.c +++ b/trunk/drivers/gpu/drm/i915/intel_display.c @@ -5379,23 +5379,6 @@ static int intel_encoder_clones(struct drm_device *dev, int type_mask) return index_mask; } -static bool has_edp_a(struct drm_device *dev) -{ - struct drm_i915_private *dev_priv = dev->dev_private; - - if (!IS_MOBILE(dev)) - return false; - - if ((I915_READ(DP_A) & DP_DETECTED) == 0) - return false; - - if (IS_GEN5(dev) && - (I915_READ(ILK_DISPLAY_CHICKEN_FUSES) & ILK_eDP_A_DISABLE)) - return false; - - return true; -} - static void intel_setup_outputs(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; @@ -5413,7 +5396,7 @@ static void intel_setup_outputs(struct drm_device *dev) if (HAS_PCH_SPLIT(dev)) { dpd_is_edp = intel_dpd_is_edp(dev); - if (has_edp_a(dev)) + if (IS_MOBILE(dev) && (I915_READ(DP_A) & DP_DETECTED)) intel_dp_init(dev, DP_A); if (dpd_is_edp && (I915_READ(PCH_DP_D) & DP_DETECTED)) @@ -5842,8 +5825,6 @@ void intel_init_clock_gating(struct drm_device *dev) I915_WRITE(PCH_3DCGDIS0, MARIUNIT_CLOCK_GATE_DISABLE | SVSMUNIT_CLOCK_GATE_DISABLE); - I915_WRITE(PCH_3DCGDIS1, - VFMUNIT_CLOCK_GATE_DISABLE); } I915_WRITE(PCH_DSPCLK_GATE_D, dspclk_gate); diff --git a/trunk/drivers/gpu/drm/i915/intel_sdvo.c b/trunk/drivers/gpu/drm/i915/intel_sdvo.c index 6bc42fa2a6ec..27e63abf2a73 100644 --- a/trunk/drivers/gpu/drm/i915/intel_sdvo.c +++ b/trunk/drivers/gpu/drm/i915/intel_sdvo.c @@ -2040,14 +2040,13 @@ intel_sdvo_dvi_init(struct intel_sdvo *intel_sdvo, int device) SDVO_COLORIMETRY_RGB256); connector->connector_type = DRM_MODE_CONNECTOR_HDMIA; + intel_sdvo_add_hdmi_properties(intel_sdvo_connector); intel_sdvo->is_hdmi = true; } intel_sdvo->base.clone_mask = ((1 << INTEL_SDVO_NON_TV_CLONE_BIT) | (1 << INTEL_ANALOG_CLONE_BIT)); intel_sdvo_connector_init(intel_sdvo_connector, intel_sdvo); - if (intel_sdvo->is_hdmi) - intel_sdvo_add_hdmi_properties(intel_sdvo_connector); return true; } diff --git a/trunk/drivers/hwmon/s3c-hwmon.c b/trunk/drivers/hwmon/s3c-hwmon.c index 92b42db43bcf..05248f2d7581 100644 --- a/trunk/drivers/hwmon/s3c-hwmon.c +++ b/trunk/drivers/hwmon/s3c-hwmon.c @@ -234,6 +234,7 @@ static int s3c_hwmon_create_attr(struct device *dev, attr->index = channel; attr->dev_attr.attr.name = attrs->in_name; attr->dev_attr.attr.mode = S_IRUGO; + attr->dev_attr.attr.owner = THIS_MODULE; attr->dev_attr.show = s3c_hwmon_ch_show; ret = device_create_file(dev, &attr->dev_attr); @@ -251,6 +252,7 @@ static int s3c_hwmon_create_attr(struct device *dev, attr->index = channel; attr->dev_attr.attr.name = attrs->label_name; attr->dev_attr.attr.mode = S_IRUGO; + attr->dev_attr.attr.owner = THIS_MODULE; attr->dev_attr.show = s3c_hwmon_label_show; ret = device_create_file(dev, &attr->dev_attr); diff --git a/trunk/drivers/idle/intel_idle.c b/trunk/drivers/idle/intel_idle.c index 56ac09d6c930..c131d58bcb50 100644 --- a/trunk/drivers/idle/intel_idle.c +++ b/trunk/drivers/idle/intel_idle.c @@ -220,8 +220,9 @@ static int intel_idle(struct cpuidle_device *dev, struct cpuidle_state *state) kt_before = ktime_get_real(); stop_critical_timings(); +#ifndef MODULE trace_power_start(POWER_CSTATE, (eax >> 4) + 1, cpu); - trace_cpu_idle((eax >> 4) + 1, cpu); +#endif if (!need_resched()) { __monitor((void *)¤t_thread_info()->flags, 0, 0); diff --git a/trunk/drivers/isdn/gigaset/capi.c b/trunk/drivers/isdn/gigaset/capi.c index 658e75f18d05..bcc174e4f3b1 100644 --- a/trunk/drivers/isdn/gigaset/capi.c +++ b/trunk/drivers/isdn/gigaset/capi.c @@ -1900,7 +1900,6 @@ static void do_disconnect_req(struct gigaset_capi_ctr *iif, if (b3skb == NULL) { dev_err(cs->dev, "%s: out of memory\n", __func__); send_conf(iif, ap, skb, CAPI_MSGOSRESOURCEERR); - kfree(b3cmsg); return; } capi_cmsg2message(b3cmsg, diff --git a/trunk/drivers/media/video/cx25840/cx25840-core.c b/trunk/drivers/media/video/cx25840/cx25840-core.c index f16461844c5c..dfb198d0415b 100644 --- a/trunk/drivers/media/video/cx25840/cx25840-core.c +++ b/trunk/drivers/media/video/cx25840/cx25840-core.c @@ -1989,23 +1989,8 @@ static int cx25840_probe(struct i2c_client *client, v4l2_ctrl_new_std(&state->hdl, &cx25840_ctrl_ops, V4L2_CID_HUE, -128, 127, 1, 0); if (!is_cx2583x(state)) { - default_volume = cx25840_read(client, 0x8d4); - /* - * Enforce the legacy PVR-350/MSP3400 to PVR-150/CX25843 volume - * scale mapping limits to avoid -ERANGE errors when - * initializing the volume control - */ - if (default_volume > 228) { - /* Bottom out at -96 dB, v4l2 vol range 0x2e00-0x2fff */ - default_volume = 228; - cx25840_write(client, 0x8d4, 228); - } - else if (default_volume < 20) { - /* Top out at + 8 dB, v4l2 vol range 0xfe00-0xffff */ - default_volume = 20; - cx25840_write(client, 0x8d4, 20); - } - default_volume = (((228 - default_volume) >> 1) + 23) << 9; + default_volume = 228 - cx25840_read(client, 0x8d4); + default_volume = ((default_volume / 2) + 23) << 9; state->volume = v4l2_ctrl_new_std(&state->hdl, &cx25840_audio_ctrl_ops, V4L2_CID_AUDIO_VOLUME, diff --git a/trunk/drivers/media/video/cx88/cx88-alsa.c b/trunk/drivers/media/video/cx88/cx88-alsa.c index 54b7fcd469a8..4aaa47c0eabf 100644 --- a/trunk/drivers/media/video/cx88/cx88-alsa.c +++ b/trunk/drivers/media/video/cx88/cx88-alsa.c @@ -40,6 +40,7 @@ #include #include #include +#include #include "cx88.h" #include "cx88-reg.h" @@ -586,26 +587,47 @@ static int snd_cx88_volume_put(struct snd_kcontrol *kcontrol, int left, right, v, b; int changed = 0; u32 old; + struct v4l2_control client_ctl; + + /* Pass volume & balance onto any WM8775 */ + if (value->value.integer.value[0] >= value->value.integer.value[1]) { + v = value->value.integer.value[0] << 10; + b = value->value.integer.value[0] ? + (0x8000 * value->value.integer.value[1]) / value->value.integer.value[0] : + 0x8000; + } else { + v = value->value.integer.value[1] << 10; + b = value->value.integer.value[1] ? + 0xffff - (0x8000 * value->value.integer.value[0]) / value->value.integer.value[1] : + 0x8000; + } + client_ctl.value = v; + client_ctl.id = V4L2_CID_AUDIO_VOLUME; + call_hw(core, WM8775_GID, core, s_ctrl, &client_ctl); + + client_ctl.value = b; + client_ctl.id = V4L2_CID_AUDIO_BALANCE; + call_hw(core, WM8775_GID, core, s_ctrl, &client_ctl); left = value->value.integer.value[0] & 0x3f; right = value->value.integer.value[1] & 0x3f; b = right - left; if (b < 0) { - v = 0x3f - left; - b = (-b) | 0x40; + v = 0x3f - left; + b = (-b) | 0x40; } else { - v = 0x3f - right; + v = 0x3f - right; } /* Do we really know this will always be called with IRQs on? */ spin_lock_irq(&chip->reg_lock); old = cx_read(AUD_VOL_CTL); if (v != (old & 0x3f)) { - cx_write(AUD_VOL_CTL, (old & ~0x3f) | v); - changed = 1; + cx_swrite(SHADOW_AUD_VOL_CTL, AUD_VOL_CTL, (old & ~0x3f) | v); + changed = 1; } - if (cx_read(AUD_BAL_CTL) != b) { - cx_write(AUD_BAL_CTL, b); - changed = 1; + if ((cx_read(AUD_BAL_CTL) & 0x7f) != b) { + cx_write(AUD_BAL_CTL, b); + changed = 1; } spin_unlock_irq(&chip->reg_lock); @@ -618,7 +640,7 @@ static const struct snd_kcontrol_new snd_cx88_volume = { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .access = SNDRV_CTL_ELEM_ACCESS_READWRITE | SNDRV_CTL_ELEM_ACCESS_TLV_READ, - .name = "Playback Volume", + .name = "Analog-TV Volume", .info = snd_cx88_volume_info, .get = snd_cx88_volume_get, .put = snd_cx88_volume_put, @@ -649,7 +671,14 @@ static int snd_cx88_switch_put(struct snd_kcontrol *kcontrol, vol = cx_read(AUD_VOL_CTL); if (value->value.integer.value[0] != !(vol & bit)) { vol ^= bit; - cx_write(AUD_VOL_CTL, vol); + cx_swrite(SHADOW_AUD_VOL_CTL, AUD_VOL_CTL, vol); + /* Pass mute onto any WM8775 */ + if ((1<<6) == bit) { + struct v4l2_control client_ctl; + client_ctl.value = 0 != (vol & bit); + client_ctl.id = V4L2_CID_AUDIO_MUTE; + call_hw(core, WM8775_GID, core, s_ctrl, &client_ctl); + } ret = 1; } spin_unlock_irq(&chip->reg_lock); @@ -658,7 +687,7 @@ static int snd_cx88_switch_put(struct snd_kcontrol *kcontrol, static const struct snd_kcontrol_new snd_cx88_dac_switch = { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, - .name = "Playback Switch", + .name = "Audio-Out Switch", .info = snd_ctl_boolean_mono_info, .get = snd_cx88_switch_get, .put = snd_cx88_switch_put, @@ -667,13 +696,49 @@ static const struct snd_kcontrol_new snd_cx88_dac_switch = { static const struct snd_kcontrol_new snd_cx88_source_switch = { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, - .name = "Capture Switch", + .name = "Analog-TV Switch", .info = snd_ctl_boolean_mono_info, .get = snd_cx88_switch_get, .put = snd_cx88_switch_put, .private_value = (1<<6), }; +static int snd_cx88_alc_get(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *value) +{ + snd_cx88_card_t *chip = snd_kcontrol_chip(kcontrol); + struct cx88_core *core = chip->core; + struct v4l2_control client_ctl; + + client_ctl.id = V4L2_CID_AUDIO_LOUDNESS; + call_hw(core, WM8775_GID, core, g_ctrl, &client_ctl); + value->value.integer.value[0] = client_ctl.value ? 1 : 0; + + return 0; +} + +static int snd_cx88_alc_put(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *value) +{ + snd_cx88_card_t *chip = snd_kcontrol_chip(kcontrol); + struct cx88_core *core = chip->core; + struct v4l2_control client_ctl; + + client_ctl.value = 0 != value->value.integer.value[0]; + client_ctl.id = V4L2_CID_AUDIO_LOUDNESS; + call_hw(core, WM8775_GID, core, s_ctrl, &client_ctl); + + return 0; +} + +static struct snd_kcontrol_new snd_cx88_alc_switch = { + .iface = SNDRV_CTL_ELEM_IFACE_MIXER, + .name = "Line-In ALC Switch", + .info = snd_ctl_boolean_mono_info, + .get = snd_cx88_alc_get, + .put = snd_cx88_alc_put, +}; + /**************************************************************************** Basic Flow for Sound Devices ****************************************************************************/ @@ -795,6 +860,7 @@ static int __devinit cx88_audio_initdev(struct pci_dev *pci, { struct snd_card *card; snd_cx88_card_t *chip; + struct v4l2_subdev *sd; int err; if (devno >= SNDRV_CARDS) @@ -830,6 +896,15 @@ static int __devinit cx88_audio_initdev(struct pci_dev *pci, if (err < 0) goto error; + /* If there's a wm8775 then add a Line-In ALC switch */ + list_for_each_entry(sd, &chip->core->v4l2_dev.subdevs, list) { + if (WM8775_GID == sd->grp_id) { + snd_ctl_add(card, snd_ctl_new1(&snd_cx88_alc_switch, + chip)); + break; + } + } + strcpy (card->driver, "CX88x"); sprintf(card->shortname, "Conexant CX%x", pci->device); sprintf(card->longname, "%s at %#llx", diff --git a/trunk/drivers/media/video/cx88/cx88-cards.c b/trunk/drivers/media/video/cx88/cx88-cards.c index 0ccc2afd7266..9b9e169cce90 100644 --- a/trunk/drivers/media/video/cx88/cx88-cards.c +++ b/trunk/drivers/media/video/cx88/cx88-cards.c @@ -1007,22 +1007,15 @@ static const struct cx88_board cx88_boards[] = { .radio_type = UNSET, .tuner_addr = ADDR_UNSET, .radio_addr = ADDR_UNSET, - .audio_chip = V4L2_IDENT_WM8775, .input = {{ .type = CX88_VMUX_DVB, .vmux = 0, - /* 2: Line-In */ - .audioroute = 2, },{ .type = CX88_VMUX_COMPOSITE1, .vmux = 1, - /* 2: Line-In */ - .audioroute = 2, },{ .type = CX88_VMUX_SVIDEO, .vmux = 2, - /* 2: Line-In */ - .audioroute = 2, }}, .mpeg = CX88_MPEG_DVB, }, diff --git a/trunk/drivers/media/video/cx88/cx88-video.c b/trunk/drivers/media/video/cx88/cx88-video.c index d9249e5a04c9..62cea9549404 100644 --- a/trunk/drivers/media/video/cx88/cx88-video.c +++ b/trunk/drivers/media/video/cx88/cx88-video.c @@ -40,6 +40,7 @@ #include "cx88.h" #include #include +#include MODULE_DESCRIPTION("v4l2 driver module for cx2388x based TV cards"); MODULE_AUTHOR("Gerd Knorr [SuSE Labs]"); @@ -976,6 +977,7 @@ int cx88_set_control(struct cx88_core *core, struct v4l2_control *ctl) const struct cx88_ctrl *c = NULL; u32 value,mask; int i; + struct v4l2_control client_ctl; for (i = 0; i < CX8800_CTLS; i++) { if (cx8800_ctls[i].v.id == ctl->id) { @@ -989,6 +991,27 @@ int cx88_set_control(struct cx88_core *core, struct v4l2_control *ctl) ctl->value = c->v.minimum; if (ctl->value > c->v.maximum) ctl->value = c->v.maximum; + + /* Pass changes onto any WM8775 */ + client_ctl.id = ctl->id; + switch (ctl->id) { + case V4L2_CID_AUDIO_MUTE: + client_ctl.value = ctl->value; + break; + case V4L2_CID_AUDIO_VOLUME: + client_ctl.value = (ctl->value) ? + (0x90 + ctl->value) << 8 : 0; + break; + case V4L2_CID_AUDIO_BALANCE: + client_ctl.value = ctl->value << 9; + break; + default: + client_ctl.id = 0; + break; + } + if (client_ctl.id) + call_hw(core, WM8775_GID, core, s_ctrl, &client_ctl); + mask=c->mask; switch (ctl->id) { case V4L2_CID_AUDIO_BALANCE: @@ -1535,7 +1558,9 @@ static int radio_queryctrl (struct file *file, void *priv, if (c->id < V4L2_CID_BASE || c->id >= V4L2_CID_LASTP1) return -EINVAL; - if (c->id == V4L2_CID_AUDIO_MUTE) { + if (c->id == V4L2_CID_AUDIO_MUTE || + c->id == V4L2_CID_AUDIO_VOLUME || + c->id == V4L2_CID_AUDIO_BALANCE) { for (i = 0; i < CX8800_CTLS; i++) { if (cx8800_ctls[i].v.id == c->id) break; diff --git a/trunk/drivers/media/video/cx88/cx88.h b/trunk/drivers/media/video/cx88/cx88.h index c9981e77416a..e8c732e7ae4f 100644 --- a/trunk/drivers/media/video/cx88/cx88.h +++ b/trunk/drivers/media/video/cx88/cx88.h @@ -398,17 +398,19 @@ static inline struct cx88_core *to_core(struct v4l2_device *v4l2_dev) return container_of(v4l2_dev, struct cx88_core, v4l2_dev); } -#define call_all(core, o, f, args...) \ +#define call_hw(core, grpid, o, f, args...) \ do { \ if (!core->i2c_rc) { \ if (core->gate_ctrl) \ core->gate_ctrl(core, 1); \ - v4l2_device_call_all(&core->v4l2_dev, 0, o, f, ##args); \ + v4l2_device_call_all(&core->v4l2_dev, grpid, o, f, ##args); \ if (core->gate_ctrl) \ core->gate_ctrl(core, 0); \ } \ } while (0) +#define call_all(core, o, f, args...) call_hw(core, 0, o, f, ##args) + struct cx8800_dev; struct cx8802_dev; diff --git a/trunk/drivers/media/video/em28xx/em28xx-video.c b/trunk/drivers/media/video/em28xx/em28xx-video.c index 2c3007280032..908e3bc88303 100644 --- a/trunk/drivers/media/video/em28xx/em28xx-video.c +++ b/trunk/drivers/media/video/em28xx/em28xx-video.c @@ -2377,7 +2377,7 @@ static const struct v4l2_file_operations radio_fops = { .owner = THIS_MODULE, .open = em28xx_v4l2_open, .release = em28xx_v4l2_close, - .unlocked_ioctl = video_ioctl2, + .ioctl = video_ioctl2, }; static const struct v4l2_ioctl_ops radio_ioctl_ops = { diff --git a/trunk/drivers/media/video/wm8775.c b/trunk/drivers/media/video/wm8775.c index fe8ef6419f83..135525649086 100644 --- a/trunk/drivers/media/video/wm8775.c +++ b/trunk/drivers/media/video/wm8775.c @@ -35,6 +35,7 @@ #include #include #include +#include MODULE_DESCRIPTION("wm8775 driver"); MODULE_AUTHOR("Ulf Eklund, Hans Verkuil"); @@ -50,10 +51,16 @@ enum { TOT_REGS }; +#define ALC_HOLD 0x85 /* R17: use zero cross detection, ALC hold time 42.6 ms */ +#define ALC_EN 0x100 /* R17: ALC enable */ + struct wm8775_state { struct v4l2_subdev sd; struct v4l2_ctrl_handler hdl; struct v4l2_ctrl *mute; + struct v4l2_ctrl *vol; + struct v4l2_ctrl *bal; + struct v4l2_ctrl *loud; u8 input; /* Last selected input (0-0xf) */ }; @@ -85,6 +92,30 @@ static int wm8775_write(struct v4l2_subdev *sd, int reg, u16 val) return -1; } +static void wm8775_set_audio(struct v4l2_subdev *sd, int quietly) +{ + struct wm8775_state *state = to_state(sd); + u8 vol_l, vol_r; + int muted = 0 != state->mute->val; + u16 volume = (u16)state->vol->val; + u16 balance = (u16)state->bal->val; + + /* normalize ( 65535 to 0 -> 255 to 0 (+24dB to -103dB) ) */ + vol_l = (min(65536 - balance, 32768) * volume) >> 23; + vol_r = (min(balance, (u16)32768) * volume) >> 23; + + /* Mute */ + if (muted || quietly) + wm8775_write(sd, R21, 0x0c0 | state->input); + + wm8775_write(sd, R14, vol_l | 0x100); /* 0x100= Left channel ADC zero cross enable */ + wm8775_write(sd, R15, vol_r | 0x100); /* 0x100= Right channel ADC zero cross enable */ + + /* Un-mute */ + if (!muted) + wm8775_write(sd, R21, state->input); +} + static int wm8775_s_routing(struct v4l2_subdev *sd, u32 input, u32 output, u32 config) { @@ -102,25 +133,26 @@ static int wm8775_s_routing(struct v4l2_subdev *sd, state->input = input; if (!v4l2_ctrl_g_ctrl(state->mute)) return 0; - wm8775_write(sd, R21, 0x0c0); - wm8775_write(sd, R14, 0x1d4); - wm8775_write(sd, R15, 0x1d4); - wm8775_write(sd, R21, 0x100 + state->input); + if (!v4l2_ctrl_g_ctrl(state->vol)) + return 0; + if (!v4l2_ctrl_g_ctrl(state->bal)) + return 0; + wm8775_set_audio(sd, 1); return 0; } static int wm8775_s_ctrl(struct v4l2_ctrl *ctrl) { struct v4l2_subdev *sd = to_sd(ctrl); - struct wm8775_state *state = to_state(sd); switch (ctrl->id) { case V4L2_CID_AUDIO_MUTE: - wm8775_write(sd, R21, 0x0c0); - wm8775_write(sd, R14, 0x1d4); - wm8775_write(sd, R15, 0x1d4); - if (!ctrl->val) - wm8775_write(sd, R21, 0x100 + state->input); + case V4L2_CID_AUDIO_VOLUME: + case V4L2_CID_AUDIO_BALANCE: + wm8775_set_audio(sd, 0); + return 0; + case V4L2_CID_AUDIO_LOUDNESS: + wm8775_write(sd, R17, (ctrl->val ? ALC_EN : 0) | ALC_HOLD); return 0; } return -EINVAL; @@ -144,16 +176,7 @@ static int wm8775_log_status(struct v4l2_subdev *sd) static int wm8775_s_frequency(struct v4l2_subdev *sd, struct v4l2_frequency *freq) { - struct wm8775_state *state = to_state(sd); - - /* If I remove this, then it can happen that I have no - sound the first time I tune from static to a valid channel. - It's difficult to reproduce and is almost certainly related - to the zero cross detect circuit. */ - wm8775_write(sd, R21, 0x0c0); - wm8775_write(sd, R14, 0x1d4); - wm8775_write(sd, R15, 0x1d4); - wm8775_write(sd, R21, 0x100 + state->input); + wm8775_set_audio(sd, 0); return 0; } @@ -203,6 +226,7 @@ static int wm8775_probe(struct i2c_client *client, { struct wm8775_state *state; struct v4l2_subdev *sd; + int err; /* Check if the adapter supports the needed features */ if (!i2c_check_functionality(client->adapter, I2C_FUNC_SMBUS_BYTE_DATA)) @@ -216,15 +240,21 @@ static int wm8775_probe(struct i2c_client *client, return -ENOMEM; sd = &state->sd; v4l2_i2c_subdev_init(sd, client, &wm8775_ops); + sd->grp_id = WM8775_GID; /* subdev group id */ state->input = 2; - v4l2_ctrl_handler_init(&state->hdl, 1); + v4l2_ctrl_handler_init(&state->hdl, 4); state->mute = v4l2_ctrl_new_std(&state->hdl, &wm8775_ctrl_ops, V4L2_CID_AUDIO_MUTE, 0, 1, 1, 0); + state->vol = v4l2_ctrl_new_std(&state->hdl, &wm8775_ctrl_ops, + V4L2_CID_AUDIO_VOLUME, 0, 65535, (65535+99)/100, 0xCF00); /* 0dB*/ + state->bal = v4l2_ctrl_new_std(&state->hdl, &wm8775_ctrl_ops, + V4L2_CID_AUDIO_BALANCE, 0, 65535, (65535+99)/100, 32768); + state->loud = v4l2_ctrl_new_std(&state->hdl, &wm8775_ctrl_ops, + V4L2_CID_AUDIO_LOUDNESS, 0, 1, 1, 1); sd->ctrl_handler = &state->hdl; - if (state->hdl.error) { - int err = state->hdl.error; - + err = state->hdl.error; + if (err) { v4l2_ctrl_handler_free(&state->hdl); kfree(state); return err; @@ -236,29 +266,25 @@ static int wm8775_probe(struct i2c_client *client, wm8775_write(sd, R23, 0x000); /* Disable zero cross detect timeout */ wm8775_write(sd, R7, 0x000); - /* Left justified, 24-bit mode */ - wm8775_write(sd, R11, 0x021); + /* HPF enable, I2S mode, 24-bit */ + wm8775_write(sd, R11, 0x022); /* Master mode, clock ratio 256fs */ wm8775_write(sd, R12, 0x102); /* Powered up */ wm8775_write(sd, R13, 0x000); - /* ADC gain +2.5dB, enable zero cross */ - wm8775_write(sd, R14, 0x1d4); - /* ADC gain +2.5dB, enable zero cross */ - wm8775_write(sd, R15, 0x1d4); - /* ALC Stereo, ALC target level -1dB FS max gain +8dB */ - wm8775_write(sd, R16, 0x1bf); - /* Enable gain control, use zero cross detection, - ALC hold time 42.6 ms */ - wm8775_write(sd, R17, 0x185); + /* ALC stereo, ALC target level -5dB FS, ALC max gain +8dB */ + wm8775_write(sd, R16, 0x1bb); + /* Set ALC mode and hold time */ + wm8775_write(sd, R17, (state->loud->val ? ALC_EN : 0) | ALC_HOLD); /* ALC gain ramp up delay 34 s, ALC gain ramp down delay 33 ms */ wm8775_write(sd, R18, 0x0a2); /* Enable noise gate, threshold -72dBfs */ wm8775_write(sd, R19, 0x005); - /* Transient window 4ms, lower PGA gain limit -1dB */ - wm8775_write(sd, R20, 0x07a); - /* LRBOTH = 1, use input 2. */ - wm8775_write(sd, R21, 0x102); + /* Transient window 4ms, ALC min gain -5dB */ + wm8775_write(sd, R20, 0x0fb); + + wm8775_set_audio(sd, 1); /* set volume/mute/mux */ + return 0; } diff --git a/trunk/drivers/net/atlx/atl1.c b/trunk/drivers/net/atlx/atl1.c index 3acf5123a6ef..53363108994e 100644 --- a/trunk/drivers/net/atlx/atl1.c +++ b/trunk/drivers/net/atlx/atl1.c @@ -3504,8 +3504,6 @@ static int atl1_set_ringparam(struct net_device *netdev, struct atl1_rfd_ring rfd_old, rfd_new; struct atl1_rrd_ring rrd_old, rrd_new; struct atl1_ring_header rhdr_old, rhdr_new; - struct atl1_smb smb; - struct atl1_cmb cmb; int err; tpd_old = adapter->tpd_ring; @@ -3546,19 +3544,11 @@ static int atl1_set_ringparam(struct net_device *netdev, adapter->rrd_ring = rrd_old; adapter->tpd_ring = tpd_old; adapter->ring_header = rhdr_old; - /* - * Save SMB and CMB, since atl1_free_ring_resources - * will clear them. - */ - smb = adapter->smb; - cmb = adapter->cmb; atl1_free_ring_resources(adapter); adapter->rfd_ring = rfd_new; adapter->rrd_ring = rrd_new; adapter->tpd_ring = tpd_new; adapter->ring_header = rhdr_new; - adapter->smb = smb; - adapter->cmb = cmb; err = atl1_up(adapter); if (err) diff --git a/trunk/drivers/net/cnic.c b/trunk/drivers/net/cnic.c index 6dff32196c92..92bac19ad60a 100644 --- a/trunk/drivers/net/cnic.c +++ b/trunk/drivers/net/cnic.c @@ -940,7 +940,7 @@ static int cnic_alloc_uio_rings(struct cnic_dev *dev, int pages) &udev->l2_ring_map, GFP_KERNEL | __GFP_COMP); if (!udev->l2_ring) - goto err_udev; + return -ENOMEM; udev->l2_buf_size = (cp->l2_rx_ring_size + 1) * cp->l2_single_buf_size; udev->l2_buf_size = PAGE_ALIGN(udev->l2_buf_size); @@ -948,7 +948,7 @@ static int cnic_alloc_uio_rings(struct cnic_dev *dev, int pages) &udev->l2_buf_map, GFP_KERNEL | __GFP_COMP); if (!udev->l2_buf) - goto err_dma; + return -ENOMEM; write_lock(&cnic_dev_lock); list_add(&udev->list, &cnic_udev_list); @@ -959,12 +959,6 @@ static int cnic_alloc_uio_rings(struct cnic_dev *dev, int pages) cp->udev = udev; return 0; - err_dma: - dma_free_coherent(&udev->pdev->dev, udev->l2_ring_size, - udev->l2_ring, udev->l2_ring_map); - err_udev: - kfree(udev); - return -ENOMEM; } static int cnic_init_uio(struct cnic_dev *dev) diff --git a/trunk/drivers/net/ehea/ehea_ethtool.c b/trunk/drivers/net/ehea/ehea_ethtool.c index d6cf502906cf..1f37ee6b2a26 100644 --- a/trunk/drivers/net/ehea/ehea_ethtool.c +++ b/trunk/drivers/net/ehea/ehea_ethtool.c @@ -263,13 +263,6 @@ static void ehea_get_ethtool_stats(struct net_device *dev, static int ehea_set_flags(struct net_device *dev, u32 data) { - /* Avoid changing the VLAN flags */ - if ((data & (ETH_FLAG_RXVLAN | ETH_FLAG_TXVLAN)) != - (ethtool_op_get_flags(dev) & (ETH_FLAG_RXVLAN | - ETH_FLAG_TXVLAN))){ - return -EINVAL; - } - return ethtool_op_set_flags(dev, data, ETH_FLAG_LRO | ETH_FLAG_TXVLAN | ETH_FLAG_RXVLAN); diff --git a/trunk/drivers/net/ppp_generic.c b/trunk/drivers/net/ppp_generic.c index 89294b43c4a9..39659976a1ac 100644 --- a/trunk/drivers/net/ppp_generic.c +++ b/trunk/drivers/net/ppp_generic.c @@ -1285,11 +1285,6 @@ ppp_push(struct ppp *ppp) } #ifdef CONFIG_PPP_MULTILINK -static bool mp_protocol_compress __read_mostly = true; -module_param(mp_protocol_compress, bool, S_IRUGO | S_IWUSR); -MODULE_PARM_DESC(mp_protocol_compress, - "compress protocol id in multilink fragments"); - /* * Divide a packet to be transmitted into fragments and * send them out the individual links. @@ -1352,10 +1347,10 @@ static int ppp_mp_explode(struct ppp *ppp, struct sk_buff *skb) if (nfree == 0 || nfree < navail / 2) return 0; /* can't take now, leave it in xmit_pending */ - /* Do protocol field compression */ + /* Do protocol field compression (XXX this should be optional) */ p = skb->data; len = skb->len; - if (*p == 0 && mp_protocol_compress) { + if (*p == 0) { ++p; --len; } diff --git a/trunk/drivers/net/skfp/skfddi.c b/trunk/drivers/net/skfp/skfddi.c index 16c62659cdd9..0a66fed52e8e 100644 --- a/trunk/drivers/net/skfp/skfddi.c +++ b/trunk/drivers/net/skfp/skfddi.c @@ -412,7 +412,7 @@ static int skfp_driver_init(struct net_device *dev) bp->SharedMemAddr = pci_alloc_consistent(&bp->pdev, bp->SharedMemSize, &bp->SharedMemDMA); - if (!bp->SharedMemAddr) { + if (!bp->SharedMemSize) { printk("could not allocate mem for "); printk("hardware module: %ld byte\n", bp->SharedMemSize); diff --git a/trunk/drivers/net/starfire.c b/trunk/drivers/net/starfire.c index a4f2bd52e546..4adf12422787 100644 --- a/trunk/drivers/net/starfire.c +++ b/trunk/drivers/net/starfire.c @@ -148,7 +148,7 @@ static int full_duplex[MAX_UNITS] = {0, }; * This SUCKS. * We need a much better method to determine if dma_addr_t is 64-bit. */ -#if (defined(__i386__) && defined(CONFIG_HIGHMEM64G)) || defined(__x86_64__) || defined (__ia64__) || defined(__alpha__) || (defined(CONFIG_MIPS) && ((defined(CONFIG_HIGHMEM) && defined(CONFIG_64BIT_PHYS_ADDR)) || defined(CONFIG_64BIT))) || (defined(__powerpc64__) || defined(CONFIG_PHYS_64BIT)) +#if (defined(__i386__) && defined(CONFIG_HIGHMEM64G)) || defined(__x86_64__) || defined (__ia64__) || defined(__alpha__) || defined(__mips64__) || (defined(__mips__) && defined(CONFIG_HIGHMEM) && defined(CONFIG_64BIT_PHYS_ADDR)) || (defined(__powerpc64__) || defined(CONFIG_PHYS_64BIT)) /* 64-bit dma_addr_t */ #define ADDR_64BITS /* This chip uses 64 bit addresses. */ #define netdrv_addr_t __le64 diff --git a/trunk/drivers/net/tg3.c b/trunk/drivers/net/tg3.c index 6f97b7bbcbf1..30ccbb6d097a 100644 --- a/trunk/drivers/net/tg3.c +++ b/trunk/drivers/net/tg3.c @@ -12658,7 +12658,7 @@ static void __devinit tg3_read_vpd(struct tg3 *tp) cnt = pci_read_vpd(tp->pdev, pos, TG3_NVM_VPD_LEN - pos, &vpd_data[pos]); - if (cnt == -ETIMEDOUT || cnt == -EINTR) + if (cnt == -ETIMEDOUT || -EINTR) cnt = 0; else if (cnt < 0) goto out_not_found; diff --git a/trunk/drivers/platform/x86/intel_ips.c b/trunk/drivers/platform/x86/intel_ips.c index f0b3ad13c273..c44a5e8b8b82 100644 --- a/trunk/drivers/platform/x86/intel_ips.c +++ b/trunk/drivers/platform/x86/intel_ips.c @@ -75,7 +75,6 @@ #include #include #include -#include "intel_ips.h" #define PCI_DEVICE_ID_INTEL_THERMAL_SENSOR 0x3b32 @@ -246,7 +245,6 @@ #define thm_writel(off, val) writel((val), ips->regmap + (off)) static const int IPS_ADJUST_PERIOD = 5000; /* ms */ -static bool late_i915_load = false; /* For initial average collection */ static const int IPS_SAMPLE_PERIOD = 200; /* ms */ @@ -341,9 +339,6 @@ struct ips_driver { u64 orig_turbo_ratios; }; -static bool -ips_gpu_turbo_enabled(struct ips_driver *ips); - /** * ips_cpu_busy - is CPU busy? * @ips: IPS driver struct @@ -522,7 +517,7 @@ static void ips_disable_cpu_turbo(struct ips_driver *ips) */ static bool ips_gpu_busy(struct ips_driver *ips) { - if (!ips_gpu_turbo_enabled(ips)) + if (!ips->gpu_turbo_enabled) return false; return ips->gpu_busy(); @@ -537,7 +532,7 @@ static bool ips_gpu_busy(struct ips_driver *ips) */ static void ips_gpu_raise(struct ips_driver *ips) { - if (!ips_gpu_turbo_enabled(ips)) + if (!ips->gpu_turbo_enabled) return; if (!ips->gpu_raise()) @@ -554,7 +549,7 @@ static void ips_gpu_raise(struct ips_driver *ips) */ static void ips_gpu_lower(struct ips_driver *ips) { - if (!ips_gpu_turbo_enabled(ips)) + if (!ips->gpu_turbo_enabled) return; if (!ips->gpu_lower()) @@ -1459,31 +1454,6 @@ static bool ips_get_i915_syms(struct ips_driver *ips) return false; } -static bool -ips_gpu_turbo_enabled(struct ips_driver *ips) -{ - if (!ips->gpu_busy && late_i915_load) { - if (ips_get_i915_syms(ips)) { - dev_info(&ips->dev->dev, - "i915 driver attached, reenabling gpu turbo\n"); - ips->gpu_turbo_enabled = !(thm_readl(THM_HTS) & HTS_GTD_DIS); - } - } - - return ips->gpu_turbo_enabled; -} - -void -ips_link_to_i915_driver() -{ - /* We can't cleanly get at the various ips_driver structs from - * this caller (the i915 driver), so just set a flag saying - * that it's time to try getting the symbols again. - */ - late_i915_load = true; -} -EXPORT_SYMBOL_GPL(ips_link_to_i915_driver); - static DEFINE_PCI_DEVICE_TABLE(ips_id_table) = { { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_THERMAL_SENSOR), }, diff --git a/trunk/drivers/platform/x86/intel_ips.h b/trunk/drivers/platform/x86/intel_ips.h deleted file mode 100644 index 73299beff5b3..000000000000 --- a/trunk/drivers/platform/x86/intel_ips.h +++ /dev/null @@ -1,21 +0,0 @@ -/* - * Copyright (c) 2010 Intel Corporation - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - */ - -void ips_link_to_i915_driver(void); diff --git a/trunk/drivers/platform/x86/intel_scu_ipc.c b/trunk/drivers/platform/x86/intel_scu_ipc.c index ca35b0ce944a..41a9e34899ac 100644 --- a/trunk/drivers/platform/x86/intel_scu_ipc.c +++ b/trunk/drivers/platform/x86/intel_scu_ipc.c @@ -26,7 +26,6 @@ #include #include #include -#include /* IPC defines the following message types */ #define IPCMSG_WATCHDOG_TIMER 0xF8 /* Set Kernel Watchdog Threshold */ @@ -700,9 +699,6 @@ static int ipc_probe(struct pci_dev *dev, const struct pci_device_id *id) iounmap(ipcdev.ipc_base); return -ENOMEM; } - - intel_scu_devices_create(); - return 0; } @@ -724,7 +720,6 @@ static void ipc_remove(struct pci_dev *pdev) iounmap(ipcdev.ipc_base); iounmap(ipcdev.i2c_base); ipcdev.pdev = NULL; - intel_scu_devices_destroy(); } static const struct pci_device_id pci_ids[] = { diff --git a/trunk/drivers/rtc/Kconfig b/trunk/drivers/rtc/Kconfig index 4941cade319f..2883428d5ac8 100644 --- a/trunk/drivers/rtc/Kconfig +++ b/trunk/drivers/rtc/Kconfig @@ -463,18 +463,6 @@ config RTC_DRV_CMOS This driver can also be built as a module. If so, the module will be called rtc-cmos. -config RTC_DRV_VRTC - tristate "Virtual RTC for Moorestown platforms" - depends on X86_MRST - default y if X86_MRST - - help - Say "yes" here to get direct support for the real time clock - found on Moorestown platforms. The VRTC is a emulated RTC that - derives its clock source from a real RTC in the PMIC. The MC146818 - style programming interface is mostly conserved, but any - updates are done via IPC calls to the system controller FW. - config RTC_DRV_DS1216 tristate "Dallas DS1216" depends on SNI_RM diff --git a/trunk/drivers/rtc/Makefile b/trunk/drivers/rtc/Makefile index 2afdaf3ff986..4c2832df4697 100644 --- a/trunk/drivers/rtc/Makefile +++ b/trunk/drivers/rtc/Makefile @@ -30,7 +30,6 @@ obj-$(CONFIG_RTC_DRV_CMOS) += rtc-cmos.o obj-$(CONFIG_RTC_DRV_COH901331) += rtc-coh901331.o obj-$(CONFIG_RTC_DRV_DAVINCI) += rtc-davinci.o obj-$(CONFIG_RTC_DRV_DM355EVM) += rtc-dm355evm.o -obj-$(CONFIG_RTC_DRV_VRTC) += rtc-mrst.o obj-$(CONFIG_RTC_DRV_DS1216) += rtc-ds1216.o obj-$(CONFIG_RTC_DRV_DS1286) += rtc-ds1286.o obj-$(CONFIG_RTC_DRV_DS1302) += rtc-ds1302.o diff --git a/trunk/drivers/rtc/rtc-mrst.c b/trunk/drivers/rtc/rtc-mrst.c deleted file mode 100644 index bcd0cf63eb16..000000000000 --- a/trunk/drivers/rtc/rtc-mrst.c +++ /dev/null @@ -1,582 +0,0 @@ -/* - * rtc-mrst.c: Driver for Moorestown virtual RTC - * - * (C) Copyright 2009 Intel Corporation - * Author: Jacob Pan (jacob.jun.pan@intel.com) - * Feng Tang (feng.tang@intel.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; version 2 - * of the License. - * - * Note: - * VRTC is emulated by system controller firmware, the real HW - * RTC is located in the PMIC device. SCU FW shadows PMIC RTC - * in a memory mapped IO space that is visible to the host IA - * processor. - * - * This driver is based upon drivers/rtc/rtc-cmos.c - */ - -/* - * Note: - * * vRTC only supports binary mode and 24H mode - * * vRTC only support PIE and AIE, no UIE, and its PIE only happens - * at 23:59:59pm everyday, no support for adjustable frequency - * * Alarm function is also limited to hr/min/sec. - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -struct mrst_rtc { - struct rtc_device *rtc; - struct device *dev; - int irq; - struct resource *iomem; - - u8 enabled_wake; - u8 suspend_ctrl; -}; - -static const char driver_name[] = "rtc_mrst"; - -#define RTC_IRQMASK (RTC_PF | RTC_AF) - -static inline int is_intr(u8 rtc_intr) -{ - if (!(rtc_intr & RTC_IRQF)) - return 0; - return rtc_intr & RTC_IRQMASK; -} - -/* - * rtc_time's year contains the increment over 1900, but vRTC's YEAR - * register can't be programmed to value larger than 0x64, so vRTC - * driver chose to use 1960 (1970 is UNIX time start point) as the base, - * and does the translation at read/write time. - * - * Why not just use 1970 as the offset? it's because using 1960 will - * make it consistent in leap year setting for both vrtc and low-level - * physical rtc devices. - */ -static int mrst_read_time(struct device *dev, struct rtc_time *time) -{ - unsigned long flags; - - if (rtc_is_updating()) - mdelay(20); - - spin_lock_irqsave(&rtc_lock, flags); - time->tm_sec = vrtc_cmos_read(RTC_SECONDS); - time->tm_min = vrtc_cmos_read(RTC_MINUTES); - time->tm_hour = vrtc_cmos_read(RTC_HOURS); - time->tm_mday = vrtc_cmos_read(RTC_DAY_OF_MONTH); - time->tm_mon = vrtc_cmos_read(RTC_MONTH); - time->tm_year = vrtc_cmos_read(RTC_YEAR); - spin_unlock_irqrestore(&rtc_lock, flags); - - /* Adjust for the 1960/1900 */ - time->tm_year += 60; - time->tm_mon--; - return RTC_24H; -} - -static int mrst_set_time(struct device *dev, struct rtc_time *time) -{ - int ret; - unsigned long flags; - unsigned char mon, day, hrs, min, sec; - unsigned int yrs; - - yrs = time->tm_year; - mon = time->tm_mon + 1; /* tm_mon starts at zero */ - day = time->tm_mday; - hrs = time->tm_hour; - min = time->tm_min; - sec = time->tm_sec; - - if (yrs < 70 || yrs > 138) - return -EINVAL; - yrs -= 60; - - spin_lock_irqsave(&rtc_lock, flags); - - vrtc_cmos_write(yrs, RTC_YEAR); - vrtc_cmos_write(mon, RTC_MONTH); - vrtc_cmos_write(day, RTC_DAY_OF_MONTH); - vrtc_cmos_write(hrs, RTC_HOURS); - vrtc_cmos_write(min, RTC_MINUTES); - vrtc_cmos_write(sec, RTC_SECONDS); - - spin_unlock_irqrestore(&rtc_lock, flags); - - ret = intel_scu_ipc_simple_command(IPCMSG_VRTC, IPC_CMD_VRTC_SETTIME); - return ret; -} - -static int mrst_read_alarm(struct device *dev, struct rtc_wkalrm *t) -{ - struct mrst_rtc *mrst = dev_get_drvdata(dev); - unsigned char rtc_control; - - if (mrst->irq <= 0) - return -EIO; - - /* Basic alarms only support hour, minute, and seconds fields. - * Some also support day and month, for alarms up to a year in - * the future. - */ - t->time.tm_mday = -1; - t->time.tm_mon = -1; - t->time.tm_year = -1; - - /* vRTC only supports binary mode */ - spin_lock_irq(&rtc_lock); - t->time.tm_sec = vrtc_cmos_read(RTC_SECONDS_ALARM); - t->time.tm_min = vrtc_cmos_read(RTC_MINUTES_ALARM); - t->time.tm_hour = vrtc_cmos_read(RTC_HOURS_ALARM); - - rtc_control = vrtc_cmos_read(RTC_CONTROL); - spin_unlock_irq(&rtc_lock); - - t->enabled = !!(rtc_control & RTC_AIE); - t->pending = 0; - - return 0; -} - -static void mrst_checkintr(struct mrst_rtc *mrst, unsigned char rtc_control) -{ - unsigned char rtc_intr; - - /* - * NOTE after changing RTC_xIE bits we always read INTR_FLAGS; - * allegedly some older rtcs need that to handle irqs properly - */ - rtc_intr = vrtc_cmos_read(RTC_INTR_FLAGS); - rtc_intr &= (rtc_control & RTC_IRQMASK) | RTC_IRQF; - if (is_intr(rtc_intr)) - rtc_update_irq(mrst->rtc, 1, rtc_intr); -} - -static void mrst_irq_enable(struct mrst_rtc *mrst, unsigned char mask) -{ - unsigned char rtc_control; - - /* - * Flush any pending IRQ status, notably for update irqs, - * before we enable new IRQs - */ - rtc_control = vrtc_cmos_read(RTC_CONTROL); - mrst_checkintr(mrst, rtc_control); - - rtc_control |= mask; - vrtc_cmos_write(rtc_control, RTC_CONTROL); - - mrst_checkintr(mrst, rtc_control); -} - -static void mrst_irq_disable(struct mrst_rtc *mrst, unsigned char mask) -{ - unsigned char rtc_control; - - rtc_control = vrtc_cmos_read(RTC_CONTROL); - rtc_control &= ~mask; - vrtc_cmos_write(rtc_control, RTC_CONTROL); - mrst_checkintr(mrst, rtc_control); -} - -static int mrst_set_alarm(struct device *dev, struct rtc_wkalrm *t) -{ - struct mrst_rtc *mrst = dev_get_drvdata(dev); - unsigned char hrs, min, sec; - int ret = 0; - - if (!mrst->irq) - return -EIO; - - hrs = t->time.tm_hour; - min = t->time.tm_min; - sec = t->time.tm_sec; - - spin_lock_irq(&rtc_lock); - /* Next rtc irq must not be from previous alarm setting */ - mrst_irq_disable(mrst, RTC_AIE); - - /* Update alarm */ - vrtc_cmos_write(hrs, RTC_HOURS_ALARM); - vrtc_cmos_write(min, RTC_MINUTES_ALARM); - vrtc_cmos_write(sec, RTC_SECONDS_ALARM); - - spin_unlock_irq(&rtc_lock); - - ret = intel_scu_ipc_simple_command(IPCMSG_VRTC, IPC_CMD_VRTC_SETALARM); - if (ret) - return ret; - - spin_lock_irq(&rtc_lock); - if (t->enabled) - mrst_irq_enable(mrst, RTC_AIE); - - spin_unlock_irq(&rtc_lock); - - return 0; -} - -static int mrst_irq_set_state(struct device *dev, int enabled) -{ - struct mrst_rtc *mrst = dev_get_drvdata(dev); - unsigned long flags; - - if (!mrst->irq) - return -ENXIO; - - spin_lock_irqsave(&rtc_lock, flags); - - if (enabled) - mrst_irq_enable(mrst, RTC_PIE); - else - mrst_irq_disable(mrst, RTC_PIE); - - spin_unlock_irqrestore(&rtc_lock, flags); - return 0; -} - -#if defined(CONFIG_RTC_INTF_DEV) || defined(CONFIG_RTC_INTF_DEV_MODULE) - -/* Currently, the vRTC doesn't support UIE ON/OFF */ -static int -mrst_rtc_ioctl(struct device *dev, unsigned int cmd, unsigned long arg) -{ - struct mrst_rtc *mrst = dev_get_drvdata(dev); - unsigned long flags; - - switch (cmd) { - case RTC_AIE_OFF: - case RTC_AIE_ON: - if (!mrst->irq) - return -EINVAL; - break; - default: - /* PIE ON/OFF is handled by mrst_irq_set_state() */ - return -ENOIOCTLCMD; - } - - spin_lock_irqsave(&rtc_lock, flags); - switch (cmd) { - case RTC_AIE_OFF: /* alarm off */ - mrst_irq_disable(mrst, RTC_AIE); - break; - case RTC_AIE_ON: /* alarm on */ - mrst_irq_enable(mrst, RTC_AIE); - break; - } - spin_unlock_irqrestore(&rtc_lock, flags); - return 0; -} - -#else -#define mrst_rtc_ioctl NULL -#endif - -#if defined(CONFIG_RTC_INTF_PROC) || defined(CONFIG_RTC_INTF_PROC_MODULE) - -static int mrst_procfs(struct device *dev, struct seq_file *seq) -{ - unsigned char rtc_control, valid; - - spin_lock_irq(&rtc_lock); - rtc_control = vrtc_cmos_read(RTC_CONTROL); - valid = vrtc_cmos_read(RTC_VALID); - spin_unlock_irq(&rtc_lock); - - return seq_printf(seq, - "periodic_IRQ\t: %s\n" - "alarm\t\t: %s\n" - "BCD\t\t: no\n" - "periodic_freq\t: daily (not adjustable)\n", - (rtc_control & RTC_PIE) ? "on" : "off", - (rtc_control & RTC_AIE) ? "on" : "off"); -} - -#else -#define mrst_procfs NULL -#endif - -static const struct rtc_class_ops mrst_rtc_ops = { - .ioctl = mrst_rtc_ioctl, - .read_time = mrst_read_time, - .set_time = mrst_set_time, - .read_alarm = mrst_read_alarm, - .set_alarm = mrst_set_alarm, - .proc = mrst_procfs, - .irq_set_state = mrst_irq_set_state, -}; - -static struct mrst_rtc mrst_rtc; - -/* - * When vRTC IRQ is captured by SCU FW, FW will clear the AIE bit in - * Reg B, so no need for this driver to clear it - */ -static irqreturn_t mrst_rtc_irq(int irq, void *p) -{ - u8 irqstat; - - spin_lock(&rtc_lock); - /* This read will clear all IRQ flags inside Reg C */ - irqstat = vrtc_cmos_read(RTC_INTR_FLAGS); - spin_unlock(&rtc_lock); - - irqstat &= RTC_IRQMASK | RTC_IRQF; - if (is_intr(irqstat)) { - rtc_update_irq(p, 1, irqstat); - return IRQ_HANDLED; - } - return IRQ_NONE; -} - -static int __init -vrtc_mrst_do_probe(struct device *dev, struct resource *iomem, int rtc_irq) -{ - int retval = 0; - unsigned char rtc_control; - - /* There can be only one ... */ - if (mrst_rtc.dev) - return -EBUSY; - - if (!iomem) - return -ENODEV; - - iomem = request_mem_region(iomem->start, - iomem->end + 1 - iomem->start, - driver_name); - if (!iomem) { - dev_dbg(dev, "i/o mem already in use.\n"); - return -EBUSY; - } - - mrst_rtc.irq = rtc_irq; - mrst_rtc.iomem = iomem; - - mrst_rtc.rtc = rtc_device_register(driver_name, dev, - &mrst_rtc_ops, THIS_MODULE); - if (IS_ERR(mrst_rtc.rtc)) { - retval = PTR_ERR(mrst_rtc.rtc); - goto cleanup0; - } - - mrst_rtc.dev = dev; - dev_set_drvdata(dev, &mrst_rtc); - rename_region(iomem, dev_name(&mrst_rtc.rtc->dev)); - - spin_lock_irq(&rtc_lock); - mrst_irq_disable(&mrst_rtc, RTC_PIE | RTC_AIE); - rtc_control = vrtc_cmos_read(RTC_CONTROL); - spin_unlock_irq(&rtc_lock); - - if (!(rtc_control & RTC_24H) || (rtc_control & (RTC_DM_BINARY))) - dev_dbg(dev, "TODO: support more than 24-hr BCD mode\n"); - - if (rtc_irq) { - retval = request_irq(rtc_irq, mrst_rtc_irq, - IRQF_DISABLED, dev_name(&mrst_rtc.rtc->dev), - mrst_rtc.rtc); - if (retval < 0) { - dev_dbg(dev, "IRQ %d is already in use, err %d\n", - rtc_irq, retval); - goto cleanup1; - } - } - dev_dbg(dev, "initialised\n"); - return 0; - -cleanup1: - mrst_rtc.dev = NULL; - rtc_device_unregister(mrst_rtc.rtc); -cleanup0: - release_region(iomem->start, iomem->end + 1 - iomem->start); - dev_err(dev, "rtc-mrst: unable to initialise\n"); - return retval; -} - -static void rtc_mrst_do_shutdown(void) -{ - spin_lock_irq(&rtc_lock); - mrst_irq_disable(&mrst_rtc, RTC_IRQMASK); - spin_unlock_irq(&rtc_lock); -} - -static void __exit rtc_mrst_do_remove(struct device *dev) -{ - struct mrst_rtc *mrst = dev_get_drvdata(dev); - struct resource *iomem; - - rtc_mrst_do_shutdown(); - - if (mrst->irq) - free_irq(mrst->irq, mrst->rtc); - - rtc_device_unregister(mrst->rtc); - mrst->rtc = NULL; - - iomem = mrst->iomem; - release_region(iomem->start, iomem->end + 1 - iomem->start); - mrst->iomem = NULL; - - mrst->dev = NULL; - dev_set_drvdata(dev, NULL); -} - -#ifdef CONFIG_PM -static int mrst_suspend(struct device *dev, pm_message_t mesg) -{ - struct mrst_rtc *mrst = dev_get_drvdata(dev); - unsigned char tmp; - - /* Only the alarm might be a wakeup event source */ - spin_lock_irq(&rtc_lock); - mrst->suspend_ctrl = tmp = vrtc_cmos_read(RTC_CONTROL); - if (tmp & (RTC_PIE | RTC_AIE)) { - unsigned char mask; - - if (device_may_wakeup(dev)) - mask = RTC_IRQMASK & ~RTC_AIE; - else - mask = RTC_IRQMASK; - tmp &= ~mask; - vrtc_cmos_write(tmp, RTC_CONTROL); - - mrst_checkintr(mrst, tmp); - } - spin_unlock_irq(&rtc_lock); - - if (tmp & RTC_AIE) { - mrst->enabled_wake = 1; - enable_irq_wake(mrst->irq); - } - - dev_dbg(&mrst_rtc.rtc->dev, "suspend%s, ctrl %02x\n", - (tmp & RTC_AIE) ? ", alarm may wake" : "", - tmp); - - return 0; -} - -/* - * We want RTC alarms to wake us from the deep power saving state - */ -static inline int mrst_poweroff(struct device *dev) -{ - return mrst_suspend(dev, PMSG_HIBERNATE); -} - -static int mrst_resume(struct device *dev) -{ - struct mrst_rtc *mrst = dev_get_drvdata(dev); - unsigned char tmp = mrst->suspend_ctrl; - - /* Re-enable any irqs previously active */ - if (tmp & RTC_IRQMASK) { - unsigned char mask; - - if (mrst->enabled_wake) { - disable_irq_wake(mrst->irq); - mrst->enabled_wake = 0; - } - - spin_lock_irq(&rtc_lock); - do { - vrtc_cmos_write(tmp, RTC_CONTROL); - - mask = vrtc_cmos_read(RTC_INTR_FLAGS); - mask &= (tmp & RTC_IRQMASK) | RTC_IRQF; - if (!is_intr(mask)) - break; - - rtc_update_irq(mrst->rtc, 1, mask); - tmp &= ~RTC_AIE; - } while (mask & RTC_AIE); - spin_unlock_irq(&rtc_lock); - } - - dev_dbg(&mrst_rtc.rtc->dev, "resume, ctrl %02x\n", tmp); - - return 0; -} - -#else -#define mrst_suspend NULL -#define mrst_resume NULL - -static inline int mrst_poweroff(struct device *dev) -{ - return -ENOSYS; -} - -#endif - -static int __init vrtc_mrst_platform_probe(struct platform_device *pdev) -{ - return vrtc_mrst_do_probe(&pdev->dev, - platform_get_resource(pdev, IORESOURCE_MEM, 0), - platform_get_irq(pdev, 0)); -} - -static int __exit vrtc_mrst_platform_remove(struct platform_device *pdev) -{ - rtc_mrst_do_remove(&pdev->dev); - return 0; -} - -static void vrtc_mrst_platform_shutdown(struct platform_device *pdev) -{ - if (system_state == SYSTEM_POWER_OFF && !mrst_poweroff(&pdev->dev)) - return; - - rtc_mrst_do_shutdown(); -} - -MODULE_ALIAS("platform:vrtc_mrst"); - -static struct platform_driver vrtc_mrst_platform_driver = { - .probe = vrtc_mrst_platform_probe, - .remove = __exit_p(vrtc_mrst_platform_remove), - .shutdown = vrtc_mrst_platform_shutdown, - .driver = { - .name = (char *) driver_name, - .suspend = mrst_suspend, - .resume = mrst_resume, - } -}; - -static int __init vrtc_mrst_init(void) -{ - return platform_driver_register(&vrtc_mrst_platform_driver); -} - -static void __exit vrtc_mrst_exit(void) -{ - platform_driver_unregister(&vrtc_mrst_platform_driver); -} - -module_init(vrtc_mrst_init); -module_exit(vrtc_mrst_exit); - -MODULE_AUTHOR("Jacob Pan; Feng Tang"); -MODULE_DESCRIPTION("Driver for Moorestown virtual RTC"); -MODULE_LICENSE("GPL"); diff --git a/trunk/drivers/spi/coldfire_qspi.c b/trunk/drivers/spi/coldfire_qspi.c index 8856bcca9d29..052b3c7fa6a0 100644 --- a/trunk/drivers/spi/coldfire_qspi.c +++ b/trunk/drivers/spi/coldfire_qspi.c @@ -317,7 +317,7 @@ static void mcfqspi_work(struct work_struct *work) msg = container_of(mcfqspi->msgq.next, struct spi_message, queue); - list_del_init(&msg->queue); + list_del_init(&mcfqspi->msgq); spin_unlock_irqrestore(&mcfqspi->lock, flags); spi = msg->spi; diff --git a/trunk/drivers/spi/omap2_mcspi.c b/trunk/drivers/spi/omap2_mcspi.c index 951a160fc27f..2a651e61bfbf 100644 --- a/trunk/drivers/spi/omap2_mcspi.c +++ b/trunk/drivers/spi/omap2_mcspi.c @@ -1305,49 +1305,10 @@ static int __exit omap2_mcspi_remove(struct platform_device *pdev) /* work with hotplug and coldplug */ MODULE_ALIAS("platform:omap2_mcspi"); -#ifdef CONFIG_SUSPEND -/* - * When SPI wake up from off-mode, CS is in activate state. If it was in - * unactive state when driver was suspend, then force it to unactive state at - * wake up. - */ -static int omap2_mcspi_resume(struct device *dev) -{ - struct spi_master *master = dev_get_drvdata(dev); - struct omap2_mcspi *mcspi = spi_master_get_devdata(master); - struct omap2_mcspi_cs *cs; - - omap2_mcspi_enable_clocks(mcspi); - list_for_each_entry(cs, &omap2_mcspi_ctx[master->bus_num - 1].cs, - node) { - if ((cs->chconf0 & OMAP2_MCSPI_CHCONF_FORCE) == 0) { - - /* - * We need to toggle CS state for OMAP take this - * change in account. - */ - MOD_REG_BIT(cs->chconf0, OMAP2_MCSPI_CHCONF_FORCE, 1); - __raw_writel(cs->chconf0, cs->base + OMAP2_MCSPI_CHCONF0); - MOD_REG_BIT(cs->chconf0, OMAP2_MCSPI_CHCONF_FORCE, 0); - __raw_writel(cs->chconf0, cs->base + OMAP2_MCSPI_CHCONF0); - } - } - omap2_mcspi_disable_clocks(mcspi); - return 0; -} -#else -#define omap2_mcspi_resume NULL -#endif - -static const struct dev_pm_ops omap2_mcspi_pm_ops = { - .resume = omap2_mcspi_resume, -}; - static struct platform_driver omap2_mcspi_driver = { .driver = { .name = "omap2_mcspi", .owner = THIS_MODULE, - .pm = &omap2_mcspi_pm_ops }, .remove = __exit_p(omap2_mcspi_remove), }; diff --git a/trunk/drivers/staging/zram/zram_drv.c b/trunk/drivers/staging/zram/zram_drv.c index d0e9e0207539..8c3c057aa847 100644 --- a/trunk/drivers/staging/zram/zram_drv.c +++ b/trunk/drivers/staging/zram/zram_drv.c @@ -435,6 +435,12 @@ static int zram_make_request(struct request_queue *queue, struct bio *bio) int ret = 0; struct zram *zram = queue->queuedata; + if (unlikely(!zram->init_done)) { + set_bit(BIO_UPTODATE, &bio->bi_flags); + bio_endio(bio, 0); + return 0; + } + if (!valid_io_request(zram, bio)) { zram_stat64_inc(zram, &zram->stats.invalid_io); bio_io_error(bio); diff --git a/trunk/drivers/usb/atm/ueagle-atm.c b/trunk/drivers/usb/atm/ueagle-atm.c index 99ac70e32556..44447f54942f 100644 --- a/trunk/drivers/usb/atm/ueagle-atm.c +++ b/trunk/drivers/usb/atm/ueagle-atm.c @@ -2206,11 +2206,8 @@ static int uea_boot(struct uea_softc *sc) goto err1; } - /* Create worker thread, but don't start it here. Start it after - * all usbatm generic initialization is done. - */ - sc->kthread = kthread_create(uea_kthread, sc, "ueagle-atm"); - if (IS_ERR(sc->kthread)) { + sc->kthread = kthread_run(uea_kthread, sc, "ueagle-atm"); + if (sc->kthread == ERR_PTR(-ENOMEM)) { uea_err(INS_TO_USBDEV(sc), "failed to create thread\n"); goto err2; } @@ -2627,7 +2624,6 @@ static struct usbatm_driver uea_usbatm_driver = { static int uea_probe(struct usb_interface *intf, const struct usb_device_id *id) { struct usb_device *usb = interface_to_usbdev(intf); - int ret; uea_enters(usb); uea_info(usb, "ADSL device founded vid (%#X) pid (%#X) Rev (%#X): %s\n", @@ -2641,19 +2637,7 @@ static int uea_probe(struct usb_interface *intf, const struct usb_device_id *id) if (UEA_IS_PREFIRM(id)) return uea_load_firmware(usb, UEA_CHIP_VERSION(id)); - ret = usbatm_usb_probe(intf, id, &uea_usbatm_driver); - if (ret == 0) { - struct usbatm_data *usbatm = usb_get_intfdata(intf); - struct uea_softc *sc = usbatm->driver_data; - - /* Ensure carrier is initialized to off as early as possible */ - UPDATE_ATM_SIGNAL(ATM_PHY_SIG_LOST); - - /* Only start the worker thread when all init is done */ - wake_up_process(sc->kthread); - } - - return ret; + return usbatm_usb_probe(intf, id, &uea_usbatm_driver); } static void uea_disconnect(struct usb_interface *intf) diff --git a/trunk/drivers/watchdog/hpwdt.c b/trunk/drivers/watchdog/hpwdt.c index dea7b5bf6e2c..3d77116e4634 100644 --- a/trunk/drivers/watchdog/hpwdt.c +++ b/trunk/drivers/watchdog/hpwdt.c @@ -642,14 +642,19 @@ static struct notifier_block die_notifier = { */ #ifdef CONFIG_HPWDT_NMI_DECODING -#ifdef CONFIG_X86_LOCAL_APIC +#ifdef ARCH_HAS_NMI_WATCHDOG static void __devinit hpwdt_check_nmi_decoding(struct pci_dev *dev) { /* * If nmi_watchdog is turned off then we can turn on * our nmi decoding capability. */ - hpwdt_nmi_decoding = 1; + if (!nmi_watchdog_active()) + hpwdt_nmi_decoding = 1; + else + dev_warn(&dev->dev, "NMI decoding is disabled. To enable this " + "functionality you must reboot with nmi_watchdog=0 " + "and load the hpwdt driver with priority=1.\n"); } #else static void __devinit hpwdt_check_nmi_decoding(struct pci_dev *dev) @@ -657,7 +662,7 @@ static void __devinit hpwdt_check_nmi_decoding(struct pci_dev *dev) dev_warn(&dev->dev, "NMI decoding is disabled. " "Your kernel does not support a NMI Watchdog.\n"); } -#endif /* CONFIG_X86_LOCAL_APIC */ +#endif /* ARCH_HAS_NMI_WATCHDOG */ static int __devinit hpwdt_init_nmi_decoding(struct pci_dev *dev) { diff --git a/trunk/fs/gfs2/bmap.c b/trunk/fs/gfs2/bmap.c index 3c4039d5eef1..5476c066d4ee 100644 --- a/trunk/fs/gfs2/bmap.c +++ b/trunk/fs/gfs2/bmap.c @@ -763,7 +763,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh, int metadata; unsigned int revokes = 0; int x; - int error = 0; + int error; if (!*top) sm->sm_first = 0; @@ -780,11 +780,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh, if (metadata) revokes = (height) ? sdp->sd_inptrs : sdp->sd_diptrs; - if (ip != GFS2_I(sdp->sd_rindex)) - error = gfs2_rindex_hold(sdp, &ip->i_alloc->al_ri_gh); - else if (!sdp->sd_rgrps) - error = gfs2_ri_update(ip); - + error = gfs2_rindex_hold(sdp, &ip->i_alloc->al_ri_gh); if (error) return error; @@ -883,8 +879,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh, out_rlist: gfs2_rlist_free(&rlist); out: - if (ip != GFS2_I(sdp->sd_rindex)) - gfs2_glock_dq_uninit(&ip->i_alloc->al_ri_gh); + gfs2_glock_dq_uninit(&ip->i_alloc->al_ri_gh); return error; } diff --git a/trunk/fs/gfs2/glock.c b/trunk/fs/gfs2/glock.c index 08a8beb152e6..f92c17704169 100644 --- a/trunk/fs/gfs2/glock.c +++ b/trunk/fs/gfs2/glock.c @@ -541,6 +541,21 @@ static void finish_xmote(struct gfs2_glock *gl, unsigned int ret) spin_unlock(&gl->gl_spin); } +static unsigned int gfs2_lm_lock(struct gfs2_sbd *sdp, void *lock, + unsigned int req_state, + unsigned int flags) +{ + int ret = LM_OUT_ERROR; + + if (!sdp->sd_lockstruct.ls_ops->lm_lock) + return req_state == LM_ST_UNLOCKED ? 0 : req_state; + + if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) + ret = sdp->sd_lockstruct.ls_ops->lm_lock(lock, + req_state, flags); + return ret; +} + /** * do_xmote - Calls the DLM to change the state of a lock * @gl: The lock state @@ -560,14 +575,13 @@ __acquires(&gl->gl_spin) lck_flags &= (LM_FLAG_TRY | LM_FLAG_TRY_1CB | LM_FLAG_NOEXP | LM_FLAG_PRIORITY); - GLOCK_BUG_ON(gl, gl->gl_state == target); - GLOCK_BUG_ON(gl, gl->gl_state == gl->gl_target); + BUG_ON(gl->gl_state == target); + BUG_ON(gl->gl_state == gl->gl_target); if ((target == LM_ST_UNLOCKED || target == LM_ST_DEFERRED) && glops->go_inval) { set_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags); do_error(gl, 0); /* Fail queued try locks */ } - gl->gl_req = target; spin_unlock(&gl->gl_spin); if (glops->go_xmote_th) glops->go_xmote_th(gl); @@ -580,17 +594,15 @@ __acquires(&gl->gl_spin) gl->gl_state == LM_ST_DEFERRED) && !(lck_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))) lck_flags |= LM_FLAG_TRY_1CB; + ret = gfs2_lm_lock(sdp, gl, target, lck_flags); - if (sdp->sd_lockstruct.ls_ops->lm_lock) { - /* lock_dlm */ - ret = sdp->sd_lockstruct.ls_ops->lm_lock(gl, target, lck_flags); - GLOCK_BUG_ON(gl, ret); - } else { /* lock_nolock */ - finish_xmote(gl, target); + if (!(ret & LM_OUT_ASYNC)) { + finish_xmote(gl, ret); if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) gfs2_glock_put(gl); + } else { + GLOCK_BUG_ON(gl, ret != LM_OUT_ASYNC); } - spin_lock(&gl->gl_spin); } @@ -939,22 +951,17 @@ int gfs2_glock_wait(struct gfs2_holder *gh) void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...) { - struct va_format vaf; va_list args; va_start(args, fmt); - if (seq) { struct gfs2_glock_iter *gi = seq->private; vsprintf(gi->string, fmt, args); seq_printf(seq, gi->string); } else { - vaf.fmt = fmt; - vaf.va = &args; - - printk(KERN_ERR " %pV", &vaf); + printk(KERN_ERR " "); + vprintk(fmt, args); } - va_end(args); } @@ -1354,28 +1361,24 @@ static int gfs2_should_freeze(const struct gfs2_glock *gl) * @gl: Pointer to the glock * @ret: The return value from the dlm * - * The gl_reply field is under the gl_spin lock so that it is ok - * to use a bitfield shared with other glock state fields. */ void gfs2_glock_complete(struct gfs2_glock *gl, int ret) { struct lm_lockstruct *ls = &gl->gl_sbd->sd_lockstruct; - spin_lock(&gl->gl_spin); gl->gl_reply = ret; if (unlikely(test_bit(DFL_BLOCK_LOCKS, &ls->ls_flags))) { + spin_lock(&gl->gl_spin); if (gfs2_should_freeze(gl)) { set_bit(GLF_FROZEN, &gl->gl_flags); spin_unlock(&gl->gl_spin); return; } + spin_unlock(&gl->gl_spin); } - - spin_unlock(&gl->gl_spin); set_bit(GLF_REPLY_PENDING, &gl->gl_flags); - smp_wmb(); gfs2_glock_hold(gl); if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) gfs2_glock_put(gl); @@ -1623,17 +1626,18 @@ static const char *hflags2str(char *buf, unsigned flags, unsigned long iflags) static int dump_holder(struct seq_file *seq, const struct gfs2_holder *gh) { struct task_struct *gh_owner = NULL; + char buffer[KSYM_SYMBOL_LEN]; char flags_buf[32]; + sprint_symbol(buffer, gh->gh_ip); if (gh->gh_owner_pid) gh_owner = pid_task(gh->gh_owner_pid, PIDTYPE_PID); - gfs2_print_dbg(seq, " H: s:%s f:%s e:%d p:%ld [%s] %pS\n", - state2str(gh->gh_state), - hflags2str(flags_buf, gh->gh_flags, gh->gh_iflags), - gh->gh_error, - gh->gh_owner_pid ? (long)pid_nr(gh->gh_owner_pid) : -1, - gh_owner ? gh_owner->comm : "(ended)", - (void *)gh->gh_ip); + gfs2_print_dbg(seq, " H: s:%s f:%s e:%d p:%ld [%s] %s\n", + state2str(gh->gh_state), + hflags2str(flags_buf, gh->gh_flags, gh->gh_iflags), + gh->gh_error, + gh->gh_owner_pid ? (long)pid_nr(gh->gh_owner_pid) : -1, + gh_owner ? gh_owner->comm : "(ended)", buffer); return 0; } @@ -1778,13 +1782,12 @@ int __init gfs2_glock_init(void) } #endif - glock_workqueue = alloc_workqueue("glock_workqueue", WQ_MEM_RECLAIM | + glock_workqueue = alloc_workqueue("glock_workqueue", WQ_RESCUER | WQ_HIGHPRI | WQ_FREEZEABLE, 0); if (IS_ERR(glock_workqueue)) return PTR_ERR(glock_workqueue); - gfs2_delete_workqueue = alloc_workqueue("delete_workqueue", - WQ_MEM_RECLAIM | WQ_FREEZEABLE, - 0); + gfs2_delete_workqueue = alloc_workqueue("delete_workqueue", WQ_RESCUER | + WQ_FREEZEABLE, 0); if (IS_ERR(gfs2_delete_workqueue)) { destroy_workqueue(glock_workqueue); return PTR_ERR(gfs2_delete_workqueue); diff --git a/trunk/fs/gfs2/glock.h b/trunk/fs/gfs2/glock.h index 691851ceb615..db1c26d6d220 100644 --- a/trunk/fs/gfs2/glock.h +++ b/trunk/fs/gfs2/glock.h @@ -87,10 +87,11 @@ enum { #define GL_ASYNC 0x00000040 #define GL_EXACT 0x00000080 #define GL_SKIP 0x00000100 +#define GL_ATIME 0x00000200 #define GL_NOCACHE 0x00000400 /* - * lm_async_cb return flags + * lm_lock() and lm_async_cb return flags * * LM_OUT_ST_MASK * Masks the lower two bits of lock state in the returned value. @@ -98,11 +99,15 @@ enum { * LM_OUT_CANCELED * The lock request was canceled. * + * LM_OUT_ASYNC + * The result of the request will be returned in an LM_CB_ASYNC callback. + * */ #define LM_OUT_ST_MASK 0x00000003 #define LM_OUT_CANCELED 0x00000008 -#define LM_OUT_ERROR 0x00000004 +#define LM_OUT_ASYNC 0x00000080 +#define LM_OUT_ERROR 0x00000100 /* * lm_recovery_done() messages @@ -119,12 +124,25 @@ struct lm_lockops { void (*lm_unmount) (struct gfs2_sbd *sdp); void (*lm_withdraw) (struct gfs2_sbd *sdp); void (*lm_put_lock) (struct kmem_cache *cachep, struct gfs2_glock *gl); - int (*lm_lock) (struct gfs2_glock *gl, unsigned int req_state, - unsigned int flags); + unsigned int (*lm_lock) (struct gfs2_glock *gl, + unsigned int req_state, unsigned int flags); void (*lm_cancel) (struct gfs2_glock *gl); const match_table_t *lm_tokens; }; +#define LM_FLAG_TRY 0x00000001 +#define LM_FLAG_TRY_1CB 0x00000002 +#define LM_FLAG_NOEXP 0x00000004 +#define LM_FLAG_ANY 0x00000008 +#define LM_FLAG_PRIORITY 0x00000010 + +#define GL_ASYNC 0x00000040 +#define GL_EXACT 0x00000080 +#define GL_SKIP 0x00000100 +#define GL_NOCACHE 0x00000400 + +#define GLR_TRYFAILED 13 + extern struct workqueue_struct *gfs2_delete_workqueue; static inline struct gfs2_holder *gfs2_glock_is_locked_by_me(struct gfs2_glock *gl) { @@ -194,8 +212,6 @@ int gfs2_glock_nq_num(struct gfs2_sbd *sdp, int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs); void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs); void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs); - -__attribute__ ((format(printf, 2, 3))) void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...); /** diff --git a/trunk/fs/gfs2/glops.c b/trunk/fs/gfs2/glops.c index 263561bf1a50..0d149dcc04e5 100644 --- a/trunk/fs/gfs2/glops.c +++ b/trunk/fs/gfs2/glops.c @@ -325,6 +325,7 @@ static void trans_go_sync(struct gfs2_glock *gl) if (gl->gl_state != LM_ST_UNLOCKED && test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) { + flush_workqueue(gfs2_delete_workqueue); gfs2_meta_syncfs(sdp); gfs2_log_shutdown(sdp); } diff --git a/trunk/fs/gfs2/incore.h b/trunk/fs/gfs2/incore.h index 8d3d2b4a0a7d..764fbb49efc8 100644 --- a/trunk/fs/gfs2/incore.h +++ b/trunk/fs/gfs2/incore.h @@ -207,14 +207,12 @@ struct gfs2_glock { spinlock_t gl_spin; - /* State fields protected by gl_spin */ - unsigned int gl_state:2, /* Current state */ - gl_target:2, /* Target state */ - gl_demote_state:2, /* State requested by remote node */ - gl_req:2, /* State in last dlm request */ - gl_reply:8; /* Last reply from the dlm */ - + unsigned int gl_state; + unsigned int gl_target; + unsigned int gl_reply; unsigned int gl_hash; + unsigned int gl_req; + unsigned int gl_demote_state; /* state requested by remote node */ unsigned long gl_demote_time; /* time of first demote request */ struct list_head gl_holders; diff --git a/trunk/fs/gfs2/inode.c b/trunk/fs/gfs2/inode.c index 14e682dbe8bf..e1213f7f9217 100644 --- a/trunk/fs/gfs2/inode.c +++ b/trunk/fs/gfs2/inode.c @@ -916,8 +916,17 @@ static int __gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr) if (error) return error; + if ((attr->ia_valid & ATTR_SIZE) && + attr->ia_size != i_size_read(inode)) { + error = vmtruncate(inode, attr->ia_size); + if (error) + return error; + } + setattr_copy(inode, attr); mark_inode_dirty(inode); + + gfs2_assert_warn(GFS2_SB(inode), !error); gfs2_trans_add_bh(ip->i_gl, dibh, 1); gfs2_dinode_out(ip, dibh->b_data); brelse(dibh); diff --git a/trunk/fs/gfs2/lock_dlm.c b/trunk/fs/gfs2/lock_dlm.c index 6e493aee28f8..1c09425b45fd 100644 --- a/trunk/fs/gfs2/lock_dlm.c +++ b/trunk/fs/gfs2/lock_dlm.c @@ -146,13 +146,15 @@ static u32 make_flags(const u32 lkid, const unsigned int gfs_flags, return lkf; } -static int gdlm_lock(struct gfs2_glock *gl, unsigned int req_state, - unsigned int flags) +static unsigned int gdlm_lock(struct gfs2_glock *gl, + unsigned int req_state, unsigned int flags) { struct lm_lockstruct *ls = &gl->gl_sbd->sd_lockstruct; + int error; int req; u32 lkf; + gl->gl_req = req_state; req = make_mode(req_state); lkf = make_flags(gl->gl_lksb.sb_lkid, flags, req); @@ -160,8 +162,13 @@ static int gdlm_lock(struct gfs2_glock *gl, unsigned int req_state, * Submit the actual lock request. */ - return dlm_lock(ls->ls_dlm, req, &gl->gl_lksb, lkf, gl->gl_strname, - GDLM_STRNAME_BYTES - 1, 0, gdlm_ast, gl, gdlm_bast); + error = dlm_lock(ls->ls_dlm, req, &gl->gl_lksb, lkf, gl->gl_strname, + GDLM_STRNAME_BYTES - 1, 0, gdlm_ast, gl, gdlm_bast); + if (error == -EAGAIN) + return 0; + if (error) + return LM_OUT_ERROR; + return LM_OUT_ASYNC; } static void gdlm_put_lock(struct kmem_cache *cachep, struct gfs2_glock *gl) diff --git a/trunk/fs/gfs2/ops_inode.c b/trunk/fs/gfs2/ops_inode.c index 1db6b7343229..12cbea7502c2 100644 --- a/trunk/fs/gfs2/ops_inode.c +++ b/trunk/fs/gfs2/ops_inode.c @@ -1069,6 +1069,7 @@ static int setattr_chown(struct inode *inode, struct iattr *attr) { struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); + struct buffer_head *dibh; u32 ouid, ogid, nuid, ngid; int error; @@ -1099,10 +1100,25 @@ static int setattr_chown(struct inode *inode, struct iattr *attr) if (error) goto out_gunlock_q; - error = gfs2_setattr_simple(ip, attr); + error = gfs2_meta_inode_buffer(ip, &dibh); if (error) goto out_end_trans; + if ((attr->ia_valid & ATTR_SIZE) && + attr->ia_size != i_size_read(inode)) { + int error; + + error = vmtruncate(inode, attr->ia_size); + gfs2_assert_warn(sdp, !error); + } + + setattr_copy(inode, attr); + mark_inode_dirty(inode); + + gfs2_trans_add_bh(ip->i_gl, dibh, 1); + gfs2_dinode_out(ip, dibh->b_data); + brelse(dibh); + if (ouid != NO_QUOTA_CHANGE || ogid != NO_QUOTA_CHANGE) { u64 blocks = gfs2_get_inode_blocks(&ip->i_inode); gfs2_quota_change(ip, -blocks, ouid, ogid); diff --git a/trunk/fs/gfs2/quota.c b/trunk/fs/gfs2/quota.c index a689901963de..f606baf9ba72 100644 --- a/trunk/fs/gfs2/quota.c +++ b/trunk/fs/gfs2/quota.c @@ -666,10 +666,6 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc, qp->qu_limit = cpu_to_be64(fdq->d_blk_hardlimit >> sdp->sd_fsb2bb_shift); qd->qd_qb.qb_limit = qp->qu_limit; } - if (fdq->d_fieldmask & FS_DQ_BCOUNT) { - qp->qu_value = cpu_to_be64(fdq->d_bcount >> sdp->sd_fsb2bb_shift); - qd->qd_qb.qb_value = qp->qu_value; - } } /* Write the quota into the quota file on disk */ @@ -1513,7 +1509,7 @@ static int gfs2_get_dqblk(struct super_block *sb, int type, qid_t id, } /* GFS2 only supports a subset of the XFS fields */ -#define GFS2_FIELDMASK (FS_DQ_BSOFT|FS_DQ_BHARD|FS_DQ_BCOUNT) +#define GFS2_FIELDMASK (FS_DQ_BSOFT|FS_DQ_BHARD) static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id, struct fs_disk_quota *fdq) @@ -1573,15 +1569,9 @@ static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id, if ((fdq->d_fieldmask & FS_DQ_BSOFT) && ((fdq->d_blk_softlimit >> sdp->sd_fsb2bb_shift) == be64_to_cpu(qd->qd_qb.qb_warn))) fdq->d_fieldmask ^= FS_DQ_BSOFT; - if ((fdq->d_fieldmask & FS_DQ_BHARD) && ((fdq->d_blk_hardlimit >> sdp->sd_fsb2bb_shift) == be64_to_cpu(qd->qd_qb.qb_limit))) fdq->d_fieldmask ^= FS_DQ_BHARD; - - if ((fdq->d_fieldmask & FS_DQ_BCOUNT) && - ((fdq->d_bcount >> sdp->sd_fsb2bb_shift) == be64_to_cpu(qd->qd_qb.qb_value))) - fdq->d_fieldmask ^= FS_DQ_BCOUNT; - if (fdq->d_fieldmask == 0) goto out_i; @@ -1630,3 +1620,4 @@ const struct quotactl_ops gfs2_quotactl_ops = { .get_dqblk = gfs2_get_dqblk, .set_dqblk = gfs2_set_dqblk, }; + diff --git a/trunk/fs/gfs2/rgrp.c b/trunk/fs/gfs2/rgrp.c index 7293ea27020c..33c8407b876f 100644 --- a/trunk/fs/gfs2/rgrp.c +++ b/trunk/fs/gfs2/rgrp.c @@ -500,7 +500,7 @@ u64 gfs2_ri_total(struct gfs2_sbd *sdp) for (rgrps = 0;; rgrps++) { loff_t pos = rgrps * sizeof(struct gfs2_rindex); - if (pos + sizeof(struct gfs2_rindex) > i_size_read(inode)) + if (pos + sizeof(struct gfs2_rindex) >= i_size_read(inode)) break; error = gfs2_internal_read(ip, &ra_state, buf, &pos, sizeof(struct gfs2_rindex)); @@ -583,7 +583,7 @@ static int read_rindex_entry(struct gfs2_inode *ip, * Returns: 0 on successful update, error code otherwise */ -int gfs2_ri_update(struct gfs2_inode *ip) +static int gfs2_ri_update(struct gfs2_inode *ip) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct inode *inode = &ip->i_inode; @@ -613,6 +613,46 @@ int gfs2_ri_update(struct gfs2_inode *ip) return 0; } +/** + * gfs2_ri_update_special - Pull in a new resource index from the disk + * + * This is a special version that's safe to call from gfs2_inplace_reserve_i. + * In this case we know that we don't have any resource groups in memory yet. + * + * @ip: pointer to the rindex inode + * + * Returns: 0 on successful update, error code otherwise + */ +static int gfs2_ri_update_special(struct gfs2_inode *ip) +{ + struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); + struct inode *inode = &ip->i_inode; + struct file_ra_state ra_state; + struct gfs2_rgrpd *rgd; + unsigned int max_data = 0; + int error; + + file_ra_state_init(&ra_state, inode->i_mapping); + for (sdp->sd_rgrps = 0;; sdp->sd_rgrps++) { + /* Ignore partials */ + if ((sdp->sd_rgrps + 1) * sizeof(struct gfs2_rindex) > + i_size_read(inode)) + break; + error = read_rindex_entry(ip, &ra_state); + if (error) { + clear_rgrpdi(sdp); + return error; + } + } + list_for_each_entry(rgd, &sdp->sd_rindex_list, rd_list) + if (rgd->rd_data > max_data) + max_data = rgd->rd_data; + sdp->sd_max_rg_data = max_data; + + sdp->sd_rindex_uptodate = 1; + return 0; +} + /** * gfs2_rindex_hold - Grab a lock on the rindex * @sdp: The GFS2 superblock @@ -1186,25 +1226,16 @@ int gfs2_inplace_reserve_i(struct gfs2_inode *ip, int hold_rindex, error = gfs2_rindex_hold(sdp, &al->al_ri_gh); else if (!sdp->sd_rgrps) /* We may not have the rindex read in, so: */ - error = gfs2_ri_update(ip); + error = gfs2_ri_update_special(ip); if (error) return error; } -try_again: do { error = get_local_rgrp(ip, &last_unlinked); /* If there is no space, flushing the log may release some */ - if (error) { - if (ip == GFS2_I(sdp->sd_rindex) && - !sdp->sd_rindex_uptodate) { - error = gfs2_ri_update(ip); - if (error) - return error; - goto try_again; - } + if (error) gfs2_log_flush(sdp, NULL); - } } while (error && tries++ < 3); if (error) { diff --git a/trunk/fs/gfs2/rgrp.h b/trunk/fs/gfs2/rgrp.h index 50c2bb04369c..0e35c0466f9a 100644 --- a/trunk/fs/gfs2/rgrp.h +++ b/trunk/fs/gfs2/rgrp.h @@ -48,7 +48,6 @@ extern int gfs2_inplace_reserve_i(struct gfs2_inode *ip, int hold_rindex, extern void gfs2_inplace_release(struct gfs2_inode *ip); -extern int gfs2_ri_update(struct gfs2_inode *ip); extern int gfs2_alloc_block(struct gfs2_inode *ip, u64 *bn, unsigned int *n); extern int gfs2_alloc_di(struct gfs2_inode *ip, u64 *bn, u64 *generation); diff --git a/trunk/fs/gfs2/xattr.c b/trunk/fs/gfs2/xattr.c index 439b61c03262..30b58f07c8a6 100644 --- a/trunk/fs/gfs2/xattr.c +++ b/trunk/fs/gfs2/xattr.c @@ -1296,8 +1296,10 @@ static int ea_acl_chmod_unstuffed(struct gfs2_inode *ip, int gfs2_xattr_acl_chmod(struct gfs2_inode *ip, struct iattr *attr, char *data) { + struct inode *inode = &ip->i_inode; struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct gfs2_ea_location el; + struct buffer_head *dibh; int error; error = gfs2_ea_find(ip, GFS2_EATYPE_SYS, GFS2_POSIX_ACL_ACCESS, &el); @@ -1319,7 +1321,26 @@ int gfs2_xattr_acl_chmod(struct gfs2_inode *ip, struct iattr *attr, char *data) if (error) return error; - error = gfs2_setattr_simple(ip, attr); + error = gfs2_meta_inode_buffer(ip, &dibh); + if (error) + goto out_trans_end; + + if ((attr->ia_valid & ATTR_SIZE) && + attr->ia_size != i_size_read(inode)) { + int error; + + error = vmtruncate(inode, attr->ia_size); + gfs2_assert_warn(GFS2_SB(inode), !error); + } + + setattr_copy(inode, attr); + mark_inode_dirty(inode); + + gfs2_trans_add_bh(ip->i_gl, dibh, 1); + gfs2_dinode_out(ip, dibh->b_data); + brelse(dibh); + +out_trans_end: gfs2_trans_end(sdp); return error; } diff --git a/trunk/fs/proc/base.c b/trunk/fs/proc/base.c index 08cba2c3b612..182845147fe4 100644 --- a/trunk/fs/proc/base.c +++ b/trunk/fs/proc/base.c @@ -1407,82 +1407,6 @@ static const struct file_operations proc_pid_sched_operations = { #endif -#ifdef CONFIG_SCHED_AUTOGROUP -/* - * Print out autogroup related information: - */ -static int sched_autogroup_show(struct seq_file *m, void *v) -{ - struct inode *inode = m->private; - struct task_struct *p; - - p = get_proc_task(inode); - if (!p) - return -ESRCH; - proc_sched_autogroup_show_task(p, m); - - put_task_struct(p); - - return 0; -} - -static ssize_t -sched_autogroup_write(struct file *file, const char __user *buf, - size_t count, loff_t *offset) -{ - struct inode *inode = file->f_path.dentry->d_inode; - struct task_struct *p; - char buffer[PROC_NUMBUF]; - long nice; - int err; - - memset(buffer, 0, sizeof(buffer)); - if (count > sizeof(buffer) - 1) - count = sizeof(buffer) - 1; - if (copy_from_user(buffer, buf, count)) - return -EFAULT; - - err = strict_strtol(strstrip(buffer), 0, &nice); - if (err) - return -EINVAL; - - p = get_proc_task(inode); - if (!p) - return -ESRCH; - - err = nice; - err = proc_sched_autogroup_set_nice(p, &err); - if (err) - count = err; - - put_task_struct(p); - - return count; -} - -static int sched_autogroup_open(struct inode *inode, struct file *filp) -{ - int ret; - - ret = single_open(filp, sched_autogroup_show, NULL); - if (!ret) { - struct seq_file *m = filp->private_data; - - m->private = inode; - } - return ret; -} - -static const struct file_operations proc_pid_sched_autogroup_operations = { - .open = sched_autogroup_open, - .read = seq_read, - .write = sched_autogroup_write, - .llseek = seq_lseek, - .release = single_release, -}; - -#endif /* CONFIG_SCHED_AUTOGROUP */ - static ssize_t comm_write(struct file *file, const char __user *buf, size_t count, loff_t *offset) { @@ -2808,9 +2732,6 @@ static const struct pid_entry tgid_base_stuff[] = { INF("limits", S_IRUGO, proc_pid_limits), #ifdef CONFIG_SCHED_DEBUG REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations), -#endif -#ifdef CONFIG_SCHED_AUTOGROUP - REG("autogroup", S_IRUGO|S_IWUSR, proc_pid_sched_autogroup_operations), #endif REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations), #ifdef CONFIG_HAVE_ARCH_TRACEHOOK diff --git a/trunk/include/linux/completion.h b/trunk/include/linux/completion.h index 51494e6b5548..36d57f74cd01 100644 --- a/trunk/include/linux/completion.h +++ b/trunk/include/linux/completion.h @@ -81,10 +81,10 @@ extern int wait_for_completion_interruptible(struct completion *x); extern int wait_for_completion_killable(struct completion *x); extern unsigned long wait_for_completion_timeout(struct completion *x, unsigned long timeout); -extern long wait_for_completion_interruptible_timeout( - struct completion *x, unsigned long timeout); -extern long wait_for_completion_killable_timeout( - struct completion *x, unsigned long timeout); +extern unsigned long wait_for_completion_interruptible_timeout( + struct completion *x, unsigned long timeout); +extern unsigned long wait_for_completion_killable_timeout( + struct completion *x, unsigned long timeout); extern bool try_wait_for_completion(struct completion *x); extern bool completion_done(struct completion *x); diff --git a/trunk/include/linux/dmaengine.h b/trunk/include/linux/dmaengine.h index 8cd00ad98d37..9d8688b92d8b 100644 --- a/trunk/include/linux/dmaengine.h +++ b/trunk/include/linux/dmaengine.h @@ -824,8 +824,6 @@ enum dma_status dma_sync_wait(struct dma_chan *chan, dma_cookie_t cookie); #ifdef CONFIG_DMA_ENGINE enum dma_status dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx); void dma_issue_pending_all(void); -struct dma_chan *__dma_request_channel(dma_cap_mask_t *mask, dma_filter_fn fn, void *fn_param); -void dma_release_channel(struct dma_chan *chan); #else static inline enum dma_status dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx) { @@ -833,14 +831,7 @@ static inline enum dma_status dma_wait_for_async_tx(struct dma_async_tx_descript } static inline void dma_issue_pending_all(void) { -} -static inline struct dma_chan *__dma_request_channel(dma_cap_mask_t *mask, - dma_filter_fn fn, void *fn_param) -{ - return NULL; -} -static inline void dma_release_channel(struct dma_chan *chan) -{ + do { } while (0); } #endif @@ -851,6 +842,8 @@ void dma_async_device_unregister(struct dma_device *device); void dma_run_dependencies(struct dma_async_tx_descriptor *tx); struct dma_chan *dma_find_channel(enum dma_transaction_type tx_type); #define dma_request_channel(mask, x, y) __dma_request_channel(&(mask), x, y) +struct dma_chan *__dma_request_channel(dma_cap_mask_t *mask, dma_filter_fn fn, void *fn_param); +void dma_release_channel(struct dma_chan *chan); /* --- Helper iov-locking functions --- */ diff --git a/trunk/include/linux/ftrace_event.h b/trunk/include/linux/ftrace_event.h index 47e3997f7b5c..8beabb958f61 100644 --- a/trunk/include/linux/ftrace_event.h +++ b/trunk/include/linux/ftrace_event.h @@ -154,14 +154,12 @@ enum { TRACE_EVENT_FL_ENABLED_BIT, TRACE_EVENT_FL_FILTERED_BIT, TRACE_EVENT_FL_RECORDED_CMD_BIT, - TRACE_EVENT_FL_CAP_ANY_BIT, }; enum { TRACE_EVENT_FL_ENABLED = (1 << TRACE_EVENT_FL_ENABLED_BIT), TRACE_EVENT_FL_FILTERED = (1 << TRACE_EVENT_FL_FILTERED_BIT), TRACE_EVENT_FL_RECORDED_CMD = (1 << TRACE_EVENT_FL_RECORDED_CMD_BIT), - TRACE_EVENT_FL_CAP_ANY = (1 << TRACE_EVENT_FL_CAP_ANY_BIT), }; struct ftrace_event_call { @@ -198,14 +196,6 @@ struct ftrace_event_call { #endif }; -#define __TRACE_EVENT_FLAGS(name, value) \ - static int __init trace_init_flags_##name(void) \ - { \ - event_##name.flags = value; \ - return 0; \ - } \ - early_initcall(trace_init_flags_##name); - #define PERF_MAX_TRACE_SIZE 2048 #define MAX_FILTER_PRED 32 @@ -225,10 +215,6 @@ enum { FILTER_PTR_STRING, }; -#define EVENT_STORAGE_SIZE 128 -extern struct mutex event_storage_mutex; -extern char event_storage[EVENT_STORAGE_SIZE]; - extern int trace_event_raw_init(struct ftrace_event_call *call); extern int trace_define_field(struct ftrace_event_call *call, const char *type, const char *name, int offset, int size, diff --git a/trunk/include/linux/hrtimer.h b/trunk/include/linux/hrtimer.h index 330586ffffbb..fd0c1b857d3d 100644 --- a/trunk/include/linux/hrtimer.h +++ b/trunk/include/linux/hrtimer.h @@ -22,7 +22,7 @@ #include #include #include -#include + struct hrtimer_clock_base; struct hrtimer_cpu_base; @@ -79,8 +79,8 @@ enum hrtimer_restart { /** * struct hrtimer - the basic hrtimer structure - * @node: timerqueue node, which also manages node.expires, - * the absolute expiry time in the hrtimers internal + * @node: red black tree node for time ordered insertion + * @_expires: the absolute expiry time in the hrtimers internal * representation. The time is related to the clock on * which the timer is based. Is setup by adding * slack to the _softexpires value. For non range timers @@ -101,7 +101,8 @@ enum hrtimer_restart { * The hrtimer structure must be initialized by hrtimer_init() */ struct hrtimer { - struct timerqueue_node node; + struct rb_node node; + ktime_t _expires; ktime_t _softexpires; enum hrtimer_restart (*function)(struct hrtimer *); struct hrtimer_clock_base *base; @@ -140,7 +141,8 @@ struct hrtimer_sleeper { struct hrtimer_clock_base { struct hrtimer_cpu_base *cpu_base; clockid_t index; - struct timerqueue_head active; + struct rb_root active; + struct rb_node *first; ktime_t resolution; ktime_t (*get_time)(void); ktime_t softirq_time; @@ -156,6 +158,7 @@ struct hrtimer_clock_base { * @lock: lock protecting the base and associated clock bases * and timers * @clock_base: array of clock bases for this cpu + * @curr_timer: the timer which is executing a callback right now * @expires_next: absolute time of the next event which was scheduled * via clock_set_next_event() * @hres_active: State of high resolution mode @@ -181,43 +184,43 @@ struct hrtimer_cpu_base { static inline void hrtimer_set_expires(struct hrtimer *timer, ktime_t time) { - timer->node.expires = time; + timer->_expires = time; timer->_softexpires = time; } static inline void hrtimer_set_expires_range(struct hrtimer *timer, ktime_t time, ktime_t delta) { timer->_softexpires = time; - timer->node.expires = ktime_add_safe(time, delta); + timer->_expires = ktime_add_safe(time, delta); } static inline void hrtimer_set_expires_range_ns(struct hrtimer *timer, ktime_t time, unsigned long delta) { timer->_softexpires = time; - timer->node.expires = ktime_add_safe(time, ns_to_ktime(delta)); + timer->_expires = ktime_add_safe(time, ns_to_ktime(delta)); } static inline void hrtimer_set_expires_tv64(struct hrtimer *timer, s64 tv64) { - timer->node.expires.tv64 = tv64; + timer->_expires.tv64 = tv64; timer->_softexpires.tv64 = tv64; } static inline void hrtimer_add_expires(struct hrtimer *timer, ktime_t time) { - timer->node.expires = ktime_add_safe(timer->node.expires, time); + timer->_expires = ktime_add_safe(timer->_expires, time); timer->_softexpires = ktime_add_safe(timer->_softexpires, time); } static inline void hrtimer_add_expires_ns(struct hrtimer *timer, u64 ns) { - timer->node.expires = ktime_add_ns(timer->node.expires, ns); + timer->_expires = ktime_add_ns(timer->_expires, ns); timer->_softexpires = ktime_add_ns(timer->_softexpires, ns); } static inline ktime_t hrtimer_get_expires(const struct hrtimer *timer) { - return timer->node.expires; + return timer->_expires; } static inline ktime_t hrtimer_get_softexpires(const struct hrtimer *timer) @@ -227,7 +230,7 @@ static inline ktime_t hrtimer_get_softexpires(const struct hrtimer *timer) static inline s64 hrtimer_get_expires_tv64(const struct hrtimer *timer) { - return timer->node.expires.tv64; + return timer->_expires.tv64; } static inline s64 hrtimer_get_softexpires_tv64(const struct hrtimer *timer) { @@ -236,12 +239,12 @@ static inline s64 hrtimer_get_softexpires_tv64(const struct hrtimer *timer) static inline s64 hrtimer_get_expires_ns(const struct hrtimer *timer) { - return ktime_to_ns(timer->node.expires); + return ktime_to_ns(timer->_expires); } static inline ktime_t hrtimer_expires_remaining(const struct hrtimer *timer) { - return ktime_sub(timer->node.expires, timer->base->get_time()); + return ktime_sub(timer->_expires, timer->base->get_time()); } #ifdef CONFIG_HIGH_RES_TIMERS diff --git a/trunk/include/linux/init_task.h b/trunk/include/linux/init_task.h index caa151fbebb7..1f8c06ce0fa6 100644 --- a/trunk/include/linux/init_task.h +++ b/trunk/include/linux/init_task.h @@ -12,13 +12,6 @@ #include #include -#ifdef CONFIG_SMP -# define INIT_PUSHABLE_TASKS(tsk) \ - .pushable_tasks = PLIST_NODE_INIT(tsk.pushable_tasks, MAX_PRIO), -#else -# define INIT_PUSHABLE_TASKS(tsk) -#endif - extern struct files_struct init_files; extern struct fs_struct init_fs; @@ -90,12 +83,6 @@ extern struct group_info init_groups; */ # define CAP_INIT_BSET CAP_FULL_SET -#ifdef CONFIG_RCU_BOOST -#define INIT_TASK_RCU_BOOST() \ - .rcu_boost_mutex = NULL, -#else -#define INIT_TASK_RCU_BOOST() -#endif #ifdef CONFIG_TREE_PREEMPT_RCU #define INIT_TASK_RCU_TREE_PREEMPT() \ .rcu_blocked_node = NULL, @@ -107,8 +94,7 @@ extern struct group_info init_groups; .rcu_read_lock_nesting = 0, \ .rcu_read_unlock_special = 0, \ .rcu_node_entry = LIST_HEAD_INIT(tsk.rcu_node_entry), \ - INIT_TASK_RCU_TREE_PREEMPT() \ - INIT_TASK_RCU_BOOST() + INIT_TASK_RCU_TREE_PREEMPT() #else #define INIT_TASK_RCU_PREEMPT(tsk) #endif @@ -151,7 +137,7 @@ extern struct cred init_cred; .nr_cpus_allowed = NR_CPUS, \ }, \ .tasks = LIST_HEAD_INIT(tsk.tasks), \ - INIT_PUSHABLE_TASKS(tsk) \ + .pushable_tasks = PLIST_NODE_INIT(tsk.pushable_tasks, MAX_PRIO), \ .ptraced = LIST_HEAD_INIT(tsk.ptraced), \ .ptrace_entry = LIST_HEAD_INIT(tsk.ptrace_entry), \ .real_parent = &tsk, \ diff --git a/trunk/include/linux/interrupt.h b/trunk/include/linux/interrupt.h index 55e0d4253e49..79d0c4f6d071 100644 --- a/trunk/include/linux/interrupt.h +++ b/trunk/include/linux/interrupt.h @@ -114,15 +114,15 @@ typedef irqreturn_t (*irq_handler_t)(int, void *); struct irqaction { irq_handler_t handler; unsigned long flags; + const char *name; void *dev_id; struct irqaction *next; int irq; + struct proc_dir_entry *dir; irq_handler_t thread_fn; struct task_struct *thread; unsigned long thread_flags; - const char *name; - struct proc_dir_entry *dir; -} ____cacheline_internodealigned_in_smp; +}; extern irqreturn_t no_action(int cpl, void *dev_id); diff --git a/trunk/include/linux/kprobes.h b/trunk/include/linux/kprobes.h index b78edb58ee66..e7d1b2e0070d 100644 --- a/trunk/include/linux/kprobes.h +++ b/trunk/include/linux/kprobes.h @@ -275,9 +275,7 @@ extern int arch_prepared_optinsn(struct arch_optimized_insn *optinsn); extern int arch_check_optimized_kprobe(struct optimized_kprobe *op); extern int arch_prepare_optimized_kprobe(struct optimized_kprobe *op); extern void arch_remove_optimized_kprobe(struct optimized_kprobe *op); -extern void arch_optimize_kprobes(struct list_head *oplist); -extern void arch_unoptimize_kprobes(struct list_head *oplist, - struct list_head *done_list); +extern int arch_optimize_kprobe(struct optimized_kprobe *op); extern void arch_unoptimize_kprobe(struct optimized_kprobe *op); extern kprobe_opcode_t *get_optinsn_slot(void); extern void free_optinsn_slot(kprobe_opcode_t *slot, int dirty); diff --git a/trunk/include/linux/module.h b/trunk/include/linux/module.h index 8b17fd8c790d..7575bbbdf2a2 100644 --- a/trunk/include/linux/module.h +++ b/trunk/include/linux/module.h @@ -308,9 +308,6 @@ struct module /* The size of the executable code in each section. */ unsigned int init_text_size, core_text_size; - /* Size of RO sections of the module (text+rodata) */ - unsigned int init_ro_size, core_ro_size; - /* Arch-specific module values */ struct mod_arch_specific arch; @@ -675,6 +672,7 @@ static inline int module_get_iter_tracepoints(struct tracepoint_iter *iter) { return 0; } + #endif /* CONFIG_MODULES */ #ifdef CONFIG_SYSFS @@ -689,13 +687,6 @@ extern int module_sysfs_initialized; #define __MODULE_STRING(x) __stringify(x) -#ifdef CONFIG_DEBUG_SET_MODULE_RONX -extern void set_all_modules_text_rw(void); -extern void set_all_modules_text_ro(void); -#else -static inline void set_all_modules_text_rw(void) { } -static inline void set_all_modules_text_ro(void) { } -#endif #ifdef CONFIG_GENERIC_BUG void module_bug_finalize(const Elf_Ehdr *, const Elf_Shdr *, diff --git a/trunk/include/linux/mutex.h b/trunk/include/linux/mutex.h index 94b48bd40dd7..f363bc8fdc74 100644 --- a/trunk/include/linux/mutex.h +++ b/trunk/include/linux/mutex.h @@ -160,8 +160,4 @@ extern int mutex_trylock(struct mutex *lock); extern void mutex_unlock(struct mutex *lock); extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock); -#ifndef CONFIG_HAVE_ARCH_MUTEX_CPU_RELAX -#define arch_mutex_cpu_relax() cpu_relax() -#endif - #endif diff --git a/trunk/include/linux/nmi.h b/trunk/include/linux/nmi.h index c536f8545f74..06aab5eee134 100644 --- a/trunk/include/linux/nmi.h +++ b/trunk/include/linux/nmi.h @@ -14,14 +14,22 @@ * may be used to reset the timeout - for code which intentionally * disables interrupts for a long time. This call is stateless. */ -#if defined(ARCH_HAS_NMI_WATCHDOG) || defined(CONFIG_HARDLOCKUP_DETECTOR) +#ifdef ARCH_HAS_NMI_WATCHDOG #include extern void touch_nmi_watchdog(void); +extern void acpi_nmi_disable(void); +extern void acpi_nmi_enable(void); #else +#ifndef CONFIG_HARDLOCKUP_DETECTOR static inline void touch_nmi_watchdog(void) { touch_softlockup_watchdog(); } +#else +extern void touch_nmi_watchdog(void); +#endif +static inline void acpi_nmi_disable(void) { } +static inline void acpi_nmi_enable(void) { } #endif /* diff --git a/trunk/include/linux/perf_event.h b/trunk/include/linux/perf_event.h index dda5b0a3ff60..4f1279e105ee 100644 --- a/trunk/include/linux/perf_event.h +++ b/trunk/include/linux/perf_event.h @@ -215,9 +215,8 @@ struct perf_event_attr { */ precise_ip : 2, /* skid constraint */ mmap_data : 1, /* non-exec mmap data */ - sample_id_all : 1, /* sample_type all events */ - __reserved_1 : 45; + __reserved_1 : 46; union { __u32 wakeup_events; /* wakeup every n events */ @@ -328,15 +327,6 @@ struct perf_event_header { enum perf_event_type { /* - * If perf_event_attr.sample_id_all is set then all event types will - * have the sample_type selected fields related to where/when - * (identity) an event took place (TID, TIME, ID, CPU, STREAM_ID) - * described in PERF_RECORD_SAMPLE below, it will be stashed just after - * the perf_event_header and the fields already present for the existing - * fields, i.e. at the end of the payload. That way a newer perf.data - * file will be supported by older perf tools, with these new optional - * fields being ignored. - * * The MMAP events record the PROT_EXEC mappings so that we can * correlate userspace IPs to code. They have the following structure: * @@ -588,10 +578,6 @@ struct perf_event; struct pmu { struct list_head entry; - struct device *dev; - char *name; - int type; - int * __percpu pmu_disable_count; struct perf_cpu_context * __percpu pmu_cpu_context; int task_ctx_nr; @@ -772,9 +758,6 @@ struct perf_event { u64 shadow_ctx_time; struct perf_event_attr attr; - u16 header_size; - u16 id_header_size; - u16 read_size; struct hw_perf_event hw; struct perf_event_context *ctx; @@ -920,7 +903,7 @@ struct perf_output_handle { #ifdef CONFIG_PERF_EVENTS -extern int perf_pmu_register(struct pmu *pmu, char *name, int type); +extern int perf_pmu_register(struct pmu *pmu); extern void perf_pmu_unregister(struct pmu *pmu); extern int perf_num_counters(void); @@ -987,11 +970,6 @@ extern int perf_event_overflow(struct perf_event *event, int nmi, struct perf_sample_data *data, struct pt_regs *regs); -static inline bool is_sampling_event(struct perf_event *event) -{ - return event->attr.sample_period != 0; -} - /* * Return 1 for a software event, 0 for a hardware event */ diff --git a/trunk/include/linux/rculist.h b/trunk/include/linux/rculist.h index 2dea94fc4402..f31ef61f1c65 100644 --- a/trunk/include/linux/rculist.h +++ b/trunk/include/linux/rculist.h @@ -241,6 +241,11 @@ static inline void list_splice_init_rcu(struct list_head *list, #define list_first_entry_rcu(ptr, type, member) \ list_entry_rcu((ptr)->next, type, member) +#define __list_for_each_rcu(pos, head) \ + for (pos = rcu_dereference_raw(list_next_rcu(head)); \ + pos != (head); \ + pos = rcu_dereference_raw(list_next_rcu((pos))) + /** * list_for_each_entry_rcu - iterate over rcu list of given type * @pos: the type * to use as a loop cursor. diff --git a/trunk/include/linux/rcupdate.h b/trunk/include/linux/rcupdate.h index af5614856285..03cda7bed985 100644 --- a/trunk/include/linux/rcupdate.h +++ b/trunk/include/linux/rcupdate.h @@ -47,8 +47,6 @@ extern int rcutorture_runnable; /* for sysctl */ #endif /* #ifdef CONFIG_RCU_TORTURE_TEST */ -#define UINT_CMP_GE(a, b) (UINT_MAX / 2 >= (a) - (b)) -#define UINT_CMP_LT(a, b) (UINT_MAX / 2 < (a) - (b)) #define ULONG_CMP_GE(a, b) (ULONG_MAX / 2 >= (a) - (b)) #define ULONG_CMP_LT(a, b) (ULONG_MAX / 2 < (a) - (b)) @@ -68,6 +66,7 @@ extern void call_rcu_sched(struct rcu_head *head, extern void synchronize_sched(void); extern void rcu_barrier_bh(void); extern void rcu_barrier_sched(void); +extern void synchronize_sched_expedited(void); extern int sched_expedited_torture_stats(char *page); static inline void __rcu_read_lock_bh(void) @@ -119,6 +118,7 @@ static inline int rcu_preempt_depth(void) #endif /* #else #ifdef CONFIG_PREEMPT_RCU */ /* Internal to kernel */ +extern void rcu_init(void); extern void rcu_sched_qs(int cpu); extern void rcu_bh_qs(int cpu); extern void rcu_check_callbacks(int cpu, int user); diff --git a/trunk/include/linux/rcutiny.h b/trunk/include/linux/rcutiny.h index 30ebd7c8d874..13877cb93a60 100644 --- a/trunk/include/linux/rcutiny.h +++ b/trunk/include/linux/rcutiny.h @@ -27,9 +27,7 @@ #include -static inline void rcu_init(void) -{ -} +#define rcu_init_sched() do { } while (0) #ifdef CONFIG_TINY_RCU @@ -60,11 +58,6 @@ static inline void synchronize_rcu_bh_expedited(void) synchronize_sched(); } -static inline void synchronize_sched_expedited(void) -{ - synchronize_sched(); -} - #ifdef CONFIG_TINY_RCU static inline void rcu_preempt_note_context_switch(void) @@ -132,12 +125,16 @@ static inline void rcu_cpu_stall_reset(void) } #ifdef CONFIG_DEBUG_LOCK_ALLOC + extern int rcu_scheduler_active __read_mostly; extern void rcu_scheduler_starting(void); + #else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ + static inline void rcu_scheduler_starting(void) { } + #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ #endif /* __LINUX_RCUTINY_H */ diff --git a/trunk/include/linux/rcutree.h b/trunk/include/linux/rcutree.h index 3a933482734a..95518e628794 100644 --- a/trunk/include/linux/rcutree.h +++ b/trunk/include/linux/rcutree.h @@ -30,7 +30,6 @@ #ifndef __LINUX_RCUTREE_H #define __LINUX_RCUTREE_H -extern void rcu_init(void); extern void rcu_note_context_switch(int cpu); extern int rcu_needs_cpu(int cpu); extern void rcu_cpu_stall_reset(void); @@ -48,7 +47,6 @@ static inline void exit_rcu(void) #endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */ extern void synchronize_rcu_bh(void); -extern void synchronize_sched_expedited(void); extern void synchronize_rcu_expedited(void); static inline void synchronize_rcu_bh_expedited(void) diff --git a/trunk/include/linux/sched.h b/trunk/include/linux/sched.h index 777cd01e240e..223874538b33 100644 --- a/trunk/include/linux/sched.h +++ b/trunk/include/linux/sched.h @@ -316,7 +316,6 @@ extern int proc_dowatchdog_thresh(struct ctl_table *table, int write, size_t *lenp, loff_t *ppos); extern unsigned int softlockup_panic; extern int softlockup_thresh; -void lockup_detector_init(void); #else static inline void touch_softlockup_watchdog(void) { @@ -327,9 +326,6 @@ static inline void touch_softlockup_watchdog_sync(void) static inline void touch_all_softlockup_watchdogs(void) { } -static inline void lockup_detector_init(void) -{ -} #endif #ifdef CONFIG_DETECT_HUNG_TASK @@ -513,8 +509,6 @@ struct thread_group_cputimer { spinlock_t lock; }; -struct autogroup; - /* * NOTE! "signal_struct" does not have it's own * locking, because a shared signal_struct always @@ -582,9 +576,6 @@ struct signal_struct { struct tty_struct *tty; /* NULL if no tty */ -#ifdef CONFIG_SCHED_AUTOGROUP - struct autogroup *autogroup; -#endif /* * Cumulative resource counters for dead threads in the group, * and for reaped dead child processes forked by this group. @@ -1238,18 +1229,13 @@ struct task_struct { #ifdef CONFIG_TREE_PREEMPT_RCU struct rcu_node *rcu_blocked_node; #endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ -#ifdef CONFIG_RCU_BOOST - struct rt_mutex *rcu_boost_mutex; -#endif /* #ifdef CONFIG_RCU_BOOST */ #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) struct sched_info sched_info; #endif struct list_head tasks; -#ifdef CONFIG_SMP struct plist_node pushable_tasks; -#endif struct mm_struct *mm, *active_mm; #if defined(SPLIT_RSS_COUNTING) @@ -1773,8 +1759,7 @@ extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t * #ifdef CONFIG_PREEMPT_RCU #define RCU_READ_UNLOCK_BLOCKED (1 << 0) /* blocked while in RCU read-side. */ -#define RCU_READ_UNLOCK_BOOSTED (1 << 1) /* boosted while in RCU read-side. */ -#define RCU_READ_UNLOCK_NEED_QS (1 << 2) /* RCU core needs CPU response. */ +#define RCU_READ_UNLOCK_NEED_QS (1 << 1) /* RCU core needs CPU response. */ static inline void rcu_copy_process(struct task_struct *p) { @@ -1782,10 +1767,7 @@ static inline void rcu_copy_process(struct task_struct *p) p->rcu_read_unlock_special = 0; #ifdef CONFIG_TREE_PREEMPT_RCU p->rcu_blocked_node = NULL; -#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ -#ifdef CONFIG_RCU_BOOST - p->rcu_boost_mutex = NULL; -#endif /* #ifdef CONFIG_RCU_BOOST */ +#endif INIT_LIST_HEAD(&p->rcu_node_entry); } @@ -1890,11 +1872,14 @@ extern void sched_clock_idle_sleep_event(void); extern void sched_clock_idle_wakeup_event(u64 delta_ns); #ifdef CONFIG_HOTPLUG_CPU +extern void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p); extern void idle_task_exit(void); #else static inline void idle_task_exit(void) {} #endif +extern void sched_idle_next(void); + #if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP) extern void wake_up_idle_cpu(int cpu); #else @@ -1904,6 +1889,8 @@ static inline void wake_up_idle_cpu(int cpu) { } extern unsigned int sysctl_sched_latency; extern unsigned int sysctl_sched_min_granularity; extern unsigned int sysctl_sched_wakeup_granularity; +extern unsigned int sysctl_sched_shares_ratelimit; +extern unsigned int sysctl_sched_shares_thresh; extern unsigned int sysctl_sched_child_runs_first; enum sched_tunable_scaling { @@ -1919,7 +1906,6 @@ extern unsigned int sysctl_sched_migration_cost; extern unsigned int sysctl_sched_nr_migrate; extern unsigned int sysctl_sched_time_avg; extern unsigned int sysctl_timer_migration; -extern unsigned int sysctl_sched_shares_window; int sched_proc_update_handler(struct ctl_table *table, int write, void __user *buffer, size_t *length, @@ -1945,24 +1931,6 @@ int sched_rt_handler(struct ctl_table *table, int write, extern unsigned int sysctl_sched_compat_yield; -#ifdef CONFIG_SCHED_AUTOGROUP -extern unsigned int sysctl_sched_autogroup_enabled; - -extern void sched_autogroup_create_attach(struct task_struct *p); -extern void sched_autogroup_detach(struct task_struct *p); -extern void sched_autogroup_fork(struct signal_struct *sig); -extern void sched_autogroup_exit(struct signal_struct *sig); -#ifdef CONFIG_PROC_FS -extern void proc_sched_autogroup_show_task(struct task_struct *p, struct seq_file *m); -extern int proc_sched_autogroup_set_nice(struct task_struct *p, int *nice); -#endif -#else -static inline void sched_autogroup_create_attach(struct task_struct *p) { } -static inline void sched_autogroup_detach(struct task_struct *p) { } -static inline void sched_autogroup_fork(struct signal_struct *sig) { } -static inline void sched_autogroup_exit(struct signal_struct *sig) { } -#endif - #ifdef CONFIG_RT_MUTEXES extern int rt_mutex_getprio(struct task_struct *p); extern void rt_mutex_setprio(struct task_struct *p, int prio); @@ -1981,10 +1949,9 @@ extern int task_nice(const struct task_struct *p); extern int can_nice(const struct task_struct *p, const int nice); extern int task_curr(const struct task_struct *p); extern int idle_cpu(int cpu); -extern int sched_setscheduler(struct task_struct *, int, - const struct sched_param *); +extern int sched_setscheduler(struct task_struct *, int, struct sched_param *); extern int sched_setscheduler_nocheck(struct task_struct *, int, - const struct sched_param *); + struct sched_param *); extern struct task_struct *idle_task(int cpu); extern struct task_struct *curr_task(int cpu); extern void set_curr_task(int cpu, struct task_struct *p); diff --git a/trunk/include/linux/sfi.h b/trunk/include/linux/sfi.h index fe817918b30e..7f770c638e99 100644 --- a/trunk/include/linux/sfi.h +++ b/trunk/include/linux/sfi.h @@ -77,8 +77,6 @@ #define SFI_OEM_ID_SIZE 6 #define SFI_OEM_TABLE_ID_SIZE 8 -#define SFI_NAME_LEN 16 - #define SFI_SYST_SEARCH_BEGIN 0x000E0000 #define SFI_SYST_SEARCH_END 0x000FFFFF @@ -158,13 +156,13 @@ struct sfi_device_table_entry { u16 addr; u8 irq; u32 max_freq; - char name[SFI_NAME_LEN]; + char name[16]; } __packed; struct sfi_gpio_table_entry { - char controller_name[SFI_NAME_LEN]; + char controller_name[16]; u16 pin_no; - char pin_name[SFI_NAME_LEN]; + char pin_name[16]; } __packed; typedef int (*sfi_table_handler) (struct sfi_table_header *table); diff --git a/trunk/include/linux/stacktrace.h b/trunk/include/linux/stacktrace.h index 25310f1d7f37..51efbef38fb0 100644 --- a/trunk/include/linux/stacktrace.h +++ b/trunk/include/linux/stacktrace.h @@ -2,7 +2,6 @@ #define __LINUX_STACKTRACE_H struct task_struct; -struct pt_regs; #ifdef CONFIG_STACKTRACE struct task_struct; @@ -14,8 +13,7 @@ struct stack_trace { }; extern void save_stack_trace(struct stack_trace *trace); -extern void save_stack_trace_regs(struct stack_trace *trace, - struct pt_regs *regs); +extern void save_stack_trace_bp(struct stack_trace *trace, unsigned long bp); extern void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace); diff --git a/trunk/include/linux/syscalls.h b/trunk/include/linux/syscalls.h index 18cd0684fc4e..cacc27a0e285 100644 --- a/trunk/include/linux/syscalls.h +++ b/trunk/include/linux/syscalls.h @@ -127,6 +127,8 @@ extern struct trace_event_functions exit_syscall_print_funcs; #define SYSCALL_TRACE_ENTER_EVENT(sname) \ static struct syscall_metadata \ __attribute__((__aligned__(4))) __syscall_meta_##sname; \ + static struct ftrace_event_call \ + __attribute__((__aligned__(4))) event_enter_##sname; \ static struct ftrace_event_call __used \ __attribute__((__aligned__(4))) \ __attribute__((section("_ftrace_events"))) \ @@ -135,12 +137,13 @@ extern struct trace_event_functions exit_syscall_print_funcs; .class = &event_class_syscall_enter, \ .event.funcs = &enter_syscall_print_funcs, \ .data = (void *)&__syscall_meta_##sname,\ - }; \ - __TRACE_EVENT_FLAGS(enter_##sname, TRACE_EVENT_FL_CAP_ANY) + } #define SYSCALL_TRACE_EXIT_EVENT(sname) \ static struct syscall_metadata \ __attribute__((__aligned__(4))) __syscall_meta_##sname; \ + static struct ftrace_event_call \ + __attribute__((__aligned__(4))) event_exit_##sname; \ static struct ftrace_event_call __used \ __attribute__((__aligned__(4))) \ __attribute__((section("_ftrace_events"))) \ @@ -149,8 +152,7 @@ extern struct trace_event_functions exit_syscall_print_funcs; .class = &event_class_syscall_exit, \ .event.funcs = &exit_syscall_print_funcs, \ .data = (void *)&__syscall_meta_##sname,\ - }; \ - __TRACE_EVENT_FLAGS(exit_##sname, TRACE_EVENT_FL_CAP_ANY) + } #define SYSCALL_METADATA(sname, nb) \ SYSCALL_TRACE_ENTER_EVENT(sname); \ diff --git a/trunk/include/linux/timer.h b/trunk/include/linux/timer.h index 6abd9138beda..38cf093ef62c 100644 --- a/trunk/include/linux/timer.h +++ b/trunk/include/linux/timer.h @@ -24,9 +24,9 @@ struct timer_list { int slack; #ifdef CONFIG_TIMER_STATS - int start_pid; void *start_site; char start_comm[16]; + int start_pid; #endif #ifdef CONFIG_LOCKDEP struct lockdep_map lockdep_map; @@ -48,38 +48,12 @@ extern struct tvec_base boot_tvec_bases; #define __TIMER_LOCKDEP_MAP_INITIALIZER(_kn) #endif -/* - * Note that all tvec_bases are 2 byte aligned and lower bit of - * base in timer_list is guaranteed to be zero. Use the LSB to - * indicate whether the timer is deferrable. - * - * A deferrable timer will work normally when the system is busy, but - * will not cause a CPU to come out of idle just to service it; instead, - * the timer will be serviced when the CPU eventually wakes up with a - * subsequent non-deferrable timer. - */ -#define TBASE_DEFERRABLE_FLAG (0x1) - #define TIMER_INITIALIZER(_function, _expires, _data) { \ .entry = { .prev = TIMER_ENTRY_STATIC }, \ .function = (_function), \ .expires = (_expires), \ .data = (_data), \ .base = &boot_tvec_bases, \ - .slack = -1, \ - __TIMER_LOCKDEP_MAP_INITIALIZER( \ - __FILE__ ":" __stringify(__LINE__)) \ - } - -#define TBASE_MAKE_DEFERRED(ptr) ((struct tvec_base *) \ - ((unsigned char *)(ptr) + TBASE_DEFERRABLE_FLAG)) - -#define TIMER_DEFERRED_INITIALIZER(_function, _expires, _data) {\ - .entry = { .prev = TIMER_ENTRY_STATIC }, \ - .function = (_function), \ - .expires = (_expires), \ - .data = (_data), \ - .base = TBASE_MAKE_DEFERRED(&boot_tvec_bases), \ __TIMER_LOCKDEP_MAP_INITIALIZER( \ __FILE__ ":" __stringify(__LINE__)) \ } @@ -274,11 +248,11 @@ static inline void timer_stats_timer_clear_start_info(struct timer_list *timer) extern void add_timer(struct timer_list *timer); -extern int try_to_del_timer_sync(struct timer_list *timer); - #ifdef CONFIG_SMP + extern int try_to_del_timer_sync(struct timer_list *timer); extern int del_timer_sync(struct timer_list *timer); #else +# define try_to_del_timer_sync(t) del_timer(t) # define del_timer_sync(t) del_timer(t) #endif diff --git a/trunk/include/linux/timerqueue.h b/trunk/include/linux/timerqueue.h deleted file mode 100644 index d24aabaca474..000000000000 --- a/trunk/include/linux/timerqueue.h +++ /dev/null @@ -1,50 +0,0 @@ -#ifndef _LINUX_TIMERQUEUE_H -#define _LINUX_TIMERQUEUE_H - -#include -#include - - -struct timerqueue_node { - struct rb_node node; - ktime_t expires; -}; - -struct timerqueue_head { - struct rb_root head; - struct timerqueue_node *next; -}; - - -extern void timerqueue_add(struct timerqueue_head *head, - struct timerqueue_node *node); -extern void timerqueue_del(struct timerqueue_head *head, - struct timerqueue_node *node); -extern struct timerqueue_node *timerqueue_iterate_next( - struct timerqueue_node *node); - -/** - * timerqueue_getnext - Returns the timer with the earlies expiration time - * - * @head: head of timerqueue - * - * Returns a pointer to the timer node that has the - * earliest expiration time. - */ -static inline -struct timerqueue_node *timerqueue_getnext(struct timerqueue_head *head) -{ - return head->next; -} - -static inline void timerqueue_init(struct timerqueue_node *node) -{ - RB_CLEAR_NODE(&node->node); -} - -static inline void timerqueue_init_head(struct timerqueue_head *head) -{ - head->head = RB_ROOT; - head->next = NULL; -} -#endif /* _LINUX_TIMERQUEUE_H */ diff --git a/trunk/include/linux/tracepoint.h b/trunk/include/linux/tracepoint.h index d3e4f87e95c0..a4a90b6726ce 100644 --- a/trunk/include/linux/tracepoint.h +++ b/trunk/include/linux/tracepoint.h @@ -106,7 +106,6 @@ static inline void tracepoint_update_probe_range(struct tracepoint *begin, #define TP_PROTO(args...) args #define TP_ARGS(args...) args -#define TP_CONDITION(args...) args #ifdef CONFIG_TRACEPOINTS @@ -120,14 +119,12 @@ static inline void tracepoint_update_probe_range(struct tracepoint *begin, * as "(void *, void)". The DECLARE_TRACE_NOARGS() will pass in just * "void *data", where as the DECLARE_TRACE() will pass in "void *data, proto". */ -#define __DO_TRACE(tp, proto, args, cond) \ +#define __DO_TRACE(tp, proto, args) \ do { \ struct tracepoint_func *it_func_ptr; \ void *it_func; \ void *__data; \ \ - if (!(cond)) \ - return; \ rcu_read_lock_sched_notrace(); \ it_func_ptr = rcu_dereference_sched((tp)->funcs); \ if (it_func_ptr) { \ @@ -145,7 +142,7 @@ static inline void tracepoint_update_probe_range(struct tracepoint *begin, * not add unwanted padding between the beginning of the section and the * structure. Force alignment to the same alignment as the section start. */ -#define __DECLARE_TRACE(name, proto, args, cond, data_proto, data_args) \ +#define __DECLARE_TRACE(name, proto, args, data_proto, data_args) \ extern struct tracepoint __tracepoint_##name; \ static inline void trace_##name(proto) \ { \ @@ -154,8 +151,7 @@ static inline void tracepoint_update_probe_range(struct tracepoint *begin, do_trace: \ __DO_TRACE(&__tracepoint_##name, \ TP_PROTO(data_proto), \ - TP_ARGS(data_args), \ - TP_CONDITION(cond)); \ + TP_ARGS(data_args)); \ } \ static inline int \ register_trace_##name(void (*probe)(data_proto), void *data) \ @@ -190,7 +186,7 @@ do_trace: \ EXPORT_SYMBOL(__tracepoint_##name) #else /* !CONFIG_TRACEPOINTS */ -#define __DECLARE_TRACE(name, proto, args, cond, data_proto, data_args) \ +#define __DECLARE_TRACE(name, proto, args, data_proto, data_args) \ static inline void trace_##name(proto) \ { } \ static inline int \ @@ -231,20 +227,13 @@ do_trace: \ * "void *__data, proto" as the callback prototype. */ #define DECLARE_TRACE_NOARGS(name) \ - __DECLARE_TRACE(name, void, , 1, void *__data, __data) + __DECLARE_TRACE(name, void, , void *__data, __data) #define DECLARE_TRACE(name, proto, args) \ - __DECLARE_TRACE(name, PARAMS(proto), PARAMS(args), 1, \ + __DECLARE_TRACE(name, PARAMS(proto), PARAMS(args), \ PARAMS(void *__data, proto), \ PARAMS(__data, args)) -#define DECLARE_TRACE_CONDITION(name, proto, args, cond) \ - __DECLARE_TRACE(name, PARAMS(proto), PARAMS(args), PARAMS(cond), \ - PARAMS(void *__data, proto), \ - PARAMS(__data, args)) - -#define TRACE_EVENT_FLAGS(event, flag) - #endif /* DECLARE_TRACE */ #ifndef TRACE_EVENT @@ -358,21 +347,11 @@ do_trace: \ DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) #define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) -#define DEFINE_EVENT_CONDITION(template, name, proto, \ - args, cond) \ - DECLARE_TRACE_CONDITION(name, PARAMS(proto), \ - PARAMS(args), PARAMS(cond)) #define TRACE_EVENT(name, proto, args, struct, assign, print) \ DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) #define TRACE_EVENT_FN(name, proto, args, struct, \ assign, print, reg, unreg) \ DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) -#define TRACE_EVENT_CONDITION(name, proto, args, cond, \ - struct, assign, print) \ - DECLARE_TRACE_CONDITION(name, PARAMS(proto), \ - PARAMS(args), PARAMS(cond)) - -#define TRACE_EVENT_FLAGS(event, flag) #endif /* ifdef TRACE_EVENT (see note above) */ diff --git a/trunk/include/linux/workqueue.h b/trunk/include/linux/workqueue.h index bd257fee6031..0c0771f06bfa 100644 --- a/trunk/include/linux/workqueue.h +++ b/trunk/include/linux/workqueue.h @@ -127,20 +127,12 @@ struct execute_work { .timer = TIMER_INITIALIZER(NULL, 0, 0), \ } -#define __DEFERRED_WORK_INITIALIZER(n, f) { \ - .work = __WORK_INITIALIZER((n).work, (f)), \ - .timer = TIMER_DEFERRED_INITIALIZER(NULL, 0, 0), \ - } - #define DECLARE_WORK(n, f) \ struct work_struct n = __WORK_INITIALIZER(n, f) #define DECLARE_DELAYED_WORK(n, f) \ struct delayed_work n = __DELAYED_WORK_INITIALIZER(n, f) -#define DECLARE_DEFERRED_WORK(n, f) \ - struct delayed_work n = __DEFERRED_WORK_INITIALIZER(n, f) - /* * initialize a work item's function pointer */ diff --git a/trunk/include/media/wm8775.h b/trunk/include/media/wm8775.h index 60739c5a23ae..a1c4d417dfa2 100644 --- a/trunk/include/media/wm8775.h +++ b/trunk/include/media/wm8775.h @@ -32,4 +32,7 @@ #define WM8775_AIN3 4 #define WM8775_AIN4 8 +/* subdev group ID */ +#define WM8775_GID (1 << 0) + #endif diff --git a/trunk/include/trace/define_trace.h b/trunk/include/trace/define_trace.h index b0b4eb24d592..1dfab5401511 100644 --- a/trunk/include/trace/define_trace.h +++ b/trunk/include/trace/define_trace.h @@ -26,15 +26,6 @@ #define TRACE_EVENT(name, proto, args, tstruct, assign, print) \ DEFINE_TRACE(name) -#undef TRACE_EVENT_CONDITION -#define TRACE_EVENT_CONDITION(name, proto, args, cond, tstruct, assign, print) \ - TRACE_EVENT(name, \ - PARAMS(proto), \ - PARAMS(args), \ - PARAMS(tstruct), \ - PARAMS(assign), \ - PARAMS(print)) - #undef TRACE_EVENT_FN #define TRACE_EVENT_FN(name, proto, args, tstruct, \ assign, print, reg, unreg) \ @@ -48,10 +39,6 @@ #define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ DEFINE_TRACE(name) -#undef DEFINE_EVENT_CONDITION -#define DEFINE_EVENT_CONDITION(template, name, proto, args, cond) \ - DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args)) - #undef DECLARE_TRACE #define DECLARE_TRACE(name, proto, args) \ DEFINE_TRACE(name) @@ -88,11 +75,9 @@ #undef TRACE_EVENT #undef TRACE_EVENT_FN -#undef TRACE_EVENT_CONDITION #undef DECLARE_EVENT_CLASS #undef DEFINE_EVENT #undef DEFINE_EVENT_PRINT -#undef DEFINE_EVENT_CONDITION #undef TRACE_HEADER_MULTI_READ #undef DECLARE_TRACE diff --git a/trunk/include/trace/events/power.h b/trunk/include/trace/events/power.h index 1bcc2a8c00e2..286784d69b8f 100644 --- a/trunk/include/trace/events/power.h +++ b/trunk/include/trace/events/power.h @@ -7,67 +7,16 @@ #include #include -DECLARE_EVENT_CLASS(cpu, - - TP_PROTO(unsigned int state, unsigned int cpu_id), - - TP_ARGS(state, cpu_id), - - TP_STRUCT__entry( - __field( u32, state ) - __field( u32, cpu_id ) - ), - - TP_fast_assign( - __entry->state = state; - __entry->cpu_id = cpu_id; - ), - - TP_printk("state=%lu cpu_id=%lu", (unsigned long)__entry->state, - (unsigned long)__entry->cpu_id) -); - -DEFINE_EVENT(cpu, cpu_idle, - - TP_PROTO(unsigned int state, unsigned int cpu_id), - - TP_ARGS(state, cpu_id) -); - -/* This file can get included multiple times, TRACE_HEADER_MULTI_READ at top */ -#ifndef _PWR_EVENT_AVOID_DOUBLE_DEFINING -#define _PWR_EVENT_AVOID_DOUBLE_DEFINING - -#define PWR_EVENT_EXIT -1 +#ifndef _TRACE_POWER_ENUM_ +#define _TRACE_POWER_ENUM_ +enum { + POWER_NONE = 0, + POWER_CSTATE = 1, /* C-State */ + POWER_PSTATE = 2, /* Fequency change or DVFS */ + POWER_SSTATE = 3, /* Suspend */ +}; #endif -DEFINE_EVENT(cpu, cpu_frequency, - - TP_PROTO(unsigned int frequency, unsigned int cpu_id), - - TP_ARGS(frequency, cpu_id) -); - -TRACE_EVENT(machine_suspend, - - TP_PROTO(unsigned int state), - - TP_ARGS(state), - - TP_STRUCT__entry( - __field( u32, state ) - ), - - TP_fast_assign( - __entry->state = state; - ), - - TP_printk("state=%lu", (unsigned long)__entry->state) -); - -/* This code will be removed after deprecation time exceeded (2.6.41) */ -#ifdef CONFIG_EVENT_POWER_TRACING_DEPRECATED - /* * The power events are used for cpuidle & suspend (power_start, power_end) * and for cpufreq (power_frequency) @@ -126,36 +75,6 @@ TRACE_EVENT(power_end, ); -/* Deprecated dummy functions must be protected against multi-declartion */ -#ifndef _PWR_EVENT_AVOID_DOUBLE_DEFINING_DEPRECATED -#define _PWR_EVENT_AVOID_DOUBLE_DEFINING_DEPRECATED - -enum { - POWER_NONE = 0, - POWER_CSTATE = 1, - POWER_PSTATE = 2, -}; -#endif /* _PWR_EVENT_AVOID_DOUBLE_DEFINING_DEPRECATED */ - -#else /* CONFIG_EVENT_POWER_TRACING_DEPRECATED */ - -#ifndef _PWR_EVENT_AVOID_DOUBLE_DEFINING_DEPRECATED -#define _PWR_EVENT_AVOID_DOUBLE_DEFINING_DEPRECATED -enum { - POWER_NONE = 0, - POWER_CSTATE = 1, - POWER_PSTATE = 2, -}; - -/* These dummy declaration have to be ripped out when the deprecated - events get removed */ -static inline void trace_power_start(u64 type, u64 state, u64 cpuid) {}; -static inline void trace_power_end(u64 cpuid) {}; -static inline void trace_power_frequency(u64 type, u64 state, u64 cpuid) {}; -#endif /* _PWR_EVENT_AVOID_DOUBLE_DEFINING_DEPRECATED */ - -#endif /* CONFIG_EVENT_POWER_TRACING_DEPRECATED */ - /* * The clock events are used for clock enable/disable and for * clock rate change @@ -234,6 +153,7 @@ DEFINE_EVENT(power_domain, power_domain_target, TP_ARGS(name, state, cpu_id) ); + #endif /* _TRACE_POWER_H */ /* This part must be outside protection */ diff --git a/trunk/include/trace/events/syscalls.h b/trunk/include/trace/events/syscalls.h index 5a4c04a75b3d..fb726ac7caee 100644 --- a/trunk/include/trace/events/syscalls.h +++ b/trunk/include/trace/events/syscalls.h @@ -40,8 +40,6 @@ TRACE_EVENT_FN(sys_enter, syscall_regfunc, syscall_unregfunc ); -TRACE_EVENT_FLAGS(sys_enter, TRACE_EVENT_FL_CAP_ANY) - TRACE_EVENT_FN(sys_exit, TP_PROTO(struct pt_regs *regs, long ret), @@ -64,8 +62,6 @@ TRACE_EVENT_FN(sys_exit, syscall_regfunc, syscall_unregfunc ); -TRACE_EVENT_FLAGS(sys_exit, TRACE_EVENT_FL_CAP_ANY) - #endif /* CONFIG_HAVE_SYSCALL_TRACEPOINTS */ #endif /* _TRACE_EVENTS_SYSCALLS_H */ diff --git a/trunk/include/trace/ftrace.h b/trunk/include/trace/ftrace.h index e16610c208c9..a9377c0083ad 100644 --- a/trunk/include/trace/ftrace.h +++ b/trunk/include/trace/ftrace.h @@ -82,10 +82,6 @@ TRACE_EVENT(name, PARAMS(proto), PARAMS(args), \ PARAMS(tstruct), PARAMS(assign), PARAMS(print)) \ -#undef TRACE_EVENT_FLAGS -#define TRACE_EVENT_FLAGS(name, value) \ - __TRACE_EVENT_FLAGS(name, value) - #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) @@ -133,9 +129,6 @@ #define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args)) -#undef TRACE_EVENT_FLAGS -#define TRACE_EVENT_FLAGS(event, flag) - #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) /* @@ -296,19 +289,13 @@ static struct trace_event_functions ftrace_event_type_funcs_##call = { \ #undef __array #define __array(type, item, len) \ - do { \ - mutex_lock(&event_storage_mutex); \ - BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \ - snprintf(event_storage, sizeof(event_storage), \ - "%s[%d]", #type, len); \ - ret = trace_define_field(event_call, event_storage, #item, \ + BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \ + ret = trace_define_field(event_call, #type "[" #len "]", #item, \ offsetof(typeof(field), item), \ sizeof(field.item), \ is_signed_type(type), FILTER_OTHER); \ - mutex_unlock(&event_storage_mutex); \ - if (ret) \ - return ret; \ - } while (0); + if (ret) \ + return ret; #undef __dynamic_array #define __dynamic_array(type, item, len) \ diff --git a/trunk/init/Kconfig b/trunk/init/Kconfig index 8dfd094e6875..c9728992a776 100644 --- a/trunk/init/Kconfig +++ b/trunk/init/Kconfig @@ -393,6 +393,7 @@ config PREEMPT_RCU config RCU_TRACE bool "Enable tracing for RCU" + depends on TREE_RCU || TREE_PREEMPT_RCU help This option provides tracing in RCU which presents stats in debugfs for debugging RCU implementation. @@ -458,60 +459,6 @@ config TREE_RCU_TRACE TREE_PREEMPT_RCU implementations, permitting Makefile to trivially select kernel/rcutree_trace.c. -config RCU_BOOST - bool "Enable RCU priority boosting" - depends on RT_MUTEXES && TINY_PREEMPT_RCU - default n - help - This option boosts the priority of preempted RCU readers that - block the current preemptible RCU grace period for too long. - This option also prevents heavy loads from blocking RCU - callback invocation for all flavors of RCU. - - Say Y here if you are working with real-time apps or heavy loads - Say N here if you are unsure. - -config RCU_BOOST_PRIO - int "Real-time priority to boost RCU readers to" - range 1 99 - depends on RCU_BOOST - default 1 - help - This option specifies the real-time priority to which preempted - RCU readers are to be boosted. If you are working with CPU-bound - real-time applications, you should specify a priority higher then - the highest-priority CPU-bound application. - - Specify the real-time priority, or take the default if unsure. - -config RCU_BOOST_DELAY - int "Milliseconds to delay boosting after RCU grace-period start" - range 0 3000 - depends on RCU_BOOST - default 500 - help - This option specifies the time to wait after the beginning of - a given grace period before priority-boosting preempted RCU - readers blocking that grace period. Note that any RCU reader - blocking an expedited RCU grace period is boosted immediately. - - Accept the default if unsure. - -config SRCU_SYNCHRONIZE_DELAY - int "Microseconds to delay before waiting for readers" - range 0 20 - default 10 - help - This option controls how long SRCU delays before entering its - loop waiting on SRCU readers. The purpose of this loop is - to avoid the unconditional context-switch penalty that would - otherwise be incurred if there was an active SRCU reader, - in a manner similar to adaptive locking schemes. This should - be set to be a bit longer than the common-case SRCU read-side - critical-section overhead. - - Accept the default if unsure. - endmenu # "RCU Subsystem" config IKCONFIG @@ -794,19 +741,6 @@ config NET_NS endif # NAMESPACES -config SCHED_AUTOGROUP - bool "Automatic process group scheduling" - select EVENTFD - select CGROUPS - select CGROUP_SCHED - select FAIR_GROUP_SCHED - help - This option optimizes the scheduler for common desktop workloads by - automatically creating and populating task groups. This separation - of workloads isolates aggressive CPU burners (like build jobs) from - desktop applications. Task group autogeneration is currently based - upon task session. - config MM_OWNER bool diff --git a/trunk/init/do_mounts.c b/trunk/init/do_mounts.c index 2b54bef33b55..830aaec9c7d5 100644 --- a/trunk/init/do_mounts.c +++ b/trunk/init/do_mounts.c @@ -93,7 +93,7 @@ static int match_dev_by_uuid(struct device *dev, void *data) * * Returns the matching dev_t on success or 0 on failure. */ -static dev_t devt_from_partuuid(char *uuid_str) +static dev_t __init devt_from_partuuid(char *uuid_str) { dev_t res = 0; struct device *dev = NULL; diff --git a/trunk/init/main.c b/trunk/init/main.c index ea51770c0170..8646401f7a0e 100644 --- a/trunk/init/main.c +++ b/trunk/init/main.c @@ -67,7 +67,6 @@ #include #include #include -#include #include #include @@ -604,8 +603,6 @@ asmlinkage void __init start_kernel(void) "enabled *very* early, fixing it\n"); local_irq_disable(); } - idr_init_cache(); - perf_event_init(); rcu_init(); radix_tree_init(); /* init some links before init_ISA_irqs() */ @@ -661,6 +658,7 @@ asmlinkage void __init start_kernel(void) enable_debug_pagealloc(); kmemleak_init(); debug_objects_mem_init(); + idr_init_cache(); setup_per_cpu_pageset(); numa_policy_init(); if (late_time_init) @@ -884,7 +882,6 @@ static int __init kernel_init(void * unused) smp_prepare_cpus(setup_max_cpus); do_pre_smp_initcalls(); - lockup_detector_init(); smp_init(); sched_init_smp(); diff --git a/trunk/kernel/cpu.c b/trunk/kernel/cpu.c index 156cc5556140..f6e726f18491 100644 --- a/trunk/kernel/cpu.c +++ b/trunk/kernel/cpu.c @@ -189,6 +189,7 @@ static inline void check_for_tasks(int cpu) } struct take_cpu_down_param { + struct task_struct *caller; unsigned long mod; void *hcpu; }; @@ -197,6 +198,7 @@ struct take_cpu_down_param { static int __ref take_cpu_down(void *_param) { struct take_cpu_down_param *param = _param; + unsigned int cpu = (unsigned long)param->hcpu; int err; /* Ensure this CPU doesn't handle any more interrupts. */ @@ -206,6 +208,11 @@ static int __ref take_cpu_down(void *_param) cpu_notify(CPU_DYING | param->mod, param->hcpu); + if (task_cpu(param->caller) == cpu) + move_task_off_dead_cpu(cpu, param->caller); + /* Force idle task to run as soon as we yield: it should + immediately notice cpu is offline and die quickly. */ + sched_idle_next(); return 0; } @@ -216,6 +223,7 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen) void *hcpu = (void *)(long)cpu; unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0; struct take_cpu_down_param tcd_param = { + .caller = current, .mod = mod, .hcpu = hcpu, }; @@ -245,15 +253,9 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen) } BUG_ON(cpu_online(cpu)); - /* - * The migration_call() CPU_DYING callback will have removed all - * runnable tasks from the cpu, there's only the idle task left now - * that the migration thread is done doing the stop_machine thing. - * - * Wait for the stop thread to go away. - */ + /* Wait for it to sleep (leaving idle task). */ while (!idle_cpu(cpu)) - cpu_relax(); + yield(); /* This actually kills the CPU. */ __cpu_die(cpu); @@ -384,14 +386,6 @@ int __cpuinit cpu_up(unsigned int cpu) #ifdef CONFIG_PM_SLEEP_SMP static cpumask_var_t frozen_cpus; -void __weak arch_disable_nonboot_cpus_begin(void) -{ -} - -void __weak arch_disable_nonboot_cpus_end(void) -{ -} - int disable_nonboot_cpus(void) { int cpu, first_cpu, error = 0; @@ -403,7 +397,6 @@ int disable_nonboot_cpus(void) * with the userspace trying to use the CPU hotplug at the same time */ cpumask_clear(frozen_cpus); - arch_disable_nonboot_cpus_begin(); printk("Disabling non-boot CPUs ...\n"); for_each_online_cpu(cpu) { @@ -419,8 +412,6 @@ int disable_nonboot_cpus(void) } } - arch_disable_nonboot_cpus_end(); - if (!error) { BUG_ON(num_online_cpus() > 1); /* Make sure the CPUs won't be enabled by someone else */ diff --git a/trunk/kernel/fork.c b/trunk/kernel/fork.c index 7d164e25b0f0..5447dc7defa9 100644 --- a/trunk/kernel/fork.c +++ b/trunk/kernel/fork.c @@ -174,10 +174,8 @@ static inline void free_signal_struct(struct signal_struct *sig) static inline void put_signal_struct(struct signal_struct *sig) { - if (atomic_dec_and_test(&sig->sigcnt)) { - sched_autogroup_exit(sig); + if (atomic_dec_and_test(&sig->sigcnt)) free_signal_struct(sig); - } } void __put_task_struct(struct task_struct *tsk) @@ -907,7 +905,6 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) posix_cpu_timers_init_group(sig); tty_audit_fork(sig); - sched_autogroup_fork(sig); sig->oom_adj = current->signal->oom_adj; sig->oom_score_adj = current->signal->oom_score_adj; @@ -1318,7 +1315,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, } bad_fork_cleanup_signal: if (!(clone_flags & CLONE_THREAD)) - put_signal_struct(p->signal); + free_signal_struct(p->signal); bad_fork_cleanup_sighand: __cleanup_sighand(p->sighand); bad_fork_cleanup_fs: diff --git a/trunk/kernel/futex.c b/trunk/kernel/futex.c index 3019b92e6917..40a8777a27d0 100644 --- a/trunk/kernel/futex.c +++ b/trunk/kernel/futex.c @@ -68,14 +68,6 @@ int __read_mostly futex_cmpxchg_enabled; #define FUTEX_HASHBITS (CONFIG_BASE_SMALL ? 4 : 8) -/* - * Futex flags used to encode options to functions and preserve them across - * restarts. - */ -#define FLAGS_SHARED 0x01 -#define FLAGS_CLOCKRT 0x02 -#define FLAGS_HAS_TIMEOUT 0x04 - /* * Priority Inheritance state: */ @@ -131,12 +123,6 @@ struct futex_q { u32 bitset; }; -static const struct futex_q futex_q_init = { - /* list gets initialized in queue_me()*/ - .key = FUTEX_KEY_INIT, - .bitset = FUTEX_BITSET_MATCH_ANY -}; - /* * Hash buckets are shared by all the futex_keys that hash to the same * location. Each key may have multiple futex_q structures, one for each task @@ -297,7 +283,8 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key) return 0; } -static inline void put_futex_key(union futex_key *key) +static inline +void put_futex_key(int fshared, union futex_key *key) { drop_futex_key_refs(key); } @@ -883,8 +870,7 @@ double_unlock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2) /* * Wake up waiters matching bitset queued on this futex (uaddr). */ -static int -futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset) +static int futex_wake(u32 __user *uaddr, int fshared, int nr_wake, u32 bitset) { struct futex_hash_bucket *hb; struct futex_q *this, *next; @@ -895,7 +881,7 @@ futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset) if (!bitset) return -EINVAL; - ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key); + ret = get_futex_key(uaddr, fshared, &key); if (unlikely(ret != 0)) goto out; @@ -921,7 +907,7 @@ futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset) } spin_unlock(&hb->lock); - put_futex_key(&key); + put_futex_key(fshared, &key); out: return ret; } @@ -931,7 +917,7 @@ futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset) * to this virtual address: */ static int -futex_wake_op(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2, +futex_wake_op(u32 __user *uaddr1, int fshared, u32 __user *uaddr2, int nr_wake, int nr_wake2, int op) { union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT; @@ -941,10 +927,10 @@ futex_wake_op(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2, int ret, op_ret; retry: - ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1); + ret = get_futex_key(uaddr1, fshared, &key1); if (unlikely(ret != 0)) goto out; - ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2); + ret = get_futex_key(uaddr2, fshared, &key2); if (unlikely(ret != 0)) goto out_put_key1; @@ -976,11 +962,11 @@ futex_wake_op(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2, if (ret) goto out_put_keys; - if (!(flags & FLAGS_SHARED)) + if (!fshared) goto retry_private; - put_futex_key(&key2); - put_futex_key(&key1); + put_futex_key(fshared, &key2); + put_futex_key(fshared, &key1); goto retry; } @@ -1010,9 +996,9 @@ futex_wake_op(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2, double_unlock_hb(hb1, hb2); out_put_keys: - put_futex_key(&key2); + put_futex_key(fshared, &key2); out_put_key1: - put_futex_key(&key1); + put_futex_key(fshared, &key1); out: return ret; } @@ -1147,13 +1133,13 @@ static int futex_proxy_trylock_atomic(u32 __user *pifutex, /** * futex_requeue() - Requeue waiters from uaddr1 to uaddr2 * @uaddr1: source futex user address - * @flags: futex flags (FLAGS_SHARED, etc.) + * @fshared: 0 for a PROCESS_PRIVATE futex, 1 for PROCESS_SHARED * @uaddr2: target futex user address * @nr_wake: number of waiters to wake (must be 1 for requeue_pi) * @nr_requeue: number of waiters to requeue (0-INT_MAX) * @cmpval: @uaddr1 expected value (or %NULL) * @requeue_pi: if we are attempting to requeue from a non-pi futex to a - * pi futex (pi to pi requeue is not supported) + * pi futex (pi to pi requeue is not supported) * * Requeue waiters on uaddr1 to uaddr2. In the requeue_pi case, try to acquire * uaddr2 atomically on behalf of the top waiter. @@ -1162,9 +1148,9 @@ static int futex_proxy_trylock_atomic(u32 __user *pifutex, * >=0 - on success, the number of tasks requeued or woken * <0 - on error */ -static int futex_requeue(u32 __user *uaddr1, unsigned int flags, - u32 __user *uaddr2, int nr_wake, int nr_requeue, - u32 *cmpval, int requeue_pi) +static int futex_requeue(u32 __user *uaddr1, int fshared, u32 __user *uaddr2, + int nr_wake, int nr_requeue, u32 *cmpval, + int requeue_pi) { union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT; int drop_count = 0, task_count = 0, ret; @@ -1205,10 +1191,10 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags, pi_state = NULL; } - ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1); + ret = get_futex_key(uaddr1, fshared, &key1); if (unlikely(ret != 0)) goto out; - ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2); + ret = get_futex_key(uaddr2, fshared, &key2); if (unlikely(ret != 0)) goto out_put_key1; @@ -1230,11 +1216,11 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags, if (ret) goto out_put_keys; - if (!(flags & FLAGS_SHARED)) + if (!fshared) goto retry_private; - put_futex_key(&key2); - put_futex_key(&key1); + put_futex_key(fshared, &key2); + put_futex_key(fshared, &key1); goto retry; } if (curval != *cmpval) { @@ -1274,8 +1260,8 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags, break; case -EFAULT: double_unlock_hb(hb1, hb2); - put_futex_key(&key2); - put_futex_key(&key1); + put_futex_key(fshared, &key2); + put_futex_key(fshared, &key1); ret = fault_in_user_writeable(uaddr2); if (!ret) goto retry; @@ -1283,8 +1269,8 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags, case -EAGAIN: /* The owner was exiting, try again. */ double_unlock_hb(hb1, hb2); - put_futex_key(&key2); - put_futex_key(&key1); + put_futex_key(fshared, &key2); + put_futex_key(fshared, &key1); cond_resched(); goto retry; default: @@ -1366,9 +1352,9 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags, drop_futex_key_refs(&key1); out_put_keys: - put_futex_key(&key2); + put_futex_key(fshared, &key2); out_put_key1: - put_futex_key(&key1); + put_futex_key(fshared, &key1); out: if (pi_state != NULL) free_pi_state(pi_state); @@ -1508,7 +1494,7 @@ static void unqueue_me_pi(struct futex_q *q) * private futexes. */ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, - struct task_struct *newowner) + struct task_struct *newowner, int fshared) { u32 newtid = task_pid_vnr(newowner) | FUTEX_WAITERS; struct futex_pi_state *pi_state = q->pi_state; @@ -1601,11 +1587,20 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, goto retry; } +/* + * In case we must use restart_block to restart a futex_wait, + * we encode in the 'flags' shared capability + */ +#define FLAGS_SHARED 0x01 +#define FLAGS_CLOCKRT 0x02 +#define FLAGS_HAS_TIMEOUT 0x04 + static long futex_wait_restart(struct restart_block *restart); /** * fixup_owner() - Post lock pi_state and corner case management * @uaddr: user address of the futex + * @fshared: whether the futex is shared (1) or not (0) * @q: futex_q (contains pi_state and access to the rt_mutex) * @locked: if the attempt to take the rt_mutex succeeded (1) or not (0) * @@ -1618,7 +1613,8 @@ static long futex_wait_restart(struct restart_block *restart); * 0 - success, lock not taken * <0 - on error (-EFAULT) */ -static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked) +static int fixup_owner(u32 __user *uaddr, int fshared, struct futex_q *q, + int locked) { struct task_struct *owner; int ret = 0; @@ -1629,7 +1625,7 @@ static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked) * did a lock-steal - fix up the PI-state in that case: */ if (q->pi_state->owner != current) - ret = fixup_pi_state_owner(uaddr, q, current); + ret = fixup_pi_state_owner(uaddr, q, current, fshared); goto out; } @@ -1656,7 +1652,7 @@ static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked) * lock. Fix the state up. */ owner = rt_mutex_owner(&q->pi_state->pi_mutex); - ret = fixup_pi_state_owner(uaddr, q, owner); + ret = fixup_pi_state_owner(uaddr, q, owner, fshared); goto out; } @@ -1719,7 +1715,7 @@ static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q, * futex_wait_setup() - Prepare to wait on a futex * @uaddr: the futex userspace address * @val: the expected value - * @flags: futex flags (FLAGS_SHARED, etc.) + * @fshared: whether the futex is shared (1) or not (0) * @q: the associated futex_q * @hb: storage for hash_bucket pointer to be returned to caller * @@ -1732,7 +1728,7 @@ static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q, * 0 - uaddr contains val and hb has been locked * <1 - -EFAULT or -EWOULDBLOCK (uaddr does not contain val) and hb is unlcoked */ -static int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags, +static int futex_wait_setup(u32 __user *uaddr, u32 val, int fshared, struct futex_q *q, struct futex_hash_bucket **hb) { u32 uval; @@ -1756,7 +1752,8 @@ static int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags, * rare, but normal. */ retry: - ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q->key); + q->key = FUTEX_KEY_INIT; + ret = get_futex_key(uaddr, fshared, &q->key); if (unlikely(ret != 0)) return ret; @@ -1772,10 +1769,10 @@ static int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags, if (ret) goto out; - if (!(flags & FLAGS_SHARED)) + if (!fshared) goto retry_private; - put_futex_key(&q->key); + put_futex_key(fshared, &q->key); goto retry; } @@ -1786,29 +1783,32 @@ static int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags, out: if (ret) - put_futex_key(&q->key); + put_futex_key(fshared, &q->key); return ret; } -static int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val, - ktime_t *abs_time, u32 bitset) +static int futex_wait(u32 __user *uaddr, int fshared, + u32 val, ktime_t *abs_time, u32 bitset, int clockrt) { struct hrtimer_sleeper timeout, *to = NULL; struct restart_block *restart; struct futex_hash_bucket *hb; - struct futex_q q = futex_q_init; + struct futex_q q; int ret; if (!bitset) return -EINVAL; + + q.pi_state = NULL; q.bitset = bitset; + q.rt_waiter = NULL; + q.requeue_pi_key = NULL; if (abs_time) { to = &timeout; - hrtimer_init_on_stack(&to->timer, (flags & FLAGS_CLOCKRT) ? - CLOCK_REALTIME : CLOCK_MONOTONIC, - HRTIMER_MODE_ABS); + hrtimer_init_on_stack(&to->timer, clockrt ? CLOCK_REALTIME : + CLOCK_MONOTONIC, HRTIMER_MODE_ABS); hrtimer_init_sleeper(to, current); hrtimer_set_expires_range_ns(&to->timer, *abs_time, current->timer_slack_ns); @@ -1819,7 +1819,7 @@ static int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val, * Prepare to wait on uaddr. On success, holds hb lock and increments * q.key refs. */ - ret = futex_wait_setup(uaddr, val, flags, &q, &hb); + ret = futex_wait_setup(uaddr, val, fshared, &q, &hb); if (ret) goto out; @@ -1852,7 +1852,12 @@ static int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val, restart->futex.val = val; restart->futex.time = abs_time->tv64; restart->futex.bitset = bitset; - restart->futex.flags = flags; + restart->futex.flags = FLAGS_HAS_TIMEOUT; + + if (fshared) + restart->futex.flags |= FLAGS_SHARED; + if (clockrt) + restart->futex.flags |= FLAGS_CLOCKRT; ret = -ERESTART_RESTARTBLOCK; @@ -1868,6 +1873,7 @@ static int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val, static long futex_wait_restart(struct restart_block *restart) { u32 __user *uaddr = restart->futex.uaddr; + int fshared = 0; ktime_t t, *tp = NULL; if (restart->futex.flags & FLAGS_HAS_TIMEOUT) { @@ -1875,9 +1881,11 @@ static long futex_wait_restart(struct restart_block *restart) tp = &t; } restart->fn = do_no_restart_syscall; - - return (long)futex_wait(uaddr, restart->futex.flags, - restart->futex.val, tp, restart->futex.bitset); + if (restart->futex.flags & FLAGS_SHARED) + fshared = 1; + return (long)futex_wait(uaddr, fshared, restart->futex.val, tp, + restart->futex.bitset, + restart->futex.flags & FLAGS_CLOCKRT); } @@ -1887,12 +1895,12 @@ static long futex_wait_restart(struct restart_block *restart) * if there are waiters then it will block, it does PI, etc. (Due to * races the kernel might see a 0 value of the futex too.) */ -static int futex_lock_pi(u32 __user *uaddr, unsigned int flags, int detect, - ktime_t *time, int trylock) +static int futex_lock_pi(u32 __user *uaddr, int fshared, + int detect, ktime_t *time, int trylock) { struct hrtimer_sleeper timeout, *to = NULL; struct futex_hash_bucket *hb; - struct futex_q q = futex_q_init; + struct futex_q q; int res, ret; if (refill_pi_state_cache()) @@ -1906,8 +1914,12 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags, int detect, hrtimer_set_expires(&to->timer, *time); } + q.pi_state = NULL; + q.rt_waiter = NULL; + q.requeue_pi_key = NULL; retry: - ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q.key); + q.key = FUTEX_KEY_INIT; + ret = get_futex_key(uaddr, fshared, &q.key); if (unlikely(ret != 0)) goto out; @@ -1929,7 +1941,7 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags, int detect, * exit to complete. */ queue_unlock(&q, hb); - put_futex_key(&q.key); + put_futex_key(fshared, &q.key); cond_resched(); goto retry; default: @@ -1959,7 +1971,7 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags, int detect, * Fixup the pi_state owner and possibly acquire the lock if we * haven't already. */ - res = fixup_owner(uaddr, &q, !ret); + res = fixup_owner(uaddr, fshared, &q, !ret); /* * If fixup_owner() returned an error, proprogate that. If it acquired * the lock, clear our -ETIMEDOUT or -EINTR. @@ -1983,7 +1995,7 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags, int detect, queue_unlock(&q, hb); out_put_key: - put_futex_key(&q.key); + put_futex_key(fshared, &q.key); out: if (to) destroy_hrtimer_on_stack(&to->timer); @@ -1996,10 +2008,10 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags, int detect, if (ret) goto out_put_key; - if (!(flags & FLAGS_SHARED)) + if (!fshared) goto retry_private; - put_futex_key(&q.key); + put_futex_key(fshared, &q.key); goto retry; } @@ -2008,7 +2020,7 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags, int detect, * This is the in-kernel slowpath: we look up the PI state (if any), * and do the rt-mutex unlock. */ -static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags) +static int futex_unlock_pi(u32 __user *uaddr, int fshared) { struct futex_hash_bucket *hb; struct futex_q *this, *next; @@ -2026,7 +2038,7 @@ static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags) if ((uval & FUTEX_TID_MASK) != task_pid_vnr(current)) return -EPERM; - ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key); + ret = get_futex_key(uaddr, fshared, &key); if (unlikely(ret != 0)) goto out; @@ -2081,14 +2093,14 @@ static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags) out_unlock: spin_unlock(&hb->lock); - put_futex_key(&key); + put_futex_key(fshared, &key); out: return ret; pi_faulted: spin_unlock(&hb->lock); - put_futex_key(&key); + put_futex_key(fshared, &key); ret = fault_in_user_writeable(uaddr); if (!ret) @@ -2148,7 +2160,7 @@ int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb, /** * futex_wait_requeue_pi() - Wait on uaddr and take uaddr2 * @uaddr: the futex we initially wait on (non-pi) - * @flags: futex flags (FLAGS_SHARED, FLAGS_CLOCKRT, etc.), they must be + * @fshared: whether the futexes are shared (1) or not (0). They must be * the same type, no requeueing from private to shared, etc. * @val: the expected value of uaddr * @abs_time: absolute timeout @@ -2186,16 +2198,16 @@ int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb, * 0 - On success * <0 - On error */ -static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, +static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared, u32 val, ktime_t *abs_time, u32 bitset, - u32 __user *uaddr2) + int clockrt, u32 __user *uaddr2) { struct hrtimer_sleeper timeout, *to = NULL; struct rt_mutex_waiter rt_waiter; struct rt_mutex *pi_mutex = NULL; struct futex_hash_bucket *hb; - union futex_key key2 = FUTEX_KEY_INIT; - struct futex_q q = futex_q_init; + union futex_key key2; + struct futex_q q; int res, ret; if (!bitset) @@ -2203,9 +2215,8 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, if (abs_time) { to = &timeout; - hrtimer_init_on_stack(&to->timer, (flags & FLAGS_CLOCKRT) ? - CLOCK_REALTIME : CLOCK_MONOTONIC, - HRTIMER_MODE_ABS); + hrtimer_init_on_stack(&to->timer, clockrt ? CLOCK_REALTIME : + CLOCK_MONOTONIC, HRTIMER_MODE_ABS); hrtimer_init_sleeper(to, current); hrtimer_set_expires_range_ns(&to->timer, *abs_time, current->timer_slack_ns); @@ -2218,10 +2229,12 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, debug_rt_mutex_init_waiter(&rt_waiter); rt_waiter.task = NULL; - ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2); + key2 = FUTEX_KEY_INIT; + ret = get_futex_key(uaddr2, fshared, &key2); if (unlikely(ret != 0)) goto out; + q.pi_state = NULL; q.bitset = bitset; q.rt_waiter = &rt_waiter; q.requeue_pi_key = &key2; @@ -2230,7 +2243,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, * Prepare to wait on uaddr. On success, increments q.key (key1) ref * count. */ - ret = futex_wait_setup(uaddr, val, flags, &q, &hb); + ret = futex_wait_setup(uaddr, val, fshared, &q, &hb); if (ret) goto out_key2; @@ -2260,7 +2273,8 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, */ if (q.pi_state && (q.pi_state->owner != current)) { spin_lock(q.lock_ptr); - ret = fixup_pi_state_owner(uaddr2, &q, current); + ret = fixup_pi_state_owner(uaddr2, &q, current, + fshared); spin_unlock(q.lock_ptr); } } else { @@ -2279,7 +2293,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, * Fixup the pi_state owner and possibly acquire the lock if we * haven't already. */ - res = fixup_owner(uaddr2, &q, !ret); + res = fixup_owner(uaddr2, fshared, &q, !ret); /* * If fixup_owner() returned an error, proprogate that. If it * acquired the lock, clear -ETIMEDOUT or -EINTR. @@ -2310,9 +2324,9 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, } out_put_keys: - put_futex_key(&q.key); + put_futex_key(fshared, &q.key); out_key2: - put_futex_key(&key2); + put_futex_key(fshared, &key2); out: if (to) { @@ -2537,57 +2551,58 @@ void exit_robust_list(struct task_struct *curr) long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, u32 __user *uaddr2, u32 val2, u32 val3) { - int ret = -ENOSYS, cmd = op & FUTEX_CMD_MASK; - unsigned int flags = 0; + int clockrt, ret = -ENOSYS; + int cmd = op & FUTEX_CMD_MASK; + int fshared = 0; if (!(op & FUTEX_PRIVATE_FLAG)) - flags |= FLAGS_SHARED; + fshared = 1; - if (op & FUTEX_CLOCK_REALTIME) { - flags |= FLAGS_CLOCKRT; - if (cmd != FUTEX_WAIT_BITSET && cmd != FUTEX_WAIT_REQUEUE_PI) - return -ENOSYS; - } + clockrt = op & FUTEX_CLOCK_REALTIME; + if (clockrt && cmd != FUTEX_WAIT_BITSET && cmd != FUTEX_WAIT_REQUEUE_PI) + return -ENOSYS; switch (cmd) { case FUTEX_WAIT: val3 = FUTEX_BITSET_MATCH_ANY; case FUTEX_WAIT_BITSET: - ret = futex_wait(uaddr, flags, val, timeout, val3); + ret = futex_wait(uaddr, fshared, val, timeout, val3, clockrt); break; case FUTEX_WAKE: val3 = FUTEX_BITSET_MATCH_ANY; case FUTEX_WAKE_BITSET: - ret = futex_wake(uaddr, flags, val, val3); + ret = futex_wake(uaddr, fshared, val, val3); break; case FUTEX_REQUEUE: - ret = futex_requeue(uaddr, flags, uaddr2, val, val2, NULL, 0); + ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, NULL, 0); break; case FUTEX_CMP_REQUEUE: - ret = futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 0); + ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, &val3, + 0); break; case FUTEX_WAKE_OP: - ret = futex_wake_op(uaddr, flags, uaddr2, val, val2, val3); + ret = futex_wake_op(uaddr, fshared, uaddr2, val, val2, val3); break; case FUTEX_LOCK_PI: if (futex_cmpxchg_enabled) - ret = futex_lock_pi(uaddr, flags, val, timeout, 0); + ret = futex_lock_pi(uaddr, fshared, val, timeout, 0); break; case FUTEX_UNLOCK_PI: if (futex_cmpxchg_enabled) - ret = futex_unlock_pi(uaddr, flags); + ret = futex_unlock_pi(uaddr, fshared); break; case FUTEX_TRYLOCK_PI: if (futex_cmpxchg_enabled) - ret = futex_lock_pi(uaddr, flags, 0, timeout, 1); + ret = futex_lock_pi(uaddr, fshared, 0, timeout, 1); break; case FUTEX_WAIT_REQUEUE_PI: val3 = FUTEX_BITSET_MATCH_ANY; - ret = futex_wait_requeue_pi(uaddr, flags, val, timeout, val3, - uaddr2); + ret = futex_wait_requeue_pi(uaddr, fshared, val, timeout, val3, + clockrt, uaddr2); break; case FUTEX_CMP_REQUEUE_PI: - ret = futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 1); + ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, &val3, + 1); break; default: ret = -ENOSYS; diff --git a/trunk/kernel/hrtimer.c b/trunk/kernel/hrtimer.c index f2429fc3438c..72206cf5c6cf 100644 --- a/trunk/kernel/hrtimer.c +++ b/trunk/kernel/hrtimer.c @@ -516,13 +516,10 @@ hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base, int skip_equal) for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++, base++) { struct hrtimer *timer; - struct timerqueue_node *next; - next = timerqueue_getnext(&base->active); - if (!next) + if (!base->first) continue; - timer = container_of(next, struct hrtimer, node); - + timer = rb_entry(base->first, struct hrtimer, node); expires = ktime_sub(hrtimer_get_expires(timer), base->offset); /* * clock_was_set() has changed base->offset so the @@ -843,17 +840,48 @@ EXPORT_SYMBOL_GPL(hrtimer_forward); static int enqueue_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base) { + struct rb_node **link = &base->active.rb_node; + struct rb_node *parent = NULL; + struct hrtimer *entry; + int leftmost = 1; + debug_activate(timer); - timerqueue_add(&base->active, &timer->node); + /* + * Find the right place in the rbtree: + */ + while (*link) { + parent = *link; + entry = rb_entry(parent, struct hrtimer, node); + /* + * We dont care about collisions. Nodes with + * the same expiry time stay together. + */ + if (hrtimer_get_expires_tv64(timer) < + hrtimer_get_expires_tv64(entry)) { + link = &(*link)->rb_left; + } else { + link = &(*link)->rb_right; + leftmost = 0; + } + } + + /* + * Insert the timer to the rbtree and check whether it + * replaces the first pending timer + */ + if (leftmost) + base->first = &timer->node; + rb_link_node(&timer->node, parent, link); + rb_insert_color(&timer->node, &base->active); /* * HRTIMER_STATE_ENQUEUED is or'ed to the current state to preserve the * state of a possibly running callback. */ timer->state |= HRTIMER_STATE_ENQUEUED; - return (&timer->node == base->active.next); + return leftmost; } /* @@ -873,7 +901,12 @@ static void __remove_hrtimer(struct hrtimer *timer, if (!(timer->state & HRTIMER_STATE_ENQUEUED)) goto out; - if (&timer->node == timerqueue_getnext(&base->active)) { + /* + * Remove the timer from the rbtree and replace the first + * entry pointer if necessary. + */ + if (base->first == &timer->node) { + base->first = rb_next(&timer->node); #ifdef CONFIG_HIGH_RES_TIMERS /* Reprogram the clock event device. if enabled */ if (reprogram && hrtimer_hres_active()) { @@ -886,7 +919,7 @@ static void __remove_hrtimer(struct hrtimer *timer, } #endif } - timerqueue_del(&base->active, &timer->node); + rb_erase(&timer->node, &base->active); out: timer->state = newstate; } @@ -1095,13 +1128,11 @@ ktime_t hrtimer_get_next_event(void) if (!hrtimer_hres_active()) { for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++, base++) { struct hrtimer *timer; - struct timerqueue_node *next; - next = timerqueue_getnext(&base->active); - if (!next) + if (!base->first) continue; - timer = container_of(next, struct hrtimer, node); + timer = rb_entry(base->first, struct hrtimer, node); delta.tv64 = hrtimer_get_expires_tv64(timer); delta = ktime_sub(delta, base->get_time()); if (delta.tv64 < mindelta.tv64) @@ -1131,7 +1162,6 @@ static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id, timer->base = &cpu_base->clock_base[clock_id]; hrtimer_init_timer_hres(timer); - timerqueue_init(&timer->node); #ifdef CONFIG_TIMER_STATS timer->start_site = NULL; @@ -1248,14 +1278,14 @@ void hrtimer_interrupt(struct clock_event_device *dev) for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) { ktime_t basenow; - struct timerqueue_node *node; + struct rb_node *node; basenow = ktime_add(now, base->offset); - while ((node = timerqueue_getnext(&base->active))) { + while ((node = base->first)) { struct hrtimer *timer; - timer = container_of(node, struct hrtimer, node); + timer = rb_entry(node, struct hrtimer, node); /* * The immediate goal for using the softexpires is @@ -1411,7 +1441,7 @@ void hrtimer_run_pending(void) */ void hrtimer_run_queues(void) { - struct timerqueue_node *node; + struct rb_node *node; struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases); struct hrtimer_clock_base *base; int index, gettime = 1; @@ -1421,7 +1451,8 @@ void hrtimer_run_queues(void) for (index = 0; index < HRTIMER_MAX_CLOCK_BASES; index++) { base = &cpu_base->clock_base[index]; - if (!timerqueue_getnext(&base->active)) + + if (!base->first) continue; if (gettime) { @@ -1431,10 +1462,10 @@ void hrtimer_run_queues(void) raw_spin_lock(&cpu_base->lock); - while ((node = timerqueue_getnext(&base->active))) { + while ((node = base->first)) { struct hrtimer *timer; - timer = container_of(node, struct hrtimer, node); + timer = rb_entry(node, struct hrtimer, node); if (base->softirq_time.tv64 <= hrtimer_get_expires_tv64(timer)) break; @@ -1599,10 +1630,8 @@ static void __cpuinit init_hrtimers_cpu(int cpu) raw_spin_lock_init(&cpu_base->lock); - for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) { + for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) cpu_base->clock_base[i].cpu_base = cpu_base; - timerqueue_init_head(&cpu_base->clock_base[i].active); - } hrtimer_init_hres(cpu_base); } @@ -1613,10 +1642,10 @@ static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base, struct hrtimer_clock_base *new_base) { struct hrtimer *timer; - struct timerqueue_node *node; + struct rb_node *node; - while ((node = timerqueue_getnext(&old_base->active))) { - timer = container_of(node, struct hrtimer, node); + while ((node = rb_first(&old_base->active))) { + timer = rb_entry(node, struct hrtimer, node); BUG_ON(hrtimer_callback_running(timer)); debug_deactivate(timer); diff --git a/trunk/kernel/hw_breakpoint.c b/trunk/kernel/hw_breakpoint.c index 086adf25a55e..e5325825aeb6 100644 --- a/trunk/kernel/hw_breakpoint.c +++ b/trunk/kernel/hw_breakpoint.c @@ -641,7 +641,7 @@ int __init init_hw_breakpoint(void) constraints_initialized = 1; - perf_pmu_register(&perf_breakpoint, "breakpoint", PERF_TYPE_BREAKPOINT); + perf_pmu_register(&perf_breakpoint); return register_die_notifier(&hw_breakpoint_exceptions_nb); diff --git a/trunk/kernel/irq/manage.c b/trunk/kernel/irq/manage.c index 91a5fa25054e..5f92acc5f952 100644 --- a/trunk/kernel/irq/manage.c +++ b/trunk/kernel/irq/manage.c @@ -577,9 +577,7 @@ irq_thread_check_affinity(struct irq_desc *desc, struct irqaction *action) { } */ static int irq_thread(void *data) { - static struct sched_param param = { - .sched_priority = MAX_USER_RT_PRIO/2, - }; + struct sched_param param = { .sched_priority = MAX_USER_RT_PRIO/2, }; struct irqaction *action = data; struct irq_desc *desc = irq_to_desc(action->irq); int wake, oneshot = desc->status & IRQ_ONESHOT; diff --git a/trunk/kernel/kprobes.c b/trunk/kernel/kprobes.c index 7663e5df0e6f..9737a76e106f 100644 --- a/trunk/kernel/kprobes.c +++ b/trunk/kernel/kprobes.c @@ -354,20 +354,13 @@ static inline int kprobe_aggrprobe(struct kprobe *p) return p->pre_handler == aggr_pre_handler; } -/* Return true(!0) if the kprobe is unused */ -static inline int kprobe_unused(struct kprobe *p) -{ - return kprobe_aggrprobe(p) && kprobe_disabled(p) && - list_empty(&p->list); -} - /* * Keep all fields in the kprobe consistent */ -static inline void copy_kprobe(struct kprobe *ap, struct kprobe *p) +static inline void copy_kprobe(struct kprobe *old_p, struct kprobe *p) { - memcpy(&p->opcode, &ap->opcode, sizeof(kprobe_opcode_t)); - memcpy(&p->ainsn, &ap->ainsn, sizeof(struct arch_specific_insn)); + memcpy(&p->opcode, &old_p->opcode, sizeof(kprobe_opcode_t)); + memcpy(&p->ainsn, &old_p->ainsn, sizeof(struct arch_specific_insn)); } #ifdef CONFIG_OPTPROBES @@ -391,17 +384,6 @@ void __kprobes opt_pre_handler(struct kprobe *p, struct pt_regs *regs) } } -/* Free optimized instructions and optimized_kprobe */ -static __kprobes void free_aggr_kprobe(struct kprobe *p) -{ - struct optimized_kprobe *op; - - op = container_of(p, struct optimized_kprobe, kp); - arch_remove_optimized_kprobe(op); - arch_remove_kprobe(p); - kfree(op); -} - /* Return true(!0) if the kprobe is ready for optimization. */ static inline int kprobe_optready(struct kprobe *p) { @@ -415,33 +397,6 @@ static inline int kprobe_optready(struct kprobe *p) return 0; } -/* Return true(!0) if the kprobe is disarmed. Note: p must be on hash list */ -static inline int kprobe_disarmed(struct kprobe *p) -{ - struct optimized_kprobe *op; - - /* If kprobe is not aggr/opt probe, just return kprobe is disabled */ - if (!kprobe_aggrprobe(p)) - return kprobe_disabled(p); - - op = container_of(p, struct optimized_kprobe, kp); - - return kprobe_disabled(p) && list_empty(&op->list); -} - -/* Return true(!0) if the probe is queued on (un)optimizing lists */ -static int __kprobes kprobe_queued(struct kprobe *p) -{ - struct optimized_kprobe *op; - - if (kprobe_aggrprobe(p)) { - op = container_of(p, struct optimized_kprobe, kp); - if (!list_empty(&op->list)) - return 1; - } - return 0; -} - /* * Return an optimized kprobe whose optimizing code replaces * instructions including addr (exclude breakpoint). @@ -467,23 +422,30 @@ static struct kprobe *__kprobes get_optimized_kprobe(unsigned long addr) /* Optimization staging list, protected by kprobe_mutex */ static LIST_HEAD(optimizing_list); -static LIST_HEAD(unoptimizing_list); static void kprobe_optimizer(struct work_struct *work); static DECLARE_DELAYED_WORK(optimizing_work, kprobe_optimizer); -static DECLARE_COMPLETION(optimizer_comp); #define OPTIMIZE_DELAY 5 -/* - * Optimize (replace a breakpoint with a jump) kprobes listed on - * optimizing_list. - */ -static __kprobes void do_optimize_kprobes(void) +/* Kprobe jump optimizer */ +static __kprobes void kprobe_optimizer(struct work_struct *work) { - /* Optimization never be done when disarmed */ - if (kprobes_all_disarmed || !kprobes_allow_optimization || - list_empty(&optimizing_list)) - return; + struct optimized_kprobe *op, *tmp; + + /* Lock modules while optimizing kprobes */ + mutex_lock(&module_mutex); + mutex_lock(&kprobe_mutex); + if (kprobes_all_disarmed || !kprobes_allow_optimization) + goto end; + + /* + * Wait for quiesence period to ensure all running interrupts + * are done. Because optprobe may modify multiple instructions + * there is a chance that Nth instruction is interrupted. In that + * case, running interrupt can return to 2nd-Nth byte of jump + * instruction. This wait is for avoiding it. + */ + synchronize_sched(); /* * The optimization/unoptimization refers online_cpus via @@ -497,111 +459,17 @@ static __kprobes void do_optimize_kprobes(void) */ get_online_cpus(); mutex_lock(&text_mutex); - arch_optimize_kprobes(&optimizing_list); - mutex_unlock(&text_mutex); - put_online_cpus(); -} - -/* - * Unoptimize (replace a jump with a breakpoint and remove the breakpoint - * if need) kprobes listed on unoptimizing_list. - */ -static __kprobes void do_unoptimize_kprobes(struct list_head *free_list) -{ - struct optimized_kprobe *op, *tmp; - - /* Unoptimization must be done anytime */ - if (list_empty(&unoptimizing_list)) - return; - - /* Ditto to do_optimize_kprobes */ - get_online_cpus(); - mutex_lock(&text_mutex); - arch_unoptimize_kprobes(&unoptimizing_list, free_list); - /* Loop free_list for disarming */ - list_for_each_entry_safe(op, tmp, free_list, list) { - /* Disarm probes if marked disabled */ - if (kprobe_disabled(&op->kp)) - arch_disarm_kprobe(&op->kp); - if (kprobe_unused(&op->kp)) { - /* - * Remove unused probes from hash list. After waiting - * for synchronization, these probes are reclaimed. - * (reclaiming is done by do_free_cleaned_kprobes.) - */ - hlist_del_rcu(&op->kp.hlist); - } else - list_del_init(&op->list); + list_for_each_entry_safe(op, tmp, &optimizing_list, list) { + WARN_ON(kprobe_disabled(&op->kp)); + if (arch_optimize_kprobe(op) < 0) + op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED; + list_del_init(&op->list); } mutex_unlock(&text_mutex); put_online_cpus(); -} - -/* Reclaim all kprobes on the free_list */ -static __kprobes void do_free_cleaned_kprobes(struct list_head *free_list) -{ - struct optimized_kprobe *op, *tmp; - - list_for_each_entry_safe(op, tmp, free_list, list) { - BUG_ON(!kprobe_unused(&op->kp)); - list_del_init(&op->list); - free_aggr_kprobe(&op->kp); - } -} - -/* Start optimizer after OPTIMIZE_DELAY passed */ -static __kprobes void kick_kprobe_optimizer(void) -{ - if (!delayed_work_pending(&optimizing_work)) - schedule_delayed_work(&optimizing_work, OPTIMIZE_DELAY); -} - -/* Kprobe jump optimizer */ -static __kprobes void kprobe_optimizer(struct work_struct *work) -{ - LIST_HEAD(free_list); - - /* Lock modules while optimizing kprobes */ - mutex_lock(&module_mutex); - mutex_lock(&kprobe_mutex); - - /* - * Step 1: Unoptimize kprobes and collect cleaned (unused and disarmed) - * kprobes before waiting for quiesence period. - */ - do_unoptimize_kprobes(&free_list); - - /* - * Step 2: Wait for quiesence period to ensure all running interrupts - * are done. Because optprobe may modify multiple instructions - * there is a chance that Nth instruction is interrupted. In that - * case, running interrupt can return to 2nd-Nth byte of jump - * instruction. This wait is for avoiding it. - */ - synchronize_sched(); - - /* Step 3: Optimize kprobes after quiesence period */ - do_optimize_kprobes(); - - /* Step 4: Free cleaned kprobes after quiesence period */ - do_free_cleaned_kprobes(&free_list); - +end: mutex_unlock(&kprobe_mutex); mutex_unlock(&module_mutex); - - /* Step 5: Kick optimizer again if needed */ - if (!list_empty(&optimizing_list) || !list_empty(&unoptimizing_list)) - kick_kprobe_optimizer(); - else - /* Wake up all waiters */ - complete_all(&optimizer_comp); -} - -/* Wait for completing optimization and unoptimization */ -static __kprobes void wait_for_kprobe_optimizer(void) -{ - if (delayed_work_pending(&optimizing_work)) - wait_for_completion(&optimizer_comp); } /* Optimize kprobe if p is ready to be optimized */ @@ -627,99 +495,42 @@ static __kprobes void optimize_kprobe(struct kprobe *p) /* Check if it is already optimized. */ if (op->kp.flags & KPROBE_FLAG_OPTIMIZED) return; - op->kp.flags |= KPROBE_FLAG_OPTIMIZED; - if (!list_empty(&op->list)) - /* This is under unoptimizing. Just dequeue the probe */ - list_del_init(&op->list); - else { - list_add(&op->list, &optimizing_list); - kick_kprobe_optimizer(); - } -} - -/* Short cut to direct unoptimizing */ -static __kprobes void force_unoptimize_kprobe(struct optimized_kprobe *op) -{ - get_online_cpus(); - arch_unoptimize_kprobe(op); - put_online_cpus(); - if (kprobe_disabled(&op->kp)) - arch_disarm_kprobe(&op->kp); + op->kp.flags |= KPROBE_FLAG_OPTIMIZED; + list_add(&op->list, &optimizing_list); + if (!delayed_work_pending(&optimizing_work)) + schedule_delayed_work(&optimizing_work, OPTIMIZE_DELAY); } /* Unoptimize a kprobe if p is optimized */ -static __kprobes void unoptimize_kprobe(struct kprobe *p, bool force) +static __kprobes void unoptimize_kprobe(struct kprobe *p) { struct optimized_kprobe *op; - if (!kprobe_aggrprobe(p) || kprobe_disarmed(p)) - return; /* This is not an optprobe nor optimized */ - - op = container_of(p, struct optimized_kprobe, kp); - if (!kprobe_optimized(p)) { - /* Unoptimized or unoptimizing case */ - if (force && !list_empty(&op->list)) { - /* - * Only if this is unoptimizing kprobe and forced, - * forcibly unoptimize it. (No need to unoptimize - * unoptimized kprobe again :) - */ + if ((p->flags & KPROBE_FLAG_OPTIMIZED) && kprobe_aggrprobe(p)) { + op = container_of(p, struct optimized_kprobe, kp); + if (!list_empty(&op->list)) + /* Dequeue from the optimization queue */ list_del_init(&op->list); - force_unoptimize_kprobe(op); - } - return; - } - - op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED; - if (!list_empty(&op->list)) { - /* Dequeue from the optimization queue */ - list_del_init(&op->list); - return; - } - /* Optimized kprobe case */ - if (force) - /* Forcibly update the code: this is a special case */ - force_unoptimize_kprobe(op); - else { - list_add(&op->list, &unoptimizing_list); - kick_kprobe_optimizer(); + else + /* Replace jump with break */ + arch_unoptimize_kprobe(op); + op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED; } } -/* Cancel unoptimizing for reusing */ -static void reuse_unused_kprobe(struct kprobe *ap) -{ - struct optimized_kprobe *op; - - BUG_ON(!kprobe_unused(ap)); - /* - * Unused kprobe MUST be on the way of delayed unoptimizing (means - * there is still a relative jump) and disabled. - */ - op = container_of(ap, struct optimized_kprobe, kp); - if (unlikely(list_empty(&op->list))) - printk(KERN_WARNING "Warning: found a stray unused " - "aggrprobe@%p\n", ap->addr); - /* Enable the probe again */ - ap->flags &= ~KPROBE_FLAG_DISABLED; - /* Optimize it again (remove from op->list) */ - BUG_ON(!kprobe_optready(ap)); - optimize_kprobe(ap); -} - /* Remove optimized instructions */ static void __kprobes kill_optimized_kprobe(struct kprobe *p) { struct optimized_kprobe *op; op = container_of(p, struct optimized_kprobe, kp); - if (!list_empty(&op->list)) - /* Dequeue from the (un)optimization queue */ + if (!list_empty(&op->list)) { + /* Dequeue from the optimization queue */ list_del_init(&op->list); - - op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED; - /* Don't touch the code, because it is already freed. */ + op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED; + } + /* Don't unoptimize, because the target code will be freed. */ arch_remove_optimized_kprobe(op); } @@ -732,6 +543,16 @@ static __kprobes void prepare_optimized_kprobe(struct kprobe *p) arch_prepare_optimized_kprobe(op); } +/* Free optimized instructions and optimized_kprobe */ +static __kprobes void free_aggr_kprobe(struct kprobe *p) +{ + struct optimized_kprobe *op; + + op = container_of(p, struct optimized_kprobe, kp); + arch_remove_optimized_kprobe(op); + kfree(op); +} + /* Allocate new optimized_kprobe and try to prepare optimized instructions */ static __kprobes struct kprobe *alloc_aggr_kprobe(struct kprobe *p) { @@ -766,8 +587,7 @@ static __kprobes void try_to_optimize_kprobe(struct kprobe *p) op = container_of(ap, struct optimized_kprobe, kp); if (!arch_prepared_optinsn(&op->optinsn)) { /* If failed to setup optimizing, fallback to kprobe */ - arch_remove_optimized_kprobe(op); - kfree(op); + free_aggr_kprobe(ap); return; } @@ -811,16 +631,21 @@ static void __kprobes unoptimize_all_kprobes(void) return; kprobes_allow_optimization = false; + printk(KERN_INFO "Kprobes globally unoptimized\n"); + get_online_cpus(); /* For avoiding text_mutex deadlock */ + mutex_lock(&text_mutex); for (i = 0; i < KPROBE_TABLE_SIZE; i++) { head = &kprobe_table[i]; hlist_for_each_entry_rcu(p, node, head, hlist) { if (!kprobe_disabled(p)) - unoptimize_kprobe(p, false); + unoptimize_kprobe(p); } } - /* Wait for unoptimizing completion */ - wait_for_kprobe_optimizer(); - printk(KERN_INFO "Kprobes globally unoptimized\n"); + + mutex_unlock(&text_mutex); + put_online_cpus(); + /* Allow all currently running kprobes to complete */ + synchronize_sched(); } int sysctl_kprobes_optimization; @@ -844,60 +669,44 @@ int proc_kprobes_optimization_handler(struct ctl_table *table, int write, } #endif /* CONFIG_SYSCTL */ -/* Put a breakpoint for a probe. Must be called with text_mutex locked */ static void __kprobes __arm_kprobe(struct kprobe *p) { - struct kprobe *_p; + struct kprobe *old_p; /* Check collision with other optimized kprobes */ - _p = get_optimized_kprobe((unsigned long)p->addr); - if (unlikely(_p)) - /* Fallback to unoptimized kprobe */ - unoptimize_kprobe(_p, true); + old_p = get_optimized_kprobe((unsigned long)p->addr); + if (unlikely(old_p)) + unoptimize_kprobe(old_p); /* Fallback to unoptimized kprobe */ arch_arm_kprobe(p); optimize_kprobe(p); /* Try to optimize (add kprobe to a list) */ } -/* Remove the breakpoint of a probe. Must be called with text_mutex locked */ -static void __kprobes __disarm_kprobe(struct kprobe *p, bool reopt) +static void __kprobes __disarm_kprobe(struct kprobe *p) { - struct kprobe *_p; + struct kprobe *old_p; - unoptimize_kprobe(p, false); /* Try to unoptimize */ + unoptimize_kprobe(p); /* Try to unoptimize */ + arch_disarm_kprobe(p); - if (!kprobe_queued(p)) { - arch_disarm_kprobe(p); - /* If another kprobe was blocked, optimize it. */ - _p = get_optimized_kprobe((unsigned long)p->addr); - if (unlikely(_p) && reopt) - optimize_kprobe(_p); - } - /* TODO: reoptimize others after unoptimized this probe */ + /* If another kprobe was blocked, optimize it. */ + old_p = get_optimized_kprobe((unsigned long)p->addr); + if (unlikely(old_p)) + optimize_kprobe(old_p); } #else /* !CONFIG_OPTPROBES */ #define optimize_kprobe(p) do {} while (0) -#define unoptimize_kprobe(p, f) do {} while (0) +#define unoptimize_kprobe(p) do {} while (0) #define kill_optimized_kprobe(p) do {} while (0) #define prepare_optimized_kprobe(p) do {} while (0) #define try_to_optimize_kprobe(p) do {} while (0) #define __arm_kprobe(p) arch_arm_kprobe(p) -#define __disarm_kprobe(p, o) arch_disarm_kprobe(p) -#define kprobe_disarmed(p) kprobe_disabled(p) -#define wait_for_kprobe_optimizer() do {} while (0) - -/* There should be no unused kprobes can be reused without optimization */ -static void reuse_unused_kprobe(struct kprobe *ap) -{ - printk(KERN_ERR "Error: There should be no unused kprobe here.\n"); - BUG_ON(kprobe_unused(ap)); -} +#define __disarm_kprobe(p) arch_disarm_kprobe(p) static __kprobes void free_aggr_kprobe(struct kprobe *p) { - arch_remove_kprobe(p); kfree(p); } @@ -923,10 +732,11 @@ static void __kprobes arm_kprobe(struct kprobe *kp) /* Disarm a kprobe with text_mutex */ static void __kprobes disarm_kprobe(struct kprobe *kp) { - /* Ditto */ + get_online_cpus(); /* For avoiding text_mutex deadlock */ mutex_lock(&text_mutex); - __disarm_kprobe(kp, true); + __disarm_kprobe(kp); mutex_unlock(&text_mutex); + put_online_cpus(); } /* @@ -1132,7 +942,7 @@ static int __kprobes add_new_kprobe(struct kprobe *ap, struct kprobe *p) BUG_ON(kprobe_gone(ap) || kprobe_gone(p)); if (p->break_handler || p->post_handler) - unoptimize_kprobe(ap, true); /* Fall back to normal kprobe */ + unoptimize_kprobe(ap); /* Fall back to normal kprobe */ if (p->break_handler) { if (ap->break_handler) @@ -1183,21 +993,19 @@ static void __kprobes init_aggr_kprobe(struct kprobe *ap, struct kprobe *p) * This is the second or subsequent kprobe at the address - handle * the intricacies */ -static int __kprobes register_aggr_kprobe(struct kprobe *orig_p, +static int __kprobes register_aggr_kprobe(struct kprobe *old_p, struct kprobe *p) { int ret = 0; - struct kprobe *ap = orig_p; + struct kprobe *ap = old_p; - if (!kprobe_aggrprobe(orig_p)) { - /* If orig_p is not an aggr_kprobe, create new aggr_kprobe. */ - ap = alloc_aggr_kprobe(orig_p); + if (!kprobe_aggrprobe(old_p)) { + /* If old_p is not an aggr_kprobe, create new aggr_kprobe. */ + ap = alloc_aggr_kprobe(old_p); if (!ap) return -ENOMEM; - init_aggr_kprobe(ap, orig_p); - } else if (kprobe_unused(ap)) - /* This probe is going to die. Rescue it */ - reuse_unused_kprobe(ap); + init_aggr_kprobe(ap, old_p); + } if (kprobe_gone(ap)) { /* @@ -1231,6 +1039,23 @@ static int __kprobes register_aggr_kprobe(struct kprobe *orig_p, return add_new_kprobe(ap, p); } +/* Try to disable aggr_kprobe, and return 1 if succeeded.*/ +static int __kprobes try_to_disable_aggr_kprobe(struct kprobe *p) +{ + struct kprobe *kp; + + list_for_each_entry_rcu(kp, &p->list, list) { + if (!kprobe_disabled(kp)) + /* + * There is an active probe on the list. + * We can't disable aggr_kprobe. + */ + return 0; + } + p->flags |= KPROBE_FLAG_DISABLED; + return 1; +} + static int __kprobes in_kprobes_functions(unsigned long addr) { struct kprobe_blackpoint *kb; @@ -1273,33 +1098,34 @@ static kprobe_opcode_t __kprobes *kprobe_addr(struct kprobe *p) /* Check passed kprobe is valid and return kprobe in kprobe_table. */ static struct kprobe * __kprobes __get_valid_kprobe(struct kprobe *p) { - struct kprobe *ap, *list_p; + struct kprobe *old_p, *list_p; - ap = get_kprobe(p->addr); - if (unlikely(!ap)) + old_p = get_kprobe(p->addr); + if (unlikely(!old_p)) return NULL; - if (p != ap) { - list_for_each_entry_rcu(list_p, &ap->list, list) + if (p != old_p) { + list_for_each_entry_rcu(list_p, &old_p->list, list) if (list_p == p) /* kprobe p is a valid probe */ goto valid; return NULL; } valid: - return ap; + return old_p; } /* Return error if the kprobe is being re-registered */ static inline int check_kprobe_rereg(struct kprobe *p) { int ret = 0; + struct kprobe *old_p; mutex_lock(&kprobe_mutex); - if (__get_valid_kprobe(p)) + old_p = __get_valid_kprobe(p); + if (old_p) ret = -EINVAL; mutex_unlock(&kprobe_mutex); - return ret; } @@ -1403,121 +1229,67 @@ int __kprobes register_kprobe(struct kprobe *p) } EXPORT_SYMBOL_GPL(register_kprobe); -/* Check if all probes on the aggrprobe are disabled */ -static int __kprobes aggr_kprobe_disabled(struct kprobe *ap) -{ - struct kprobe *kp; - - list_for_each_entry_rcu(kp, &ap->list, list) - if (!kprobe_disabled(kp)) - /* - * There is an active probe on the list. - * We can't disable this ap. - */ - return 0; - - return 1; -} - -/* Disable one kprobe: Make sure called under kprobe_mutex is locked */ -static struct kprobe *__kprobes __disable_kprobe(struct kprobe *p) -{ - struct kprobe *orig_p; - - /* Get an original kprobe for return */ - orig_p = __get_valid_kprobe(p); - if (unlikely(orig_p == NULL)) - return NULL; - - if (!kprobe_disabled(p)) { - /* Disable probe if it is a child probe */ - if (p != orig_p) - p->flags |= KPROBE_FLAG_DISABLED; - - /* Try to disarm and disable this/parent probe */ - if (p == orig_p || aggr_kprobe_disabled(orig_p)) { - disarm_kprobe(orig_p); - orig_p->flags |= KPROBE_FLAG_DISABLED; - } - } - - return orig_p; -} - /* * Unregister a kprobe without a scheduler synchronization. */ static int __kprobes __unregister_kprobe_top(struct kprobe *p) { - struct kprobe *ap, *list_p; + struct kprobe *old_p, *list_p; - /* Disable kprobe. This will disarm it if needed. */ - ap = __disable_kprobe(p); - if (ap == NULL) + old_p = __get_valid_kprobe(p); + if (old_p == NULL) return -EINVAL; - if (ap == p) - /* - * This probe is an independent(and non-optimized) kprobe - * (not an aggrprobe). Remove from the hash list. - */ - goto disarmed; - - /* Following process expects this probe is an aggrprobe */ - WARN_ON(!kprobe_aggrprobe(ap)); - - if (list_is_singular(&ap->list) && kprobe_disarmed(ap)) + if (old_p == p || + (kprobe_aggrprobe(old_p) && + list_is_singular(&old_p->list))) { /* - * !disarmed could be happen if the probe is under delayed - * unoptimizing. + * Only probe on the hash list. Disarm only if kprobes are + * enabled and not gone - otherwise, the breakpoint would + * already have been removed. We save on flushing icache. */ - goto disarmed; - else { - /* If disabling probe has special handlers, update aggrprobe */ + if (!kprobes_all_disarmed && !kprobe_disabled(old_p)) + disarm_kprobe(old_p); + hlist_del_rcu(&old_p->hlist); + } else { if (p->break_handler && !kprobe_gone(p)) - ap->break_handler = NULL; + old_p->break_handler = NULL; if (p->post_handler && !kprobe_gone(p)) { - list_for_each_entry_rcu(list_p, &ap->list, list) { + list_for_each_entry_rcu(list_p, &old_p->list, list) { if ((list_p != p) && (list_p->post_handler)) goto noclean; } - ap->post_handler = NULL; + old_p->post_handler = NULL; } noclean: - /* - * Remove from the aggrprobe: this path will do nothing in - * __unregister_kprobe_bottom(). - */ list_del_rcu(&p->list); - if (!kprobe_disabled(ap) && !kprobes_all_disarmed) - /* - * Try to optimize this probe again, because post - * handler may have been changed. - */ - optimize_kprobe(ap); + if (!kprobe_disabled(old_p)) { + try_to_disable_aggr_kprobe(old_p); + if (!kprobes_all_disarmed) { + if (kprobe_disabled(old_p)) + disarm_kprobe(old_p); + else + /* Try to optimize this probe again */ + optimize_kprobe(old_p); + } + } } return 0; - -disarmed: - BUG_ON(!kprobe_disarmed(ap)); - hlist_del_rcu(&ap->hlist); - return 0; } static void __kprobes __unregister_kprobe_bottom(struct kprobe *p) { - struct kprobe *ap; + struct kprobe *old_p; if (list_empty(&p->list)) - /* This is an independent kprobe */ arch_remove_kprobe(p); else if (list_is_singular(&p->list)) { - /* This is the last child of an aggrprobe */ - ap = list_entry(p->list.next, struct kprobe, list); + /* "p" is the last child of an aggr_kprobe */ + old_p = list_entry(p->list.next, struct kprobe, list); list_del(&p->list); - free_aggr_kprobe(ap); + arch_remove_kprobe(old_p); + free_aggr_kprobe(old_p); } - /* Otherwise, do nothing. */ } int __kprobes register_kprobes(struct kprobe **kps, int num) @@ -1835,13 +1607,29 @@ static void __kprobes kill_kprobe(struct kprobe *p) int __kprobes disable_kprobe(struct kprobe *kp) { int ret = 0; + struct kprobe *p; mutex_lock(&kprobe_mutex); - /* Disable this kprobe */ - if (__disable_kprobe(kp) == NULL) + /* Check whether specified probe is valid. */ + p = __get_valid_kprobe(kp); + if (unlikely(p == NULL)) { ret = -EINVAL; + goto out; + } + /* If the probe is already disabled (or gone), just return */ + if (kprobe_disabled(kp)) + goto out; + + kp->flags |= KPROBE_FLAG_DISABLED; + if (p != kp) + /* When kp != p, p is always enabled. */ + try_to_disable_aggr_kprobe(p); + + if (!kprobes_all_disarmed && kprobe_disabled(p)) + disarm_kprobe(p); +out: mutex_unlock(&kprobe_mutex); return ret; } @@ -2139,27 +1927,36 @@ static void __kprobes disarm_all_kprobes(void) mutex_lock(&kprobe_mutex); /* If kprobes are already disarmed, just return */ - if (kprobes_all_disarmed) { - mutex_unlock(&kprobe_mutex); - return; - } + if (kprobes_all_disarmed) + goto already_disabled; kprobes_all_disarmed = true; printk(KERN_INFO "Kprobes globally disabled\n"); + /* + * Here we call get_online_cpus() for avoiding text_mutex deadlock, + * because disarming may also unoptimize kprobes. + */ + get_online_cpus(); mutex_lock(&text_mutex); for (i = 0; i < KPROBE_TABLE_SIZE; i++) { head = &kprobe_table[i]; hlist_for_each_entry_rcu(p, node, head, hlist) { if (!arch_trampoline_kprobe(p) && !kprobe_disabled(p)) - __disarm_kprobe(p, false); + __disarm_kprobe(p); } } + mutex_unlock(&text_mutex); + put_online_cpus(); mutex_unlock(&kprobe_mutex); + /* Allow all currently running kprobes to complete */ + synchronize_sched(); + return; - /* Wait for disarming all kprobes by optimizer */ - wait_for_kprobe_optimizer(); +already_disabled: + mutex_unlock(&kprobe_mutex); + return; } /* diff --git a/trunk/kernel/kthread.c b/trunk/kernel/kthread.c index 5355cfd44a3f..ca61bbdd44b2 100644 --- a/trunk/kernel/kthread.c +++ b/trunk/kernel/kthread.c @@ -148,7 +148,7 @@ struct task_struct *kthread_create(int (*threadfn)(void *data), wait_for_completion(&create.done); if (!IS_ERR(create.result)) { - static struct sched_param param = { .sched_priority = 0 }; + struct sched_param param = { .sched_priority = 0 }; va_list args; va_start(args, namefmt); diff --git a/trunk/kernel/lockdep_proc.c b/trunk/kernel/lockdep_proc.c index 1969d2fc4b36..59b76c8ce9d7 100644 --- a/trunk/kernel/lockdep_proc.c +++ b/trunk/kernel/lockdep_proc.c @@ -494,6 +494,7 @@ static void seq_stats(struct seq_file *m, struct lock_stat_data *data) namelen += 2; for (i = 0; i < LOCKSTAT_POINTS; i++) { + char sym[KSYM_SYMBOL_LEN]; char ip[32]; if (class->contention_point[i] == 0) @@ -502,13 +503,15 @@ static void seq_stats(struct seq_file *m, struct lock_stat_data *data) if (!i) seq_line(m, '-', 40-namelen, namelen); + sprint_symbol(sym, class->contention_point[i]); snprintf(ip, sizeof(ip), "[<%p>]", (void *)class->contention_point[i]); - seq_printf(m, "%40s %14lu %29s %pS\n", - name, stats->contention_point[i], - ip, (void *)class->contention_point[i]); + seq_printf(m, "%40s %14lu %29s %s\n", name, + stats->contention_point[i], + ip, sym); } for (i = 0; i < LOCKSTAT_POINTS; i++) { + char sym[KSYM_SYMBOL_LEN]; char ip[32]; if (class->contending_point[i] == 0) @@ -517,11 +520,12 @@ static void seq_stats(struct seq_file *m, struct lock_stat_data *data) if (!i) seq_line(m, '-', 40-namelen, namelen); + sprint_symbol(sym, class->contending_point[i]); snprintf(ip, sizeof(ip), "[<%p>]", (void *)class->contending_point[i]); - seq_printf(m, "%40s %14lu %29s %pS\n", - name, stats->contending_point[i], - ip, (void *)class->contending_point[i]); + seq_printf(m, "%40s %14lu %29s %s\n", name, + stats->contending_point[i], + ip, sym); } if (i) { seq_puts(m, "\n"); diff --git a/trunk/kernel/module.c b/trunk/kernel/module.c index 34e00b708fad..d190664f25ff 100644 --- a/trunk/kernel/module.c +++ b/trunk/kernel/module.c @@ -56,7 +56,6 @@ #include #include #include -#include #define CREATE_TRACE_POINTS #include @@ -71,26 +70,6 @@ #define ARCH_SHF_SMALL 0 #endif -/* - * Modules' sections will be aligned on page boundaries - * to ensure complete separation of code and data, but - * only when CONFIG_DEBUG_SET_MODULE_RONX=y - */ -#ifdef CONFIG_DEBUG_SET_MODULE_RONX -# define debug_align(X) ALIGN(X, PAGE_SIZE) -#else -# define debug_align(X) (X) -#endif - -/* - * Given BASE and SIZE this macro calculates the number of pages the - * memory regions occupies - */ -#define MOD_NUMBER_OF_PAGES(BASE, SIZE) (((SIZE) > 0) ? \ - (PFN_DOWN((unsigned long)(BASE) + (SIZE) - 1) - \ - PFN_DOWN((unsigned long)BASE) + 1) \ - : (0UL)) - /* If this is set, the section belongs in the init part of the module */ #define INIT_OFFSET_MASK (1UL << (BITS_PER_LONG-1)) @@ -1563,115 +1542,6 @@ static int __unlink_module(void *_mod) return 0; } -#ifdef CONFIG_DEBUG_SET_MODULE_RONX -/* - * LKM RO/NX protection: protect module's text/ro-data - * from modification and any data from execution. - */ -void set_page_attributes(void *start, void *end, int (*set)(unsigned long start, int num_pages)) -{ - unsigned long begin_pfn = PFN_DOWN((unsigned long)start); - unsigned long end_pfn = PFN_DOWN((unsigned long)end); - - if (end_pfn > begin_pfn) - set(begin_pfn << PAGE_SHIFT, end_pfn - begin_pfn); -} - -static void set_section_ro_nx(void *base, - unsigned long text_size, - unsigned long ro_size, - unsigned long total_size) -{ - /* begin and end PFNs of the current subsection */ - unsigned long begin_pfn; - unsigned long end_pfn; - - /* - * Set RO for module text and RO-data: - * - Always protect first page. - * - Do not protect last partial page. - */ - if (ro_size > 0) - set_page_attributes(base, base + ro_size, set_memory_ro); - - /* - * Set NX permissions for module data: - * - Do not protect first partial page. - * - Always protect last page. - */ - if (total_size > text_size) { - begin_pfn = PFN_UP((unsigned long)base + text_size); - end_pfn = PFN_UP((unsigned long)base + total_size); - if (end_pfn > begin_pfn) - set_memory_nx(begin_pfn << PAGE_SHIFT, end_pfn - begin_pfn); - } -} - -/* Setting memory back to RW+NX before releasing it */ -void unset_section_ro_nx(struct module *mod, void *module_region) -{ - unsigned long total_pages; - - if (mod->module_core == module_region) { - /* Set core as NX+RW */ - total_pages = MOD_NUMBER_OF_PAGES(mod->module_core, mod->core_size); - set_memory_nx((unsigned long)mod->module_core, total_pages); - set_memory_rw((unsigned long)mod->module_core, total_pages); - - } else if (mod->module_init == module_region) { - /* Set init as NX+RW */ - total_pages = MOD_NUMBER_OF_PAGES(mod->module_init, mod->init_size); - set_memory_nx((unsigned long)mod->module_init, total_pages); - set_memory_rw((unsigned long)mod->module_init, total_pages); - } -} - -/* Iterate through all modules and set each module's text as RW */ -void set_all_modules_text_rw() -{ - struct module *mod; - - mutex_lock(&module_mutex); - list_for_each_entry_rcu(mod, &modules, list) { - if ((mod->module_core) && (mod->core_text_size)) { - set_page_attributes(mod->module_core, - mod->module_core + mod->core_text_size, - set_memory_rw); - } - if ((mod->module_init) && (mod->init_text_size)) { - set_page_attributes(mod->module_init, - mod->module_init + mod->init_text_size, - set_memory_rw); - } - } - mutex_unlock(&module_mutex); -} - -/* Iterate through all modules and set each module's text as RO */ -void set_all_modules_text_ro() -{ - struct module *mod; - - mutex_lock(&module_mutex); - list_for_each_entry_rcu(mod, &modules, list) { - if ((mod->module_core) && (mod->core_text_size)) { - set_page_attributes(mod->module_core, - mod->module_core + mod->core_text_size, - set_memory_ro); - } - if ((mod->module_init) && (mod->init_text_size)) { - set_page_attributes(mod->module_init, - mod->module_init + mod->init_text_size, - set_memory_ro); - } - } - mutex_unlock(&module_mutex); -} -#else -static inline void set_section_ro_nx(void *base, unsigned long text_size, unsigned long ro_size, unsigned long total_size) { } -static inline void unset_section_ro_nx(struct module *mod, void *module_region) { } -#endif - /* Free a module, remove from lists, etc. */ static void free_module(struct module *mod) { @@ -1696,7 +1566,6 @@ static void free_module(struct module *mod) destroy_params(mod->kp, mod->num_kp); /* This may be NULL, but that's OK */ - unset_section_ro_nx(mod, mod->module_init); module_free(mod, mod->module_init); kfree(mod->args); percpu_modfree(mod); @@ -1705,7 +1574,6 @@ static void free_module(struct module *mod) lockdep_free_key_range(mod->module_core, mod->core_size); /* Finally, free the core (containing the module structure) */ - unset_section_ro_nx(mod, mod->module_core); module_free(mod, mod->module_core); #ifdef CONFIG_MPU @@ -1909,19 +1777,8 @@ static void layout_sections(struct module *mod, struct load_info *info) s->sh_entsize = get_offset(mod, &mod->core_size, s, i); DEBUGP("\t%s\n", name); } - switch (m) { - case 0: /* executable */ - mod->core_size = debug_align(mod->core_size); + if (m == 0) mod->core_text_size = mod->core_size; - break; - case 1: /* RO: text and ro-data */ - mod->core_size = debug_align(mod->core_size); - mod->core_ro_size = mod->core_size; - break; - case 3: /* whole core */ - mod->core_size = debug_align(mod->core_size); - break; - } } DEBUGP("Init section allocation order:\n"); @@ -1939,19 +1796,8 @@ static void layout_sections(struct module *mod, struct load_info *info) | INIT_OFFSET_MASK); DEBUGP("\t%s\n", sname); } - switch (m) { - case 0: /* executable */ - mod->init_size = debug_align(mod->init_size); + if (m == 0) mod->init_text_size = mod->init_size; - break; - case 1: /* RO: text and ro-data */ - mod->init_size = debug_align(mod->init_size); - mod->init_ro_size = mod->init_size; - break; - case 3: /* whole init */ - mod->init_size = debug_align(mod->init_size); - break; - } } } @@ -2876,18 +2722,6 @@ SYSCALL_DEFINE3(init_module, void __user *, umod, blocking_notifier_call_chain(&module_notify_list, MODULE_STATE_COMING, mod); - /* Set RO and NX regions for core */ - set_section_ro_nx(mod->module_core, - mod->core_text_size, - mod->core_ro_size, - mod->core_size); - - /* Set RO and NX regions for init */ - set_section_ro_nx(mod->module_init, - mod->init_text_size, - mod->init_ro_size, - mod->init_size); - do_mod_ctors(mod); /* Start the module */ if (mod->init != NULL) @@ -2931,7 +2765,6 @@ SYSCALL_DEFINE3(init_module, void __user *, umod, mod->symtab = mod->core_symtab; mod->strtab = mod->core_strtab; #endif - unset_section_ro_nx(mod, mod->module_init); module_free(mod, mod->module_init); mod->module_init = NULL; mod->init_size = 0; diff --git a/trunk/kernel/mutex.c b/trunk/kernel/mutex.c index a5889fb28ecf..200407c1502f 100644 --- a/trunk/kernel/mutex.c +++ b/trunk/kernel/mutex.c @@ -199,7 +199,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, * memory barriers as we'll eventually observe the right * values at the cost of a few extra spins. */ - arch_mutex_cpu_relax(); + cpu_relax(); } #endif spin_lock_mutex(&lock->wait_lock, flags); diff --git a/trunk/kernel/perf_event.c b/trunk/kernel/perf_event.c index 11847bf1e8cc..2870feee81dd 100644 --- a/trunk/kernel/perf_event.c +++ b/trunk/kernel/perf_event.c @@ -13,7 +13,6 @@ #include #include #include -#include #include #include #include @@ -22,9 +21,7 @@ #include #include #include -#include #include -#include #include #include #include @@ -136,28 +133,6 @@ static void unclone_ctx(struct perf_event_context *ctx) } } -static u32 perf_event_pid(struct perf_event *event, struct task_struct *p) -{ - /* - * only top level events have the pid namespace they were created in - */ - if (event->parent) - event = event->parent; - - return task_tgid_nr_ns(p, event->ns); -} - -static u32 perf_event_tid(struct perf_event *event, struct task_struct *p) -{ - /* - * only top level events have the pid namespace they were created in - */ - if (event->parent) - event = event->parent; - - return task_pid_nr_ns(p, event->ns); -} - /* * If we inherit events we want to return the parent event id * to userspace. @@ -337,84 +312,9 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx) ctx->nr_stat++; } -/* - * Called at perf_event creation and when events are attached/detached from a - * group. - */ -static void perf_event__read_size(struct perf_event *event) -{ - int entry = sizeof(u64); /* value */ - int size = 0; - int nr = 1; - - if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) - size += sizeof(u64); - - if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) - size += sizeof(u64); - - if (event->attr.read_format & PERF_FORMAT_ID) - entry += sizeof(u64); - - if (event->attr.read_format & PERF_FORMAT_GROUP) { - nr += event->group_leader->nr_siblings; - size += sizeof(u64); - } - - size += entry * nr; - event->read_size = size; -} - -static void perf_event__header_size(struct perf_event *event) -{ - struct perf_sample_data *data; - u64 sample_type = event->attr.sample_type; - u16 size = 0; - - perf_event__read_size(event); - - if (sample_type & PERF_SAMPLE_IP) - size += sizeof(data->ip); - - if (sample_type & PERF_SAMPLE_ADDR) - size += sizeof(data->addr); - - if (sample_type & PERF_SAMPLE_PERIOD) - size += sizeof(data->period); - - if (sample_type & PERF_SAMPLE_READ) - size += event->read_size; - - event->header_size = size; -} - -static void perf_event__id_header_size(struct perf_event *event) -{ - struct perf_sample_data *data; - u64 sample_type = event->attr.sample_type; - u16 size = 0; - - if (sample_type & PERF_SAMPLE_TID) - size += sizeof(data->tid_entry); - - if (sample_type & PERF_SAMPLE_TIME) - size += sizeof(data->time); - - if (sample_type & PERF_SAMPLE_ID) - size += sizeof(data->id); - - if (sample_type & PERF_SAMPLE_STREAM_ID) - size += sizeof(data->stream_id); - - if (sample_type & PERF_SAMPLE_CPU) - size += sizeof(data->cpu_entry); - - event->id_header_size = size; -} - static void perf_group_attach(struct perf_event *event) { - struct perf_event *group_leader = event->group_leader, *pos; + struct perf_event *group_leader = event->group_leader; /* * We can have double attach due to group movement in perf_event_open. @@ -433,11 +333,6 @@ static void perf_group_attach(struct perf_event *event) list_add_tail(&event->group_entry, &group_leader->sibling_list); group_leader->nr_siblings++; - - perf_event__header_size(group_leader); - - list_for_each_entry(pos, &group_leader->sibling_list, group_entry) - perf_event__header_size(pos); } /* @@ -496,7 +391,7 @@ static void perf_group_detach(struct perf_event *event) if (event->group_leader != event) { list_del_init(&event->group_entry); event->group_leader->nr_siblings--; - goto out; + return; } if (!list_empty(&event->group_entry)) @@ -515,12 +410,6 @@ static void perf_group_detach(struct perf_event *event) /* Inherit group flags from the previous leader */ sibling->group_flags = event->group_flags; } - -out: - perf_event__header_size(event->group_leader); - - list_for_each_entry(tmp, &event->group_leader->sibling_list, group_entry) - perf_event__header_size(tmp); } static inline int @@ -1184,7 +1073,7 @@ static int perf_event_refresh(struct perf_event *event, int refresh) /* * not supported on inherited events */ - if (event->attr.inherit || !is_sampling_event(event)) + if (event->attr.inherit) return -EINVAL; atomic_add(refresh, &event->event_limit); @@ -2400,6 +2289,31 @@ static int perf_release(struct inode *inode, struct file *file) return perf_event_release_kernel(event); } +static int perf_event_read_size(struct perf_event *event) +{ + int entry = sizeof(u64); /* value */ + int size = 0; + int nr = 1; + + if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) + size += sizeof(u64); + + if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) + size += sizeof(u64); + + if (event->attr.read_format & PERF_FORMAT_ID) + entry += sizeof(u64); + + if (event->attr.read_format & PERF_FORMAT_GROUP) { + nr += event->group_leader->nr_siblings; + size += sizeof(u64); + } + + size += entry * nr; + + return size; +} + u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running) { struct perf_event *child; @@ -2514,7 +2428,7 @@ perf_read_hw(struct perf_event *event, char __user *buf, size_t count) if (event->state == PERF_EVENT_STATE_ERROR) return 0; - if (count < event->read_size) + if (count < perf_event_read_size(event)) return -ENOSPC; WARN_ON_ONCE(event->ctx->parent_ctx); @@ -2600,7 +2514,7 @@ static int perf_event_period(struct perf_event *event, u64 __user *arg) int ret = 0; u64 value; - if (!is_sampling_event(event)) + if (!event->attr.sample_period) return -EINVAL; if (copy_from_user(&value, arg, sizeof(value))) @@ -3391,73 +3305,6 @@ __always_inline void perf_output_copy(struct perf_output_handle *handle, } while (len); } -static void __perf_event_header__init_id(struct perf_event_header *header, - struct perf_sample_data *data, - struct perf_event *event) -{ - u64 sample_type = event->attr.sample_type; - - data->type = sample_type; - header->size += event->id_header_size; - - if (sample_type & PERF_SAMPLE_TID) { - /* namespace issues */ - data->tid_entry.pid = perf_event_pid(event, current); - data->tid_entry.tid = perf_event_tid(event, current); - } - - if (sample_type & PERF_SAMPLE_TIME) - data->time = perf_clock(); - - if (sample_type & PERF_SAMPLE_ID) - data->id = primary_event_id(event); - - if (sample_type & PERF_SAMPLE_STREAM_ID) - data->stream_id = event->id; - - if (sample_type & PERF_SAMPLE_CPU) { - data->cpu_entry.cpu = raw_smp_processor_id(); - data->cpu_entry.reserved = 0; - } -} - -static void perf_event_header__init_id(struct perf_event_header *header, - struct perf_sample_data *data, - struct perf_event *event) -{ - if (event->attr.sample_id_all) - __perf_event_header__init_id(header, data, event); -} - -static void __perf_event__output_id_sample(struct perf_output_handle *handle, - struct perf_sample_data *data) -{ - u64 sample_type = data->type; - - if (sample_type & PERF_SAMPLE_TID) - perf_output_put(handle, data->tid_entry); - - if (sample_type & PERF_SAMPLE_TIME) - perf_output_put(handle, data->time); - - if (sample_type & PERF_SAMPLE_ID) - perf_output_put(handle, data->id); - - if (sample_type & PERF_SAMPLE_STREAM_ID) - perf_output_put(handle, data->stream_id); - - if (sample_type & PERF_SAMPLE_CPU) - perf_output_put(handle, data->cpu_entry); -} - -static void perf_event__output_id_sample(struct perf_event *event, - struct perf_output_handle *handle, - struct perf_sample_data *sample) -{ - if (event->attr.sample_id_all) - __perf_event__output_id_sample(handle, sample); -} - int perf_output_begin(struct perf_output_handle *handle, struct perf_event *event, unsigned int size, int nmi, int sample) @@ -3465,7 +3312,6 @@ int perf_output_begin(struct perf_output_handle *handle, struct perf_buffer *buffer; unsigned long tail, offset, head; int have_lost; - struct perf_sample_data sample_data; struct { struct perf_event_header header; u64 id; @@ -3492,12 +3338,8 @@ int perf_output_begin(struct perf_output_handle *handle, goto out; have_lost = local_read(&buffer->lost); - if (have_lost) { - lost_event.header.size = sizeof(lost_event); - perf_event_header__init_id(&lost_event.header, &sample_data, - event); - size += lost_event.header.size; - } + if (have_lost) + size += sizeof(lost_event); perf_output_get_handle(handle); @@ -3528,11 +3370,11 @@ int perf_output_begin(struct perf_output_handle *handle, if (have_lost) { lost_event.header.type = PERF_RECORD_LOST; lost_event.header.misc = 0; + lost_event.header.size = sizeof(lost_event); lost_event.id = event->id; lost_event.lost = local_xchg(&buffer->lost, 0); perf_output_put(handle, lost_event); - perf_event__output_id_sample(event, handle, &sample_data); } return 0; @@ -3565,6 +3407,28 @@ void perf_output_end(struct perf_output_handle *handle) rcu_read_unlock(); } +static u32 perf_event_pid(struct perf_event *event, struct task_struct *p) +{ + /* + * only top level events have the pid namespace they were created in + */ + if (event->parent) + event = event->parent; + + return task_tgid_nr_ns(p, event->ns); +} + +static u32 perf_event_tid(struct perf_event *event, struct task_struct *p) +{ + /* + * only top level events have the pid namespace they were created in + */ + if (event->parent) + event = event->parent; + + return task_pid_nr_ns(p, event->ns); +} + static void perf_output_read_one(struct perf_output_handle *handle, struct perf_event *event, u64 enabled, u64 running) @@ -3739,17 +3603,62 @@ void perf_prepare_sample(struct perf_event_header *header, { u64 sample_type = event->attr.sample_type; + data->type = sample_type; + header->type = PERF_RECORD_SAMPLE; - header->size = sizeof(*header) + event->header_size; + header->size = sizeof(*header); header->misc = 0; header->misc |= perf_misc_flags(regs); - __perf_event_header__init_id(header, data, event); - - if (sample_type & PERF_SAMPLE_IP) + if (sample_type & PERF_SAMPLE_IP) { data->ip = perf_instruction_pointer(regs); + header->size += sizeof(data->ip); + } + + if (sample_type & PERF_SAMPLE_TID) { + /* namespace issues */ + data->tid_entry.pid = perf_event_pid(event, current); + data->tid_entry.tid = perf_event_tid(event, current); + + header->size += sizeof(data->tid_entry); + } + + if (sample_type & PERF_SAMPLE_TIME) { + data->time = perf_clock(); + + header->size += sizeof(data->time); + } + + if (sample_type & PERF_SAMPLE_ADDR) + header->size += sizeof(data->addr); + + if (sample_type & PERF_SAMPLE_ID) { + data->id = primary_event_id(event); + + header->size += sizeof(data->id); + } + + if (sample_type & PERF_SAMPLE_STREAM_ID) { + data->stream_id = event->id; + + header->size += sizeof(data->stream_id); + } + + if (sample_type & PERF_SAMPLE_CPU) { + data->cpu_entry.cpu = raw_smp_processor_id(); + data->cpu_entry.reserved = 0; + + header->size += sizeof(data->cpu_entry); + } + + if (sample_type & PERF_SAMPLE_PERIOD) + header->size += sizeof(data->period); + + if (sample_type & PERF_SAMPLE_READ) + header->size += perf_event_read_size(event); + if (sample_type & PERF_SAMPLE_CALLCHAIN) { int size = 1; @@ -3813,26 +3722,23 @@ perf_event_read_event(struct perf_event *event, struct task_struct *task) { struct perf_output_handle handle; - struct perf_sample_data sample; struct perf_read_event read_event = { .header = { .type = PERF_RECORD_READ, .misc = 0, - .size = sizeof(read_event) + event->read_size, + .size = sizeof(read_event) + perf_event_read_size(event), }, .pid = perf_event_pid(event, task), .tid = perf_event_tid(event, task), }; int ret; - perf_event_header__init_id(&read_event.header, &sample, event); ret = perf_output_begin(&handle, event, read_event.header.size, 0, 0); if (ret) return; perf_output_put(&handle, read_event); perf_output_read(&handle, event); - perf_event__output_id_sample(event, &handle, &sample); perf_output_end(&handle); } @@ -3862,16 +3768,14 @@ static void perf_event_task_output(struct perf_event *event, struct perf_task_event *task_event) { struct perf_output_handle handle; - struct perf_sample_data sample; struct task_struct *task = task_event->task; - int ret, size = task_event->event_id.header.size; + int size, ret; - perf_event_header__init_id(&task_event->event_id.header, &sample, event); + size = task_event->event_id.header.size; + ret = perf_output_begin(&handle, event, size, 0, 0); - ret = perf_output_begin(&handle, event, - task_event->event_id.header.size, 0, 0); if (ret) - goto out; + return; task_event->event_id.pid = perf_event_pid(event, task); task_event->event_id.ppid = perf_event_pid(event, current); @@ -3881,11 +3785,7 @@ static void perf_event_task_output(struct perf_event *event, perf_output_put(&handle, task_event->event_id); - perf_event__output_id_sample(event, &handle, &sample); - perf_output_end(&handle); -out: - task_event->event_id.header.size = size; } static int perf_event_task_match(struct perf_event *event) @@ -4000,16 +3900,11 @@ static void perf_event_comm_output(struct perf_event *event, struct perf_comm_event *comm_event) { struct perf_output_handle handle; - struct perf_sample_data sample; int size = comm_event->event_id.header.size; - int ret; - - perf_event_header__init_id(&comm_event->event_id.header, &sample, event); - ret = perf_output_begin(&handle, event, - comm_event->event_id.header.size, 0, 0); + int ret = perf_output_begin(&handle, event, size, 0, 0); if (ret) - goto out; + return; comm_event->event_id.pid = perf_event_pid(event, comm_event->task); comm_event->event_id.tid = perf_event_tid(event, comm_event->task); @@ -4017,12 +3912,7 @@ static void perf_event_comm_output(struct perf_event *event, perf_output_put(&handle, comm_event->event_id); perf_output_copy(&handle, comm_event->comm, comm_event->comm_size); - - perf_event__output_id_sample(event, &handle, &sample); - perf_output_end(&handle); -out: - comm_event->event_id.header.size = size; } static int perf_event_comm_match(struct perf_event *event) @@ -4067,6 +3957,7 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event) comm_event->comm_size = size; comm_event->event_id.header.size = sizeof(comm_event->event_id) + size; + rcu_read_lock(); list_for_each_entry_rcu(pmu, &pmus, entry) { cpuctx = get_cpu_ptr(pmu->pmu_cpu_context); @@ -4147,15 +4038,11 @@ static void perf_event_mmap_output(struct perf_event *event, struct perf_mmap_event *mmap_event) { struct perf_output_handle handle; - struct perf_sample_data sample; int size = mmap_event->event_id.header.size; - int ret; + int ret = perf_output_begin(&handle, event, size, 0, 0); - perf_event_header__init_id(&mmap_event->event_id.header, &sample, event); - ret = perf_output_begin(&handle, event, - mmap_event->event_id.header.size, 0, 0); if (ret) - goto out; + return; mmap_event->event_id.pid = perf_event_pid(event, current); mmap_event->event_id.tid = perf_event_tid(event, current); @@ -4163,12 +4050,7 @@ static void perf_event_mmap_output(struct perf_event *event, perf_output_put(&handle, mmap_event->event_id); perf_output_copy(&handle, mmap_event->file_name, mmap_event->file_size); - - perf_event__output_id_sample(event, &handle, &sample); - perf_output_end(&handle); -out: - mmap_event->event_id.header.size = size; } static int perf_event_mmap_match(struct perf_event *event, @@ -4323,7 +4205,6 @@ void perf_event_mmap(struct vm_area_struct *vma) static void perf_log_throttle(struct perf_event *event, int enable) { struct perf_output_handle handle; - struct perf_sample_data sample; int ret; struct { @@ -4345,15 +4226,11 @@ static void perf_log_throttle(struct perf_event *event, int enable) if (enable) throttle_event.header.type = PERF_RECORD_UNTHROTTLE; - perf_event_header__init_id(&throttle_event.header, &sample, event); - - ret = perf_output_begin(&handle, event, - throttle_event.header.size, 1, 0); + ret = perf_output_begin(&handle, event, sizeof(throttle_event), 1, 0); if (ret) return; perf_output_put(&handle, throttle_event); - perf_event__output_id_sample(event, &handle, &sample); perf_output_end(&handle); } @@ -4369,13 +4246,6 @@ static int __perf_event_overflow(struct perf_event *event, int nmi, struct hw_perf_event *hwc = &event->hw; int ret = 0; - /* - * Non-sampling counters might still use the PMI to fold short - * hardware counters, ignore those. - */ - if (unlikely(!is_sampling_event(event))) - return 0; - if (!throttle) { hwc->interrupts++; } else { @@ -4521,7 +4391,7 @@ static void perf_swevent_event(struct perf_event *event, u64 nr, if (!regs) return; - if (!is_sampling_event(event)) + if (!hwc->sample_period) return; if (nr == 1 && hwc->sample_period == 1 && !event->attr.freq) @@ -4684,7 +4554,7 @@ static int perf_swevent_add(struct perf_event *event, int flags) struct hw_perf_event *hwc = &event->hw; struct hlist_head *head; - if (is_sampling_event(event)) { + if (hwc->sample_period) { hwc->last_period = hwc->sample_period; perf_swevent_set_period(event); } @@ -4941,6 +4811,15 @@ static int perf_tp_event_init(struct perf_event *event) if (event->attr.type != PERF_TYPE_TRACEPOINT) return -ENOENT; + /* + * Raw tracepoint data is a severe data leak, only allow root to + * have these. + */ + if ((event->attr.sample_type & PERF_SAMPLE_RAW) && + perf_paranoid_tracepoint_raw() && + !capable(CAP_SYS_ADMIN)) + return -EPERM; + err = perf_trace_init(event); if (err) return err; @@ -4963,7 +4842,7 @@ static struct pmu perf_tracepoint = { static inline void perf_tp_register(void) { - perf_pmu_register(&perf_tracepoint, "tracepoint", PERF_TYPE_TRACEPOINT); + perf_pmu_register(&perf_tracepoint); } static int perf_event_set_filter(struct perf_event *event, void __user *arg) @@ -5053,33 +4932,31 @@ static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer) static void perf_swevent_start_hrtimer(struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; - s64 period; - - if (!is_sampling_event(event)) - return; hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); hwc->hrtimer.function = perf_swevent_hrtimer; + if (hwc->sample_period) { + s64 period = local64_read(&hwc->period_left); - period = local64_read(&hwc->period_left); - if (period) { - if (period < 0) - period = 10000; + if (period) { + if (period < 0) + period = 10000; - local64_set(&hwc->period_left, 0); - } else { - period = max_t(u64, 10000, hwc->sample_period); - } - __hrtimer_start_range_ns(&hwc->hrtimer, + local64_set(&hwc->period_left, 0); + } else { + period = max_t(u64, 10000, hwc->sample_period); + } + __hrtimer_start_range_ns(&hwc->hrtimer, ns_to_ktime(period), 0, HRTIMER_MODE_REL_PINNED, 0); + } } static void perf_swevent_cancel_hrtimer(struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; - if (is_sampling_event(event)) { + if (hwc->sample_period) { ktime_t remaining = hrtimer_get_remaining(&hwc->hrtimer); local64_set(&hwc->period_left, ktime_to_ns(remaining)); @@ -5307,61 +5184,8 @@ static void free_pmu_context(struct pmu *pmu) out: mutex_unlock(&pmus_lock); } -static struct idr pmu_idr; - -static ssize_t -type_show(struct device *dev, struct device_attribute *attr, char *page) -{ - struct pmu *pmu = dev_get_drvdata(dev); - - return snprintf(page, PAGE_SIZE-1, "%d\n", pmu->type); -} - -static struct device_attribute pmu_dev_attrs[] = { - __ATTR_RO(type), - __ATTR_NULL, -}; - -static int pmu_bus_running; -static struct bus_type pmu_bus = { - .name = "event_source", - .dev_attrs = pmu_dev_attrs, -}; - -static void pmu_dev_release(struct device *dev) -{ - kfree(dev); -} - -static int pmu_dev_alloc(struct pmu *pmu) -{ - int ret = -ENOMEM; - - pmu->dev = kzalloc(sizeof(struct device), GFP_KERNEL); - if (!pmu->dev) - goto out; - - device_initialize(pmu->dev); - ret = dev_set_name(pmu->dev, "%s", pmu->name); - if (ret) - goto free_dev; - - dev_set_drvdata(pmu->dev, pmu); - pmu->dev->bus = &pmu_bus; - pmu->dev->release = pmu_dev_release; - ret = device_add(pmu->dev); - if (ret) - goto free_dev; - -out: - return ret; - -free_dev: - put_device(pmu->dev); - goto out; -} -int perf_pmu_register(struct pmu *pmu, char *name, int type) +int perf_pmu_register(struct pmu *pmu) { int cpu, ret; @@ -5371,38 +5195,13 @@ int perf_pmu_register(struct pmu *pmu, char *name, int type) if (!pmu->pmu_disable_count) goto unlock; - pmu->type = -1; - if (!name) - goto skip_type; - pmu->name = name; - - if (type < 0) { - int err = idr_pre_get(&pmu_idr, GFP_KERNEL); - if (!err) - goto free_pdc; - - err = idr_get_new_above(&pmu_idr, pmu, PERF_TYPE_MAX, &type); - if (err) { - ret = err; - goto free_pdc; - } - } - pmu->type = type; - - if (pmu_bus_running) { - ret = pmu_dev_alloc(pmu); - if (ret) - goto free_idr; - } - -skip_type: pmu->pmu_cpu_context = find_pmu_context(pmu->task_ctx_nr); if (pmu->pmu_cpu_context) goto got_cpu_context; pmu->pmu_cpu_context = alloc_percpu(struct perf_cpu_context); if (!pmu->pmu_cpu_context) - goto free_dev; + goto free_pdc; for_each_possible_cpu(cpu) { struct perf_cpu_context *cpuctx; @@ -5446,14 +5245,6 @@ int perf_pmu_register(struct pmu *pmu, char *name, int type) return ret; -free_dev: - device_del(pmu->dev); - put_device(pmu->dev); - -free_idr: - if (pmu->type >= PERF_TYPE_MAX) - idr_remove(&pmu_idr, pmu->type); - free_pdc: free_percpu(pmu->pmu_disable_count); goto unlock; @@ -5473,10 +5264,6 @@ void perf_pmu_unregister(struct pmu *pmu) synchronize_rcu(); free_percpu(pmu->pmu_disable_count); - if (pmu->type >= PERF_TYPE_MAX) - idr_remove(&pmu_idr, pmu->type); - device_del(pmu->dev); - put_device(pmu->dev); free_pmu_context(pmu); } @@ -5486,13 +5273,6 @@ struct pmu *perf_init_event(struct perf_event *event) int idx; idx = srcu_read_lock(&pmus_srcu); - - rcu_read_lock(); - pmu = idr_find(&pmu_idr, event->attr.type); - rcu_read_unlock(); - if (pmu) - goto unlock; - list_for_each_entry_rcu(pmu, &pmus, entry) { int ret = pmu->event_init(event); if (!ret) @@ -5957,12 +5737,6 @@ SYSCALL_DEFINE5(perf_event_open, list_add_tail(&event->owner_entry, ¤t->perf_event_list); mutex_unlock(¤t->perf_event_mutex); - /* - * Precalculate sample_data sizes - */ - perf_event__header_size(event); - perf_event__id_header_size(event); - /* * Drop the reference on the group_event after placing the * new event on the sibling_list. This ensures destruction @@ -6315,12 +6089,6 @@ inherit_event(struct perf_event *parent_event, child_event->ctx = child_ctx; child_event->overflow_handler = parent_event->overflow_handler; - /* - * Precalculate sample_data sizes - */ - perf_event__header_size(child_event); - perf_event__id_header_size(child_event); - /* * Link it up in the child's context: */ @@ -6552,7 +6320,7 @@ static void __cpuinit perf_event_init_cpu(int cpu) mutex_unlock(&swhash->hlist_mutex); } -#if defined CONFIG_HOTPLUG_CPU || defined CONFIG_KEXEC +#ifdef CONFIG_HOTPLUG_CPU static void perf_pmu_rotate_stop(struct pmu *pmu) { struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); @@ -6606,26 +6374,6 @@ static void perf_event_exit_cpu(int cpu) static inline void perf_event_exit_cpu(int cpu) { } #endif -static int -perf_reboot(struct notifier_block *notifier, unsigned long val, void *v) -{ - int cpu; - - for_each_online_cpu(cpu) - perf_event_exit_cpu(cpu); - - return NOTIFY_OK; -} - -/* - * Run the perf reboot notifier at the very last possible moment so that - * the generic watchdog code runs as long as possible. - */ -static struct notifier_block perf_reboot_notifier = { - .notifier_call = perf_reboot, - .priority = INT_MIN, -}; - static int __cpuinit perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) { @@ -6654,45 +6402,14 @@ void __init perf_event_init(void) { int ret; - idr_init(&pmu_idr); - perf_event_init_all_cpus(); init_srcu_struct(&pmus_srcu); - perf_pmu_register(&perf_swevent, "software", PERF_TYPE_SOFTWARE); - perf_pmu_register(&perf_cpu_clock, NULL, -1); - perf_pmu_register(&perf_task_clock, NULL, -1); + perf_pmu_register(&perf_swevent); + perf_pmu_register(&perf_cpu_clock); + perf_pmu_register(&perf_task_clock); perf_tp_register(); perf_cpu_notifier(perf_cpu_notify); - register_reboot_notifier(&perf_reboot_notifier); ret = init_hw_breakpoint(); WARN(ret, "hw_breakpoint initialization failed with: %d", ret); } - -static int __init perf_event_sysfs_init(void) -{ - struct pmu *pmu; - int ret; - - mutex_lock(&pmus_lock); - - ret = bus_register(&pmu_bus); - if (ret) - goto unlock; - - list_for_each_entry(pmu, &pmus, entry) { - if (!pmu->name || pmu->type < 0) - continue; - - ret = pmu_dev_alloc(pmu); - WARN(ret, "Failed to register pmu: %s, reason %d\n", pmu->name, ret); - } - pmu_bus_running = 1; - ret = 0; - -unlock: - mutex_unlock(&pmus_lock); - - return ret; -} -device_initcall(perf_event_sysfs_init); diff --git a/trunk/kernel/posix-timers.c b/trunk/kernel/posix-timers.c index 93bd2eb2bc53..9ca4973f736d 100644 --- a/trunk/kernel/posix-timers.c +++ b/trunk/kernel/posix-timers.c @@ -145,13 +145,7 @@ static int common_timer_del(struct k_itimer *timer); static enum hrtimer_restart posix_timer_fn(struct hrtimer *data); -static struct k_itimer *__lock_timer(timer_t timer_id, unsigned long *flags); - -#define lock_timer(tid, flags) \ -({ struct k_itimer *__timr; \ - __cond_lock(&__timr->it_lock, __timr = __lock_timer(tid, flags)); \ - __timr; \ -}) +static struct k_itimer *lock_timer(timer_t timer_id, unsigned long *flags); static inline void unlock_timer(struct k_itimer *timr, unsigned long flags) { @@ -625,7 +619,7 @@ SYSCALL_DEFINE3(timer_create, const clockid_t, which_clock, * the find to the timer lock. To avoid a dead lock, the timer id MUST * be release with out holding the timer lock. */ -static struct k_itimer *__lock_timer(timer_t timer_id, unsigned long *flags) +static struct k_itimer *lock_timer(timer_t timer_id, unsigned long *flags) { struct k_itimer *timr; /* diff --git a/trunk/kernel/power/suspend.c b/trunk/kernel/power/suspend.c index 031d5e3a6197..ecf770509d0d 100644 --- a/trunk/kernel/power/suspend.c +++ b/trunk/kernel/power/suspend.c @@ -22,7 +22,6 @@ #include #include #include -#include #include "power.h" @@ -202,7 +201,6 @@ int suspend_devices_and_enter(suspend_state_t state) if (!suspend_ops) return -ENOSYS; - trace_machine_suspend(state); if (suspend_ops->begin) { error = suspend_ops->begin(state); if (error) @@ -231,7 +229,6 @@ int suspend_devices_and_enter(suspend_state_t state) Close: if (suspend_ops->end) suspend_ops->end(); - trace_machine_suspend(PWR_EVENT_EXIT); return error; Recover_platform: diff --git a/trunk/kernel/printk.c b/trunk/kernel/printk.c index ab3ffc5b3b64..a23315dc4498 100644 --- a/trunk/kernel/printk.c +++ b/trunk/kernel/printk.c @@ -1074,17 +1074,17 @@ static DEFINE_PER_CPU(int, printk_pending); void printk_tick(void) { - if (__this_cpu_read(printk_pending)) { - __this_cpu_write(printk_pending, 0); + if (__get_cpu_var(printk_pending)) { + __get_cpu_var(printk_pending) = 0; wake_up_interruptible(&log_wait); } } int printk_needs_cpu(int cpu) { - if (cpu_is_offline(cpu)) + if (unlikely(cpu_is_offline(cpu))) printk_tick(); - return __this_cpu_read(printk_pending); + return per_cpu(printk_pending, cpu); } void wake_up_klogd(void) diff --git a/trunk/kernel/rcutiny.c b/trunk/kernel/rcutiny.c index 034493724749..d806735342ac 100644 --- a/trunk/kernel/rcutiny.c +++ b/trunk/kernel/rcutiny.c @@ -36,16 +36,31 @@ #include #include -/* Controls for rcu_kthread() kthread, replacing RCU_SOFTIRQ used previously. */ -static struct task_struct *rcu_kthread_task; -static DECLARE_WAIT_QUEUE_HEAD(rcu_kthread_wq); -static unsigned long have_rcu_kthread_work; -static void invoke_rcu_kthread(void); +/* Global control variables for rcupdate callback mechanism. */ +struct rcu_ctrlblk { + struct rcu_head *rcucblist; /* List of pending callbacks (CBs). */ + struct rcu_head **donetail; /* ->next pointer of last "done" CB. */ + struct rcu_head **curtail; /* ->next pointer of last CB. */ +}; + +/* Definition for rcupdate control block. */ +static struct rcu_ctrlblk rcu_sched_ctrlblk = { + .donetail = &rcu_sched_ctrlblk.rcucblist, + .curtail = &rcu_sched_ctrlblk.rcucblist, +}; + +static struct rcu_ctrlblk rcu_bh_ctrlblk = { + .donetail = &rcu_bh_ctrlblk.rcucblist, + .curtail = &rcu_bh_ctrlblk.rcucblist, +}; + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +int rcu_scheduler_active __read_mostly; +EXPORT_SYMBOL_GPL(rcu_scheduler_active); +#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ /* Forward declarations for rcutiny_plugin.h. */ -struct rcu_ctrlblk; -static void rcu_process_callbacks(struct rcu_ctrlblk *rcp); -static int rcu_kthread(void *arg); +static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp); static void __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), struct rcu_ctrlblk *rcp); @@ -108,7 +123,7 @@ void rcu_sched_qs(int cpu) { if (rcu_qsctr_help(&rcu_sched_ctrlblk) + rcu_qsctr_help(&rcu_bh_ctrlblk)) - invoke_rcu_kthread(); + raise_softirq(RCU_SOFTIRQ); } /* @@ -117,7 +132,7 @@ void rcu_sched_qs(int cpu) void rcu_bh_qs(int cpu) { if (rcu_qsctr_help(&rcu_bh_ctrlblk)) - invoke_rcu_kthread(); + raise_softirq(RCU_SOFTIRQ); } /* @@ -137,14 +152,13 @@ void rcu_check_callbacks(int cpu, int user) } /* - * Invoke the RCU callbacks on the specified rcu_ctrlkblk structure - * whose grace period has elapsed. + * Helper function for rcu_process_callbacks() that operates on the + * specified rcu_ctrlkblk structure. */ -static void rcu_process_callbacks(struct rcu_ctrlblk *rcp) +static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp) { struct rcu_head *next, *list; unsigned long flags; - RCU_TRACE(int cb_count = 0); /* If no RCU callbacks ready to invoke, just return. */ if (&rcp->rcucblist == rcp->donetail) @@ -166,58 +180,19 @@ static void rcu_process_callbacks(struct rcu_ctrlblk *rcp) next = list->next; prefetch(next); debug_rcu_head_unqueue(list); - local_bh_disable(); list->func(list); - local_bh_enable(); list = next; - RCU_TRACE(cb_count++); } - RCU_TRACE(rcu_trace_sub_qlen(rcp, cb_count)); } /* - * This kthread invokes RCU callbacks whose grace periods have - * elapsed. It is awakened as needed, and takes the place of the - * RCU_SOFTIRQ that was used previously for this purpose. - * This is a kthread, but it is never stopped, at least not until - * the system goes down. + * Invoke any callbacks whose grace period has completed. */ -static int rcu_kthread(void *arg) +static void rcu_process_callbacks(struct softirq_action *unused) { - unsigned long work; - unsigned long morework; - unsigned long flags; - - for (;;) { - wait_event(rcu_kthread_wq, have_rcu_kthread_work != 0); - morework = rcu_boost(); - local_irq_save(flags); - work = have_rcu_kthread_work; - have_rcu_kthread_work = morework; - local_irq_restore(flags); - if (work) { - rcu_process_callbacks(&rcu_sched_ctrlblk); - rcu_process_callbacks(&rcu_bh_ctrlblk); - rcu_preempt_process_callbacks(); - } - schedule_timeout_interruptible(1); /* Leave CPU for others. */ - } - - return 0; /* Not reached, but needed to shut gcc up. */ -} - -/* - * Wake up rcu_kthread() to process callbacks now eligible for invocation - * or to boost readers. - */ -static void invoke_rcu_kthread(void) -{ - unsigned long flags; - - local_irq_save(flags); - have_rcu_kthread_work = 1; - wake_up(&rcu_kthread_wq); - local_irq_restore(flags); + __rcu_process_callbacks(&rcu_sched_ctrlblk); + __rcu_process_callbacks(&rcu_bh_ctrlblk); + rcu_preempt_process_callbacks(); } /* @@ -255,7 +230,6 @@ static void __call_rcu(struct rcu_head *head, local_irq_save(flags); *rcp->curtail = head; rcp->curtail = &head->next; - RCU_TRACE(rcp->qlen++); local_irq_restore(flags); } @@ -308,16 +282,7 @@ void rcu_barrier_sched(void) } EXPORT_SYMBOL_GPL(rcu_barrier_sched); -/* - * Spawn the kthread that invokes RCU callbacks. - */ -static int __init rcu_spawn_kthreads(void) +void __init rcu_init(void) { - struct sched_param sp; - - rcu_kthread_task = kthread_run(rcu_kthread, NULL, "rcu_kthread"); - sp.sched_priority = RCU_BOOST_PRIO; - sched_setscheduler_nocheck(rcu_kthread_task, SCHED_FIFO, &sp); - return 0; + open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); } -early_initcall(rcu_spawn_kthreads); diff --git a/trunk/kernel/rcutiny_plugin.h b/trunk/kernel/rcutiny_plugin.h index 015abaea962a..6ceca4f745ff 100644 --- a/trunk/kernel/rcutiny_plugin.h +++ b/trunk/kernel/rcutiny_plugin.h @@ -22,40 +22,6 @@ * Author: Paul E. McKenney */ -#include -#include -#include - -#ifdef CONFIG_RCU_TRACE -#define RCU_TRACE(stmt) stmt -#else /* #ifdef CONFIG_RCU_TRACE */ -#define RCU_TRACE(stmt) -#endif /* #else #ifdef CONFIG_RCU_TRACE */ - -/* Global control variables for rcupdate callback mechanism. */ -struct rcu_ctrlblk { - struct rcu_head *rcucblist; /* List of pending callbacks (CBs). */ - struct rcu_head **donetail; /* ->next pointer of last "done" CB. */ - struct rcu_head **curtail; /* ->next pointer of last CB. */ - RCU_TRACE(long qlen); /* Number of pending CBs. */ -}; - -/* Definition for rcupdate control block. */ -static struct rcu_ctrlblk rcu_sched_ctrlblk = { - .donetail = &rcu_sched_ctrlblk.rcucblist, - .curtail = &rcu_sched_ctrlblk.rcucblist, -}; - -static struct rcu_ctrlblk rcu_bh_ctrlblk = { - .donetail = &rcu_bh_ctrlblk.rcucblist, - .curtail = &rcu_bh_ctrlblk.rcucblist, -}; - -#ifdef CONFIG_DEBUG_LOCK_ALLOC -int rcu_scheduler_active __read_mostly; -EXPORT_SYMBOL_GPL(rcu_scheduler_active); -#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ - #ifdef CONFIG_TINY_PREEMPT_RCU #include @@ -80,45 +46,17 @@ struct rcu_preempt_ctrlblk { struct list_head *gp_tasks; /* Pointer to the first task blocking the */ /* current grace period, or NULL if there */ - /* is no such task. */ + /* is not such task. */ struct list_head *exp_tasks; /* Pointer to first task blocking the */ /* current expedited grace period, or NULL */ /* if there is no such task. If there */ /* is no current expedited grace period, */ /* then there cannot be any such task. */ -#ifdef CONFIG_RCU_BOOST - struct list_head *boost_tasks; - /* Pointer to first task that needs to be */ - /* priority-boosted, or NULL if no priority */ - /* boosting is needed. If there is no */ - /* current or expedited grace period, there */ - /* can be no such task. */ -#endif /* #ifdef CONFIG_RCU_BOOST */ u8 gpnum; /* Current grace period. */ u8 gpcpu; /* Last grace period blocked by the CPU. */ u8 completed; /* Last grace period completed. */ /* If all three are equal, RCU is idle. */ -#ifdef CONFIG_RCU_BOOST - s8 boosted_this_gp; /* Has boosting already happened? */ - unsigned long boost_time; /* When to start boosting (jiffies) */ -#endif /* #ifdef CONFIG_RCU_BOOST */ -#ifdef CONFIG_RCU_TRACE - unsigned long n_grace_periods; -#ifdef CONFIG_RCU_BOOST - unsigned long n_tasks_boosted; - unsigned long n_exp_boosts; - unsigned long n_normal_boosts; - unsigned long n_normal_balk_blkd_tasks; - unsigned long n_normal_balk_gp_tasks; - unsigned long n_normal_balk_boost_tasks; - unsigned long n_normal_balk_boosted; - unsigned long n_normal_balk_notyet; - unsigned long n_normal_balk_nos; - unsigned long n_exp_balk_blkd_tasks; - unsigned long n_exp_balk_nos; -#endif /* #ifdef CONFIG_RCU_BOOST */ -#endif /* #ifdef CONFIG_RCU_TRACE */ }; static struct rcu_preempt_ctrlblk rcu_preempt_ctrlblk = { @@ -183,210 +121,6 @@ static int rcu_preempt_gp_in_progress(void) return rcu_preempt_ctrlblk.completed != rcu_preempt_ctrlblk.gpnum; } -/* - * Advance a ->blkd_tasks-list pointer to the next entry, instead - * returning NULL if at the end of the list. - */ -static struct list_head *rcu_next_node_entry(struct task_struct *t) -{ - struct list_head *np; - - np = t->rcu_node_entry.next; - if (np == &rcu_preempt_ctrlblk.blkd_tasks) - np = NULL; - return np; -} - -#ifdef CONFIG_RCU_TRACE - -#ifdef CONFIG_RCU_BOOST -static void rcu_initiate_boost_trace(void); -static void rcu_initiate_exp_boost_trace(void); -#endif /* #ifdef CONFIG_RCU_BOOST */ - -/* - * Dump additional statistice for TINY_PREEMPT_RCU. - */ -static void show_tiny_preempt_stats(struct seq_file *m) -{ - seq_printf(m, "rcu_preempt: qlen=%ld gp=%lu g%u/p%u/c%u tasks=%c%c%c\n", - rcu_preempt_ctrlblk.rcb.qlen, - rcu_preempt_ctrlblk.n_grace_periods, - rcu_preempt_ctrlblk.gpnum, - rcu_preempt_ctrlblk.gpcpu, - rcu_preempt_ctrlblk.completed, - "T."[list_empty(&rcu_preempt_ctrlblk.blkd_tasks)], - "N."[!rcu_preempt_ctrlblk.gp_tasks], - "E."[!rcu_preempt_ctrlblk.exp_tasks]); -#ifdef CONFIG_RCU_BOOST - seq_printf(m, " ttb=%c btg=", - "B."[!rcu_preempt_ctrlblk.boost_tasks]); - switch (rcu_preempt_ctrlblk.boosted_this_gp) { - case -1: - seq_puts(m, "exp"); - break; - case 0: - seq_puts(m, "no"); - break; - case 1: - seq_puts(m, "begun"); - break; - case 2: - seq_puts(m, "done"); - break; - default: - seq_printf(m, "?%d?", rcu_preempt_ctrlblk.boosted_this_gp); - } - seq_printf(m, " ntb=%lu neb=%lu nnb=%lu j=%04x bt=%04x\n", - rcu_preempt_ctrlblk.n_tasks_boosted, - rcu_preempt_ctrlblk.n_exp_boosts, - rcu_preempt_ctrlblk.n_normal_boosts, - (int)(jiffies & 0xffff), - (int)(rcu_preempt_ctrlblk.boost_time & 0xffff)); - seq_printf(m, " %s: nt=%lu gt=%lu bt=%lu b=%lu ny=%lu nos=%lu\n", - "normal balk", - rcu_preempt_ctrlblk.n_normal_balk_blkd_tasks, - rcu_preempt_ctrlblk.n_normal_balk_gp_tasks, - rcu_preempt_ctrlblk.n_normal_balk_boost_tasks, - rcu_preempt_ctrlblk.n_normal_balk_boosted, - rcu_preempt_ctrlblk.n_normal_balk_notyet, - rcu_preempt_ctrlblk.n_normal_balk_nos); - seq_printf(m, " exp balk: bt=%lu nos=%lu\n", - rcu_preempt_ctrlblk.n_exp_balk_blkd_tasks, - rcu_preempt_ctrlblk.n_exp_balk_nos); -#endif /* #ifdef CONFIG_RCU_BOOST */ -} - -#endif /* #ifdef CONFIG_RCU_TRACE */ - -#ifdef CONFIG_RCU_BOOST - -#include "rtmutex_common.h" - -/* - * Carry out RCU priority boosting on the task indicated by ->boost_tasks, - * and advance ->boost_tasks to the next task in the ->blkd_tasks list. - */ -static int rcu_boost(void) -{ - unsigned long flags; - struct rt_mutex mtx; - struct list_head *np; - struct task_struct *t; - - if (rcu_preempt_ctrlblk.boost_tasks == NULL) - return 0; /* Nothing to boost. */ - raw_local_irq_save(flags); - rcu_preempt_ctrlblk.boosted_this_gp++; - t = container_of(rcu_preempt_ctrlblk.boost_tasks, struct task_struct, - rcu_node_entry); - np = rcu_next_node_entry(t); - rt_mutex_init_proxy_locked(&mtx, t); - t->rcu_boost_mutex = &mtx; - t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BOOSTED; - raw_local_irq_restore(flags); - rt_mutex_lock(&mtx); - RCU_TRACE(rcu_preempt_ctrlblk.n_tasks_boosted++); - rcu_preempt_ctrlblk.boosted_this_gp++; - rt_mutex_unlock(&mtx); - return rcu_preempt_ctrlblk.boost_tasks != NULL; -} - -/* - * Check to see if it is now time to start boosting RCU readers blocking - * the current grace period, and, if so, tell the rcu_kthread_task to - * start boosting them. If there is an expedited boost in progress, - * we wait for it to complete. - * - * If there are no blocked readers blocking the current grace period, - * return 0 to let the caller know, otherwise return 1. Note that this - * return value is independent of whether or not boosting was done. - */ -static int rcu_initiate_boost(void) -{ - if (!rcu_preempt_blocked_readers_cgp()) { - RCU_TRACE(rcu_preempt_ctrlblk.n_normal_balk_blkd_tasks++); - return 0; - } - if (rcu_preempt_ctrlblk.gp_tasks != NULL && - rcu_preempt_ctrlblk.boost_tasks == NULL && - rcu_preempt_ctrlblk.boosted_this_gp == 0 && - ULONG_CMP_GE(jiffies, rcu_preempt_ctrlblk.boost_time)) { - rcu_preempt_ctrlblk.boost_tasks = rcu_preempt_ctrlblk.gp_tasks; - invoke_rcu_kthread(); - RCU_TRACE(rcu_preempt_ctrlblk.n_normal_boosts++); - } else - RCU_TRACE(rcu_initiate_boost_trace()); - return 1; -} - -/* - * Initiate boosting for an expedited grace period. - */ -static void rcu_initiate_expedited_boost(void) -{ - unsigned long flags; - - raw_local_irq_save(flags); - if (!list_empty(&rcu_preempt_ctrlblk.blkd_tasks)) { - rcu_preempt_ctrlblk.boost_tasks = - rcu_preempt_ctrlblk.blkd_tasks.next; - rcu_preempt_ctrlblk.boosted_this_gp = -1; - invoke_rcu_kthread(); - RCU_TRACE(rcu_preempt_ctrlblk.n_exp_boosts++); - } else - RCU_TRACE(rcu_initiate_exp_boost_trace()); - raw_local_irq_restore(flags); -} - -#define RCU_BOOST_DELAY_JIFFIES DIV_ROUND_UP(CONFIG_RCU_BOOST_DELAY * HZ, 1000); - -/* - * Do priority-boost accounting for the start of a new grace period. - */ -static void rcu_preempt_boost_start_gp(void) -{ - rcu_preempt_ctrlblk.boost_time = jiffies + RCU_BOOST_DELAY_JIFFIES; - if (rcu_preempt_ctrlblk.boosted_this_gp > 0) - rcu_preempt_ctrlblk.boosted_this_gp = 0; -} - -#else /* #ifdef CONFIG_RCU_BOOST */ - -/* - * If there is no RCU priority boosting, we don't boost. - */ -static int rcu_boost(void) -{ - return 0; -} - -/* - * If there is no RCU priority boosting, we don't initiate boosting, - * but we do indicate whether there are blocked readers blocking the - * current grace period. - */ -static int rcu_initiate_boost(void) -{ - return rcu_preempt_blocked_readers_cgp(); -} - -/* - * If there is no RCU priority boosting, we don't initiate expedited boosting. - */ -static void rcu_initiate_expedited_boost(void) -{ -} - -/* - * If there is no RCU priority boosting, nothing to do at grace-period start. - */ -static void rcu_preempt_boost_start_gp(void) -{ -} - -#endif /* else #ifdef CONFIG_RCU_BOOST */ - /* * Record a preemptible-RCU quiescent state for the specified CPU. Note * that this just means that the task currently running on the CPU is @@ -414,14 +148,11 @@ static void rcu_preempt_cpu_qs(void) rcu_preempt_ctrlblk.gpcpu = rcu_preempt_ctrlblk.gpnum; current->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS; - /* If there is no GP then there is nothing more to do. */ - if (!rcu_preempt_gp_in_progress()) - return; /* - * Check up on boosting. If there are no readers blocking the - * current grace period, leave. + * If there is no GP, or if blocked readers are still blocking GP, + * then there is nothing more to do. */ - if (rcu_initiate_boost()) + if (!rcu_preempt_gp_in_progress() || rcu_preempt_blocked_readers_cgp()) return; /* Advance callbacks. */ @@ -433,9 +164,9 @@ static void rcu_preempt_cpu_qs(void) if (!rcu_preempt_blocked_readers_any()) rcu_preempt_ctrlblk.rcb.donetail = rcu_preempt_ctrlblk.nexttail; - /* If there are done callbacks, cause them to be invoked. */ + /* If there are done callbacks, make RCU_SOFTIRQ process them. */ if (*rcu_preempt_ctrlblk.rcb.donetail != NULL) - invoke_rcu_kthread(); + raise_softirq(RCU_SOFTIRQ); } /* @@ -447,16 +178,12 @@ static void rcu_preempt_start_gp(void) /* Official start of GP. */ rcu_preempt_ctrlblk.gpnum++; - RCU_TRACE(rcu_preempt_ctrlblk.n_grace_periods++); /* Any blocked RCU readers block new GP. */ if (rcu_preempt_blocked_readers_any()) rcu_preempt_ctrlblk.gp_tasks = rcu_preempt_ctrlblk.blkd_tasks.next; - /* Set up for RCU priority boosting. */ - rcu_preempt_boost_start_gp(); - /* If there is no running reader, CPU is done with GP. */ if (!rcu_preempt_running_reader()) rcu_preempt_cpu_qs(); @@ -577,16 +304,14 @@ static void rcu_read_unlock_special(struct task_struct *t) */ empty = !rcu_preempt_blocked_readers_cgp(); empty_exp = rcu_preempt_ctrlblk.exp_tasks == NULL; - np = rcu_next_node_entry(t); + np = t->rcu_node_entry.next; + if (np == &rcu_preempt_ctrlblk.blkd_tasks) + np = NULL; list_del(&t->rcu_node_entry); if (&t->rcu_node_entry == rcu_preempt_ctrlblk.gp_tasks) rcu_preempt_ctrlblk.gp_tasks = np; if (&t->rcu_node_entry == rcu_preempt_ctrlblk.exp_tasks) rcu_preempt_ctrlblk.exp_tasks = np; -#ifdef CONFIG_RCU_BOOST - if (&t->rcu_node_entry == rcu_preempt_ctrlblk.boost_tasks) - rcu_preempt_ctrlblk.boost_tasks = np; -#endif /* #ifdef CONFIG_RCU_BOOST */ INIT_LIST_HEAD(&t->rcu_node_entry); /* @@ -606,14 +331,6 @@ static void rcu_read_unlock_special(struct task_struct *t) if (!empty_exp && rcu_preempt_ctrlblk.exp_tasks == NULL) rcu_report_exp_done(); } -#ifdef CONFIG_RCU_BOOST - /* Unboost self if was boosted. */ - if (special & RCU_READ_UNLOCK_BOOSTED) { - t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_BOOSTED; - rt_mutex_unlock(t->rcu_boost_mutex); - t->rcu_boost_mutex = NULL; - } -#endif /* #ifdef CONFIG_RCU_BOOST */ local_irq_restore(flags); } @@ -657,7 +374,7 @@ static void rcu_preempt_check_callbacks(void) rcu_preempt_cpu_qs(); if (&rcu_preempt_ctrlblk.rcb.rcucblist != rcu_preempt_ctrlblk.rcb.donetail) - invoke_rcu_kthread(); + raise_softirq(RCU_SOFTIRQ); if (rcu_preempt_gp_in_progress() && rcu_cpu_blocking_cur_gp() && rcu_preempt_running_reader()) @@ -666,7 +383,7 @@ static void rcu_preempt_check_callbacks(void) /* * TINY_PREEMPT_RCU has an extra callback-list tail pointer to - * update, so this is invoked from rcu_process_callbacks() to + * update, so this is invoked from __rcu_process_callbacks() to * handle that case. Of course, it is invoked for all flavors of * RCU, but RCU callbacks can appear only on one of the lists, and * neither ->nexttail nor ->donetail can possibly be NULL, so there @@ -683,7 +400,7 @@ static void rcu_preempt_remove_callbacks(struct rcu_ctrlblk *rcp) */ static void rcu_preempt_process_callbacks(void) { - rcu_process_callbacks(&rcu_preempt_ctrlblk.rcb); + __rcu_process_callbacks(&rcu_preempt_ctrlblk.rcb); } /* @@ -700,7 +417,6 @@ void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) local_irq_save(flags); *rcu_preempt_ctrlblk.nexttail = head; rcu_preempt_ctrlblk.nexttail = &head->next; - RCU_TRACE(rcu_preempt_ctrlblk.rcb.qlen++); rcu_preempt_start_gp(); /* checks to see if GP needed. */ local_irq_restore(flags); } @@ -816,7 +532,6 @@ void synchronize_rcu_expedited(void) /* Wait for tail of ->blkd_tasks list to drain. */ if (rcu_preempted_readers_exp()) - rcu_initiate_expedited_boost(); wait_event(sync_rcu_preempt_exp_wq, !rcu_preempted_readers_exp()); @@ -857,27 +572,6 @@ void exit_rcu(void) #else /* #ifdef CONFIG_TINY_PREEMPT_RCU */ -#ifdef CONFIG_RCU_TRACE - -/* - * Because preemptible RCU does not exist, it is not necessary to - * dump out its statistics. - */ -static void show_tiny_preempt_stats(struct seq_file *m) -{ -} - -#endif /* #ifdef CONFIG_RCU_TRACE */ - -/* - * Because preemptible RCU does not exist, it is never necessary to - * boost preempted RCU readers. - */ -static int rcu_boost(void) -{ - return 0; -} - /* * Because preemptible RCU does not exist, it never has any callbacks * to check. @@ -905,116 +599,17 @@ static void rcu_preempt_process_callbacks(void) #endif /* #else #ifdef CONFIG_TINY_PREEMPT_RCU */ #ifdef CONFIG_DEBUG_LOCK_ALLOC + #include /* * During boot, we forgive RCU lockdep issues. After this function is * invoked, we start taking RCU lockdep issues seriously. */ -void __init rcu_scheduler_starting(void) +void rcu_scheduler_starting(void) { WARN_ON(nr_context_switches() > 0); rcu_scheduler_active = 1; } #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ - -#ifdef CONFIG_RCU_BOOST -#define RCU_BOOST_PRIO CONFIG_RCU_BOOST_PRIO -#else /* #ifdef CONFIG_RCU_BOOST */ -#define RCU_BOOST_PRIO 1 -#endif /* #else #ifdef CONFIG_RCU_BOOST */ - -#ifdef CONFIG_RCU_TRACE - -#ifdef CONFIG_RCU_BOOST - -static void rcu_initiate_boost_trace(void) -{ - if (rcu_preempt_ctrlblk.gp_tasks == NULL) - rcu_preempt_ctrlblk.n_normal_balk_gp_tasks++; - else if (rcu_preempt_ctrlblk.boost_tasks != NULL) - rcu_preempt_ctrlblk.n_normal_balk_boost_tasks++; - else if (rcu_preempt_ctrlblk.boosted_this_gp != 0) - rcu_preempt_ctrlblk.n_normal_balk_boosted++; - else if (!ULONG_CMP_GE(jiffies, rcu_preempt_ctrlblk.boost_time)) - rcu_preempt_ctrlblk.n_normal_balk_notyet++; - else - rcu_preempt_ctrlblk.n_normal_balk_nos++; -} - -static void rcu_initiate_exp_boost_trace(void) -{ - if (list_empty(&rcu_preempt_ctrlblk.blkd_tasks)) - rcu_preempt_ctrlblk.n_exp_balk_blkd_tasks++; - else - rcu_preempt_ctrlblk.n_exp_balk_nos++; -} - -#endif /* #ifdef CONFIG_RCU_BOOST */ - -static void rcu_trace_sub_qlen(struct rcu_ctrlblk *rcp, int n) -{ - unsigned long flags; - - raw_local_irq_save(flags); - rcp->qlen -= n; - raw_local_irq_restore(flags); -} - -/* - * Dump statistics for TINY_RCU, such as they are. - */ -static int show_tiny_stats(struct seq_file *m, void *unused) -{ - show_tiny_preempt_stats(m); - seq_printf(m, "rcu_sched: qlen: %ld\n", rcu_sched_ctrlblk.qlen); - seq_printf(m, "rcu_bh: qlen: %ld\n", rcu_bh_ctrlblk.qlen); - return 0; -} - -static int show_tiny_stats_open(struct inode *inode, struct file *file) -{ - return single_open(file, show_tiny_stats, NULL); -} - -static const struct file_operations show_tiny_stats_fops = { - .owner = THIS_MODULE, - .open = show_tiny_stats_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static struct dentry *rcudir; - -static int __init rcutiny_trace_init(void) -{ - struct dentry *retval; - - rcudir = debugfs_create_dir("rcu", NULL); - if (!rcudir) - goto free_out; - retval = debugfs_create_file("rcudata", 0444, rcudir, - NULL, &show_tiny_stats_fops); - if (!retval) - goto free_out; - return 0; -free_out: - debugfs_remove_recursive(rcudir); - return 1; -} - -static void __exit rcutiny_trace_cleanup(void) -{ - debugfs_remove_recursive(rcudir); -} - -module_init(rcutiny_trace_init); -module_exit(rcutiny_trace_cleanup); - -MODULE_AUTHOR("Paul E. McKenney"); -MODULE_DESCRIPTION("Read-Copy Update tracing for tiny implementation"); -MODULE_LICENSE("GPL"); - -#endif /* #ifdef CONFIG_RCU_TRACE */ diff --git a/trunk/kernel/rcutorture.c b/trunk/kernel/rcutorture.c index 89613f97ff26..9d8e8fb2515f 100644 --- a/trunk/kernel/rcutorture.c +++ b/trunk/kernel/rcutorture.c @@ -47,7 +47,6 @@ #include #include #include -#include MODULE_LICENSE("GPL"); MODULE_AUTHOR("Paul E. McKenney and " @@ -65,9 +64,6 @@ static int irqreader = 1; /* RCU readers from irq (timers). */ static int fqs_duration = 0; /* Duration of bursts (us), 0 to disable. */ static int fqs_holdoff = 0; /* Hold time within burst (us). */ static int fqs_stutter = 3; /* Wait time between bursts (s). */ -static int test_boost = 1; /* Test RCU prio boost: 0=no, 1=maybe, 2=yes. */ -static int test_boost_interval = 7; /* Interval between boost tests, seconds. */ -static int test_boost_duration = 4; /* Duration of each boost test, seconds. */ static char *torture_type = "rcu"; /* What RCU implementation to torture. */ module_param(nreaders, int, 0444); @@ -92,12 +88,6 @@ module_param(fqs_holdoff, int, 0444); MODULE_PARM_DESC(fqs_holdoff, "Holdoff time within fqs bursts (us)"); module_param(fqs_stutter, int, 0444); MODULE_PARM_DESC(fqs_stutter, "Wait time between fqs bursts (s)"); -module_param(test_boost, int, 0444); -MODULE_PARM_DESC(test_boost, "Test RCU prio boost: 0=no, 1=maybe, 2=yes."); -module_param(test_boost_interval, int, 0444); -MODULE_PARM_DESC(test_boost_interval, "Interval between boost tests, seconds."); -module_param(test_boost_duration, int, 0444); -MODULE_PARM_DESC(test_boost_duration, "Duration of each boost test, seconds."); module_param(torture_type, charp, 0444); MODULE_PARM_DESC(torture_type, "Type of RCU to torture (rcu, rcu_bh, srcu)"); @@ -119,7 +109,6 @@ static struct task_struct *stats_task; static struct task_struct *shuffler_task; static struct task_struct *stutter_task; static struct task_struct *fqs_task; -static struct task_struct *boost_tasks[NR_CPUS]; #define RCU_TORTURE_PIPE_LEN 10 @@ -145,12 +134,6 @@ static atomic_t n_rcu_torture_alloc_fail; static atomic_t n_rcu_torture_free; static atomic_t n_rcu_torture_mberror; static atomic_t n_rcu_torture_error; -static long n_rcu_torture_boost_ktrerror; -static long n_rcu_torture_boost_rterror; -static long n_rcu_torture_boost_allocerror; -static long n_rcu_torture_boost_afferror; -static long n_rcu_torture_boost_failure; -static long n_rcu_torture_boosts; static long n_rcu_torture_timers; static struct list_head rcu_torture_removed; static cpumask_var_t shuffle_tmp_mask; @@ -164,16 +147,6 @@ static int stutter_pause_test; #endif int rcutorture_runnable = RCUTORTURE_RUNNABLE_INIT; -#ifdef CONFIG_RCU_BOOST -#define rcu_can_boost() 1 -#else /* #ifdef CONFIG_RCU_BOOST */ -#define rcu_can_boost() 0 -#endif /* #else #ifdef CONFIG_RCU_BOOST */ - -static unsigned long boost_starttime; /* jiffies of next boost test start. */ -DEFINE_MUTEX(boost_mutex); /* protect setting boost_starttime */ - /* and boost task create/destroy. */ - /* Mediate rmmod and system shutdown. Concurrent rmmod & shutdown illegal! */ #define FULLSTOP_DONTSTOP 0 /* Normal operation. */ @@ -304,7 +277,6 @@ struct rcu_torture_ops { void (*fqs)(void); int (*stats)(char *page); int irq_capable; - int can_boost; char *name; }; @@ -394,7 +366,6 @@ static struct rcu_torture_ops rcu_ops = { .fqs = rcu_force_quiescent_state, .stats = NULL, .irq_capable = 1, - .can_boost = rcu_can_boost(), .name = "rcu" }; @@ -437,7 +408,6 @@ static struct rcu_torture_ops rcu_sync_ops = { .fqs = rcu_force_quiescent_state, .stats = NULL, .irq_capable = 1, - .can_boost = rcu_can_boost(), .name = "rcu_sync" }; @@ -454,7 +424,6 @@ static struct rcu_torture_ops rcu_expedited_ops = { .fqs = rcu_force_quiescent_state, .stats = NULL, .irq_capable = 1, - .can_boost = rcu_can_boost(), .name = "rcu_expedited" }; @@ -714,110 +683,6 @@ static struct rcu_torture_ops sched_expedited_ops = { .name = "sched_expedited" }; -/* - * RCU torture priority-boost testing. Runs one real-time thread per - * CPU for moderate bursts, repeatedly registering RCU callbacks and - * spinning waiting for them to be invoked. If a given callback takes - * too long to be invoked, we assume that priority inversion has occurred. - */ - -struct rcu_boost_inflight { - struct rcu_head rcu; - int inflight; -}; - -static void rcu_torture_boost_cb(struct rcu_head *head) -{ - struct rcu_boost_inflight *rbip = - container_of(head, struct rcu_boost_inflight, rcu); - - smp_mb(); /* Ensure RCU-core accesses precede clearing ->inflight */ - rbip->inflight = 0; -} - -static int rcu_torture_boost(void *arg) -{ - unsigned long call_rcu_time; - unsigned long endtime; - unsigned long oldstarttime; - struct rcu_boost_inflight rbi = { .inflight = 0 }; - struct sched_param sp; - - VERBOSE_PRINTK_STRING("rcu_torture_boost started"); - - /* Set real-time priority. */ - sp.sched_priority = 1; - if (sched_setscheduler(current, SCHED_FIFO, &sp) < 0) { - VERBOSE_PRINTK_STRING("rcu_torture_boost RT prio failed!"); - n_rcu_torture_boost_rterror++; - } - - /* Each pass through the following loop does one boost-test cycle. */ - do { - /* Wait for the next test interval. */ - oldstarttime = boost_starttime; - while (jiffies - oldstarttime > ULONG_MAX / 2) { - schedule_timeout_uninterruptible(1); - rcu_stutter_wait("rcu_torture_boost"); - if (kthread_should_stop() || - fullstop != FULLSTOP_DONTSTOP) - goto checkwait; - } - - /* Do one boost-test interval. */ - endtime = oldstarttime + test_boost_duration * HZ; - call_rcu_time = jiffies; - while (jiffies - endtime > ULONG_MAX / 2) { - /* If we don't have a callback in flight, post one. */ - if (!rbi.inflight) { - smp_mb(); /* RCU core before ->inflight = 1. */ - rbi.inflight = 1; - call_rcu(&rbi.rcu, rcu_torture_boost_cb); - if (jiffies - call_rcu_time > - test_boost_duration * HZ - HZ / 2) { - VERBOSE_PRINTK_STRING("rcu_torture_boost boosting failed"); - n_rcu_torture_boost_failure++; - } - call_rcu_time = jiffies; - } - cond_resched(); - rcu_stutter_wait("rcu_torture_boost"); - if (kthread_should_stop() || - fullstop != FULLSTOP_DONTSTOP) - goto checkwait; - } - - /* - * Set the start time of the next test interval. - * Yes, this is vulnerable to long delays, but such - * delays simply cause a false negative for the next - * interval. Besides, we are running at RT priority, - * so delays should be relatively rare. - */ - while (oldstarttime == boost_starttime) { - if (mutex_trylock(&boost_mutex)) { - boost_starttime = jiffies + - test_boost_interval * HZ; - n_rcu_torture_boosts++; - mutex_unlock(&boost_mutex); - break; - } - schedule_timeout_uninterruptible(1); - } - - /* Go do the stutter. */ -checkwait: rcu_stutter_wait("rcu_torture_boost"); - } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP); - - /* Clean up and exit. */ - VERBOSE_PRINTK_STRING("rcu_torture_boost task stopping"); - rcutorture_shutdown_absorb("rcu_torture_boost"); - while (!kthread_should_stop() || rbi.inflight) - schedule_timeout_uninterruptible(1); - smp_mb(); /* order accesses to ->inflight before stack-frame death. */ - return 0; -} - /* * RCU torture force-quiescent-state kthread. Repeatedly induces * bursts of calls to force_quiescent_state(), increasing the probability @@ -1068,8 +933,7 @@ rcu_torture_printk(char *page) cnt += sprintf(&page[cnt], "%s%s ", torture_type, TORTURE_FLAG); cnt += sprintf(&page[cnt], "rtc: %p ver: %ld tfle: %d rta: %d rtaf: %d rtf: %d " - "rtmbe: %d rtbke: %ld rtbre: %ld rtbae: %ld rtbafe: %ld " - "rtbf: %ld rtb: %ld nt: %ld", + "rtmbe: %d nt: %ld", rcu_torture_current, rcu_torture_current_version, list_empty(&rcu_torture_freelist), @@ -1077,19 +941,8 @@ rcu_torture_printk(char *page) atomic_read(&n_rcu_torture_alloc_fail), atomic_read(&n_rcu_torture_free), atomic_read(&n_rcu_torture_mberror), - n_rcu_torture_boost_ktrerror, - n_rcu_torture_boost_rterror, - n_rcu_torture_boost_allocerror, - n_rcu_torture_boost_afferror, - n_rcu_torture_boost_failure, - n_rcu_torture_boosts, n_rcu_torture_timers); - if (atomic_read(&n_rcu_torture_mberror) != 0 || - n_rcu_torture_boost_ktrerror != 0 || - n_rcu_torture_boost_rterror != 0 || - n_rcu_torture_boost_allocerror != 0 || - n_rcu_torture_boost_afferror != 0 || - n_rcu_torture_boost_failure != 0) + if (atomic_read(&n_rcu_torture_mberror) != 0) cnt += sprintf(&page[cnt], " !!!"); cnt += sprintf(&page[cnt], "\n%s%s ", torture_type, TORTURE_FLAG); if (i > 1) { @@ -1241,91 +1094,22 @@ rcu_torture_stutter(void *arg) } static inline void -rcu_torture_print_module_parms(struct rcu_torture_ops *cur_ops, char *tag) +rcu_torture_print_module_parms(char *tag) { printk(KERN_ALERT "%s" TORTURE_FLAG "--- %s: nreaders=%d nfakewriters=%d " "stat_interval=%d verbose=%d test_no_idle_hz=%d " "shuffle_interval=%d stutter=%d irqreader=%d " - "fqs_duration=%d fqs_holdoff=%d fqs_stutter=%d " - "test_boost=%d/%d test_boost_interval=%d " - "test_boost_duration=%d\n", + "fqs_duration=%d fqs_holdoff=%d fqs_stutter=%d\n", torture_type, tag, nrealreaders, nfakewriters, stat_interval, verbose, test_no_idle_hz, shuffle_interval, - stutter, irqreader, fqs_duration, fqs_holdoff, fqs_stutter, - test_boost, cur_ops->can_boost, - test_boost_interval, test_boost_duration); + stutter, irqreader, fqs_duration, fqs_holdoff, fqs_stutter); } -static struct notifier_block rcutorture_shutdown_nb = { +static struct notifier_block rcutorture_nb = { .notifier_call = rcutorture_shutdown_notify, }; -static void rcutorture_booster_cleanup(int cpu) -{ - struct task_struct *t; - - if (boost_tasks[cpu] == NULL) - return; - mutex_lock(&boost_mutex); - VERBOSE_PRINTK_STRING("Stopping rcu_torture_boost task"); - t = boost_tasks[cpu]; - boost_tasks[cpu] = NULL; - mutex_unlock(&boost_mutex); - - /* This must be outside of the mutex, otherwise deadlock! */ - kthread_stop(t); -} - -static int rcutorture_booster_init(int cpu) -{ - int retval; - - if (boost_tasks[cpu] != NULL) - return 0; /* Already created, nothing more to do. */ - - /* Don't allow time recalculation while creating a new task. */ - mutex_lock(&boost_mutex); - VERBOSE_PRINTK_STRING("Creating rcu_torture_boost task"); - boost_tasks[cpu] = kthread_create(rcu_torture_boost, NULL, - "rcu_torture_boost"); - if (IS_ERR(boost_tasks[cpu])) { - retval = PTR_ERR(boost_tasks[cpu]); - VERBOSE_PRINTK_STRING("rcu_torture_boost task create failed"); - n_rcu_torture_boost_ktrerror++; - boost_tasks[cpu] = NULL; - mutex_unlock(&boost_mutex); - return retval; - } - kthread_bind(boost_tasks[cpu], cpu); - wake_up_process(boost_tasks[cpu]); - mutex_unlock(&boost_mutex); - return 0; -} - -static int rcutorture_cpu_notify(struct notifier_block *self, - unsigned long action, void *hcpu) -{ - long cpu = (long)hcpu; - - switch (action) { - case CPU_ONLINE: - case CPU_DOWN_FAILED: - (void)rcutorture_booster_init(cpu); - break; - case CPU_DOWN_PREPARE: - rcutorture_booster_cleanup(cpu); - break; - default: - break; - } - return NOTIFY_OK; -} - -static struct notifier_block rcutorture_cpu_nb = { - .notifier_call = rcutorture_cpu_notify, -}; - static void rcu_torture_cleanup(void) { @@ -1343,7 +1127,7 @@ rcu_torture_cleanup(void) } fullstop = FULLSTOP_RMMOD; mutex_unlock(&fullstop_mutex); - unregister_reboot_notifier(&rcutorture_shutdown_nb); + unregister_reboot_notifier(&rcutorture_nb); if (stutter_task) { VERBOSE_PRINTK_STRING("Stopping rcu_torture_stutter task"); kthread_stop(stutter_task); @@ -1400,12 +1184,6 @@ rcu_torture_cleanup(void) kthread_stop(fqs_task); } fqs_task = NULL; - if ((test_boost == 1 && cur_ops->can_boost) || - test_boost == 2) { - unregister_cpu_notifier(&rcutorture_cpu_nb); - for_each_possible_cpu(i) - rcutorture_booster_cleanup(i); - } /* Wait for all RCU callbacks to fire. */ @@ -1417,9 +1195,9 @@ rcu_torture_cleanup(void) if (cur_ops->cleanup) cur_ops->cleanup(); if (atomic_read(&n_rcu_torture_error)) - rcu_torture_print_module_parms(cur_ops, "End of test: FAILURE"); + rcu_torture_print_module_parms("End of test: FAILURE"); else - rcu_torture_print_module_parms(cur_ops, "End of test: SUCCESS"); + rcu_torture_print_module_parms("End of test: SUCCESS"); } static int __init @@ -1464,7 +1242,7 @@ rcu_torture_init(void) nrealreaders = nreaders; else nrealreaders = 2 * num_online_cpus(); - rcu_torture_print_module_parms(cur_ops, "Start of test"); + rcu_torture_print_module_parms("Start of test"); fullstop = FULLSTOP_DONTSTOP; /* Set up the freelist. */ @@ -1485,12 +1263,6 @@ rcu_torture_init(void) atomic_set(&n_rcu_torture_free, 0); atomic_set(&n_rcu_torture_mberror, 0); atomic_set(&n_rcu_torture_error, 0); - n_rcu_torture_boost_ktrerror = 0; - n_rcu_torture_boost_rterror = 0; - n_rcu_torture_boost_allocerror = 0; - n_rcu_torture_boost_afferror = 0; - n_rcu_torture_boost_failure = 0; - n_rcu_torture_boosts = 0; for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++) atomic_set(&rcu_torture_wcount[i], 0); for_each_possible_cpu(cpu) { @@ -1604,27 +1376,7 @@ rcu_torture_init(void) goto unwind; } } - if (test_boost_interval < 1) - test_boost_interval = 1; - if (test_boost_duration < 2) - test_boost_duration = 2; - if ((test_boost == 1 && cur_ops->can_boost) || - test_boost == 2) { - int retval; - - boost_starttime = jiffies + test_boost_interval * HZ; - register_cpu_notifier(&rcutorture_cpu_nb); - for_each_possible_cpu(i) { - if (cpu_is_offline(i)) - continue; /* Heuristic: CPU can go offline. */ - retval = rcutorture_booster_init(i); - if (retval < 0) { - firsterr = retval; - goto unwind; - } - } - } - register_reboot_notifier(&rcutorture_shutdown_nb); + register_reboot_notifier(&rcutorture_nb); mutex_unlock(&fullstop_mutex); return 0; diff --git a/trunk/kernel/rcutree.c b/trunk/kernel/rcutree.c index d0ddfea6579d..ccdc04c47981 100644 --- a/trunk/kernel/rcutree.c +++ b/trunk/kernel/rcutree.c @@ -67,6 +67,9 @@ static struct lock_class_key rcu_node_class[NUM_RCU_LVLS]; .gpnum = -300, \ .completed = -300, \ .onofflock = __RAW_SPIN_LOCK_UNLOCKED(&structname.onofflock), \ + .orphan_cbs_list = NULL, \ + .orphan_cbs_tail = &structname.orphan_cbs_list, \ + .orphan_qlen = 0, \ .fqslock = __RAW_SPIN_LOCK_UNLOCKED(&structname.fqslock), \ .n_force_qs = 0, \ .n_force_qs_ngp = 0, \ @@ -617,17 +620,9 @@ static void __init check_cpu_stall_init(void) static void __note_new_gpnum(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp) { if (rdp->gpnum != rnp->gpnum) { - /* - * If the current grace period is waiting for this CPU, - * set up to detect a quiescent state, otherwise don't - * go looking for one. - */ + rdp->qs_pending = 1; + rdp->passed_quiesc = 0; rdp->gpnum = rnp->gpnum; - if (rnp->qsmask & rdp->grpmask) { - rdp->qs_pending = 1; - rdp->passed_quiesc = 0; - } else - rdp->qs_pending = 0; } } @@ -686,24 +681,6 @@ __rcu_process_gp_end(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_dat /* Remember that we saw this grace-period completion. */ rdp->completed = rnp->completed; - - /* - * If we were in an extended quiescent state, we may have - * missed some grace periods that others CPUs handled on - * our behalf. Catch up with this state to avoid noting - * spurious new grace periods. If another grace period - * has started, then rnp->gpnum will have advanced, so - * we will detect this later on. - */ - if (ULONG_CMP_LT(rdp->gpnum, rdp->completed)) - rdp->gpnum = rdp->completed; - - /* - * If RCU does not need a quiescent state from this CPU, - * then make sure that this CPU doesn't go looking for one. - */ - if ((rnp->qsmask & rdp->grpmask) == 0) - rdp->qs_pending = 0; } } @@ -1007,31 +984,53 @@ rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp) #ifdef CONFIG_HOTPLUG_CPU /* - * Move a dying CPU's RCU callbacks to online CPU's callback list. - * Synchronization is not required because this function executes - * in stop_machine() context. + * Move a dying CPU's RCU callbacks to the ->orphan_cbs_list for the + * specified flavor of RCU. The callbacks will be adopted by the next + * _rcu_barrier() invocation or by the CPU_DEAD notifier, whichever + * comes first. Because this is invoked from the CPU_DYING notifier, + * irqs are already disabled. */ -static void rcu_send_cbs_to_online(struct rcu_state *rsp) +static void rcu_send_cbs_to_orphanage(struct rcu_state *rsp) { int i; - /* current DYING CPU is cleared in the cpu_online_mask */ - int receive_cpu = cpumask_any(cpu_online_mask); struct rcu_data *rdp = this_cpu_ptr(rsp->rda); - struct rcu_data *receive_rdp = per_cpu_ptr(rsp->rda, receive_cpu); if (rdp->nxtlist == NULL) return; /* irqs disabled, so comparison is stable. */ - - *receive_rdp->nxttail[RCU_NEXT_TAIL] = rdp->nxtlist; - receive_rdp->nxttail[RCU_NEXT_TAIL] = rdp->nxttail[RCU_NEXT_TAIL]; - receive_rdp->qlen += rdp->qlen; - receive_rdp->n_cbs_adopted += rdp->qlen; - rdp->n_cbs_orphaned += rdp->qlen; - + raw_spin_lock(&rsp->onofflock); /* irqs already disabled. */ + *rsp->orphan_cbs_tail = rdp->nxtlist; + rsp->orphan_cbs_tail = rdp->nxttail[RCU_NEXT_TAIL]; rdp->nxtlist = NULL; for (i = 0; i < RCU_NEXT_SIZE; i++) rdp->nxttail[i] = &rdp->nxtlist; + rsp->orphan_qlen += rdp->qlen; + rdp->n_cbs_orphaned += rdp->qlen; rdp->qlen = 0; + raw_spin_unlock(&rsp->onofflock); /* irqs remain disabled. */ +} + +/* + * Adopt previously orphaned RCU callbacks. + */ +static void rcu_adopt_orphan_cbs(struct rcu_state *rsp) +{ + unsigned long flags; + struct rcu_data *rdp; + + raw_spin_lock_irqsave(&rsp->onofflock, flags); + rdp = this_cpu_ptr(rsp->rda); + if (rsp->orphan_cbs_list == NULL) { + raw_spin_unlock_irqrestore(&rsp->onofflock, flags); + return; + } + *rdp->nxttail[RCU_NEXT_TAIL] = rsp->orphan_cbs_list; + rdp->nxttail[RCU_NEXT_TAIL] = rsp->orphan_cbs_tail; + rdp->qlen += rsp->orphan_qlen; + rdp->n_cbs_adopted += rsp->orphan_qlen; + rsp->orphan_cbs_list = NULL; + rsp->orphan_cbs_tail = &rsp->orphan_cbs_list; + rsp->orphan_qlen = 0; + raw_spin_unlock_irqrestore(&rsp->onofflock, flags); } /* @@ -1082,6 +1081,8 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp) raw_spin_unlock_irqrestore(&rnp->lock, flags); if (need_report & RCU_OFL_TASKS_EXP_GP) rcu_report_exp_rnp(rsp, rnp); + + rcu_adopt_orphan_cbs(rsp); } /* @@ -1099,7 +1100,11 @@ static void rcu_offline_cpu(int cpu) #else /* #ifdef CONFIG_HOTPLUG_CPU */ -static void rcu_send_cbs_to_online(struct rcu_state *rsp) +static void rcu_send_cbs_to_orphanage(struct rcu_state *rsp) +{ +} + +static void rcu_adopt_orphan_cbs(struct rcu_state *rsp) { } @@ -1435,11 +1440,22 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), */ local_irq_save(flags); rdp = this_cpu_ptr(rsp->rda); + rcu_process_gp_end(rsp, rdp); + check_for_new_grace_period(rsp, rdp); /* Add the callback to our list. */ *rdp->nxttail[RCU_NEXT_TAIL] = head; rdp->nxttail[RCU_NEXT_TAIL] = &head->next; + /* Start a new grace period if one not already started. */ + if (!rcu_gp_in_progress(rsp)) { + unsigned long nestflag; + struct rcu_node *rnp_root = rcu_get_root(rsp); + + raw_spin_lock_irqsave(&rnp_root->lock, nestflag); + rcu_start_gp(rsp, nestflag); /* releases rnp_root->lock. */ + } + /* * Force the grace period if too many callbacks or too long waiting. * Enforce hysteresis, and don't invoke force_quiescent_state() @@ -1448,27 +1464,12 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), * is the only one waiting for a grace period to complete. */ if (unlikely(++rdp->qlen > rdp->qlen_last_fqs_check + qhimark)) { - - /* Are we ignoring a completed grace period? */ - rcu_process_gp_end(rsp, rdp); - check_for_new_grace_period(rsp, rdp); - - /* Start a new grace period if one not already started. */ - if (!rcu_gp_in_progress(rsp)) { - unsigned long nestflag; - struct rcu_node *rnp_root = rcu_get_root(rsp); - - raw_spin_lock_irqsave(&rnp_root->lock, nestflag); - rcu_start_gp(rsp, nestflag); /* rlses rnp_root->lock */ - } else { - /* Give the grace period a kick. */ - rdp->blimit = LONG_MAX; - if (rsp->n_force_qs == rdp->n_force_qs_snap && - *rdp->nxttail[RCU_DONE_TAIL] != head) - force_quiescent_state(rsp, 0); - rdp->n_force_qs_snap = rsp->n_force_qs; - rdp->qlen_last_fqs_check = rdp->qlen; - } + rdp->blimit = LONG_MAX; + if (rsp->n_force_qs == rdp->n_force_qs_snap && + *rdp->nxttail[RCU_DONE_TAIL] != head) + force_quiescent_state(rsp, 0); + rdp->n_force_qs_snap = rsp->n_force_qs; + rdp->qlen_last_fqs_check = rdp->qlen; } else if (ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs), jiffies)) force_quiescent_state(rsp, 1); local_irq_restore(flags); @@ -1698,12 +1699,13 @@ static void _rcu_barrier(struct rcu_state *rsp, * decrement rcu_barrier_cpu_count -- otherwise the first CPU * might complete its grace period before all of the other CPUs * did their increment, causing this function to return too - * early. Note that on_each_cpu() disables irqs, which prevents - * any CPUs from coming online or going offline until each online - * CPU has queued its RCU-barrier callback. + * early. */ atomic_set(&rcu_barrier_cpu_count, 1); + preempt_disable(); /* stop CPU_DYING from filling orphan_cbs_list */ + rcu_adopt_orphan_cbs(rsp); on_each_cpu(rcu_barrier_func, (void *)call_rcu_func, 1); + preempt_enable(); /* CPU_DYING can again fill orphan_cbs_list */ if (atomic_dec_and_test(&rcu_barrier_cpu_count)) complete(&rcu_barrier_completion); wait_for_completion(&rcu_barrier_completion); @@ -1829,13 +1831,18 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self, case CPU_DYING: case CPU_DYING_FROZEN: /* - * The whole machine is "stopped" except this CPU, so we can - * touch any data without introducing corruption. We send the - * dying CPU's callbacks to an arbitrarily chosen online CPU. + * preempt_disable() in _rcu_barrier() prevents stop_machine(), + * so when "on_each_cpu(rcu_barrier_func, (void *)type, 1);" + * returns, all online cpus have queued rcu_barrier_func(). + * The dying CPU clears its cpu_online_mask bit and + * moves all of its RCU callbacks to ->orphan_cbs_list + * in the context of stop_machine(), so subsequent calls + * to _rcu_barrier() will adopt these callbacks and only + * then queue rcu_barrier_func() on all remaining CPUs. */ - rcu_send_cbs_to_online(&rcu_bh_state); - rcu_send_cbs_to_online(&rcu_sched_state); - rcu_preempt_send_cbs_to_online(); + rcu_send_cbs_to_orphanage(&rcu_bh_state); + rcu_send_cbs_to_orphanage(&rcu_sched_state); + rcu_preempt_send_cbs_to_orphanage(); break; case CPU_DEAD: case CPU_DEAD_FROZEN: @@ -1873,9 +1880,8 @@ static void __init rcu_init_levelspread(struct rcu_state *rsp) { int i; - for (i = NUM_RCU_LVLS - 1; i > 0; i--) + for (i = NUM_RCU_LVLS - 1; i >= 0; i--) rsp->levelspread[i] = CONFIG_RCU_FANOUT; - rsp->levelspread[0] = RCU_FANOUT_LEAF; } #else /* #ifdef CONFIG_RCU_FANOUT_EXACT */ static void __init rcu_init_levelspread(struct rcu_state *rsp) diff --git a/trunk/kernel/rcutree.h b/trunk/kernel/rcutree.h index e8f057e44e3e..91d4170c5c13 100644 --- a/trunk/kernel/rcutree.h +++ b/trunk/kernel/rcutree.h @@ -31,51 +31,46 @@ /* * Define shape of hierarchy based on NR_CPUS and CONFIG_RCU_FANOUT. * In theory, it should be possible to add more levels straightforwardly. - * In practice, this did work well going from three levels to four. - * Of course, your mileage may vary. + * In practice, this has not been tested, so there is probably some + * bug somewhere. */ #define MAX_RCU_LVLS 4 -#if CONFIG_RCU_FANOUT > 16 -#define RCU_FANOUT_LEAF 16 -#else /* #if CONFIG_RCU_FANOUT > 16 */ -#define RCU_FANOUT_LEAF (CONFIG_RCU_FANOUT) -#endif /* #else #if CONFIG_RCU_FANOUT > 16 */ -#define RCU_FANOUT_1 (RCU_FANOUT_LEAF) -#define RCU_FANOUT_2 (RCU_FANOUT_1 * CONFIG_RCU_FANOUT) -#define RCU_FANOUT_3 (RCU_FANOUT_2 * CONFIG_RCU_FANOUT) -#define RCU_FANOUT_4 (RCU_FANOUT_3 * CONFIG_RCU_FANOUT) - -#if NR_CPUS <= RCU_FANOUT_1 +#define RCU_FANOUT (CONFIG_RCU_FANOUT) +#define RCU_FANOUT_SQ (RCU_FANOUT * RCU_FANOUT) +#define RCU_FANOUT_CUBE (RCU_FANOUT_SQ * RCU_FANOUT) +#define RCU_FANOUT_FOURTH (RCU_FANOUT_CUBE * RCU_FANOUT) + +#if NR_CPUS <= RCU_FANOUT # define NUM_RCU_LVLS 1 # define NUM_RCU_LVL_0 1 # define NUM_RCU_LVL_1 (NR_CPUS) # define NUM_RCU_LVL_2 0 # define NUM_RCU_LVL_3 0 # define NUM_RCU_LVL_4 0 -#elif NR_CPUS <= RCU_FANOUT_2 +#elif NR_CPUS <= RCU_FANOUT_SQ # define NUM_RCU_LVLS 2 # define NUM_RCU_LVL_0 1 -# define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1) +# define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT) # define NUM_RCU_LVL_2 (NR_CPUS) # define NUM_RCU_LVL_3 0 # define NUM_RCU_LVL_4 0 -#elif NR_CPUS <= RCU_FANOUT_3 +#elif NR_CPUS <= RCU_FANOUT_CUBE # define NUM_RCU_LVLS 3 # define NUM_RCU_LVL_0 1 -# define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2) -# define NUM_RCU_LVL_2 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1) -# define NUM_RCU_LVL_3 (NR_CPUS) +# define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_SQ) +# define NUM_RCU_LVL_2 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT) +# define NUM_RCU_LVL_3 NR_CPUS # define NUM_RCU_LVL_4 0 -#elif NR_CPUS <= RCU_FANOUT_4 +#elif NR_CPUS <= RCU_FANOUT_FOURTH # define NUM_RCU_LVLS 4 # define NUM_RCU_LVL_0 1 -# define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_3) -# define NUM_RCU_LVL_2 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2) -# define NUM_RCU_LVL_3 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1) -# define NUM_RCU_LVL_4 (NR_CPUS) +# define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_CUBE) +# define NUM_RCU_LVL_2 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_SQ) +# define NUM_RCU_LVL_3 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT) +# define NUM_RCU_LVL_4 NR_CPUS #else # error "CONFIG_RCU_FANOUT insufficient for NR_CPUS" -#endif /* #if (NR_CPUS) <= RCU_FANOUT_1 */ +#endif /* #if (NR_CPUS) <= RCU_FANOUT */ #define RCU_SUM (NUM_RCU_LVL_0 + NUM_RCU_LVL_1 + NUM_RCU_LVL_2 + NUM_RCU_LVL_3 + NUM_RCU_LVL_4) #define NUM_RCU_NODES (RCU_SUM - NR_CPUS) @@ -208,8 +203,8 @@ struct rcu_data { long qlen_last_fqs_check; /* qlen at last check for QS forcing */ unsigned long n_cbs_invoked; /* count of RCU cbs invoked. */ - unsigned long n_cbs_orphaned; /* RCU cbs orphaned by dying CPU */ - unsigned long n_cbs_adopted; /* RCU cbs adopted from dying CPU */ + unsigned long n_cbs_orphaned; /* RCU cbs sent to orphanage. */ + unsigned long n_cbs_adopted; /* RCU cbs adopted from orphanage. */ unsigned long n_force_qs_snap; /* did other CPU force QS recently? */ long blimit; /* Upper limit on a processed batch */ @@ -314,7 +309,15 @@ struct rcu_state { /* End of fields guarded by root rcu_node's lock. */ raw_spinlock_t onofflock; /* exclude on/offline and */ - /* starting new GP. */ + /* starting new GP. Also */ + /* protects the following */ + /* orphan_cbs fields. */ + struct rcu_head *orphan_cbs_list; /* list of rcu_head structs */ + /* orphaned by all CPUs in */ + /* a given leaf rcu_node */ + /* going offline. */ + struct rcu_head **orphan_cbs_tail; /* And tail pointer. */ + long orphan_qlen; /* Number of orphaned cbs. */ raw_spinlock_t fqslock; /* Only one task forcing */ /* quiescent states. */ unsigned long jiffies_force_qs; /* Time at which to invoke */ @@ -387,7 +390,7 @@ static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp); static int rcu_preempt_pending(int cpu); static int rcu_preempt_needs_cpu(int cpu); static void __cpuinit rcu_preempt_init_percpu_data(int cpu); -static void rcu_preempt_send_cbs_to_online(void); +static void rcu_preempt_send_cbs_to_orphanage(void); static void __init __rcu_init_preempt(void); static void rcu_needs_cpu_flush(void); diff --git a/trunk/kernel/rcutree_plugin.h b/trunk/kernel/rcutree_plugin.h index a3638710dc67..71a4147473f9 100644 --- a/trunk/kernel/rcutree_plugin.h +++ b/trunk/kernel/rcutree_plugin.h @@ -25,7 +25,6 @@ */ #include -#include /* * Check the RCU kernel configuration parameters and print informative @@ -774,11 +773,11 @@ static void __cpuinit rcu_preempt_init_percpu_data(int cpu) } /* - * Move preemptable RCU's callbacks from dying CPU to other online CPU. + * Move preemptable RCU's callbacks to ->orphan_cbs_list. */ -static void rcu_preempt_send_cbs_to_online(void) +static void rcu_preempt_send_cbs_to_orphanage(void) { - rcu_send_cbs_to_online(&rcu_preempt_state); + rcu_send_cbs_to_orphanage(&rcu_preempt_state); } /* @@ -1002,7 +1001,7 @@ static void __cpuinit rcu_preempt_init_percpu_data(int cpu) /* * Because there is no preemptable RCU, there are no callbacks to move. */ -static void rcu_preempt_send_cbs_to_online(void) +static void rcu_preempt_send_cbs_to_orphanage(void) { } @@ -1015,132 +1014,6 @@ static void __init __rcu_init_preempt(void) #endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */ -#ifndef CONFIG_SMP - -void synchronize_sched_expedited(void) -{ - cond_resched(); -} -EXPORT_SYMBOL_GPL(synchronize_sched_expedited); - -#else /* #ifndef CONFIG_SMP */ - -static atomic_t sync_sched_expedited_started = ATOMIC_INIT(0); -static atomic_t sync_sched_expedited_done = ATOMIC_INIT(0); - -static int synchronize_sched_expedited_cpu_stop(void *data) -{ - /* - * There must be a full memory barrier on each affected CPU - * between the time that try_stop_cpus() is called and the - * time that it returns. - * - * In the current initial implementation of cpu_stop, the - * above condition is already met when the control reaches - * this point and the following smp_mb() is not strictly - * necessary. Do smp_mb() anyway for documentation and - * robustness against future implementation changes. - */ - smp_mb(); /* See above comment block. */ - return 0; -} - -/* - * Wait for an rcu-sched grace period to elapse, but use "big hammer" - * approach to force grace period to end quickly. This consumes - * significant time on all CPUs, and is thus not recommended for - * any sort of common-case code. - * - * Note that it is illegal to call this function while holding any - * lock that is acquired by a CPU-hotplug notifier. Failing to - * observe this restriction will result in deadlock. - * - * This implementation can be thought of as an application of ticket - * locking to RCU, with sync_sched_expedited_started and - * sync_sched_expedited_done taking on the roles of the halves - * of the ticket-lock word. Each task atomically increments - * sync_sched_expedited_started upon entry, snapshotting the old value, - * then attempts to stop all the CPUs. If this succeeds, then each - * CPU will have executed a context switch, resulting in an RCU-sched - * grace period. We are then done, so we use atomic_cmpxchg() to - * update sync_sched_expedited_done to match our snapshot -- but - * only if someone else has not already advanced past our snapshot. - * - * On the other hand, if try_stop_cpus() fails, we check the value - * of sync_sched_expedited_done. If it has advanced past our - * initial snapshot, then someone else must have forced a grace period - * some time after we took our snapshot. In this case, our work is - * done for us, and we can simply return. Otherwise, we try again, - * but keep our initial snapshot for purposes of checking for someone - * doing our work for us. - * - * If we fail too many times in a row, we fall back to synchronize_sched(). - */ -void synchronize_sched_expedited(void) -{ - int firstsnap, s, snap, trycount = 0; - - /* Note that atomic_inc_return() implies full memory barrier. */ - firstsnap = snap = atomic_inc_return(&sync_sched_expedited_started); - get_online_cpus(); - - /* - * Each pass through the following loop attempts to force a - * context switch on each CPU. - */ - while (try_stop_cpus(cpu_online_mask, - synchronize_sched_expedited_cpu_stop, - NULL) == -EAGAIN) { - put_online_cpus(); - - /* No joy, try again later. Or just synchronize_sched(). */ - if (trycount++ < 10) - udelay(trycount * num_online_cpus()); - else { - synchronize_sched(); - return; - } - - /* Check to see if someone else did our work for us. */ - s = atomic_read(&sync_sched_expedited_done); - if (UINT_CMP_GE((unsigned)s, (unsigned)firstsnap)) { - smp_mb(); /* ensure test happens before caller kfree */ - return; - } - - /* - * Refetching sync_sched_expedited_started allows later - * callers to piggyback on our grace period. We subtract - * 1 to get the same token that the last incrementer got. - * We retry after they started, so our grace period works - * for them, and they started after our first try, so their - * grace period works for us. - */ - get_online_cpus(); - snap = atomic_read(&sync_sched_expedited_started) - 1; - smp_mb(); /* ensure read is before try_stop_cpus(). */ - } - - /* - * Everyone up to our most recent fetch is covered by our grace - * period. Update the counter, but only if our work is still - * relevant -- which it won't be if someone who started later - * than we did beat us to the punch. - */ - do { - s = atomic_read(&sync_sched_expedited_done); - if (UINT_CMP_GE((unsigned)s, (unsigned)snap)) { - smp_mb(); /* ensure test happens before caller kfree */ - break; - } - } while (atomic_cmpxchg(&sync_sched_expedited_done, s, snap) != s); - - put_online_cpus(); -} -EXPORT_SYMBOL_GPL(synchronize_sched_expedited); - -#endif /* #else #ifndef CONFIG_SMP */ - #if !defined(CONFIG_RCU_FAST_NO_HZ) /* diff --git a/trunk/kernel/rcutree_trace.c b/trunk/kernel/rcutree_trace.c index c8e97853b970..d15430b9d122 100644 --- a/trunk/kernel/rcutree_trace.c +++ b/trunk/kernel/rcutree_trace.c @@ -166,13 +166,13 @@ static void print_one_rcu_state(struct seq_file *m, struct rcu_state *rsp) gpnum = rsp->gpnum; seq_printf(m, "c=%lu g=%lu s=%d jfq=%ld j=%x " - "nfqs=%lu/nfqsng=%lu(%lu) fqlh=%lu\n", + "nfqs=%lu/nfqsng=%lu(%lu) fqlh=%lu oqlen=%ld\n", rsp->completed, gpnum, rsp->signaled, (long)(rsp->jiffies_force_qs - jiffies), (int)(jiffies & 0xffff), rsp->n_force_qs, rsp->n_force_qs_ngp, rsp->n_force_qs - rsp->n_force_qs_ngp, - rsp->n_force_qs_lh); + rsp->n_force_qs_lh, rsp->orphan_qlen); for (rnp = &rsp->node[0]; rnp - &rsp->node[0] < NUM_RCU_NODES; rnp++) { if (rnp->level != level) { seq_puts(m, "\n"); @@ -300,7 +300,7 @@ static const struct file_operations rcu_pending_fops = { static struct dentry *rcudir; -static int __init rcutree_trace_init(void) +static int __init rcuclassic_trace_init(void) { struct dentry *retval; @@ -337,14 +337,14 @@ static int __init rcutree_trace_init(void) return 1; } -static void __exit rcutree_trace_cleanup(void) +static void __exit rcuclassic_trace_cleanup(void) { debugfs_remove_recursive(rcudir); } -module_init(rcutree_trace_init); -module_exit(rcutree_trace_cleanup); +module_init(rcuclassic_trace_init); +module_exit(rcuclassic_trace_cleanup); MODULE_AUTHOR("Paul E. McKenney"); MODULE_DESCRIPTION("Read-Copy Update tracing for hierarchical implementation"); diff --git a/trunk/kernel/sched.c b/trunk/kernel/sched.c index 04949089e760..297d1a0eedb0 100644 --- a/trunk/kernel/sched.c +++ b/trunk/kernel/sched.c @@ -75,11 +75,9 @@ #include #include -#include #include "sched_cpupri.h" #include "workqueue_sched.h" -#include "sched_autogroup.h" #define CREATE_TRACE_POINTS #include @@ -255,8 +253,6 @@ struct task_group { /* runqueue "owned" by this group on each cpu */ struct cfs_rq **cfs_rq; unsigned long shares; - - atomic_t load_weight; #endif #ifdef CONFIG_RT_GROUP_SCHED @@ -272,19 +268,24 @@ struct task_group { struct task_group *parent; struct list_head siblings; struct list_head children; - -#ifdef CONFIG_SCHED_AUTOGROUP - struct autogroup *autogroup; -#endif }; #define root_task_group init_task_group -/* task_group_lock serializes the addition/removal of task groups */ +/* task_group_lock serializes add/remove of task groups and also changes to + * a task group's cpu shares. + */ static DEFINE_SPINLOCK(task_group_lock); #ifdef CONFIG_FAIR_GROUP_SCHED +#ifdef CONFIG_SMP +static int root_task_group_empty(void) +{ + return list_empty(&root_task_group.children); +} +#endif + # define INIT_TASK_GROUP_LOAD NICE_0_LOAD /* @@ -341,7 +342,6 @@ struct cfs_rq { * leaf_cfs_rq_list ties together list of leaf cfs_rq's in a cpu. This * list is used during load balance. */ - int on_list; struct list_head leaf_cfs_rq_list; struct task_group *tg; /* group that "owns" this runqueue */ @@ -360,17 +360,14 @@ struct cfs_rq { unsigned long h_load; /* - * Maintaining per-cpu shares distribution for group scheduling - * - * load_stamp is the last time we updated the load average - * load_last is the last time we updated the load average and saw load - * load_unacc_exec_time is currently unaccounted execution time + * this cpu's part of tg->shares */ - u64 load_avg; - u64 load_period; - u64 load_stamp, load_last, load_unacc_exec_time; + unsigned long shares; - unsigned long load_contribution; + /* + * load.weight at the time we set shares + */ + unsigned long rq_weight; #endif #endif }; @@ -608,14 +605,11 @@ static inline int cpu_of(struct rq *rq) */ static inline struct task_group *task_group(struct task_struct *p) { - struct task_group *tg; struct cgroup_subsys_state *css; css = task_subsys_state_check(p, cpu_cgroup_subsys_id, lockdep_is_held(&task_rq(p)->lock)); - tg = container_of(css, struct task_group, css); - - return autogroup_task_group(p, tg); + return container_of(css, struct task_group, css); } /* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */ @@ -798,6 +792,20 @@ late_initcall(sched_init_debug); */ const_debug unsigned int sysctl_sched_nr_migrate = 32; +/* + * ratelimit for updating the group shares. + * default: 0.25ms + */ +unsigned int sysctl_sched_shares_ratelimit = 250000; +unsigned int normalized_sysctl_sched_shares_ratelimit = 250000; + +/* + * Inject some fuzzyness into changing the per-cpu group shares + * this avoids remote rq-locks at the expense of fairness. + * default: 4 + */ +unsigned int sysctl_sched_shares_thresh = 4; + /* * period over which we average the RT time consumption, measured * in ms. @@ -1347,12 +1355,6 @@ static inline void update_load_sub(struct load_weight *lw, unsigned long dec) lw->inv_weight = 0; } -static inline void update_load_set(struct load_weight *lw, unsigned long w) -{ - lw->weight = w; - lw->inv_weight = 0; -} - /* * To aid in avoiding the subversion of "niceness" due to uneven distribution * of tasks with abnormal "nice" values across CPUs the contribution that @@ -1541,6 +1543,101 @@ static unsigned long cpu_avg_load_per_task(int cpu) #ifdef CONFIG_FAIR_GROUP_SCHED +static __read_mostly unsigned long __percpu *update_shares_data; + +static void __set_se_shares(struct sched_entity *se, unsigned long shares); + +/* + * Calculate and set the cpu's group shares. + */ +static void update_group_shares_cpu(struct task_group *tg, int cpu, + unsigned long sd_shares, + unsigned long sd_rq_weight, + unsigned long *usd_rq_weight) +{ + unsigned long shares, rq_weight; + int boost = 0; + + rq_weight = usd_rq_weight[cpu]; + if (!rq_weight) { + boost = 1; + rq_weight = NICE_0_LOAD; + } + + /* + * \Sum_j shares_j * rq_weight_i + * shares_i = ----------------------------- + * \Sum_j rq_weight_j + */ + shares = (sd_shares * rq_weight) / sd_rq_weight; + shares = clamp_t(unsigned long, shares, MIN_SHARES, MAX_SHARES); + + if (abs(shares - tg->se[cpu]->load.weight) > + sysctl_sched_shares_thresh) { + struct rq *rq = cpu_rq(cpu); + unsigned long flags; + + raw_spin_lock_irqsave(&rq->lock, flags); + tg->cfs_rq[cpu]->rq_weight = boost ? 0 : rq_weight; + tg->cfs_rq[cpu]->shares = boost ? 0 : shares; + __set_se_shares(tg->se[cpu], shares); + raw_spin_unlock_irqrestore(&rq->lock, flags); + } +} + +/* + * Re-compute the task group their per cpu shares over the given domain. + * This needs to be done in a bottom-up fashion because the rq weight of a + * parent group depends on the shares of its child groups. + */ +static int tg_shares_up(struct task_group *tg, void *data) +{ + unsigned long weight, rq_weight = 0, sum_weight = 0, shares = 0; + unsigned long *usd_rq_weight; + struct sched_domain *sd = data; + unsigned long flags; + int i; + + if (!tg->se[0]) + return 0; + + local_irq_save(flags); + usd_rq_weight = per_cpu_ptr(update_shares_data, smp_processor_id()); + + for_each_cpu(i, sched_domain_span(sd)) { + weight = tg->cfs_rq[i]->load.weight; + usd_rq_weight[i] = weight; + + rq_weight += weight; + /* + * If there are currently no tasks on the cpu pretend there + * is one of average load so that when a new task gets to + * run here it will not get delayed by group starvation. + */ + if (!weight) + weight = NICE_0_LOAD; + + sum_weight += weight; + shares += tg->cfs_rq[i]->shares; + } + + if (!rq_weight) + rq_weight = sum_weight; + + if ((!shares && rq_weight) || shares > tg->shares) + shares = tg->shares; + + if (!sd->parent || !(sd->parent->flags & SD_LOAD_BALANCE)) + shares = tg->shares; + + for_each_cpu(i, sched_domain_span(sd)) + update_group_shares_cpu(tg, i, shares, rq_weight, usd_rq_weight); + + local_irq_restore(flags); + + return 0; +} + /* * Compute the cpu's hierarchical load factor for each task group. * This needs to be done in a top-down fashion because the load of a child @@ -1555,7 +1652,7 @@ static int tg_load_down(struct task_group *tg, void *data) load = cpu_rq(cpu)->load.weight; } else { load = tg->parent->cfs_rq[cpu]->h_load; - load *= tg->se[cpu]->load.weight; + load *= tg->cfs_rq[cpu]->shares; load /= tg->parent->cfs_rq[cpu]->load.weight + 1; } @@ -1564,11 +1661,34 @@ static int tg_load_down(struct task_group *tg, void *data) return 0; } +static void update_shares(struct sched_domain *sd) +{ + s64 elapsed; + u64 now; + + if (root_task_group_empty()) + return; + + now = local_clock(); + elapsed = now - sd->last_update; + + if (elapsed >= (s64)(u64)sysctl_sched_shares_ratelimit) { + sd->last_update = now; + walk_tg_tree(tg_nop, tg_shares_up, sd); + } +} + static void update_h_load(long cpu) { walk_tg_tree(tg_load_down, tg_nop, (void *)cpu); } +#else + +static inline void update_shares(struct sched_domain *sd) +{ +} + #endif #ifdef CONFIG_PREEMPT @@ -1690,6 +1810,15 @@ static void double_rq_unlock(struct rq *rq1, struct rq *rq2) #endif +#ifdef CONFIG_FAIR_GROUP_SCHED +static void cfs_rq_set_shares(struct cfs_rq *cfs_rq, unsigned long shares) +{ +#ifdef CONFIG_SMP + cfs_rq->shares = shares; +#endif +} +#endif + static void calc_load_account_idle(struct rq *this_rq); static void update_sysctl(void); static int get_update_sysctl_factor(void); @@ -1934,7 +2063,6 @@ static void update_rq_clock_task(struct rq *rq, s64 delta) #include "sched_idletask.c" #include "sched_fair.c" #include "sched_rt.c" -#include "sched_autogroup.c" #include "sched_stoptask.c" #ifdef CONFIG_SCHED_DEBUG # include "sched_debug.c" @@ -2127,8 +2255,10 @@ static int migration_cpu_stop(void *data); * The task's runqueue lock must be held. * Returns true if you have to wait for migration thread. */ -static bool migrate_task(struct task_struct *p, struct rq *rq) +static bool migrate_task(struct task_struct *p, int dest_cpu) { + struct rq *rq = task_rq(p); + /* * If the task is not on a runqueue (and not running), then * the next wake-up will properly place the task. @@ -2308,15 +2438,18 @@ static int select_fallback_rq(int cpu, struct task_struct *p) return dest_cpu; /* No more Mr. Nice Guy. */ - dest_cpu = cpuset_cpus_allowed_fallback(p); - /* - * Don't tell them about moving exiting tasks or - * kernel threads (both mm NULL), since they never - * leave kernel. - */ - if (p->mm && printk_ratelimit()) { - printk(KERN_INFO "process %d (%s) no longer affine to cpu%d\n", - task_pid_nr(p), p->comm, cpu); + if (unlikely(dest_cpu >= nr_cpu_ids)) { + dest_cpu = cpuset_cpus_allowed_fallback(p); + /* + * Don't tell them about moving exiting tasks or + * kernel threads (both mm NULL), since they never + * leave kernel. + */ + if (p->mm && printk_ratelimit()) { + printk(KERN_INFO "process %d (%s) no " + "longer affine to cpu%d\n", + task_pid_nr(p), p->comm, cpu); + } } return dest_cpu; @@ -2652,9 +2785,7 @@ void sched_fork(struct task_struct *p, int clone_flags) /* Want to start with kernel preemption disabled. */ task_thread_info(p)->preempt_count = 1; #endif -#ifdef CONFIG_SMP plist_node_init(&p->pushable_tasks, MAX_PRIO); -#endif put_cpu(); } @@ -3418,7 +3549,7 @@ void sched_exec(void) * select_task_rq() can race against ->cpus_allowed */ if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed) && - likely(cpu_active(dest_cpu)) && migrate_task(p, rq)) { + likely(cpu_active(dest_cpu)) && migrate_task(p, dest_cpu)) { struct migration_arg arg = { p, dest_cpu }; task_rq_unlock(rq, &flags); @@ -4083,7 +4214,7 @@ int mutex_spin_on_owner(struct mutex *lock, struct thread_info *owner) if (task_thread_info(rq->curr) != owner || need_resched()) return 0; - arch_mutex_cpu_relax(); + cpu_relax(); } return 1; @@ -4395,7 +4526,7 @@ EXPORT_SYMBOL(wait_for_completion_interruptible); * This waits for either a completion of a specific task to be signaled or for a * specified timeout to expire. It is interruptible. The timeout is in jiffies. */ -long __sched +unsigned long __sched wait_for_completion_interruptible_timeout(struct completion *x, unsigned long timeout) { @@ -4428,7 +4559,7 @@ EXPORT_SYMBOL(wait_for_completion_killable); * signaled or for a specified timeout to expire. It can be * interrupted by a kill signal. The timeout is in jiffies. */ -long __sched +unsigned long __sched wait_for_completion_killable_timeout(struct completion *x, unsigned long timeout) { @@ -4770,7 +4901,7 @@ static bool check_same_owner(struct task_struct *p) } static int __sched_setscheduler(struct task_struct *p, int policy, - const struct sched_param *param, bool user) + struct sched_param *param, bool user) { int retval, oldprio, oldpolicy = -1, on_rq, running; unsigned long flags; @@ -4925,7 +5056,7 @@ static int __sched_setscheduler(struct task_struct *p, int policy, * NOTE that the task may be already dead. */ int sched_setscheduler(struct task_struct *p, int policy, - const struct sched_param *param) + struct sched_param *param) { return __sched_setscheduler(p, policy, param, true); } @@ -4943,7 +5074,7 @@ EXPORT_SYMBOL_GPL(sched_setscheduler); * but our caller might not have that capability. */ int sched_setscheduler_nocheck(struct task_struct *p, int policy, - const struct sched_param *param) + struct sched_param *param) { return __sched_setscheduler(p, policy, param, false); } @@ -5459,7 +5590,7 @@ void sched_show_task(struct task_struct *p) unsigned state; state = p->state ? __ffs(p->state) + 1 : 0; - printk(KERN_INFO "%-15.15s %c", p->comm, + printk(KERN_INFO "%-13.13s %c", p->comm, state < sizeof(stat_nam) - 1 ? stat_nam[state] : '?'); #if BITS_PER_LONG == 32 if (state == TASK_RUNNING) @@ -5623,6 +5754,7 @@ static void update_sysctl(void) SET_SYSCTL(sched_min_granularity); SET_SYSCTL(sched_latency); SET_SYSCTL(sched_wakeup_granularity); + SET_SYSCTL(sched_shares_ratelimit); #undef SET_SYSCTL } @@ -5698,7 +5830,7 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) goto out; dest_cpu = cpumask_any_and(cpu_active_mask, new_mask); - if (migrate_task(p, rq)) { + if (migrate_task(p, dest_cpu)) { struct migration_arg arg = { p, dest_cpu }; /* Need help from migration thread: drop lock and wait. */ task_rq_unlock(rq, &flags); @@ -5780,20 +5912,29 @@ static int migration_cpu_stop(void *data) } #ifdef CONFIG_HOTPLUG_CPU - /* - * Ensures that the idle task is using init_mm right before its cpu goes - * offline. + * Figure out where task on dead CPU should go, use force if necessary. */ -void idle_task_exit(void) +void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p) { - struct mm_struct *mm = current->active_mm; + struct rq *rq = cpu_rq(dead_cpu); + int needs_cpu, uninitialized_var(dest_cpu); + unsigned long flags; - BUG_ON(cpu_online(smp_processor_id())); + local_irq_save(flags); - if (mm != &init_mm) - switch_mm(mm, &init_mm, current); - mmdrop(mm); + raw_spin_lock(&rq->lock); + needs_cpu = (task_cpu(p) == dead_cpu) && (p->state != TASK_WAKING); + if (needs_cpu) + dest_cpu = select_fallback_rq(dead_cpu, p); + raw_spin_unlock(&rq->lock); + /* + * It can only fail if we race with set_cpus_allowed(), + * in the racer should migrate the task anyway. + */ + if (needs_cpu) + __migrate_task(p, dead_cpu, dest_cpu); + local_irq_restore(flags); } /* @@ -5806,69 +5947,128 @@ void idle_task_exit(void) static void migrate_nr_uninterruptible(struct rq *rq_src) { struct rq *rq_dest = cpu_rq(cpumask_any(cpu_active_mask)); + unsigned long flags; + local_irq_save(flags); + double_rq_lock(rq_src, rq_dest); rq_dest->nr_uninterruptible += rq_src->nr_uninterruptible; rq_src->nr_uninterruptible = 0; + double_rq_unlock(rq_src, rq_dest); + local_irq_restore(flags); +} + +/* Run through task list and migrate tasks from the dead cpu. */ +static void migrate_live_tasks(int src_cpu) +{ + struct task_struct *p, *t; + + read_lock(&tasklist_lock); + + do_each_thread(t, p) { + if (p == current) + continue; + + if (task_cpu(p) == src_cpu) + move_task_off_dead_cpu(src_cpu, p); + } while_each_thread(t, p); + + read_unlock(&tasklist_lock); } /* - * remove the tasks which were accounted by rq from calc_load_tasks. + * Schedules idle task to be the next runnable task on current CPU. + * It does so by boosting its priority to highest possible. + * Used by CPU offline code. */ -static void calc_global_load_remove(struct rq *rq) +void sched_idle_next(void) { - atomic_long_sub(rq->calc_load_active, &calc_load_tasks); - rq->calc_load_active = 0; + int this_cpu = smp_processor_id(); + struct rq *rq = cpu_rq(this_cpu); + struct task_struct *p = rq->idle; + unsigned long flags; + + /* cpu has to be offline */ + BUG_ON(cpu_online(this_cpu)); + + /* + * Strictly not necessary since rest of the CPUs are stopped by now + * and interrupts disabled on the current cpu. + */ + raw_spin_lock_irqsave(&rq->lock, flags); + + __setscheduler(rq, p, SCHED_FIFO, MAX_RT_PRIO-1); + + activate_task(rq, p, 0); + + raw_spin_unlock_irqrestore(&rq->lock, flags); } /* - * Migrate all tasks from the rq, sleeping tasks will be migrated by - * try_to_wake_up()->select_task_rq(). - * - * Called with rq->lock held even though we'er in stop_machine() and - * there's no concurrency possible, we hold the required locks anyway - * because of lock validation efforts. + * Ensures that the idle task is using init_mm right before its cpu goes + * offline. */ -static void migrate_tasks(unsigned int dead_cpu) +void idle_task_exit(void) +{ + struct mm_struct *mm = current->active_mm; + + BUG_ON(cpu_online(smp_processor_id())); + + if (mm != &init_mm) + switch_mm(mm, &init_mm, current); + mmdrop(mm); +} + +/* called under rq->lock with disabled interrupts */ +static void migrate_dead(unsigned int dead_cpu, struct task_struct *p) { struct rq *rq = cpu_rq(dead_cpu); - struct task_struct *next, *stop = rq->stop; - int dest_cpu; + + /* Must be exiting, otherwise would be on tasklist. */ + BUG_ON(!p->exit_state); + + /* Cannot have done final schedule yet: would have vanished. */ + BUG_ON(p->state == TASK_DEAD); + + get_task_struct(p); /* - * Fudge the rq selection such that the below task selection loop - * doesn't get stuck on the currently eligible stop task. - * - * We're currently inside stop_machine() and the rq is either stuck - * in the stop_machine_cpu_stop() loop, or we're executing this code, - * either way we should never end up calling schedule() until we're - * done here. + * Drop lock around migration; if someone else moves it, + * that's OK. No task can be added to this CPU, so iteration is + * fine. */ - rq->stop = NULL; + raw_spin_unlock_irq(&rq->lock); + move_task_off_dead_cpu(dead_cpu, p); + raw_spin_lock_irq(&rq->lock); + + put_task_struct(p); +} + +/* release_task() removes task from tasklist, so we won't find dead tasks. */ +static void migrate_dead_tasks(unsigned int dead_cpu) +{ + struct rq *rq = cpu_rq(dead_cpu); + struct task_struct *next; for ( ; ; ) { - /* - * There's this thread running, bail when that's the only - * remaining thread. - */ - if (rq->nr_running == 1) + if (!rq->nr_running) break; - next = pick_next_task(rq); - BUG_ON(!next); + if (!next) + break; next->sched_class->put_prev_task(rq, next); + migrate_dead(dead_cpu, next); - /* Find suitable destination for @next, with force if needed. */ - dest_cpu = select_fallback_rq(dead_cpu, next); - raw_spin_unlock(&rq->lock); - - __migrate_task(next, dead_cpu, dest_cpu); - - raw_spin_lock(&rq->lock); } - - rq->stop = stop; } +/* + * remove the tasks which were accounted by rq from calc_load_tasks. + */ +static void calc_global_load_remove(struct rq *rq) +{ + atomic_long_sub(rq->calc_load_active, &calc_load_tasks); + rq->calc_load_active = 0; +} #endif /* CONFIG_HOTPLUG_CPU */ #if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL) @@ -6078,13 +6278,15 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) unsigned long flags; struct rq *rq = cpu_rq(cpu); - switch (action & ~CPU_TASKS_FROZEN) { + switch (action) { case CPU_UP_PREPARE: + case CPU_UP_PREPARE_FROZEN: rq->calc_load_update = calc_load_update; break; case CPU_ONLINE: + case CPU_ONLINE_FROZEN: /* Update our root-domain */ raw_spin_lock_irqsave(&rq->lock, flags); if (rq->rd) { @@ -6096,19 +6298,30 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) break; #ifdef CONFIG_HOTPLUG_CPU + case CPU_DEAD: + case CPU_DEAD_FROZEN: + migrate_live_tasks(cpu); + /* Idle task back to normal (off runqueue, low prio) */ + raw_spin_lock_irq(&rq->lock); + deactivate_task(rq, rq->idle, 0); + __setscheduler(rq, rq->idle, SCHED_NORMAL, 0); + rq->idle->sched_class = &idle_sched_class; + migrate_dead_tasks(cpu); + raw_spin_unlock_irq(&rq->lock); + migrate_nr_uninterruptible(rq); + BUG_ON(rq->nr_running != 0); + calc_global_load_remove(rq); + break; + case CPU_DYING: + case CPU_DYING_FROZEN: /* Update our root-domain */ raw_spin_lock_irqsave(&rq->lock, flags); if (rq->rd) { BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); set_rq_offline(rq); } - migrate_tasks(cpu); - BUG_ON(rq->nr_running != 1); /* the migration thread */ raw_spin_unlock_irqrestore(&rq->lock, flags); - - migrate_nr_uninterruptible(rq); - calc_global_load_remove(rq); break; #endif } @@ -7839,13 +8052,15 @@ static void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq) #ifdef CONFIG_FAIR_GROUP_SCHED static void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq, - struct sched_entity *se, int cpu, + struct sched_entity *se, int cpu, int add, struct sched_entity *parent) { struct rq *rq = cpu_rq(cpu); tg->cfs_rq[cpu] = cfs_rq; init_cfs_rq(cfs_rq, rq); cfs_rq->tg = tg; + if (add) + list_add(&cfs_rq->leaf_cfs_rq_list, &rq->leaf_cfs_rq_list); tg->se[cpu] = se; /* se could be NULL for init_task_group */ @@ -7858,14 +8073,15 @@ static void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq, se->cfs_rq = parent->my_q; se->my_q = cfs_rq; - update_load_set(&se->load, 0); + se->load.weight = tg->shares; + se->load.inv_weight = 0; se->parent = parent; } #endif #ifdef CONFIG_RT_GROUP_SCHED static void init_tg_rt_entry(struct task_group *tg, struct rt_rq *rt_rq, - struct sched_rt_entity *rt_se, int cpu, + struct sched_rt_entity *rt_se, int cpu, int add, struct sched_rt_entity *parent) { struct rq *rq = cpu_rq(cpu); @@ -7874,6 +8090,8 @@ static void init_tg_rt_entry(struct task_group *tg, struct rt_rq *rt_rq, init_rt_rq(rt_rq, rq); rt_rq->tg = tg; rt_rq->rt_runtime = tg->rt_bandwidth.rt_runtime; + if (add) + list_add(&rt_rq->leaf_rt_rq_list, &rq->leaf_rt_rq_list); tg->rt_se[cpu] = rt_se; if (!rt_se) @@ -7946,9 +8164,13 @@ void __init sched_init(void) #ifdef CONFIG_CGROUP_SCHED list_add(&init_task_group.list, &task_groups); INIT_LIST_HEAD(&init_task_group.children); - autogroup_init(&init_task); + #endif /* CONFIG_CGROUP_SCHED */ +#if defined CONFIG_FAIR_GROUP_SCHED && defined CONFIG_SMP + update_shares_data = __alloc_percpu(nr_cpu_ids * sizeof(unsigned long), + __alignof__(unsigned long)); +#endif for_each_possible_cpu(i) { struct rq *rq; @@ -7962,6 +8184,7 @@ void __init sched_init(void) #ifdef CONFIG_FAIR_GROUP_SCHED init_task_group.shares = init_task_group_load; INIT_LIST_HEAD(&rq->leaf_cfs_rq_list); +#ifdef CONFIG_CGROUP_SCHED /* * How much cpu bandwidth does init_task_group get? * @@ -7981,13 +8204,16 @@ void __init sched_init(void) * We achieve this by letting init_task_group's tasks sit * directly in rq->cfs (i.e init_task_group->se[] = NULL). */ - init_tg_cfs_entry(&init_task_group, &rq->cfs, NULL, i, NULL); + init_tg_cfs_entry(&init_task_group, &rq->cfs, NULL, i, 1, NULL); +#endif #endif /* CONFIG_FAIR_GROUP_SCHED */ rq->rt.rt_runtime = def_rt_bandwidth.rt_runtime; #ifdef CONFIG_RT_GROUP_SCHED INIT_LIST_HEAD(&rq->leaf_rt_rq_list); - init_tg_rt_entry(&init_task_group, &rq->rt, NULL, i, NULL); +#ifdef CONFIG_CGROUP_SCHED + init_tg_rt_entry(&init_task_group, &rq->rt, NULL, i, 1, NULL); +#endif #endif for (j = 0; j < CPU_LOAD_IDX_MAX; j++) @@ -8067,6 +8293,8 @@ void __init sched_init(void) zalloc_cpumask_var(&cpu_isolated_map, GFP_NOWAIT); #endif /* SMP */ + perf_event_init(); + scheduler_running = 1; } @@ -8260,7 +8488,7 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent) if (!se) goto err_free_rq; - init_tg_cfs_entry(tg, cfs_rq, se, i, parent->se[i]); + init_tg_cfs_entry(tg, cfs_rq, se, i, 0, parent->se[i]); } return 1; @@ -8271,21 +8499,15 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent) return 0; } -static inline void unregister_fair_sched_group(struct task_group *tg, int cpu) +static inline void register_fair_sched_group(struct task_group *tg, int cpu) { - struct rq *rq = cpu_rq(cpu); - unsigned long flags; - - /* - * Only empty task groups can be destroyed; so we can speculatively - * check on_list without danger of it being re-added. - */ - if (!tg->cfs_rq[cpu]->on_list) - return; + list_add_rcu(&tg->cfs_rq[cpu]->leaf_cfs_rq_list, + &cpu_rq(cpu)->leaf_cfs_rq_list); +} - raw_spin_lock_irqsave(&rq->lock, flags); - list_del_leaf_cfs_rq(tg->cfs_rq[cpu]); - raw_spin_unlock_irqrestore(&rq->lock, flags); +static inline void unregister_fair_sched_group(struct task_group *tg, int cpu) +{ + list_del_rcu(&tg->cfs_rq[cpu]->leaf_cfs_rq_list); } #else /* !CONFG_FAIR_GROUP_SCHED */ static inline void free_fair_sched_group(struct task_group *tg) @@ -8298,6 +8520,10 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent) return 1; } +static inline void register_fair_sched_group(struct task_group *tg, int cpu) +{ +} + static inline void unregister_fair_sched_group(struct task_group *tg, int cpu) { } @@ -8352,7 +8578,7 @@ int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent) if (!rt_se) goto err_free_rq; - init_tg_rt_entry(tg, rt_rq, rt_se, i, parent->rt_se[i]); + init_tg_rt_entry(tg, rt_rq, rt_se, i, 0, parent->rt_se[i]); } return 1; @@ -8362,6 +8588,17 @@ int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent) err: return 0; } + +static inline void register_rt_sched_group(struct task_group *tg, int cpu) +{ + list_add_rcu(&tg->rt_rq[cpu]->leaf_rt_rq_list, + &cpu_rq(cpu)->leaf_rt_rq_list); +} + +static inline void unregister_rt_sched_group(struct task_group *tg, int cpu) +{ + list_del_rcu(&tg->rt_rq[cpu]->leaf_rt_rq_list); +} #else /* !CONFIG_RT_GROUP_SCHED */ static inline void free_rt_sched_group(struct task_group *tg) { @@ -8372,6 +8609,14 @@ int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent) { return 1; } + +static inline void register_rt_sched_group(struct task_group *tg, int cpu) +{ +} + +static inline void unregister_rt_sched_group(struct task_group *tg, int cpu) +{ +} #endif /* CONFIG_RT_GROUP_SCHED */ #ifdef CONFIG_CGROUP_SCHED @@ -8387,6 +8632,7 @@ struct task_group *sched_create_group(struct task_group *parent) { struct task_group *tg; unsigned long flags; + int i; tg = kzalloc(sizeof(*tg), GFP_KERNEL); if (!tg) @@ -8399,6 +8645,10 @@ struct task_group *sched_create_group(struct task_group *parent) goto err; spin_lock_irqsave(&task_group_lock, flags); + for_each_possible_cpu(i) { + register_fair_sched_group(tg, i); + register_rt_sched_group(tg, i); + } list_add_rcu(&tg->list, &task_groups); WARN_ON(!parent); /* root should already exist */ @@ -8428,11 +8678,11 @@ void sched_destroy_group(struct task_group *tg) unsigned long flags; int i; - /* end participation in shares distribution */ - for_each_possible_cpu(i) - unregister_fair_sched_group(tg, i); - spin_lock_irqsave(&task_group_lock, flags); + for_each_possible_cpu(i) { + unregister_fair_sched_group(tg, i); + unregister_rt_sched_group(tg, i); + } list_del_rcu(&tg->list); list_del_rcu(&tg->siblings); spin_unlock_irqrestore(&task_group_lock, flags); @@ -8479,6 +8729,33 @@ void sched_move_task(struct task_struct *tsk) #endif /* CONFIG_CGROUP_SCHED */ #ifdef CONFIG_FAIR_GROUP_SCHED +static void __set_se_shares(struct sched_entity *se, unsigned long shares) +{ + struct cfs_rq *cfs_rq = se->cfs_rq; + int on_rq; + + on_rq = se->on_rq; + if (on_rq) + dequeue_entity(cfs_rq, se, 0); + + se->load.weight = shares; + se->load.inv_weight = 0; + + if (on_rq) + enqueue_entity(cfs_rq, se, 0); +} + +static void set_se_shares(struct sched_entity *se, unsigned long shares) +{ + struct cfs_rq *cfs_rq = se->cfs_rq; + struct rq *rq = cfs_rq->rq; + unsigned long flags; + + raw_spin_lock_irqsave(&rq->lock, flags); + __set_se_shares(se, shares); + raw_spin_unlock_irqrestore(&rq->lock, flags); +} + static DEFINE_MUTEX(shares_mutex); int sched_group_set_shares(struct task_group *tg, unsigned long shares) @@ -8501,19 +8778,37 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares) if (tg->shares == shares) goto done; + spin_lock_irqsave(&task_group_lock, flags); + for_each_possible_cpu(i) + unregister_fair_sched_group(tg, i); + list_del_rcu(&tg->siblings); + spin_unlock_irqrestore(&task_group_lock, flags); + + /* wait for any ongoing reference to this group to finish */ + synchronize_sched(); + + /* + * Now we are free to modify the group's share on each cpu + * w/o tripping rebalance_share or load_balance_fair. + */ tg->shares = shares; for_each_possible_cpu(i) { - struct rq *rq = cpu_rq(i); - struct sched_entity *se; - - se = tg->se[i]; - /* Propagate contribution to hierarchy */ - raw_spin_lock_irqsave(&rq->lock, flags); - for_each_sched_entity(se) - update_cfs_shares(group_cfs_rq(se), 0); - raw_spin_unlock_irqrestore(&rq->lock, flags); + /* + * force a rebalance + */ + cfs_rq_set_shares(tg->cfs_rq[i], 0); + set_se_shares(tg->se[i], shares); } + /* + * Enable load balance activity on this group, by inserting it back on + * each cpu's rq->leaf_cfs_rq_list. + */ + spin_lock_irqsave(&task_group_lock, flags); + for_each_possible_cpu(i) + register_fair_sched_group(tg, i); + list_add_rcu(&tg->siblings, &tg->parent->children); + spin_unlock_irqrestore(&task_group_lock, flags); done: mutex_unlock(&shares_mutex); return 0; @@ -9239,3 +9534,72 @@ struct cgroup_subsys cpuacct_subsys = { }; #endif /* CONFIG_CGROUP_CPUACCT */ +#ifndef CONFIG_SMP + +void synchronize_sched_expedited(void) +{ + barrier(); +} +EXPORT_SYMBOL_GPL(synchronize_sched_expedited); + +#else /* #ifndef CONFIG_SMP */ + +static atomic_t synchronize_sched_expedited_count = ATOMIC_INIT(0); + +static int synchronize_sched_expedited_cpu_stop(void *data) +{ + /* + * There must be a full memory barrier on each affected CPU + * between the time that try_stop_cpus() is called and the + * time that it returns. + * + * In the current initial implementation of cpu_stop, the + * above condition is already met when the control reaches + * this point and the following smp_mb() is not strictly + * necessary. Do smp_mb() anyway for documentation and + * robustness against future implementation changes. + */ + smp_mb(); /* See above comment block. */ + return 0; +} + +/* + * Wait for an rcu-sched grace period to elapse, but use "big hammer" + * approach to force grace period to end quickly. This consumes + * significant time on all CPUs, and is thus not recommended for + * any sort of common-case code. + * + * Note that it is illegal to call this function while holding any + * lock that is acquired by a CPU-hotplug notifier. Failing to + * observe this restriction will result in deadlock. + */ +void synchronize_sched_expedited(void) +{ + int snap, trycount = 0; + + smp_mb(); /* ensure prior mod happens before capturing snap. */ + snap = atomic_read(&synchronize_sched_expedited_count) + 1; + get_online_cpus(); + while (try_stop_cpus(cpu_online_mask, + synchronize_sched_expedited_cpu_stop, + NULL) == -EAGAIN) { + put_online_cpus(); + if (trycount++ < 10) + udelay(trycount * num_online_cpus()); + else { + synchronize_sched(); + return; + } + if (atomic_read(&synchronize_sched_expedited_count) - snap > 0) { + smp_mb(); /* ensure test happens before caller kfree */ + return; + } + get_online_cpus(); + } + atomic_inc(&synchronize_sched_expedited_count); + smp_mb__after_atomic_inc(); /* ensure post-GP actions seen after GP. */ + put_online_cpus(); +} +EXPORT_SYMBOL_GPL(synchronize_sched_expedited); + +#endif /* #else #ifndef CONFIG_SMP */ diff --git a/trunk/kernel/sched_autogroup.c b/trunk/kernel/sched_autogroup.c deleted file mode 100644 index c80fedcd476b..000000000000 --- a/trunk/kernel/sched_autogroup.c +++ /dev/null @@ -1,238 +0,0 @@ -#ifdef CONFIG_SCHED_AUTOGROUP - -#include -#include -#include -#include - -unsigned int __read_mostly sysctl_sched_autogroup_enabled = 1; -static struct autogroup autogroup_default; -static atomic_t autogroup_seq_nr; - -static void autogroup_init(struct task_struct *init_task) -{ - autogroup_default.tg = &init_task_group; - init_task_group.autogroup = &autogroup_default; - kref_init(&autogroup_default.kref); - init_rwsem(&autogroup_default.lock); - init_task->signal->autogroup = &autogroup_default; -} - -static inline void autogroup_free(struct task_group *tg) -{ - kfree(tg->autogroup); -} - -static inline void autogroup_destroy(struct kref *kref) -{ - struct autogroup *ag = container_of(kref, struct autogroup, kref); - - sched_destroy_group(ag->tg); -} - -static inline void autogroup_kref_put(struct autogroup *ag) -{ - kref_put(&ag->kref, autogroup_destroy); -} - -static inline struct autogroup *autogroup_kref_get(struct autogroup *ag) -{ - kref_get(&ag->kref); - return ag; -} - -static inline struct autogroup *autogroup_task_get(struct task_struct *p) -{ - struct autogroup *ag; - unsigned long flags; - - if (!lock_task_sighand(p, &flags)) - return autogroup_kref_get(&autogroup_default); - - ag = autogroup_kref_get(p->signal->autogroup); - unlock_task_sighand(p, &flags); - - return ag; -} - -static inline struct autogroup *autogroup_create(void) -{ - struct autogroup *ag = kzalloc(sizeof(*ag), GFP_KERNEL); - struct task_group *tg; - - if (!ag) - goto out_fail; - - tg = sched_create_group(&init_task_group); - - if (IS_ERR(tg)) - goto out_free; - - kref_init(&ag->kref); - init_rwsem(&ag->lock); - ag->id = atomic_inc_return(&autogroup_seq_nr); - ag->tg = tg; - tg->autogroup = ag; - - return ag; - -out_free: - kfree(ag); -out_fail: - if (printk_ratelimit()) { - printk(KERN_WARNING "autogroup_create: %s failure.\n", - ag ? "sched_create_group()" : "kmalloc()"); - } - - return autogroup_kref_get(&autogroup_default); -} - -static inline bool -task_wants_autogroup(struct task_struct *p, struct task_group *tg) -{ - if (tg != &root_task_group) - return false; - - if (p->sched_class != &fair_sched_class) - return false; - - /* - * We can only assume the task group can't go away on us if - * autogroup_move_group() can see us on ->thread_group list. - */ - if (p->flags & PF_EXITING) - return false; - - return true; -} - -static inline struct task_group * -autogroup_task_group(struct task_struct *p, struct task_group *tg) -{ - int enabled = ACCESS_ONCE(sysctl_sched_autogroup_enabled); - - if (enabled && task_wants_autogroup(p, tg)) - return p->signal->autogroup->tg; - - return tg; -} - -static void -autogroup_move_group(struct task_struct *p, struct autogroup *ag) -{ - struct autogroup *prev; - struct task_struct *t; - unsigned long flags; - - BUG_ON(!lock_task_sighand(p, &flags)); - - prev = p->signal->autogroup; - if (prev == ag) { - unlock_task_sighand(p, &flags); - return; - } - - p->signal->autogroup = autogroup_kref_get(ag); - - t = p; - do { - sched_move_task(t); - } while_each_thread(p, t); - - unlock_task_sighand(p, &flags); - autogroup_kref_put(prev); -} - -/* Allocates GFP_KERNEL, cannot be called under any spinlock */ -void sched_autogroup_create_attach(struct task_struct *p) -{ - struct autogroup *ag = autogroup_create(); - - autogroup_move_group(p, ag); - /* drop extra refrence added by autogroup_create() */ - autogroup_kref_put(ag); -} -EXPORT_SYMBOL(sched_autogroup_create_attach); - -/* Cannot be called under siglock. Currently has no users */ -void sched_autogroup_detach(struct task_struct *p) -{ - autogroup_move_group(p, &autogroup_default); -} -EXPORT_SYMBOL(sched_autogroup_detach); - -void sched_autogroup_fork(struct signal_struct *sig) -{ - sig->autogroup = autogroup_task_get(current); -} - -void sched_autogroup_exit(struct signal_struct *sig) -{ - autogroup_kref_put(sig->autogroup); -} - -static int __init setup_autogroup(char *str) -{ - sysctl_sched_autogroup_enabled = 0; - - return 1; -} - -__setup("noautogroup", setup_autogroup); - -#ifdef CONFIG_PROC_FS - -int proc_sched_autogroup_set_nice(struct task_struct *p, int *nice) -{ - static unsigned long next = INITIAL_JIFFIES; - struct autogroup *ag; - int err; - - if (*nice < -20 || *nice > 19) - return -EINVAL; - - err = security_task_setnice(current, *nice); - if (err) - return err; - - if (*nice < 0 && !can_nice(current, *nice)) - return -EPERM; - - /* this is a heavy operation taking global locks.. */ - if (!capable(CAP_SYS_ADMIN) && time_before(jiffies, next)) - return -EAGAIN; - - next = HZ / 10 + jiffies; - ag = autogroup_task_get(p); - - down_write(&ag->lock); - err = sched_group_set_shares(ag->tg, prio_to_weight[*nice + 20]); - if (!err) - ag->nice = *nice; - up_write(&ag->lock); - - autogroup_kref_put(ag); - - return err; -} - -void proc_sched_autogroup_show_task(struct task_struct *p, struct seq_file *m) -{ - struct autogroup *ag = autogroup_task_get(p); - - down_read(&ag->lock); - seq_printf(m, "/autogroup-%ld nice %d\n", ag->id, ag->nice); - up_read(&ag->lock); - - autogroup_kref_put(ag); -} -#endif /* CONFIG_PROC_FS */ - -#ifdef CONFIG_SCHED_DEBUG -static inline int autogroup_path(struct task_group *tg, char *buf, int buflen) -{ - return snprintf(buf, buflen, "%s-%ld", "/autogroup", tg->autogroup->id); -} -#endif /* CONFIG_SCHED_DEBUG */ - -#endif /* CONFIG_SCHED_AUTOGROUP */ diff --git a/trunk/kernel/sched_autogroup.h b/trunk/kernel/sched_autogroup.h deleted file mode 100644 index 5358e241cb20..000000000000 --- a/trunk/kernel/sched_autogroup.h +++ /dev/null @@ -1,32 +0,0 @@ -#ifdef CONFIG_SCHED_AUTOGROUP - -struct autogroup { - struct kref kref; - struct task_group *tg; - struct rw_semaphore lock; - unsigned long id; - int nice; -}; - -static inline struct task_group * -autogroup_task_group(struct task_struct *p, struct task_group *tg); - -#else /* !CONFIG_SCHED_AUTOGROUP */ - -static inline void autogroup_init(struct task_struct *init_task) { } -static inline void autogroup_free(struct task_group *tg) { } - -static inline struct task_group * -autogroup_task_group(struct task_struct *p, struct task_group *tg) -{ - return tg; -} - -#ifdef CONFIG_SCHED_DEBUG -static inline int autogroup_path(struct task_group *tg, char *buf, int buflen) -{ - return 0; -} -#endif - -#endif /* CONFIG_SCHED_AUTOGROUP */ diff --git a/trunk/kernel/sched_clock.c b/trunk/kernel/sched_clock.c index 9d8af0b3fb64..52f1a149bfb1 100644 --- a/trunk/kernel/sched_clock.c +++ b/trunk/kernel/sched_clock.c @@ -79,7 +79,7 @@ unsigned long long __attribute__((weak)) sched_clock(void) } EXPORT_SYMBOL_GPL(sched_clock); -__read_mostly int sched_clock_running; +static __read_mostly int sched_clock_running; #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK __read_mostly int sched_clock_stable; diff --git a/trunk/kernel/sched_debug.c b/trunk/kernel/sched_debug.c index 1dfae3d014b5..2e1b0d17dd9b 100644 --- a/trunk/kernel/sched_debug.c +++ b/trunk/kernel/sched_debug.c @@ -54,7 +54,8 @@ static unsigned long nsec_low(unsigned long long nsec) #define SPLIT_NS(x) nsec_high(x), nsec_low(x) #ifdef CONFIG_FAIR_GROUP_SCHED -static void print_cfs_group_stats(struct seq_file *m, int cpu, struct task_group *tg) +static void print_cfs_group_stats(struct seq_file *m, int cpu, + struct task_group *tg) { struct sched_entity *se = tg->se[cpu]; if (!se) @@ -109,6 +110,16 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p) 0LL, 0LL, 0LL, 0L, 0LL, 0L, 0LL, 0L); #endif +#ifdef CONFIG_CGROUP_SCHED + { + char path[64]; + + rcu_read_lock(); + cgroup_path(task_group(p)->css.cgroup, path, sizeof(path)); + rcu_read_unlock(); + SEQ_printf(m, " %s", path); + } +#endif SEQ_printf(m, "\n"); } @@ -136,6 +147,19 @@ static void print_rq(struct seq_file *m, struct rq *rq, int rq_cpu) read_unlock_irqrestore(&tasklist_lock, flags); } +#if defined(CONFIG_CGROUP_SCHED) && \ + (defined(CONFIG_FAIR_GROUP_SCHED) || defined(CONFIG_RT_GROUP_SCHED)) +static void task_group_path(struct task_group *tg, char *buf, int buflen) +{ + /* may be NULL if the underlying cgroup isn't fully-created yet */ + if (!tg->css.cgroup) { + buf[0] = '\0'; + return; + } + cgroup_path(tg->css.cgroup, buf, buflen); +} +#endif + void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) { s64 MIN_vruntime = -1, min_vruntime, max_vruntime = -1, @@ -144,7 +168,16 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) struct sched_entity *last; unsigned long flags; +#if defined(CONFIG_CGROUP_SCHED) && defined(CONFIG_FAIR_GROUP_SCHED) + char path[128]; + struct task_group *tg = cfs_rq->tg; + + task_group_path(tg, path, sizeof(path)); + + SEQ_printf(m, "\ncfs_rq[%d]:%s\n", cpu, path); +#else SEQ_printf(m, "\ncfs_rq[%d]:\n", cpu); +#endif SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "exec_clock", SPLIT_NS(cfs_rq->exec_clock)); @@ -169,29 +202,32 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) spread0 = min_vruntime - rq0_min_vruntime; SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "spread0", SPLIT_NS(spread0)); - SEQ_printf(m, " .%-30s: %d\n", "nr_spread_over", - cfs_rq->nr_spread_over); SEQ_printf(m, " .%-30s: %ld\n", "nr_running", cfs_rq->nr_running); SEQ_printf(m, " .%-30s: %ld\n", "load", cfs_rq->load.weight); + + SEQ_printf(m, " .%-30s: %d\n", "nr_spread_over", + cfs_rq->nr_spread_over); #ifdef CONFIG_FAIR_GROUP_SCHED #ifdef CONFIG_SMP - SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "load_avg", - SPLIT_NS(cfs_rq->load_avg)); - SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "load_period", - SPLIT_NS(cfs_rq->load_period)); - SEQ_printf(m, " .%-30s: %ld\n", "load_contrib", - cfs_rq->load_contribution); - SEQ_printf(m, " .%-30s: %d\n", "load_tg", - atomic_read(&cfs_rq->tg->load_weight)); + SEQ_printf(m, " .%-30s: %lu\n", "shares", cfs_rq->shares); #endif - print_cfs_group_stats(m, cpu, cfs_rq->tg); #endif } void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq) { +#if defined(CONFIG_CGROUP_SCHED) && defined(CONFIG_RT_GROUP_SCHED) + char path[128]; + struct task_group *tg = rt_rq->tg; + + task_group_path(tg, path, sizeof(path)); + + SEQ_printf(m, "\nrt_rq[%d]:%s\n", cpu, path); +#else SEQ_printf(m, "\nrt_rq[%d]:\n", cpu); +#endif + #define P(x) \ SEQ_printf(m, " .%-30s: %Ld\n", #x, (long long)(rt_rq->x)) @@ -207,8 +243,6 @@ void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq) #undef P } -extern __read_mostly int sched_clock_running; - static void print_cpu(struct seq_file *m, int cpu) { struct rq *rq = cpu_rq(cpu); @@ -280,42 +314,21 @@ static const char *sched_tunable_scaling_names[] = { static int sched_debug_show(struct seq_file *m, void *v) { - u64 ktime, sched_clk, cpu_clk; - unsigned long flags; + u64 now = ktime_to_ns(ktime_get()); int cpu; - local_irq_save(flags); - ktime = ktime_to_ns(ktime_get()); - sched_clk = sched_clock(); - cpu_clk = local_clock(); - local_irq_restore(flags); - - SEQ_printf(m, "Sched Debug Version: v0.10, %s %.*s\n", + SEQ_printf(m, "Sched Debug Version: v0.09, %s %.*s\n", init_utsname()->release, (int)strcspn(init_utsname()->version, " "), init_utsname()->version); -#define P(x) \ - SEQ_printf(m, "%-40s: %Ld\n", #x, (long long)(x)) -#define PN(x) \ - SEQ_printf(m, "%-40s: %Ld.%06ld\n", #x, SPLIT_NS(x)) - PN(ktime); - PN(sched_clk); - PN(cpu_clk); - P(jiffies); -#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK - P(sched_clock_stable); -#endif -#undef PN -#undef P - - SEQ_printf(m, "\n"); - SEQ_printf(m, "sysctl_sched\n"); + SEQ_printf(m, "now at %Lu.%06ld msecs\n", SPLIT_NS(now)); #define P(x) \ SEQ_printf(m, " .%-40s: %Ld\n", #x, (long long)(x)) #define PN(x) \ SEQ_printf(m, " .%-40s: %Ld.%06ld\n", #x, SPLIT_NS(x)) + P(jiffies); PN(sysctl_sched_latency); PN(sysctl_sched_min_granularity); PN(sysctl_sched_wakeup_granularity); diff --git a/trunk/kernel/sched_fair.c b/trunk/kernel/sched_fair.c index c62ebae65cf0..00ebd7686676 100644 --- a/trunk/kernel/sched_fair.c +++ b/trunk/kernel/sched_fair.c @@ -89,13 +89,6 @@ unsigned int normalized_sysctl_sched_wakeup_granularity = 1000000UL; const_debug unsigned int sysctl_sched_migration_cost = 500000UL; -/* - * The exponential sliding window over which load is averaged for shares - * distribution. - * (default: 10msec) - */ -unsigned int __read_mostly sysctl_sched_shares_window = 10000000UL; - static const struct sched_class fair_sched_class; /************************************************************** @@ -150,36 +143,6 @@ static inline struct cfs_rq *cpu_cfs_rq(struct cfs_rq *cfs_rq, int this_cpu) return cfs_rq->tg->cfs_rq[this_cpu]; } -static inline void list_add_leaf_cfs_rq(struct cfs_rq *cfs_rq) -{ - if (!cfs_rq->on_list) { - /* - * Ensure we either appear before our parent (if already - * enqueued) or force our parent to appear after us when it is - * enqueued. The fact that we always enqueue bottom-up - * reduces this to two cases. - */ - if (cfs_rq->tg->parent && - cfs_rq->tg->parent->cfs_rq[cpu_of(rq_of(cfs_rq))]->on_list) { - list_add_rcu(&cfs_rq->leaf_cfs_rq_list, - &rq_of(cfs_rq)->leaf_cfs_rq_list); - } else { - list_add_tail_rcu(&cfs_rq->leaf_cfs_rq_list, - &rq_of(cfs_rq)->leaf_cfs_rq_list); - } - - cfs_rq->on_list = 1; - } -} - -static inline void list_del_leaf_cfs_rq(struct cfs_rq *cfs_rq) -{ - if (cfs_rq->on_list) { - list_del_rcu(&cfs_rq->leaf_cfs_rq_list); - cfs_rq->on_list = 0; - } -} - /* Iterate thr' all leaf cfs_rq's on a runqueue */ #define for_each_leaf_cfs_rq(rq, cfs_rq) \ list_for_each_entry_rcu(cfs_rq, &rq->leaf_cfs_rq_list, leaf_cfs_rq_list) @@ -283,14 +246,6 @@ static inline struct cfs_rq *cpu_cfs_rq(struct cfs_rq *cfs_rq, int this_cpu) return &cpu_rq(this_cpu)->cfs; } -static inline void list_add_leaf_cfs_rq(struct cfs_rq *cfs_rq) -{ -} - -static inline void list_del_leaf_cfs_rq(struct cfs_rq *cfs_rq) -{ -} - #define for_each_leaf_cfs_rq(rq, cfs_rq) \ for (cfs_rq = &rq->cfs; cfs_rq; cfs_rq = NULL) @@ -462,6 +417,7 @@ int sched_proc_update_handler(struct ctl_table *table, int write, WRT_SYSCTL(sched_min_granularity); WRT_SYSCTL(sched_latency); WRT_SYSCTL(sched_wakeup_granularity); + WRT_SYSCTL(sched_shares_ratelimit); #undef WRT_SYSCTL return 0; @@ -539,9 +495,6 @@ static u64 sched_vslice(struct cfs_rq *cfs_rq, struct sched_entity *se) return calc_delta_fair(sched_slice(cfs_rq, se), se); } -static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update); -static void update_cfs_shares(struct cfs_rq *cfs_rq, long weight_delta); - /* * Update the current task's runtime statistics. Skip current tasks that * are not in our scheduling class. @@ -561,10 +514,6 @@ __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr, curr->vruntime += delta_exec_weighted; update_min_vruntime(cfs_rq); - -#if defined CONFIG_SMP && defined CONFIG_FAIR_GROUP_SCHED - cfs_rq->load_unacc_exec_time += delta_exec; -#endif } static void update_curr(struct cfs_rq *cfs_rq) @@ -684,6 +633,7 @@ account_entity_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se) list_add(&se->group_node, &cfs_rq->tasks); } cfs_rq->nr_running++; + se->on_rq = 1; } static void @@ -697,140 +647,9 @@ account_entity_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se) list_del_init(&se->group_node); } cfs_rq->nr_running--; + se->on_rq = 0; } -#if defined CONFIG_SMP && defined CONFIG_FAIR_GROUP_SCHED -static void update_cfs_rq_load_contribution(struct cfs_rq *cfs_rq, - int global_update) -{ - struct task_group *tg = cfs_rq->tg; - long load_avg; - - load_avg = div64_u64(cfs_rq->load_avg, cfs_rq->load_period+1); - load_avg -= cfs_rq->load_contribution; - - if (global_update || abs(load_avg) > cfs_rq->load_contribution / 8) { - atomic_add(load_avg, &tg->load_weight); - cfs_rq->load_contribution += load_avg; - } -} - -static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update) -{ - u64 period = sysctl_sched_shares_window; - u64 now, delta; - unsigned long load = cfs_rq->load.weight; - - if (!cfs_rq) - return; - - now = rq_of(cfs_rq)->clock; - delta = now - cfs_rq->load_stamp; - - /* truncate load history at 4 idle periods */ - if (cfs_rq->load_stamp > cfs_rq->load_last && - now - cfs_rq->load_last > 4 * period) { - cfs_rq->load_period = 0; - cfs_rq->load_avg = 0; - } - - cfs_rq->load_stamp = now; - cfs_rq->load_unacc_exec_time = 0; - cfs_rq->load_period += delta; - if (load) { - cfs_rq->load_last = now; - cfs_rq->load_avg += delta * load; - } - - /* consider updating load contribution on each fold or truncate */ - if (global_update || cfs_rq->load_period > period - || !cfs_rq->load_period) - update_cfs_rq_load_contribution(cfs_rq, global_update); - - while (cfs_rq->load_period > period) { - /* - * Inline assembly required to prevent the compiler - * optimising this loop into a divmod call. - * See __iter_div_u64_rem() for another example of this. - */ - asm("" : "+rm" (cfs_rq->load_period)); - cfs_rq->load_period /= 2; - cfs_rq->load_avg /= 2; - } - - if (!cfs_rq->curr && !cfs_rq->nr_running && !cfs_rq->load_avg) - list_del_leaf_cfs_rq(cfs_rq); -} - -static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, - unsigned long weight) -{ - if (se->on_rq) { - /* commit outstanding execution time */ - if (cfs_rq->curr == se) - update_curr(cfs_rq); - account_entity_dequeue(cfs_rq, se); - } - - update_load_set(&se->load, weight); - - if (se->on_rq) - account_entity_enqueue(cfs_rq, se); -} - -static void update_cfs_shares(struct cfs_rq *cfs_rq, long weight_delta) -{ - struct task_group *tg; - struct sched_entity *se; - long load_weight, load, shares; - - if (!cfs_rq) - return; - - tg = cfs_rq->tg; - se = tg->se[cpu_of(rq_of(cfs_rq))]; - if (!se) - return; - - load = cfs_rq->load.weight + weight_delta; - - load_weight = atomic_read(&tg->load_weight); - load_weight -= cfs_rq->load_contribution; - load_weight += load; - - shares = (tg->shares * load); - if (load_weight) - shares /= load_weight; - - if (shares < MIN_SHARES) - shares = MIN_SHARES; - if (shares > tg->shares) - shares = tg->shares; - - reweight_entity(cfs_rq_of(se), se, shares); -} - -static void update_entity_shares_tick(struct cfs_rq *cfs_rq) -{ - if (cfs_rq->load_unacc_exec_time > sysctl_sched_shares_window) { - update_cfs_load(cfs_rq, 0); - update_cfs_shares(cfs_rq, 0); - } -} -#else /* CONFIG_FAIR_GROUP_SCHED */ -static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update) -{ -} - -static inline void update_cfs_shares(struct cfs_rq *cfs_rq, long weight_delta) -{ -} - -static inline void update_entity_shares_tick(struct cfs_rq *cfs_rq) -{ -} -#endif /* CONFIG_FAIR_GROUP_SCHED */ - static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se) { #ifdef CONFIG_SCHEDSTATS @@ -952,8 +771,6 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) * Update run-time statistics of the 'current'. */ update_curr(cfs_rq); - update_cfs_load(cfs_rq, 0); - update_cfs_shares(cfs_rq, se->load.weight); account_entity_enqueue(cfs_rq, se); if (flags & ENQUEUE_WAKEUP) { @@ -965,10 +782,6 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) check_spread(cfs_rq, se); if (se != cfs_rq->curr) __enqueue_entity(cfs_rq, se); - se->on_rq = 1; - - if (cfs_rq->nr_running == 1) - list_add_leaf_cfs_rq(cfs_rq); } static void __clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se) @@ -1012,11 +825,8 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) if (se != cfs_rq->curr) __dequeue_entity(cfs_rq, se); - se->on_rq = 0; - update_cfs_load(cfs_rq, 0); account_entity_dequeue(cfs_rq, se); update_min_vruntime(cfs_rq); - update_cfs_shares(cfs_rq, 0); /* * Normalize the entity after updating the min_vruntime because the @@ -1145,11 +955,6 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued) */ update_curr(cfs_rq); - /* - * Update share accounting for long-running entities. - */ - update_entity_shares_tick(cfs_rq); - #ifdef CONFIG_SCHED_HRTICK /* * queued ticks are scheduled to match the slice, so don't bother @@ -1250,13 +1055,6 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) flags = ENQUEUE_WAKEUP; } - for_each_sched_entity(se) { - struct cfs_rq *cfs_rq = cfs_rq_of(se); - - update_cfs_load(cfs_rq, 0); - update_cfs_shares(cfs_rq, 0); - } - hrtick_update(rq); } @@ -1273,20 +1071,12 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) for_each_sched_entity(se) { cfs_rq = cfs_rq_of(se); dequeue_entity(cfs_rq, se, flags); - /* Don't dequeue parent if it has other entities besides us */ if (cfs_rq->load.weight) break; flags |= DEQUEUE_SLEEP; } - for_each_sched_entity(se) { - struct cfs_rq *cfs_rq = cfs_rq_of(se); - - update_cfs_load(cfs_rq, 0); - update_cfs_shares(cfs_rq, 0); - } - hrtick_update(rq); } @@ -1353,20 +1143,51 @@ static void task_waking_fair(struct rq *rq, struct task_struct *p) * Adding load to a group doesn't make a group heavier, but can cause movement * of group shares between cpus. Assuming the shares were perfectly aligned one * can calculate the shift in shares. + * + * The problem is that perfectly aligning the shares is rather expensive, hence + * we try to avoid doing that too often - see update_shares(), which ratelimits + * this change. + * + * We compensate this by not only taking the current delta into account, but + * also considering the delta between when the shares were last adjusted and + * now. + * + * We still saw a performance dip, some tracing learned us that between + * cgroup:/ and cgroup:/foo balancing the number of affine wakeups increased + * significantly. Therefore try to bias the error in direction of failing + * the affine wakeup. + * */ -static long effective_load(struct task_group *tg, int cpu, long wl, long wg) +static long effective_load(struct task_group *tg, int cpu, + long wl, long wg) { struct sched_entity *se = tg->se[cpu]; if (!tg->parent) return wl; + /* + * By not taking the decrease of shares on the other cpu into + * account our error leans towards reducing the affine wakeups. + */ + if (!wl && sched_feat(ASYM_EFF_LOAD)) + return wl; + for_each_sched_entity(se) { long S, rw, s, a, b; + long more_w; + + /* + * Instead of using this increment, also add the difference + * between when the shares were last updated and now. + */ + more_w = se->my_q->load.weight - se->my_q->rq_weight; + wl += more_w; + wg += more_w; S = se->my_q->tg->shares; - s = se->load.weight; - rw = se->my_q->load.weight; + s = se->my_q->shares; + rw = se->my_q->rq_weight; a = S*(rw + wl); b = S*rw + s*wg; @@ -1687,6 +1508,23 @@ select_task_rq_fair(struct rq *rq, struct task_struct *p, int sd_flag, int wake_ sd = tmp; } +#ifdef CONFIG_FAIR_GROUP_SCHED + if (sched_feat(LB_SHARES_UPDATE)) { + /* + * Pick the largest domain to update shares over + */ + tmp = sd; + if (affine_sd && (!tmp || affine_sd->span_weight > sd->span_weight)) + tmp = affine_sd; + + if (tmp) { + raw_spin_unlock(&rq->lock); + update_shares(tmp); + raw_spin_lock(&rq->lock); + } + } +#endif + if (affine_sd) { if (cpu == prev_cpu || wake_affine(affine_sd, p, sync)) return select_idle_sibling(p, cpu); @@ -2071,48 +1909,6 @@ balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest, } #ifdef CONFIG_FAIR_GROUP_SCHED -/* - * update tg->load_weight by folding this cpu's load_avg - */ -static int update_shares_cpu(struct task_group *tg, int cpu) -{ - struct cfs_rq *cfs_rq; - unsigned long flags; - struct rq *rq; - - if (!tg->se[cpu]) - return 0; - - rq = cpu_rq(cpu); - cfs_rq = tg->cfs_rq[cpu]; - - raw_spin_lock_irqsave(&rq->lock, flags); - - update_rq_clock(rq); - update_cfs_load(cfs_rq, 1); - - /* - * We need to update shares after updating tg->load_weight in - * order to adjust the weight of groups with long running tasks. - */ - update_cfs_shares(cfs_rq, 0); - - raw_spin_unlock_irqrestore(&rq->lock, flags); - - return 0; -} - -static void update_shares(int cpu) -{ - struct cfs_rq *cfs_rq; - struct rq *rq = cpu_rq(cpu); - - rcu_read_lock(); - for_each_leaf_cfs_rq(rq, cfs_rq) - update_shares_cpu(cfs_rq->tg, cpu); - rcu_read_unlock(); -} - static unsigned long load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest, unsigned long max_load_move, @@ -2160,10 +1956,6 @@ load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest, return max_load_move - rem_load_move; } #else -static inline void update_shares(int cpu) -{ -} - static unsigned long load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest, unsigned long max_load_move, @@ -3240,6 +3032,7 @@ static int load_balance(int this_cpu, struct rq *this_rq, schedstat_inc(sd, lb_count[idle]); redo: + update_shares(sd); group = find_busiest_group(sd, this_cpu, &imbalance, idle, &sd_idle, cpus, balance); @@ -3381,6 +3174,8 @@ static int load_balance(int this_cpu, struct rq *this_rq, else ld_moved = 0; out: + if (ld_moved) + update_shares(sd); return ld_moved; } @@ -3404,7 +3199,6 @@ static void idle_balance(int this_cpu, struct rq *this_rq) */ raw_spin_unlock(&this_rq->lock); - update_shares(this_cpu); for_each_domain(this_cpu, sd) { unsigned long interval; int balance = 1; @@ -3775,8 +3569,6 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle) int update_next_balance = 0; int need_serialize; - update_shares(cpu); - for_each_domain(cpu, sd) { if (!(sd->flags & SD_LOAD_BALANCE)) continue; diff --git a/trunk/kernel/sched_features.h b/trunk/kernel/sched_features.h index 68e69acc29b9..185f920ec1a2 100644 --- a/trunk/kernel/sched_features.h +++ b/trunk/kernel/sched_features.h @@ -52,6 +52,8 @@ SCHED_FEAT(ARCH_POWER, 0) SCHED_FEAT(HRTICK, 0) SCHED_FEAT(DOUBLE_TICK, 0) SCHED_FEAT(LB_BIAS, 1) +SCHED_FEAT(LB_SHARES_UPDATE, 1) +SCHED_FEAT(ASYM_EFF_LOAD, 1) /* * Spin-wait on mutex acquisition when the mutex owner is running on diff --git a/trunk/kernel/sched_rt.c b/trunk/kernel/sched_rt.c index c914ec747ca6..bea7d79f7e9c 100644 --- a/trunk/kernel/sched_rt.c +++ b/trunk/kernel/sched_rt.c @@ -183,17 +183,6 @@ static inline u64 sched_rt_period(struct rt_rq *rt_rq) return ktime_to_ns(rt_rq->tg->rt_bandwidth.rt_period); } -static inline void list_add_leaf_rt_rq(struct rt_rq *rt_rq) -{ - list_add_rcu(&rt_rq->leaf_rt_rq_list, - &rq_of_rt_rq(rt_rq)->leaf_rt_rq_list); -} - -static inline void list_del_leaf_rt_rq(struct rt_rq *rt_rq) -{ - list_del_rcu(&rt_rq->leaf_rt_rq_list); -} - #define for_each_leaf_rt_rq(rt_rq, rq) \ list_for_each_entry_rcu(rt_rq, &rq->leaf_rt_rq_list, leaf_rt_rq_list) @@ -287,14 +276,6 @@ static inline u64 sched_rt_period(struct rt_rq *rt_rq) return ktime_to_ns(def_rt_bandwidth.rt_period); } -static inline void list_add_leaf_rt_rq(struct rt_rq *rt_rq) -{ -} - -static inline void list_del_leaf_rt_rq(struct rt_rq *rt_rq) -{ -} - #define for_each_leaf_rt_rq(rt_rq, rq) \ for (rt_rq = &rq->rt; rt_rq; rt_rq = NULL) @@ -844,9 +825,6 @@ static void __enqueue_rt_entity(struct sched_rt_entity *rt_se, bool head) if (group_rq && (rt_rq_throttled(group_rq) || !group_rq->rt_nr_running)) return; - if (!rt_rq->rt_nr_running) - list_add_leaf_rt_rq(rt_rq); - if (head) list_add(&rt_se->run_list, queue); else @@ -866,8 +844,6 @@ static void __dequeue_rt_entity(struct sched_rt_entity *rt_se) __clear_bit(rt_se_prio(rt_se), array->bitmap); dec_rt_tasks(rt_se, rt_rq); - if (!rt_rq->rt_nr_running) - list_del_leaf_rt_rq(rt_rq); } /* diff --git a/trunk/kernel/softirq.c b/trunk/kernel/softirq.c index d4d918a91881..18f4be0d5fe0 100644 --- a/trunk/kernel/softirq.c +++ b/trunk/kernel/softirq.c @@ -853,9 +853,7 @@ static int __cpuinit cpu_callback(struct notifier_block *nfb, cpumask_any(cpu_online_mask)); case CPU_DEAD: case CPU_DEAD_FROZEN: { - static struct sched_param param = { - .sched_priority = MAX_RT_PRIO-1 - }; + struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; p = per_cpu(ksoftirqd, hotcpu); per_cpu(ksoftirqd, hotcpu) = NULL; diff --git a/trunk/kernel/srcu.c b/trunk/kernel/srcu.c index 98d8c1e80edb..c71e07500536 100644 --- a/trunk/kernel/srcu.c +++ b/trunk/kernel/srcu.c @@ -31,7 +31,6 @@ #include #include #include -#include #include static int init_srcu_struct_fields(struct srcu_struct *sp) @@ -204,14 +203,9 @@ static void __synchronize_srcu(struct srcu_struct *sp, void (*sync_func)(void)) * all srcu_read_lock() calls using the old counters have completed. * Their corresponding critical sections might well be still * executing, but the srcu_read_lock() primitives themselves - * will have finished executing. We initially give readers - * an arbitrarily chosen 10 microseconds to get out of their - * SRCU read-side critical sections, then loop waiting 1/HZ - * seconds per iteration. + * will have finished executing. */ - if (srcu_readers_active_idx(sp, idx)) - udelay(CONFIG_SRCU_SYNCHRONIZE_DELAY); while (srcu_readers_active_idx(sp, idx)) schedule_timeout_interruptible(1); diff --git a/trunk/kernel/sys.c b/trunk/kernel/sys.c index 2745dcdb6c6c..7f5a0cd296a9 100644 --- a/trunk/kernel/sys.c +++ b/trunk/kernel/sys.c @@ -1080,10 +1080,8 @@ SYSCALL_DEFINE0(setsid) err = session; out: write_unlock_irq(&tasklist_lock); - if (err > 0) { + if (err > 0) proc_sid_connector(group_leader); - sched_autogroup_create_attach(group_leader); - } return err; } diff --git a/trunk/kernel/sysctl.c b/trunk/kernel/sysctl.c index ae5cbb1e3ced..5abfa1518554 100644 --- a/trunk/kernel/sysctl.c +++ b/trunk/kernel/sysctl.c @@ -259,6 +259,8 @@ static int min_wakeup_granularity_ns; /* 0 usecs */ static int max_wakeup_granularity_ns = NSEC_PER_SEC; /* 1 second */ static int min_sched_tunable_scaling = SCHED_TUNABLESCALING_NONE; static int max_sched_tunable_scaling = SCHED_TUNABLESCALING_END-1; +static int min_sched_shares_ratelimit = 100000; /* 100 usec */ +static int max_sched_shares_ratelimit = NSEC_PER_SEC; /* 1 second */ #endif #ifdef CONFIG_COMPACTION @@ -302,6 +304,15 @@ static struct ctl_table kern_table[] = { .extra1 = &min_wakeup_granularity_ns, .extra2 = &max_wakeup_granularity_ns, }, + { + .procname = "sched_shares_ratelimit", + .data = &sysctl_sched_shares_ratelimit, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = sched_proc_update_handler, + .extra1 = &min_sched_shares_ratelimit, + .extra2 = &max_sched_shares_ratelimit, + }, { .procname = "sched_tunable_scaling", .data = &sysctl_sched_tunable_scaling, @@ -311,6 +322,14 @@ static struct ctl_table kern_table[] = { .extra1 = &min_sched_tunable_scaling, .extra2 = &max_sched_tunable_scaling, }, + { + .procname = "sched_shares_thresh", + .data = &sysctl_sched_shares_thresh, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + }, { .procname = "sched_migration_cost", .data = &sysctl_sched_migration_cost, @@ -332,13 +351,6 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, - { - .procname = "sched_shares_window", - .data = &sysctl_sched_shares_window, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, { .procname = "timer_migration", .data = &sysctl_timer_migration, @@ -370,17 +382,6 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, -#ifdef CONFIG_SCHED_AUTOGROUP - { - .procname = "sched_autogroup_enabled", - .data = &sysctl_sched_autogroup_enabled, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec, - .extra1 = &zero, - .extra2 = &one, - }, -#endif #ifdef CONFIG_PROVE_LOCKING { .procname = "prove_locking", @@ -744,15 +745,8 @@ static struct ctl_table kern_table[] = { .extra1 = &zero, .extra2 = &one, }, - { - .procname = "nmi_watchdog", - .data = &watchdog_enabled, - .maxlen = sizeof (int), - .mode = 0644, - .proc_handler = proc_dowatchdog_enabled, - }, #endif -#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86) +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86) && !defined(CONFIG_LOCKUP_DETECTOR) { .procname = "unknown_nmi_panic", .data = &unknown_nmi_panic, @@ -760,6 +754,13 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "nmi_watchdog", + .data = &nmi_watchdog_enabled, + .maxlen = sizeof (int), + .mode = 0644, + .proc_handler = proc_nmi_enabled, + }, #endif #if defined(CONFIG_X86) { diff --git a/trunk/kernel/sysctl_binary.c b/trunk/kernel/sysctl_binary.c index 4b2545a136ff..1357c5786064 100644 --- a/trunk/kernel/sysctl_binary.c +++ b/trunk/kernel/sysctl_binary.c @@ -136,6 +136,7 @@ static const struct bin_table bin_kern_table[] = { { CTL_INT, KERN_IA64_UNALIGNED, "ignore-unaligned-usertrap" }, { CTL_INT, KERN_COMPAT_LOG, "compat-log" }, { CTL_INT, KERN_MAX_LOCK_DEPTH, "max_lock_depth" }, + { CTL_INT, KERN_NMI_WATCHDOG, "nmi_watchdog" }, { CTL_INT, KERN_PANIC_ON_NMI, "panic_on_unrecovered_nmi" }, {} }; diff --git a/trunk/kernel/time/timecompare.c b/trunk/kernel/time/timecompare.c index a9ae369925ce..ac38fbb176cc 100644 --- a/trunk/kernel/time/timecompare.c +++ b/trunk/kernel/time/timecompare.c @@ -21,7 +21,6 @@ #include #include #include -#include /* * fixed point arithmetic scale factor for skew @@ -58,11 +57,11 @@ int timecompare_offset(struct timecompare *sync, int index; int num_samples = sync->num_samples; - if (num_samples > ARRAY_SIZE(buffer)) { + if (num_samples > sizeof(buffer)/sizeof(buffer[0])) { samples = kmalloc(sizeof(*samples) * num_samples, GFP_ATOMIC); if (!samples) { samples = buffer; - num_samples = ARRAY_SIZE(buffer); + num_samples = sizeof(buffer)/sizeof(buffer[0]); } } else { samples = buffer; diff --git a/trunk/kernel/time/timekeeping.c b/trunk/kernel/time/timekeeping.c index 5bb86da82003..49010d822f72 100644 --- a/trunk/kernel/time/timekeeping.c +++ b/trunk/kernel/time/timekeeping.c @@ -32,8 +32,6 @@ struct timekeeper { cycle_t cycle_interval; /* Number of clock shifted nano seconds in one NTP interval. */ u64 xtime_interval; - /* shifted nano seconds left over when rounding cycle_interval */ - s64 xtime_remainder; /* Raw nano seconds accumulated per NTP interval. */ u32 raw_interval; @@ -64,7 +62,7 @@ struct timekeeper timekeeper; static void timekeeper_setup_internals(struct clocksource *clock) { cycle_t interval; - u64 tmp, ntpinterval; + u64 tmp; timekeeper.clock = clock; clock->cycle_last = clock->read(clock); @@ -72,7 +70,6 @@ static void timekeeper_setup_internals(struct clocksource *clock) /* Do the ns -> cycle conversion first, using original mult */ tmp = NTP_INTERVAL_LENGTH; tmp <<= clock->shift; - ntpinterval = tmp; tmp += clock->mult/2; do_div(tmp, clock->mult); if (tmp == 0) @@ -83,7 +80,6 @@ static void timekeeper_setup_internals(struct clocksource *clock) /* Go back from cycles -> shifted ns */ timekeeper.xtime_interval = (u64) interval * clock->mult; - timekeeper.xtime_remainder = ntpinterval - timekeeper.xtime_interval; timekeeper.raw_interval = ((u64) interval * clock->mult) >> clock->shift; @@ -723,8 +719,7 @@ static cycle_t logarithmic_accumulation(cycle_t offset, int shift) /* Accumulate error between NTP and clock interval */ timekeeper.ntp_error += tick_length << shift; - timekeeper.ntp_error -= - (timekeeper.xtime_interval + timekeeper.xtime_remainder) << + timekeeper.ntp_error -= timekeeper.xtime_interval << (timekeeper.ntp_error_shift + shift); return offset; diff --git a/trunk/kernel/time/timer_list.c b/trunk/kernel/time/timer_list.c index 32a19f9397fc..ab8f5e33fa92 100644 --- a/trunk/kernel/time/timer_list.c +++ b/trunk/kernel/time/timer_list.c @@ -79,26 +79,26 @@ print_active_timers(struct seq_file *m, struct hrtimer_clock_base *base, { struct hrtimer *timer, tmp; unsigned long next = 0, i; - struct timerqueue_node *curr; + struct rb_node *curr; unsigned long flags; next_one: i = 0; raw_spin_lock_irqsave(&base->cpu_base->lock, flags); - curr = timerqueue_getnext(&base->active); + curr = base->first; /* * Crude but we have to do this O(N*N) thing, because * we have to unlock the base when printing: */ while (curr && i < next) { - curr = timerqueue_iterate_next(curr); + curr = rb_next(curr); i++; } if (curr) { - timer = container_of(curr, struct hrtimer, node); + timer = rb_entry(curr, struct hrtimer, node); tmp = *timer; raw_spin_unlock_irqrestore(&base->cpu_base->lock, flags); diff --git a/trunk/kernel/timer.c b/trunk/kernel/timer.c index 43ca9936f2d0..353b9227c2ec 100644 --- a/trunk/kernel/timer.c +++ b/trunk/kernel/timer.c @@ -88,6 +88,18 @@ struct tvec_base boot_tvec_bases; EXPORT_SYMBOL(boot_tvec_bases); static DEFINE_PER_CPU(struct tvec_base *, tvec_bases) = &boot_tvec_bases; +/* + * Note that all tvec_bases are 2 byte aligned and lower bit of + * base in timer_list is guaranteed to be zero. Use the LSB to + * indicate whether the timer is deferrable. + * + * A deferrable timer will work normally when the system is busy, but + * will not cause a CPU to come out of idle just to service it; instead, + * the timer will be serviced when the CPU eventually wakes up with a + * subsequent non-deferrable timer. + */ +#define TBASE_DEFERRABLE_FLAG (0x1) + /* Functions below help us manage 'deferrable' flag */ static inline unsigned int tbase_get_deferrable(struct tvec_base *base) { @@ -101,7 +113,8 @@ static inline struct tvec_base *tbase_get_base(struct tvec_base *base) static inline void timer_set_deferrable(struct timer_list *timer) { - timer->base = TBASE_MAKE_DEFERRED(timer->base); + timer->base = ((struct tvec_base *)((unsigned long)(timer->base) | + TBASE_DEFERRABLE_FLAG)); } static inline void @@ -330,6 +343,15 @@ void set_timer_slack(struct timer_list *timer, int slack_hz) } EXPORT_SYMBOL_GPL(set_timer_slack); + +static inline void set_running_timer(struct tvec_base *base, + struct timer_list *timer) +{ +#ifdef CONFIG_SMP + base->running_timer = timer; +#endif +} + static void internal_add_timer(struct tvec_base *base, struct timer_list *timer) { unsigned long expires = timer->expires; @@ -914,12 +936,15 @@ int del_timer(struct timer_list *timer) } EXPORT_SYMBOL(del_timer); +#ifdef CONFIG_SMP /** * try_to_del_timer_sync - Try to deactivate a timer * @timer: timer do del * * This function tries to deactivate a timer. Upon successful (ret >= 0) * exit the timer is not queued and the handler is not running on any CPU. + * + * It must not be called from interrupt contexts. */ int try_to_del_timer_sync(struct timer_list *timer) { @@ -948,7 +973,6 @@ int try_to_del_timer_sync(struct timer_list *timer) } EXPORT_SYMBOL(try_to_del_timer_sync); -#ifdef CONFIG_SMP /** * del_timer_sync - deactivate a timer and wait for the handler to finish. * @timer: the timer to be deactivated @@ -959,7 +983,7 @@ EXPORT_SYMBOL(try_to_del_timer_sync); * * Synchronization rules: Callers must prevent restarting of the timer, * otherwise this function is meaningless. It must not be called from - * hardirq contexts. The caller must not hold locks which would prevent + * interrupt contexts. The caller must not hold locks which would prevent * completion of the timer's handler. The timer's handler must not call * add_timer_on(). Upon exit the timer is not queued and the handler is * not running on any CPU. @@ -969,16 +993,14 @@ EXPORT_SYMBOL(try_to_del_timer_sync); int del_timer_sync(struct timer_list *timer) { #ifdef CONFIG_LOCKDEP - local_bh_disable(); + unsigned long flags; + + local_irq_save(flags); lock_map_acquire(&timer->lockdep_map); lock_map_release(&timer->lockdep_map); - local_bh_enable(); + local_irq_restore(flags); #endif - /* - * don't use it in hardirq context, because it - * could lead to deadlock. - */ - WARN_ON(in_irq()); + for (;;) { int ret = try_to_del_timer_sync(timer); if (ret >= 0) @@ -1089,7 +1111,7 @@ static inline void __run_timers(struct tvec_base *base) timer_stats_account_timer(timer); - base->running_timer = timer; + set_running_timer(base, timer); detach_timer(timer, 1); spin_unlock_irq(&base->lock); @@ -1097,7 +1119,7 @@ static inline void __run_timers(struct tvec_base *base) spin_lock_irq(&base->lock); } } - base->running_timer = NULL; + set_running_timer(base, NULL); spin_unlock_irq(&base->lock); } @@ -1227,7 +1249,7 @@ static unsigned long cmp_next_hrtimer_event(unsigned long now, */ unsigned long get_next_timer_interrupt(unsigned long now) { - struct tvec_base *base = __this_cpu_read(tvec_bases); + struct tvec_base *base = __get_cpu_var(tvec_bases); unsigned long expires; /* @@ -1276,7 +1298,7 @@ void update_process_times(int user_tick) */ static void run_timer_softirq(struct softirq_action *h) { - struct tvec_base *base = __this_cpu_read(tvec_bases); + struct tvec_base *base = __get_cpu_var(tvec_bases); hrtimer_run_pending(); diff --git a/trunk/kernel/trace/Kconfig b/trunk/kernel/trace/Kconfig index 14674dce77a6..ea37e2ff4164 100644 --- a/trunk/kernel/trace/Kconfig +++ b/trunk/kernel/trace/Kconfig @@ -69,21 +69,6 @@ config EVENT_TRACING select CONTEXT_SWITCH_TRACER bool -config EVENT_POWER_TRACING_DEPRECATED - depends on EVENT_TRACING - bool "Deprecated power event trace API, to be removed" - default y - help - Provides old power event types: - C-state/idle accounting events: - power:power_start - power:power_end - and old cpufreq accounting event: - power:power_frequency - This is for userspace compatibility - and will vanish after 5 kernel iterations, - namely 2.6.41. - config CONTEXT_SWITCH_TRACER bool diff --git a/trunk/kernel/trace/power-traces.c b/trunk/kernel/trace/power-traces.c index f55fcf61b223..a22582a06161 100644 --- a/trunk/kernel/trace/power-traces.c +++ b/trunk/kernel/trace/power-traces.c @@ -13,8 +13,5 @@ #define CREATE_TRACE_POINTS #include -#ifdef EVENT_POWER_TRACING_DEPRECATED -EXPORT_TRACEPOINT_SYMBOL_GPL(power_start); -#endif -EXPORT_TRACEPOINT_SYMBOL_GPL(cpu_idle); +EXPORT_TRACEPOINT_SYMBOL_GPL(power_frequency); diff --git a/trunk/kernel/trace/trace_event_perf.c b/trunk/kernel/trace/trace_event_perf.c index 19a359d5e6d5..39c059ca670e 100644 --- a/trunk/kernel/trace/trace_event_perf.c +++ b/trunk/kernel/trace/trace_event_perf.c @@ -21,46 +21,17 @@ typedef typeof(unsigned long [PERF_MAX_TRACE_SIZE / sizeof(unsigned long)]) /* Count the events in use (per event id, not per instance) */ static int total_ref_count; -static int perf_trace_event_perm(struct ftrace_event_call *tp_event, - struct perf_event *p_event) -{ - /* No tracing, just counting, so no obvious leak */ - if (!(p_event->attr.sample_type & PERF_SAMPLE_RAW)) - return 0; - - /* Some events are ok to be traced by non-root users... */ - if (p_event->attach_state == PERF_ATTACH_TASK) { - if (tp_event->flags & TRACE_EVENT_FL_CAP_ANY) - return 0; - } - - /* - * ...otherwise raw tracepoint data can be a severe data leak, - * only allow root to have these. - */ - if (perf_paranoid_tracepoint_raw() && !capable(CAP_SYS_ADMIN)) - return -EPERM; - - return 0; -} - static int perf_trace_event_init(struct ftrace_event_call *tp_event, struct perf_event *p_event) { struct hlist_head __percpu *list; - int ret; + int ret = -ENOMEM; int cpu; - ret = perf_trace_event_perm(tp_event, p_event); - if (ret) - return ret; - p_event->tp_event = tp_event; if (tp_event->perf_refcount++ > 0) return 0; - ret = -ENOMEM; - list = alloc_percpu(struct hlist_head); if (!list) goto fail; diff --git a/trunk/kernel/trace/trace_events.c b/trunk/kernel/trace/trace_events.c index 35fde09b81de..0725eeab1937 100644 --- a/trunk/kernel/trace/trace_events.c +++ b/trunk/kernel/trace/trace_events.c @@ -27,12 +27,6 @@ DEFINE_MUTEX(event_mutex); -DEFINE_MUTEX(event_storage_mutex); -EXPORT_SYMBOL_GPL(event_storage_mutex); - -char event_storage[EVENT_STORAGE_SIZE]; -EXPORT_SYMBOL_GPL(event_storage); - LIST_HEAD(ftrace_events); LIST_HEAD(ftrace_common_fields); diff --git a/trunk/kernel/trace/trace_export.c b/trunk/kernel/trace/trace_export.c index 4b74d71705c0..4ba44deaac25 100644 --- a/trunk/kernel/trace/trace_export.c +++ b/trunk/kernel/trace/trace_export.c @@ -83,19 +83,13 @@ static void __always_unused ____ftrace_check_##name(void) \ #undef __array #define __array(type, item, len) \ - do { \ - BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \ - mutex_lock(&event_storage_mutex); \ - snprintf(event_storage, sizeof(event_storage), \ - "%s[%d]", #type, len); \ - ret = trace_define_field(event_call, event_storage, #item, \ + BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \ + ret = trace_define_field(event_call, #type "[" #len "]", #item, \ offsetof(typeof(field), item), \ sizeof(field.item), \ is_signed_type(type), FILTER_OTHER); \ - mutex_unlock(&event_storage_mutex); \ - if (ret) \ - return ret; \ - } while (0); + if (ret) \ + return ret; #undef __array_desc #define __array_desc(type, container, item, len) \ diff --git a/trunk/kernel/trace/trace_selftest.c b/trunk/kernel/trace/trace_selftest.c index 562c56e048fd..155a415b3209 100644 --- a/trunk/kernel/trace/trace_selftest.c +++ b/trunk/kernel/trace/trace_selftest.c @@ -558,7 +558,7 @@ trace_selftest_startup_nop(struct tracer *trace, struct trace_array *tr) static int trace_wakeup_test_thread(void *data) { /* Make this a RT thread, doesn't need to be too high */ - static struct sched_param param = { .sched_priority = 5 }; + struct sched_param param = { .sched_priority = 5 }; struct completion *x = data; sched_setscheduler(current, SCHED_FIFO, ¶m); diff --git a/trunk/kernel/user.c b/trunk/kernel/user.c index 5c598ca781df..2c7d8d5914b1 100644 --- a/trunk/kernel/user.c +++ b/trunk/kernel/user.c @@ -158,7 +158,6 @@ struct user_struct *alloc_uid(struct user_namespace *ns, uid_t uid) spin_lock_irq(&uidhash_lock); up = uid_hash_find(uid, hashent); if (up) { - put_user_ns(ns); key_put(new->uid_keyring); key_put(new->session_keyring); kmem_cache_free(uid_cachep, new); diff --git a/trunk/kernel/watchdog.c b/trunk/kernel/watchdog.c index 6e7b575ac33c..6e3c41a4024c 100644 --- a/trunk/kernel/watchdog.c +++ b/trunk/kernel/watchdog.c @@ -57,8 +57,6 @@ static int __init hardlockup_panic_setup(char *str) { if (!strncmp(str, "panic", 5)) hardlockup_panic = 1; - else if (!strncmp(str, "0", 1)) - no_watchdog = 1; return 1; } __setup("nmi_watchdog=", hardlockup_panic_setup); @@ -309,7 +307,7 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) */ static int watchdog(void *unused) { - static struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; + struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; struct hrtimer *hrtimer = &__raw_get_cpu_var(watchdog_hrtimer); sched_setscheduler(current, SCHED_FIFO, ¶m); @@ -366,8 +364,7 @@ static int watchdog_nmi_enable(int cpu) goto out_save; } - printk(KERN_ERR "NMI watchdog disabled for cpu%i: unable to create perf event: %ld\n", - cpu, PTR_ERR(event)); + printk(KERN_ERR "NMI watchdog failed to create perf event on cpu%i: %p\n", cpu, event); return PTR_ERR(event); /* success path */ @@ -550,13 +547,13 @@ static struct notifier_block __cpuinitdata cpu_nfb = { .notifier_call = cpu_callback }; -void __init lockup_detector_init(void) +static int __init spawn_watchdog_task(void) { void *cpu = (void *)(long)smp_processor_id(); int err; if (no_watchdog) - return; + return 0; err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu); WARN_ON(notifier_to_errno(err)); @@ -564,5 +561,6 @@ void __init lockup_detector_init(void) cpu_callback(&cpu_nfb, CPU_ONLINE, cpu); register_cpu_notifier(&cpu_nfb); - return; + return 0; } +early_initcall(spawn_watchdog_task); diff --git a/trunk/lib/Kconfig.debug b/trunk/lib/Kconfig.debug index 2d05adb98401..28b42b9274d0 100644 --- a/trunk/lib/Kconfig.debug +++ b/trunk/lib/Kconfig.debug @@ -173,8 +173,7 @@ config LOCKUP_DETECTOR An NMI is generated every 60 seconds or so to check for hardlockups. config HARDLOCKUP_DETECTOR - def_bool LOCKUP_DETECTOR && PERF_EVENTS && HAVE_PERF_EVENTS_NMI && \ - !ARCH_HAS_NMI_WATCHDOG + def_bool LOCKUP_DETECTOR && PERF_EVENTS && HAVE_PERF_EVENTS_NMI config BOOTPARAM_SOFTLOCKUP_PANIC bool "Panic (Reboot) On Soft Lockups" diff --git a/trunk/lib/Makefile b/trunk/lib/Makefile index 9e2db72d128e..e6a3763b8212 100644 --- a/trunk/lib/Makefile +++ b/trunk/lib/Makefile @@ -8,7 +8,7 @@ KBUILD_CFLAGS = $(subst -pg,,$(ORIG_CFLAGS)) endif lib-y := ctype.o string.o vsprintf.o cmdline.o \ - rbtree.o radix-tree.o dump_stack.o timerqueue.o\ + rbtree.o radix-tree.o dump_stack.o \ idr.o int_sqrt.o extable.o prio_tree.o \ sha1.o irq_regs.o reciprocal_div.o argv_split.o \ proportions.o prio_heap.o ratelimit.o show_mem.o \ diff --git a/trunk/lib/timerqueue.c b/trunk/lib/timerqueue.c deleted file mode 100644 index e3a1050e6820..000000000000 --- a/trunk/lib/timerqueue.c +++ /dev/null @@ -1,107 +0,0 @@ -/* - * Generic Timer-queue - * - * Manages a simple queue of timers, ordered by expiration time. - * Uses rbtrees for quick list adds and expiration. - * - * NOTE: All of the following functions need to be serialized - * to avoid races. No locking is done by this libary code. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include -#include -#include - -/** - * timerqueue_add - Adds timer to timerqueue. - * - * @head: head of timerqueue - * @node: timer node to be added - * - * Adds the timer node to the timerqueue, sorted by the - * node's expires value. - */ -void timerqueue_add(struct timerqueue_head *head, struct timerqueue_node *node) -{ - struct rb_node **p = &head->head.rb_node; - struct rb_node *parent = NULL; - struct timerqueue_node *ptr; - - /* Make sure we don't add nodes that are already added */ - WARN_ON_ONCE(!RB_EMPTY_NODE(&node->node)); - - while (*p) { - parent = *p; - ptr = rb_entry(parent, struct timerqueue_node, node); - if (node->expires.tv64 < ptr->expires.tv64) - p = &(*p)->rb_left; - else - p = &(*p)->rb_right; - } - rb_link_node(&node->node, parent, p); - rb_insert_color(&node->node, &head->head); - - if (!head->next || node->expires.tv64 < head->next->expires.tv64) - head->next = node; -} -EXPORT_SYMBOL_GPL(timerqueue_add); - -/** - * timerqueue_del - Removes a timer from the timerqueue. - * - * @head: head of timerqueue - * @node: timer node to be removed - * - * Removes the timer node from the timerqueue. - */ -void timerqueue_del(struct timerqueue_head *head, struct timerqueue_node *node) -{ - WARN_ON_ONCE(RB_EMPTY_NODE(&node->node)); - - /* update next pointer */ - if (head->next == node) { - struct rb_node *rbn = rb_next(&node->node); - - head->next = rbn ? - rb_entry(rbn, struct timerqueue_node, node) : NULL; - } - rb_erase(&node->node, &head->head); - RB_CLEAR_NODE(&node->node); -} -EXPORT_SYMBOL_GPL(timerqueue_del); - -/** - * timerqueue_iterate_next - Returns the timer after the provided timer - * - * @node: Pointer to a timer. - * - * Provides the timer that is after the given node. This is used, when - * necessary, to iterate through the list of timers in a timer list - * without modifying the list. - */ -struct timerqueue_node *timerqueue_iterate_next(struct timerqueue_node *node) -{ - struct rb_node *next; - - if (!node) - return NULL; - next = rb_next(&node->node); - if (!next) - return NULL; - return container_of(next, struct timerqueue_node, node); -} -EXPORT_SYMBOL_GPL(timerqueue_iterate_next); diff --git a/trunk/mm/memcontrol.c b/trunk/mm/memcontrol.c index 00bb8a64d028..7a22b4129211 100644 --- a/trunk/mm/memcontrol.c +++ b/trunk/mm/memcontrol.c @@ -1925,18 +1925,19 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm, rcu_read_lock(); p = rcu_dereference(mm->owner); + VM_BUG_ON(!p); /* - * Because we don't have task_lock(), "p" can exit. - * In that case, "mem" can point to root or p can be NULL with - * race with swapoff. Then, we have small risk of mis-accouning. - * But such kind of mis-account by race always happens because - * we don't have cgroup_mutex(). It's overkill and we allo that - * small race, here. - * (*) swapoff at el will charge against mm-struct not against - * task-struct. So, mm->owner can be NULL. + * because we don't have task_lock(), "p" can exit while + * we're here. In that case, "mem" can point to root + * cgroup but never be NULL. (and task_struct itself is freed + * by RCU, cgroup itself is RCU safe.) Then, we have small + * risk here to get wrong cgroup. But such kind of mis-account + * by race always happens because we don't have cgroup_mutex(). + * It's overkill and we allow that small race, here. */ mem = mem_cgroup_from_task(p); - if (!mem || mem_cgroup_is_root(mem)) { + VM_BUG_ON(!mem); + if (mem_cgroup_is_root(mem)) { rcu_read_unlock(); goto done; } diff --git a/trunk/net/bridge/br_multicast.c b/trunk/net/bridge/br_multicast.c index 543b3262d002..f19e347f56f6 100644 --- a/trunk/net/bridge/br_multicast.c +++ b/trunk/net/bridge/br_multicast.c @@ -1430,7 +1430,7 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br, struct net_bridge_port *port, struct sk_buff *skb) { - struct sk_buff *skb2; + struct sk_buff *skb2 = skb; struct ipv6hdr *ip6h; struct icmp6hdr *icmp6h; u8 nexthdr; @@ -1469,15 +1469,15 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br, if (!skb2) return -ENOMEM; - err = -EINVAL; - if (!pskb_may_pull(skb2, offset + sizeof(struct icmp6hdr))) - goto out; - len -= offset - skb_network_offset(skb2); __skb_pull(skb2, offset); skb_reset_transport_header(skb2); + err = -EINVAL; + if (!pskb_may_pull(skb2, sizeof(*icmp6h))) + goto out; + icmp6h = icmp6_hdr(skb2); switch (icmp6h->icmp6_type) { @@ -1516,12 +1516,7 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br, switch (icmp6h->icmp6_type) { case ICMPV6_MGM_REPORT: { - struct mld_msg *mld; - if (!pskb_may_pull(skb2, sizeof(*mld))) { - err = -EINVAL; - goto out; - } - mld = (struct mld_msg *)skb_transport_header(skb2); + struct mld_msg *mld = (struct mld_msg *)icmp6h; BR_INPUT_SKB_CB(skb2)->mrouters_only = 1; err = br_ip6_multicast_add_group(br, port, &mld->mld_mca); break; @@ -1534,18 +1529,15 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br, break; case ICMPV6_MGM_REDUCTION: { - struct mld_msg *mld; - if (!pskb_may_pull(skb2, sizeof(*mld))) { - err = -EINVAL; - goto out; - } - mld = (struct mld_msg *)skb_transport_header(skb2); + struct mld_msg *mld = (struct mld_msg *)icmp6h; br_ip6_multicast_leave_group(br, port, &mld->mld_mca); } } out: - kfree_skb(skb2); + __skb_push(skb2, offset); + if (skb2 != skb) + kfree_skb(skb2); return err; } #endif diff --git a/trunk/net/bridge/br_stp_bpdu.c b/trunk/net/bridge/br_stp_bpdu.c index e3d7aefa9181..35cf27087b56 100644 --- a/trunk/net/bridge/br_stp_bpdu.c +++ b/trunk/net/bridge/br_stp_bpdu.c @@ -50,8 +50,6 @@ static void br_send_bpdu(struct net_bridge_port *p, llc_mac_hdr_init(skb, p->dev->dev_addr, p->br->group_addr); - skb_reset_mac_header(skb); - NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT, skb, NULL, skb->dev, dev_queue_xmit); } diff --git a/trunk/net/can/bcm.c b/trunk/net/can/bcm.c index 9d5e8accfab1..6faa8256e10c 100644 --- a/trunk/net/can/bcm.c +++ b/trunk/net/can/bcm.c @@ -125,7 +125,7 @@ struct bcm_sock { struct list_head tx_ops; unsigned long dropped_usr_msgs; struct proc_dir_entry *bcm_proc_read; - char procname [32]; /* inode number in decimal with \0 */ + char procname [20]; /* pointer printed in ASCII with \0 */ }; static inline struct bcm_sock *bcm_sk(const struct sock *sk) @@ -1521,7 +1521,7 @@ static int bcm_connect(struct socket *sock, struct sockaddr *uaddr, int len, if (proc_dir) { /* unique socket address as filename */ - sprintf(bo->procname, "%lu", sock_i_ino(sk)); + sprintf(bo->procname, "%p", sock); bo->bcm_proc_read = proc_create_data(bo->procname, 0644, proc_dir, &bcm_proc_fops, sk); diff --git a/trunk/net/ipv4/route.c b/trunk/net/ipv4/route.c index 93bfd95584f4..df948b0f1ac9 100644 --- a/trunk/net/ipv4/route.c +++ b/trunk/net/ipv4/route.c @@ -2649,12 +2649,8 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp, } if (res.type == RTN_LOCAL) { - if (!fl.fl4_src) { - if (res.fi->fib_prefsrc) - fl.fl4_src = res.fi->fib_prefsrc; - else - fl.fl4_src = fl.fl4_dst; - } + if (!fl.fl4_src) + fl.fl4_src = fl.fl4_dst; dev_out = net->loopback_dev; fl.oif = dev_out->ifindex; res.fi = NULL; diff --git a/trunk/scripts/Makefile.build b/trunk/scripts/Makefile.build index 4eb99ab34053..5ad25e17b6cb 100644 --- a/trunk/scripts/Makefile.build +++ b/trunk/scripts/Makefile.build @@ -214,22 +214,17 @@ ifdef BUILD_C_RECORDMCOUNT # The empty.o file is created in the make process in order to determine # the target endianness and word size. It is made before all other C # files, including recordmcount. -sub_cmd_record_mcount = \ - if [ $(@) != "scripts/mod/empty.o" ]; then \ - $(objtree)/scripts/recordmcount "$(@)"; \ - fi; +cmd_record_mcount = if [ $(@) != "scripts/mod/empty.o" ]; then \ + $(objtree)/scripts/recordmcount "$(@)"; \ + fi; else -sub_cmd_record_mcount = set -e ; perl $(srctree)/scripts/recordmcount.pl "$(ARCH)" \ +cmd_record_mcount = set -e ; perl $(srctree)/scripts/recordmcount.pl "$(ARCH)" \ "$(if $(CONFIG_CPU_BIG_ENDIAN),big,little)" \ "$(if $(CONFIG_64BIT),64,32)" \ "$(OBJDUMP)" "$(OBJCOPY)" "$(CC) $(KBUILD_CFLAGS)" \ "$(LD)" "$(NM)" "$(RM)" "$(MV)" \ "$(if $(part-of-module),1,0)" "$(@)"; endif -cmd_record_mcount = \ - if [ "$(findstring -pg,$(_c_flags))" = "-pg" ]; then \ - $(sub_cmd_record_mcount) \ - fi; endif define rule_cc_o_c diff --git a/trunk/scripts/kconfig/menu.c b/trunk/scripts/kconfig/menu.c index 5f77dcb8977e..b9d9aa18e6d6 100644 --- a/trunk/scripts/kconfig/menu.c +++ b/trunk/scripts/kconfig/menu.c @@ -140,20 +140,6 @@ struct property *menu_add_prop(enum prop_type type, char *prompt, struct expr *e } if (current_entry->prompt && current_entry != &rootmenu) prop_warn(prop, "prompt redefined"); - - /* Apply all upper menus' visibilities to actual prompts. */ - if(type == P_PROMPT) { - struct menu *menu = current_entry; - - while ((menu = menu->parent) != NULL) { - if (!menu->visibility) - continue; - prop->visible.expr - = expr_alloc_and(prop->visible.expr, - menu->visibility); - } - } - current_entry->prompt = prop; } prop->text = prompt; diff --git a/trunk/scripts/kernel-doc b/trunk/scripts/kernel-doc index 9f85012acf0d..39580a5dc5df 100755 --- a/trunk/scripts/kernel-doc +++ b/trunk/scripts/kernel-doc @@ -155,8 +155,6 @@ use strict; # '@parameter' - name of a parameter # '%CONST' - name of a constant. -## init lots of data - my $errors = 0; my $warnings = 0; my $anon_struct_union = 0; @@ -220,14 +218,21 @@ my %highlights_list = ( $type_constant, "\$1", $type_param, "\$1" ); my $blankline_list = ""; +sub usage { + print "Usage: $0 [ -v ] [ -docbook | -html | -text | -man | -list ]\n"; + print " [ -no-doc-sections ]\n"; + print " [ -function funcname [ -function funcname ...] ]\n"; + print " [ -nofunction funcname [ -nofunction funcname ...] ]\n"; + print " c source file(s) > outputfile\n"; + print " -v : verbose output, more warnings & other info listed\n"; + exit 1; +} + # read arguments if ($#ARGV == -1) { usage(); } -my $kernelversion; -my $dohighlight = ""; - my $verbose = 0; my $output_mode = "man"; my $no_doc_sections = 0; @@ -240,7 +245,7 @@ my $man_date = ('January', 'February', 'March', 'April', 'May', 'June', 'November', 'December')[(localtime)[4]] . " " . ((localtime)[5]+1900); -# Essentially these are globals. +# Essentially these are globals # They probably want to be tidied up, made more localised or something. # CAVEAT EMPTOR! Some of the others I localised may not want to be, which # could cause "use of undefined value" or other bugs. @@ -348,18 +353,6 @@ while ($ARGV[0] =~ m/^-(.*)/) { } } -# continue execution near EOF; - -sub usage { - print "Usage: $0 [ -v ] [ -docbook | -html | -text | -man | -list ]\n"; - print " [ -no-doc-sections ]\n"; - print " [ -function funcname [ -function funcname ...] ]\n"; - print " [ -nofunction funcname [ -nofunction funcname ...] ]\n"; - print " c source file(s) > outputfile\n"; - print " -v : verbose output, more warnings & other info listed\n"; - exit 1; -} - # get kernel version from env sub get_kernel_version() { my $version = 'unknown kernel version'; @@ -369,6 +362,15 @@ sub get_kernel_version() { } return $version; } +my $kernelversion = get_kernel_version(); + +# generate a sequence of code that will splice in highlighting information +# using the s// operator. +my $dohighlight = ""; +foreach my $pattern (keys %highlights) { +# print STDERR "scanning pattern:$pattern, highlight:($highlights{$pattern})\n"; + $dohighlight .= "\$contents =~ s:$pattern:$highlights{$pattern}:gs;\n"; +} ## # dumps section contents to arrays/hashes intended for that purpose. @@ -1849,6 +1851,34 @@ sub dump_function($$) { }); } +sub process_file($); + +# Read the file that maps relative names to absolute names for +# separate source and object directories and for shadow trees. +if (open(SOURCE_MAP, "<.tmp_filelist.txt")) { + my ($relname, $absname); + while() { + chop(); + ($relname, $absname) = (split())[0..1]; + $relname =~ s:^/+::; + $source_map{$relname} = $absname; + } + close(SOURCE_MAP); +} + +foreach (@ARGV) { + chomp; + process_file($_); +} +if ($verbose && $errors) { + print STDERR "$errors errors\n"; +} +if ($verbose && $warnings) { + print STDERR "$warnings warnings\n"; +} + +exit($errors); + sub reset_state { $function = ""; %constants = (); @@ -2255,39 +2285,3 @@ sub process_file($) { } } } - - -$kernelversion = get_kernel_version(); - -# generate a sequence of code that will splice in highlighting information -# using the s// operator. -foreach my $pattern (keys %highlights) { -# print STDERR "scanning pattern:$pattern, highlight:($highlights{$pattern})\n"; - $dohighlight .= "\$contents =~ s:$pattern:$highlights{$pattern}:gs;\n"; -} - -# Read the file that maps relative names to absolute names for -# separate source and object directories and for shadow trees. -if (open(SOURCE_MAP, "<.tmp_filelist.txt")) { - my ($relname, $absname); - while() { - chop(); - ($relname, $absname) = (split())[0..1]; - $relname =~ s:^/+::; - $source_map{$relname} = $absname; - } - close(SOURCE_MAP); -} - -foreach (@ARGV) { - chomp; - process_file($_); -} -if ($verbose && $errors) { - print STDERR "$errors errors\n"; -} -if ($verbose && $warnings) { - print STDERR "$warnings warnings\n"; -} - -exit($errors); diff --git a/trunk/security/integrity/ima/ima_policy.c b/trunk/security/integrity/ima/ima_policy.c index d661afbe474c..aef8c0a923ab 100644 --- a/trunk/security/integrity/ima/ima_policy.c +++ b/trunk/security/integrity/ima/ima_policy.c @@ -253,8 +253,6 @@ static int ima_lsm_rule_init(struct ima_measure_rule_entry *entry, result = security_filter_rule_init(entry->lsm[lsm_rule].type, Audit_equal, args, &entry->lsm[lsm_rule].rule); - if (!entry->lsm[lsm_rule].rule) - return -EINVAL; return result; } diff --git a/trunk/sound/oss/soundcard.c b/trunk/sound/oss/soundcard.c index fcb14a099822..46c0d03dbecc 100644 --- a/trunk/sound/oss/soundcard.c +++ b/trunk/sound/oss/soundcard.c @@ -87,7 +87,7 @@ int *load_mixer_volumes(char *name, int *levels, int present) int i, n; for (i = 0; i < num_mixer_volumes; i++) { - if (strncmp(name, mixer_vols[i].name, 32) == 0) { + if (strcmp(name, mixer_vols[i].name) == 0) { if (present) mixer_vols[i].num = i; return mixer_vols[i].levels; @@ -99,7 +99,7 @@ int *load_mixer_volumes(char *name, int *levels, int present) } n = num_mixer_volumes++; - strncpy(mixer_vols[n].name, name, 32); + strcpy(mixer_vols[n].name, name); if (present) mixer_vols[n].num = n; diff --git a/trunk/sound/pci/hda/hda_intel.c b/trunk/sound/pci/hda/hda_intel.c index a1c4008af891..b030c8eba21f 100644 --- a/trunk/sound/pci/hda/hda_intel.c +++ b/trunk/sound/pci/hda/hda_intel.c @@ -2300,7 +2300,6 @@ static struct snd_pci_quirk position_fix_list[] __devinitdata = { SND_PCI_QUIRK(0x1028, 0x01cc, "Dell D820", POS_FIX_LPIB), SND_PCI_QUIRK(0x1028, 0x01de, "Dell Precision 390", POS_FIX_LPIB), SND_PCI_QUIRK(0x1028, 0x01f6, "Dell Latitude 131L", POS_FIX_LPIB), - SND_PCI_QUIRK(0x1028, 0x0470, "Dell Inspiron 1120", POS_FIX_LPIB), SND_PCI_QUIRK(0x103c, 0x306d, "HP dv3", POS_FIX_LPIB), SND_PCI_QUIRK(0x1043, 0x813d, "ASUS P5AD2", POS_FIX_LPIB), SND_PCI_QUIRK(0x1043, 0x81b3, "ASUS", POS_FIX_LPIB), diff --git a/trunk/sound/soc/codecs/max98088.c b/trunk/sound/soc/codecs/max98088.c index 6447dbb2f123..d63e28773eb1 100644 --- a/trunk/sound/soc/codecs/max98088.c +++ b/trunk/sound/soc/codecs/max98088.c @@ -40,6 +40,7 @@ struct max98088_cdata { }; struct max98088_priv { + u8 reg_cache[M98088_REG_CNT]; enum max98088_type devtype; void *control_data; struct max98088_pdata *pdata; @@ -1587,7 +1588,7 @@ static int max98088_dai2_set_fmt(struct snd_soc_dai *codec_dai, static void max98088_sync_cache(struct snd_soc_codec *codec) { - u16 *reg_cache = codec->reg_cache; + struct max98088_priv *max98088 = snd_soc_codec_get_drvdata(codec); int i; if (!codec->cache_sync) @@ -1598,14 +1599,14 @@ static void max98088_sync_cache(struct snd_soc_codec *codec) /* write back cached values if they're writeable and * different from the hardware default. */ - for (i = 1; i < codec->driver->reg_cache_size; i++) { + for (i = 1; i < ARRAY_SIZE(max98088->reg_cache); i++) { if (!max98088_access[i].writable) continue; - if (reg_cache[i] == max98088_reg[i]) + if (max98088->reg_cache[i] == max98088_reg[i]) continue; - snd_soc_write(codec, i, reg_cache[i]); + snd_soc_write(codec, i, max98088->reg_cache[i]); } codec->cache_sync = 0; @@ -1950,6 +1951,7 @@ static int max98088_probe(struct snd_soc_codec *codec) int ret = 0; codec->cache_sync = 1; + memcpy(codec->reg_cache, max98088_reg, sizeof(max98088_reg)); ret = snd_soc_codec_set_cache_io(codec, 8, 8, SND_SOC_I2C); if (ret != 0) { diff --git a/trunk/sound/soc/codecs/wm8523.c b/trunk/sound/soc/codecs/wm8523.c index deca79ea2b4b..9a433a5396cb 100644 --- a/trunk/sound/soc/codecs/wm8523.c +++ b/trunk/sound/soc/codecs/wm8523.c @@ -41,6 +41,7 @@ static const char *wm8523_supply_names[WM8523_NUM_SUPPLIES] = { /* codec private data */ struct wm8523_priv { enum snd_soc_control_type control_type; + u16 reg_cache[WM8523_REGISTER_COUNT]; struct regulator_bulk_data supplies[WM8523_NUM_SUPPLIES]; unsigned int sysclk; unsigned int rate_constraint_list[WM8523_NUM_RATES]; @@ -313,7 +314,6 @@ static int wm8523_set_bias_level(struct snd_soc_codec *codec, enum snd_soc_bias_level level) { struct wm8523_priv *wm8523 = snd_soc_codec_get_drvdata(codec); - u16 *reg_cache = codec->reg_cache; int ret, i; switch (level) { @@ -344,7 +344,7 @@ static int wm8523_set_bias_level(struct snd_soc_codec *codec, /* Sync back default/cached values */ for (i = WM8523_AIF_CTRL1; i < WM8523_MAX_REGISTER; i++) - snd_soc_write(codec, i, reg_cache[i]); + snd_soc_write(codec, i, wm8523->reg_cache[i]); msleep(100); @@ -414,7 +414,6 @@ static int wm8523_resume(struct snd_soc_codec *codec) static int wm8523_probe(struct snd_soc_codec *codec) { struct wm8523_priv *wm8523 = snd_soc_codec_get_drvdata(codec); - u16 *reg_cache = codec->reg_cache; int ret, i; codec->hw_write = (hw_write_t)i2c_master_send; @@ -471,8 +470,8 @@ static int wm8523_probe(struct snd_soc_codec *codec) } /* Change some default settings - latch VU and enable ZC */ - reg_cache[WM8523_DAC_GAINR] |= WM8523_DACR_VU; - reg_cache[WM8523_DAC_CTRL3] |= WM8523_ZC; + wm8523->reg_cache[WM8523_DAC_GAINR] |= WM8523_DACR_VU; + wm8523->reg_cache[WM8523_DAC_CTRL3] |= WM8523_ZC; wm8523_set_bias_level(codec, SND_SOC_BIAS_STANDBY); diff --git a/trunk/sound/soc/codecs/wm8741.c b/trunk/sound/soc/codecs/wm8741.c index aea60ef8aba7..90e31e9aa6f7 100644 --- a/trunk/sound/soc/codecs/wm8741.c +++ b/trunk/sound/soc/codecs/wm8741.c @@ -41,6 +41,7 @@ static const char *wm8741_supply_names[WM8741_NUM_SUPPLIES] = { /* codec private data */ struct wm8741_priv { enum snd_soc_control_type control_type; + u16 reg_cache[WM8741_REGISTER_COUNT]; struct regulator_bulk_data supplies[WM8741_NUM_SUPPLIES]; unsigned int sysclk; struct snd_pcm_hw_constraint_list *sysclk_constraints; @@ -421,7 +422,6 @@ static int wm8741_resume(struct snd_soc_codec *codec) static int wm8741_probe(struct snd_soc_codec *codec) { struct wm8741_priv *wm8741 = snd_soc_codec_get_drvdata(codec); - u16 *reg_cache = codec->reg_cache; int ret = 0; ret = snd_soc_codec_set_cache_io(codec, 7, 9, wm8741->control_type); @@ -437,10 +437,10 @@ static int wm8741_probe(struct snd_soc_codec *codec) } /* Change some default settings - latch VU */ - reg_cache[WM8741_DACLLSB_ATTENUATION] |= WM8741_UPDATELL; - reg_cache[WM8741_DACLMSB_ATTENUATION] |= WM8741_UPDATELM; - reg_cache[WM8741_DACRLSB_ATTENUATION] |= WM8741_UPDATERL; - reg_cache[WM8741_DACRLSB_ATTENUATION] |= WM8741_UPDATERM; + wm8741->reg_cache[WM8741_DACLLSB_ATTENUATION] |= WM8741_UPDATELL; + wm8741->reg_cache[WM8741_DACLMSB_ATTENUATION] |= WM8741_UPDATELM; + wm8741->reg_cache[WM8741_DACRLSB_ATTENUATION] |= WM8741_UPDATERL; + wm8741->reg_cache[WM8741_DACRLSB_ATTENUATION] |= WM8741_UPDATERM; snd_soc_add_controls(codec, wm8741_snd_controls, ARRAY_SIZE(wm8741_snd_controls)); diff --git a/trunk/sound/soc/codecs/wm8753.c b/trunk/sound/soc/codecs/wm8753.c index 87caae59e939..8f679a13f2bc 100644 --- a/trunk/sound/soc/codecs/wm8753.c +++ b/trunk/sound/soc/codecs/wm8753.c @@ -65,22 +65,22 @@ static void wm8753_set_dai_mode(struct snd_soc_codec *codec, * are using 2 wire for device control, so we cache them instead. */ static const u16 wm8753_reg[] = { - 0x0000, 0x0008, 0x0000, 0x000a, - 0x000a, 0x0033, 0x0000, 0x0007, - 0x00ff, 0x00ff, 0x000f, 0x000f, - 0x007b, 0x0000, 0x0032, 0x0000, - 0x00c3, 0x00c3, 0x00c0, 0x0000, + 0x0008, 0x0000, 0x000a, 0x000a, + 0x0033, 0x0000, 0x0007, 0x00ff, + 0x00ff, 0x000f, 0x000f, 0x007b, + 0x0000, 0x0032, 0x0000, 0x00c3, + 0x00c3, 0x00c0, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0055, 0x0005, 0x0050, 0x0055, - 0x0050, 0x0055, 0x0050, 0x0055, + 0x0000, 0x0000, 0x0000, 0x0055, + 0x0005, 0x0050, 0x0055, 0x0050, + 0x0055, 0x0050, 0x0055, 0x0079, 0x0079, 0x0079, 0x0079, 0x0079, - 0x0079, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0097, 0x0097, 0x0000, - 0x0004, 0x0000, 0x0083, 0x0024, - 0x01ba, 0x0000, 0x0083, 0x0024, - 0x01ba, 0x0000, 0x0000, 0x0000 + 0x0000, 0x0000, 0x0000, 0x0000, + 0x0097, 0x0097, 0x0000, 0x0004, + 0x0000, 0x0083, 0x0024, 0x01ba, + 0x0000, 0x0083, 0x0024, 0x01ba, + 0x0000, 0x0000, 0x0000 }; /* codec private data */ @@ -88,10 +88,57 @@ struct wm8753_priv { enum snd_soc_control_type control_type; unsigned int sysclk; unsigned int pcmclk; + u16 reg_cache[ARRAY_SIZE(wm8753_reg)]; int dai_func; }; -#define wm8753_reset(c) snd_soc_write(c, WM8753_RESET, 0) +/* + * read wm8753 register cache + */ +static inline unsigned int wm8753_read_reg_cache(struct snd_soc_codec *codec, + unsigned int reg) +{ + u16 *cache = codec->reg_cache; + if (reg < 1 || reg >= (ARRAY_SIZE(wm8753_reg) + 1)) + return -1; + return cache[reg - 1]; +} + +/* + * write wm8753 register cache + */ +static inline void wm8753_write_reg_cache(struct snd_soc_codec *codec, + unsigned int reg, unsigned int value) +{ + u16 *cache = codec->reg_cache; + if (reg < 1 || reg >= (ARRAY_SIZE(wm8753_reg) + 1)) + return; + cache[reg - 1] = value; +} + +/* + * write to the WM8753 register space + */ +static int wm8753_write(struct snd_soc_codec *codec, unsigned int reg, + unsigned int value) +{ + u8 data[2]; + + /* data is + * D15..D9 WM8753 register offset + * D8...D0 register data + */ + data[0] = (reg << 1) | ((value >> 8) & 0x0001); + data[1] = value & 0x00ff; + + wm8753_write_reg_cache(codec, reg, value); + if (codec->hw_write(codec->control_data, data, 2) == 2) + return 0; + else + return -EIO; +} + +#define wm8753_reset(c) wm8753_write(c, WM8753_RESET, 0) /* * WM8753 Controls @@ -171,7 +218,7 @@ static int wm8753_get_dai(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol) { struct snd_soc_codec *codec = snd_kcontrol_chip(kcontrol); - int mode = snd_soc_read(codec, WM8753_IOCTL); + int mode = wm8753_read_reg_cache(codec, WM8753_IOCTL); ucontrol->value.integer.value[0] = (mode & 0xc) >> 2; return 0; @@ -181,7 +228,7 @@ static int wm8753_set_dai(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol) { struct snd_soc_codec *codec = snd_kcontrol_chip(kcontrol); - int mode = snd_soc_read(codec, WM8753_IOCTL); + int mode = wm8753_read_reg_cache(codec, WM8753_IOCTL); struct wm8753_priv *wm8753 = snd_soc_codec_get_drvdata(codec); if (((mode & 0xc) >> 2) == ucontrol->value.integer.value[0]) @@ -691,17 +738,17 @@ static int wm8753_set_dai_pll(struct snd_soc_dai *codec_dai, int pll_id, if (pll_id == WM8753_PLL1) { offset = 0; enable = 0x10; - reg = snd_soc_read(codec, WM8753_CLOCK) & 0xffef; + reg = wm8753_read_reg_cache(codec, WM8753_CLOCK) & 0xffef; } else { offset = 4; enable = 0x8; - reg = snd_soc_read(codec, WM8753_CLOCK) & 0xfff7; + reg = wm8753_read_reg_cache(codec, WM8753_CLOCK) & 0xfff7; } if (!freq_in || !freq_out) { /* disable PLL */ - snd_soc_write(codec, WM8753_PLL1CTL1 + offset, 0x0026); - snd_soc_write(codec, WM8753_CLOCK, reg); + wm8753_write(codec, WM8753_PLL1CTL1 + offset, 0x0026); + wm8753_write(codec, WM8753_CLOCK, reg); return 0; } else { u16 value = 0; @@ -712,20 +759,20 @@ static int wm8753_set_dai_pll(struct snd_soc_dai *codec_dai, int pll_id, /* set up N and K PLL divisor ratios */ /* bits 8:5 = PLL_N, bits 3:0 = PLL_K[21:18] */ value = (pll_div.n << 5) + ((pll_div.k & 0x3c0000) >> 18); - snd_soc_write(codec, WM8753_PLL1CTL2 + offset, value); + wm8753_write(codec, WM8753_PLL1CTL2 + offset, value); /* bits 8:0 = PLL_K[17:9] */ value = (pll_div.k & 0x03fe00) >> 9; - snd_soc_write(codec, WM8753_PLL1CTL3 + offset, value); + wm8753_write(codec, WM8753_PLL1CTL3 + offset, value); /* bits 8:0 = PLL_K[8:0] */ value = pll_div.k & 0x0001ff; - snd_soc_write(codec, WM8753_PLL1CTL4 + offset, value); + wm8753_write(codec, WM8753_PLL1CTL4 + offset, value); /* set PLL as input and enable */ - snd_soc_write(codec, WM8753_PLL1CTL1 + offset, 0x0027 | + wm8753_write(codec, WM8753_PLL1CTL1 + offset, 0x0027 | (pll_div.div2 << 3)); - snd_soc_write(codec, WM8753_CLOCK, reg | enable); + wm8753_write(codec, WM8753_CLOCK, reg | enable); } return 0; } @@ -832,7 +879,7 @@ static int wm8753_vdac_adc_set_dai_fmt(struct snd_soc_dai *codec_dai, unsigned int fmt) { struct snd_soc_codec *codec = codec_dai->codec; - u16 voice = snd_soc_read(codec, WM8753_PCM) & 0x01ec; + u16 voice = wm8753_read_reg_cache(codec, WM8753_PCM) & 0x01ec; /* interface format */ switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) { @@ -854,7 +901,7 @@ static int wm8753_vdac_adc_set_dai_fmt(struct snd_soc_dai *codec_dai, return -EINVAL; } - snd_soc_write(codec, WM8753_PCM, voice); + wm8753_write(codec, WM8753_PCM, voice); return 0; } @@ -875,8 +922,8 @@ static int wm8753_pcm_hw_params(struct snd_pcm_substream *substream, struct snd_soc_pcm_runtime *rtd = substream->private_data; struct snd_soc_codec *codec = rtd->codec; struct wm8753_priv *wm8753 = snd_soc_codec_get_drvdata(codec); - u16 voice = snd_soc_read(codec, WM8753_PCM) & 0x01f3; - u16 srate = snd_soc_read(codec, WM8753_SRATE1) & 0x017f; + u16 voice = wm8753_read_reg_cache(codec, WM8753_PCM) & 0x01f3; + u16 srate = wm8753_read_reg_cache(codec, WM8753_SRATE1) & 0x017f; /* bit size */ switch (params_format(params)) { @@ -896,9 +943,9 @@ static int wm8753_pcm_hw_params(struct snd_pcm_substream *substream, /* sample rate */ if (params_rate(params) * 384 == wm8753->pcmclk) srate |= 0x80; - snd_soc_write(codec, WM8753_SRATE1, srate); + wm8753_write(codec, WM8753_SRATE1, srate); - snd_soc_write(codec, WM8753_PCM, voice); + wm8753_write(codec, WM8753_PCM, voice); return 0; } @@ -911,8 +958,8 @@ static int wm8753_pcm_set_dai_fmt(struct snd_soc_dai *codec_dai, struct snd_soc_codec *codec = codec_dai->codec; u16 voice, ioctl; - voice = snd_soc_read(codec, WM8753_PCM) & 0x011f; - ioctl = snd_soc_read(codec, WM8753_IOCTL) & 0x015d; + voice = wm8753_read_reg_cache(codec, WM8753_PCM) & 0x011f; + ioctl = wm8753_read_reg_cache(codec, WM8753_IOCTL) & 0x015d; /* set master/slave audio interface */ switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) { @@ -966,8 +1013,8 @@ static int wm8753_pcm_set_dai_fmt(struct snd_soc_dai *codec_dai, return -EINVAL; } - snd_soc_write(codec, WM8753_PCM, voice); - snd_soc_write(codec, WM8753_IOCTL, ioctl); + wm8753_write(codec, WM8753_PCM, voice); + wm8753_write(codec, WM8753_IOCTL, ioctl); return 0; } @@ -979,16 +1026,16 @@ static int wm8753_set_dai_clkdiv(struct snd_soc_dai *codec_dai, switch (div_id) { case WM8753_PCMDIV: - reg = snd_soc_read(codec, WM8753_CLOCK) & 0x003f; - snd_soc_write(codec, WM8753_CLOCK, reg | div); + reg = wm8753_read_reg_cache(codec, WM8753_CLOCK) & 0x003f; + wm8753_write(codec, WM8753_CLOCK, reg | div); break; case WM8753_BCLKDIV: - reg = snd_soc_read(codec, WM8753_SRATE2) & 0x01c7; - snd_soc_write(codec, WM8753_SRATE2, reg | div); + reg = wm8753_read_reg_cache(codec, WM8753_SRATE2) & 0x01c7; + wm8753_write(codec, WM8753_SRATE2, reg | div); break; case WM8753_VXCLKDIV: - reg = snd_soc_read(codec, WM8753_SRATE2) & 0x003f; - snd_soc_write(codec, WM8753_SRATE2, reg | div); + reg = wm8753_read_reg_cache(codec, WM8753_SRATE2) & 0x003f; + wm8753_write(codec, WM8753_SRATE2, reg | div); break; default: return -EINVAL; @@ -1003,7 +1050,7 @@ static int wm8753_hdac_set_dai_fmt(struct snd_soc_dai *codec_dai, unsigned int fmt) { struct snd_soc_codec *codec = codec_dai->codec; - u16 hifi = snd_soc_read(codec, WM8753_HIFI) & 0x01e0; + u16 hifi = wm8753_read_reg_cache(codec, WM8753_HIFI) & 0x01e0; /* interface format */ switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) { @@ -1025,7 +1072,7 @@ static int wm8753_hdac_set_dai_fmt(struct snd_soc_dai *codec_dai, return -EINVAL; } - snd_soc_write(codec, WM8753_HIFI, hifi); + wm8753_write(codec, WM8753_HIFI, hifi); return 0; } @@ -1038,8 +1085,8 @@ static int wm8753_i2s_set_dai_fmt(struct snd_soc_dai *codec_dai, struct snd_soc_codec *codec = codec_dai->codec; u16 ioctl, hifi; - hifi = snd_soc_read(codec, WM8753_HIFI) & 0x011f; - ioctl = snd_soc_read(codec, WM8753_IOCTL) & 0x00ae; + hifi = wm8753_read_reg_cache(codec, WM8753_HIFI) & 0x011f; + ioctl = wm8753_read_reg_cache(codec, WM8753_IOCTL) & 0x00ae; /* set master/slave audio interface */ switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) { @@ -1093,8 +1140,8 @@ static int wm8753_i2s_set_dai_fmt(struct snd_soc_dai *codec_dai, return -EINVAL; } - snd_soc_write(codec, WM8753_HIFI, hifi); - snd_soc_write(codec, WM8753_IOCTL, ioctl); + wm8753_write(codec, WM8753_HIFI, hifi); + wm8753_write(codec, WM8753_IOCTL, ioctl); return 0; } @@ -1115,8 +1162,8 @@ static int wm8753_i2s_hw_params(struct snd_pcm_substream *substream, struct snd_soc_pcm_runtime *rtd = substream->private_data; struct snd_soc_codec *codec = rtd->codec; struct wm8753_priv *wm8753 = snd_soc_codec_get_drvdata(codec); - u16 srate = snd_soc_read(codec, WM8753_SRATE1) & 0x01c0; - u16 hifi = snd_soc_read(codec, WM8753_HIFI) & 0x01f3; + u16 srate = wm8753_read_reg_cache(codec, WM8753_SRATE1) & 0x01c0; + u16 hifi = wm8753_read_reg_cache(codec, WM8753_HIFI) & 0x01f3; int coeff; /* is digital filter coefficient valid ? */ @@ -1125,7 +1172,7 @@ static int wm8753_i2s_hw_params(struct snd_pcm_substream *substream, printk(KERN_ERR "wm8753 invalid MCLK or rate\n"); return coeff; } - snd_soc_write(codec, WM8753_SRATE1, srate | (coeff_div[coeff].sr << 1) | + wm8753_write(codec, WM8753_SRATE1, srate | (coeff_div[coeff].sr << 1) | coeff_div[coeff].usb); /* bit size */ @@ -1143,7 +1190,7 @@ static int wm8753_i2s_hw_params(struct snd_pcm_substream *substream, break; } - snd_soc_write(codec, WM8753_HIFI, hifi); + wm8753_write(codec, WM8753_HIFI, hifi); return 0; } @@ -1154,8 +1201,8 @@ static int wm8753_mode1v_set_dai_fmt(struct snd_soc_dai *codec_dai, u16 clock; /* set clk source as pcmclk */ - clock = snd_soc_read(codec, WM8753_CLOCK) & 0xfffb; - snd_soc_write(codec, WM8753_CLOCK, clock); + clock = wm8753_read_reg_cache(codec, WM8753_CLOCK) & 0xfffb; + wm8753_write(codec, WM8753_CLOCK, clock); if (wm8753_vdac_adc_set_dai_fmt(codec_dai, fmt) < 0) return -EINVAL; @@ -1177,8 +1224,8 @@ static int wm8753_mode2_set_dai_fmt(struct snd_soc_dai *codec_dai, u16 clock; /* set clk source as pcmclk */ - clock = snd_soc_read(codec, WM8753_CLOCK) & 0xfffb; - snd_soc_write(codec, WM8753_CLOCK, clock); + clock = wm8753_read_reg_cache(codec, WM8753_CLOCK) & 0xfffb; + wm8753_write(codec, WM8753_CLOCK, clock); if (wm8753_vdac_adc_set_dai_fmt(codec_dai, fmt) < 0) return -EINVAL; @@ -1192,8 +1239,8 @@ static int wm8753_mode3_4_set_dai_fmt(struct snd_soc_dai *codec_dai, u16 clock; /* set clk source as mclk */ - clock = snd_soc_read(codec, WM8753_CLOCK) & 0xfffb; - snd_soc_write(codec, WM8753_CLOCK, clock | 0x4); + clock = wm8753_read_reg_cache(codec, WM8753_CLOCK) & 0xfffb; + wm8753_write(codec, WM8753_CLOCK, clock | 0x4); if (wm8753_hdac_set_dai_fmt(codec_dai, fmt) < 0) return -EINVAL; @@ -1205,19 +1252,19 @@ static int wm8753_mode3_4_set_dai_fmt(struct snd_soc_dai *codec_dai, static int wm8753_mute(struct snd_soc_dai *dai, int mute) { struct snd_soc_codec *codec = dai->codec; - u16 mute_reg = snd_soc_read(codec, WM8753_DAC) & 0xfff7; + u16 mute_reg = wm8753_read_reg_cache(codec, WM8753_DAC) & 0xfff7; struct wm8753_priv *wm8753 = snd_soc_codec_get_drvdata(codec); /* the digital mute covers the HiFi and Voice DAC's on the WM8753. * make sure we check if they are not both active when we mute */ if (mute && wm8753->dai_func == 1) { if (!codec->active) - snd_soc_write(codec, WM8753_DAC, mute_reg | 0x8); + wm8753_write(codec, WM8753_DAC, mute_reg | 0x8); } else { if (mute) - snd_soc_write(codec, WM8753_DAC, mute_reg | 0x8); + wm8753_write(codec, WM8753_DAC, mute_reg | 0x8); else - snd_soc_write(codec, WM8753_DAC, mute_reg); + wm8753_write(codec, WM8753_DAC, mute_reg); } return 0; @@ -1226,23 +1273,23 @@ static int wm8753_mute(struct snd_soc_dai *dai, int mute) static int wm8753_set_bias_level(struct snd_soc_codec *codec, enum snd_soc_bias_level level) { - u16 pwr_reg = snd_soc_read(codec, WM8753_PWR1) & 0xfe3e; + u16 pwr_reg = wm8753_read_reg_cache(codec, WM8753_PWR1) & 0xfe3e; switch (level) { case SND_SOC_BIAS_ON: /* set vmid to 50k and unmute dac */ - snd_soc_write(codec, WM8753_PWR1, pwr_reg | 0x00c0); + wm8753_write(codec, WM8753_PWR1, pwr_reg | 0x00c0); break; case SND_SOC_BIAS_PREPARE: /* set vmid to 5k for quick power up */ - snd_soc_write(codec, WM8753_PWR1, pwr_reg | 0x01c1); + wm8753_write(codec, WM8753_PWR1, pwr_reg | 0x01c1); break; case SND_SOC_BIAS_STANDBY: /* mute dac and set vmid to 500k, enable VREF */ - snd_soc_write(codec, WM8753_PWR1, pwr_reg | 0x0141); + wm8753_write(codec, WM8753_PWR1, pwr_reg | 0x0141); break; case SND_SOC_BIAS_OFF: - snd_soc_write(codec, WM8753_PWR1, 0x0001); + wm8753_write(codec, WM8753_PWR1, 0x0001); break; } codec->bias_level = level; @@ -1430,7 +1477,7 @@ static void wm8753_set_dai_mode(struct snd_soc_codec *codec, else dai->driver = &wm8753_all_dai[(wm8753->dai_func << 1) + 1]; } - snd_soc_write(codec, WM8753_IOCTL, wm8753->dai_func); + wm8753_write(codec, WM8753_IOCTL, wm8753->dai_func); } static void wm8753_work(struct work_struct *work) @@ -1448,19 +1495,22 @@ static int wm8753_suspend(struct snd_soc_codec *codec, pm_message_t state) static int wm8753_resume(struct snd_soc_codec *codec) { - u16 *reg_cache = codec->reg_cache; int i; + u8 data[2]; + u16 *cache = codec->reg_cache; /* Sync reg_cache with the hardware */ - for (i = 1; i < ARRAY_SIZE(wm8753_reg); i++) { - if (i == WM8753_RESET) + for (i = 0; i < ARRAY_SIZE(wm8753_reg); i++) { + if (i + 1 == WM8753_RESET) continue; /* No point in writing hardware default values back */ - if (reg_cache[i] == wm8753_reg[i]) + if (cache[i] == wm8753_reg[i]) continue; - snd_soc_write(codec, i, reg_cache[i]); + data[0] = ((i + 1) << 1) | ((cache[i] >> 8) & 0x0001); + data[1] = cache[i] & 0x00ff; + codec->hw_write(codec->control_data, data, 2); } wm8753_set_bias_level(codec, SND_SOC_BIAS_STANDBY); @@ -1498,7 +1548,7 @@ static int run_delayed_work(struct delayed_work *dwork) static int wm8753_probe(struct snd_soc_codec *codec) { struct wm8753_priv *wm8753 = snd_soc_codec_get_drvdata(codec); - int ret; + int ret = 0, reg; INIT_DELAYED_WORK(&codec->delayed_work, wm8753_work); @@ -1523,16 +1573,26 @@ static int wm8753_probe(struct snd_soc_codec *codec) msecs_to_jiffies(caps_charge)); /* set the update bits */ - snd_soc_update_bits(codec, WM8753_LDAC, 0x0100, 0x0100); - snd_soc_update_bits(codec, WM8753_RDAC, 0x0100, 0x0100); - snd_soc_update_bits(codec, WM8753_LDAC, 0x0100, 0x0100); - snd_soc_update_bits(codec, WM8753_RDAC, 0x0100, 0x0100); - snd_soc_update_bits(codec, WM8753_LOUT1V, 0x0100, 0x0100); - snd_soc_update_bits(codec, WM8753_ROUT1V, 0x0100, 0x0100); - snd_soc_update_bits(codec, WM8753_LOUT2V, 0x0100, 0x0100); - snd_soc_update_bits(codec, WM8753_ROUT2V, 0x0100, 0x0100); - snd_soc_update_bits(codec, WM8753_LINVOL, 0x0100, 0x0100); - snd_soc_update_bits(codec, WM8753_RINVOL, 0x0100, 0x0100); + reg = wm8753_read_reg_cache(codec, WM8753_LDAC); + wm8753_write(codec, WM8753_LDAC, reg | 0x0100); + reg = wm8753_read_reg_cache(codec, WM8753_RDAC); + wm8753_write(codec, WM8753_RDAC, reg | 0x0100); + reg = wm8753_read_reg_cache(codec, WM8753_LADC); + wm8753_write(codec, WM8753_LADC, reg | 0x0100); + reg = wm8753_read_reg_cache(codec, WM8753_RADC); + wm8753_write(codec, WM8753_RADC, reg | 0x0100); + reg = wm8753_read_reg_cache(codec, WM8753_LOUT1V); + wm8753_write(codec, WM8753_LOUT1V, reg | 0x0100); + reg = wm8753_read_reg_cache(codec, WM8753_ROUT1V); + wm8753_write(codec, WM8753_ROUT1V, reg | 0x0100); + reg = wm8753_read_reg_cache(codec, WM8753_LOUT2V); + wm8753_write(codec, WM8753_LOUT2V, reg | 0x0100); + reg = wm8753_read_reg_cache(codec, WM8753_ROUT2V); + wm8753_write(codec, WM8753_ROUT2V, reg | 0x0100); + reg = wm8753_read_reg_cache(codec, WM8753_LINVOL); + wm8753_write(codec, WM8753_LINVOL, reg | 0x0100); + reg = wm8753_read_reg_cache(codec, WM8753_RINVOL); + wm8753_write(codec, WM8753_RINVOL, reg | 0x0100); snd_soc_add_controls(codec, wm8753_snd_controls, ARRAY_SIZE(wm8753_snd_controls)); diff --git a/trunk/sound/soc/codecs/wm8904.c b/trunk/sound/soc/codecs/wm8904.c index 1ec12eff0620..9001cc48ba13 100644 --- a/trunk/sound/soc/codecs/wm8904.c +++ b/trunk/sound/soc/codecs/wm8904.c @@ -50,6 +50,8 @@ static const char *wm8904_supply_names[WM8904_NUM_SUPPLIES] = { /* codec private data */ struct wm8904_priv { + u16 reg_cache[WM8904_MAX_REGISTER + 1]; + enum wm8904_type devtype; void *control_data; @@ -2092,7 +2094,7 @@ static int wm8904_digital_mute(struct snd_soc_dai *codec_dai, int mute) static void wm8904_sync_cache(struct snd_soc_codec *codec) { - u16 *reg_cache = codec->reg_cache; + struct wm8904_priv *wm8904 = snd_soc_codec_get_drvdata(codec); int i; if (!codec->cache_sync) @@ -2103,14 +2105,14 @@ static void wm8904_sync_cache(struct snd_soc_codec *codec) /* Sync back cached values if they're different from the * hardware default. */ - for (i = 1; i < codec->driver->reg_cache_size; i++) { + for (i = 1; i < ARRAY_SIZE(wm8904->reg_cache); i++) { if (!wm8904_access[i].writable) continue; - if (reg_cache[i] == wm8904_reg[i]) + if (wm8904->reg_cache[i] == wm8904_reg[i]) continue; - snd_soc_write(codec, i, reg_cache[i]); + snd_soc_write(codec, i, wm8904->reg_cache[i]); } codec->cache_sync = 0; @@ -2369,7 +2371,6 @@ static int wm8904_probe(struct snd_soc_codec *codec) { struct wm8904_priv *wm8904 = snd_soc_codec_get_drvdata(codec); struct wm8904_pdata *pdata = wm8904->pdata; - u16 *reg_cache = codec->reg_cache; int ret, i; codec->cache_sync = 1; @@ -2436,19 +2437,19 @@ static int wm8904_probe(struct snd_soc_codec *codec) } /* Change some default settings - latch VU and enable ZC */ - reg_cache[WM8904_ADC_DIGITAL_VOLUME_LEFT] |= WM8904_ADC_VU; - reg_cache[WM8904_ADC_DIGITAL_VOLUME_RIGHT] |= WM8904_ADC_VU; - reg_cache[WM8904_DAC_DIGITAL_VOLUME_LEFT] |= WM8904_DAC_VU; - reg_cache[WM8904_DAC_DIGITAL_VOLUME_RIGHT] |= WM8904_DAC_VU; - reg_cache[WM8904_ANALOGUE_OUT1_LEFT] |= WM8904_HPOUT_VU | + wm8904->reg_cache[WM8904_ADC_DIGITAL_VOLUME_LEFT] |= WM8904_ADC_VU; + wm8904->reg_cache[WM8904_ADC_DIGITAL_VOLUME_RIGHT] |= WM8904_ADC_VU; + wm8904->reg_cache[WM8904_DAC_DIGITAL_VOLUME_LEFT] |= WM8904_DAC_VU; + wm8904->reg_cache[WM8904_DAC_DIGITAL_VOLUME_RIGHT] |= WM8904_DAC_VU; + wm8904->reg_cache[WM8904_ANALOGUE_OUT1_LEFT] |= WM8904_HPOUT_VU | WM8904_HPOUTLZC; - reg_cache[WM8904_ANALOGUE_OUT1_RIGHT] |= WM8904_HPOUT_VU | + wm8904->reg_cache[WM8904_ANALOGUE_OUT1_RIGHT] |= WM8904_HPOUT_VU | WM8904_HPOUTRZC; - reg_cache[WM8904_ANALOGUE_OUT2_LEFT] |= WM8904_LINEOUT_VU | + wm8904->reg_cache[WM8904_ANALOGUE_OUT2_LEFT] |= WM8904_LINEOUT_VU | WM8904_LINEOUTLZC; - reg_cache[WM8904_ANALOGUE_OUT2_RIGHT] |= WM8904_LINEOUT_VU | + wm8904->reg_cache[WM8904_ANALOGUE_OUT2_RIGHT] |= WM8904_LINEOUT_VU | WM8904_LINEOUTRZC; - reg_cache[WM8904_CLOCK_RATES_0] &= ~WM8904_SR_MODE; + wm8904->reg_cache[WM8904_CLOCK_RATES_0] &= ~WM8904_SR_MODE; /* Apply configuration from the platform data. */ if (wm8904->pdata) { @@ -2456,23 +2457,23 @@ static int wm8904_probe(struct snd_soc_codec *codec) if (!pdata->gpio_cfg[i]) continue; - reg_cache[WM8904_GPIO_CONTROL_1 + i] + wm8904->reg_cache[WM8904_GPIO_CONTROL_1 + i] = pdata->gpio_cfg[i] & 0xffff; } /* Zero is the default value for these anyway */ for (i = 0; i < WM8904_MIC_REGS; i++) - reg_cache[WM8904_MIC_BIAS_CONTROL_0 + i] + wm8904->reg_cache[WM8904_MIC_BIAS_CONTROL_0 + i] = pdata->mic_cfg[i]; } /* Set Class W by default - this will be managed by the Class * G widget at runtime where bypass paths are available. */ - reg_cache[WM8904_CLASS_W_0] |= WM8904_CP_DYN_PWR; + wm8904->reg_cache[WM8904_CLASS_W_0] |= WM8904_CP_DYN_PWR; /* Use normal bias source */ - reg_cache[WM8904_BIAS_CONTROL_0] &= ~WM8904_POBCTRL; + wm8904->reg_cache[WM8904_BIAS_CONTROL_0] &= ~WM8904_POBCTRL; wm8904_set_bias_level(codec, SND_SOC_BIAS_STANDBY); diff --git a/trunk/sound/soc/codecs/wm8940.c b/trunk/sound/soc/codecs/wm8940.c index 23086e2c976a..2cb16f895c46 100644 --- a/trunk/sound/soc/codecs/wm8940.c +++ b/trunk/sound/soc/codecs/wm8940.c @@ -768,7 +768,6 @@ static __devinit int wm8940_i2c_probe(struct i2c_client *i2c, i2c_set_clientdata(i2c, wm8940); wm8940->control_data = i2c; - wm8940->control_type = SND_SOC_I2C; ret = snd_soc_register_codec(&i2c->dev, &soc_codec_dev_wm8940, &wm8940_dai, 1); diff --git a/trunk/sound/soc/codecs/wm8955.c b/trunk/sound/soc/codecs/wm8955.c index 2ac35b0be86a..9cbab8e1de01 100644 --- a/trunk/sound/soc/codecs/wm8955.c +++ b/trunk/sound/soc/codecs/wm8955.c @@ -42,6 +42,8 @@ static const char *wm8955_supply_names[WM8955_NUM_SUPPLIES] = { struct wm8955_priv { enum snd_soc_control_type control_type; + u16 reg_cache[WM8955_MAX_REGISTER + 1]; + unsigned int mclk_rate; int deemph; @@ -766,7 +768,6 @@ static int wm8955_set_bias_level(struct snd_soc_codec *codec, enum snd_soc_bias_level level) { struct wm8955_priv *wm8955 = snd_soc_codec_get_drvdata(codec); - u16 *reg_cache = codec->reg_cache; int ret, i; switch (level) { @@ -799,14 +800,14 @@ static int wm8955_set_bias_level(struct snd_soc_codec *codec, /* Sync back cached values if they're * different from the hardware default. */ - for (i = 0; i < codec->driver->reg_cache_size; i++) { + for (i = 0; i < ARRAY_SIZE(wm8955->reg_cache); i++) { if (i == WM8955_RESET) continue; - if (reg_cache[i] == wm8955_reg[i]) + if (wm8955->reg_cache[i] == wm8955_reg[i]) continue; - snd_soc_write(codec, i, reg_cache[i]); + snd_soc_write(codec, i, wm8955->reg_cache[i]); } /* Enable VREF and VMID */ @@ -901,7 +902,6 @@ static int wm8955_probe(struct snd_soc_codec *codec) { struct wm8955_priv *wm8955 = snd_soc_codec_get_drvdata(codec); struct wm8955_pdata *pdata = dev_get_platdata(codec->dev); - u16 *reg_cache = codec->reg_cache; int ret, i; ret = snd_soc_codec_set_cache_io(codec, 7, 9, wm8955->control_type); @@ -934,25 +934,25 @@ static int wm8955_probe(struct snd_soc_codec *codec) } /* Change some default settings - latch VU and enable ZC */ - reg_cache[WM8955_LEFT_DAC_VOLUME] |= WM8955_LDVU; - reg_cache[WM8955_RIGHT_DAC_VOLUME] |= WM8955_RDVU; - reg_cache[WM8955_LOUT1_VOLUME] |= WM8955_LO1VU | WM8955_LO1ZC; - reg_cache[WM8955_ROUT1_VOLUME] |= WM8955_RO1VU | WM8955_RO1ZC; - reg_cache[WM8955_LOUT2_VOLUME] |= WM8955_LO2VU | WM8955_LO2ZC; - reg_cache[WM8955_ROUT2_VOLUME] |= WM8955_RO2VU | WM8955_RO2ZC; - reg_cache[WM8955_MONOOUT_VOLUME] |= WM8955_MOZC; + wm8955->reg_cache[WM8955_LEFT_DAC_VOLUME] |= WM8955_LDVU; + wm8955->reg_cache[WM8955_RIGHT_DAC_VOLUME] |= WM8955_RDVU; + wm8955->reg_cache[WM8955_LOUT1_VOLUME] |= WM8955_LO1VU | WM8955_LO1ZC; + wm8955->reg_cache[WM8955_ROUT1_VOLUME] |= WM8955_RO1VU | WM8955_RO1ZC; + wm8955->reg_cache[WM8955_LOUT2_VOLUME] |= WM8955_LO2VU | WM8955_LO2ZC; + wm8955->reg_cache[WM8955_ROUT2_VOLUME] |= WM8955_RO2VU | WM8955_RO2ZC; + wm8955->reg_cache[WM8955_MONOOUT_VOLUME] |= WM8955_MOZC; /* Also enable adaptive bass boost by default */ - reg_cache[WM8955_BASS_CONTROL] |= WM8955_BB; + wm8955->reg_cache[WM8955_BASS_CONTROL] |= WM8955_BB; /* Set platform data values */ if (pdata) { if (pdata->out2_speaker) - reg_cache[WM8955_ADDITIONAL_CONTROL_2] + wm8955->reg_cache[WM8955_ADDITIONAL_CONTROL_2] |= WM8955_ROUT2INV; if (pdata->monoin_diff) - reg_cache[WM8955_MONO_OUT_MIX_1] + wm8955->reg_cache[WM8955_MONO_OUT_MIX_1] |= WM8955_DMEN; } @@ -1003,7 +1003,6 @@ static __devinit int wm8955_i2c_probe(struct i2c_client *i2c, return -ENOMEM; i2c_set_clientdata(i2c, wm8955); - wm8955->control_type = SND_SOC_I2C; ret = snd_soc_register_codec(&i2c->dev, &soc_codec_dev_wm8955, &wm8955_dai, 1); diff --git a/trunk/sound/soc/codecs/wm8960.c b/trunk/sound/soc/codecs/wm8960.c index ff6ff2f529d2..21986c42272f 100644 --- a/trunk/sound/soc/codecs/wm8960.c +++ b/trunk/sound/soc/codecs/wm8960.c @@ -1013,7 +1013,6 @@ static __devinit int wm8960_i2c_probe(struct i2c_client *i2c, return -ENOMEM; i2c_set_clientdata(i2c, wm8960); - wm8960->control_type = SND_SOC_I2C; wm8960->control_data = i2c; ret = snd_soc_register_codec(&i2c->dev, diff --git a/trunk/sound/soc/codecs/wm8962.c b/trunk/sound/soc/codecs/wm8962.c index 7c421cc837bd..1304ca91a11c 100644 --- a/trunk/sound/soc/codecs/wm8962.c +++ b/trunk/sound/soc/codecs/wm8962.c @@ -52,6 +52,8 @@ static const char *wm8962_supply_names[WM8962_NUM_SUPPLIES] = { struct wm8962_priv { struct snd_soc_codec *codec; + u16 reg_cache[WM8962_MAX_REGISTER + 1]; + int sysclk; int sysclk_rate; @@ -1989,7 +1991,8 @@ static int wm8962_put_hp_sw(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol) { struct snd_soc_codec *codec = snd_kcontrol_chip(kcontrol); - u16 *reg_cache = codec->reg_cache; + struct wm8962_priv *wm8962 = snd_soc_codec_get_drvdata(codec); + u16 *reg_cache = wm8962->reg_cache; int ret; /* Apply the update (if any) */ @@ -2017,7 +2020,8 @@ static int wm8962_put_spk_sw(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol) { struct snd_soc_codec *codec = snd_kcontrol_chip(kcontrol); - u16 *reg_cache = codec->reg_cache; + struct wm8962_priv *wm8962 = snd_soc_codec_get_drvdata(codec); + u16 *reg_cache = wm8962->reg_cache; int ret; /* Apply the update (if any) */ @@ -2325,7 +2329,8 @@ static int out_pga_event(struct snd_soc_dapm_widget *w, struct snd_kcontrol *kcontrol, int event) { struct snd_soc_codec *codec = w->codec; - u16 *reg_cache = codec->reg_cache; + struct wm8962_priv *wm8962 = snd_soc_codec_get_drvdata(codec); + u16 *reg_cache = wm8962->reg_cache; int reg; switch (w->shift) { @@ -2714,7 +2719,7 @@ static int wm8962_add_widgets(struct snd_soc_codec *codec) static void wm8962_sync_cache(struct snd_soc_codec *codec) { - u16 *reg_cache = codec->reg_cache; + struct wm8962_priv *wm8962 = snd_soc_codec_get_drvdata(codec); int i; if (!codec->cache_sync) @@ -2727,13 +2732,13 @@ static void wm8962_sync_cache(struct snd_soc_codec *codec) /* Sync back cached values if they're different from the * hardware default. */ - for (i = 1; i < codec->driver->reg_cache_size; i++) { + for (i = 1; i < ARRAY_SIZE(wm8962->reg_cache); i++) { if (i == WM8962_SOFTWARE_RESET) continue; - if (reg_cache[i] == wm8962_reg[i]) + if (wm8962->reg_cache[i] == wm8962_reg[i]) continue; - snd_soc_write(codec, i, reg_cache[i]); + snd_soc_write(codec, i, wm8962->reg_cache[i]); } codec->cache_sync = 0; @@ -3401,11 +3406,12 @@ EXPORT_SYMBOL_GPL(wm8962_mic_detect); #ifdef CONFIG_PM static int wm8962_resume(struct snd_soc_codec *codec) { + struct wm8962_priv *wm8962 = snd_soc_codec_get_drvdata(codec); u16 *reg_cache = codec->reg_cache; int i; /* Restore the registers */ - for (i = 1; i < codec->driver->reg_cache_size; i++) { + for (i = 1; i < ARRAY_SIZE(wm8962->reg_cache); i++) { switch (i) { case WM8962_SOFTWARE_RESET: continue; @@ -3699,7 +3705,6 @@ static int wm8962_probe(struct snd_soc_codec *codec) struct wm8962_pdata *pdata = dev_get_platdata(codec->dev); struct i2c_client *i2c = container_of(codec->dev, struct i2c_client, dev); - u16 *reg_cache = codec->reg_cache; int i, trigger, irq_pol; wm8962->codec = codec; @@ -3799,7 +3804,7 @@ static int wm8962_probe(struct snd_soc_codec *codec) /* Put the speakers into mono mode? */ if (pdata->spk_mono) - reg_cache[WM8962_CLASS_D_CONTROL_2] + wm8962->reg_cache[WM8962_CLASS_D_CONTROL_2] |= WM8962_SPK_MONO; /* Micbias setup, detection enable and detection @@ -3814,16 +3819,16 @@ static int wm8962_probe(struct snd_soc_codec *codec) } /* Latch volume update bits */ - reg_cache[WM8962_LEFT_INPUT_VOLUME] |= WM8962_IN_VU; - reg_cache[WM8962_RIGHT_INPUT_VOLUME] |= WM8962_IN_VU; - reg_cache[WM8962_LEFT_ADC_VOLUME] |= WM8962_ADC_VU; - reg_cache[WM8962_RIGHT_ADC_VOLUME] |= WM8962_ADC_VU; - reg_cache[WM8962_LEFT_DAC_VOLUME] |= WM8962_DAC_VU; - reg_cache[WM8962_RIGHT_DAC_VOLUME] |= WM8962_DAC_VU; - reg_cache[WM8962_SPKOUTL_VOLUME] |= WM8962_SPKOUT_VU; - reg_cache[WM8962_SPKOUTR_VOLUME] |= WM8962_SPKOUT_VU; - reg_cache[WM8962_HPOUTL_VOLUME] |= WM8962_HPOUT_VU; - reg_cache[WM8962_HPOUTR_VOLUME] |= WM8962_HPOUT_VU; + wm8962->reg_cache[WM8962_LEFT_INPUT_VOLUME] |= WM8962_IN_VU; + wm8962->reg_cache[WM8962_RIGHT_INPUT_VOLUME] |= WM8962_IN_VU; + wm8962->reg_cache[WM8962_LEFT_ADC_VOLUME] |= WM8962_ADC_VU; + wm8962->reg_cache[WM8962_RIGHT_ADC_VOLUME] |= WM8962_ADC_VU; + wm8962->reg_cache[WM8962_LEFT_DAC_VOLUME] |= WM8962_DAC_VU; + wm8962->reg_cache[WM8962_RIGHT_DAC_VOLUME] |= WM8962_DAC_VU; + wm8962->reg_cache[WM8962_SPKOUTL_VOLUME] |= WM8962_SPKOUT_VU; + wm8962->reg_cache[WM8962_SPKOUTR_VOLUME] |= WM8962_SPKOUT_VU; + wm8962->reg_cache[WM8962_HPOUTL_VOLUME] |= WM8962_HPOUT_VU; + wm8962->reg_cache[WM8962_HPOUTR_VOLUME] |= WM8962_HPOUT_VU; wm8962_add_widgets(codec); diff --git a/trunk/sound/soc/codecs/wm8971.c b/trunk/sound/soc/codecs/wm8971.c index 9f18db6e167c..63f6dbf5d070 100644 --- a/trunk/sound/soc/codecs/wm8971.c +++ b/trunk/sound/soc/codecs/wm8971.c @@ -718,7 +718,6 @@ static __devinit int wm8971_i2c_probe(struct i2c_client *i2c, if (wm8971 == NULL) return -ENOMEM; - wm8971->control_type = SND_SOC_I2C; i2c_set_clientdata(i2c, wm8971); ret = snd_soc_register_codec(&i2c->dev, diff --git a/trunk/sound/soc/codecs/wm9081.c b/trunk/sound/soc/codecs/wm9081.c index a486670966bd..ecc7c37180c7 100644 --- a/trunk/sound/soc/codecs/wm9081.c +++ b/trunk/sound/soc/codecs/wm9081.c @@ -1335,7 +1335,6 @@ static __devinit int wm9081_i2c_probe(struct i2c_client *i2c, return -ENOMEM; i2c_set_clientdata(i2c, wm9081); - wm9081->control_type = SND_SOC_I2C; wm9081->control_data = i2c; ret = snd_soc_register_codec(&i2c->dev, diff --git a/trunk/sound/soc/codecs/wm9090.c b/trunk/sound/soc/codecs/wm9090.c index 6e5f64f627cb..99c046ba46bb 100644 --- a/trunk/sound/soc/codecs/wm9090.c +++ b/trunk/sound/soc/codecs/wm9090.c @@ -141,6 +141,7 @@ static const u16 wm9090_reg_defaults[] = { /* This struct is used to save the context */ struct wm9090_priv { struct mutex mutex; + u16 reg_cache[WM9090_MAX_REGISTER + 1]; struct wm9090_platform_data pdata; void *control_data; }; @@ -551,7 +552,6 @@ static int wm9090_set_bias_level(struct snd_soc_codec *codec, static int wm9090_probe(struct snd_soc_codec *codec) { struct wm9090_priv *wm9090 = snd_soc_codec_get_drvdata(codec); - u16 *reg_cache = codec->reg_cache; int ret; codec->control_data = wm9090->control_data; @@ -576,22 +576,22 @@ static int wm9090_probe(struct snd_soc_codec *codec) /* Configure some defaults; they will be written out when we * bring the bias up. */ - reg_cache[WM9090_IN1_LINE_INPUT_A_VOLUME] |= WM9090_IN1_VU + wm9090->reg_cache[WM9090_IN1_LINE_INPUT_A_VOLUME] |= WM9090_IN1_VU | WM9090_IN1A_ZC; - reg_cache[WM9090_IN1_LINE_INPUT_B_VOLUME] |= WM9090_IN1_VU + wm9090->reg_cache[WM9090_IN1_LINE_INPUT_B_VOLUME] |= WM9090_IN1_VU | WM9090_IN1B_ZC; - reg_cache[WM9090_IN2_LINE_INPUT_A_VOLUME] |= WM9090_IN2_VU + wm9090->reg_cache[WM9090_IN2_LINE_INPUT_A_VOLUME] |= WM9090_IN2_VU | WM9090_IN2A_ZC; - reg_cache[WM9090_IN2_LINE_INPUT_B_VOLUME] |= WM9090_IN2_VU + wm9090->reg_cache[WM9090_IN2_LINE_INPUT_B_VOLUME] |= WM9090_IN2_VU | WM9090_IN2B_ZC; - reg_cache[WM9090_SPEAKER_VOLUME_LEFT] |= + wm9090->reg_cache[WM9090_SPEAKER_VOLUME_LEFT] |= WM9090_SPKOUT_VU | WM9090_SPKOUTL_ZC; - reg_cache[WM9090_LEFT_OUTPUT_VOLUME] |= + wm9090->reg_cache[WM9090_LEFT_OUTPUT_VOLUME] |= WM9090_HPOUT1_VU | WM9090_HPOUT1L_ZC; - reg_cache[WM9090_RIGHT_OUTPUT_VOLUME] |= + wm9090->reg_cache[WM9090_RIGHT_OUTPUT_VOLUME] |= WM9090_HPOUT1_VU | WM9090_HPOUT1R_ZC; - reg_cache[WM9090_CLOCKING_1] |= WM9090_TOCLK_ENA; + wm9090->reg_cache[WM9090_CLOCKING_1] |= WM9090_TOCLK_ENA; wm9090_set_bias_level(codec, SND_SOC_BIAS_STANDBY); diff --git a/trunk/tools/perf/Documentation/perf-annotate.txt b/trunk/tools/perf/Documentation/perf-annotate.txt index 6f5a498608b2..b2c63309a651 100644 --- a/trunk/tools/perf/Documentation/perf-annotate.txt +++ b/trunk/tools/perf/Documentation/perf-annotate.txt @@ -24,47 +24,12 @@ OPTIONS --input=:: Input file name. (default: perf.data) --d:: ---dsos=:: - Only consider symbols in these dsos. --s:: ---symbol=:: - Symbol to annotate. - --f:: ---force:: - Don't complain, do it. - --v:: ---verbose:: - Be more verbose. (Show symbol address, etc) - --D:: ---dump-raw-trace:: - Dump raw trace in ASCII. - --k:: ---vmlinux=:: - vmlinux pathname. - --m:: ---modules:: - Load module symbols. WARNING: use only with -k and LIVE kernel. - --l:: ---print-line:: - Print matching source lines (may be slow). - --P:: ---full-paths:: - Don't shorten the displayed pathnames. - --stdio:: Use the stdio interface. --tui:: Use the TUI interface Use of --tui requires a tty, if one is not present, as when piping to other commands, the stdio interface is used. This interfaces starts by centering on the line with more - samples, TAB/UNTAB cycles through the lines with more samples. + samples, TAB/UNTAB cycles thru the lines with more samples. SEE ALSO -------- diff --git a/trunk/tools/perf/Documentation/perf-buildid-list.txt b/trunk/tools/perf/Documentation/perf-buildid-list.txt index 5eaac6f26d51..01b642c0bf8f 100644 --- a/trunk/tools/perf/Documentation/perf-buildid-list.txt +++ b/trunk/tools/perf/Documentation/perf-buildid-list.txt @@ -18,9 +18,6 @@ perf report. OPTIONS ------- --H:: ---with-hits:: - Show only DSOs with hits. -i:: --input=:: Input file name. (default: perf.data) diff --git a/trunk/tools/perf/Documentation/perf-diff.txt b/trunk/tools/perf/Documentation/perf-diff.txt index 74d7481ed7a6..20d97d84ea1c 100644 --- a/trunk/tools/perf/Documentation/perf-diff.txt +++ b/trunk/tools/perf/Documentation/perf-diff.txt @@ -19,18 +19,6 @@ If no parameters are passed it will assume perf.data.old and perf.data. OPTIONS ------- --M:: ---displacement:: - Show position displacement relative to baseline. - --D:: ---dump-raw-trace:: - Dump raw trace in ASCII. - --m:: ---modules:: - Load module symbols. WARNING: use only with -k and LIVE kernel - -d:: --dsos=:: Only consider symbols in these dsos. CSV that understands @@ -54,7 +42,7 @@ OPTIONS --field-separator=:: Use a special separator character and don't pad with spaces, replacing - all occurrences of this separator in symbol names (and other output) + all occurances of this separator in symbol names (and other output) with a '.' character, that thus it's the only non valid separator. -v:: @@ -62,13 +50,6 @@ OPTIONS Be verbose, for instance, show the raw counts in addition to the diff. --f:: ---force:: - Don't complain, do it. - ---symfs=:: - Look for files with symbols relative to this directory. - SEE ALSO -------- linkperf:perf-record[1] diff --git a/trunk/tools/perf/Documentation/perf-kvm.txt b/trunk/tools/perf/Documentation/perf-kvm.txt index dd84cb2f0a88..d004e19fe6d6 100644 --- a/trunk/tools/perf/Documentation/perf-kvm.txt +++ b/trunk/tools/perf/Documentation/perf-kvm.txt @@ -22,7 +22,7 @@ There are a couple of variants of perf kvm: a performance counter profile of guest os in realtime of an arbitrary workload. - 'perf kvm record ' to record the performance counter profile + 'perf kvm record ' to record the performance couinter profile of an arbitrary workload and save it into a perf data file. If both --host and --guest are input, the perf data file name is perf.data.kvm. If there is no --host but --guest, the file name is perf.data.guest. @@ -40,12 +40,6 @@ There are a couple of variants of perf kvm: OPTIONS ------- --i:: ---input=:: - Input file name. --o:: ---output:: - Output file name. --host=:: Collect host side performance profile. --guest=:: diff --git a/trunk/tools/perf/Documentation/perf-lock.txt b/trunk/tools/perf/Documentation/perf-lock.txt index 921de259ea10..b317102138c8 100644 --- a/trunk/tools/perf/Documentation/perf-lock.txt +++ b/trunk/tools/perf/Documentation/perf-lock.txt @@ -24,21 +24,6 @@ and statistics with this 'perf lock' command. 'perf lock report' reports statistical data. -OPTIONS -------- - --i:: ---input=:: - Input file name. - --v:: ---verbose:: - Be more verbose (show symbol address, etc). - --D:: ---dump-raw-trace:: - Dump raw trace in ASCII. - SEE ALSO -------- linkperf:perf[1] diff --git a/trunk/tools/perf/Documentation/perf-probe.txt b/trunk/tools/perf/Documentation/perf-probe.txt index 86b797a35aa6..62de1b7f4e76 100644 --- a/trunk/tools/perf/Documentation/perf-probe.txt +++ b/trunk/tools/perf/Documentation/perf-probe.txt @@ -115,9 +115,9 @@ Each probe argument follows below syntax. LINE SYNTAX ----------- -Line range is described by following syntax. +Line range is descripted by following syntax. - "FUNC[:RLN[+NUM|-RLN2]]|SRC[:ALN[+NUM|-ALN2]]" + "FUNC[:RLN[+NUM|-RLN2]]|SRC:ALN[+NUM|-ALN2]" FUNC specifies the function name of showing lines. 'RLN' is the start line number from function entry line, and 'RLN2' is the end line number. As same as diff --git a/trunk/tools/perf/Documentation/perf-record.txt b/trunk/tools/perf/Documentation/perf-record.txt index 52462ae26455..a91f9f9e6e5c 100644 --- a/trunk/tools/perf/Documentation/perf-record.txt +++ b/trunk/tools/perf/Documentation/perf-record.txt @@ -39,24 +39,15 @@ OPTIONS be passed as follows: '\mem:addr[:[r][w][x]]'. If you want to profile read-write accesses in 0x1000, just set 'mem:0x1000:rw'. - ---filter=:: - Event filter. - -a:: ---all-cpus:: - System-wide collection from all CPUs. + System-wide collection. -l:: Scale counter values. -p:: --pid=:: - Record events on existing process ID. - --t:: ---tid=:: - Record events on existing thread ID. + Record events on existing pid. -r:: --realtime=:: @@ -108,11 +99,6 @@ OPTIONS --data:: Sample addresses. --T:: ---timestamp:: - Sample timestamps. Use it with 'perf report -D' to see the timestamps, - for instance. - -n:: --no-samples:: Don't sample. @@ -123,8 +109,8 @@ Collect raw sample records from all opened counters (default for tracepoint coun -C:: --cpu:: -Collect samples only on the list of CPUs provided. Multiple CPUs can be provided as a -comma-separated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2. +Collect samples only on the list of cpus provided. Multiple CPUs can be provided as a +comma-sperated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2. In per-thread mode with inheritance mode on (default), samples are captured only when the thread executes on the designated CPUs. Default is to monitor all CPUs. diff --git a/trunk/tools/perf/Documentation/perf-report.txt b/trunk/tools/perf/Documentation/perf-report.txt index 8ba03d6e5398..12052c9ed0ba 100644 --- a/trunk/tools/perf/Documentation/perf-report.txt +++ b/trunk/tools/perf/Documentation/perf-report.txt @@ -20,11 +20,6 @@ OPTIONS -i:: --input=:: Input file name. (default: perf.data) - --v:: ---verbose:: - Be more verbose. (show symbol address, etc) - -d:: --dsos=:: Only consider symbols in these dsos. CSV that understands @@ -32,10 +27,6 @@ OPTIONS -n:: --show-nr-samples:: Show the number of samples for each symbol - ---showcpuutilization:: - Show sample percentage for different cpu modes. - -T:: --threads:: Show per-thread event counters @@ -48,24 +39,12 @@ OPTIONS Only consider these symbols. CSV that understands file://filename entries. --U:: ---hide-unresolved:: - Only display entries resolved to a symbol. - -s:: --sort=:: Sort by key(s): pid, comm, dso, symbol, parent. --p:: ---parent=:: - regex filter to identify parent, see: '--sort parent' - --x:: ---exclude-other:: - Only display entries with parent-match. - -w:: ---column-widths=:: +--field-width=:: Force each column width to the provided list, for large terminal readability. @@ -73,26 +52,19 @@ OPTIONS --field-separator=:: Use a special separator character and don't pad with spaces, replacing - all occurrences of this separator in symbol names (and other output) + all occurances of this separator in symbol names (and other output) with a '.' character, that thus it's the only non valid separator. --D:: ---dump-raw-trace:: - Dump raw trace in ASCII. - -g [type,min]:: --call-graph:: - Display call chains using type and min percent threshold. + Display callchains using type and min percent threshold. type can be either: - - flat: single column, linear exposure of call chains. + - flat: single column, linear exposure of callchains. - graph: use a graph tree, displaying absolute overhead rates. - fractal: like graph, but displays relative rates. Each branch of the tree is considered as a new profiled object. + Default: fractal,0.5. ---pretty=:: - Pretty printing style. key: normal, raw - --stdio:: Use the stdio interface. --tui:: Use the TUI interface, that is integrated with annotate and allows @@ -100,25 +72,6 @@ OPTIONS requires a tty, if one is not present, as when piping to other commands, the stdio interface is used. --k:: ---vmlinux=:: - vmlinux pathname - ---kallsyms=:: - kallsyms pathname - --m:: ---modules:: - Load module symbols. WARNING: This should only be used with -k and - a LIVE kernel. - --f:: ---force:: - Don't complain, do it. - ---symfs=:: - Look for files with symbols relative to this directory. - SEE ALSO -------- linkperf:perf-stat[1] diff --git a/trunk/tools/perf/Documentation/perf-sched.txt b/trunk/tools/perf/Documentation/perf-sched.txt index 46822d5fde1c..8417644a6166 100644 --- a/trunk/tools/perf/Documentation/perf-sched.txt +++ b/trunk/tools/perf/Documentation/perf-sched.txt @@ -8,11 +8,11 @@ perf-sched - Tool to trace/measure scheduler properties (latencies) SYNOPSIS -------- [verse] -'perf sched' {record|latency|map|replay|trace} +'perf sched' {record|latency|replay|trace} DESCRIPTION ----------- -There are five variants of perf sched: +There are four variants of perf sched: 'perf sched record ' to record the scheduling events of an arbitrary workload. @@ -30,22 +30,8 @@ There are five variants of perf sched: of the workload as it occurred when it was recorded - and can repeat it a number of times, measuring its performance.) - 'perf sched map' to print a textual context-switching outline of - workload captured via perf sched record. Columns stand for - individual CPUs, and the two-letter shortcuts stand for tasks that - are running on a CPU. A '*' denotes the CPU that had the event, and - a dot signals an idle CPU. - OPTIONS ------- --i:: ---input=:: - Input file name. (default: perf.data) - --v:: ---verbose:: - Be more verbose. (show symbol address, etc) - -D:: --dump-raw-trace=:: Display verbose dump of the sched data. diff --git a/trunk/tools/perf/Documentation/perf-stat.txt b/trunk/tools/perf/Documentation/perf-stat.txt index b6da7affbbee..4b3a2d46b437 100644 --- a/trunk/tools/perf/Documentation/perf-stat.txt +++ b/trunk/tools/perf/Documentation/perf-stat.txt @@ -8,8 +8,8 @@ perf-stat - Run a command and gather performance counter statistics SYNOPSIS -------- [verse] -'perf stat' [-e | --event=EVENT] [-a] -'perf stat' [-e | --event=EVENT] [-a] -- [] +'perf stat' [-e | --event=EVENT] [-S] [-a] +'perf stat' [-e | --event=EVENT] [-S] [-a] -- [] DESCRIPTION ----------- @@ -35,54 +35,24 @@ OPTIONS child tasks do not inherit counters -p:: --pid=:: - stat events on existing process id - --t:: ---tid=:: - stat events on existing thread id - + stat events on existing pid -a:: ---all-cpus:: - system-wide collection from all CPUs + system-wide collection -c:: ---scale:: - scale/normalize counter values - --r:: ---repeat=:: - repeat command and print average + stddev (max: 100) + scale counter values -B:: ---big-num:: print large numbers with thousands' separators according to locale -C:: --cpu=:: -Count only on the list of CPUs provided. Multiple CPUs can be provided as a -comma-separated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2. +Count only on the list of cpus provided. Multiple CPUs can be provided as a +comma-sperated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2. In per-thread mode, this option is ignored. The -a option is still necessary to activate system-wide monitoring. Default is to count on all CPUs. --A:: ---no-aggr:: -Do not aggregate counts across all monitored CPUs in system-wide mode (-a). -This option is only valid in system-wide mode. - --n:: ---null:: - null run - don't start any counters - --v:: ---verbose:: - be more verbose (show counter open errors, etc) - --x SEP:: ---field-separator SEP:: -print counts using a CSV-style output to make it easy to import directly into -spreadsheets. Columns are separated by the string specified in SEP. - EXAMPLES -------- diff --git a/trunk/tools/perf/Documentation/perf-test.txt b/trunk/tools/perf/Documentation/perf-test.txt index 2c3b462f64b0..1c4b5f5b7f71 100644 --- a/trunk/tools/perf/Documentation/perf-test.txt +++ b/trunk/tools/perf/Documentation/perf-test.txt @@ -12,7 +12,7 @@ SYNOPSIS DESCRIPTION ----------- -This command does assorted sanity tests, initially through linked routines but +This command does assorted sanity tests, initially thru linked routines but also will look for a directory with more tests in the form of scripts. OPTIONS diff --git a/trunk/tools/perf/Documentation/perf-timechart.txt b/trunk/tools/perf/Documentation/perf-timechart.txt index d7b79e2ba2ad..4b1788355eca 100644 --- a/trunk/tools/perf/Documentation/perf-timechart.txt +++ b/trunk/tools/perf/Documentation/perf-timechart.txt @@ -38,8 +38,6 @@ OPTIONS --process:: Select the processes to display, by name or PID ---symfs=:: - Look for files with symbols relative to this directory. SEE ALSO -------- diff --git a/trunk/tools/perf/Documentation/perf-top.txt b/trunk/tools/perf/Documentation/perf-top.txt index f6eb1cdafb77..1f9687663f2a 100644 --- a/trunk/tools/perf/Documentation/perf-top.txt +++ b/trunk/tools/perf/Documentation/perf-top.txt @@ -12,7 +12,7 @@ SYNOPSIS DESCRIPTION ----------- -This command generates and displays a performance counter profile in real time. +This command generates and displays a performance counter profile in realtime. OPTIONS @@ -27,8 +27,8 @@ OPTIONS -C :: --cpu=:: -Monitor only on the list of CPUs provided. Multiple CPUs can be provided as a -comma-separated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2. +Monitor only on the list of cpus provided. Multiple CPUs can be provided as a +comma-sperated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2. Default is to monitor all CPUS. -d :: @@ -50,10 +50,6 @@ Default is to monitor all CPUS. --count-filter=:: Only display functions with more events than this. --g:: ---group:: - Put the counters into a counter group. - -F :: --freq=:: Profile at this frequency. @@ -72,11 +68,7 @@ Default is to monitor all CPUS. -p :: --pid=:: - Profile events on existing Process ID. - --t :: ---tid=:: - Profile events on existing thread ID. + Profile events on existing pid. -r :: --realtime=:: @@ -86,18 +78,6 @@ Default is to monitor all CPUS. --sym-annotate=:: Annotate this symbol. --K:: ---hide_kernel_symbols:: - Hide kernel symbols. - --U:: ---hide_user_symbols:: - Hide user symbols. - --D:: ---dump-symtab:: - Dump the symbol table used for profiling. - -v:: --verbose:: Be more verbose (show counter open errors, etc). diff --git a/trunk/tools/perf/Documentation/perf-script-perl.txt b/trunk/tools/perf/Documentation/perf-trace-perl.txt similarity index 90% rename from trunk/tools/perf/Documentation/perf-script-perl.txt rename to trunk/tools/perf/Documentation/perf-trace-perl.txt index 5bb41e55a3ac..ee6525ee6d69 100644 --- a/trunk/tools/perf/Documentation/perf-script-perl.txt +++ b/trunk/tools/perf/Documentation/perf-trace-perl.txt @@ -1,19 +1,19 @@ -perf-script-perl(1) +perf-trace-perl(1) ================== NAME ---- -perf-script-perl - Process trace data with a Perl script +perf-trace-perl - Process trace data with a Perl script SYNOPSIS -------- [verse] -'perf script' [-s [Perl]:script[.pl] ] +'perf trace' [-s [Perl]:script[.pl] ] DESCRIPTION ----------- -This perf script option is used to process perf script data using perf's +This perf trace option is used to process perf trace data using perf's built-in Perl interpreter. It reads and processes the input file and displays the results of the trace analysis implemented in the given Perl script, if any. @@ -21,7 +21,7 @@ Perl script, if any. STARTER SCRIPTS --------------- -You can avoid reading the rest of this document by running 'perf script +You can avoid reading the rest of this document by running 'perf trace -g perl' in the same directory as an existing perf.data trace file. That will generate a starter script containing a handler for each of the event types in the trace file; it simply prints every available @@ -30,13 +30,13 @@ field for each event in the trace file. You can also look at the existing scripts in ~/libexec/perf-core/scripts/perl for typical examples showing how to do basic things like aggregate event data, print results, etc. Also, -the check-perf-script.pl script, while not interesting for its results, +the check-perf-trace.pl script, while not interesting for its results, attempts to exercise all of the main scripting features. EVENT HANDLERS -------------- -When perf script is invoked using a trace script, a user-defined +When perf trace is invoked using a trace script, a user-defined 'handler function' is called for each event in the trace. If there's no handler function defined for a given event type, the event is ignored (or passed to a 'trace_handled' function, see below) and the @@ -112,13 +112,13 @@ write a useful trace script. The sections below cover the rest. SCRIPT LAYOUT ------------- -Every perf script Perl script should start by setting up a Perl module +Every perf trace Perl script should start by setting up a Perl module search path and 'use'ing a few support modules (see module descriptions below): ---- - use lib "$ENV{'PERF_EXEC_PATH'}/scripts/perl/perf-script-Util/lib"; - use lib "./perf-script-Util/lib"; + use lib "$ENV{'PERF_EXEC_PATH'}/scripts/perl/Perf-Trace-Util/lib"; + use lib "./Perf-Trace-Util/lib"; use Perf::Trace::Core; use Perf::Trace::Context; use Perf::Trace::Util; @@ -162,7 +162,7 @@ sub trace_unhandled ---- The remaining sections provide descriptions of each of the available -built-in perf script Perl modules and their associated functions. +built-in perf trace Perl modules and their associated functions. AVAILABLE MODULES AND FUNCTIONS ------------------------------- @@ -170,7 +170,7 @@ AVAILABLE MODULES AND FUNCTIONS The following sections describe the functions and variables available via the various Perf::Trace::* Perl modules. To use the functions and variables from the given module, add the corresponding 'use -Perf::Trace::XXX' line to your perf script script. +Perf::Trace::XXX' line to your perf trace script. Perf::Trace::Core Module ~~~~~~~~~~~~~~~~~~~~~~~~ @@ -204,7 +204,7 @@ argument. Perf::Trace::Util Module ~~~~~~~~~~~~~~~~~~~~~~~~ -Various utility functions for use with perf script: +Various utility functions for use with perf trace: nsecs($secs, $nsecs) - returns total nsecs given secs/nsecs pair nsecs_secs($nsecs) - returns whole secs portion given nsecs @@ -214,4 +214,4 @@ Various utility functions for use with perf script: SEE ALSO -------- -linkperf:perf-script[1] +linkperf:perf-trace[1] diff --git a/trunk/tools/perf/Documentation/perf-script-python.txt b/trunk/tools/perf/Documentation/perf-trace-python.txt similarity index 89% rename from trunk/tools/perf/Documentation/perf-script-python.txt rename to trunk/tools/perf/Documentation/perf-trace-python.txt index 36b38277422c..693be804dd3d 100644 --- a/trunk/tools/perf/Documentation/perf-script-python.txt +++ b/trunk/tools/perf/Documentation/perf-trace-python.txt @@ -1,19 +1,19 @@ -perf-script-python(1) +perf-trace-python(1) ==================== NAME ---- -perf-script-python - Process trace data with a Python script +perf-trace-python - Process trace data with a Python script SYNOPSIS -------- [verse] -'perf script' [-s [Python]:script[.py] ] +'perf trace' [-s [Python]:script[.py] ] DESCRIPTION ----------- -This perf script option is used to process perf script data using perf's +This perf trace option is used to process perf trace data using perf's built-in Python interpreter. It reads and processes the input file and displays the results of the trace analysis implemented in the given Python script, if any. @@ -23,15 +23,15 @@ A QUICK EXAMPLE This section shows the process, start to finish, of creating a working Python script that aggregates and extracts useful information from a -raw perf script stream. You can avoid reading the rest of this +raw perf trace stream. You can avoid reading the rest of this document if an example is enough for you; the rest of the document provides more details on each step and lists the library functions available to script writers. This example actually details the steps that were used to create the -'syscall-counts' script you see when you list the available perf script -scripts via 'perf script -l'. As such, this script also shows how to -integrate your script into the list of general-purpose 'perf script' +'syscall-counts' script you see when you list the available perf trace +scripts via 'perf trace -l'. As such, this script also shows how to +integrate your script into the list of general-purpose 'perf trace' scripts listed by that command. The syscall-counts script is a simple script, but demonstrates all the @@ -105,31 +105,31 @@ That single stream will be recorded in a file in the current directory called perf.data. Once we have a perf.data file containing our data, we can use the -g -'perf script' option to generate a Python script that will contain a +'perf trace' option to generate a Python script that will contain a callback handler for each event type found in the perf.data trace stream (for more details, see the STARTER SCRIPTS section). ---- -# perf script -g python -generated Python script: perf-script.py +# perf trace -g python +generated Python script: perf-trace.py The output file created also in the current directory is named -perf-script.py. Here's the file in its entirety: +perf-trace.py. Here's the file in its entirety: -# perf script event handlers, generated by perf script -g python +# perf trace event handlers, generated by perf trace -g python # Licensed under the terms of the GNU GPL License version 2 # The common_* event handler fields are the most useful fields common to # all events. They don't necessarily correspond to the 'common_*' fields # in the format files. Those fields not available as handler params can # be retrieved using Python functions of the form common_*(context). -# See the perf-script-python Documentation for the list of available functions. +# See the perf-trace-python Documentation for the list of available functions. import os import sys sys.path.append(os.environ['PERF_EXEC_PATH'] + \ - '/scripts/python/perf-script-Util/lib/Perf/Trace') + '/scripts/python/Perf-Trace-Util/lib/Perf/Trace') from perf_trace_context import * from Core import * @@ -160,7 +160,7 @@ def print_header(event_name, cpu, secs, nsecs, pid, comm): ---- At the top is a comment block followed by some import statements and a -path append which every perf script script should include. +path append which every perf trace script should include. Following that are a couple generated functions, trace_begin() and trace_end(), which are called at the beginning and the end of the @@ -189,8 +189,8 @@ simply a utility function used for that purpose. Let's rename the script and run it to see the default output: ---- -# mv perf-script.py syscall-counts.py -# perf script -s syscall-counts.py +# mv perf-trace.py syscall-counts.py +# perf trace -s syscall-counts.py raw_syscalls__sys_enter 1 00840.847582083 7506 perf id=1, args= raw_syscalls__sys_enter 1 00840.847595764 7506 perf id=1, args= @@ -216,7 +216,7 @@ import os import sys sys.path.append(os.environ['PERF_EXEC_PATH'] + \ - '/scripts/python/perf-script-Util/lib/Perf/Trace') + '/scripts/python/Perf-Trace-Util/lib/Perf/Trace') from perf_trace_context import * from Core import * @@ -279,7 +279,7 @@ import os import sys sys.path.append(os.environ['PERF_EXEC_PATH'] + \ - '/scripts/python/perf-script-Util/lib/Perf/Trace') + '/scripts/python/Perf-Trace-Util/lib/Perf/Trace') from perf_trace_context import * from Core import * @@ -315,7 +315,7 @@ def print_syscall_totals(): The script can be run just as before: - # perf script -s syscall-counts.py + # perf trace -s syscall-counts.py So those are the essential steps in writing and running a script. The process can be generalized to any tracepoint or set of tracepoints @@ -324,17 +324,17 @@ interested in by looking at the list of available events shown by 'perf list' and/or look in /sys/kernel/debug/tracing events for detailed event and field info, record the corresponding trace data using 'perf record', passing it the list of interesting events, -generate a skeleton script using 'perf script -g python' and modify the +generate a skeleton script using 'perf trace -g python' and modify the code to aggregate and display it for your particular needs. After you've done that you may end up with a general-purpose script that you want to keep around and have available for future use. By writing a couple of very simple shell scripts and putting them in the right place, you can have your script listed alongside the other -scripts listed by the 'perf script -l' command e.g.: +scripts listed by the 'perf trace -l' command e.g.: ---- -root@tropicana:~# perf script -l +root@tropicana:~# perf trace -l List of available trace scripts: workqueue-stats workqueue stats (ins/exe/create/destroy) wakeup-latency system-wide min/max/avg wakeup latency @@ -365,14 +365,14 @@ perf record -a -e raw_syscalls:sys_enter The 'report' script is also a shell script with the same base name as your script, but with -report appended. It should also be located in the perf/scripts/python/bin directory. In that script, you write the -'perf script -s' command-line needed for running your script: +'perf trace -s' command-line needed for running your script: ---- # cat kernel-source/tools/perf/scripts/python/bin/syscall-counts-report #!/bin/bash # description: system-wide syscall counts -perf script -s ~/libexec/perf-core/scripts/python/syscall-counts.py +perf trace -s ~/libexec/perf-core/scripts/python/syscall-counts.py ---- Note that the location of the Python script given in the shell script @@ -390,17 +390,17 @@ total 32 drwxr-xr-x 4 trz trz 4096 2010-01-26 22:30 . drwxr-xr-x 4 trz trz 4096 2010-01-26 22:29 .. drwxr-xr-x 2 trz trz 4096 2010-01-26 22:29 bin --rw-r--r-- 1 trz trz 2548 2010-01-26 22:29 check-perf-script.py -drwxr-xr-x 3 trz trz 4096 2010-01-26 22:49 perf-script-Util +-rw-r--r-- 1 trz trz 2548 2010-01-26 22:29 check-perf-trace.py +drwxr-xr-x 3 trz trz 4096 2010-01-26 22:49 Perf-Trace-Util -rw-r--r-- 1 trz trz 1462 2010-01-26 22:30 syscall-counts.py ---- Once you've done that (don't forget to do a new 'make install', -otherwise your script won't show up at run-time), 'perf script -l' +otherwise your script won't show up at run-time), 'perf trace -l' should show a new entry for your script: ---- -root@tropicana:~# perf script -l +root@tropicana:~# perf trace -l List of available trace scripts: workqueue-stats workqueue stats (ins/exe/create/destroy) wakeup-latency system-wide min/max/avg wakeup latency @@ -409,19 +409,19 @@ List of available trace scripts: syscall-counts system-wide syscall counts ---- -You can now perform the record step via 'perf script record': +You can now perform the record step via 'perf trace record': - # perf script record syscall-counts + # perf trace record syscall-counts -and display the output using 'perf script report': +and display the output using 'perf trace report': - # perf script report syscall-counts + # perf trace report syscall-counts STARTER SCRIPTS --------------- You can quickly get started writing a script for a particular set of -trace data by generating a skeleton script using 'perf script -g +trace data by generating a skeleton script using 'perf trace -g python' in the same directory as an existing perf.data trace file. That will generate a starter script containing a handler for each of the event types in the trace file; it simply prints every available @@ -430,13 +430,13 @@ field for each event in the trace file. You can also look at the existing scripts in ~/libexec/perf-core/scripts/python for typical examples showing how to do basic things like aggregate event data, print results, etc. Also, -the check-perf-script.py script, while not interesting for its results, +the check-perf-trace.py script, while not interesting for its results, attempts to exercise all of the main scripting features. EVENT HANDLERS -------------- -When perf script is invoked using a trace script, a user-defined +When perf trace is invoked using a trace script, a user-defined 'handler function' is called for each event in the trace. If there's no handler function defined for a given event type, the event is ignored (or passed to a 'trace_handled' function, see below) and the @@ -510,7 +510,7 @@ write a useful trace script. The sections below cover the rest. SCRIPT LAYOUT ------------- -Every perf script Python script should start by setting up a Python +Every perf trace Python script should start by setting up a Python module search path and 'import'ing a few support modules (see module descriptions below): @@ -519,7 +519,7 @@ descriptions below): import sys sys.path.append(os.environ['PERF_EXEC_PATH'] + \ - '/scripts/python/perf-script-Util/lib/Perf/Trace') + '/scripts/python/Perf-Trace-Util/lib/Perf/Trace') from perf_trace_context import * from Core import * @@ -559,15 +559,15 @@ def trace_unhandled(event_name, context, common_cpu, common_secs, ---- The remaining sections provide descriptions of each of the available -built-in perf script Python modules and their associated functions. +built-in perf trace Python modules and their associated functions. AVAILABLE MODULES AND FUNCTIONS ------------------------------- The following sections describe the functions and variables available -via the various perf script Python modules. To use the functions and +via the various perf trace Python modules. To use the functions and variables from the given module, add the corresponding 'from XXXX -import' line to your perf script script. +import' line to your perf trace script. Core.py Module ~~~~~~~~~~~~~~ @@ -610,7 +610,7 @@ argument. Util.py Module ~~~~~~~~~~~~~~ -Various utility functions for use with perf script: +Various utility functions for use with perf trace: nsecs(secs, nsecs) - returns total nsecs given secs/nsecs pair nsecs_secs(nsecs) - returns whole secs portion given nsecs @@ -620,4 +620,4 @@ Various utility functions for use with perf script: SEE ALSO -------- -linkperf:perf-script[1] +linkperf:perf-trace[1] diff --git a/trunk/tools/perf/Documentation/perf-script.txt b/trunk/tools/perf/Documentation/perf-trace.txt similarity index 62% rename from trunk/tools/perf/Documentation/perf-script.txt rename to trunk/tools/perf/Documentation/perf-trace.txt index 29ad94293cd2..26aff6bf9e50 100644 --- a/trunk/tools/perf/Documentation/perf-script.txt +++ b/trunk/tools/perf/Documentation/perf-trace.txt @@ -1,71 +1,71 @@ -perf-script(1) +perf-trace(1) ============= NAME ---- -perf-script - Read perf.data (created by perf record) and display trace output +perf-trace - Read perf.data (created by perf record) and display trace output SYNOPSIS -------- [verse] -'perf script' [] -'perf script' [] record