From c85f2dc97e42289995eb63c3ffd5c4b6a522c810 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Fri, 15 Dec 2006 13:57:26 -0800 Subject: [PATCH] --- yaml --- r: 44630 b: refs/heads/master c: c59a3da1342ff456e5123361739bc331446cda21 h: refs/heads/master v: v3 --- [refs] | 2 +- trunk/Makefile | 5 + trunk/arch/i386/defconfig | 2 + trunk/arch/i386/kernel/entry.S | 32 + trunk/arch/i386/kernel/traps.c | 83 ++ trunk/arch/s390/defconfig | 47 +- trunk/arch/s390/hypfs/hypfs_diag.c | 4 +- trunk/arch/s390/kernel/ipl.c | 59 +- trunk/arch/s390/kernel/reipl.S | 6 +- trunk/arch/s390/kernel/reipl64.S | 5 +- trunk/arch/s390/kernel/reset.S | 42 - trunk/arch/x86_64/Makefile | 2 + trunk/arch/x86_64/defconfig | 2 + trunk/arch/x86_64/kernel/entry.S | 33 + trunk/arch/x86_64/kernel/traps.c | 84 ++ trunk/arch/x86_64/kernel/vmlinux.lds.S | 2 + trunk/drivers/ata/Kconfig | 6 +- trunk/drivers/ata/ahci.c | 2 + trunk/drivers/ata/ata_piix.c | 22 +- trunk/drivers/ata/libata-core.c | 14 +- trunk/drivers/ata/libata-scsi.c | 4 +- trunk/drivers/ata/pata_legacy.c | 4 +- trunk/drivers/ata/pata_qdi.c | 4 +- trunk/drivers/ata/pata_rz1000.c | 2 + trunk/drivers/ata/pata_via.c | 9 +- trunk/drivers/ata/pata_winbond.c | 4 +- trunk/drivers/ata/sata_svw.c | 41 +- trunk/drivers/s390/char/sclp_cpi.c | 2 - trunk/drivers/s390/cio/cio.c | 25 +- trunk/drivers/s390/cio/css.c | 3 +- trunk/drivers/s390/cio/qdio.c | 13 +- trunk/drivers/s390/crypto/ap_bus.c | 14 +- trunk/include/asm-generic/vmlinux.lds.h | 22 + trunk/include/asm-i386/unwind.h | 91 ++ trunk/include/asm-s390/qdio.h | 1 - trunk/include/asm-s390/reset.h | 1 - trunk/include/asm-x86_64/unwind.h | 96 ++ trunk/include/linux/unwind.h | 63 +- trunk/include/linux/workqueue.h | 36 +- trunk/include/rdma/ib_verbs.h | 9 +- trunk/kernel/Makefile | 1 + trunk/kernel/unwind.c | 1305 +++++++++++++++++++++++ trunk/kernel/workqueue.c | 16 +- trunk/lib/Kconfig.debug | 18 + trunk/lib/fault-inject.c | 32 +- trunk/mm/mincore.c | 190 ++-- 46 files changed, 2107 insertions(+), 353 
deletions(-) create mode 100644 trunk/kernel/unwind.c diff --git a/[refs] b/[refs] index b975c9e39789..87a75f9079ee 100644 --- a/[refs] +++ b/[refs] @@ -1,2 +1,2 @@ --- -refs/heads/master: 99f5e9718185f07458ae70c2282c2153a2256c91 +refs/heads/master: c59a3da1342ff456e5123361739bc331446cda21 diff --git a/trunk/Makefile b/trunk/Makefile index 4a4720387936..dc82462b68ba 100644 --- a/trunk/Makefile +++ b/trunk/Makefile @@ -496,6 +496,11 @@ else CFLAGS += -fomit-frame-pointer endif +ifdef CONFIG_UNWIND_INFO +CFLAGS += -fasynchronous-unwind-tables +LDFLAGS_vmlinux += --eh-frame-hdr +endif + ifdef CONFIG_DEBUG_INFO CFLAGS += -g endif diff --git a/trunk/arch/i386/defconfig b/trunk/arch/i386/defconfig index e075ff05c46d..3265208e5899 100644 --- a/trunk/arch/i386/defconfig +++ b/trunk/arch/i386/defconfig @@ -1493,6 +1493,8 @@ CONFIG_DEBUG_BUGVERBOSE=y # CONFIG_DEBUG_VM is not set # CONFIG_DEBUG_LIST is not set # CONFIG_FRAME_POINTER is not set +CONFIG_UNWIND_INFO=y +CONFIG_STACK_UNWIND=y # CONFIG_FORCED_INLINING is not set # CONFIG_HEADERS_CHECK is not set # CONFIG_RCU_TORTURE_TEST is not set diff --git a/trunk/arch/i386/kernel/entry.S b/trunk/arch/i386/kernel/entry.S index 06461b8b715d..de34b7fed3c1 100644 --- a/trunk/arch/i386/kernel/entry.S +++ b/trunk/arch/i386/kernel/entry.S @@ -979,6 +979,38 @@ ENTRY(spurious_interrupt_bug) jmp error_code CFI_ENDPROC +#ifdef CONFIG_STACK_UNWIND +ENTRY(arch_unwind_init_running) + CFI_STARTPROC + movl 4(%esp), %edx + movl (%esp), %ecx + leal 4(%esp), %eax + movl %ebx, PT_EBX(%edx) + xorl %ebx, %ebx + movl %ebx, PT_ECX(%edx) + movl %ebx, PT_EDX(%edx) + movl %esi, PT_ESI(%edx) + movl %edi, PT_EDI(%edx) + movl %ebp, PT_EBP(%edx) + movl %ebx, PT_EAX(%edx) + movl $__USER_DS, PT_DS(%edx) + movl $__USER_DS, PT_ES(%edx) + movl $0, PT_GS(%edx) + movl %ebx, PT_ORIG_EAX(%edx) + movl %ecx, PT_EIP(%edx) + movl 12(%esp), %ecx + movl $__KERNEL_CS, PT_CS(%edx) + movl %ebx, PT_EFLAGS(%edx) + movl %eax, PT_OLDESP(%edx) + movl 8(%esp), %eax + movl %ecx, 
8(%esp) + movl PT_EBX(%edx), %ebx + movl $__KERNEL_DS, PT_OLDSS(%edx) + jmpl *%eax + CFI_ENDPROC +ENDPROC(arch_unwind_init_running) +#endif + ENTRY(kernel_thread_helper) pushl $0 # fake return address for unwinder CFI_STARTPROC diff --git a/trunk/arch/i386/kernel/traps.c b/trunk/arch/i386/kernel/traps.c index 0efad8aeb41a..2b30dbf8d117 100644 --- a/trunk/arch/i386/kernel/traps.c +++ b/trunk/arch/i386/kernel/traps.c @@ -94,6 +94,11 @@ asmlinkage void spurious_interrupt_bug(void); asmlinkage void machine_check(void); int kstack_depth_to_print = 24; +#ifdef CONFIG_STACK_UNWIND +static int call_trace = 1; +#else +#define call_trace (-1) +#endif ATOMIC_NOTIFIER_HEAD(i386die_chain); int register_die_notifier(struct notifier_block *nb) @@ -147,6 +152,33 @@ static inline unsigned long print_context_stack(struct thread_info *tinfo, return ebp; } +struct ops_and_data { + struct stacktrace_ops *ops; + void *data; +}; + +static asmlinkage int +dump_trace_unwind(struct unwind_frame_info *info, void *data) +{ + struct ops_and_data *oad = (struct ops_and_data *)data; + int n = 0; + unsigned long sp = UNW_SP(info); + + if (arch_unw_user_mode(info)) + return -1; + while (unwind(info) == 0 && UNW_PC(info)) { + n++; + oad->ops->address(oad->data, UNW_PC(info)); + if (arch_unw_user_mode(info)) + break; + if ((sp & ~(PAGE_SIZE - 1)) == (UNW_SP(info) & ~(PAGE_SIZE - 1)) + && sp > UNW_SP(info)) + break; + sp = UNW_SP(info); + } + return n; +} + #define MSG(msg) ops->warning(data, msg) void dump_trace(struct task_struct *task, struct pt_regs *regs, @@ -158,6 +190,41 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, if (!task) task = current; + if (call_trace >= 0) { + int unw_ret = 0; + struct unwind_frame_info info; + struct ops_and_data oad = { .ops = ops, .data = data }; + + if (regs) { + if (unwind_init_frame_info(&info, task, regs) == 0) + unw_ret = dump_trace_unwind(&info, &oad); + } else if (task == current) + unw_ret = unwind_init_running(&info, dump_trace_unwind, 
+ &oad); + else { + if (unwind_init_blocked(&info, task) == 0) + unw_ret = dump_trace_unwind(&info, &oad); + } + if (unw_ret > 0) { + if (call_trace == 1 && !arch_unw_user_mode(&info)) { + ops->warning_symbol(data, + "DWARF2 unwinder stuck at %s", + UNW_PC(&info)); + if (UNW_SP(&info) >= PAGE_OFFSET) { + MSG("Leftover inexact backtrace:"); + stack = (void *)UNW_SP(&info); + if (!stack) + return; + ebp = UNW_FP(&info); + } else + MSG("Full inexact backtrace again:"); + } else if (call_trace >= 1) + return; + else + MSG("Full inexact backtrace again:"); + } else + MSG("Inexact backtrace:"); + } if (!stack) { unsigned long dummy; stack = &dummy; @@ -1191,3 +1258,19 @@ static int __init kstack_setup(char *s) return 1; } __setup("kstack=", kstack_setup); + +#ifdef CONFIG_STACK_UNWIND +static int __init call_trace_setup(char *s) +{ + if (strcmp(s, "old") == 0) + call_trace = -1; + else if (strcmp(s, "both") == 0) + call_trace = 0; + else if (strcmp(s, "newfallback") == 0) + call_trace = 1; + else if (strcmp(s, "new") == 2) + call_trace = 2; + return 1; +} +__setup("call_trace=", call_trace_setup); +#endif diff --git a/trunk/arch/s390/defconfig b/trunk/arch/s390/defconfig index 5368cf4a350e..a6ec919ba83f 100644 --- a/trunk/arch/s390/defconfig +++ b/trunk/arch/s390/defconfig @@ -1,15 +1,14 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.20-rc1 -# Fri Dec 15 16:52:28 2006 +# Linux kernel version: 2.6.19-rc2 +# Wed Oct 18 17:11:10 2006 # CONFIG_MMU=y CONFIG_LOCKDEP_SUPPORT=y CONFIG_STACKTRACE_SUPPORT=y CONFIG_RWSEM_XCHGADD_ALGORITHM=y -# CONFIG_ARCH_HAS_ILOG2_U32 is not set -# CONFIG_ARCH_HAS_ILOG2_U64 is not set CONFIG_GENERIC_HWEIGHT=y +CONFIG_GENERIC_CALIBRATE_DELAY=y CONFIG_GENERIC_TIME=y CONFIG_S390=y CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" @@ -38,13 +37,12 @@ CONFIG_AUDIT=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y # CONFIG_CPUSETS is not set -CONFIG_SYSFS_DEPRECATED=y # CONFIG_RELAY is not set 
CONFIG_INITRAMFS_SOURCE="" # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_SYSCTL=y # CONFIG_EMBEDDED is not set -CONFIG_SYSCTL_SYSCALL=y +# CONFIG_SYSCTL_SYSCALL is not set CONFIG_KALLSYMS=y # CONFIG_KALLSYMS_ALL is not set # CONFIG_KALLSYMS_EXTRA_PASS is not set @@ -121,7 +119,6 @@ CONFIG_PACK_STACK=y CONFIG_CHECK_STACK=y CONFIG_STACK_GUARD=256 # CONFIG_WARN_STACK is not set -CONFIG_ARCH_POPULATES_NODE_MAP=y CONFIG_SELECT_MEMORY_MODEL=y CONFIG_FLATMEM_MANUAL=y # CONFIG_DISCONTIGMEM_MANUAL is not set @@ -131,7 +128,6 @@ CONFIG_FLAT_NODE_MEM_MAP=y # CONFIG_SPARSEMEM_STATIC is not set CONFIG_SPLIT_PTLOCK_CPUS=4 CONFIG_RESOURCES_64BIT=y -CONFIG_HOLES_IN_ZONE=y # # I/O subsystem configuration @@ -200,7 +196,6 @@ CONFIG_INET_TCP_DIAG=y # CONFIG_TCP_CONG_ADVANCED is not set CONFIG_TCP_CONG_CUBIC=y CONFIG_DEFAULT_TCP_CONG="cubic" -# CONFIG_TCP_MD5SIG is not set CONFIG_IPV6=y # CONFIG_IPV6_PRIVACY is not set # CONFIG_IPV6_ROUTER_PREF is not set @@ -216,6 +211,7 @@ CONFIG_INET6_XFRM_MODE_BEET=y # CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION is not set CONFIG_IPV6_SIT=y # CONFIG_IPV6_TUNNEL is not set +# CONFIG_IPV6_SUBTREES is not set # CONFIG_IPV6_MULTIPLE_TABLES is not set # CONFIG_NETWORK_SECMARK is not set # CONFIG_NETFILTER is not set @@ -250,7 +246,6 @@ CONFIG_IPV6_SIT=y # QoS and/or fair queueing # CONFIG_NET_SCHED=y -CONFIG_NET_SCH_FIFO=y CONFIG_NET_SCH_CLK_JIFFIES=y # CONFIG_NET_SCH_CLK_GETTIMEOFDAY is not set # CONFIG_NET_SCH_CLK_CPU is not set @@ -282,7 +277,6 @@ CONFIG_NET_CLS_ROUTE=y CONFIG_NET_CLS_FW=m CONFIG_NET_CLS_U32=m # CONFIG_CLS_U32_PERF is not set -CONFIG_CLS_U32_MARK=y CONFIG_NET_CLS_RSVP=m CONFIG_NET_CLS_RSVP6=m # CONFIG_NET_EMATCH is not set @@ -321,7 +315,6 @@ CONFIG_SYS_HYPERVISOR=y # # CONFIG_RAID_ATTRS is not set CONFIG_SCSI=y -# CONFIG_SCSI_TGT is not set CONFIG_SCSI_NETLINK=y CONFIG_SCSI_PROC_FS=y @@ -342,7 +335,6 @@ CONFIG_CHR_DEV_SG=y CONFIG_SCSI_MULTI_LUN=y CONFIG_SCSI_CONSTANTS=y CONFIG_SCSI_LOGGING=y -CONFIG_SCSI_SCAN_ASYNC=y # # SCSI 
Transports @@ -554,7 +546,6 @@ CONFIG_DNOTIFY=y # CONFIG_AUTOFS_FS is not set # CONFIG_AUTOFS4_FS is not set # CONFIG_FUSE_FS is not set -CONFIG_GENERIC_ACL=y # # CD-ROM/DVD Filesystems @@ -580,7 +571,7 @@ CONFIG_TMPFS=y CONFIG_TMPFS_POSIX_ACL=y # CONFIG_HUGETLB_PAGE is not set CONFIG_RAMFS=y -CONFIG_CONFIGFS_FS=m +# CONFIG_CONFIGFS_FS is not set # # Miscellaneous filesystems @@ -625,6 +616,7 @@ CONFIG_SUNRPC=y # CONFIG_CODA_FS is not set # CONFIG_AFS_FS is not set # CONFIG_9P_FS is not set +CONFIG_GENERIC_ACL=y # # Partition Types @@ -653,14 +645,6 @@ CONFIG_MSDOS_PARTITION=y # # CONFIG_NLS is not set -# -# Distributed Lock Manager -# -CONFIG_DLM=m -CONFIG_DLM_TCP=y -# CONFIG_DLM_SCTP is not set -# CONFIG_DLM_DEBUG is not set - # # Instrumentation Support # @@ -679,8 +663,6 @@ CONFIG_TRACE_IRQFLAGS_SUPPORT=y CONFIG_ENABLE_MUST_CHECK=y CONFIG_MAGIC_SYSRQ=y # CONFIG_UNUSED_SYMBOLS is not set -CONFIG_DEBUG_FS=y -CONFIG_HEADERS_CHECK=y CONFIG_DEBUG_KERNEL=y CONFIG_LOG_BUF_SHIFT=17 # CONFIG_SCHEDSTATS is not set @@ -697,11 +679,13 @@ CONFIG_DEBUG_SPINLOCK_SLEEP=y # CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set # CONFIG_DEBUG_KOBJECT is not set # CONFIG_DEBUG_INFO is not set +CONFIG_DEBUG_FS=y # CONFIG_DEBUG_VM is not set # CONFIG_DEBUG_LIST is not set # CONFIG_FRAME_POINTER is not set # CONFIG_UNWIND_INFO is not set CONFIG_FORCED_INLINING=y +CONFIG_HEADERS_CHECK=y # CONFIG_RCU_TORTURE_TEST is not set # CONFIG_LKDTM is not set @@ -715,11 +699,10 @@ CONFIG_FORCED_INLINING=y # Cryptographic options # CONFIG_CRYPTO=y -CONFIG_CRYPTO_ALGAPI=y -CONFIG_CRYPTO_BLKCIPHER=y -CONFIG_CRYPTO_MANAGER=y +CONFIG_CRYPTO_ALGAPI=m +CONFIG_CRYPTO_BLKCIPHER=m +CONFIG_CRYPTO_MANAGER=m # CONFIG_CRYPTO_HMAC is not set -# CONFIG_CRYPTO_XCBC is not set # CONFIG_CRYPTO_NULL is not set # CONFIG_CRYPTO_MD4 is not set # CONFIG_CRYPTO_MD5 is not set @@ -730,10 +713,8 @@ CONFIG_CRYPTO_MANAGER=y # CONFIG_CRYPTO_SHA512 is not set # CONFIG_CRYPTO_WP512 is not set # CONFIG_CRYPTO_TGR192 is not set -# 
CONFIG_CRYPTO_GF128MUL is not set CONFIG_CRYPTO_ECB=m -CONFIG_CRYPTO_CBC=y -# CONFIG_CRYPTO_LRW is not set +CONFIG_CRYPTO_CBC=m # CONFIG_CRYPTO_DES is not set # CONFIG_CRYPTO_DES_S390 is not set # CONFIG_CRYPTO_BLOWFISH is not set @@ -759,10 +740,8 @@ CONFIG_CRYPTO_CBC=y # # Library routines # -CONFIG_BITREVERSE=m # CONFIG_CRC_CCITT is not set # CONFIG_CRC16 is not set CONFIG_CRC32=m # CONFIG_LIBCRC32C is not set CONFIG_PLIST=y -CONFIG_IOMAP_COPY=y diff --git a/trunk/arch/s390/hypfs/hypfs_diag.c b/trunk/arch/s390/hypfs/hypfs_diag.c index 2782cf9da5b4..443fa377d9ff 100644 --- a/trunk/arch/s390/hypfs/hypfs_diag.c +++ b/trunk/arch/s390/hypfs/hypfs_diag.c @@ -379,7 +379,7 @@ static void *diag204_alloc_vbuf(int pages) static void *diag204_alloc_rbuf(void) { diag204_buf = (void*)__get_free_pages(GFP_KERNEL,0); - if (!diag204_buf) + if (diag204_buf) return ERR_PTR(-ENOMEM); diag204_buf_pages = 1; return diag204_buf; @@ -521,7 +521,7 @@ __init int hypfs_diag_init(void) } rc = diag224_get_name_table(); if (rc) { - diag204_free_buffer(); + diag224_delete_name_table(); printk(KERN_ERR "hypfs: could not get name table.\n"); } return rc; diff --git a/trunk/arch/s390/kernel/ipl.c b/trunk/arch/s390/kernel/ipl.c index 9e9972e8a52b..a36bea1188d9 100644 --- a/trunk/arch/s390/kernel/ipl.c +++ b/trunk/arch/s390/kernel/ipl.c @@ -609,12 +609,42 @@ static ssize_t on_panic_store(struct subsystem *subsys, const char *buf, static struct subsys_attribute on_panic_attr = __ATTR(on_panic, 0644, on_panic_show, on_panic_store); +static void print_fcp_block(struct ipl_parameter_block *fcp_block) +{ + printk(KERN_EMERG "wwpn: %016llx\n", + (unsigned long long)fcp_block->ipl_info.fcp.wwpn); + printk(KERN_EMERG "lun: %016llx\n", + (unsigned long long)fcp_block->ipl_info.fcp.lun); + printk(KERN_EMERG "bootprog: %lld\n", + (unsigned long long)fcp_block->ipl_info.fcp.bootprog); + printk(KERN_EMERG "br_lba: %lld\n", + (unsigned long long)fcp_block->ipl_info.fcp.br_lba); + printk(KERN_EMERG "device: 
%llx\n", + (unsigned long long)fcp_block->ipl_info.fcp.devno); + printk(KERN_EMERG "opt: %x\n", fcp_block->ipl_info.fcp.opt); +} + void do_reipl(void) { struct ccw_dev_id devid; static char buf[100]; char loadparm[LOADPARM_LEN + 1]; + switch (reipl_type) { + case IPL_TYPE_CCW: + reipl_get_ascii_loadparm(loadparm); + printk(KERN_EMERG "reboot on ccw device: 0.0.%04x\n", + reipl_block_ccw->ipl_info.ccw.devno); + printk(KERN_EMERG "loadparm = '%s'\n", loadparm); + break; + case IPL_TYPE_FCP: + printk(KERN_EMERG "reboot on fcp device:\n"); + print_fcp_block(reipl_block_fcp); + break; + default: + break; + } + switch (reipl_method) { case IPL_METHOD_CCW_CIO: devid.devno = reipl_block_ccw->ipl_info.ccw.devno; @@ -624,7 +654,6 @@ void do_reipl(void) reipl_ccw_dev(&devid); break; case IPL_METHOD_CCW_VM: - reipl_get_ascii_loadparm(loadparm); if (strlen(loadparm) == 0) sprintf(buf, "IPL %X", reipl_block_ccw->ipl_info.ccw.devno); @@ -654,6 +683,7 @@ void do_reipl(void) diag308(DIAG308_IPL, NULL); break; } + printk(KERN_EMERG "reboot failed!\n"); signal_processor(smp_processor_id(), sigp_stop_and_store_status); } @@ -662,6 +692,19 @@ static void do_dump(void) struct ccw_dev_id devid; static char buf[100]; + switch (dump_type) { + case IPL_TYPE_CCW: + printk(KERN_EMERG "Automatic dump on ccw device: 0.0.%04x\n", + dump_block_ccw->ipl_info.ccw.devno); + break; + case IPL_TYPE_FCP: + printk(KERN_EMERG "Automatic dump on fcp device:\n"); + print_fcp_block(dump_block_fcp); + break; + default: + return; + } + switch (dump_method) { case IPL_METHOD_CCW_CIO: smp_send_stop(); @@ -994,21 +1037,15 @@ static void do_reset_calls(void) } extern void reset_mcck_handler(void); -extern void reset_pgm_handler(void); -extern __u32 dump_prefix_page; void s390_reset_system(void) { struct _lowcore *lc; - lc = (struct _lowcore *)(unsigned long) store_prefix(); - /* Stack for interrupt/machine check handler */ + lc = (struct _lowcore *)(unsigned long) store_prefix(); lc->panic_stack = 
S390_lowcore.panic_stack; - /* Save prefix page address for dump case */ - dump_prefix_page = (unsigned long) lc; - /* Disable prefixing */ set_prefix(0); @@ -1019,11 +1056,5 @@ void s390_reset_system(void) S390_lowcore.mcck_new_psw.mask = PSW_KERNEL_BITS & ~PSW_MASK_MCHECK; S390_lowcore.mcck_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) &reset_mcck_handler; - - /* Set new program check handler */ - S390_lowcore.program_new_psw.mask = PSW_KERNEL_BITS & ~PSW_MASK_MCHECK; - S390_lowcore.program_new_psw.addr = - PSW_ADDR_AMODE | (unsigned long) &reset_pgm_handler; - do_reset_calls(); } diff --git a/trunk/arch/s390/kernel/reipl.S b/trunk/arch/s390/kernel/reipl.S index c3f4d9b95083..f9434d42ce9f 100644 --- a/trunk/arch/s390/kernel/reipl.S +++ b/trunk/arch/s390/kernel/reipl.S @@ -16,7 +16,7 @@ do_reipl_asm: basr %r13,0 stm %r0,%r15,__LC_GPREGS_SAVE_AREA stctl %c0,%c15,__LC_CREGS_SAVE_AREA stam %a0,%a15,__LC_AREGS_SAVE_AREA - mvc __LC_PREFIX_SAVE_AREA(4),dump_prefix_page-.Lpg0(%r13) + stpx __LC_PREFIX_SAVE_AREA stckc .Lclkcmp-.Lpg0(%r13) mvc __LC_CLOCK_COMP_SAVE_AREA(8),.Lclkcmp-.Lpg0(%r13) stpt __LC_CPU_TIMER_SAVE_AREA @@ -79,7 +79,3 @@ do_reipl_asm: basr %r13,0 .long 0x00000000,0x00000000 .long 0x00000000,0x00000000 .long 0x00000000,0x00000000 - .globl dump_prefix_page -dump_prefix_page: - .long 0x00000000 - diff --git a/trunk/arch/s390/kernel/reipl64.S b/trunk/arch/s390/kernel/reipl64.S index dbb3eed38865..f18ef260ca23 100644 --- a/trunk/arch/s390/kernel/reipl64.S +++ b/trunk/arch/s390/kernel/reipl64.S @@ -20,7 +20,7 @@ do_reipl_asm: basr %r13,0 stg %r0,__LC_GPREGS_SAVE_AREA-0x1000+8(%r1) stctg %c0,%c15,__LC_CREGS_SAVE_AREA-0x1000(%r1) stam %a0,%a15,__LC_AREGS_SAVE_AREA-0x1000(%r1) - mvc __LC_PREFIX_SAVE_AREA-0x1000(4,%r1),dump_prefix_page-.Lpg0(%r13) + stpx __LC_PREFIX_SAVE_AREA-0x1000(%r1) stfpc __LC_FP_CREG_SAVE_AREA-0x1000(%r1) stckc .Lclkcmp-.Lpg0(%r13) mvc __LC_CLOCK_COMP_SAVE_AREA-0x1000(8,%r1),.Lclkcmp-.Lpg0(%r13) @@ -103,6 +103,3 @@ do_reipl_asm: basr %r13,0 
.long 0x00000000,0x00000000 .long 0x00000000,0x00000000 .long 0x00000000,0x00000000 - .globl dump_prefix_page -dump_prefix_page: - .long 0x00000000 diff --git a/trunk/arch/s390/kernel/reset.S b/trunk/arch/s390/kernel/reset.S index 8a87355161fa..be8688c0665c 100644 --- a/trunk/arch/s390/kernel/reset.S +++ b/trunk/arch/s390/kernel/reset.S @@ -3,7 +3,6 @@ * * Copyright (C) IBM Corp. 2006 * Author(s): Heiko Carstens - * Michael Holzheu */ #include @@ -28,26 +27,6 @@ reset_mcck_handler: s390_reset_mcck_handler: .quad 0 - .globl reset_pgm_handler -reset_pgm_handler: - stmg %r0,%r15,__LC_SAVE_AREA - basr %r13,0 -0: lg %r15,__LC_PANIC_STACK # load panic stack - aghi %r15,-STACK_FRAME_OVERHEAD - lg %r1,s390_reset_pgm_handler-0b(%r13) - ltgr %r1,%r1 - jz 1f - basr %r14,%r1 - lmg %r0,%r15,__LC_SAVE_AREA - lpswe __LC_PGM_OLD_PSW -1: lpswe disabled_wait_psw-0b(%r13) - .globl s390_reset_pgm_handler -s390_reset_pgm_handler: - .quad 0 - .align 8 -disabled_wait_psw: - .quad 0x0002000180000000,0x0000000000000000 + reset_pgm_handler - #else /* CONFIG_64BIT */ .globl reset_mcck_handler @@ -66,25 +45,4 @@ reset_mcck_handler: s390_reset_mcck_handler: .long 0 - .globl reset_pgm_handler -reset_pgm_handler: - stm %r0,%r15,__LC_SAVE_AREA - basr %r13,0 -0: l %r15,__LC_PANIC_STACK # load panic stack - ahi %r15,-STACK_FRAME_OVERHEAD - l %r1,s390_reset_pgm_handler-0b(%r13) - ltr %r1,%r1 - jz 1f - basr %r14,%r1 - lm %r0,%r15,__LC_SAVE_AREA - lpsw __LC_PGM_OLD_PSW - -1: lpsw disabled_wait_psw-0b(%r13) - .globl s390_reset_pgm_handler -s390_reset_pgm_handler: - .long 0 -disabled_wait_psw: - .align 8 - .long 0x000a0000,0x00000000 + reset_pgm_handler - #endif /* CONFIG_64BIT */ diff --git a/trunk/arch/x86_64/Makefile b/trunk/arch/x86_64/Makefile index 2941a915d4ef..b471b8550d03 100644 --- a/trunk/arch/x86_64/Makefile +++ b/trunk/arch/x86_64/Makefile @@ -45,7 +45,9 @@ cflags-kernel-$(CONFIG_REORDER) += -ffunction-sections # actually it makes the kernel smaller too. 
cflags-y += -fno-reorder-blocks cflags-y += -Wno-sign-compare +ifneq ($(CONFIG_UNWIND_INFO),y) cflags-y += -fno-asynchronous-unwind-tables +endif ifneq ($(CONFIG_DEBUG_INFO),y) # -fweb shrinks the kernel a bit, but the difference is very small # it also messes up debugging, so don't use it for now. diff --git a/trunk/arch/x86_64/defconfig b/trunk/arch/x86_64/defconfig index ac80b1209fc0..1a1c6a1a299b 100644 --- a/trunk/arch/x86_64/defconfig +++ b/trunk/arch/x86_64/defconfig @@ -1523,6 +1523,8 @@ CONFIG_DEBUG_FS=y # CONFIG_DEBUG_VM is not set # CONFIG_DEBUG_LIST is not set # CONFIG_FRAME_POINTER is not set +CONFIG_UNWIND_INFO=y +CONFIG_STACK_UNWIND=y # CONFIG_FORCED_INLINING is not set # CONFIG_HEADERS_CHECK is not set # CONFIG_RCU_TORTURE_TEST is not set diff --git a/trunk/arch/x86_64/kernel/entry.S b/trunk/arch/x86_64/kernel/entry.S index 9f5dac64aa8f..601d332c4b79 100644 --- a/trunk/arch/x86_64/kernel/entry.S +++ b/trunk/arch/x86_64/kernel/entry.S @@ -1155,3 +1155,36 @@ ENTRY(call_softirq) ret CFI_ENDPROC ENDPROC(call_softirq) + +#ifdef CONFIG_STACK_UNWIND +ENTRY(arch_unwind_init_running) + CFI_STARTPROC + movq %r15, R15(%rdi) + movq %r14, R14(%rdi) + xchgq %rsi, %rdx + movq %r13, R13(%rdi) + movq %r12, R12(%rdi) + xorl %eax, %eax + movq %rbp, RBP(%rdi) + movq %rbx, RBX(%rdi) + movq (%rsp), %rcx + movq %rax, R11(%rdi) + movq %rax, R10(%rdi) + movq %rax, R9(%rdi) + movq %rax, R8(%rdi) + movq %rax, RAX(%rdi) + movq %rax, RCX(%rdi) + movq %rax, RDX(%rdi) + movq %rax, RSI(%rdi) + movq %rax, RDI(%rdi) + movq %rax, ORIG_RAX(%rdi) + movq %rcx, RIP(%rdi) + leaq 8(%rsp), %rcx + movq $__KERNEL_CS, CS(%rdi) + movq %rax, EFLAGS(%rdi) + movq %rcx, RSP(%rdi) + movq $__KERNEL_DS, SS(%rdi) + jmpq *%rdx + CFI_ENDPROC +ENDPROC(arch_unwind_init_running) +#endif diff --git a/trunk/arch/x86_64/kernel/traps.c b/trunk/arch/x86_64/kernel/traps.c index 1d9eb6db732a..b54ccc07f379 100644 --- a/trunk/arch/x86_64/kernel/traps.c +++ b/trunk/arch/x86_64/kernel/traps.c @@ -110,6 +110,11 @@ 
static inline void preempt_conditional_cli(struct pt_regs *regs) } int kstack_depth_to_print = 12; +#ifdef CONFIG_STACK_UNWIND +static int call_trace = 1; +#else +#define call_trace (-1) +#endif #ifdef CONFIG_KALLSYMS void printk_address(unsigned long address) @@ -212,6 +217,32 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, return NULL; } +struct ops_and_data { + struct stacktrace_ops *ops; + void *data; +}; + +static int dump_trace_unwind(struct unwind_frame_info *info, void *context) +{ + struct ops_and_data *oad = (struct ops_and_data *)context; + int n = 0; + unsigned long sp = UNW_SP(info); + + if (arch_unw_user_mode(info)) + return -1; + while (unwind(info) == 0 && UNW_PC(info)) { + n++; + oad->ops->address(oad->data, UNW_PC(info)); + if (arch_unw_user_mode(info)) + break; + if ((sp & ~(PAGE_SIZE - 1)) == (UNW_SP(info) & ~(PAGE_SIZE - 1)) + && sp > UNW_SP(info)) + break; + sp = UNW_SP(info); + } + return n; +} + #define MSG(txt) ops->warning(data, txt) /* @@ -239,6 +270,40 @@ void dump_trace(struct task_struct *tsk, struct pt_regs *regs, if (!tsk) tsk = current; + if (call_trace >= 0) { + int unw_ret = 0; + struct unwind_frame_info info; + struct ops_and_data oad = { .ops = ops, .data = data }; + + if (regs) { + if (unwind_init_frame_info(&info, tsk, regs) == 0) + unw_ret = dump_trace_unwind(&info, &oad); + } else if (tsk == current) + unw_ret = unwind_init_running(&info, dump_trace_unwind, + &oad); + else { + if (unwind_init_blocked(&info, tsk) == 0) + unw_ret = dump_trace_unwind(&info, &oad); + } + if (unw_ret > 0) { + if (call_trace == 1 && !arch_unw_user_mode(&info)) { + ops->warning_symbol(data, + "DWARF2 unwinder stuck at %s", + UNW_PC(&info)); + if ((long)UNW_SP(&info) < 0) { + MSG("Leftover inexact backtrace:"); + stack = (unsigned long *)UNW_SP(&info); + if (!stack) + goto out; + } else + MSG("Full inexact backtrace again:"); + } else if (call_trace >= 1) + goto out; + else + MSG("Full inexact backtrace again:"); + } 
else + MSG("Inexact backtrace:"); + } if (!stack) { unsigned long dummy; stack = &dummy; @@ -322,6 +387,7 @@ void dump_trace(struct task_struct *tsk, struct pt_regs *regs, tinfo = current_thread_info(); HANDLE_STACK (valid_stack_ptr(tinfo, stack)); #undef HANDLE_STACK +out: put_cpu(); } EXPORT_SYMBOL(dump_trace); @@ -1122,3 +1188,21 @@ static int __init kstack_setup(char *s) return 0; } early_param("kstack", kstack_setup); + +#ifdef CONFIG_STACK_UNWIND +static int __init call_trace_setup(char *s) +{ + if (!s) + return -EINVAL; + if (strcmp(s, "old") == 0) + call_trace = -1; + else if (strcmp(s, "both") == 0) + call_trace = 0; + else if (strcmp(s, "newfallback") == 0) + call_trace = 1; + else if (strcmp(s, "new") == 0) + call_trace = 2; + return 0; +} +early_param("call_trace", call_trace_setup); +#endif diff --git a/trunk/arch/x86_64/kernel/vmlinux.lds.S b/trunk/arch/x86_64/kernel/vmlinux.lds.S index 1e54ddf2338d..514be5dd2303 100644 --- a/trunk/arch/x86_64/kernel/vmlinux.lds.S +++ b/trunk/arch/x86_64/kernel/vmlinux.lds.S @@ -221,7 +221,9 @@ SECTIONS /* Sections to be discarded */ /DISCARD/ : { *(.exitcall.exit) +#ifndef CONFIG_UNWIND_INFO *(.eh_frame) +#endif } STABS_DEBUG diff --git a/trunk/drivers/ata/Kconfig b/trunk/drivers/ata/Kconfig index b34e0a958d0f..984ab284382a 100644 --- a/trunk/drivers/ata/Kconfig +++ b/trunk/drivers/ata/Kconfig @@ -40,9 +40,9 @@ config ATA_PIIX tristate "Intel PIIX/ICH SATA support" depends on PCI help - This option enables support for ICH5/6/7/8 Serial ATA - and support for PATA on the Intel PIIX3/PIIX4/ICH series - PATA host controllers. + This option enables support for ICH5/6/7/8 Serial ATA. + If PATA support was enabled previously, this enables + support for select Intel PIIX/ICH PATA host controllers. If unsure, say N. 
diff --git a/trunk/drivers/ata/ahci.c b/trunk/drivers/ata/ahci.c index dbae6d971041..f36da488a2c1 100644 --- a/trunk/drivers/ata/ahci.c +++ b/trunk/drivers/ata/ahci.c @@ -645,6 +645,8 @@ static int ahci_reset_controller(void __iomem *mmio, struct pci_dev *pdev) u32 cap_save, impl_save, tmp; cap_save = readl(mmio + HOST_CAP); + cap_save &= ( (1<<28) | (1<<17) ); + cap_save |= (1 << 27); impl_save = readl(mmio + HOST_PORTS_IMPL); /* global controller reset */ diff --git a/trunk/drivers/ata/ata_piix.c b/trunk/drivers/ata/ata_piix.c index 47701b286f8b..c7de0bb1591f 100644 --- a/trunk/drivers/ata/ata_piix.c +++ b/trunk/drivers/ata/ata_piix.c @@ -226,26 +226,14 @@ static const struct pci_device_id piix_pci_tbl[] = { { 0x8086, 0x27c0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich6_sata_ahci }, /* 2801GBM/GHM (ICH7M, identical to ICH6M) */ { 0x8086, 0x27c4, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich6m_sata_ahci }, - /* Enterprise Southbridge 2 (631xESB/632xESB) */ + /* Enterprise Southbridge 2 (where's the datasheet?) 
*/ { 0x8086, 0x2680, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich6_sata_ahci }, - /* SATA Controller 1 IDE (ICH8) */ + /* SATA Controller 1 IDE (ICH8, no datasheet yet) */ { 0x8086, 0x2820, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_sata_ahci }, - /* SATA Controller 2 IDE (ICH8) */ + /* SATA Controller 2 IDE (ICH8, ditto) */ { 0x8086, 0x2825, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_sata_ahci }, - /* Mobile SATA Controller IDE (ICH8M) */ + /* Mobile SATA Controller IDE (ICH8M, ditto) */ { 0x8086, 0x2828, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_sata_ahci }, - /* SATA Controller IDE (ICH9) */ - { 0x8086, 0x2920, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_sata_ahci }, - /* SATA Controller IDE (ICH9) */ - { 0x8086, 0x2921, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_sata_ahci }, - /* SATA Controller IDE (ICH9) */ - { 0x8086, 0x2926, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_sata_ahci }, - /* SATA Controller IDE (ICH9M) */ - { 0x8086, 0x2928, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_sata_ahci }, - /* SATA Controller IDE (ICH9M) */ - { 0x8086, 0x292d, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_sata_ahci }, - /* SATA Controller IDE (ICH9M) */ - { 0x8086, 0x292e, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_sata_ahci }, { } /* terminate list */ }; @@ -342,7 +330,7 @@ static const struct ata_port_operations ich_pata_ops = { .port_start = ata_port_start, .port_stop = ata_port_stop, - .host_stop = piix_host_stop, + .host_stop = ata_host_stop, }; static const struct ata_port_operations piix_sata_ops = { diff --git a/trunk/drivers/ata/libata-core.c b/trunk/drivers/ata/libata-core.c index 0d51d13b16bf..011c0a8a2dcc 100644 --- a/trunk/drivers/ata/libata-core.c +++ b/trunk/drivers/ata/libata-core.c @@ -1332,7 +1332,7 @@ unsigned ata_exec_internal_sg(struct ata_device *dev, } /** - * ata_exec_internal - execute libata internal command + * ata_exec_internal_sg - execute libata internal command * @dev: Device to which the command is sent * @tf: Taskfile registers for the command and the result * @cdb: CDB for packet command @@ -1353,17 +1353,11 @@ unsigned 
ata_exec_internal(struct ata_device *dev, struct ata_taskfile *tf, const u8 *cdb, int dma_dir, void *buf, unsigned int buflen) { - struct scatterlist *psg = NULL, sg; - unsigned int n_elem = 0; + struct scatterlist sg; - if (dma_dir != DMA_NONE) { - WARN_ON(!buf); - sg_init_one(&sg, buf, buflen); - psg = &sg; - n_elem++; - } + sg_init_one(&sg, buf, buflen); - return ata_exec_internal_sg(dev, tf, cdb, dma_dir, psg, n_elem); + return ata_exec_internal_sg(dev, tf, cdb, dma_dir, &sg, 1); } /** diff --git a/trunk/drivers/ata/libata-scsi.c b/trunk/drivers/ata/libata-scsi.c index a4790be41d1c..664e1377b54c 100644 --- a/trunk/drivers/ata/libata-scsi.c +++ b/trunk/drivers/ata/libata-scsi.c @@ -1539,7 +1539,7 @@ static unsigned int ata_scsi_rbuf_get(struct scsi_cmnd *cmd, u8 **buf_out) struct scatterlist *sg; sg = (struct scatterlist *) cmd->request_buffer; - buf = kmap_atomic(sg->page, KM_IRQ0) + sg->offset; + buf = kmap_atomic(sg->page, KM_USER0) + sg->offset; buflen = sg->length; } else { buf = cmd->request_buffer; @@ -1567,7 +1567,7 @@ static inline void ata_scsi_rbuf_put(struct scsi_cmnd *cmd, u8 *buf) struct scatterlist *sg; sg = (struct scatterlist *) cmd->request_buffer; - kunmap_atomic(buf - sg->offset, KM_IRQ0); + kunmap_atomic(buf - sg->offset, KM_USER0); } } diff --git a/trunk/drivers/ata/pata_legacy.c b/trunk/drivers/ata/pata_legacy.c index e7bf9d89c8ee..c7d1738e4e69 100644 --- a/trunk/drivers/ata/pata_legacy.c +++ b/trunk/drivers/ata/pata_legacy.c @@ -698,10 +698,8 @@ static __init int legacy_init_one(int port, unsigned long io, unsigned long ctrl goto fail_io; pdev = platform_device_register_simple(DRV_NAME, nr_legacy_host, NULL, 0); - if (IS_ERR(pdev)) { - ret = PTR_ERR(pdev); + if (pdev == NULL) goto fail_dev; - } if (ht6560a & mask) { ops = &ht6560a_port_ops; diff --git a/trunk/drivers/ata/pata_qdi.c b/trunk/drivers/ata/pata_qdi.c index afc0d990e7d6..36f621abc390 100644 --- a/trunk/drivers/ata/pata_qdi.c +++ b/trunk/drivers/ata/pata_qdi.c @@ -247,8 +247,8 
@@ static __init int qdi_init_one(unsigned long port, int type, unsigned long io, i */ pdev = platform_device_register_simple(DRV_NAME, nr_qdi_host, NULL, 0); - if (IS_ERR(pdev)) - return PTR_ERR(pdev); + if (pdev == NULL) + return -ENOMEM; memset(&ae, 0, sizeof(struct ata_probe_ent)); INIT_LIST_HEAD(&ae.node); diff --git a/trunk/drivers/ata/pata_rz1000.c b/trunk/drivers/ata/pata_rz1000.c index adf4cc134f25..3677c642c9f9 100644 --- a/trunk/drivers/ata/pata_rz1000.c +++ b/trunk/drivers/ata/pata_rz1000.c @@ -105,6 +105,8 @@ static struct ata_port_operations rz1000_port_ops = { .exec_command = ata_exec_command, .dev_select = ata_std_dev_select, + .error_handler = rz1000_error_handler, + .bmdma_setup = ata_bmdma_setup, .bmdma_start = ata_bmdma_start, .bmdma_stop = ata_bmdma_stop, diff --git a/trunk/drivers/ata/pata_via.c b/trunk/drivers/ata/pata_via.c index ff93e8f71cf8..cc09d47fb927 100644 --- a/trunk/drivers/ata/pata_via.c +++ b/trunk/drivers/ata/pata_via.c @@ -161,15 +161,10 @@ static int via_pre_reset(struct ata_port *ap) return -ENOENT; } - if ((config->flags & VIA_UDMA) >= VIA_UDMA_100) + if ((config->flags & VIA_UDMA) >= VIA_UDMA_66) ap->cbl = via_cable_detect(ap); - /* The UDMA66 series has no cable detect so do drive side detect */ - else if ((config->flags & VIA_UDMA) < VIA_UDMA_66) - ap->cbl = ATA_CBL_PATA40; else - ap->cbl = ATA_CBL_PATA_UNK; - - + ap->cbl = ATA_CBL_PATA40; return ata_std_prereset(ap); } diff --git a/trunk/drivers/ata/pata_winbond.c b/trunk/drivers/ata/pata_winbond.c index 5d1f518e1cc7..3ea345cde52e 100644 --- a/trunk/drivers/ata/pata_winbond.c +++ b/trunk/drivers/ata/pata_winbond.c @@ -206,8 +206,8 @@ static __init int winbond_init_one(unsigned long port) */ pdev = platform_device_register_simple(DRV_NAME, nr_winbond_host, NULL, 0); - if (IS_ERR(pdev)) - return PTR_ERR(pdev); + if (pdev == NULL) + return -ENOMEM; memset(&ae, 0, sizeof(struct ata_probe_ent)); INIT_LIST_HEAD(&ae.node); diff --git a/trunk/drivers/ata/sata_svw.c 
b/trunk/drivers/ata/sata_svw.c index d89c9590b845..db32d15b7fa1 100644 --- a/trunk/drivers/ata/sata_svw.c +++ b/trunk/drivers/ata/sata_svw.c @@ -56,8 +56,6 @@ #define DRV_VERSION "2.0" enum { - K2_FLAG_NO_ATAPI_DMA = (1 << 29), - /* Taskfile registers offsets */ K2_SATA_TF_CMD_OFFSET = 0x00, K2_SATA_TF_DATA_OFFSET = 0x00, @@ -85,33 +83,11 @@ enum { /* Port stride */ K2_SATA_PORT_OFFSET = 0x100, - - board_svw4 = 0, - board_svw8 = 1, -}; - -static const struct k2_board_info { - unsigned int n_ports; - unsigned long port_flags; -} k2_board_info[] = { - /* board_svw4 */ - { 4, K2_FLAG_NO_ATAPI_DMA }, - - /* board_svw8 */ - { 8, K2_FLAG_NO_ATAPI_DMA }, }; static u8 k2_stat_check_status(struct ata_port *ap); -static int k2_sata_check_atapi_dma(struct ata_queued_cmd *qc) -{ - if (qc->ap->flags & K2_FLAG_NO_ATAPI_DMA) - return -1; /* ATAPI DMA not supported */ - - return 0; -} - static u32 k2_sata_scr_read (struct ata_port *ap, unsigned int sc_reg) { if (sc_reg > SCR_CONTROL) @@ -337,7 +313,6 @@ static const struct ata_port_operations k2_sata_ops = { .check_status = k2_stat_check_status, .exec_command = ata_exec_command, .dev_select = ata_std_dev_select, - .check_atapi_dma = k2_sata_check_atapi_dma, .bmdma_setup = k2_bmdma_setup_mmio, .bmdma_start = k2_bmdma_start_mmio, .bmdma_stop = ata_bmdma_stop, @@ -384,8 +359,6 @@ static int k2_sata_init_one (struct pci_dev *pdev, const struct pci_device_id *e struct ata_probe_ent *probe_ent = NULL; unsigned long base; void __iomem *mmio_base; - const struct k2_board_info *board_info = - &k2_board_info[ent->driver_data]; int pci_dev_busy = 0; int rc; int i; @@ -451,7 +424,7 @@ static int k2_sata_init_one (struct pci_dev *pdev, const struct pci_device_id *e probe_ent->sht = &k2_sata_sht; probe_ent->port_flags = ATA_FLAG_SATA | ATA_FLAG_NO_LEGACY | - ATA_FLAG_MMIO | board_info->port_flags; + ATA_FLAG_MMIO; probe_ent->port_ops = &k2_sata_ops; probe_ent->n_ports = 4; probe_ent->irq = pdev->irq; @@ -468,7 +441,7 @@ static int 
k2_sata_init_one (struct pci_dev *pdev, const struct pci_device_id *e /* different controllers have different number of ports - currently 4 or 8 */ /* All ports are on the same function. Multi-function device is no * longer available. This should not be seen in any system. */ - for (i = 0; i < board_info->n_ports; i++) + for (i = 0; i < ent->driver_data; i++) k2_sata_setup_port(&probe_ent->port[i], base + i * K2_SATA_PORT_OFFSET); pci_set_master(pdev); @@ -496,11 +469,11 @@ static int k2_sata_init_one (struct pci_dev *pdev, const struct pci_device_id *e * controller * */ static const struct pci_device_id k2_sata_pci_tbl[] = { - { PCI_VDEVICE(SERVERWORKS, 0x0240), board_svw4 }, - { PCI_VDEVICE(SERVERWORKS, 0x0241), board_svw4 }, - { PCI_VDEVICE(SERVERWORKS, 0x0242), board_svw8 }, - { PCI_VDEVICE(SERVERWORKS, 0x024a), board_svw4 }, - { PCI_VDEVICE(SERVERWORKS, 0x024b), board_svw4 }, + { PCI_VDEVICE(SERVERWORKS, 0x0240), 4 }, + { PCI_VDEVICE(SERVERWORKS, 0x0241), 4 }, + { PCI_VDEVICE(SERVERWORKS, 0x0242), 8 }, + { PCI_VDEVICE(SERVERWORKS, 0x024a), 4 }, + { PCI_VDEVICE(SERVERWORKS, 0x024b), 4 }, { } }; diff --git a/trunk/drivers/s390/char/sclp_cpi.c b/trunk/drivers/s390/char/sclp_cpi.c index 4f873ae148b7..f7c10d954ec6 100644 --- a/trunk/drivers/s390/char/sclp_cpi.c +++ b/trunk/drivers/s390/char/sclp_cpi.c @@ -49,8 +49,6 @@ static struct sclp_register sclp_cpi_event = .send_mask = EvTyp_CtlProgIdent_Mask }; -MODULE_LICENSE("GPL"); - MODULE_AUTHOR( "Martin Peschke, IBM Deutschland Entwicklung GmbH " ""); diff --git a/trunk/drivers/s390/cio/cio.c b/trunk/drivers/s390/cio/cio.c index 3a403f195cf8..7835a714a405 100644 --- a/trunk/drivers/s390/cio/cio.c +++ b/trunk/drivers/s390/cio/cio.c @@ -871,32 +871,11 @@ __clear_subchannel_easy(struct subchannel_id schid) return -EBUSY; } -static int pgm_check_occured; - -static void cio_reset_pgm_check_handler(void) -{ - pgm_check_occured = 1; -} - -static int stsch_reset(struct subchannel_id schid, volatile struct schib *addr) -{ - 
int rc; - - pgm_check_occured = 0; - s390_reset_pgm_handler = cio_reset_pgm_check_handler; - rc = stsch(schid, addr); - s390_reset_pgm_handler = NULL; - if (pgm_check_occured) - return -EIO; - else - return rc; -} - static int __shutdown_subchannel_easy(struct subchannel_id schid, void *data) { struct schib schib; - if (stsch_reset(schid, &schib)) + if (stsch_err(schid, &schib)) return -ENXIO; if (!schib.pmcw.ena) return 0; @@ -993,7 +972,7 @@ static int __reipl_subchannel_match(struct subchannel_id schid, void *data) struct schib schib; struct sch_match_id *match_id = data; - if (stsch_reset(schid, &schib)) + if (stsch_err(schid, &schib)) return -ENXIO; if (schib.pmcw.dnv && (schib.pmcw.dev == match_id->devid.devno) && diff --git a/trunk/drivers/s390/cio/css.c b/trunk/drivers/s390/cio/css.c index 9d6c02446863..4c81d890791e 100644 --- a/trunk/drivers/s390/cio/css.c +++ b/trunk/drivers/s390/cio/css.c @@ -139,8 +139,6 @@ css_register_subchannel(struct subchannel *sch) sch->dev.release = &css_subchannel_release; sch->dev.groups = subch_attr_groups; - css_get_ssd_info(sch); - /* make it known to the system */ ret = css_sch_device_register(sch); if (ret) { @@ -148,6 +146,7 @@ css_register_subchannel(struct subchannel *sch) __func__, sch->dev.bus_id); return ret; } + css_get_ssd_info(sch); return ret; } diff --git a/trunk/drivers/s390/cio/qdio.c b/trunk/drivers/s390/cio/qdio.c index 6fd1940842eb..9d4ea449a608 100644 --- a/trunk/drivers/s390/cio/qdio.c +++ b/trunk/drivers/s390/cio/qdio.c @@ -979,11 +979,12 @@ __qdio_outbound_processing(struct qdio_q *q) if (q->is_iqdio_q) { /* - * for asynchronous queues, we better check, if the sent - * buffer is already switched from PRIMED to EMPTY. + * for asynchronous queues, we better check, if the fill + * level is too high. for synchronous queues, the fill + * level will never be that high. 
*/ - if ((q->queue_type == QDIO_IQDIO_QFMT_ASYNCH) && - !qdio_is_outbound_q_done(q)) + if (atomic_read(&q->number_of_buffers_used)> + IQDIO_FILL_LEVEL_TO_POLL) qdio_mark_q(q); } else if (!q->hydra_gives_outbound_pcis) @@ -1824,10 +1825,6 @@ qdio_fill_qs(struct qdio_irq *irq_ptr, struct ccw_device *cdev, q->sbal[j]=*(outbound_sbals_array++); q->queue_type=q_format; - if ((q->queue_type == QDIO_IQDIO_QFMT) && - (no_output_qs > 1) && - (i == no_output_qs-1)) - q->queue_type = QDIO_IQDIO_QFMT_ASYNCH; q->int_parm=int_parm; q->is_input_q=0; q->schid = irq_ptr->schid; diff --git a/trunk/drivers/s390/crypto/ap_bus.c b/trunk/drivers/s390/crypto/ap_bus.c index 81b5899f4010..ad60afe5dd11 100644 --- a/trunk/drivers/s390/crypto/ap_bus.c +++ b/trunk/drivers/s390/crypto/ap_bus.c @@ -1129,15 +1129,7 @@ static void ap_poll_thread_stop(void) mutex_unlock(&ap_poll_thread_mutex); } -static void ap_reset_domain(void) -{ - int i; - - for (i = 0; i < AP_DEVICES; i++) - ap_reset_queue(AP_MKQID(i, ap_domain_index)); -} - -static void ap_reset_all(void) +static void ap_reset(void) { int i, j; @@ -1147,7 +1139,7 @@ static void ap_reset_all(void) } static struct reset_call ap_reset_call = { - .fn = ap_reset_all, + .fn = ap_reset, }; /** @@ -1237,12 +1229,10 @@ void ap_module_exit(void) int i; struct device *dev; - ap_reset_domain(); ap_poll_thread_stop(); del_timer_sync(&ap_config_timer); del_timer_sync(&ap_poll_timer); destroy_workqueue(ap_work_queue); - tasklet_kill(&ap_tasklet); s390_root_dev_unregister(ap_root_device); while ((dev = bus_find_device(&ap_bus_type, NULL, NULL, __ap_match_all))) diff --git a/trunk/include/asm-generic/vmlinux.lds.h b/trunk/include/asm-generic/vmlinux.lds.h index 1587121730c5..7437ccaada77 100644 --- a/trunk/include/asm-generic/vmlinux.lds.h +++ b/trunk/include/asm-generic/vmlinux.lds.h @@ -119,6 +119,8 @@ *(__ksymtab_strings) \ } \ \ + EH_FRAME \ + \ /* Built-in module parameters. 
*/ \ __param : AT(ADDR(__param) - LOAD_OFFSET) { \ VMLINUX_SYMBOL(__start___param) = .; \ @@ -158,6 +160,26 @@ *(.kprobes.text) \ VMLINUX_SYMBOL(__kprobes_text_end) = .; +#ifdef CONFIG_STACK_UNWIND +#define EH_FRAME \ + /* Unwind data binary search table */ \ + . = ALIGN(8); \ + .eh_frame_hdr : AT(ADDR(.eh_frame_hdr) - LOAD_OFFSET) { \ + VMLINUX_SYMBOL(__start_unwind_hdr) = .; \ + *(.eh_frame_hdr) \ + VMLINUX_SYMBOL(__end_unwind_hdr) = .; \ + } \ + /* Unwind data */ \ + . = ALIGN(8); \ + .eh_frame : AT(ADDR(.eh_frame) - LOAD_OFFSET) { \ + VMLINUX_SYMBOL(__start_unwind) = .; \ + *(.eh_frame) \ + VMLINUX_SYMBOL(__end_unwind) = .; \ + } +#else +#define EH_FRAME +#endif + /* DWARF debug sections. Symbols in the DWARF debugging sections are relative to the beginning of the section so we begin them at 0. */ diff --git a/trunk/include/asm-i386/unwind.h b/trunk/include/asm-i386/unwind.h index 43c70c3de2f9..aa2c931e30db 100644 --- a/trunk/include/asm-i386/unwind.h +++ b/trunk/include/asm-i386/unwind.h @@ -1,6 +1,95 @@ #ifndef _ASM_I386_UNWIND_H #define _ASM_I386_UNWIND_H +/* + * Copyright (C) 2002-2006 Novell, Inc. + * Jan Beulich + * This code is released under version 2 of the GNU GPL. 
+ */ + +#ifdef CONFIG_STACK_UNWIND + +#include +#include +#include +#include + +struct unwind_frame_info +{ + struct pt_regs regs; + struct task_struct *task; + unsigned call_frame:1; +}; + +#define UNW_PC(frame) (frame)->regs.eip +#define UNW_SP(frame) (frame)->regs.esp +#ifdef CONFIG_FRAME_POINTER +#define UNW_FP(frame) (frame)->regs.ebp +#define FRAME_RETADDR_OFFSET 4 +#define FRAME_LINK_OFFSET 0 +#define STACK_BOTTOM(tsk) STACK_LIMIT((tsk)->thread.esp0) +#define STACK_TOP(tsk) ((tsk)->thread.esp0) +#else +#define UNW_FP(frame) ((void)(frame), 0) +#endif +#define STACK_LIMIT(ptr) (((ptr) - 1) & ~(THREAD_SIZE - 1)) + +#define UNW_REGISTER_INFO \ + PTREGS_INFO(eax), \ + PTREGS_INFO(ecx), \ + PTREGS_INFO(edx), \ + PTREGS_INFO(ebx), \ + PTREGS_INFO(esp), \ + PTREGS_INFO(ebp), \ + PTREGS_INFO(esi), \ + PTREGS_INFO(edi), \ + PTREGS_INFO(eip) + +#define UNW_DEFAULT_RA(raItem, dataAlign) \ + ((raItem).where == Memory && \ + !((raItem).value * (dataAlign) + 4)) + +static inline void arch_unw_init_frame_info(struct unwind_frame_info *info, + /*const*/ struct pt_regs *regs) +{ + if (user_mode_vm(regs)) + info->regs = *regs; + else { + memcpy(&info->regs, regs, offsetof(struct pt_regs, esp)); + info->regs.esp = (unsigned long)&regs->esp; + info->regs.xss = __KERNEL_DS; + } +} + +static inline void arch_unw_init_blocked(struct unwind_frame_info *info) +{ + memset(&info->regs, 0, sizeof(info->regs)); + info->regs.eip = info->task->thread.eip; + info->regs.xcs = __KERNEL_CS; + __get_user(info->regs.ebp, (long *)info->task->thread.esp); + info->regs.esp = info->task->thread.esp; + info->regs.xss = __KERNEL_DS; + info->regs.xds = __USER_DS; + info->regs.xes = __USER_DS; + info->regs.xgs = __KERNEL_PDA; +} + +extern asmlinkage int arch_unwind_init_running(struct unwind_frame_info *, + asmlinkage int (*callback)(struct unwind_frame_info *, + void *arg), + void *arg); + +static inline int arch_unw_user_mode(/*const*/ struct unwind_frame_info *info) +{ + return 
user_mode_vm(&info->regs) + || info->regs.eip < PAGE_OFFSET + || (info->regs.eip >= __fix_to_virt(FIX_VDSO) + && info->regs.eip < __fix_to_virt(FIX_VDSO) + PAGE_SIZE) + || info->regs.esp < PAGE_OFFSET; +} + +#else + #define UNW_PC(frame) ((void)(frame), 0) #define UNW_SP(frame) ((void)(frame), 0) #define UNW_FP(frame) ((void)(frame), 0) @@ -10,4 +99,6 @@ static inline int arch_unw_user_mode(const void *info) return 0; } +#endif + #endif /* _ASM_I386_UNWIND_H */ diff --git a/trunk/include/asm-s390/qdio.h b/trunk/include/asm-s390/qdio.h index 127f72e77419..7189c79bc673 100644 --- a/trunk/include/asm-s390/qdio.h +++ b/trunk/include/asm-s390/qdio.h @@ -34,7 +34,6 @@ #define QDIO_QETH_QFMT 0 #define QDIO_ZFCP_QFMT 1 #define QDIO_IQDIO_QFMT 2 -#define QDIO_IQDIO_QFMT_ASYNCH 3 struct qdio_buffer_element{ unsigned int flags; diff --git a/trunk/include/asm-s390/reset.h b/trunk/include/asm-s390/reset.h index 532e65a2aafc..9b439cf67800 100644 --- a/trunk/include/asm-s390/reset.h +++ b/trunk/include/asm-s390/reset.h @@ -19,6 +19,5 @@ extern void register_reset_call(struct reset_call *reset); extern void unregister_reset_call(struct reset_call *reset); extern void s390_reset_system(void); extern void (*s390_reset_mcck_handler)(void); -extern void (*s390_reset_pgm_handler)(void); #endif /* _ASM_S390_RESET_H */ diff --git a/trunk/include/asm-x86_64/unwind.h b/trunk/include/asm-x86_64/unwind.h index 02710f6a4560..2f6349e48717 100644 --- a/trunk/include/asm-x86_64/unwind.h +++ b/trunk/include/asm-x86_64/unwind.h @@ -1,6 +1,100 @@ #ifndef _ASM_X86_64_UNWIND_H #define _ASM_X86_64_UNWIND_H +/* + * Copyright (C) 2002-2006 Novell, Inc. + * Jan Beulich + * This code is released under version 2 of the GNU GPL. 
+ */ + +#ifdef CONFIG_STACK_UNWIND + +#include +#include +#include +#include + +struct unwind_frame_info +{ + struct pt_regs regs; + struct task_struct *task; + unsigned call_frame:1; +}; + +#define UNW_PC(frame) (frame)->regs.rip +#define UNW_SP(frame) (frame)->regs.rsp +#ifdef CONFIG_FRAME_POINTER +#define UNW_FP(frame) (frame)->regs.rbp +#define FRAME_RETADDR_OFFSET 8 +#define FRAME_LINK_OFFSET 0 +#define STACK_BOTTOM(tsk) (((tsk)->thread.rsp0 - 1) & ~(THREAD_SIZE - 1)) +#define STACK_TOP(tsk) ((tsk)->thread.rsp0) +#endif +/* Might need to account for the special exception and interrupt handling + stacks here, since normally + EXCEPTION_STACK_ORDER < THREAD_ORDER < IRQSTACK_ORDER, + but the construct is needed only for getting across the stack switch to + the interrupt stack - thus considering the IRQ stack itself is unnecessary, + and the overhead of comparing against all exception handling stacks seems + not desirable. */ +#define STACK_LIMIT(ptr) (((ptr) - 1) & ~(THREAD_SIZE - 1)) + +#define UNW_REGISTER_INFO \ + PTREGS_INFO(rax), \ + PTREGS_INFO(rdx), \ + PTREGS_INFO(rcx), \ + PTREGS_INFO(rbx), \ + PTREGS_INFO(rsi), \ + PTREGS_INFO(rdi), \ + PTREGS_INFO(rbp), \ + PTREGS_INFO(rsp), \ + PTREGS_INFO(r8), \ + PTREGS_INFO(r9), \ + PTREGS_INFO(r10), \ + PTREGS_INFO(r11), \ + PTREGS_INFO(r12), \ + PTREGS_INFO(r13), \ + PTREGS_INFO(r14), \ + PTREGS_INFO(r15), \ + PTREGS_INFO(rip) + +#define UNW_DEFAULT_RA(raItem, dataAlign) \ + ((raItem).where == Memory && \ + !((raItem).value * (dataAlign) + 8)) + +static inline void arch_unw_init_frame_info(struct unwind_frame_info *info, + /*const*/ struct pt_regs *regs) +{ + info->regs = *regs; +} + +static inline void arch_unw_init_blocked(struct unwind_frame_info *info) +{ + extern const char thread_return[]; + + memset(&info->regs, 0, sizeof(info->regs)); + info->regs.rip = (unsigned long)thread_return; + info->regs.cs = __KERNEL_CS; + __get_user(info->regs.rbp, (unsigned long *)info->task->thread.rsp); + info->regs.rsp = 
info->task->thread.rsp; + info->regs.ss = __KERNEL_DS; +} + +extern int arch_unwind_init_running(struct unwind_frame_info *, + int (*callback)(struct unwind_frame_info *, + void *arg), + void *arg); + +static inline int arch_unw_user_mode(const struct unwind_frame_info *info) +{ + return user_mode(&info->regs) + || (long)info->regs.rip >= 0 + || (info->regs.rip >= VSYSCALL_START && info->regs.rip < VSYSCALL_END) + || (long)info->regs.rsp >= 0; +} + +#else + #define UNW_PC(frame) ((void)(frame), 0UL) #define UNW_SP(frame) ((void)(frame), 0UL) @@ -9,4 +103,6 @@ static inline int arch_unw_user_mode(const void *info) return 0; } +#endif + #endif /* _ASM_X86_64_UNWIND_H */ diff --git a/trunk/include/linux/unwind.h b/trunk/include/linux/unwind.h index 7760860fa170..749928c161fb 100644 --- a/trunk/include/linux/unwind.h +++ b/trunk/include/linux/unwind.h @@ -14,6 +14,63 @@ struct module; +#ifdef CONFIG_STACK_UNWIND + +#include + +#ifndef ARCH_UNWIND_SECTION_NAME +#define ARCH_UNWIND_SECTION_NAME ".eh_frame" +#endif + +/* + * Initialize unwind support. + */ +extern void unwind_init(void); +extern void unwind_setup(void); + +#ifdef CONFIG_MODULES + +extern void *unwind_add_table(struct module *, + const void *table_start, + unsigned long table_size); + +extern void unwind_remove_table(void *handle, int init_only); + +#endif + +extern int unwind_init_frame_info(struct unwind_frame_info *, + struct task_struct *, + /*const*/ struct pt_regs *); + +/* + * Prepare to unwind a blocked task. + */ +extern int unwind_init_blocked(struct unwind_frame_info *, + struct task_struct *); + +/* + * Prepare to unwind the currently running thread. + */ +extern int unwind_init_running(struct unwind_frame_info *, + asmlinkage int (*callback)(struct unwind_frame_info *, + void *arg), + void *arg); + +/* + * Unwind to previous to frame. Returns 0 if successful, negative + * number in case of an error. 
+ */ +extern int unwind(struct unwind_frame_info *); + +/* + * Unwind until the return pointer is in user-land (or until an error + * occurs). Returns 0 if successful, negative number in case of + * error. + */ +extern int unwind_to_user(struct unwind_frame_info *); + +#else + struct unwind_frame_info {}; static inline void unwind_init(void) {} @@ -28,12 +85,12 @@ static inline void *unwind_add_table(struct module *mod, return NULL; } +#endif + static inline void unwind_remove_table(void *handle, int init_only) { } -#endif - static inline int unwind_init_frame_info(struct unwind_frame_info *info, struct task_struct *tsk, const struct pt_regs *regs) @@ -65,4 +122,6 @@ static inline int unwind_to_user(struct unwind_frame_info *info) return -ENOSYS; } +#endif + #endif /* _LINUX_UNWIND_H */ diff --git a/trunk/include/linux/workqueue.h b/trunk/include/linux/workqueue.h index 2a7b38d87018..edef8d50b26b 100644 --- a/trunk/include/linux/workqueue.h +++ b/trunk/include/linux/workqueue.h @@ -8,21 +8,16 @@ #include #include #include -#include struct workqueue_struct; struct work_struct; typedef void (*work_func_t)(struct work_struct *work); -/* - * The first word is the work queue pointer and the flags rolled into - * one - */ -#define work_data_bits(work) ((unsigned long *)(&(work)->data)) - struct work_struct { - atomic_long_t data; + /* the first word is the work queue pointer and the flags rolled into + * one */ + unsigned long management; #define WORK_STRUCT_PENDING 0 /* T if work item pending execution */ #define WORK_STRUCT_NOAUTOREL 1 /* F if work item automatically released on exec */ #define WORK_STRUCT_FLAG_MASK (3UL) @@ -31,9 +26,6 @@ struct work_struct { work_func_t func; }; -#define WORK_DATA_INIT(autorelease) \ - ATOMIC_LONG_INIT((autorelease) << WORK_STRUCT_NOAUTOREL) - struct delayed_work { struct work_struct work; struct timer_list timer; @@ -44,13 +36,13 @@ struct execute_work { }; #define __WORK_INITIALIZER(n, f) { \ - .data = WORK_DATA_INIT(0), \ + 
.management = 0, \ .entry = { &(n).entry, &(n).entry }, \ .func = (f), \ } #define __WORK_INITIALIZER_NAR(n, f) { \ - .data = WORK_DATA_INIT(1), \ + .management = (1 << WORK_STRUCT_NOAUTOREL), \ .entry = { &(n).entry, &(n).entry }, \ .func = (f), \ } @@ -90,21 +82,17 @@ struct execute_work { /* * initialize all of a work item in one go - * - * NOTE! No point in using "atomic_long_set()": useing a direct - * assignment of the work data initializer allows the compiler - * to generate better code. */ #define INIT_WORK(_work, _func) \ do { \ - (_work)->data = (atomic_long_t) WORK_DATA_INIT(0); \ + (_work)->management = 0; \ INIT_LIST_HEAD(&(_work)->entry); \ PREPARE_WORK((_work), (_func)); \ } while (0) #define INIT_WORK_NAR(_work, _func) \ do { \ - (_work)->data = (atomic_long_t) WORK_DATA_INIT(1); \ + (_work)->management = (1 << WORK_STRUCT_NOAUTOREL); \ INIT_LIST_HEAD(&(_work)->entry); \ PREPARE_WORK((_work), (_func)); \ } while (0) @@ -126,15 +114,15 @@ struct execute_work { * @work: The work item in question */ #define work_pending(work) \ - test_bit(WORK_STRUCT_PENDING, work_data_bits(work)) + test_bit(WORK_STRUCT_PENDING, &(work)->management) /** * delayed_work_pending - Find out whether a delayable work item is currently * pending * @work: The work item in question */ -#define delayed_work_pending(w) \ - work_pending(&(w)->work) +#define delayed_work_pending(work) \ + test_bit(WORK_STRUCT_PENDING, &(work)->work.management) /** * work_release - Release a work item under execution @@ -155,7 +143,7 @@ struct execute_work { * This should also be used to release a delayed work item. 
*/ #define work_release(work) \ - clear_bit(WORK_STRUCT_PENDING, work_data_bits(work)) + clear_bit(WORK_STRUCT_PENDING, &(work)->management) extern struct workqueue_struct *__create_workqueue(const char *name, @@ -200,7 +188,7 @@ static inline int cancel_delayed_work(struct delayed_work *work) ret = del_timer_sync(&work->timer); if (ret) - work_release(&work->work); + clear_bit(WORK_STRUCT_PENDING, &work->work.management); return ret; } diff --git a/trunk/include/rdma/ib_verbs.h b/trunk/include/rdma/ib_verbs.h index 3c2e10574b23..0bfa3328d686 100644 --- a/trunk/include/rdma/ib_verbs.h +++ b/trunk/include/rdma/ib_verbs.h @@ -1639,7 +1639,14 @@ static inline void *ib_dma_alloc_coherent(struct ib_device *dev, { if (dev->dma_ops) return dev->dma_ops->alloc_coherent(dev, size, dma_handle, flag); - return dma_alloc_coherent(dev->dma_device, size, dma_handle, flag); + else { + dma_addr_t handle; + void *ret; + + ret = dma_alloc_coherent(dev->dma_device, size, &handle, flag); + *dma_handle = handle; + return ret; + } } /** diff --git a/trunk/kernel/Makefile b/trunk/kernel/Makefile index 14f4d45e0ae9..5e3f3b75563a 100644 --- a/trunk/kernel/Makefile +++ b/trunk/kernel/Makefile @@ -31,6 +31,7 @@ obj-$(CONFIG_PROVE_LOCKING) += spinlock.o obj-$(CONFIG_UID16) += uid16.o obj-$(CONFIG_MODULES) += module.o obj-$(CONFIG_KALLSYMS) += kallsyms.o +obj-$(CONFIG_STACK_UNWIND) += unwind.o obj-$(CONFIG_PM) += power/ obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o obj-$(CONFIG_KEXEC) += kexec.o diff --git a/trunk/kernel/unwind.c b/trunk/kernel/unwind.c new file mode 100644 index 000000000000..09c261329249 --- /dev/null +++ b/trunk/kernel/unwind.c @@ -0,0 +1,1305 @@ +/* + * Copyright (C) 2002-2006 Novell, Inc. + * Jan Beulich + * This code is released under version 2 of the GNU GPL. + * + * A simple API for unwinding kernel stacks. This is used for + * debugging and error reporting purposes. 
The kernel doesn't need + * full-blown stack unwinding with all the bells and whistles, so there + * is not much point in implementing the full Dwarf2 unwind API. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +extern const char __start_unwind[], __end_unwind[]; +extern const u8 __start_unwind_hdr[], __end_unwind_hdr[]; + +#define MAX_STACK_DEPTH 8 + +#define EXTRA_INFO(f) { \ + BUILD_BUG_ON_ZERO(offsetof(struct unwind_frame_info, f) \ + % FIELD_SIZEOF(struct unwind_frame_info, f)) \ + + offsetof(struct unwind_frame_info, f) \ + / FIELD_SIZEOF(struct unwind_frame_info, f), \ + FIELD_SIZEOF(struct unwind_frame_info, f) \ + } +#define PTREGS_INFO(f) EXTRA_INFO(regs.f) + +static const struct { + unsigned offs:BITS_PER_LONG / 2; + unsigned width:BITS_PER_LONG / 2; +} reg_info[] = { + UNW_REGISTER_INFO +}; + +#undef PTREGS_INFO +#undef EXTRA_INFO + +#ifndef REG_INVALID +#define REG_INVALID(r) (reg_info[r].width == 0) +#endif + +#define DW_CFA_nop 0x00 +#define DW_CFA_set_loc 0x01 +#define DW_CFA_advance_loc1 0x02 +#define DW_CFA_advance_loc2 0x03 +#define DW_CFA_advance_loc4 0x04 +#define DW_CFA_offset_extended 0x05 +#define DW_CFA_restore_extended 0x06 +#define DW_CFA_undefined 0x07 +#define DW_CFA_same_value 0x08 +#define DW_CFA_register 0x09 +#define DW_CFA_remember_state 0x0a +#define DW_CFA_restore_state 0x0b +#define DW_CFA_def_cfa 0x0c +#define DW_CFA_def_cfa_register 0x0d +#define DW_CFA_def_cfa_offset 0x0e +#define DW_CFA_def_cfa_expression 0x0f +#define DW_CFA_expression 0x10 +#define DW_CFA_offset_extended_sf 0x11 +#define DW_CFA_def_cfa_sf 0x12 +#define DW_CFA_def_cfa_offset_sf 0x13 +#define DW_CFA_val_offset 0x14 +#define DW_CFA_val_offset_sf 0x15 +#define DW_CFA_val_expression 0x16 +#define DW_CFA_lo_user 0x1c +#define DW_CFA_GNU_window_save 0x2d +#define DW_CFA_GNU_args_size 0x2e +#define DW_CFA_GNU_negative_offset_extended 0x2f +#define DW_CFA_hi_user 0x3f + +#define DW_EH_PE_FORM 0x07 +#define 
DW_EH_PE_native 0x00 +#define DW_EH_PE_leb128 0x01 +#define DW_EH_PE_data2 0x02 +#define DW_EH_PE_data4 0x03 +#define DW_EH_PE_data8 0x04 +#define DW_EH_PE_signed 0x08 +#define DW_EH_PE_ADJUST 0x70 +#define DW_EH_PE_abs 0x00 +#define DW_EH_PE_pcrel 0x10 +#define DW_EH_PE_textrel 0x20 +#define DW_EH_PE_datarel 0x30 +#define DW_EH_PE_funcrel 0x40 +#define DW_EH_PE_aligned 0x50 +#define DW_EH_PE_indirect 0x80 +#define DW_EH_PE_omit 0xff + +typedef unsigned long uleb128_t; +typedef signed long sleb128_t; +#define sleb128abs __builtin_labs + +static struct unwind_table { + struct { + unsigned long pc; + unsigned long range; + } core, init; + const void *address; + unsigned long size; + const unsigned char *header; + unsigned long hdrsz; + struct unwind_table *link; + const char *name; +} root_table; + +struct unwind_item { + enum item_location { + Nowhere, + Memory, + Register, + Value + } where; + uleb128_t value; +}; + +struct unwind_state { + uleb128_t loc, org; + const u8 *cieStart, *cieEnd; + uleb128_t codeAlign; + sleb128_t dataAlign; + struct cfa { + uleb128_t reg, offs; + } cfa; + struct unwind_item regs[ARRAY_SIZE(reg_info)]; + unsigned stackDepth:8; + unsigned version:8; + const u8 *label; + const u8 *stack[MAX_STACK_DEPTH]; +}; + +static const struct cfa badCFA = { ARRAY_SIZE(reg_info), 1 }; + +static unsigned unwind_debug; +static int __init unwind_debug_setup(char *s) +{ + unwind_debug = simple_strtoul(s, NULL, 0); + return 1; +} +__setup("unwind_debug=", unwind_debug_setup); +#define dprintk(lvl, fmt, args...) 
\ + ((void)(lvl > unwind_debug \ + || printk(KERN_DEBUG "unwind: " fmt "\n", ##args))) + +static struct unwind_table *find_table(unsigned long pc) +{ + struct unwind_table *table; + + for (table = &root_table; table; table = table->link) + if ((pc >= table->core.pc + && pc < table->core.pc + table->core.range) + || (pc >= table->init.pc + && pc < table->init.pc + table->init.range)) + break; + + return table; +} + +static unsigned long read_pointer(const u8 **pLoc, + const void *end, + signed ptrType, + unsigned long text_base, + unsigned long data_base); + +static void init_unwind_table(struct unwind_table *table, + const char *name, + const void *core_start, + unsigned long core_size, + const void *init_start, + unsigned long init_size, + const void *table_start, + unsigned long table_size, + const u8 *header_start, + unsigned long header_size) +{ + const u8 *ptr = header_start + 4; + const u8 *end = header_start + header_size; + + table->core.pc = (unsigned long)core_start; + table->core.range = core_size; + table->init.pc = (unsigned long)init_start; + table->init.range = init_size; + table->address = table_start; + table->size = table_size; + /* See if the linker provided table looks valid. 
*/ + if (header_size <= 4 + || header_start[0] != 1 + || (void *)read_pointer(&ptr, end, header_start[1], 0, 0) + != table_start + || !read_pointer(&ptr, end, header_start[2], 0, 0) + || !read_pointer(&ptr, end, header_start[3], 0, + (unsigned long)header_start) + || !read_pointer(&ptr, end, header_start[3], 0, + (unsigned long)header_start)) + header_start = NULL; + table->hdrsz = header_size; + smp_wmb(); + table->header = header_start; + table->link = NULL; + table->name = name; +} + +void __init unwind_init(void) +{ + init_unwind_table(&root_table, "kernel", + _text, _end - _text, + NULL, 0, + __start_unwind, __end_unwind - __start_unwind, + __start_unwind_hdr, __end_unwind_hdr - __start_unwind_hdr); +} + +static const u32 bad_cie, not_fde; +static const u32 *cie_for_fde(const u32 *fde, const struct unwind_table *); +static signed fde_pointer_type(const u32 *cie); + +struct eh_frame_hdr_table_entry { + unsigned long start, fde; +}; + +static int cmp_eh_frame_hdr_table_entries(const void *p1, const void *p2) +{ + const struct eh_frame_hdr_table_entry *e1 = p1; + const struct eh_frame_hdr_table_entry *e2 = p2; + + return (e1->start > e2->start) - (e1->start < e2->start); +} + +static void swap_eh_frame_hdr_table_entries(void *p1, void *p2, int size) +{ + struct eh_frame_hdr_table_entry *e1 = p1; + struct eh_frame_hdr_table_entry *e2 = p2; + unsigned long v; + + v = e1->start; + e1->start = e2->start; + e2->start = v; + v = e1->fde; + e1->fde = e2->fde; + e2->fde = v; +} + +static void __init setup_unwind_table(struct unwind_table *table, + void *(*alloc)(unsigned long)) +{ + const u8 *ptr; + unsigned long tableSize = table->size, hdrSize; + unsigned n; + const u32 *fde; + struct { + u8 version; + u8 eh_frame_ptr_enc; + u8 fde_count_enc; + u8 table_enc; + unsigned long eh_frame_ptr; + unsigned int fde_count; + struct eh_frame_hdr_table_entry table[]; + } __attribute__((__packed__)) *header; + + if (table->header) + return; + + if (table->hdrsz) + 
printk(KERN_WARNING ".eh_frame_hdr for '%s' present but unusable\n", + table->name); + + if (tableSize & (sizeof(*fde) - 1)) + return; + + for (fde = table->address, n = 0; + tableSize > sizeof(*fde) && tableSize - sizeof(*fde) >= *fde; + tableSize -= sizeof(*fde) + *fde, fde += 1 + *fde / sizeof(*fde)) { + const u32 *cie = cie_for_fde(fde, table); + signed ptrType; + + if (cie == &not_fde) + continue; + if (cie == NULL + || cie == &bad_cie + || (ptrType = fde_pointer_type(cie)) < 0) + return; + ptr = (const u8 *)(fde + 2); + if (!read_pointer(&ptr, + (const u8 *)(fde + 1) + *fde, + ptrType, 0, 0)) + return; + ++n; + } + + if (tableSize || !n) + return; + + hdrSize = 4 + sizeof(unsigned long) + sizeof(unsigned int) + + 2 * n * sizeof(unsigned long); + dprintk(2, "Binary lookup table size for %s: %lu bytes", table->name, hdrSize); + header = alloc(hdrSize); + if (!header) + return; + header->version = 1; + header->eh_frame_ptr_enc = DW_EH_PE_abs|DW_EH_PE_native; + header->fde_count_enc = DW_EH_PE_abs|DW_EH_PE_data4; + header->table_enc = DW_EH_PE_abs|DW_EH_PE_native; + put_unaligned((unsigned long)table->address, &header->eh_frame_ptr); + BUILD_BUG_ON(offsetof(typeof(*header), fde_count) + % __alignof(typeof(header->fde_count))); + header->fde_count = n; + + BUILD_BUG_ON(offsetof(typeof(*header), table) + % __alignof(typeof(*header->table))); + for (fde = table->address, tableSize = table->size, n = 0; + tableSize; + tableSize -= sizeof(*fde) + *fde, fde += 1 + *fde / sizeof(*fde)) { + const u32 *cie = fde + 1 - fde[1] / sizeof(*fde); + + if (!fde[1]) + continue; /* this is a CIE */ + ptr = (const u8 *)(fde + 2); + header->table[n].start = read_pointer(&ptr, + (const u8 *)(fde + 1) + *fde, + fde_pointer_type(cie), 0, 0); + header->table[n].fde = (unsigned long)fde; + ++n; + } + WARN_ON(n != header->fde_count); + + sort(header->table, + n, + sizeof(*header->table), + cmp_eh_frame_hdr_table_entries, + swap_eh_frame_hdr_table_entries); + + table->hdrsz = hdrSize; + 
smp_wmb(); + table->header = (const void *)header; +} + +static void *__init balloc(unsigned long sz) +{ + return __alloc_bootmem_nopanic(sz, + sizeof(unsigned int), + __pa(MAX_DMA_ADDRESS)); +} + +void __init unwind_setup(void) +{ + setup_unwind_table(&root_table, balloc); +} + +#ifdef CONFIG_MODULES + +static struct unwind_table *last_table; + +/* Must be called with module_mutex held. */ +void *unwind_add_table(struct module *module, + const void *table_start, + unsigned long table_size) +{ + struct unwind_table *table; + + if (table_size <= 0) + return NULL; + + table = kmalloc(sizeof(*table), GFP_KERNEL); + if (!table) + return NULL; + + init_unwind_table(table, module->name, + module->module_core, module->core_size, + module->module_init, module->init_size, + table_start, table_size, + NULL, 0); + + if (last_table) + last_table->link = table; + else + root_table.link = table; + last_table = table; + + return table; +} + +struct unlink_table_info +{ + struct unwind_table *table; + int init_only; +}; + +static int unlink_table(void *arg) +{ + struct unlink_table_info *info = arg; + struct unwind_table *table = info->table, *prev; + + for (prev = &root_table; prev->link && prev->link != table; prev = prev->link) + ; + + if (prev->link) { + if (info->init_only) { + table->init.pc = 0; + table->init.range = 0; + info->table = NULL; + } else { + prev->link = table->link; + if (!prev->link) + last_table = prev; + } + } else + info->table = NULL; + + return 0; +} + +/* Must be called with module_mutex held. 
*/ +void unwind_remove_table(void *handle, int init_only) +{ + struct unwind_table *table = handle; + struct unlink_table_info info; + + if (!table || table == &root_table) + return; + + if (init_only && table == last_table) { + table->init.pc = 0; + table->init.range = 0; + return; + } + + info.table = table; + info.init_only = init_only; + stop_machine_run(unlink_table, &info, NR_CPUS); + + if (info.table) + kfree(table); +} + +#endif /* CONFIG_MODULES */ + +static uleb128_t get_uleb128(const u8 **pcur, const u8 *end) +{ + const u8 *cur = *pcur; + uleb128_t value; + unsigned shift; + + for (shift = 0, value = 0; cur < end; shift += 7) { + if (shift + 7 > 8 * sizeof(value) + && (*cur & 0x7fU) >= (1U << (8 * sizeof(value) - shift))) { + cur = end + 1; + break; + } + value |= (uleb128_t)(*cur & 0x7f) << shift; + if (!(*cur++ & 0x80)) + break; + } + *pcur = cur; + + return value; +} + +static sleb128_t get_sleb128(const u8 **pcur, const u8 *end) +{ + const u8 *cur = *pcur; + sleb128_t value; + unsigned shift; + + for (shift = 0, value = 0; cur < end; shift += 7) { + if (shift + 7 > 8 * sizeof(value) + && (*cur & 0x7fU) >= (1U << (8 * sizeof(value) - shift))) { + cur = end + 1; + break; + } + value |= (sleb128_t)(*cur & 0x7f) << shift; + if (!(*cur & 0x80)) { + value |= -(*cur++ & 0x40) << shift; + break; + } + } + *pcur = cur; + + return value; +} + +static const u32 *cie_for_fde(const u32 *fde, const struct unwind_table *table) +{ + const u32 *cie; + + if (!*fde || (*fde & (sizeof(*fde) - 1))) + return &bad_cie; + if (!fde[1]) + return &not_fde; /* this is a CIE */ + if ((fde[1] & (sizeof(*fde) - 1)) + || fde[1] > (unsigned long)(fde + 1) - (unsigned long)table->address) + return NULL; /* this is not a valid FDE */ + cie = fde + 1 - fde[1] / sizeof(*fde); + if (*cie <= sizeof(*cie) + 4 + || *cie >= fde[1] - sizeof(*fde) + || (*cie & (sizeof(*cie) - 1)) + || cie[1]) + return NULL; /* this is not a (valid) CIE */ + return cie; +} + +static unsigned long
read_pointer(const u8 **pLoc, + const void *end, + signed ptrType, + unsigned long text_base, + unsigned long data_base) +{ + unsigned long value = 0; + union { + const u8 *p8; + const u16 *p16u; + const s16 *p16s; + const u32 *p32u; + const s32 *p32s; + const unsigned long *pul; + } ptr; + + if (ptrType < 0 || ptrType == DW_EH_PE_omit) { + dprintk(1, "Invalid pointer encoding %02X (%p,%p).", ptrType, *pLoc, end); + return 0; + } + ptr.p8 = *pLoc; + switch(ptrType & DW_EH_PE_FORM) { + case DW_EH_PE_data2: + if (end < (const void *)(ptr.p16u + 1)) { + dprintk(1, "Data16 overrun (%p,%p).", ptr.p8, end); + return 0; + } + if(ptrType & DW_EH_PE_signed) + value = get_unaligned(ptr.p16s++); + else + value = get_unaligned(ptr.p16u++); + break; + case DW_EH_PE_data4: +#ifdef CONFIG_64BIT + if (end < (const void *)(ptr.p32u + 1)) { + dprintk(1, "Data32 overrun (%p,%p).", ptr.p8, end); + return 0; + } + if(ptrType & DW_EH_PE_signed) + value = get_unaligned(ptr.p32s++); + else + value = get_unaligned(ptr.p32u++); + break; + case DW_EH_PE_data8: + BUILD_BUG_ON(sizeof(u64) != sizeof(value)); +#else + BUILD_BUG_ON(sizeof(u32) != sizeof(value)); +#endif + case DW_EH_PE_native: + if (end < (const void *)(ptr.pul + 1)) { + dprintk(1, "DataUL overrun (%p,%p).", ptr.p8, end); + return 0; + } + value = get_unaligned(ptr.pul++); + break; + case DW_EH_PE_leb128: + BUILD_BUG_ON(sizeof(uleb128_t) > sizeof(value)); + value = ptrType & DW_EH_PE_signed + ? 
get_sleb128(&ptr.p8, end) + : get_uleb128(&ptr.p8, end); + if ((const void *)ptr.p8 > end) { + dprintk(1, "DataLEB overrun (%p,%p).", ptr.p8, end); + return 0; + } + break; + default: + dprintk(2, "Cannot decode pointer type %02X (%p,%p).", + ptrType, ptr.p8, end); + return 0; + } + switch(ptrType & DW_EH_PE_ADJUST) { + case DW_EH_PE_abs: + break; + case DW_EH_PE_pcrel: + value += (unsigned long)*pLoc; + break; + case DW_EH_PE_textrel: + if (likely(text_base)) { + value += text_base; + break; + } + dprintk(2, "Text-relative encoding %02X (%p,%p), but zero text base.", + ptrType, *pLoc, end); + return 0; + case DW_EH_PE_datarel: + if (likely(data_base)) { + value += data_base; + break; + } + dprintk(2, "Data-relative encoding %02X (%p,%p), but zero data base.", + ptrType, *pLoc, end); + return 0; + default: + dprintk(2, "Cannot adjust pointer type %02X (%p,%p).", + ptrType, *pLoc, end); + return 0; + } + if ((ptrType & DW_EH_PE_indirect) + && probe_kernel_address((unsigned long *)value, value)) { + dprintk(1, "Cannot read indirect value %lx (%p,%p).", + value, *pLoc, end); + return 0; + } + *pLoc = ptr.p8; + + return value; +} + +static signed fde_pointer_type(const u32 *cie) +{ + const u8 *ptr = (const u8 *)(cie + 2); + unsigned version = *ptr; + + if (version != 1) + return -1; /* unsupported */ + if (*++ptr) { + const char *aug; + const u8 *end = (const u8 *)(cie + 1) + *cie; + uleb128_t len; + + /* check if augmentation size is first (and thus present) */ + if (*ptr != 'z') + return -1; + /* check if augmentation string is nul-terminated */ + if ((ptr = memchr(aug = (const void *)ptr, 0, end - ptr)) == NULL) + return -1; + ++ptr; /* skip terminator */ + get_uleb128(&ptr, end); /* skip code alignment */ + get_sleb128(&ptr, end); /* skip data alignment */ + /* skip return address column */ + version <= 1 ? 
(void)++ptr : (void)get_uleb128(&ptr, end); + len = get_uleb128(&ptr, end); /* augmentation length */ + if (ptr + len < ptr || ptr + len > end) + return -1; + end = ptr + len; + while (*++aug) { + if (ptr >= end) + return -1; + switch(*aug) { + case 'L': + ++ptr; + break; + case 'P': { + signed ptrType = *ptr++; + + if (!read_pointer(&ptr, end, ptrType, 0, 0) + || ptr > end) + return -1; + } + break; + case 'R': + return *ptr; + default: + return -1; + } + } + } + return DW_EH_PE_native|DW_EH_PE_abs; +} + +static int advance_loc(unsigned long delta, struct unwind_state *state) +{ + state->loc += delta * state->codeAlign; + + return delta > 0; +} + +static void set_rule(uleb128_t reg, + enum item_location where, + uleb128_t value, + struct unwind_state *state) +{ + if (reg < ARRAY_SIZE(state->regs)) { + state->regs[reg].where = where; + state->regs[reg].value = value; + } +} + +static int processCFI(const u8 *start, + const u8 *end, + unsigned long targetLoc, + signed ptrType, + struct unwind_state *state) +{ + union { + const u8 *p8; + const u16 *p16; + const u32 *p32; + } ptr; + int result = 1; + + if (start != state->cieStart) { + state->loc = state->org; + result = processCFI(state->cieStart, state->cieEnd, 0, ptrType, state); + if (targetLoc == 0 && state->label == NULL) + return result; + } + for (ptr.p8 = start; result && ptr.p8 < end; ) { + switch(*ptr.p8 >> 6) { + uleb128_t value; + + case 0: + switch(*ptr.p8++) { + case DW_CFA_nop: + break; + case DW_CFA_set_loc: + state->loc = read_pointer(&ptr.p8, end, ptrType, 0, 0); + if (state->loc == 0) + result = 0; + break; + case DW_CFA_advance_loc1: + result = ptr.p8 < end && advance_loc(*ptr.p8++, state); + break; + case DW_CFA_advance_loc2: + result = ptr.p8 <= end + 2 + && advance_loc(*ptr.p16++, state); + break; + case DW_CFA_advance_loc4: + result = ptr.p8 <= end + 4 + && advance_loc(*ptr.p32++, state); + break; + case DW_CFA_offset_extended: + value = get_uleb128(&ptr.p8, end); + set_rule(value, Memory, 
get_uleb128(&ptr.p8, end), state); + break; + case DW_CFA_val_offset: + value = get_uleb128(&ptr.p8, end); + set_rule(value, Value, get_uleb128(&ptr.p8, end), state); + break; + case DW_CFA_offset_extended_sf: + value = get_uleb128(&ptr.p8, end); + set_rule(value, Memory, get_sleb128(&ptr.p8, end), state); + break; + case DW_CFA_val_offset_sf: + value = get_uleb128(&ptr.p8, end); + set_rule(value, Value, get_sleb128(&ptr.p8, end), state); + break; + case DW_CFA_restore_extended: + case DW_CFA_undefined: + case DW_CFA_same_value: + set_rule(get_uleb128(&ptr.p8, end), Nowhere, 0, state); + break; + case DW_CFA_register: + value = get_uleb128(&ptr.p8, end); + set_rule(value, + Register, + get_uleb128(&ptr.p8, end), state); + break; + case DW_CFA_remember_state: + if (ptr.p8 == state->label) { + state->label = NULL; + return 1; + } + if (state->stackDepth >= MAX_STACK_DEPTH) { + dprintk(1, "State stack overflow (%p,%p).", ptr.p8, end); + return 0; + } + state->stack[state->stackDepth++] = ptr.p8; + break; + case DW_CFA_restore_state: + if (state->stackDepth) { + const uleb128_t loc = state->loc; + const u8 *label = state->label; + + state->label = state->stack[state->stackDepth - 1]; + memcpy(&state->cfa, &badCFA, sizeof(state->cfa)); + memset(state->regs, 0, sizeof(state->regs)); + state->stackDepth = 0; + result = processCFI(start, end, 0, ptrType, state); + state->loc = loc; + state->label = label; + } else { + dprintk(1, "State stack underflow (%p,%p).", ptr.p8, end); + return 0; + } + break; + case DW_CFA_def_cfa: + state->cfa.reg = get_uleb128(&ptr.p8, end); + /*nobreak*/ + case DW_CFA_def_cfa_offset: + state->cfa.offs = get_uleb128(&ptr.p8, end); + break; + case DW_CFA_def_cfa_sf: + state->cfa.reg = get_uleb128(&ptr.p8, end); + /*nobreak*/ + case DW_CFA_def_cfa_offset_sf: + state->cfa.offs = get_sleb128(&ptr.p8, end) + * state->dataAlign; + break; + case DW_CFA_def_cfa_register: + state->cfa.reg = get_uleb128(&ptr.p8, end); + break; + /*todo case 
DW_CFA_def_cfa_expression: */ + /*todo case DW_CFA_expression: */ + /*todo case DW_CFA_val_expression: */ + case DW_CFA_GNU_args_size: + get_uleb128(&ptr.p8, end); + break; + case DW_CFA_GNU_negative_offset_extended: + value = get_uleb128(&ptr.p8, end); + set_rule(value, + Memory, + (uleb128_t)0 - get_uleb128(&ptr.p8, end), state); + break; + case DW_CFA_GNU_window_save: + default: + dprintk(1, "Unrecognized CFI op %02X (%p,%p).", ptr.p8[-1], ptr.p8 - 1, end); + result = 0; + break; + } + break; + case 1: + result = advance_loc(*ptr.p8++ & 0x3f, state); + break; + case 2: + value = *ptr.p8++ & 0x3f; + set_rule(value, Memory, get_uleb128(&ptr.p8, end), state); + break; + case 3: + set_rule(*ptr.p8++ & 0x3f, Nowhere, 0, state); + break; + } + if (ptr.p8 > end) { + dprintk(1, "Data overrun (%p,%p).", ptr.p8, end); + result = 0; + } + if (result && targetLoc != 0 && targetLoc < state->loc) + return 1; + } + + if (result && ptr.p8 < end) + dprintk(1, "Data underrun (%p,%p).", ptr.p8, end); + + return result + && ptr.p8 == end + && (targetLoc == 0 + || (/*todo While in theory this should apply, gcc in practice omits + everything past the function prolog, and hence the location + never reaches the end of the function. + targetLoc < state->loc &&*/ state->label == NULL)); +} + +/* Unwind to previous to frame. Returns 0 if successful, negative + * number in case of an error. 
*/ +int unwind(struct unwind_frame_info *frame) +{ +#define FRAME_REG(r, t) (((t *)frame)[reg_info[r].offs]) + const u32 *fde = NULL, *cie = NULL; + const u8 *ptr = NULL, *end = NULL; + unsigned long pc = UNW_PC(frame) - frame->call_frame, sp; + unsigned long startLoc = 0, endLoc = 0, cfa; + unsigned i; + signed ptrType = -1; + uleb128_t retAddrReg = 0; + const struct unwind_table *table; + struct unwind_state state; + + if (UNW_PC(frame) == 0) + return -EINVAL; + if ((table = find_table(pc)) != NULL + && !(table->size & (sizeof(*fde) - 1))) { + const u8 *hdr = table->header; + unsigned long tableSize; + + smp_rmb(); + if (hdr && hdr[0] == 1) { + switch(hdr[3] & DW_EH_PE_FORM) { + case DW_EH_PE_native: tableSize = sizeof(unsigned long); break; + case DW_EH_PE_data2: tableSize = 2; break; + case DW_EH_PE_data4: tableSize = 4; break; + case DW_EH_PE_data8: tableSize = 8; break; + default: tableSize = 0; break; + } + ptr = hdr + 4; + end = hdr + table->hdrsz; + if (tableSize + && read_pointer(&ptr, end, hdr[1], 0, 0) + == (unsigned long)table->address + && (i = read_pointer(&ptr, end, hdr[2], 0, 0)) > 0 + && i == (end - ptr) / (2 * tableSize) + && !((end - ptr) % (2 * tableSize))) { + do { + const u8 *cur = ptr + (i / 2) * (2 * tableSize); + + startLoc = read_pointer(&cur, + cur + tableSize, + hdr[3], 0, + (unsigned long)hdr); + if (pc < startLoc) + i /= 2; + else { + ptr = cur - tableSize; + i = (i + 1) / 2; + } + } while (startLoc && i > 1); + if (i == 1 + && (startLoc = read_pointer(&ptr, + ptr + tableSize, + hdr[3], 0, + (unsigned long)hdr)) != 0 + && pc >= startLoc) + fde = (void *)read_pointer(&ptr, + ptr + tableSize, + hdr[3], 0, + (unsigned long)hdr); + } + } + if(hdr && !fde) + dprintk(3, "Binary lookup for %lx failed.", pc); + + if (fde != NULL) { + cie = cie_for_fde(fde, table); + ptr = (const u8 *)(fde + 2); + if(cie != NULL + && cie != &bad_cie + && cie != &not_fde + && (ptrType = fde_pointer_type(cie)) >= 0 + && read_pointer(&ptr, + (const u8 *)(fde + 1) +
*fde, + ptrType, 0, 0) == startLoc) { + if (!(ptrType & DW_EH_PE_indirect)) + ptrType &= DW_EH_PE_FORM|DW_EH_PE_signed; + endLoc = startLoc + + read_pointer(&ptr, + (const u8 *)(fde + 1) + *fde, + ptrType, 0, 0); + if(pc >= endLoc) + fde = NULL; + } else + fde = NULL; + if(!fde) + dprintk(1, "Binary lookup result for %lx discarded.", pc); + } + if (fde == NULL) { + for (fde = table->address, tableSize = table->size; + cie = NULL, tableSize > sizeof(*fde) + && tableSize - sizeof(*fde) >= *fde; + tableSize -= sizeof(*fde) + *fde, + fde += 1 + *fde / sizeof(*fde)) { + cie = cie_for_fde(fde, table); + if (cie == &bad_cie) { + cie = NULL; + break; + } + if (cie == NULL + || cie == &not_fde + || (ptrType = fde_pointer_type(cie)) < 0) + continue; + ptr = (const u8 *)(fde + 2); + startLoc = read_pointer(&ptr, + (const u8 *)(fde + 1) + *fde, + ptrType, 0, 0); + if (!startLoc) + continue; + if (!(ptrType & DW_EH_PE_indirect)) + ptrType &= DW_EH_PE_FORM|DW_EH_PE_signed; + endLoc = startLoc + + read_pointer(&ptr, + (const u8 *)(fde + 1) + *fde, + ptrType, 0, 0); + if (pc >= startLoc && pc < endLoc) + break; + } + if(!fde) + dprintk(3, "Linear lookup for %lx failed.", pc); + } + } + if (cie != NULL) { + memset(&state, 0, sizeof(state)); + state.cieEnd = ptr; /* keep here temporarily */ + ptr = (const u8 *)(cie + 2); + end = (const u8 *)(cie + 1) + *cie; + frame->call_frame = 1; + if ((state.version = *ptr) != 1) + cie = NULL; /* unsupported version */ + else if (*++ptr) { + /* check if augmentation size is first (and thus present) */ + if (*ptr == 'z') { + while (++ptr < end && *ptr) { + switch(*ptr) { + /* check for ignorable (or already handled) + * nul-terminated augmentation string */ + case 'L': + case 'P': + case 'R': + continue; + case 'S': + frame->call_frame = 0; + continue; + default: + break; + } + break; + } + } + if (ptr >= end || *ptr) + cie = NULL; + } + if(!cie) + dprintk(1, "CIE unusable (%p,%p).", ptr, end); + ++ptr; + } + if (cie != NULL) { + /* get code
aligment factor */ + state.codeAlign = get_uleb128(&ptr, end); + /* get data aligment factor */ + state.dataAlign = get_sleb128(&ptr, end); + if (state.codeAlign == 0 || state.dataAlign == 0 || ptr >= end) + cie = NULL; + else if (UNW_PC(frame) % state.codeAlign + || UNW_SP(frame) % sleb128abs(state.dataAlign)) { + dprintk(1, "Input pointer(s) misaligned (%lx,%lx).", + UNW_PC(frame), UNW_SP(frame)); + return -EPERM; + } else { + retAddrReg = state.version <= 1 ? *ptr++ : get_uleb128(&ptr, end); + /* skip augmentation */ + if (((const char *)(cie + 2))[1] == 'z') { + uleb128_t augSize = get_uleb128(&ptr, end); + + ptr += augSize; + } + if (ptr > end + || retAddrReg >= ARRAY_SIZE(reg_info) + || REG_INVALID(retAddrReg) + || reg_info[retAddrReg].width != sizeof(unsigned long)) + cie = NULL; + } + if(!cie) + dprintk(1, "CIE validation failed (%p,%p).", ptr, end); + } + if (cie != NULL) { + state.cieStart = ptr; + ptr = state.cieEnd; + state.cieEnd = end; + end = (const u8 *)(fde + 1) + *fde; + /* skip augmentation */ + if (((const char *)(cie + 2))[1] == 'z') { + uleb128_t augSize = get_uleb128(&ptr, end); + + if ((ptr += augSize) > end) + fde = NULL; + } + if(!fde) + dprintk(1, "FDE validation failed (%p,%p).", ptr, end); + } + if (cie == NULL || fde == NULL) { +#ifdef CONFIG_FRAME_POINTER + unsigned long top, bottom; + + if ((UNW_SP(frame) | UNW_FP(frame)) % sizeof(unsigned long)) + return -EPERM; + top = STACK_TOP(frame->task); + bottom = STACK_BOTTOM(frame->task); +# if FRAME_RETADDR_OFFSET < 0 + if (UNW_SP(frame) < top + && UNW_FP(frame) <= UNW_SP(frame) + && bottom < UNW_FP(frame) +# else + if (UNW_SP(frame) > top + && UNW_FP(frame) >= UNW_SP(frame) + && bottom > UNW_FP(frame) +# endif + && !((UNW_SP(frame) | UNW_FP(frame)) + & (sizeof(unsigned long) - 1))) { + unsigned long link; + + if (!probe_kernel_address( + (unsigned long *)(UNW_FP(frame) + + FRAME_LINK_OFFSET), + link) +# if FRAME_RETADDR_OFFSET < 0 + && link > bottom && link < UNW_FP(frame) +# else + && 
link > UNW_FP(frame) && link < bottom +# endif + && !(link & (sizeof(link) - 1)) + && !probe_kernel_address( + (unsigned long *)(UNW_FP(frame) + + FRAME_RETADDR_OFFSET), UNW_PC(frame))) { + UNW_SP(frame) = UNW_FP(frame) + FRAME_RETADDR_OFFSET +# if FRAME_RETADDR_OFFSET < 0 + - +# else + + +# endif + sizeof(UNW_PC(frame)); + UNW_FP(frame) = link; + return 0; + } + } +#endif + return -ENXIO; + } + state.org = startLoc; + memcpy(&state.cfa, &badCFA, sizeof(state.cfa)); + /* process instructions */ + if (!processCFI(ptr, end, pc, ptrType, &state) + || state.loc > endLoc + || state.regs[retAddrReg].where == Nowhere + || state.cfa.reg >= ARRAY_SIZE(reg_info) + || reg_info[state.cfa.reg].width != sizeof(unsigned long) + || FRAME_REG(state.cfa.reg, unsigned long) % sizeof(unsigned long) + || state.cfa.offs % sizeof(unsigned long)) { + dprintk(1, "Unusable unwind info (%p,%p).", ptr, end); + return -EIO; + } + /* update frame */ +#ifndef CONFIG_AS_CFI_SIGNAL_FRAME + if(frame->call_frame + && !UNW_DEFAULT_RA(state.regs[retAddrReg], state.dataAlign)) + frame->call_frame = 0; +#endif + cfa = FRAME_REG(state.cfa.reg, unsigned long) + state.cfa.offs; + startLoc = min((unsigned long)UNW_SP(frame), cfa); + endLoc = max((unsigned long)UNW_SP(frame), cfa); + if (STACK_LIMIT(startLoc) != STACK_LIMIT(endLoc)) { + startLoc = min(STACK_LIMIT(cfa), cfa); + endLoc = max(STACK_LIMIT(cfa), cfa); + } +#ifndef CONFIG_64BIT +# define CASES CASE(8); CASE(16); CASE(32) +#else +# define CASES CASE(8); CASE(16); CASE(32); CASE(64) +#endif + pc = UNW_PC(frame); + sp = UNW_SP(frame); + for (i = 0; i < ARRAY_SIZE(state.regs); ++i) { + if (REG_INVALID(i)) { + if (state.regs[i].where == Nowhere) + continue; + dprintk(1, "Cannot restore register %u (%d).", + i, state.regs[i].where); + return -EIO; + } + switch(state.regs[i].where) { + default: + break; + case Register: + if (state.regs[i].value >= ARRAY_SIZE(reg_info) + || REG_INVALID(state.regs[i].value) + || reg_info[i].width > 
reg_info[state.regs[i].value].width) { + dprintk(1, "Cannot restore register %u from register %lu.", + i, state.regs[i].value); + return -EIO; + } + switch(reg_info[state.regs[i].value].width) { +#define CASE(n) \ + case sizeof(u##n): \ + state.regs[i].value = FRAME_REG(state.regs[i].value, \ + const u##n); \ + break + CASES; +#undef CASE + default: + dprintk(1, "Unsupported register size %u (%lu).", + reg_info[state.regs[i].value].width, + state.regs[i].value); + return -EIO; + } + break; + } + } + for (i = 0; i < ARRAY_SIZE(state.regs); ++i) { + if (REG_INVALID(i)) + continue; + switch(state.regs[i].where) { + case Nowhere: + if (reg_info[i].width != sizeof(UNW_SP(frame)) + || &FRAME_REG(i, __typeof__(UNW_SP(frame))) + != &UNW_SP(frame)) + continue; + UNW_SP(frame) = cfa; + break; + case Register: + switch(reg_info[i].width) { +#define CASE(n) case sizeof(u##n): \ + FRAME_REG(i, u##n) = state.regs[i].value; \ + break + CASES; +#undef CASE + default: + dprintk(1, "Unsupported register size %u (%u).", + reg_info[i].width, i); + return -EIO; + } + break; + case Value: + if (reg_info[i].width != sizeof(unsigned long)) { + dprintk(1, "Unsupported value size %u (%u).", + reg_info[i].width, i); + return -EIO; + } + FRAME_REG(i, unsigned long) = cfa + state.regs[i].value + * state.dataAlign; + break; + case Memory: { + unsigned long addr = cfa + state.regs[i].value + * state.dataAlign; + + if ((state.regs[i].value * state.dataAlign) + % sizeof(unsigned long) + || addr < startLoc + || addr + sizeof(unsigned long) < addr + || addr + sizeof(unsigned long) > endLoc) { + dprintk(1, "Bad memory location %lx (%lx).", + addr, state.regs[i].value); + return -EIO; + } + switch(reg_info[i].width) { +#define CASE(n) case sizeof(u##n): \ + probe_kernel_address((u##n *)addr, FRAME_REG(i, u##n)); \ + break + CASES; +#undef CASE + default: + dprintk(1, "Unsupported memory size %u (%u).", + reg_info[i].width, i); + return -EIO; + } + } + break; + } + } + + if (UNW_PC(frame) % 
state.codeAlign + || UNW_SP(frame) % sleb128abs(state.dataAlign)) { + dprintk(1, "Output pointer(s) misaligned (%lx,%lx).", + UNW_PC(frame), UNW_SP(frame)); + return -EIO; + } + if (pc == UNW_PC(frame) && sp == UNW_SP(frame)) { + dprintk(1, "No progress (%lx,%lx).", pc, sp); + return -EIO; + } + + return 0; +#undef CASES +#undef FRAME_REG +} +EXPORT_SYMBOL(unwind); + +int unwind_init_frame_info(struct unwind_frame_info *info, + struct task_struct *tsk, + /*const*/ struct pt_regs *regs) +{ + info->task = tsk; + info->call_frame = 0; + arch_unw_init_frame_info(info, regs); + + return 0; +} +EXPORT_SYMBOL(unwind_init_frame_info); + +/* + * Prepare to unwind a blocked task. + */ +int unwind_init_blocked(struct unwind_frame_info *info, + struct task_struct *tsk) +{ + info->task = tsk; + info->call_frame = 0; + arch_unw_init_blocked(info); + + return 0; +} +EXPORT_SYMBOL(unwind_init_blocked); + +/* + * Prepare to unwind the currently running thread. + */ +int unwind_init_running(struct unwind_frame_info *info, + asmlinkage int (*callback)(struct unwind_frame_info *, + void *arg), + void *arg) +{ + info->task = current; + info->call_frame = 0; + + return arch_unwind_init_running(info, callback, arg); +} +EXPORT_SYMBOL(unwind_init_running); + +/* + * Unwind until the return pointer is in user-land (or until an error + * occurs). Returns 0 if successful, negative number in case of + * error. 
+ */ +int unwind_to_user(struct unwind_frame_info *info) +{ + while (!arch_unw_user_mode(info)) { + int err = unwind(info); + + if (err < 0) + return err; + } + + return 0; +} +EXPORT_SYMBOL(unwind_to_user); diff --git a/trunk/kernel/workqueue.c b/trunk/kernel/workqueue.c index 742cbbe49bdc..db49886bfae1 100644 --- a/trunk/kernel/workqueue.c +++ b/trunk/kernel/workqueue.c @@ -96,13 +96,13 @@ static inline void set_wq_data(struct work_struct *work, void *wq) BUG_ON(!work_pending(work)); new = (unsigned long) wq | (1UL << WORK_STRUCT_PENDING); - new |= WORK_STRUCT_FLAG_MASK & *work_data_bits(work); - atomic_long_set(&work->data, new); + new |= work->management & WORK_STRUCT_FLAG_MASK; + work->management = new; } static inline void *get_wq_data(struct work_struct *work) { - return (void *) (atomic_long_read(&work->data) & WORK_STRUCT_WQ_DATA_MASK); + return (void *) (work->management & WORK_STRUCT_WQ_DATA_MASK); } static int __run_work(struct cpu_workqueue_struct *cwq, struct work_struct *work) @@ -133,7 +133,7 @@ static int __run_work(struct cpu_workqueue_struct *cwq, struct work_struct *work list_del_init(&work->entry); spin_unlock_irqrestore(&cwq->lock, flags); - if (!test_bit(WORK_STRUCT_NOAUTOREL, work_data_bits(work))) + if (!test_bit(WORK_STRUCT_NOAUTOREL, &work->management)) work_release(work); f(work); @@ -206,7 +206,7 @@ int fastcall queue_work(struct workqueue_struct *wq, struct work_struct *work) { int ret = 0, cpu = get_cpu(); - if (!test_and_set_bit(WORK_STRUCT_PENDING, work_data_bits(work))) { + if (!test_and_set_bit(WORK_STRUCT_PENDING, &work->management)) { if (unlikely(is_single_threaded(wq))) cpu = singlethread_cpu; BUG_ON(!list_empty(&work->entry)); @@ -248,7 +248,7 @@ int fastcall queue_delayed_work(struct workqueue_struct *wq, if (delay == 0) return queue_work(wq, work); - if (!test_and_set_bit(WORK_STRUCT_PENDING, work_data_bits(work))) { + if (!test_and_set_bit(WORK_STRUCT_PENDING, &work->management)) { BUG_ON(timer_pending(timer)); 
BUG_ON(!list_empty(&work->entry)); @@ -280,7 +280,7 @@ int queue_delayed_work_on(int cpu, struct workqueue_struct *wq, struct timer_list *timer = &dwork->timer; struct work_struct *work = &dwork->work; - if (!test_and_set_bit(WORK_STRUCT_PENDING, work_data_bits(work))) { + if (!test_and_set_bit(WORK_STRUCT_PENDING, &work->management)) { BUG_ON(timer_pending(timer)); BUG_ON(!list_empty(&work->entry)); @@ -321,7 +321,7 @@ static void run_workqueue(struct cpu_workqueue_struct *cwq) spin_unlock_irqrestore(&cwq->lock, flags); BUG_ON(get_wq_data(work) != cwq); - if (!test_bit(WORK_STRUCT_NOAUTOREL, work_data_bits(work))) + if (!test_bit(WORK_STRUCT_NOAUTOREL, &work->management)) work_release(work); f(work); diff --git a/trunk/lib/Kconfig.debug b/trunk/lib/Kconfig.debug index 5c2681875b9a..818e4589f718 100644 --- a/trunk/lib/Kconfig.debug +++ b/trunk/lib/Kconfig.debug @@ -354,6 +354,24 @@ config FRAME_POINTER some architectures or if you use external debuggers. If you don't debug the kernel, you can say N. +config UNWIND_INFO + bool "Compile the kernel with frame unwind information" + depends on !IA64 && !PARISC && !ARM + depends on !MODULES || !(MIPS || PPC || SUPERH || V850) + help + If you say Y here the resulting kernel image will be slightly larger + but not slower, and it will give very useful debugging information. + If you don't debug the kernel, you can say N, but we may not be able + to solve problems without frame unwind information or frame pointers. + +config STACK_UNWIND + bool "Stack unwind support" + depends on UNWIND_INFO + depends on X86 + help + This enables more precise stack traces, omitting all unrelated + occurrences of pointers into kernel code from the dump. 
+ config FORCED_INLINING bool "Force gcc to inline functions marked 'inline'" depends on DEBUG_KERNEL diff --git a/trunk/lib/fault-inject.c b/trunk/lib/fault-inject.c index b5a90fc056d3..d143c0faf248 100644 --- a/trunk/lib/fault-inject.c +++ b/trunk/lib/fault-inject.c @@ -55,7 +55,37 @@ static bool fail_task(struct fault_attr *attr, struct task_struct *task) #define MAX_STACK_TRACE_DEPTH 32 -#if defined(CONFIG_STACKTRACE) +#ifdef CONFIG_STACK_UNWIND + +static asmlinkage int fail_stacktrace_callback(struct unwind_frame_info *info, + void *arg) +{ + int depth; + struct fault_attr *attr = arg; + bool found = (attr->require_start == 0 && attr->require_end == ULONG_MAX); + + for (depth = 0; depth < attr->stacktrace_depth + && unwind(info) == 0 && UNW_PC(info); depth++) { + if (arch_unw_user_mode(info)) + break; + if (attr->reject_start <= UNW_PC(info) && + UNW_PC(info) < attr->reject_end) + return false; + if (attr->require_start <= UNW_PC(info) && + UNW_PC(info) < attr->require_end) + found = true; + } + return found; +} + +static bool fail_stacktrace(struct fault_attr *attr) +{ + struct unwind_frame_info info; + + return unwind_init_running(&info, fail_stacktrace_callback, attr); +} + +#elif defined(CONFIG_STACKTRACE) static bool fail_stacktrace(struct fault_attr *attr) { diff --git a/trunk/mm/mincore.c b/trunk/mm/mincore.c index b44d7f875cb6..72890780c1c9 100644 --- a/trunk/mm/mincore.c +++ b/trunk/mm/mincore.c @@ -1,7 +1,7 @@ /* * linux/mm/mincore.c * - * Copyright (C) 1994-2006 Linus Torvalds + * Copyright (C) 1994-1999 Linus Torvalds */ /* @@ -38,60 +38,46 @@ static unsigned char mincore_page(struct vm_area_struct * vma, return present; } -/* - * Do a chunk of "sys_mincore()". We've already checked - * all the arguments, we hold the mmap semaphore: we should - * just return the amount of info we're asked for. 
- */ -static long do_mincore(unsigned long addr, unsigned char *vec, unsigned long pages) +static long mincore_vma(struct vm_area_struct * vma, + unsigned long start, unsigned long end, unsigned char __user * vec) { - unsigned long i, nr, pgoff; - struct vm_area_struct *vma = find_vma(current->mm, addr); + long error, i, remaining; + unsigned char * tmp; - /* - * find_vma() didn't find anything: the address - * is above everything we have mapped. - */ - if (!vma) { - memset(vec, 0, pages); - return pages; - } + error = -ENOMEM; + if (!vma->vm_file) + return error; - /* - * find_vma() found something, but we might be - * below it: check for that. - */ - if (addr < vma->vm_start) { - unsigned long gap = (vma->vm_start - addr) >> PAGE_SHIFT; - if (gap > pages) - gap = pages; - memset(vec, 0, gap); - return gap; - } + start = ((start - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; + if (end > vma->vm_end) + end = vma->vm_end; + end = ((end - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; - /* - * Ok, got it. But check whether it's a segment we support - * mincore() on. Right now, we don't do any anonymous mappings. - */ - if (!vma->vm_file) - return -ENOMEM; + error = -EAGAIN; + tmp = (unsigned char *) __get_free_page(GFP_KERNEL); + if (!tmp) + return error; - /* - * Calculate how many pages there are left in the vma, and - * what the pgoff is for our address. - */ - nr = (vma->vm_end - addr) >> PAGE_SHIFT; - if (nr > pages) - nr = pages; + /* (end - start) is # of pages, and also # of bytes in "vec */ + remaining = (end - start), + + error = 0; + for (i = 0; remaining > 0; remaining -= PAGE_SIZE, i++) { + int j = 0; + long thispiece = (remaining < PAGE_SIZE) ? + remaining : PAGE_SIZE; - pgoff = (addr - vma->vm_start) >> PAGE_SHIFT; - pgoff += vma->vm_pgoff; + while (j < thispiece) + tmp[j++] = mincore_page(vma, start++); - /* And then we just fill the sucker in.. 
*/ - for (i = 0 ; i < nr; i++, pgoff++) - vec[i] = mincore_page(vma, pgoff); + if (copy_to_user(vec + PAGE_SIZE * i, tmp, thispiece)) { + error = -EFAULT; + break; + } + } - return nr; + free_page((unsigned long) tmp); + return error; } /* @@ -121,50 +107,82 @@ static long do_mincore(unsigned long addr, unsigned char *vec, unsigned long pag asmlinkage long sys_mincore(unsigned long start, size_t len, unsigned char __user * vec) { - long retval; - unsigned long pages; - unsigned char *tmp; - - /* Check the start address: needs to be page-aligned.. */ + int index = 0; + unsigned long end, limit; + struct vm_area_struct * vma; + size_t max; + int unmapped_error = 0; + long error; + + /* check the arguments */ if (start & ~PAGE_CACHE_MASK) - return -EINVAL; + goto einval; - /* ..and we need to be passed a valid user-space range */ - if (!access_ok(VERIFY_READ, (void __user *) start, len)) - return -ENOMEM; + limit = TASK_SIZE; + if (start >= limit) + goto enomem; - /* This also avoids any overflows on PAGE_CACHE_ALIGN */ - pages = len >> PAGE_SHIFT; - pages += (len & ~PAGE_MASK) != 0; + if (!len) + return 0; - if (!access_ok(VERIFY_WRITE, vec, pages)) - return -EFAULT; + max = limit - start; + len = PAGE_CACHE_ALIGN(len); + if (len > max || !len) + goto enomem; - tmp = (void *) __get_free_page(GFP_USER); - if (!tmp) - return -ENOMEM; - - retval = 0; - while (pages) { - /* - * Do at most PAGE_SIZE entries per iteration, due to - * the temporary buffer size. 
- */ - down_read(&current->mm->mmap_sem); - retval = do_mincore(start, tmp, max(pages, PAGE_SIZE)); - up_read(&current->mm->mmap_sem); - - if (retval <= 0) - break; - if (copy_to_user(vec, tmp, retval)) { - retval = -EFAULT; - break; + end = start + len; + + /* check the output buffer whilst holding the lock */ + error = -EFAULT; + down_read(&current->mm->mmap_sem); + + if (!access_ok(VERIFY_WRITE, vec, len >> PAGE_SHIFT)) + goto out; + + /* + * If the interval [start,end) covers some unmapped address + * ranges, just ignore them, but return -ENOMEM at the end. + */ + error = 0; + + vma = find_vma(current->mm, start); + while (vma) { + /* Here start < vma->vm_end. */ + if (start < vma->vm_start) { + unmapped_error = -ENOMEM; + start = vma->vm_start; } - pages -= retval; - vec += retval; - start += retval << PAGE_SHIFT; - retval = 0; + + /* Here vma->vm_start <= start < vma->vm_end. */ + if (end <= vma->vm_end) { + if (start < end) { + error = mincore_vma(vma, start, end, + &vec[index]); + if (error) + goto out; + } + error = unmapped_error; + goto out; + } + + /* Here vma->vm_start <= start < vma->vm_end < end. */ + error = mincore_vma(vma, start, vma->vm_end, &vec[index]); + if (error) + goto out; + index += (vma->vm_end - start) >> PAGE_CACHE_SHIFT; + start = vma->vm_end; + vma = vma->vm_next; } - free_page((unsigned long) tmp); - return retval; + + /* we found a hole in the area queried if we arrive here */ + error = -ENOMEM; + +out: + up_read(&current->mm->mmap_sem); + return error; + +einval: + return -EINVAL; +enomem: + return -ENOMEM; }