mariux64 / linux
linux / fs / proc / kcore.c

Latest commit 4e29dd9 · Mar 19, 2023
mm: vmalloc: convert vread() to vread_iter()
Lorenzo Stoakes and Andrew Morton

664 lines (581 loc) · 15.7 KB
// SPDX-License-Identifier: GPL-2.0
/*
 *	fs/proc/kcore.c kernel ELF core dumper
 *
 *	Modelled on fs/exec.c:aout_core_dump()
 *	Jeremy Fitzhardinge <jeremy@sw.oz.au>
 *	ELF version written by David Howells <David.Howells@nexor.co.uk>
 *	Modified and incorporated into 2.3.x by Tigran Aivazian <tigran@veritas.com>
 *	Support to dump vmalloc'd areas (ELF only), Tigran Aivazian <tigran@veritas.com>
 *	Safe accesses to vmalloc/direct-mapped discontiguous areas, Kanoj Sarcar <kanoj@sgi.com>
 */

#include <linux/crash_core.h>
#include <linux/mm.h>
#include <linux/proc_fs.h>
#include <linux/kcore.h>
#include <linux/user.h>
#include <linux/capability.h>
#include <linux/elf.h>
#include <linux/elfcore.h>
#include <linux/vmalloc.h>
#include <linux/highmem.h>
#include <linux/printk.h>
#include <linux/memblock.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/uio.h>
#include <asm/io.h>
#include <linux/list.h>
#include <linux/ioport.h>
#include <linux/memory.h>
#include <linux/sched/task.h>
#include <linux/security.h>
#include <asm/sections.h>
#include "internal.h"

#define CORE_STR "CORE"

#ifndef ELF_CORE_EFLAGS
#define ELF_CORE_EFLAGS	0
#endif

static struct proc_dir_entry *proc_root_kcore;

#ifndef kc_vaddr_to_offset
#define	kc_vaddr_to_offset(v) ((v) - PAGE_OFFSET)
#endif
#ifndef	kc_offset_to_vaddr
#define	kc_offset_to_vaddr(o) ((o) + PAGE_OFFSET)
#endif

static LIST_HEAD(kclist_head);
static DECLARE_RWSEM(kclist_lock);
static int kcore_need_update = 1;

/*
 * Returns > 0 for RAM pages, 0 for non-RAM pages, < 0 on error
 * Same as oldmem_pfn_is_ram in vmcore
 */
static int (*mem_pfn_is_ram)(unsigned long pfn);

int __init register_mem_pfn_is_ram(int (*fn)(unsigned long pfn))
{
	if (mem_pfn_is_ram)
		return -EBUSY;
	mem_pfn_is_ram = fn;
	return 0;
}

static int pfn_is_ram(unsigned long pfn)
{
	if (mem_pfn_is_ram)
		return mem_pfn_is_ram(pfn);
	else
		return 1;
}

/* This doesn't grab kclist_lock, so it should only be used at init time. */
void __init kclist_add(struct kcore_list *new, void *addr, size_t size,
		       int type)
{
	new->addr = (unsigned long)addr;
	new->size = size;
	new->type = type;

	list_add_tail(&new->list, &kclist_head);
}

static size_t get_kcore_size(int *nphdr, size_t *phdrs_len, size_t *notes_len,
			     size_t *data_offset)
{
	size_t try, size;
	struct kcore_list *m;

	*nphdr = 1; /* PT_NOTE */
	size = 0;

	list_for_each_entry(m, &kclist_head, list) {
		try = kc_vaddr_to_offset((size_t)m->addr + m->size);
		if (try > size)
			size = try;
		*nphdr = *nphdr + 1;
	}

	*phdrs_len = *nphdr * sizeof(struct elf_phdr);
	*notes_len = (4 * sizeof(struct elf_note) +
		      3 * ALIGN(sizeof(CORE_STR), 4) +
		      VMCOREINFO_NOTE_NAME_BYTES +
		      ALIGN(sizeof(struct elf_prstatus), 4) +
		      ALIGN(sizeof(struct elf_prpsinfo), 4) +
		      ALIGN(arch_task_struct_size, 4) +
		      ALIGN(vmcoreinfo_size, 4));
	*data_offset = PAGE_ALIGN(sizeof(struct elfhdr) + *phdrs_len +
				  *notes_len);
	return *data_offset + size;
}

#ifdef CONFIG_HIGHMEM
/*
 * If no highmem, we can assume [0...max_low_pfn) continuous range of memory
 * because memory hole is not as big as !HIGHMEM case.
 * (HIGHMEM is special because part of memory is _invisible_ from the kernel.)
 */
static int kcore_ram_list(struct list_head *head)
{
	struct kcore_list *ent;

	ent = kmalloc(sizeof(*ent), GFP_KERNEL);
	if (!ent)
		return -ENOMEM;
	ent->addr = (unsigned long)__va(0);
	ent->size = max_low_pfn << PAGE_SHIFT;
	ent->type = KCORE_RAM;
	list_add(&ent->list, head);
	return 0;
}

#else /* !CONFIG_HIGHMEM */

#ifdef CONFIG_SPARSEMEM_VMEMMAP
/* calculate vmemmap's address from given system ram pfn and register it */
static int
get_sparsemem_vmemmap_info(struct kcore_list *ent, struct list_head *head)
{
	unsigned long pfn = __pa(ent->addr) >> PAGE_SHIFT;
	unsigned long nr_pages = ent->size >> PAGE_SHIFT;
	unsigned long start, end;
	struct kcore_list *vmm, *tmp;

	start = ((unsigned long)pfn_to_page(pfn)) & PAGE_MASK;
	end = ((unsigned long)pfn_to_page(pfn + nr_pages)) - 1;
	end = PAGE_ALIGN(end);
	/* overlap check (because we have to align page */
	list_for_each_entry(tmp, head, list) {
		if (tmp->type != KCORE_VMEMMAP)
			continue;
		if (start < tmp->addr + tmp->size)
			if (end > tmp->addr)
				end = tmp->addr;
	}
	if (start < end) {
		vmm = kmalloc(sizeof(*vmm), GFP_KERNEL);
		if (!vmm)
			return 0;
		vmm->addr = start;
		vmm->size = end - start;
		vmm->type = KCORE_VMEMMAP;
		list_add_tail(&vmm->list, head);
	}
	return 1;
}
#else
static int
get_sparsemem_vmemmap_info(struct kcore_list *ent, struct list_head *head)
{
	return 1;
}

#endif

static int
kclist_add_private(unsigned long pfn, unsigned long nr_pages, void *arg)
{
	struct list_head *head = (struct list_head *)arg;
	struct kcore_list *ent;
	struct page *p;

	if (!pfn_valid(pfn))
		return 1;

	p = pfn_to_page(pfn);

	ent = kmalloc(sizeof(*ent), GFP_KERNEL);
	if (!ent)
		return -ENOMEM;
	ent->addr = (unsigned long)page_to_virt(p);
	ent->size = nr_pages << PAGE_SHIFT;

	if (!virt_addr_valid(ent->addr))
		goto free_out;

	/* cut not-mapped area. ....from ppc-32 code. */
	if (ULONG_MAX - ent->addr < ent->size)
		ent->size = ULONG_MAX - ent->addr;

	/*
	 * We've already checked virt_addr_valid so we know this address
	 * is a valid pointer, therefore we can check against it to determine
	 * if we need to trim
	 */
	if (VMALLOC_START > ent->addr) {
		if (VMALLOC_START - ent->addr < ent->size)
			ent->size = VMALLOC_START - ent->addr;
	}

	ent->type = KCORE_RAM;
	list_add_tail(&ent->list, head);

	if (!get_sparsemem_vmemmap_info(ent, head)) {
		list_del(&ent->list);
		goto free_out;
	}

	return 0;
free_out:
	kfree(ent);
	return 1;
}

static int kcore_ram_list(struct list_head *list)
{
	int nid, ret;
	unsigned long end_pfn;

	/* Not inialized....update now */
	/* find out "max pfn" */
	end_pfn = 0;
	for_each_node_state(nid, N_MEMORY) {
		unsigned long node_end;
		node_end = node_end_pfn(nid);
		if (end_pfn < node_end)
			end_pfn = node_end;
	}
	/* scan 0 to max_pfn */
	ret = walk_system_ram_range(0, end_pfn, list, kclist_add_private);
	if (ret)
		return -ENOMEM;
	return 0;
}

#endif /* CONFIG_HIGHMEM */

static int kcore_update_ram(void)
{
	LIST_HEAD(list);
	LIST_HEAD(garbage);
	int nphdr;
	size_t phdrs_len, notes_len, data_offset;
	struct kcore_list *tmp, *pos;
	int ret = 0;

	down_write(&kclist_lock);
	if (!xchg(&kcore_need_update, 0))
		goto out;

	ret = kcore_ram_list(&list);
	if (ret) {
		/* Couldn't get the RAM list, try again next time. */
		WRITE_ONCE(kcore_need_update, 1);
		list_splice_tail(&list, &garbage);
		goto out;
	}

	list_for_each_entry_safe(pos, tmp, &kclist_head, list) {
		if (pos->type == KCORE_RAM || pos->type == KCORE_VMEMMAP)
			list_move(&pos->list, &garbage);
	}
	list_splice_tail(&list, &kclist_head);

	proc_root_kcore->size = get_kcore_size(&nphdr, &phdrs_len, &notes_len,
					       &data_offset);

out:
	up_write(&kclist_lock);
	list_for_each_entry_safe(pos, tmp, &garbage, list) {
		list_del(&pos->list);
		kfree(pos);
	}
	return ret;
}

static void append_kcore_note(char *notes, size_t *i, const char *name,
			      unsigned int type, const void *desc,
			      size_t descsz)
{
	struct elf_note *note = (struct elf_note *)&notes[*i];

	note->n_namesz = strlen(name) + 1;
	note->n_descsz = descsz;
	note->n_type = type;
	*i += sizeof(*note);
	memcpy(&notes[*i], name, note->n_namesz);
	*i = ALIGN(*i + note->n_namesz, 4);
	memcpy(&notes[*i], desc, descsz);
	*i = ALIGN(*i + descsz, 4);
}

static ssize_t read_kcore_iter(struct kiocb *iocb, struct iov_iter *iter)
{
	loff_t *ppos = &iocb->ki_pos;
	size_t phdrs_offset, notes_offset, data_offset;
	size_t page_offline_frozen = 1;
	size_t phdrs_len, notes_len;
	struct kcore_list *m;
	size_t tsz;
	int nphdr;
	unsigned long start;
	size_t buflen = iov_iter_count(iter);
	size_t orig_buflen = buflen;
	int ret = 0;

	down_read(&kclist_lock);
	/*
	 * Don't race against drivers that set PageOffline() and expect no
	 * further page access.
	 */
	page_offline_freeze();

	get_kcore_size(&nphdr, &phdrs_len, &notes_len, &data_offset);
	phdrs_offset = sizeof(struct elfhdr);
	notes_offset = phdrs_offset + phdrs_len;

	/* ELF file header. */
	if (buflen && *ppos < sizeof(struct elfhdr)) {
		struct elfhdr ehdr = {
			.e_ident = {
				[EI_MAG0] = ELFMAG0,
				[EI_MAG1] = ELFMAG1,
				[EI_MAG2] = ELFMAG2,
				[EI_MAG3] = ELFMAG3,
				[EI_CLASS] = ELF_CLASS,
				[EI_DATA] = ELF_DATA,
				[EI_VERSION] = EV_CURRENT,
				[EI_OSABI] = ELF_OSABI,
			},
			.e_type = ET_CORE,
			.e_machine = ELF_ARCH,
			.e_version = EV_CURRENT,
			.e_phoff = sizeof(struct elfhdr),
			.e_flags = ELF_CORE_EFLAGS,
			.e_ehsize = sizeof(struct elfhdr),
			.e_phentsize = sizeof(struct elf_phdr),
			.e_phnum = nphdr,
		};

		tsz = min_t(size_t, buflen, sizeof(struct elfhdr) - *ppos);
		if (copy_to_iter((char *)&ehdr + *ppos, tsz, iter) != tsz) {
			ret = -EFAULT;
			goto out;
		}

		buflen -= tsz;
		*ppos += tsz;
	}

	/* ELF program headers. */
	if (buflen && *ppos < phdrs_offset + phdrs_len) {
		struct elf_phdr *phdrs, *phdr;

		phdrs = kzalloc(phdrs_len, GFP_KERNEL);
		if (!phdrs) {
			ret = -ENOMEM;
			goto out;
		}

		phdrs[0].p_type = PT_NOTE;
		phdrs[0].p_offset = notes_offset;
		phdrs[0].p_filesz = notes_len;

		phdr = &phdrs[1];
		list_for_each_entry(m, &kclist_head, list) {
			phdr->p_type = PT_LOAD;
			phdr->p_flags = PF_R | PF_W | PF_X;
			phdr->p_offset = kc_vaddr_to_offset(m->addr) + data_offset;
			phdr->p_vaddr = (size_t)m->addr;
			if (m->type == KCORE_RAM)
				phdr->p_paddr = __pa(m->addr);
			else if (m->type == KCORE_TEXT)
				phdr->p_paddr = __pa_symbol(m->addr);
			else
				phdr->p_paddr = (elf_addr_t)-1;
			phdr->p_filesz = phdr->p_memsz = m->size;
			phdr->p_align = PAGE_SIZE;
			phdr++;
		}

		tsz = min_t(size_t, buflen, phdrs_offset + phdrs_len - *ppos);
		if (copy_to_iter((char *)phdrs + *ppos - phdrs_offset, tsz,
				 iter) != tsz) {
			kfree(phdrs);
			ret = -EFAULT;
			goto out;
		}
		kfree(phdrs);

		buflen -= tsz;
		*ppos += tsz;
	}

	/* ELF note segment. */
	if (buflen && *ppos < notes_offset + notes_len) {
		struct elf_prstatus prstatus = {};
		struct elf_prpsinfo prpsinfo = {
			.pr_sname = 'R',
			.pr_fname = "vmlinux",
		};
		char *notes;
		size_t i = 0;

		strlcpy(prpsinfo.pr_psargs, saved_command_line,
			sizeof(prpsinfo.pr_psargs));

		notes = kzalloc(notes_len, GFP_KERNEL);
		if (!notes) {
			ret = -ENOMEM;
			goto out;
		}

		append_kcore_note(notes, &i, CORE_STR, NT_PRSTATUS, &prstatus,
				  sizeof(prstatus));
		append_kcore_note(notes, &i, CORE_STR, NT_PRPSINFO, &prpsinfo,
				  sizeof(prpsinfo));
		append_kcore_note(notes, &i, CORE_STR, NT_TASKSTRUCT, current,
				  arch_task_struct_size);
		/*
		 * vmcoreinfo_size is mostly constant after init time, but it
		 * can be changed by crash_save_vmcoreinfo(). Racing here with a
		 * panic on another CPU before the machine goes down is insanely
		 * unlikely, but it's better to not leave potential buffer
		 * overflows lying around, regardless.
		 */
		append_kcore_note(notes, &i, VMCOREINFO_NOTE_NAME, 0,
				  vmcoreinfo_data,
				  min(vmcoreinfo_size, notes_len - i));

		tsz = min_t(size_t, buflen, notes_offset + notes_len - *ppos);
		if (copy_to_iter(notes + *ppos - notes_offset, tsz, iter) != tsz) {
			kfree(notes);
			ret = -EFAULT;
			goto out;
		}
		kfree(notes);

		buflen -= tsz;
		*ppos += tsz;
	}

	/*
	 * Check to see if our file offset matches with any of
	 * the addresses in the elf_phdr on our list.
	 */
	start = kc_offset_to_vaddr(*ppos - data_offset);
	if ((tsz = (PAGE_SIZE - (start & ~PAGE_MASK))) > buflen)
		tsz = buflen;

	m = NULL;
	while (buflen) {
		struct page *page;
		unsigned long pfn;

		/*
		 * If this is the first iteration or the address is not within
		 * the previous entry, search for a matching entry.
		 */
		if (!m || start < m->addr || start >= m->addr + m->size) {
			struct kcore_list *iter;

			m = NULL;
			list_for_each_entry(iter, &kclist_head, list) {
				if (start >= iter->addr &&
				    start < iter->addr + iter->size) {
					m = iter;
					break;
				}
			}
		}

		if (page_offline_frozen++ % MAX_ORDER_NR_PAGES == 0) {
			page_offline_thaw();
			cond_resched();
			page_offline_freeze();
		}

		if (!m) {
			if (iov_iter_zero(tsz, iter) != tsz) {
				ret = -EFAULT;
				goto out;
			}
			goto skip;
		}

		switch (m->type) {
		case KCORE_VMALLOC:
			if (vread_iter(iter, (char *)start, tsz) != tsz) {
				ret = -EFAULT;
				goto out;
			}
			break;
		case KCORE_USER:
			/* User page is handled prior to normal kernel page: */
			if (copy_to_iter((char *)start, tsz, iter) != tsz) {
				ret = -EFAULT;
				goto out;
			}
			break;
		case KCORE_RAM:
			pfn = __pa(start) >> PAGE_SHIFT;
			page = pfn_to_online_page(pfn);

			/*
			 * Don't read offline sections, logically offline pages
			 * (e.g., inflated in a balloon), hwpoisoned pages,
			 * and explicitly excluded physical ranges.
			 */
			if (!page || PageOffline(page) ||
			    is_page_hwpoison(page) || !pfn_is_ram(pfn)) {
				if (iov_iter_zero(tsz, iter) != tsz) {
					ret = -EFAULT;
					goto out;
				}
				break;
			}
			fallthrough;
		case KCORE_VMEMMAP:
		case KCORE_TEXT:
			/*
			 * We use _copy_to_iter() to bypass usermode hardening
			 * which would otherwise prevent this operation.
			 */
			if (_copy_to_iter((char *)start, tsz, iter) != tsz) {
				ret = -EFAULT;
				goto out;
			}
			break;
		default:
			pr_warn_once("Unhandled KCORE type: %d\n", m->type);
			if (iov_iter_zero(tsz, iter) != tsz) {
				ret = -EFAULT;
				goto out;
			}
		}
skip:
		buflen -= tsz;
		*ppos += tsz;
		start += tsz;
		tsz = (buflen > PAGE_SIZE ? PAGE_SIZE : buflen);
	}

out:
	page_offline_thaw();
	up_read(&kclist_lock);
	if (ret)
		return ret;
	return orig_buflen - buflen;
}

static int open_kcore(struct inode *inode, struct file *filp)
{
	int ret = security_locked_down(LOCKDOWN_KCORE);

	if (!capable(CAP_SYS_RAWIO))
		return -EPERM;

	if (ret)
		return ret;

	if (kcore_need_update)
		kcore_update_ram();
	if (i_size_read(inode) != proc_root_kcore->size) {
		inode_lock(inode);
		i_size_write(inode, proc_root_kcore->size);
		inode_unlock(inode);
	}
	return 0;
}

static const struct proc_ops kcore_proc_ops = {
	.proc_read_iter	= read_kcore_iter,
	.proc_open	= open_kcore,
	.proc_lseek	= default_llseek,
};

/* just remember that we have to update kcore */
static int __meminit kcore_callback(struct notifier_block *self,
				    unsigned long action, void *arg)
{
	switch (action) {
	case MEM_ONLINE:
	case MEM_OFFLINE:
		kcore_need_update = 1;
		break;
	}
	return NOTIFY_OK;
}

static struct kcore_list kcore_vmalloc;

#ifdef CONFIG_ARCH_PROC_KCORE_TEXT
static struct kcore_list kcore_text;
/*
 * If defined, special segment is used for mapping kernel text instead of
 * direct-map area. We need to create special TEXT section.
 */
static void __init proc_kcore_text_init(void)
{
	kclist_add(&kcore_text, _text, _end - _text, KCORE_TEXT);
}
#else
static void __init proc_kcore_text_init(void)
{
}
#endif

#if defined(CONFIG_MODULES) && defined(MODULES_VADDR)
/*
 * MODULES_VADDR has no intersection with VMALLOC_ADDR.
 */
static struct kcore_list kcore_modules;
static void __init add_modules_range(void)
{
	if (MODULES_VADDR != VMALLOC_START && MODULES_END != VMALLOC_END) {
		kclist_add(&kcore_modules, (void *)MODULES_VADDR,
			MODULES_END - MODULES_VADDR, KCORE_VMALLOC);
	}
}
#else
static void __init add_modules_range(void)
{
}
#endif

static int __init proc_kcore_init(void)
{
	proc_root_kcore = proc_create("kcore", S_IRUSR, NULL, &kcore_proc_ops);
	if (!proc_root_kcore) {
		pr_err("couldn't create /proc/kcore\n");
		return 0; /* Always returns 0. */
	}
	/* Store text area if it's special */
	proc_kcore_text_init();
	/* Store vmalloc area */
	kclist_add(&kcore_vmalloc, (void *)VMALLOC_START,
		VMALLOC_END - VMALLOC_START, KCORE_VMALLOC);
	add_modules_range();
	/* Store direct-map area from physical memory map */
	kcore_update_ram();
	hotplug_memory_notifier(kcore_callback, DEFAULT_CALLBACK_PRI);

	return 0;
}
fs_initcall(proc_kcore_init);
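The ELF image that read_kcore_iter() emits above (an ELF header at offset 0, program headers at e_phoff, a PT_NOTE segment, and one PT_LOAD segment per kcore_list entry) can be walked with ordinary userspace ELF code; gdb consumes the same layout when run as "gdb vmlinux /proc/kcore". Below is a minimal, hypothetical sketch, not part of the kernel source: it assumes a 64-bit kernel (Elf64 types), uses only libc and <elf.h>, and must run as root since open_kcore() requires CAP_SYS_RAWIO.

/*
 * Sketch: list the segments that fs/proc/kcore.c exposes through /proc/kcore.
 * Assumes Elf64 types; run as root (open_kcore() checks CAP_SYS_RAWIO).
 */
#include <elf.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/proc/kcore", O_RDONLY);
	if (fd < 0) {
		perror("open /proc/kcore");
		return 1;
	}

	/* ELF file header, produced by the first branch of read_kcore_iter() */
	Elf64_Ehdr ehdr;
	if (pread(fd, &ehdr, sizeof(ehdr), 0) != sizeof(ehdr)) {
		perror("read ELF header");
		return 1;
	}

	/* e_phnum program headers start at e_phoff (== sizeof(struct elfhdr)) */
	Elf64_Phdr *phdrs = calloc(ehdr.e_phnum, sizeof(*phdrs));
	if (!phdrs)
		return 1;
	if (pread(fd, phdrs, ehdr.e_phnum * sizeof(*phdrs), ehdr.e_phoff) !=
	    (ssize_t)(ehdr.e_phnum * sizeof(*phdrs))) {
		perror("read program headers");
		return 1;
	}

	for (int i = 0; i < ehdr.e_phnum; i++) {
		const char *type = phdrs[i].p_type == PT_NOTE ? "NOTE" :
				   phdrs[i].p_type == PT_LOAD ? "LOAD" : "?";
		printf("%-4s vaddr=0x%llx offset=0x%llx size=0x%llx\n",
		       type,
		       (unsigned long long)phdrs[i].p_vaddr,
		       (unsigned long long)phdrs[i].p_offset,
		       (unsigned long long)phdrs[i].p_filesz);
	}

	free(phdrs);
	close(fd);
	return 0;
}

Each LOAD segment's file offset is kc_vaddr_to_offset(addr) + data_offset, so seeking to p_offset + (vaddr - p_vaddr) and reading reaches the kernel virtual address vaddr, subject to the zero-filling done for offline, hwpoisoned, or non-RAM pages.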