Skip to content

Commit

Permalink
---
Browse files Browse the repository at this point in the history
yaml
---
r: 72261
b: refs/heads/master
c: 47436aa
h: refs/heads/master
i:
  72259: 91a9366
v: v3
  • Loading branch information
Rusty Russell committed Oct 23, 2007
1 parent 12d5415 commit 570e801
Show file tree
Hide file tree
Showing 13 changed files with 142 additions and 149 deletions.
2 changes: 1 addition & 1 deletion [refs]
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
---
refs/heads/master: c18acd73ffc209def08003a1927473096f66c5ad
refs/heads/master: 47436aa4ad054c1c7c8231618e86ebd9305308dc
134 changes: 31 additions & 103 deletions trunk/Documentation/lguest/lguest.c
Original file line number Diff line number Diff line change
Expand Up @@ -178,19 +178,16 @@ static void *get_pages(unsigned int num)
/* To find out where to start we look for the magic Guest string, which marks
* the code we see in lguest_asm.S. This is a hack which we are currently
* plotting to replace with the normal Linux entry point. */
static unsigned long entry_point(const void *start, const void *end,
unsigned long page_offset)
static unsigned long entry_point(const void *start, const void *end)
{
const void *p;

/* The scan gives us the physical starting address. We want the
* virtual address in this case, and fortunately, we already figured
* out the physical-virtual difference and passed it here in
* "page_offset". */
/* The scan gives us the physical starting address. We boot with
* pagetables set up with virtual and physical the same, so that's
* OK. */
for (p = start; p < end; p++)
if (memcmp(p, "GenuineLguest", strlen("GenuineLguest")) == 0)
return to_guest_phys(p + strlen("GenuineLguest"))
+ page_offset;
return to_guest_phys(p + strlen("GenuineLguest"));

errx(1, "Is this image a genuine lguest?");
}
Expand Down Expand Up @@ -224,14 +221,11 @@ static void map_at(int fd, void *addr, unsigned long offset, unsigned long len)
* by all modern binaries on Linux including the kernel.
*
* The ELF headers give *two* addresses: a physical address, and a virtual
* address. The Guest kernel expects to be placed in memory at the physical
* address, and the page tables set up so it will correspond to that virtual
* address. We return the difference between the virtual and physical
* addresses in the "page_offset" pointer.
* address. We use the physical address; the Guest will map itself to the
* virtual address.
*
* We return the starting address. */
static unsigned long map_elf(int elf_fd, const Elf32_Ehdr *ehdr,
unsigned long *page_offset)
static unsigned long map_elf(int elf_fd, const Elf32_Ehdr *ehdr)
{
void *start = (void *)-1, *end = NULL;
Elf32_Phdr phdr[ehdr->e_phnum];
Expand All @@ -255,9 +249,6 @@ static unsigned long map_elf(int elf_fd, const Elf32_Ehdr *ehdr,
if (read(elf_fd, phdr, sizeof(phdr)) != sizeof(phdr))
err(1, "Reading program headers");

/* We don't know page_offset yet. */
*page_offset = 0;

/* Try all the headers: there are usually only three. A read-only one,
* a read-write one, and a "note" section which isn't loadable. */
for (i = 0; i < ehdr->e_phnum; i++) {
Expand All @@ -268,14 +259,6 @@ static unsigned long map_elf(int elf_fd, const Elf32_Ehdr *ehdr,
verbose("Section %i: size %i addr %p\n",
i, phdr[i].p_memsz, (void *)phdr[i].p_paddr);

/* We expect a simple linear address space: every segment must
* have the same difference between virtual (p_vaddr) and
* physical (p_paddr) address. */
if (!*page_offset)
*page_offset = phdr[i].p_vaddr - phdr[i].p_paddr;
else if (*page_offset != phdr[i].p_vaddr - phdr[i].p_paddr)
errx(1, "Page offset of section %i different", i);

/* We track the first and last address we mapped, so we can
* tell entry_point() where to scan. */
if (from_guest_phys(phdr[i].p_paddr) < start)
Expand All @@ -288,50 +271,13 @@ static unsigned long map_elf(int elf_fd, const Elf32_Ehdr *ehdr,
phdr[i].p_offset, phdr[i].p_filesz);
}

return entry_point(start, end, *page_offset);
}

/*L:170 Prepare to be SHOCKED and AMAZED. And possibly a trifle nauseated.
*
* We know that CONFIG_PAGE_OFFSET sets what virtual address the kernel expects
* to be. We don't know what that option was, but we can figure it out
* approximately by looking at the addresses in the code. I chose the common
* case of reading a memory location into the %eax register:
*
* movl <some-address>, %eax
*
* This gets encoded as five bytes: "0xA1 <4-byte-address>". For example,
* "0xA1 0x18 0x60 0x47 0xC0" reads the address 0xC0476018 into %eax.
*
* In this example we can guess that the kernel was compiled with
* CONFIG_PAGE_OFFSET set to 0xC0000000 (it's always a round number). If the
* kernel were larger than 16MB, we might see 0xC1 addresses show up, but our
* kernel isn't that bloated yet.
*
* Unfortunately, x86 has variable-length instructions, so finding this
* particular instruction properly involves writing a disassembler. Instead,
* we rely on statistics. We look for "0xA1" and tally the different bytes
* which occur 4 bytes later (the "0xC0" in our example above). When one of
* those bytes appears three times, we can be reasonably confident that it
* forms the start of CONFIG_PAGE_OFFSET.
*
* This is amazingly reliable. */
static unsigned long intuit_page_offset(unsigned char *img, unsigned long len)
{
unsigned int i, possibilities[256] = { 0 };

for (i = 0; i + 4 < len; i++) {
/* mov 0xXXXXXXXX,%eax */
if (img[i] == 0xA1 && ++possibilities[img[i+4]] > 3)
return (unsigned long)img[i+4] << 24;
}
errx(1, "could not determine page offset");
return entry_point(start, end);
}

/*L:160 Unfortunately the entire ELF image isn't compressed: the segments
* which need loading are extracted and compressed raw. This denies us the
* information we need to make a fully-general loader. */
static unsigned long unpack_bzimage(int fd, unsigned long *page_offset)
static unsigned long unpack_bzimage(int fd)
{
gzFile f;
int ret, len = 0;
Expand All @@ -352,12 +298,7 @@ static unsigned long unpack_bzimage(int fd, unsigned long *page_offset)

verbose("Unpacked size %i addr %p\n", len, img);

/* Without the ELF header, we can't tell the virtual-physical gap. This is
* CONFIG_PAGE_OFFSET, and people do actually change it. Fortunately,
* I have a clever way of figuring it out from the code itself. */
*page_offset = intuit_page_offset(img, len);

return entry_point(img, img + len, *page_offset);
return entry_point(img, img + len);
}

/*L:150 A bzImage, unlike an ELF file, is not meant to be loaded. You're
Expand All @@ -368,7 +309,7 @@ static unsigned long unpack_bzimage(int fd, unsigned long *page_offset)
* The bzImage is formed by putting the decompressing code in front of the
* compressed kernel code. So we can simply scan through it looking for the
* first "gzip" header, and start decompressing from there. */
static unsigned long load_bzimage(int fd, unsigned long *page_offset)
static unsigned long load_bzimage(int fd)
{
unsigned char c;
int state = 0;
Expand Down Expand Up @@ -396,7 +337,7 @@ static unsigned long load_bzimage(int fd, unsigned long *page_offset)
if (c != 0x03)
state = -1;
else
return unpack_bzimage(fd, page_offset);
return unpack_bzimage(fd);
}
}
errx(1, "Could not find kernel in bzImage");
Expand All @@ -405,7 +346,7 @@ static unsigned long load_bzimage(int fd, unsigned long *page_offset)
/*L:140 Loading the kernel is easy when it's a "vmlinux", but most kernels
* come wrapped up in the self-decompressing "bzImage" format. With some funky
* coding, we can load those, too. */
static unsigned long load_kernel(int fd, unsigned long *page_offset)
static unsigned long load_kernel(int fd)
{
Elf32_Ehdr hdr;

Expand All @@ -415,10 +356,10 @@ static unsigned long load_kernel(int fd, unsigned long *page_offset)

/* If it's an ELF file, it starts with "\177ELF" */
if (memcmp(hdr.e_ident, ELFMAG, SELFMAG) == 0)
return map_elf(fd, &hdr, page_offset);
return map_elf(fd, &hdr);

/* Otherwise we assume it's a bzImage, and try to unpack it */
return load_bzimage(fd, page_offset);
return load_bzimage(fd);
}

/* This is a trivial little helper to align pages. Andi Kleen hated it because
Expand Down Expand Up @@ -463,27 +404,20 @@ static unsigned long load_initrd(const char *name, unsigned long mem)
return len;
}

/* Once we know the address the Guest kernel expects, we can construct simple
* linear page tables for all of memory which will get the Guest far enough
/* Once we know how much memory we have, we can construct simple linear page
* tables which set virtual == physical, which will get the Guest far enough
* into the boot to create its own.
*
* We lay them out of the way, just below the initrd (which is why we need to
* know its size). */
static unsigned long setup_pagetables(unsigned long mem,
unsigned long initrd_size,
unsigned long page_offset)
unsigned long initrd_size)
{
unsigned long *pgdir, *linear;
unsigned int mapped_pages, i, linear_pages;
unsigned int ptes_per_page = getpagesize()/sizeof(void *);

/* Ideally we map all physical memory starting at page_offset.
* However, if page_offset is 0xC0000000 we can only map 1G of physical
* (0xC0000000 + 1G overflows). */
if (mem <= -page_offset)
mapped_pages = mem/getpagesize();
else
mapped_pages = -page_offset/getpagesize();
mapped_pages = mem/getpagesize();

/* Each PTE page can map ptes_per_page pages: how many do we need? */
linear_pages = (mapped_pages + ptes_per_page-1)/ptes_per_page;
Expand All @@ -500,11 +434,9 @@ static unsigned long setup_pagetables(unsigned long mem,
for (i = 0; i < mapped_pages; i++)
linear[i] = ((i * getpagesize()) | PAGE_PRESENT);

/* The top level points to the linear page table pages above. The
* entry representing page_offset points to the first one, and they
* continue from there. */
/* The top level points to the linear page table pages above. */
for (i = 0; i < mapped_pages; i += ptes_per_page) {
pgdir[(i + page_offset/getpagesize())/ptes_per_page]
pgdir[i/ptes_per_page]
= ((to_guest_phys(linear) + i*sizeof(void *))
| PAGE_PRESENT);
}
Expand Down Expand Up @@ -535,15 +467,12 @@ static void concat(char *dst, char *args[])
/* This is where we actually tell the kernel to initialize the Guest. We saw
* the arguments it expects when we looked at initialize() in lguest_user.c:
* the base of guest "physical" memory, the top physical page to allow, the
* top level pagetable, the entry point and the page_offset constant for the
* Guest. */
static int tell_kernel(unsigned long pgdir, unsigned long start,
unsigned long page_offset)
* top level pagetable and the entry point for the Guest. */
static int tell_kernel(unsigned long pgdir, unsigned long start)
{
unsigned long args[] = { LHREQ_INITIALIZE,
(unsigned long)guest_base,
guest_limit / getpagesize(),
pgdir, start, page_offset };
guest_limit / getpagesize(), pgdir, start };
int fd;

verbose("Guest: %p - %p (%#lx)\n",
Expand Down Expand Up @@ -1424,9 +1353,9 @@ static void usage(void)
/*L:105 The main routine is where the real work begins: */
int main(int argc, char *argv[])
{
/* Memory, top-level pagetable, code startpoint, PAGE_OFFSET and size
* of the (optional) initrd. */
unsigned long mem = 0, pgdir, start, page_offset, initrd_size = 0;
/* Memory, top-level pagetable, code startpoint and size of the
* (optional) initrd. */
unsigned long mem = 0, pgdir, start, initrd_size = 0;
/* A temporary and the /dev/lguest file descriptor. */
int i, c, lguest_fd;
/* The list of Guest devices, based on command line arguments. */
Expand Down Expand Up @@ -1500,8 +1429,7 @@ int main(int argc, char *argv[])
setup_console(&device_list);

/* Now we load the kernel */
start = load_kernel(open_or_die(argv[optind+1], O_RDONLY),
&page_offset);
start = load_kernel(open_or_die(argv[optind+1], O_RDONLY));

/* Boot information is stashed at physical address 0 */
boot = from_guest_phys(0);
Expand All @@ -1518,7 +1446,7 @@ int main(int argc, char *argv[])
}

/* Set up the initial linear pagetables, starting below the initrd. */
pgdir = setup_pagetables(mem, initrd_size, page_offset);
pgdir = setup_pagetables(mem, initrd_size);

/* The Linux boot header contains an "E820" memory map: ours is a
* simple, single region. */
Expand All @@ -1535,7 +1463,7 @@ int main(int argc, char *argv[])

/* We tell the kernel to initialize the Guest: this returns the open
* /dev/lguest file descriptor. */
lguest_fd = tell_kernel(pgdir, start, page_offset);
lguest_fd = tell_kernel(pgdir, start);

/* We fork off a child process, which wakes the Launcher whenever one
* of the input file descriptors needs attention. Otherwise we would
Expand Down
1 change: 1 addition & 0 deletions trunk/arch/x86/kernel/asm-offsets_32.c
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,7 @@ void foo(void)
#ifdef CONFIG_LGUEST_GUEST
BLANK();
OFFSET(LGUEST_DATA_irq_enabled, lguest_data, irq_enabled);
OFFSET(LGUEST_DATA_pgdir, lguest_data, pgdir);
OFFSET(LGUEST_PAGES_host_gdt_desc, lguest_pages, state.host_gdt_desc);
OFFSET(LGUEST_PAGES_host_idt_desc, lguest_pages, state.host_idt_desc);
OFFSET(LGUEST_PAGES_host_cr3, lguest_pages, state.host_cr3);
Expand Down
7 changes: 2 additions & 5 deletions trunk/arch/x86/lguest/boot.c
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ struct lguest_data lguest_data = {
.hcall_status = { [0 ... LHCALL_RING_SIZE-1] = 0xFF },
.noirq_start = (u32)lguest_noirq_start,
.noirq_end = (u32)lguest_noirq_end,
.kernel_address = PAGE_OFFSET,
.blocked_interrupts = { 1 }, /* Block timer interrupts */
.syscall_vec = SYSCALL_VECTOR,
};
Expand Down Expand Up @@ -1033,11 +1034,7 @@ __init void lguest_init(void *boot)

/*G:070 Now we've seen all the paravirt_ops, we return to
* lguest_init() where the rest of the fairly chaotic boot setup
* occurs.
*
* The Host expects our first hypercall to tell it where our "struct
* lguest_data" is, so we do that first. */
hcall(LHCALL_LGUEST_INIT, __pa(&lguest_data), 0, 0);
* occurs. */

/* The native boot code sets up initial page tables immediately after
* the kernel itself, and sets init_pg_tables_end so they're not
Expand Down
41 changes: 36 additions & 5 deletions trunk/arch/x86/lguest/i386_head.S
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#include <linux/linkage.h>
#include <linux/lguest.h>
#include <asm/lguest_hcall.h>
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
#include <asm/processor-flags.h>
Expand All @@ -8,18 +9,48 @@
* looks for. The plan is that the Linux boot protocol will be extended with a
* "platform type" field which will guide us here from the normal entry point,
* but for the moment this suffices. The normal boot code uses %esi for the
* boot header, so we do too. We convert it to a virtual address by adding
* PAGE_OFFSET, and hand it to lguest_init() as its argument (ie. %eax).
* boot header, so we do too.
*
* WARNING: be very careful here! We're running at addresses equal to physical
* addresses (around 0), not above PAGE_OFFSET as most code expects
* (eg. 0xC0000000). Jumps are relative, so they're OK, but we can't touch any
* data.
*
* The .section line puts this code in .init.text so it will be discarded after
* boot. */
.section .init.text, "ax", @progbits
.ascii "GenuineLguest"
/* Set up initial stack. */
movl $(init_thread_union+THREAD_SIZE),%esp
/* Make initial hypercall now, so we can set up the pagetables. */
movl $LHCALL_LGUEST_INIT, %eax
movl $lguest_data - __PAGE_OFFSET, %edx
int $LGUEST_TRAP_ENTRY

/* Set up boot information pointer to hand to lguest_init(): it wants
* a virtual address. */
movl %esi, %eax
addl $__PAGE_OFFSET, %eax
jmp lguest_init

/* The Host put the toplevel pagetable in lguest_data.pgdir. The movsl
* instruction uses %esi, so we needed to save it above. */
movl lguest_data - __PAGE_OFFSET + LGUEST_DATA_pgdir, %esi

/* Copy first 32 entries of page directory to __PAGE_OFFSET entries.
* This means the first 128M of kernel memory will be mapped at
* PAGE_OFFSET where the kernel expects to run. This will get it far
* enough through boot to switch to its own pagetables. */
movl $32, %ecx
movl %esi, %edi
addl $((__PAGE_OFFSET >> 22) * 4), %edi
rep
movsl

/* Set up the initial stack so we can run C code. */
movl $(init_thread_union+THREAD_SIZE),%esp


/* Jumps are relative, and we're running __PAGE_OFFSET too low at the
* moment. */
jmp lguest_init+__PAGE_OFFSET

/*G:055 We create a macro which puts the assembler code between lgstart_ and
* lgend_ markers. These templates are put in the .text section: they can't be
Expand Down
8 changes: 4 additions & 4 deletions trunk/drivers/lguest/hypercalls.c
Original file line number Diff line number Diff line change
Expand Up @@ -181,15 +181,15 @@ static void initialize(struct lguest *lg)
/* The Guest tells us where we're not to deliver interrupts by putting
* the range of addresses into "struct lguest_data". */
if (get_user(lg->noirq_start, &lg->lguest_data->noirq_start)
|| get_user(lg->noirq_end, &lg->lguest_data->noirq_end)
/* We tell the Guest that it can't use the top 4MB of virtual
* addresses used by the Switcher. */
|| put_user(4U*1024*1024, &lg->lguest_data->reserve_mem))
|| get_user(lg->noirq_end, &lg->lguest_data->noirq_end))
kill_guest(lg, "bad guest page %p", lg->lguest_data);

/* We write the current time into the Guest's data page once now. */
write_timestamp(lg);

/* page_tables.c will also do some setup. */
page_table_guest_data_init(lg);

/* This is the one case where the above accesses might have been the
* first write to a Guest page. This may have caused a copy-on-write
* fault, but the Guest might be referring to the old (read-only)
Expand Down
Loading

0 comments on commit 570e801

Please sign in to comment.