Skip to content

Commit

Permalink
Merge git://git.kernel.org/pub/scm/linux/kernel/git/rusty/linux-2.6-f…
Browse files Browse the repository at this point in the history
…or-linus

* git://git.kernel.org/pub/scm/linux/kernel/git/rusty/linux-2.6-for-linus:
  lguest: Fix in/out emulation
  lguest: Fix translation count about wikipedia's cpuid page
  lguest: Fix three simple typos in comments
  lguest: update comments
  lguest: Simplify device initialization.
  lguest: don't rewrite vmcall instructions
  lguest: remove remaining vmcall
  lguest: use a special 1:1 linear pagetable mode until first switch.
  lguest: Do not exit on non-fatal errors
  • Loading branch information
Linus Torvalds committed Jul 22, 2011
2 parents 111ad11 + 996ba96 commit a7e1aab
Show file tree
Hide file tree
Showing 13 changed files with 199 additions and 380 deletions.
47 changes: 16 additions & 31 deletions Documentation/virtual/lguest/lguest.c
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@
#include <asm/bootparam.h>
#include "../../../include/linux/lguest_launcher.h"
/*L:110
* We can ignore the 42 include files we need for this program, but I do want
* We can ignore the 43 include files we need for this program, but I do want
* to draw attention to the use of kernel-style types.
*
* As Linus said, "C is a Spartan language, and so should your naming be." I
Expand All @@ -65,7 +65,6 @@ typedef uint16_t u16;
typedef uint8_t u8;
/*:*/

#define PAGE_PRESENT 0x7 /* Present, RW, Execute */
#define BRIDGE_PFX "bridge:"
#ifndef SIOCBRADDIF
#define SIOCBRADDIF 0x89a2 /* add interface to bridge */
Expand Down Expand Up @@ -861,8 +860,10 @@ static void console_output(struct virtqueue *vq)
/* writev can return a partial write, so we loop here. */
while (!iov_empty(iov, out)) {
int len = writev(STDOUT_FILENO, iov, out);
if (len <= 0)
err(1, "Write to stdout gave %i", len);
if (len <= 0) {
warn("Write to stdout gave %i (%d)", len, errno);
break;
}
iov_consume(iov, out, len);
}

Expand Down Expand Up @@ -898,7 +899,7 @@ static void net_output(struct virtqueue *vq)
* same format: what a coincidence!
*/
if (writev(net_info->tunfd, iov, out) < 0)
errx(1, "Write to tun failed?");
warnx("Write to tun failed (%d)?", errno);

/*
* Done with that one; wait_for_vq_desc() will send the interrupt if
Expand Down Expand Up @@ -955,7 +956,7 @@ static void net_input(struct virtqueue *vq)
*/
len = readv(net_info->tunfd, iov, in);
if (len <= 0)
err(1, "Failed to read from tun.");
warn("Failed to read from tun (%d).", errno);

/*
* Mark that packet buffer as used, but don't interrupt here. We want
Expand Down Expand Up @@ -1093,9 +1094,10 @@ static void update_device_status(struct device *dev)
warnx("Device %s configuration FAILED", dev->name);
if (dev->running)
reset_device(dev);
} else if (dev->desc->status & VIRTIO_CONFIG_S_DRIVER_OK) {
if (!dev->running)
start_device(dev);
} else {
if (dev->running)
err(1, "Device %s features finalized twice", dev->name);
start_device(dev);
}
}

Expand All @@ -1120,25 +1122,11 @@ static void handle_output(unsigned long addr)
return;
}

/*
* Devices *can* be used before status is set to DRIVER_OK.
* The original plan was that they would never do this: they
* would always finish setting up their status bits before
* actually touching the virtqueues. In practice, we allowed
* them to, and they do (eg. the disk probes for partition
* tables as part of initialization).
*
* If we see this, we start the device: once it's running, we
* expect the device to catch all the notifications.
*/
/* Devices should not be used before features are finalized. */
for (vq = i->vq; vq; vq = vq->next) {
if (addr != vq->config.pfn*getpagesize())
continue;
if (i->running)
errx(1, "Notification on running %s", i->name);
/* This just calls create_thread() for each virtqueue */
start_device(i);
return;
errx(1, "Notification on %s before setup!", i->name);
}
}

Expand Down Expand Up @@ -1370,7 +1358,7 @@ static void setup_console(void)
* --sharenet=<name> option which opens or creates a named pipe. This can be
* used to send packets to another guest in a 1:1 manner.
*
* More sopisticated is to use one of the tools developed for project like UML
* More sophisticated is to use one of the tools developed for projects like UML
* to do networking.
*
* Faster is to do virtio bonding in kernel. Doing this 1:1 would be
Expand All @@ -1380,7 +1368,7 @@ static void setup_console(void)
* multiple inter-guest channels behind one interface, although it would
* require some manner of hotplugging new virtio channels.
*
* Finally, we could implement a virtio network switch in the kernel.
* Finally, we could use a virtio network switch in the kernel, ie. vhost.
:*/

static u32 str2ip(const char *ipaddr)
Expand Down Expand Up @@ -2017,10 +2005,7 @@ int main(int argc, char *argv[])
/* Tell the entry path not to try to reload segment registers. */
boot->hdr.loadflags |= KEEP_SEGMENTS;

/*
* We tell the kernel to initialize the Guest: this returns the open
* /dev/lguest file descriptor.
*/
/* We tell the kernel to initialize the Guest. */
tell_kernel(start);

/* Ensure that we terminate if a device-servicing child dies. */
Expand Down
1 change: 1 addition & 0 deletions arch/x86/include/asm/lguest_hcall.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ hcall(unsigned long call,
: "memory");
return call;
}
/*:*/

/* Can't use our min() macro here: needs to be a constant */
#define LGUEST_IRQS (NR_IRQS < 32 ? NR_IRQS: 32)
Expand Down
1 change: 0 additions & 1 deletion arch/x86/kernel/asm-offsets_32.c
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,6 @@ void foo(void)
BLANK();
OFFSET(LGUEST_DATA_irq_enabled, lguest_data, irq_enabled);
OFFSET(LGUEST_DATA_irq_pending, lguest_data, irq_pending);
OFFSET(LGUEST_DATA_pgdir, lguest_data, pgdir);

BLANK();
OFFSET(LGUEST_PAGES_host_gdt_desc, lguest_pages, state.host_gdt_desc);
Expand Down
36 changes: 22 additions & 14 deletions arch/x86/lguest/boot.c
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,8 @@
#include <asm/stackprotector.h>
#include <asm/reboot.h> /* for struct machine_ops */

/*G:010 Welcome to the Guest!
/*G:010
* Welcome to the Guest!
*
* The Guest in our tale is a simple creature: identical to the Host but
* behaving in simplified but equivalent ways. In particular, the Guest is the
Expand Down Expand Up @@ -190,15 +191,23 @@ static void lazy_hcall4(unsigned long call,
#endif

/*G:036
* When lazy mode is turned off reset the per-cpu lazy mode variable and then
* issue the do-nothing hypercall to flush any stored calls.
:*/
* When lazy mode is turned off, we issue the do-nothing hypercall to
* flush any stored calls, and call the generic helper to reset the
* per-cpu lazy mode variable.
*/
static void lguest_leave_lazy_mmu_mode(void)
{
hcall(LHCALL_FLUSH_ASYNC, 0, 0, 0, 0);
paravirt_leave_lazy_mmu();
}

/*
* We also catch the end of context switch; we enter lazy mode for much of
* that too, so again we need to flush here.
*
* (Technically, this is lazy CPU mode, and normally we're in lazy MMU
* mode, but unlike Xen, lguest doesn't care about the difference).
*/
static void lguest_end_context_switch(struct task_struct *next)
{
hcall(LHCALL_FLUSH_ASYNC, 0, 0, 0, 0);
Expand Down Expand Up @@ -391,7 +400,7 @@ static void lguest_load_tr_desc(void)
* giant ball of hair. Its entry in the current Intel manual runs to 28 pages.
*
* This instruction even has its own Wikipedia entry. The Wikipedia entry
* has been translated into 5 languages. I am not making this up!
* has been translated into 6 languages. I am not making this up!
*
* We could get funky here and identify ourselves as "GenuineLguest", but
* instead we just use the real "cpuid" instruction. Then I pretty much turned
Expand Down Expand Up @@ -458,7 +467,7 @@ static void lguest_cpuid(unsigned int *ax, unsigned int *bx,
/*
* PAE systems can mark pages as non-executable. Linux calls this the
* NX bit. Intel calls it XD (eXecute Disable), AMD EVP (Enhanced
* Virus Protection). We just switch turn if off here, since we don't
* Virus Protection). We just switch it off here, since we don't
* support it.
*/
case 0x80000001:
Expand Down Expand Up @@ -520,17 +529,16 @@ static unsigned long lguest_read_cr2(void)

/* See lguest_set_pte() below. */
static bool cr3_changed = false;
static unsigned long current_cr3;

/*
* cr3 is the current toplevel pagetable page: the principle is the same as
* cr0. Keep a local copy, and tell the Host when it changes. The only
* difference is that our local copy is in lguest_data because the Host needs
* to set it upon our initial hypercall.
* cr0. Keep a local copy, and tell the Host when it changes.
*/
static void lguest_write_cr3(unsigned long cr3)
{
lguest_data.pgdir = cr3;
lazy_hcall1(LHCALL_NEW_PGTABLE, cr3);
current_cr3 = cr3;

/* These two page tables are simple, linear, and used during boot */
if (cr3 != __pa(swapper_pg_dir) && cr3 != __pa(initial_page_table))
Expand All @@ -539,7 +547,7 @@ static void lguest_write_cr3(unsigned long cr3)

static unsigned long lguest_read_cr3(void)
{
return lguest_data.pgdir;
return current_cr3;
}

/* cr4 is used to enable and disable PGE, but we don't care. */
Expand Down Expand Up @@ -641,7 +649,7 @@ static void lguest_write_cr4(unsigned long val)

/*
* The Guest calls this after it has set a second-level entry (pte), ie. to map
* a page into a process' address space. Wetell the Host the toplevel and
* a page into a process' address space. We tell the Host the toplevel and
* address this corresponds to. The Guest uses one pagetable per process, so
* we need to tell the Host which one we're changing (mm->pgd).
*/
Expand Down Expand Up @@ -758,7 +766,7 @@ static void lguest_pmd_clear(pmd_t *pmdp)
static void lguest_flush_tlb_single(unsigned long addr)
{
/* Simply set it to zero: if it was not, it will fault back in. */
lazy_hcall3(LHCALL_SET_PTE, lguest_data.pgdir, addr, 0);
lazy_hcall3(LHCALL_SET_PTE, current_cr3, addr, 0);
}

/*
Expand Down Expand Up @@ -1140,7 +1148,7 @@ static struct notifier_block paniced = {
static __init char *lguest_memory_setup(void)
{
/*
*The Linux bootloader header contains an "e820" memory map: the
* The Linux bootloader header contains an "e820" memory map: the
* Launcher populated the first entry with our memory limit.
*/
e820_add_region(boot_params.e820_map[0].addr,
Expand Down
35 changes: 20 additions & 15 deletions arch/x86/lguest/i386_head.S
Original file line number Diff line number Diff line change
Expand Up @@ -6,18 +6,22 @@
#include <asm/processor-flags.h>

/*G:020
* Our story starts with the kernel booting into startup_32 in
* arch/x86/kernel/head_32.S. It expects a boot header, which is created by
* the bootloader (the Launcher in our case).
* Our story starts with the bzImage: booting starts at startup_32 in
* arch/x86/boot/compressed/head_32.S. This merely uncompresses the real
* kernel in place and then jumps into it: startup_32 in
* arch/x86/kernel/head_32.S. Both routines expect a boot header in the %esi
* register, which is created by the bootloader (the Launcher in our case).
*
* The startup_32 function does very little: it clears the uninitialized global
* C variables which we expect to be zero (ie. BSS) and then copies the boot
* header and kernel command line somewhere safe. Finally it checks the
* 'hardware_subarch' field. This was introduced in 2.6.24 for lguest and Xen:
* if it's set to '1' (lguest's assigned number), then it calls us here.
* header and kernel command line somewhere safe, and populates some initial
* page tables. Finally it checks the 'hardware_subarch' field. This was
* introduced in 2.6.24 for lguest and Xen: if it's set to '1' (lguest's
* assigned number), then it calls us here.
*
* WARNING: be very careful here! We're running at addresses equal to physical
* addesses (around 0), not above PAGE_OFFSET as most code expectes
* addresses (around 0), not above PAGE_OFFSET as most code expects
* (eg. 0xC0000000). Jumps are relative, so they're OK, but we can't touch any
* data without remembering to subtract __PAGE_OFFSET!
*
Expand All @@ -27,13 +31,18 @@
.section .init.text, "ax", @progbits
ENTRY(lguest_entry)
/*
* We make the "initialization" hypercall now to tell the Host about
* us, and also find out where it put our page tables.
* We make the "initialization" hypercall now to tell the Host where
* our lguest_data struct is.
*/
movl $LHCALL_LGUEST_INIT, %eax
movl $lguest_data - __PAGE_OFFSET, %ebx
int $LGUEST_TRAP_ENTRY

/* Now turn our pagetables on; setup by arch/x86/kernel/head_32.S. */
movl $LHCALL_NEW_PGTABLE, %eax
movl $(initial_page_table - __PAGE_OFFSET), %ebx
int $LGUEST_TRAP_ENTRY

/* Set up the initial stack so we can run C code. */
movl $(init_thread_union+THREAD_SIZE),%esp

Expand Down Expand Up @@ -96,12 +105,8 @@ send_interrupts:
*/
pushl %eax
movl $LHCALL_SEND_INTERRUPTS, %eax
/*
* This is a vmcall instruction (same thing that KVM uses). Older
* assembler versions might not know the "vmcall" instruction, so we
* create one manually here.
*/
.byte 0x0f,0x01,0xc1 /* KVM_HYPERCALL */
/* This is the actual hypercall trap. */
int $LGUEST_TRAP_ENTRY
/* Put eax back the way we found it. */
popl %eax
ret
Expand Down
2 changes: 1 addition & 1 deletion drivers/lguest/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ static __init int map_switcher(void)

/*
* Now the Switcher is mapped at the right address, we can't fail!
* Copy in the compiled-in Switcher code (from <arch>_switcher.S).
* Copy in the compiled-in Switcher code (from x86/switcher_32.S).
*/
memcpy(switcher_vma->addr, start_switcher_text,
end_switcher_text - start_switcher_text);
Expand Down
10 changes: 4 additions & 6 deletions drivers/lguest/interrupts_and_traps.c
Original file line number Diff line number Diff line change
Expand Up @@ -375,11 +375,9 @@ static bool direct_trap(unsigned int num)
/*
* The Host needs to see page faults (for shadow paging and to save the
* fault address), general protection faults (in/out emulation) and
* device not available (TS handling), invalid opcode fault (kvm hcall),
* and of course, the hypercall trap.
* device not available (TS handling) and of course, the hypercall trap.
*/
return num != 14 && num != 13 && num != 7 &&
num != 6 && num != LGUEST_TRAP_ENTRY;
return num != 14 && num != 13 && num != 7 && num != LGUEST_TRAP_ENTRY;
}
/*:*/

Expand Down Expand Up @@ -429,8 +427,8 @@ void pin_stack_pages(struct lg_cpu *cpu)

/*
* Direct traps also mean that we need to know whenever the Guest wants to use
* a different kernel stack, so we can change the IDT entries to use that
* stack. The IDT entries expect a virtual address, so unlike most addresses
* a different kernel stack, so we can change the guest TSS to use that
* stack. The TSS entries expect a virtual address, so unlike most addresses
* the Guest gives us, the "esp" (stack pointer) value here is virtual, not
* physical.
*
Expand Down
2 changes: 2 additions & 0 deletions drivers/lguest/lg.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,8 @@ struct lg_cpu {

struct lguest_pages *last_pages;

/* Initialization mode: linear map everything. */
bool linear_pages;
int cpu_pgd; /* Which pgd this cpu is currently using */

/* If a hypercall was asked for, this points to the arguments. */
Expand Down
Loading

0 comments on commit a7e1aab

Please sign in to comment.