Skip to content

Commit

Permalink
lguest: PAE support
Browse files Browse the repository at this point in the history
This version requires that host and guest have the same PAE status.
NX cap is not offered to the guest, yet.

Signed-off-by: Matias Zabaljauregui <zabaljauregui@gmail.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
  • Loading branch information
Matias Zabaljauregui authored and Rusty Russell committed Jun 12, 2009
1 parent cefcad1 commit acdd0b6
Show file tree
Hide file tree
Showing 9 changed files with 403 additions and 48 deletions.
1 change: 0 additions & 1 deletion Documentation/lguest/lguest.txt
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@ Running Lguest:
"Paravirtualized guest support" = Y
"Lguest guest support" = Y
"High Memory Support" = off/4GB
"PAE (Physical Address Extension) Support" = N
"Alignment value to which kernel should be aligned" = 0x100000
(CONFIG_PARAVIRT=y, CONFIG_LGUEST_GUEST=y, CONFIG_HIGHMEM64G=n and
CONFIG_PHYSICAL_ALIGN=0x100000)
Expand Down
7 changes: 6 additions & 1 deletion arch/x86/include/asm/lguest.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,13 @@
/* Pages for switcher itself, then two pages per cpu */
#define TOTAL_SWITCHER_PAGES (SHARED_SWITCHER_PAGES + 2 * nr_cpu_ids)

/* We map at -4M for ease of mapping into the guest (one PTE page). */
/* We map at -4M (-2M when PAE is activated) for ease of mapping
* into the guest (one PTE page). */
#ifdef CONFIG_X86_PAE
#define SWITCHER_ADDR 0xFFE00000
#else
#define SWITCHER_ADDR 0xFFC00000
#endif

/* Found in switcher.S */
extern unsigned long default_idt_entries[];
Expand Down
3 changes: 2 additions & 1 deletion arch/x86/include/asm/lguest_hcall.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#define LHCALL_TS 8
#define LHCALL_SET_CLOCKEVENT 9
#define LHCALL_HALT 10
#define LHCALL_SET_PMD 13
#define LHCALL_SET_PTE 14
#define LHCALL_SET_PGD 15
#define LHCALL_LOAD_TLS 16
Expand All @@ -33,7 +34,7 @@
* operations? There are two ways: the direct way is to make a "hypercall",
* to make requests of the Host Itself.
*
* We use the KVM hypercall mechanism. Eighteen hypercalls are
* We use the KVM hypercall mechanism. Seventeen hypercalls are
* available: the hypercall number is put in the %eax register, and the
* arguments (when required) are placed in %ebx, %ecx, %edx and %esi.
* If a return value makes sense, it's returned in %eax.
Expand Down
1 change: 0 additions & 1 deletion arch/x86/lguest/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ config LGUEST_GUEST
bool "Lguest guest support"
select PARAVIRT
depends on X86_32
depends on !X86_PAE
select VIRTIO
select VIRTIO_RING
select VIRTIO_CONSOLE
Expand Down
71 changes: 66 additions & 5 deletions arch/x86/lguest/boot.c
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,7 @@ static void lazy_hcall3(unsigned long call,
async_hcall(call, arg1, arg2, arg3, 0);
}

#ifdef CONFIG_X86_PAE
static void lazy_hcall4(unsigned long call,
unsigned long arg1,
unsigned long arg2,
Expand All @@ -178,6 +179,7 @@ static void lazy_hcall4(unsigned long call,
else
async_hcall(call, arg1, arg2, arg3, arg4);
}
#endif

/* When lazy mode is turned off reset the per-cpu lazy mode variable and then
* issue the do-nothing hypercall to flush any stored calls. */
Expand Down Expand Up @@ -380,8 +382,8 @@ static void lguest_cpuid(unsigned int *ax, unsigned int *bx,
case 1: /* Basic feature request. */
/* We only allow kernel to see SSE3, CMPXCHG16B and SSSE3 */
*cx &= 0x00002201;
/* SSE, SSE2, FXSR, MMX, CMOV, CMPXCHG8B, TSC, FPU. */
*dx &= 0x07808111;
/* SSE, SSE2, FXSR, MMX, CMOV, CMPXCHG8B, TSC, FPU, PAE. */
*dx &= 0x07808151;
/* The Host can do a nice optimization if it knows that the
* kernel mappings (addresses above 0xC0000000 or whatever
* PAGE_OFFSET is set to) haven't changed. But Linux calls
Expand All @@ -400,6 +402,11 @@ static void lguest_cpuid(unsigned int *ax, unsigned int *bx,
if (*ax > 0x80000008)
*ax = 0x80000008;
break;
case 0x80000001:
/* Here we should fix nx cap depending on host. */
/* For this version of PAE, we just clear NX bit. */
*dx &= ~(1 << 20);
break;
}
}

Expand Down Expand Up @@ -533,7 +540,12 @@ static void lguest_write_cr4(unsigned long val)
static void lguest_pte_update(struct mm_struct *mm, unsigned long addr,
pte_t *ptep)
{
#ifdef CONFIG_X86_PAE
lazy_hcall4(LHCALL_SET_PTE, __pa(mm->pgd), addr,
ptep->pte_low, ptep->pte_high);
#else
lazy_hcall3(LHCALL_SET_PTE, __pa(mm->pgd), addr, ptep->pte_low);
#endif
}

static void lguest_set_pte_at(struct mm_struct *mm, unsigned long addr,
Expand All @@ -543,15 +555,37 @@ static void lguest_set_pte_at(struct mm_struct *mm, unsigned long addr,
lguest_pte_update(mm, addr, ptep);
}

/* The Guest calls this to set a top-level entry. Again, we set the entry then
* tell the Host which top-level page we changed, and the index of the entry we
* changed. */
/* The Guest calls lguest_set_pud to set a top-level entry and lguest_set_pmd
* to set a middle-level entry when PAE is activated.
* Again, we set the entry then tell the Host which page we changed,
* and the index of the entry we changed. */
#ifdef CONFIG_X86_PAE
static void lguest_set_pud(pud_t *pudp, pud_t pudval)
{
native_set_pud(pudp, pudval);

/* 32 bytes aligned pdpt address and the index. */
lazy_hcall2(LHCALL_SET_PGD, __pa(pudp) & 0xFFFFFFE0,
(__pa(pudp) & 0x1F) / sizeof(pud_t));
}

static void lguest_set_pmd(pmd_t *pmdp, pmd_t pmdval)
{
native_set_pmd(pmdp, pmdval);
lazy_hcall2(LHCALL_SET_PMD, __pa(pmdp) & PAGE_MASK,
(__pa(pmdp) & (PAGE_SIZE - 1)) / sizeof(pmd_t));
}
#else

/* The Guest calls lguest_set_pmd to set a top-level entry when PAE is not
* activated. */
static void lguest_set_pmd(pmd_t *pmdp, pmd_t pmdval)
{
native_set_pmd(pmdp, pmdval);
lazy_hcall2(LHCALL_SET_PGD, __pa(pmdp) & PAGE_MASK,
(__pa(pmdp) & (PAGE_SIZE - 1)) / sizeof(pmd_t));
}
#endif

/* There are a couple of legacy places where the kernel sets a PTE, but we
* don't know the top level any more. This is useless for us, since we don't
Expand All @@ -569,6 +603,26 @@ static void lguest_set_pte(pte_t *ptep, pte_t pteval)
lazy_hcall1(LHCALL_FLUSH_TLB, 1);
}

#ifdef CONFIG_X86_PAE
static void lguest_set_pte_atomic(pte_t *ptep, pte_t pte)
{
native_set_pte_atomic(ptep, pte);
if (cr3_changed)
lazy_hcall1(LHCALL_FLUSH_TLB, 1);
}

void lguest_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
native_pte_clear(mm, addr, ptep);
lguest_pte_update(mm, addr, ptep);
}

void lguest_pmd_clear(pmd_t *pmdp)
{
lguest_set_pmd(pmdp, __pmd(0));
}
#endif

/* Unfortunately for Lguest, the pv_mmu_ops for page tables were based on
* native page table operations. On native hardware you can set a new page
* table entry whenever you want, but if you want to remove one you have to do
Expand Down Expand Up @@ -1035,6 +1089,7 @@ __init void lguest_init(void)
pv_info.name = "lguest";
pv_info.paravirt_enabled = 1;
pv_info.kernel_rpl = 1;
pv_info.shared_kernel_pmd = 1;

/* We set up all the lguest overrides for sensitive operations. These
* are detailed with the operations themselves. */
Expand Down Expand Up @@ -1080,6 +1135,12 @@ __init void lguest_init(void)
pv_mmu_ops.set_pte = lguest_set_pte;
pv_mmu_ops.set_pte_at = lguest_set_pte_at;
pv_mmu_ops.set_pmd = lguest_set_pmd;
#ifdef CONFIG_X86_PAE
pv_mmu_ops.set_pte_atomic = lguest_set_pte_atomic;
pv_mmu_ops.pte_clear = lguest_pte_clear;
pv_mmu_ops.pmd_clear = lguest_pmd_clear;
pv_mmu_ops.set_pud = lguest_set_pud;
#endif
pv_mmu_ops.read_cr2 = lguest_read_cr2;
pv_mmu_ops.read_cr3 = lguest_read_cr3;
pv_mmu_ops.lazy_mode.enter = paravirt_enter_lazy_mmu;
Expand Down
2 changes: 1 addition & 1 deletion drivers/lguest/Kconfig
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
config LGUEST
tristate "Linux hypervisor example code"
depends on X86_32 && EXPERIMENTAL && !X86_PAE && FUTEX
depends on X86_32 && EXPERIMENTAL && FUTEX
select HVC_DRIVER
---help---
This is a very simple module which allows you to run
Expand Down
10 changes: 10 additions & 0 deletions drivers/lguest/hypercalls.c
Original file line number Diff line number Diff line change
Expand Up @@ -77,11 +77,21 @@ static void do_hcall(struct lg_cpu *cpu, struct hcall_args *args)
guest_set_stack(cpu, args->arg1, args->arg2, args->arg3);
break;
case LHCALL_SET_PTE:
#ifdef CONFIG_X86_PAE
guest_set_pte(cpu, args->arg1, args->arg2,
__pte(args->arg3 | (u64)args->arg4 << 32));
#else
guest_set_pte(cpu, args->arg1, args->arg2, __pte(args->arg3));
#endif
break;
case LHCALL_SET_PGD:
guest_set_pgd(cpu->lg, args->arg1, args->arg2);
break;
#ifdef CONFIG_X86_PAE
case LHCALL_SET_PMD:
guest_set_pmd(cpu->lg, args->arg1, args->arg2);
break;
#endif
case LHCALL_SET_CLOCKEVENT:
guest_set_clockevent(cpu, args->arg1);
break;
Expand Down
5 changes: 5 additions & 0 deletions drivers/lguest/lg.h
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,8 @@ int run_guest(struct lg_cpu *cpu, unsigned long __user *user);
* in the kernel. */
#define pgd_flags(x) (pgd_val(x) & ~PAGE_MASK)
#define pgd_pfn(x) (pgd_val(x) >> PAGE_SHIFT)
#define pmd_flags(x) (pmd_val(x) & ~PAGE_MASK)
#define pmd_pfn(x) (pmd_val(x) >> PAGE_SHIFT)

/* interrupts_and_traps.c: */
unsigned int interrupt_pending(struct lg_cpu *cpu, bool *more);
Expand Down Expand Up @@ -170,6 +172,9 @@ int init_guest_pagetable(struct lguest *lg);
void free_guest_pagetable(struct lguest *lg);
void guest_new_pagetable(struct lg_cpu *cpu, unsigned long pgtable);
void guest_set_pgd(struct lguest *lg, unsigned long gpgdir, u32 i);
#ifdef CONFIG_X86_PAE
void guest_set_pmd(struct lguest *lg, unsigned long gpgdir, u32 i);
#endif
void guest_pagetable_clear_all(struct lg_cpu *cpu);
void guest_pagetable_flush_user(struct lg_cpu *cpu);
void guest_set_pte(struct lg_cpu *cpu, unsigned long gpgdir,
Expand Down
Loading

0 comments on commit acdd0b6

Please sign in to comment.