Skip to content

Commit

Permalink
paravirt: refactor struct paravirt_ops into smaller pv_*_ops
Browse files Browse the repository at this point in the history
This patch refactors the paravirt_ops structure into groups of
functionally related ops:

pv_info - random info, rather than function entrypoints
pv_init_ops - functions used at boot time (some for module_init too)
pv_misc_ops - lazy mode, which didn't fit well anywhere else
pv_time_ops - time-related functions
pv_cpu_ops - various privileged instruction ops
pv_irq_ops - operations for managing interrupt state
pv_apic_ops - APIC operations
pv_mmu_ops - operations for managing pagetables

There are several motivations for this:

1. Some of these ops will be general to all x86, and some will be
   i386/x86-64 specific.  This makes it easier to share common stuff
   while allowing separate implementations where needed.

2. At the moment we must export all of paravirt_ops, but modules only
   need selected parts of it.  This allows us to export on a case by case
   basis (and also choose which export license we want to apply).

3. Functional groupings make things a bit more readable.

Struct paravirt_ops is now only used as a template to generate
patch-site identifiers, and to extract function pointers for inserting
into jmp/calls when patching.  It is only instantiated when needed.

Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Cc: Andi Kleen <ak@suse.de>
Cc: Zach Amsden <zach@vmware.com>
Cc: Avi Kivity <avi@qumranet.com>
Cc: Anthony Liguory <aliguori@us.ibm.com>
Cc: "Glauber de Oliveira Costa" <glommer@gmail.com>
Cc: Jun Nakajima <jun.nakajima@intel.com>
  • Loading branch information
Jeremy Fitzhardinge authored and Jeremy Fitzhardinge committed Oct 16, 2007
1 parent ab9c232 commit 93b1eab
Show file tree
Hide file tree
Showing 12 changed files with 587 additions and 462 deletions.
4 changes: 2 additions & 2 deletions arch/x86/kernel/alternative.c
Original file line number Diff line number Diff line change
Expand Up @@ -368,8 +368,8 @@ void apply_paravirt(struct paravirt_patch_site *start,
BUG_ON(p->len > MAX_PATCH_LEN);
/* prep the buffer with the original instructions */
memcpy(insnbuf, p->instr, p->len);
used = paravirt_ops.patch(p->instrtype, p->clobbers, insnbuf,
(unsigned long)p->instr, p->len);
used = pv_init_ops.patch(p->instrtype, p->clobbers, insnbuf,
(unsigned long)p->instr, p->len);

BUG_ON(used > p->len);

Expand Down
14 changes: 8 additions & 6 deletions arch/x86/kernel/asm-offsets_32.c
Original file line number Diff line number Diff line change
Expand Up @@ -116,12 +116,14 @@ void foo(void)

#ifdef CONFIG_PARAVIRT
BLANK();
OFFSET(PARAVIRT_enabled, paravirt_ops, paravirt_enabled);
OFFSET(PARAVIRT_irq_disable, paravirt_ops, irq_disable);
OFFSET(PARAVIRT_irq_enable, paravirt_ops, irq_enable);
OFFSET(PARAVIRT_irq_enable_sysexit, paravirt_ops, irq_enable_sysexit);
OFFSET(PARAVIRT_iret, paravirt_ops, iret);
OFFSET(PARAVIRT_read_cr0, paravirt_ops, read_cr0);
OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled);
OFFSET(PARAVIRT_PATCH_pv_cpu_ops, paravirt_patch_template, pv_cpu_ops);
OFFSET(PARAVIRT_PATCH_pv_irq_ops, paravirt_patch_template, pv_irq_ops);
OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable);
OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable);
OFFSET(PV_CPU_iret, pv_cpu_ops, iret);
OFFSET(PV_CPU_irq_enable_sysexit, pv_cpu_ops, irq_enable_sysexit);
OFFSET(PV_CPU_read_cr0, pv_cpu_ops, read_cr0);
#endif

#ifdef CONFIG_XEN
Expand Down
2 changes: 1 addition & 1 deletion arch/x86/kernel/entry_32.S
Original file line number Diff line number Diff line change
Expand Up @@ -434,7 +434,7 @@ ldt_ss:
* is still available to implement the setting of the high
* 16-bits in the INTERRUPT_RETURN paravirt-op.
*/
cmpl $0, paravirt_ops+PARAVIRT_enabled
cmpl $0, pv_info+PARAVIRT_enabled
jne restore_nocheck
#endif

Expand Down
174 changes: 103 additions & 71 deletions arch/x86/kernel/paravirt_32.c
Original file line number Diff line number Diff line change
Expand Up @@ -42,32 +42,33 @@ void _paravirt_nop(void)
static void __init default_banner(void)
{
printk(KERN_INFO "Booting paravirtualized kernel on %s\n",
paravirt_ops.name);
pv_info.name);
}

char *memory_setup(void)
{
return paravirt_ops.memory_setup();
return pv_init_ops.memory_setup();
}

/* Simple instruction patching code. */
#define DEF_NATIVE(name, code) \
extern const char start_##name[], end_##name[]; \
asm("start_" #name ": " code "; end_" #name ":")

DEF_NATIVE(irq_disable, "cli");
DEF_NATIVE(irq_enable, "sti");
DEF_NATIVE(restore_fl, "push %eax; popf");
DEF_NATIVE(save_fl, "pushf; pop %eax");
DEF_NATIVE(iret, "iret");
DEF_NATIVE(irq_enable_sysexit, "sti; sysexit");
DEF_NATIVE(read_cr2, "mov %cr2, %eax");
DEF_NATIVE(write_cr3, "mov %eax, %cr3");
DEF_NATIVE(read_cr3, "mov %cr3, %eax");
DEF_NATIVE(clts, "clts");
DEF_NATIVE(read_tsc, "rdtsc");

DEF_NATIVE(ud2a, "ud2a");
#define DEF_NATIVE(ops, name, code) \
extern const char start_##ops##_##name[], end_##ops##_##name[]; \
asm("start_" #ops "_" #name ": " code "; end_" #ops "_" #name ":")

DEF_NATIVE(pv_irq_ops, irq_disable, "cli");
DEF_NATIVE(pv_irq_ops, irq_enable, "sti");
DEF_NATIVE(pv_irq_ops, restore_fl, "push %eax; popf");
DEF_NATIVE(pv_irq_ops, save_fl, "pushf; pop %eax");
DEF_NATIVE(pv_cpu_ops, iret, "iret");
DEF_NATIVE(pv_cpu_ops, irq_enable_sysexit, "sti; sysexit");
DEF_NATIVE(pv_mmu_ops, read_cr2, "mov %cr2, %eax");
DEF_NATIVE(pv_mmu_ops, write_cr3, "mov %eax, %cr3");
DEF_NATIVE(pv_mmu_ops, read_cr3, "mov %cr3, %eax");
DEF_NATIVE(pv_cpu_ops, clts, "clts");
DEF_NATIVE(pv_cpu_ops, read_tsc, "rdtsc");

/* Undefined instruction for dealing with missing ops pointers. */
static const unsigned char ud2a[] = { 0x0f, 0x0b };

static unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
unsigned long addr, unsigned len)
Expand All @@ -76,37 +77,29 @@ static unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
unsigned ret;

switch(type) {
#define SITE(x) case PARAVIRT_PATCH(x): start = start_##x; end = end_##x; goto patch_site
SITE(irq_disable);
SITE(irq_enable);
SITE(restore_fl);
SITE(save_fl);
SITE(iret);
SITE(irq_enable_sysexit);
SITE(read_cr2);
SITE(read_cr3);
SITE(write_cr3);
SITE(clts);
SITE(read_tsc);
#define SITE(ops, x) \
case PARAVIRT_PATCH(ops.x): \
start = start_##ops##_##x; \
end = end_##ops##_##x; \
goto patch_site

SITE(pv_irq_ops, irq_disable);
SITE(pv_irq_ops, irq_enable);
SITE(pv_irq_ops, restore_fl);
SITE(pv_irq_ops, save_fl);
SITE(pv_cpu_ops, iret);
SITE(pv_cpu_ops, irq_enable_sysexit);
SITE(pv_mmu_ops, read_cr2);
SITE(pv_mmu_ops, read_cr3);
SITE(pv_mmu_ops, write_cr3);
SITE(pv_cpu_ops, clts);
SITE(pv_cpu_ops, read_tsc);
#undef SITE

patch_site:
ret = paravirt_patch_insns(ibuf, len, start, end);
break;

case PARAVIRT_PATCH(make_pgd):
case PARAVIRT_PATCH(make_pte):
case PARAVIRT_PATCH(pgd_val):
case PARAVIRT_PATCH(pte_val):
#ifdef CONFIG_X86_PAE
case PARAVIRT_PATCH(make_pmd):
case PARAVIRT_PATCH(pmd_val):
#endif
/* These functions end up returning exactly what
they're passed, in the same registers. */
ret = paravirt_patch_nop();
break;

default:
ret = paravirt_patch_default(type, clobbers, ibuf, addr, len);
break;
Expand Down Expand Up @@ -150,7 +143,7 @@ unsigned paravirt_patch_call(void *insnbuf,
return 5;
}

unsigned paravirt_patch_jmp(const void *target, void *insnbuf,
unsigned paravirt_patch_jmp(void *insnbuf, const void *target,
unsigned long addr, unsigned len)
{
struct branch *b = insnbuf;
Expand All @@ -165,22 +158,38 @@ unsigned paravirt_patch_jmp(const void *target, void *insnbuf,
return 5;
}

/* Neat trick to map patch type back to the call within the
* corresponding structure. */
static void *get_call_destination(u8 type)
{
struct paravirt_patch_template tmpl = {
.pv_init_ops = pv_init_ops,
.pv_misc_ops = pv_misc_ops,
.pv_time_ops = pv_time_ops,
.pv_cpu_ops = pv_cpu_ops,
.pv_irq_ops = pv_irq_ops,
.pv_apic_ops = pv_apic_ops,
.pv_mmu_ops = pv_mmu_ops,
};
return *((void **)&tmpl + type);
}

unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf,
unsigned long addr, unsigned len)
{
void *opfunc = *((void **)&paravirt_ops + type);
void *opfunc = get_call_destination(type);
unsigned ret;

if (opfunc == NULL)
/* If there's no function, patch it with a ud2a (BUG) */
ret = paravirt_patch_insns(insnbuf, len, start_ud2a, end_ud2a);
ret = paravirt_patch_insns(insnbuf, len, ud2a, ud2a+sizeof(ud2a));
else if (opfunc == paravirt_nop)
/* If the operation is a nop, then nop the callsite */
ret = paravirt_patch_nop();
else if (type == PARAVIRT_PATCH(iret) ||
type == PARAVIRT_PATCH(irq_enable_sysexit))
else if (type == PARAVIRT_PATCH(pv_cpu_ops.iret) ||
type == PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit))
/* If operation requires a jmp, then jmp */
ret = paravirt_patch_jmp(opfunc, insnbuf, addr, len);
ret = paravirt_patch_jmp(insnbuf, opfunc, addr, len);
else
/* Otherwise call the function; assume target could
clobber any caller-save reg */
Expand All @@ -205,7 +214,7 @@ unsigned paravirt_patch_insns(void *insnbuf, unsigned len,

void init_IRQ(void)
{
paravirt_ops.init_IRQ();
pv_irq_ops.init_IRQ();
}

static void native_flush_tlb(void)
Expand Down Expand Up @@ -233,7 +242,7 @@ extern void native_irq_enable_sysexit(void);

static int __init print_banner(void)
{
paravirt_ops.banner();
pv_init_ops.banner();
return 0;
}
core_initcall(print_banner);
Expand Down Expand Up @@ -273,47 +282,53 @@ int paravirt_disable_iospace(void)
return ret;
}

struct paravirt_ops paravirt_ops = {
struct pv_info pv_info = {
.name = "bare hardware",
.paravirt_enabled = 0,
.kernel_rpl = 0,
.shared_kernel_pmd = 1, /* Only used when CONFIG_X86_PAE is set */
};

.patch = native_patch,
struct pv_init_ops pv_init_ops = {
.patch = native_patch,
.banner = default_banner,
.arch_setup = paravirt_nop,
.memory_setup = machine_specific_memory_setup,
};

struct pv_time_ops pv_time_ops = {
.time_init = hpet_time_init,
.get_wallclock = native_get_wallclock,
.set_wallclock = native_set_wallclock,
.time_init = hpet_time_init,
.sched_clock = native_sched_clock,
.get_cpu_khz = native_calculate_cpu_khz,
};

struct pv_irq_ops pv_irq_ops = {
.init_IRQ = native_init_IRQ,
.save_fl = native_save_fl,
.restore_fl = native_restore_fl,
.irq_disable = native_irq_disable,
.irq_enable = native_irq_enable,
.safe_halt = native_safe_halt,
.halt = native_halt,
};

struct pv_cpu_ops pv_cpu_ops = {
.cpuid = native_cpuid,
.get_debugreg = native_get_debugreg,
.set_debugreg = native_set_debugreg,
.clts = native_clts,
.read_cr0 = native_read_cr0,
.write_cr0 = native_write_cr0,
.read_cr2 = native_read_cr2,
.write_cr2 = native_write_cr2,
.read_cr3 = native_read_cr3,
.write_cr3 = native_write_cr3,
.read_cr4 = native_read_cr4,
.read_cr4_safe = native_read_cr4_safe,
.write_cr4 = native_write_cr4,
.save_fl = native_save_fl,
.restore_fl = native_restore_fl,
.irq_disable = native_irq_disable,
.irq_enable = native_irq_enable,
.safe_halt = native_safe_halt,
.halt = native_halt,
.wbinvd = native_wbinvd,
.read_msr = native_read_msr_safe,
.write_msr = native_write_msr_safe,
.read_tsc = native_read_tsc,
.read_pmc = native_read_pmc,
.sched_clock = native_sched_clock,
.get_cpu_khz = native_calculate_cpu_khz,
.load_tr_desc = native_load_tr_desc,
.set_ldt = native_set_ldt,
.load_gdt = native_load_gdt,
Expand All @@ -327,9 +342,14 @@ struct paravirt_ops paravirt_ops = {
.write_idt_entry = write_dt_entry,
.load_esp0 = native_load_esp0,

.irq_enable_sysexit = native_irq_enable_sysexit,
.iret = native_iret,

.set_iopl_mask = native_set_iopl_mask,
.io_delay = native_io_delay,
};

struct pv_apic_ops pv_apic_ops = {
#ifdef CONFIG_X86_LOCAL_APIC
.apic_write = native_apic_write,
.apic_write_atomic = native_apic_write_atomic,
Expand All @@ -338,11 +358,21 @@ struct paravirt_ops paravirt_ops = {
.setup_secondary_clock = setup_secondary_APIC_clock,
.startup_ipi_hook = paravirt_nop,
#endif
};

struct pv_misc_ops pv_misc_ops = {
.set_lazy_mode = paravirt_nop,
};

struct pv_mmu_ops pv_mmu_ops = {
.pagetable_setup_start = native_pagetable_setup_start,
.pagetable_setup_done = native_pagetable_setup_done,

.read_cr2 = native_read_cr2,
.write_cr2 = native_write_cr2,
.read_cr3 = native_read_cr3,
.write_cr3 = native_write_cr3,

.flush_tlb_user = native_flush_tlb,
.flush_tlb_kernel = native_flush_tlb_global,
.flush_tlb_single = native_flush_tlb_single,
Expand Down Expand Up @@ -381,12 +411,14 @@ struct paravirt_ops paravirt_ops = {
.make_pte = native_make_pte,
.make_pgd = native_make_pgd,

.irq_enable_sysexit = native_irq_enable_sysexit,
.iret = native_iret,

.dup_mmap = paravirt_nop,
.exit_mmap = paravirt_nop,
.activate_mm = paravirt_nop,
};

EXPORT_SYMBOL(paravirt_ops);
EXPORT_SYMBOL_GPL(pv_time_ops);
EXPORT_SYMBOL_GPL(pv_cpu_ops);
EXPORT_SYMBOL_GPL(pv_mmu_ops);
EXPORT_SYMBOL_GPL(pv_apic_ops);
EXPORT_SYMBOL_GPL(pv_info);
EXPORT_SYMBOL (pv_irq_ops);
Loading

0 comments on commit 93b1eab

Please sign in to comment.