Skip to content

Commit

Permalink
[PATCH] i386: Allow a kernel not to be in ring 0
Browse files Browse the repository at this point in the history
We allow for the fact that the guest kernel may not run in ring 0.  This
requires some abstraction in a few places when setting %cs or checking
privilege level (user vs kernel).

This is Chris' [RFC PATCH 15/33] move segment checks to subarch, except rather
than using #define USER_MODE_MASK which depends on a config option, we use
Zach's more flexible approach of assuming ring 3 == userspace.  I also used
"get_kernel_rpl()" over "get_kernel_cs()" because I think it reads better in
the code...

1) Remove the hardcoded 3 and introduce #define SEGMENT_RPL_MASK 3.
2) Add a get_kernel_rpl() macro, and don't assume it's zero.

And:

Cleanup of the patch for letting the kernel run in a ring other than ring 0:

a. Add some comments about the SEGMENT_IS_*_CODE() macros.
b. Add a USER_RPL macro.  (Code was comparing a value to a mask
   in some places and to the magic number 3 in other places.)
c. Add macros for table indicator field and use them.
d. Change the entry.S tests for LDT stack segment to use the macros.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Zachary Amsden <zach@vmware.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Andi Kleen <ak@suse.de>
  • Loading branch information
Rusty Russell authored and Andi Kleen committed Sep 26, 2006
1 parent 0da5db3 commit 78be370
Show file tree
Hide file tree
Showing 6 changed files with 31 additions and 15 deletions.
9 changes: 5 additions & 4 deletions arch/i386/kernel/entry.S
Original file line number Diff line number Diff line change
Expand Up @@ -240,8 +240,9 @@ ret_from_intr:
check_userspace:
movl EFLAGS(%esp), %eax # mix EFLAGS and CS
movb CS(%esp), %al
testl $(VM_MASK | 3), %eax
jz resume_kernel
andl $(VM_MASK | SEGMENT_RPL_MASK), %eax
cmpl $USER_RPL, %eax
jb resume_kernel # not returning to v8086 or userspace
ENTRY(resume_userspace)
DISABLE_INTERRUPTS # make sure we don't miss an interrupt
# setting need_resched or sigpending
Expand Down Expand Up @@ -377,8 +378,8 @@ restore_all:
# See comments in process.c:copy_thread() for details.
movb OLDSS(%esp), %ah
movb CS(%esp), %al
andl $(VM_MASK | (4 << 8) | 3), %eax
cmpl $((4 << 8) | 3), %eax
andl $(VM_MASK | (SEGMENT_TI_MASK << 8) | SEGMENT_RPL_MASK), %eax
cmpl $((SEGMENT_LDT << 8) | USER_RPL), %eax
CFI_REMEMBER_STATE
je ldt_ss # returning to user-space with LDT SS
restore_nocheck:
Expand Down
2 changes: 1 addition & 1 deletion arch/i386/kernel/process.c
Original file line number Diff line number Diff line change
Expand Up @@ -338,7 +338,7 @@ int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
regs.xes = __USER_DS;
regs.orig_eax = -1;
regs.eip = (unsigned long) kernel_thread_helper;
regs.xcs = __KERNEL_CS;
regs.xcs = __KERNEL_CS | get_kernel_rpl();
regs.eflags = X86_EFLAGS_IF | X86_EFLAGS_SF | X86_EFLAGS_PF | 0x2;

/* Ok, create the new process.. */
Expand Down
2 changes: 1 addition & 1 deletion arch/i386/mm/extable.c
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ int fixup_exception(struct pt_regs *regs)
const struct exception_table_entry *fixup;

#ifdef CONFIG_PNPBIOS
if (unlikely((regs->xcs & ~15) == (GDT_ENTRY_PNPBIOS_BASE << 3)))
if (unlikely(SEGMENT_IS_PNP_CODE(regs->xcs)))
{
extern u32 pnp_bios_fault_eip, pnp_bios_fault_esp;
extern u32 pnp_bios_is_utter_crap;
Expand Down
11 changes: 4 additions & 7 deletions arch/i386/mm/fault.c
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
#include <asm/uaccess.h>
#include <asm/desc.h>
#include <asm/kdebug.h>
#include <asm/segment.h>

extern void die(const char *,struct pt_regs *,long);

Expand Down Expand Up @@ -113,10 +114,10 @@ static inline unsigned long get_segment_eip(struct pt_regs *regs,
}

/* The standard kernel/user address space limit. */
*eip_limit = (seg & 3) ? USER_DS.seg : KERNEL_DS.seg;
*eip_limit = user_mode(regs) ? USER_DS.seg : KERNEL_DS.seg;

/* By far the most common cases. */
if (likely(seg == __USER_CS || seg == __KERNEL_CS))
if (likely(SEGMENT_IS_FLAT_CODE(seg)))
return eip;

/* Check the segment exists, is within the current LDT/GDT size,
Expand Down Expand Up @@ -430,11 +431,7 @@ fastcall void __kprobes do_page_fault(struct pt_regs *regs,
write = 0;
switch (error_code & 3) {
default: /* 3: write, present */
#ifdef TEST_VERIFY_AREA
if (regs->cs == KERNEL_CS)
printk("WP fault at %08lx\n", regs->eip);
#endif
/* fall through */
/* fall through */
case 2: /* write, not present */
if (!(vma->vm_flags & VM_WRITE))
goto bad_area;
Expand Down
5 changes: 3 additions & 2 deletions include/asm-i386/ptrace.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ struct pt_regs {
#ifdef __KERNEL__

#include <asm/vm86.h>
#include <asm/segment.h>

struct task_struct;
extern void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code);
Expand All @@ -73,11 +74,11 @@ extern void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int erro
*/
static inline int user_mode(struct pt_regs *regs)
{
/*
 * Tests the low two bits (the privilege-level field) of the saved code
 * segment selector regs->xcs.
 * NOTE(review): this text is a diff rendering whose +/- markers were lost.
 * The hunk summary for this file (3 additions & 2 deletions) suggests the
 * first return below is the removed line and the second is its replacement;
 * confirm against the real diff.
 */
/* Presumably the removed line: any non-zero privilege level counted as user mode. */
return (regs->xcs & 3) != 0;
/* Presumably the added line: only a privilege level equal to USER_RPL counts as user mode. */
return (regs->xcs & SEGMENT_RPL_MASK) == USER_RPL;
}
static inline int user_mode_vm(struct pt_regs *regs)
{
/*
 * Combines the privilege-level bits of the saved code segment selector
 * (regs->xcs) with the VM_MASK bit of the saved flags (regs->eflags) and
 * tests the result.
 * NOTE(review): this text is a diff rendering whose +/- markers were lost.
 * The first return below is presumably the removed line and the second its
 * replacement; confirm against the real diff.
 */
/* Presumably the removed line: true if either the privilege bits or the VM_MASK bit were non-zero. */
return ((regs->xcs & 3) | (regs->eflags & VM_MASK)) != 0;
/*
 * Presumably the added line: true when the combined value is at least
 * USER_RPL. Assumes VM_MASK is a flag bit numerically above the two
 * privilege bits, so a set VM_MASK alone satisfies the comparison;
 * VM_MASK is defined outside this view -- TODO confirm.
 */
return ((regs->xcs & SEGMENT_RPL_MASK) | (regs->eflags & VM_MASK)) >= USER_RPL;
}
#define instruction_pointer(regs) ((regs)->eip)
extern unsigned long profile_pc(struct pt_regs *regs);
Expand Down
17 changes: 17 additions & 0 deletions include/asm-i386/segment.h
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,11 @@

#define GDT_SIZE (GDT_ENTRIES * 8)

/*
 * Matches __KERNEL_CS and __USER_CS (they must be 2 entries apart).
 *
 * The mask 0xec clears bits 0-1 (the privilege-level field) and bit 4 of the
 * selector. Bit 4 is the index bit that differs between two descriptor-table
 * entries that are 2 apart, so both selectors reduce to the byte offset of
 * the kernel code entry. Bit 2 (the table indicator) stays in the mask, so a
 * selector with that bit set never matches. Bits above bit 7 of x are not
 * examined.
 * NOTE(review): relies on GDT_ENTRY_KERNEL_CS * 8 having bit 4 clear; that
 * constant is defined outside this view -- confirm.
 */
#define SEGMENT_IS_FLAT_CODE(x) (((x) & 0xec) == GDT_ENTRY_KERNEL_CS * 8)
/*
 * Matches PNP_CS32 and PNP_CS16 (they must be consecutive).
 *
 * The mask 0xf4 clears bits 0-1 (the privilege-level field) and bit 3, the
 * lowest index bit, so two consecutive entries compare equal to the byte
 * offset of the first one. Bit 2 (the table indicator) stays in the mask.
 * Bits above bit 7 of x are not examined.
 * NOTE(review): relies on GDT_ENTRY_PNPBIOS_BASE being even (bit 3 of the
 * offset clear); that constant is defined outside this view -- confirm.
 */
#define SEGMENT_IS_PNP_CODE(x) (((x) & 0xf4) == GDT_ENTRY_PNPBIOS_BASE * 8)

/* Simple and small GDT entries for booting only */

#define GDT_ENTRY_BOOT_CS 2
Expand Down Expand Up @@ -112,4 +117,16 @@
*/
#define IDT_ENTRIES 256

/*
 * Field layout of a segment selector, as used by the checks in this patch:
 * bits 0-1 hold the privilege level, bit 2 selects the descriptor table.
 */

/* Bottom two bits of selector give the ring privilege level */
#define SEGMENT_RPL_MASK 0x3
/* Bit 2 is table indicator (LDT/GDT) */
#define SEGMENT_TI_MASK 0x4

/* User mode is privilege level 3 */
#define USER_RPL 0x3
/* LDT segment has TI set, GDT has it cleared */
/* Values to compare against (selector & SEGMENT_TI_MASK). */
#define SEGMENT_LDT 0x4
#define SEGMENT_GDT 0x0

/*
 * Privilege level to OR into a kernel code selector (kernel_thread() in this
 * patch builds regs.xcs as __KERNEL_CS | get_kernel_rpl()). It evaluates to 0
 * here; it is kept as a macro so that callers do not hard-code ring 0.
 */
#define get_kernel_rpl() 0
#endif

0 comments on commit 78be370

Please sign in to comment.