From: Andrew Cooper Subject: x86/traps: Use an Interrupt Stack Table for #DB PV guests can use architectural corner cases to cause #DB to be raised after transitioning into supervisor mode. Use an interrupt stack table for #DB to prevent the exception being taken with a guest controlled stack pointer. This is part of XSA-260 / CVE-2018-8897 Signed-off-by: Andrew Cooper Reviewed-by: Jan Beulich diff --git a/xen/arch/x86/cpu/common.c b/xen/arch/x86/cpu/common.c index 0a452ae..8d1e7be 100644 --- a/xen/arch/x86/cpu/common.c +++ b/xen/arch/x86/cpu/common.c @@ -757,6 +757,7 @@ void load_system_tables(void) [IST_MCE - 1] = stack_top + IST_MCE * PAGE_SIZE, [IST_DF - 1] = stack_top + IST_DF * PAGE_SIZE, [IST_NMI - 1] = stack_top + IST_NMI * PAGE_SIZE, + [IST_DB - 1] = stack_top + IST_DB * PAGE_SIZE, [IST_MAX ... ARRAY_SIZE(tss->ist) - 1] = 0x8600111111111111ul, diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c index 3d4ac59..819a31c 100644 --- a/xen/arch/x86/traps.c +++ b/xen/arch/x86/traps.c @@ -353,13 +353,13 @@ static void show_guest_stack(struct vcpu *v, const struct cpu_user_regs *regs) /* * Notes for get_stack_trace_bottom() and get_stack_dump_bottom() * - * Stack pages 0, 1 and 2: + * Stack pages 0 - 3: * These are all 1-page IST stacks. Each of these stacks have an exception * frame and saved register state at the top. The interesting bound for a * trace is the word adjacent to this, while the bound for a dump is the * very top, including the exception frame. * - * Stack pages 3, 4 and 5: + * Stack pages 4 and 5: * None of these are particularly interesting. With MEMORY_GUARD, page 5 is * explicitly not present, so attempting to dump or trace it is * counterproductive. Without MEMORY_GUARD, it is possible for a call chain @@ -380,12 +380,12 @@ unsigned long get_stack_trace_bottom(unsigned long sp) { switch ( get_stack_page(sp) ) { - case 0 ... 2: + case 0 ... 3: return ROUNDUP(sp, PAGE_SIZE) - offsetof(struct cpu_user_regs, es) - sizeof(unsigned long); #ifndef MEMORY_GUARD - case 3 ... 5: + case 4 ... 5: #endif case 6 ... 7: return ROUNDUP(sp, STACK_SIZE) - @@ -400,11 +400,11 @@ unsigned long get_stack_dump_bottom(unsigned long sp) { switch ( get_stack_page(sp) ) { - case 0 ... 2: + case 0 ... 3: return ROUNDUP(sp, PAGE_SIZE) - sizeof(unsigned long); #ifndef MEMORY_GUARD - case 3 ... 5: + case 4 ... 5: #endif case 6 ... 7: return ROUNDUP(sp, STACK_SIZE) - sizeof(unsigned long); diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S index ccece08..6ae4d6f 100644 --- a/xen/arch/x86/x86_64/entry.S +++ b/xen/arch/x86/x86_64/entry.S @@ -723,7 +723,7 @@ ENTRY(device_not_available) ENTRY(debug) pushq $0 movl $TRAP_debug,4(%rsp) - jmp handle_exception + jmp handle_ist_exception ENTRY(int3) pushq $0 diff --git a/xen/include/asm-x86/processor.h b/xen/include/asm-x86/processor.h index db9988a..6e0e935 100644 --- a/xen/include/asm-x86/processor.h +++ b/xen/include/asm-x86/processor.h @@ -404,7 +404,8 @@ struct __packed __cacheline_aligned tss_struct { #define IST_DF 1UL #define IST_NMI 2UL #define IST_MCE 3UL -#define IST_MAX 3UL +#define IST_DB 4UL +#define IST_MAX 4UL /* Set the interrupt stack table used by a particular interrupt * descriptor table entry. */ @@ -423,6 +424,7 @@ static inline void enable_each_ist(idt_entry_t *idt) set_ist(&idt[TRAP_double_fault], IST_DF); set_ist(&idt[TRAP_nmi], IST_NMI); set_ist(&idt[TRAP_machine_check], IST_MCE); + set_ist(&idt[TRAP_debug], IST_DB); } static inline void disable_each_ist(idt_entry_t *idt) @@ -430,6 +432,7 @@ static inline void disable_each_ist(idt_entry_t *idt) set_ist(&idt[TRAP_double_fault], IST_NONE); set_ist(&idt[TRAP_nmi], IST_NONE); set_ist(&idt[TRAP_machine_check], IST_NONE); + set_ist(&idt[TRAP_debug], IST_NONE); } #define IDT_ENTRIES 256