x86: clear EFLAGS.NT in SYSENTER entry path ... as it causes problems if we happen to exit back via IRET: In the course of trying to handle the fault, the hypervisor creates a stack frame by hand, and uses PUSHFQ to set the respective EFLAGS field, but expects to be able to IRET through that stack frame to the second portion of the fixup code (which causes a #GP due to the stored EFLAGS having NT set). And even if this worked (e.g. if we cleared NT in that path), it would then (through the failsafe callback) cause a #GP in the guest with the SYSENTER handler's first instruction as the source, which in turn would allow guest user mode code to crash the guest kernel. Inject a #GP on the fake (NULL) address of the SYSENTER instruction instead, just like in the case where the guest kernel didn't register a corresponding entry point. On 32-bit we also need to make sure we clear SYSENTER_CS for all CPUs (neither #RESET nor #INIT guarantees this). This is CVE-2013-1917 / XSA-44. Reported-by: Andrew Cooper Signed-off-by: Jan Beulich Tested-by: Andrew Cooper Acked-by: Andrew Cooper --- a/xen/arch/x86/acpi/suspend.c +++ b/xen/arch/x86/acpi/suspend.c @@ -81,8 +81,12 @@ void restore_rest_processor_state(void) } #else /* !defined(CONFIG_X86_64) */ - if ( supervisor_mode_kernel && cpu_has_sep ) - wrmsr(MSR_IA32_SYSENTER_ESP, &this_cpu(init_tss).esp1, 0); + if ( cpu_has_sep ) + { + wrmsr(MSR_IA32_SYSENTER_CS, 0, 0); + if ( supervisor_mode_kernel ) + wrmsr(MSR_IA32_SYSENTER_ESP, &this_cpu(init_tss).esp1, 0); + } #endif /* Maybe load the debug registers. 
*/ --- a/xen/arch/x86/cpu/common.c +++ b/xen/arch/x86/cpu/common.c @@ -655,8 +655,11 @@ void __cpuinit cpu_init(void) #if defined(CONFIG_X86_32) t->ss0 = __HYPERVISOR_DS; t->esp0 = get_stack_bottom(); - if ( supervisor_mode_kernel && cpu_has_sep ) + if ( cpu_has_sep ) { + wrmsr(MSR_IA32_SYSENTER_CS, 0, 0); + if ( supervisor_mode_kernel ) wrmsr(MSR_IA32_SYSENTER_ESP, &t->esp1, 0); + } #elif defined(CONFIG_X86_64) /* Bottom-of-stack must be 16-byte aligned! */ BUG_ON((get_stack_bottom() & 15) != 0); --- a/xen/arch/x86/x86_64/entry.S +++ b/xen/arch/x86/x86_64/entry.S @@ -284,7 +284,14 @@ sysenter_eflags_saved: cmpb $0,VCPU_sysenter_disables_events(%rbx) movq VCPU_sysenter_addr(%rbx),%rax setne %cl + testl $X86_EFLAGS_NT,UREGS_eflags(%rsp) leaq VCPU_trap_bounce(%rbx),%rdx +UNLIKELY_START(nz, sysenter_nt_set) + pushfq + andl $~X86_EFLAGS_NT,(%rsp) + popfq + xorl %eax,%eax +UNLIKELY_END(sysenter_nt_set) testq %rax,%rax leal (,%rcx,TBF_INTERRUPT),%ecx UNLIKELY_START(z, sysenter_gpf)