From 948cf8145a132fd08c7fcfd29b6fa78d63657651 Mon Sep 17 00:00:00 2001 From: Henrik Riomar Date: Thu, 5 Apr 2018 09:26:36 +0200 Subject: main/xen: upgrade to 4.9.2 Update musl-support.patch and remove hunk that fixes tools/libxl/libxl_arm_acpi.c as this is in upstream commit: 6b1a2704e7 libxl/arm: Fix build on arm64 + acpi Drop patches included in new upstream version --- ...n-band-aid-against-malicious-64-bit-PV-gu.patch | 761 --------------------- 1 file changed, 761 deletions(-) delete mode 100644 main/xen/0003-x86-Meltdown-band-aid-against-malicious-64-bit-PV-gu.patch (limited to 'main/xen/0003-x86-Meltdown-band-aid-against-malicious-64-bit-PV-gu.patch') diff --git a/main/xen/0003-x86-Meltdown-band-aid-against-malicious-64-bit-PV-gu.patch b/main/xen/0003-x86-Meltdown-band-aid-against-malicious-64-bit-PV-gu.patch deleted file mode 100644 index 296bbe8484..0000000000 --- a/main/xen/0003-x86-Meltdown-band-aid-against-malicious-64-bit-PV-gu.patch +++ /dev/null @@ -1,761 +0,0 @@ -From 92884bbf6c424c402ae76e6da06e62cd33714cb3 Mon Sep 17 00:00:00 2001 -From: Jan Beulich -Date: Wed, 17 Jan 2018 17:07:33 +0100 -Subject: [PATCH 3/4] x86: Meltdown band-aid against malicious 64-bit PV guests - -This is a very simplistic change limiting the amount of memory a running -64-bit PV guest has mapped (and hence available for attacking): Only the -mappings of stack, IDT, and TSS are being cloned from the direct map -into per-CPU page tables. Guest controlled parts of the page tables are -being copied into those per-CPU page tables upon entry into the guest. -Cross-vCPU synchronization of top level page table entry changes is -being effected by forcing other active vCPU-s of the guest into the -hypervisor. - -The change to context_switch() isn't strictly necessary, but there's no -reason to keep switching page tables once a PV guest is being scheduled -out. - -This isn't providing full isolation yet, but it should be covering all -pieces of information exposure of which would otherwise require an XSA. - -There is certainly much room for improvement, especially of performance, -here - first and foremost suppressing all the negative effects on AMD -systems. But in the interest of backportability (including to really old -hypervisors, which may not even have alternative patching) any such is -being left out here. 
- -Signed-off-by: Jan Beulich -Reviewed-by: Andrew Cooper -master commit: 5784de3e2067ed73efc2fe42e62831e8ae7f46c4 -master date: 2018-01-16 17:49:03 +0100 -(cherry picked from commit 1e0974638d65d9b8acf9ac7511d747188f38bcc3) ---- - xen/arch/x86/domain.c | 5 + - xen/arch/x86/mm.c | 21 ++++ - xen/arch/x86/smpboot.c | 198 +++++++++++++++++++++++++++++++++++++ - xen/arch/x86/x86_64/asm-offsets.c | 2 + - xen/arch/x86/x86_64/compat/entry.S | 11 +++ - xen/arch/x86/x86_64/entry.S | 149 +++++++++++++++++++++++++++- - xen/include/asm-x86/asm_defns.h | 30 ++++++ - xen/include/asm-x86/current.h | 12 +++ - xen/include/asm-x86/processor.h | 1 + - xen/include/asm-x86/x86_64/page.h | 5 +- - 10 files changed, 428 insertions(+), 6 deletions(-) - -diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c -index 07b50315b9..c0f0fc7a32 100644 ---- a/xen/arch/x86/domain.c -+++ b/xen/arch/x86/domain.c -@@ -1933,6 +1933,9 @@ static void paravirt_ctxt_switch_to(struct vcpu *v) - - switch_kernel_stack(v); - -+ this_cpu(root_pgt)[root_table_offset(PERDOMAIN_VIRT_START)] = -+ l4e_from_page(v->domain->arch.perdomain_l3_pg, __PAGE_HYPERVISOR_RW); -+ - cr4 = pv_guest_cr4_to_real_cr4(v); - if ( unlikely(cr4 != read_cr4()) ) - write_cr4(cr4); -@@ -2102,6 +2105,8 @@ void context_switch(struct vcpu *prev, struct vcpu *next) - - ASSERT(local_irq_is_enabled()); - -+ get_cpu_info()->xen_cr3 = 0; -+ - cpumask_copy(&dirty_mask, next->vcpu_dirty_cpumask); - /* Allow at most one CPU at a time to be dirty. */ - ASSERT(cpumask_weight(&dirty_mask) <= 1); -diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c -index 981458907f..78f4cb37f5 100644 ---- a/xen/arch/x86/mm.c -+++ b/xen/arch/x86/mm.c -@@ -3906,6 +3906,7 @@ long do_mmu_update( - struct vcpu *curr = current, *v = curr; - struct domain *d = v->domain, *pt_owner = d, *pg_owner; - struct domain_mmap_cache mapcache; -+ bool sync_guest = false; - uint32_t xsm_needed = 0; - uint32_t xsm_checked = 0; - int rc = put_old_guest_table(curr); -@@ -4054,6 +4055,8 @@ long do_mmu_update( - case PGT_l4_page_table: - rc = mod_l4_entry(va, l4e_from_intpte(req.val), mfn, - cmd == MMU_PT_UPDATE_PRESERVE_AD, v); -+ if ( !rc ) -+ sync_guest = true; - break; - case PGT_writable_page: - perfc_incr(writable_mmu_updates); -@@ -4156,6 +4159,24 @@ long do_mmu_update( - - domain_mmap_cache_destroy(&mapcache); - -+ if ( sync_guest ) -+ { -+ /* -+ * Force other vCPU-s of the affected guest to pick up L4 entry -+ * changes (if any). Issue a flush IPI with empty operation mask to -+ * facilitate this (including ourselves waiting for the IPI to -+ * actually have arrived). Utilize the fact that FLUSH_VA_VALID is -+ * meaningless without FLUSH_CACHE, but will allow to pass the no-op -+ * check in flush_area_mask(). -+ */ -+ unsigned int cpu = smp_processor_id(); -+ cpumask_t *mask = per_cpu(scratch_cpumask, cpu); -+ -+ cpumask_andnot(mask, pt_owner->domain_dirty_cpumask, cpumask_of(cpu)); -+ if ( !cpumask_empty(mask) ) -+ flush_area_mask(mask, ZERO_BLOCK_PTR, FLUSH_VA_VALID); -+ } -+ - perfc_add(num_page_updates, i); - - out: -diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c -index 26b5301dcc..965a49f923 100644 ---- a/xen/arch/x86/smpboot.c -+++ b/xen/arch/x86/smpboot.c -@@ -321,6 +321,9 @@ void start_secondary(void *unused) - */ - spin_debug_disable(); - -+ get_cpu_info()->xen_cr3 = 0; -+ get_cpu_info()->pv_cr3 = __pa(this_cpu(root_pgt)); -+ - load_system_tables(); - - /* Full exception support from here on in. 
*/ -@@ -635,6 +638,187 @@ void cpu_exit_clear(unsigned int cpu) - set_cpu_state(CPU_STATE_DEAD); - } - -+static int clone_mapping(const void *ptr, root_pgentry_t *rpt) -+{ -+ unsigned long linear = (unsigned long)ptr, pfn; -+ unsigned int flags; -+ l3_pgentry_t *pl3e = l4e_to_l3e(idle_pg_table[root_table_offset(linear)]) + -+ l3_table_offset(linear); -+ l2_pgentry_t *pl2e; -+ l1_pgentry_t *pl1e; -+ -+ if ( linear < DIRECTMAP_VIRT_START ) -+ return 0; -+ -+ flags = l3e_get_flags(*pl3e); -+ ASSERT(flags & _PAGE_PRESENT); -+ if ( flags & _PAGE_PSE ) -+ { -+ pfn = (l3e_get_pfn(*pl3e) & ~((1UL << (2 * PAGETABLE_ORDER)) - 1)) | -+ (PFN_DOWN(linear) & ((1UL << (2 * PAGETABLE_ORDER)) - 1)); -+ flags &= ~_PAGE_PSE; -+ } -+ else -+ { -+ pl2e = l3e_to_l2e(*pl3e) + l2_table_offset(linear); -+ flags = l2e_get_flags(*pl2e); -+ ASSERT(flags & _PAGE_PRESENT); -+ if ( flags & _PAGE_PSE ) -+ { -+ pfn = (l2e_get_pfn(*pl2e) & ~((1UL << PAGETABLE_ORDER) - 1)) | -+ (PFN_DOWN(linear) & ((1UL << PAGETABLE_ORDER) - 1)); -+ flags &= ~_PAGE_PSE; -+ } -+ else -+ { -+ pl1e = l2e_to_l1e(*pl2e) + l1_table_offset(linear); -+ flags = l1e_get_flags(*pl1e); -+ if ( !(flags & _PAGE_PRESENT) ) -+ return 0; -+ pfn = l1e_get_pfn(*pl1e); -+ } -+ } -+ -+ if ( !(root_get_flags(rpt[root_table_offset(linear)]) & _PAGE_PRESENT) ) -+ { -+ pl3e = alloc_xen_pagetable(); -+ if ( !pl3e ) -+ return -ENOMEM; -+ clear_page(pl3e); -+ l4e_write(&rpt[root_table_offset(linear)], -+ l4e_from_paddr(__pa(pl3e), __PAGE_HYPERVISOR)); -+ } -+ else -+ pl3e = l4e_to_l3e(rpt[root_table_offset(linear)]); -+ -+ pl3e += l3_table_offset(linear); -+ -+ if ( !(l3e_get_flags(*pl3e) & _PAGE_PRESENT) ) -+ { -+ pl2e = alloc_xen_pagetable(); -+ if ( !pl2e ) -+ return -ENOMEM; -+ clear_page(pl2e); -+ l3e_write(pl3e, l3e_from_paddr(__pa(pl2e), __PAGE_HYPERVISOR)); -+ } -+ else -+ { -+ ASSERT(!(l3e_get_flags(*pl3e) & _PAGE_PSE)); -+ pl2e = l3e_to_l2e(*pl3e); -+ } -+ -+ pl2e += l2_table_offset(linear); -+ -+ if ( !(l2e_get_flags(*pl2e) & _PAGE_PRESENT) ) -+ { -+ pl1e = alloc_xen_pagetable(); -+ if ( !pl1e ) -+ return -ENOMEM; -+ clear_page(pl1e); -+ l2e_write(pl2e, l2e_from_paddr(__pa(pl1e), __PAGE_HYPERVISOR)); -+ } -+ else -+ { -+ ASSERT(!(l2e_get_flags(*pl2e) & _PAGE_PSE)); -+ pl1e = l2e_to_l1e(*pl2e); -+ } -+ -+ pl1e += l1_table_offset(linear); -+ -+ if ( l1e_get_flags(*pl1e) & _PAGE_PRESENT ) -+ { -+ ASSERT(l1e_get_pfn(*pl1e) == pfn); -+ ASSERT(l1e_get_flags(*pl1e) == flags); -+ } -+ else -+ l1e_write(pl1e, l1e_from_pfn(pfn, flags)); -+ -+ return 0; -+} -+ -+DEFINE_PER_CPU(root_pgentry_t *, root_pgt); -+ -+static int setup_cpu_root_pgt(unsigned int cpu) -+{ -+ root_pgentry_t *rpt = alloc_xen_pagetable(); -+ unsigned int off; -+ int rc; -+ -+ if ( !rpt ) -+ return -ENOMEM; -+ -+ clear_page(rpt); -+ per_cpu(root_pgt, cpu) = rpt; -+ -+ rpt[root_table_offset(RO_MPT_VIRT_START)] = -+ idle_pg_table[root_table_offset(RO_MPT_VIRT_START)]; -+ /* SH_LINEAR_PT inserted together with guest mappings. */ -+ /* PERDOMAIN inserted during context switch. */ -+ rpt[root_table_offset(XEN_VIRT_START)] = -+ idle_pg_table[root_table_offset(XEN_VIRT_START)]; -+ -+ /* Install direct map page table entries for stack, IDT, and TSS. 
*/ -+ for ( off = rc = 0; !rc && off < STACK_SIZE; off += PAGE_SIZE ) -+ rc = clone_mapping(__va(__pa(stack_base[cpu])) + off, rpt); -+ -+ if ( !rc ) -+ rc = clone_mapping(idt_tables[cpu], rpt); -+ if ( !rc ) -+ rc = clone_mapping(&per_cpu(init_tss, cpu), rpt); -+ -+ return rc; -+} -+ -+static void cleanup_cpu_root_pgt(unsigned int cpu) -+{ -+ root_pgentry_t *rpt = per_cpu(root_pgt, cpu); -+ unsigned int r; -+ -+ if ( !rpt ) -+ return; -+ -+ per_cpu(root_pgt, cpu) = NULL; -+ -+ for ( r = root_table_offset(DIRECTMAP_VIRT_START); -+ r < root_table_offset(HYPERVISOR_VIRT_END); ++r ) -+ { -+ l3_pgentry_t *l3t; -+ unsigned int i3; -+ -+ if ( !(root_get_flags(rpt[r]) & _PAGE_PRESENT) ) -+ continue; -+ -+ l3t = l4e_to_l3e(rpt[r]); -+ -+ for ( i3 = 0; i3 < L3_PAGETABLE_ENTRIES; ++i3 ) -+ { -+ l2_pgentry_t *l2t; -+ unsigned int i2; -+ -+ if ( !(l3e_get_flags(l3t[i3]) & _PAGE_PRESENT) ) -+ continue; -+ -+ ASSERT(!(l3e_get_flags(l3t[i3]) & _PAGE_PSE)); -+ l2t = l3e_to_l2e(l3t[i3]); -+ -+ for ( i2 = 0; i2 < L2_PAGETABLE_ENTRIES; ++i2 ) -+ { -+ if ( !(l2e_get_flags(l2t[i2]) & _PAGE_PRESENT) ) -+ continue; -+ -+ ASSERT(!(l2e_get_flags(l2t[i2]) & _PAGE_PSE)); -+ free_xen_pagetable(l2e_to_l1e(l2t[i2])); -+ } -+ -+ free_xen_pagetable(l2t); -+ } -+ -+ free_xen_pagetable(l3t); -+ } -+ -+ free_xen_pagetable(rpt); -+} -+ - static void cpu_smpboot_free(unsigned int cpu) - { - unsigned int order, socket = cpu_to_socket(cpu); -@@ -673,6 +857,8 @@ static void cpu_smpboot_free(unsigned int cpu) - free_domheap_page(mfn_to_page(mfn)); - } - -+ cleanup_cpu_root_pgt(cpu); -+ - order = get_order_from_pages(NR_RESERVED_GDT_PAGES); - free_xenheap_pages(per_cpu(gdt_table, cpu), order); - -@@ -728,6 +914,9 @@ static int cpu_smpboot_alloc(unsigned int cpu) - set_ist(&idt_tables[cpu][TRAP_nmi], IST_NONE); - set_ist(&idt_tables[cpu][TRAP_machine_check], IST_NONE); - -+ if ( setup_cpu_root_pgt(cpu) ) -+ goto oom; -+ - for ( stub_page = 0, i = cpu & ~(STUBS_PER_PAGE - 1); - i < nr_cpu_ids && i <= (cpu | (STUBS_PER_PAGE - 1)); ++i ) - if ( cpu_online(i) && cpu_to_node(i) == node ) -@@ -783,6 +972,8 @@ static struct notifier_block cpu_smpboot_nfb = { - - void __init smp_prepare_cpus(unsigned int max_cpus) - { -+ int rc; -+ - register_cpu_notifier(&cpu_smpboot_nfb); - - mtrr_aps_sync_begin(); -@@ -796,6 +987,11 @@ void __init smp_prepare_cpus(unsigned int max_cpus) - - stack_base[0] = stack_start; - -+ rc = setup_cpu_root_pgt(0); -+ if ( rc ) -+ panic("Error %d setting up PV root page table\n", rc); -+ get_cpu_info()->pv_cr3 = __pa(per_cpu(root_pgt, 0)); -+ - set_nr_sockets(); - - socket_cpumask = xzalloc_array(cpumask_t *, nr_sockets); -@@ -865,6 +1061,8 @@ void __init smp_prepare_boot_cpu(void) - #if NR_CPUS > 2 * BITS_PER_LONG - per_cpu(scratch_cpumask, cpu) = &scratch_cpu0mask; - #endif -+ -+ get_cpu_info()->xen_cr3 = 0; - } - - static void -diff --git a/xen/arch/x86/x86_64/asm-offsets.c b/xen/arch/x86/x86_64/asm-offsets.c -index e136af6b99..b1a4310974 100644 ---- a/xen/arch/x86/x86_64/asm-offsets.c -+++ b/xen/arch/x86/x86_64/asm-offsets.c -@@ -137,6 +137,8 @@ void __dummy__(void) - OFFSET(CPUINFO_processor_id, struct cpu_info, processor_id); - OFFSET(CPUINFO_current_vcpu, struct cpu_info, current_vcpu); - OFFSET(CPUINFO_cr4, struct cpu_info, cr4); -+ OFFSET(CPUINFO_xen_cr3, struct cpu_info, xen_cr3); -+ OFFSET(CPUINFO_pv_cr3, struct cpu_info, pv_cr3); - DEFINE(CPUINFO_sizeof, sizeof(struct cpu_info)); - BLANK(); - -diff --git a/xen/arch/x86/x86_64/compat/entry.S b/xen/arch/x86/x86_64/compat/entry.S -index 
37864a67f3..86ab78063a 100644 ---- a/xen/arch/x86/x86_64/compat/entry.S -+++ b/xen/arch/x86/x86_64/compat/entry.S -@@ -197,6 +197,17 @@ ENTRY(cstar_enter) - pushq $0 - movl $TRAP_syscall, 4(%rsp) - SAVE_ALL -+ -+ GET_STACK_END(bx) -+ mov STACK_CPUINFO_FIELD(xen_cr3)(%rbx), %rcx -+ neg %rcx -+ jz .Lcstar_cr3_okay -+ mov %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%rbx) -+ neg %rcx -+ write_cr3 rcx, rdi, rsi -+ movq $0, STACK_CPUINFO_FIELD(xen_cr3)(%rbx) -+.Lcstar_cr3_okay: -+ - GET_CURRENT(bx) - movq VCPU_domain(%rbx),%rcx - cmpb $0,DOMAIN_is_32bit_pv(%rcx) -diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S -index 668bf8ac28..16cf095ee1 100644 ---- a/xen/arch/x86/x86_64/entry.S -+++ b/xen/arch/x86/x86_64/entry.S -@@ -35,6 +35,32 @@ ENTRY(switch_to_kernel) - /* %rbx: struct vcpu, interrupts disabled */ - restore_all_guest: - ASSERT_INTERRUPTS_DISABLED -+ -+ /* Copy guest mappings and switch to per-CPU root page table. */ -+ mov %cr3, %r9 -+ GET_STACK_END(dx) -+ mov STACK_CPUINFO_FIELD(pv_cr3)(%rdx), %rdi -+ movabs $PADDR_MASK & PAGE_MASK, %rsi -+ movabs $DIRECTMAP_VIRT_START, %rcx -+ mov %rdi, %rax -+ and %rsi, %rdi -+ and %r9, %rsi -+ add %rcx, %rdi -+ add %rcx, %rsi -+ mov $ROOT_PAGETABLE_FIRST_XEN_SLOT, %ecx -+ mov root_table_offset(SH_LINEAR_PT_VIRT_START)*8(%rsi), %r8 -+ mov %r8, root_table_offset(SH_LINEAR_PT_VIRT_START)*8(%rdi) -+ rep movsq -+ mov $ROOT_PAGETABLE_ENTRIES - \ -+ ROOT_PAGETABLE_LAST_XEN_SLOT - 1, %ecx -+ sub $(ROOT_PAGETABLE_FIRST_XEN_SLOT - \ -+ ROOT_PAGETABLE_LAST_XEN_SLOT - 1) * 8, %rsi -+ sub $(ROOT_PAGETABLE_FIRST_XEN_SLOT - \ -+ ROOT_PAGETABLE_LAST_XEN_SLOT - 1) * 8, %rdi -+ rep movsq -+ mov %r9, STACK_CPUINFO_FIELD(xen_cr3)(%rdx) -+ write_cr3 rax, rdi, rsi -+ - RESTORE_ALL - testw $TRAP_syscall,4(%rsp) - jz iret_exit_to_guest -@@ -69,6 +95,22 @@ iret_exit_to_guest: - ALIGN - /* No special register assumptions. */ - restore_all_xen: -+ /* -+ * Check whether we need to switch to the per-CPU page tables, in -+ * case we return to late PV exit code (from an NMI or #MC). -+ */ -+ GET_STACK_END(ax) -+ mov STACK_CPUINFO_FIELD(xen_cr3)(%rax), %rdx -+ mov STACK_CPUINFO_FIELD(pv_cr3)(%rax), %rax -+ test %rdx, %rdx -+ /* -+ * Ideally the condition would be "nsz", but such doesn't exist, -+ * so "g" will have to do. 
-+ */ -+UNLIKELY_START(g, exit_cr3) -+ write_cr3 rax, rdi, rsi -+UNLIKELY_END(exit_cr3) -+ - RESTORE_ALL adj=8 - iretq - -@@ -98,7 +140,18 @@ ENTRY(lstar_enter) - pushq $0 - movl $TRAP_syscall, 4(%rsp) - SAVE_ALL -- GET_CURRENT(bx) -+ -+ GET_STACK_END(bx) -+ mov STACK_CPUINFO_FIELD(xen_cr3)(%rbx), %rcx -+ neg %rcx -+ jz .Llstar_cr3_okay -+ mov %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%rbx) -+ neg %rcx -+ write_cr3 rcx, rdi, rsi -+ movq $0, STACK_CPUINFO_FIELD(xen_cr3)(%rbx) -+.Llstar_cr3_okay: -+ -+ __GET_CURRENT(bx) - testb $TF_kernel_mode,VCPU_thread_flags(%rbx) - jz switch_to_kernel - -@@ -190,7 +243,18 @@ GLOBAL(sysenter_eflags_saved) - pushq $0 - movl $TRAP_syscall, 4(%rsp) - SAVE_ALL -- GET_CURRENT(bx) -+ -+ GET_STACK_END(bx) -+ mov STACK_CPUINFO_FIELD(xen_cr3)(%rbx), %rcx -+ neg %rcx -+ jz .Lsyse_cr3_okay -+ mov %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%rbx) -+ neg %rcx -+ write_cr3 rcx, rdi, rsi -+ movq $0, STACK_CPUINFO_FIELD(xen_cr3)(%rbx) -+.Lsyse_cr3_okay: -+ -+ __GET_CURRENT(bx) - cmpb $0,VCPU_sysenter_disables_events(%rbx) - movq VCPU_sysenter_addr(%rbx),%rax - setne %cl -@@ -226,13 +290,23 @@ ENTRY(int80_direct_trap) - movl $0x80, 4(%rsp) - SAVE_ALL - -+ GET_STACK_END(bx) -+ mov STACK_CPUINFO_FIELD(xen_cr3)(%rbx), %rcx -+ neg %rcx -+ jz .Lint80_cr3_okay -+ mov %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%rbx) -+ neg %rcx -+ write_cr3 rcx, rdi, rsi -+ movq $0, STACK_CPUINFO_FIELD(xen_cr3)(%rbx) -+.Lint80_cr3_okay: -+ - cmpb $0,untrusted_msi(%rip) - UNLIKELY_START(ne, msi_check) - movl $0x80,%edi - call check_for_unexpected_msi - UNLIKELY_END(msi_check) - -- GET_CURRENT(bx) -+ __GET_CURRENT(bx) - - /* Check that the callback is non-null. */ - leaq VCPU_int80_bounce(%rbx),%rdx -@@ -389,9 +463,27 @@ ENTRY(dom_crash_sync_extable) - - ENTRY(common_interrupt) - SAVE_ALL CLAC -+ -+ GET_STACK_END(14) -+ mov STACK_CPUINFO_FIELD(xen_cr3)(%r14), %rcx -+ mov %rcx, %r15 -+ neg %rcx -+ jz .Lintr_cr3_okay -+ jns .Lintr_cr3_load -+ mov %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%r14) -+ neg %rcx -+.Lintr_cr3_load: -+ write_cr3 rcx, rdi, rsi -+ xor %ecx, %ecx -+ mov %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%r14) -+ testb $3, UREGS_cs(%rsp) -+ cmovnz %rcx, %r15 -+.Lintr_cr3_okay: -+ - CR4_PV32_RESTORE - movq %rsp,%rdi - callq do_IRQ -+ mov %r15, STACK_CPUINFO_FIELD(xen_cr3)(%r14) - jmp ret_from_intr - - /* No special register assumptions. */ -@@ -409,6 +501,23 @@ ENTRY(page_fault) - /* No special register assumptions. */ - GLOBAL(handle_exception) - SAVE_ALL CLAC -+ -+ GET_STACK_END(14) -+ mov STACK_CPUINFO_FIELD(xen_cr3)(%r14), %rcx -+ mov %rcx, %r15 -+ neg %rcx -+ jz .Lxcpt_cr3_okay -+ jns .Lxcpt_cr3_load -+ mov %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%r14) -+ neg %rcx -+.Lxcpt_cr3_load: -+ write_cr3 rcx, rdi, rsi -+ xor %ecx, %ecx -+ mov %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%r14) -+ testb $3, UREGS_cs(%rsp) -+ cmovnz %rcx, %r15 -+.Lxcpt_cr3_okay: -+ - handle_exception_saved: - GET_CURRENT(bx) - testb $X86_EFLAGS_IF>>8,UREGS_eflags+1(%rsp) -@@ -473,6 +582,7 @@ handle_exception_saved: - leaq exception_table(%rip),%rdx - PERFC_INCR(exceptions, %rax, %rbx) - callq *(%rdx,%rax,8) -+ mov %r15, STACK_CPUINFO_FIELD(xen_cr3)(%r14) - testb $3,UREGS_cs(%rsp) - jz restore_all_xen - leaq VCPU_trap_bounce(%rbx),%rdx -@@ -505,6 +615,7 @@ exception_with_ints_disabled: - rep; movsq # make room for ec/ev - 1: movq UREGS_error_code(%rsp),%rax # ec/ev - movq %rax,UREGS_kernel_sizeof(%rsp) -+ mov %r15, STACK_CPUINFO_FIELD(xen_cr3)(%r14) - jmp restore_all_xen # return to fixup code - - /* No special register assumptions. 
*/ -@@ -583,6 +694,17 @@ ENTRY(double_fault) - movl $TRAP_double_fault,4(%rsp) - /* Set AC to reduce chance of further SMAP faults */ - SAVE_ALL STAC -+ -+ GET_STACK_END(bx) -+ mov STACK_CPUINFO_FIELD(xen_cr3)(%rbx), %rbx -+ test %rbx, %rbx -+ jz .Ldblf_cr3_okay -+ jns .Ldblf_cr3_load -+ neg %rbx -+.Ldblf_cr3_load: -+ write_cr3 rbx, rdi, rsi -+.Ldblf_cr3_okay: -+ - movq %rsp,%rdi - call do_double_fault - BUG /* do_double_fault() shouldn't return. */ -@@ -601,10 +723,28 @@ ENTRY(nmi) - movl $TRAP_nmi,4(%rsp) - handle_ist_exception: - SAVE_ALL CLAC -+ -+ GET_STACK_END(14) -+ mov STACK_CPUINFO_FIELD(xen_cr3)(%r14), %rcx -+ mov %rcx, %r15 -+ neg %rcx -+ jz .List_cr3_okay -+ jns .List_cr3_load -+ mov %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%r14) -+ neg %rcx -+.List_cr3_load: -+ write_cr3 rcx, rdi, rsi -+ movq $0, STACK_CPUINFO_FIELD(xen_cr3)(%r14) -+.List_cr3_okay: -+ - CR4_PV32_RESTORE - testb $3,UREGS_cs(%rsp) - jz 1f -- /* Interrupted guest context. Copy the context to stack bottom. */ -+ /* -+ * Interrupted guest context. Clear the restore value for xen_cr3 -+ * and copy the context to stack bottom. -+ */ -+ xor %r15, %r15 - GET_CPUINFO_FIELD(guest_cpu_user_regs,di) - movq %rsp,%rsi - movl $UREGS_kernel_sizeof/8,%ecx -@@ -614,6 +754,7 @@ handle_ist_exception: - movzbl UREGS_entry_vector(%rsp),%eax - leaq exception_table(%rip),%rdx - callq *(%rdx,%rax,8) -+ mov %r15, STACK_CPUINFO_FIELD(xen_cr3)(%r14) - cmpb $TRAP_nmi,UREGS_entry_vector(%rsp) - jne ret_from_intr - -diff --git a/xen/include/asm-x86/asm_defns.h b/xen/include/asm-x86/asm_defns.h -index 98192eb4e6..fb0fee9286 100644 ---- a/xen/include/asm-x86/asm_defns.h -+++ b/xen/include/asm-x86/asm_defns.h -@@ -93,9 +93,30 @@ void ret_from_intr(void); - UNLIKELY_DONE(mp, tag); \ - __UNLIKELY_END(tag) - -+ .equ .Lrax, 0 -+ .equ .Lrcx, 1 -+ .equ .Lrdx, 2 -+ .equ .Lrbx, 3 -+ .equ .Lrsp, 4 -+ .equ .Lrbp, 5 -+ .equ .Lrsi, 6 -+ .equ .Lrdi, 7 -+ .equ .Lr8, 8 -+ .equ .Lr9, 9 -+ .equ .Lr10, 10 -+ .equ .Lr11, 11 -+ .equ .Lr12, 12 -+ .equ .Lr13, 13 -+ .equ .Lr14, 14 -+ .equ .Lr15, 15 -+ - #define STACK_CPUINFO_FIELD(field) (1 - CPUINFO_sizeof + CPUINFO_##field) - #define GET_STACK_END(reg) \ -+ .if .Lr##reg > 8; \ -+ movq $STACK_SIZE-1, %r##reg; \ -+ .else; \ - movl $STACK_SIZE-1, %e##reg; \ -+ .endif; \ - orq %rsp, %r##reg - - #define GET_CPUINFO_FIELD(field, reg) \ -@@ -177,6 +198,15 @@ void ret_from_intr(void); - #define ASM_STAC ASM_AC(STAC) - #define ASM_CLAC ASM_AC(CLAC) - -+.macro write_cr3 val:req, tmp1:req, tmp2:req -+ mov %cr4, %\tmp1 -+ mov %\tmp1, %\tmp2 -+ and $~X86_CR4_PGE, %\tmp1 -+ mov %\tmp1, %cr4 -+ mov %\val, %cr3 -+ mov %\tmp2, %cr4 -+.endm -+ - #define CR4_PV32_RESTORE \ - 667: ASM_NOP5; \ - .pushsection .altinstr_replacement, "ax"; \ -diff --git a/xen/include/asm-x86/current.h b/xen/include/asm-x86/current.h -index 89849929eb..b929c48c85 100644 ---- a/xen/include/asm-x86/current.h -+++ b/xen/include/asm-x86/current.h -@@ -41,6 +41,18 @@ struct cpu_info { - struct vcpu *current_vcpu; - unsigned long per_cpu_offset; - unsigned long cr4; -+ /* -+ * Of the two following fields the latter is being set to the CR3 value -+ * to be used on the given pCPU for loading whenever 64-bit PV guest -+ * context is being entered. The value never changes once set. -+ * The former is the value to restore when re-entering Xen, if any. IOW -+ * its value being zero means there's nothing to restore. 
However, its
-+ * value can also be negative, indicating to the exit-to-Xen code that
-+ * restoring is not necessary, but allowing any nested entry code paths
-+ * to still know the value to put back into CR3.
-+ */
-+ unsigned long xen_cr3;
-+ unsigned long pv_cr3;
- /* get_stack_bottom() must be 16-byte aligned */
- };
- 
-diff --git a/xen/include/asm-x86/processor.h b/xen/include/asm-x86/processor.h
-index 00cc23ce40..0291e82de3 100644
---- a/xen/include/asm-x86/processor.h
-+++ b/xen/include/asm-x86/processor.h
-@@ -466,6 +466,7 @@ extern idt_entry_t idt_table[];
- extern idt_entry_t *idt_tables[];
- 
- DECLARE_PER_CPU(struct tss_struct, init_tss);
-+DECLARE_PER_CPU(root_pgentry_t *, root_pgt);
- 
- extern void init_int80_direct_trap(struct vcpu *v);
- 
-diff --git a/xen/include/asm-x86/x86_64/page.h b/xen/include/asm-x86/x86_64/page.h
-index 1a6cae6283..749554fbbb 100644
---- a/xen/include/asm-x86/x86_64/page.h
-+++ b/xen/include/asm-x86/x86_64/page.h
-@@ -25,8 +25,8 @@
- /* These are architectural limits. Current CPUs support only 40-bit phys. */
- #define PADDR_BITS 52
- #define VADDR_BITS 48
--#define PADDR_MASK ((1UL << PADDR_BITS)-1)
--#define VADDR_MASK ((1UL << VADDR_BITS)-1)
-+#define PADDR_MASK ((_AC(1,UL) << PADDR_BITS) - 1)
-+#define VADDR_MASK ((_AC(1,UL) << VADDR_BITS) - 1)
- 
- #define is_canonical_address(x) (((long)(x) >> 47) == ((long)(x) >> 63))
- 
-@@ -116,6 +116,7 @@ typedef l4_pgentry_t root_pgentry_t;
- : (((_s) < ROOT_PAGETABLE_FIRST_XEN_SLOT) || \
- ((_s) > ROOT_PAGETABLE_LAST_XEN_SLOT)))
- 
-+#define root_table_offset l4_table_offset
- #define root_get_pfn l4e_get_pfn
- #define root_get_flags l4e_get_flags
- #define root_get_intpte l4e_get_intpte
--- 
-2.15.0
-