-rw-r--r--  main/xen/APKBUILD           |   22
-rw-r--r--  main/xen/xsa247-4.9-2.patch |  109
-rw-r--r--  main/xen/xsa252.patch       |   27
-rw-r--r--  main/xen/xsa253-4.10.patch  |   26
-rw-r--r--  main/xen/xsa254-4.10.patch  | 1373
-rw-r--r--  main/xen/xsa255-1.patch     |  133
-rw-r--r--  main/xen/xsa255-2.patch     |  167
-rw-r--r--  main/xen/xsa256.patch       |   40
8 files changed, 6 insertions(+), 1891 deletions(-)
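The change below follows the standard Alpine package-upgrade workflow: bump pkgver, reset pkgrel, delete the patches the new upstream release already contains, and regenerate the checksums. A minimal sketch of that workflow, assuming a checkout of the aports tree with abuild installed (the commands are illustrative, not part of this commit):

    cd aports/main/xen

    # Edit APKBUILD: set pkgver=4.10.1, pkgrel=0, and drop the
    # xsa*.patch entries from source=.

    # Remove the patch files now shipped in the upstream tarball.
    rm xsa252.patch xsa253-4.10.patch xsa254-4.10.patch \
       xsa255-1.patch xsa255-2.patch xsa256.patch

    # Fetch the new tarball and regenerate the sha512sums= block.
    abuild checksum

    # Rebuild the package with dependencies resolved.
    abuild -r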
diff --git a/main/xen/APKBUILD b/main/xen/APKBUILD
index 31201ba019..7070edbfa0 100644
--- a/main/xen/APKBUILD
+++ b/main/xen/APKBUILD
@@ -2,8 +2,8 @@
 # Contributor: Roger Pau Monne <roger.pau@entel.upc.edu>
 # Maintainer: William Pitcock <nenolod@dereferenced.org>
 pkgname=xen
-pkgver=4.10.0
-pkgrel=3
+pkgver=4.10.1
+pkgrel=0
 pkgdesc="Xen hypervisor"
 url="http://www.xen.org/"
 arch="x86_64 armhf aarch64"
@@ -112,6 +112,9 @@ options="!strip"
 #   - CVE-2018-7540 XSA-252
 #   - CVE-2018-7541 XSA-255
 #   - CVE-2018-7542 XSA-256
+# 4.10.1-r0:
+#   - CVE-2018-10472 XSA-258
+#   - CVE-2018-10471 XSA-259
 
 case "$CARCH" in
 x86*)
@@ -159,13 +162,6 @@ source="https://downloads.xenproject.org/release/$pkgname/$pkgver/$pkgname-$pkgv
 	http://xenbits.xen.org/xen-extfiles/zlib-$_ZLIB_VERSION.tar.gz
 	http://xenbits.xen.org/xen-extfiles/ipxe-git-$_IPXE_GIT_TAG.tar.gz
 
-	xsa252.patch
-	xsa253-4.10.patch
-	xsa254-4.10.patch
-	xsa255-1.patch
-	xsa255-2.patch
-	xsa256.patch
-
 	qemu-xen_paths.patch
 
 	hotplug-vif-vtrill.patch
@@ -416,7 +412,7 @@
 EOF
 }
 
-sha512sums="5a37935c382f9cfe3641a35c3be0ba11689bca10c7d3c2401963513e3a834ee8d0c8a0ddcf3716dbf0a795aea1bab78caf19acf1272e5e054bf012cfa06a4690  xen-4.10.0.tar.gz
+sha512sums="236c02bee69e33644703ed26d323d4c491a91fc05bd0ee0990a7368579f7c82f5bb4510845bf80348fd923024d7d60d521f593dfd0365d971dc592f8ef10fbea  xen-4.10.1.tar.gz
 2e0b0fd23e6f10742a5517981e5171c6e88b0a93c83da701b296f5c0861d72c19782daab589a7eac3f9032152a0fc7eff7f5362db8fccc4859564a9aa82329cf  gmp-4.3.2.tar.bz2
 c2bc9ffc8583aeae71cee9ddcc4418969768d4e3764d47307da54f93981c0109fb07d84b061b3a3628bd00ba4d14a54742bc04848110eb3ae8ca25dbfbaabadb  grub-0.97.tar.gz
 1465b58279af1647f909450e394fe002ca165f0ff4a0254bfa9fe0e64316f50facdde2729d79a4e632565b4500cf4d6c74192ac0dd3bc9fe09129bbd67ba089d  lwip-1.3.0.tar.gz
@@ -426,12 +422,6 @@ c2bc9ffc8583aeae71cee9ddcc4418969768d4e3764d47307da54f93981c0109fb07d84b061b3a36
 4928b5b82f57645be9408362706ff2c4d9baa635b21b0d41b1c82930e8c60a759b1ea4fa74d7e6c7cae1b7692d006aa5cb72df0c3b88bf049779aa2b566f9d35  tpm_emulator-0.7.4.tar.gz
 021b958fcd0d346c4ba761bcf0cc40f3522de6186cf5a0a6ea34a70504ce9622b1c2626fce40675bc8282cf5f5ade18473656abc38050f72f5d6480507a2106e  zlib-1.2.3.tar.gz
 bbcce5e55040e7e29adebd4a5253a046016a6e2e7ff34cf801a42d147e1ec1af57e0297318249bfa9c5bbeac969fe4b37c18cbf845a80b2136d65387a4fc31da  ipxe-git-356f6c1b64d7a97746d1816cef8ca22bdd8d0b5d.tar.gz
-63fd6cee56ef04506efd6bf632998dc90514ff967e9435514a5ba8d2c5781735f986241344a479b6f44df9c6e6a278a165ba14834b0b3236064e24f71cd600f1  xsa252.patch
-58f288fb3087ecdd42075031b5604a493adef0754f68d596dce8576fbc46bfe8b1bf3dc429269cab3797b6f193036bdafeb32cf2c7cca34d9c89d5fe95a0453c  xsa253-4.10.patch
-f15350c0b44d3a6d5a3056dfac81d25f2af047135c528f6258f3d42ef26e6d87511d8e148a63e8d7d88108e07dc5b3551ed54c915be6dc3fe3f978ab72094321  xsa254-4.10.patch
-d16ead93486beee767c3c80d11981d940dfce55d9aabfe7adee480d02f575a2df074bb83a1e62e455ac754f6d8f3fb83abe7139b93b94b77233c2918b46dc2e2  xsa255-1.patch
-2f0719fbbde261a51e1ec66eb677fb2b17c94e0631d583c0a99357b7c2dfb2c695b6970ebbe8e05f68154344af74fa31e8b47b0d25c778b3aef1b284101ae528  xsa255-2.patch
-3bd2697a8ad66197264af8a713bf97152ed414c4b11910cc986c6adaa85bd86b4d35319675799edccf04aaff9ae48a58ca5c438cb6b5b95f60fffbfeec5e4faf  xsa256.patch
 1936ab39a1867957fa640eb81c4070214ca4856a2743ba7e49c0cd017917071a9680d015f002c57fa7b9600dbadd29dcea5887f50e6c133305df2669a7a933f3  qemu-xen_paths.patch
 f095ea373f36381491ad36f0662fb4f53665031973721256b23166e596318581da7cbb0146d0beb2446729adfdb321e01468e377793f6563a67d68b8b0f7ffe3  hotplug-vif-vtrill.patch
 77b08e9655e091b0352e4630d520b54c6ca6d659d1d38fbb4b3bfc9ff3e66db433a2e194ead32bb10ff962c382d800a670e82b7a62835b238e294b22808290ea  musl-hvmloader-fix-stdint.patch
diff --git a/main/xen/xsa247-4.9-2.patch b/main/xen/xsa247-4.9-2.patch
deleted file mode 100644
index 13737a9bf2..0000000000
--- a/main/xen/xsa247-4.9-2.patch
+++ /dev/null
@@ -1,109 +0,0 @@
-From d4bc7833707351a5341a6bdf04c752a028d9560d Mon Sep 17 00:00:00 2001
-From: George Dunlap <george.dunlap@citrix.com>
-Date: Fri, 10 Nov 2017 16:53:55 +0000
-Subject: [PATCH 2/2] p2m: Check return value of p2m_set_entry() when
- decreasing reservation
-
-If the entire range specified to p2m_pod_decrease_reservation() is marked
-populate-on-demand, then it will make a single p2m_set_entry() call,
-reducing its PoD entry count.
-
-Unfortunately, in the right circumstances, this p2m_set_entry() call
-may fail. In that case, repeated calls to decrease_reservation() may
-cause p2m->pod.entry_count to fall below zero, potentially tripping
-over BUG_ON()s to the contrary.
-
-Instead, check to see if the entry succeeded, and return false if not.
-The caller will then call guest_remove_page() on the gfns, which will
-return -EINVAL upon finding no valid memory there to return.
-
-Unfortunately if the order > 0, the entry may have partially changed.
-A domain_crash() is probably the safest thing in that case.
-
-Other p2m_set_entry() calls in the same function should be fine,
-because they are writing the entry at its current order. Nonetheless,
-check the return value and crash if our assumption turns out to be
-wrong.
-
-This is part of XSA-247.
-
-Reported-by: XXX PERSON <XXX EMAIL>
-Signed-off-by: George Dunlap <george.dunlap@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
----
-v2: Crash the domain if we're not sure it's safe (or if we think it
-can't happen)
----
- xen/arch/x86/mm/p2m-pod.c | 42 +++++++++++++++++++++++++++++++++---------
- 1 file changed, 33 insertions(+), 9 deletions(-)
-
-diff --git a/xen/arch/x86/mm/p2m-pod.c b/xen/arch/x86/mm/p2m-pod.c
-index f2ed751892..473d6a6dbf 100644
---- a/xen/arch/x86/mm/p2m-pod.c
-+++ b/xen/arch/x86/mm/p2m-pod.c
-@@ -555,11 +555,23 @@ p2m_pod_decrease_reservation(struct domain *d,
-
-     if ( !nonpod )
-     {
--        /* All PoD: Mark the whole region invalid and tell caller
--         * we're done. */
--        p2m_set_entry(p2m, gpfn, INVALID_MFN, order, p2m_invalid,
--                      p2m->default_access);
--        p2m->pod.entry_count-=(1<<order);
-+        /*
-+         * All PoD: Mark the whole region invalid and tell caller
-+         * we're done.
-+         */
-+        if ( p2m_set_entry(p2m, gpfn, INVALID_MFN, order, p2m_invalid,
-+                           p2m->default_access) )
-+        {
-+            /*
-+             * If this fails, we can't tell how much of the range was changed.
-+             * Best to crash the domain unless we're sure a partial change is
-+             * impossible.
-+             */
-+            if ( order != 0 )
-+                domain_crash(d);
-+            goto out_unlock;
-+        }
-+        p2m->pod.entry_count -= 1UL << order;
-         BUG_ON(p2m->pod.entry_count < 0);
-         ret = 1;
-         goto out_entry_check;
-@@ -600,8 +612,14 @@ p2m_pod_decrease_reservation(struct domain *d,
-         n = 1UL << cur_order;
-         if ( t == p2m_populate_on_demand )
-         {
--            p2m_set_entry(p2m, gpfn + i, INVALID_MFN, cur_order,
--                          p2m_invalid, p2m->default_access);
-+            /* This shouldn't be able to fail */
-+            if ( p2m_set_entry(p2m, gpfn + i, INVALID_MFN, cur_order,
-+                               p2m_invalid, p2m->default_access) )
-+            {
-+                ASSERT_UNREACHABLE();
-+                domain_crash(d);
-+                goto out_unlock;
-+            }
-             p2m->pod.entry_count -= n;
-             BUG_ON(p2m->pod.entry_count < 0);
-             pod -= n;
-@@ -622,8 +640,14 @@ p2m_pod_decrease_reservation(struct domain *d,
-
-         page = mfn_to_page(mfn);
-
--        p2m_set_entry(p2m, gpfn + i, INVALID_MFN, cur_order,
--                      p2m_invalid, p2m->default_access);
-+        /* This shouldn't be able to fail */
-+        if ( p2m_set_entry(p2m, gpfn + i, INVALID_MFN, cur_order,
-+                           p2m_invalid, p2m->default_access) )
-+        {
-+            ASSERT_UNREACHABLE();
-+            domain_crash(d);
-+            goto out_unlock;
-+        }
-         p2m_tlb_flush_sync(p2m);
-         for ( j = 0; j < n; ++j )
-             set_gpfn_from_mfn(mfn_x(mfn), INVALID_M2P_ENTRY);
---
-2.15.0
-
diff --git a/main/xen/xsa252.patch b/main/xen/xsa252.patch
deleted file mode 100644
index 8615928142..0000000000
--- a/main/xen/xsa252.patch
+++ /dev/null
@@ -1,27 +0,0 @@
-From: Jan Beulich <jbeulich@suse.com>
-Subject: memory: don't implicitly unpin for decrease-reservation
-
-It very likely was a mistake (copy-and-paste from domain cleanup code)
-to implicitly unpin here: The caller should really unpin itself before
-(or after, if they so wish) requesting the page to be removed.
-
-This is XSA-252.
-
-Reported-by: Jann Horn <jannh@google.com>
-Signed-off-by: Jan Beulich <jbeulich@suse.com>
-Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
-
---- a/xen/common/memory.c
-+++ b/xen/common/memory.c
-@@ -357,11 +357,6 @@ int guest_remove_page(struct domain *d,
-
-     rc = guest_physmap_remove_page(d, _gfn(gmfn), mfn, 0);
-
--#ifdef _PGT_pinned
--    if ( !rc && test_and_clear_bit(_PGT_pinned, &page->u.inuse.type_info) )
--        put_page_and_type(page);
--#endif
--
-     /*
-      * With the lack of an IOMMU on some platforms, domains with DMA-capable
-      * device must retrieve the same pfn when the hypercall populate_physmap
diff --git a/main/xen/xsa253-4.10.patch b/main/xen/xsa253-4.10.patch
deleted file mode 100644
index 19e4269358..0000000000
--- a/main/xen/xsa253-4.10.patch
+++ /dev/null
@@ -1,26 +0,0 @@
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Subject: x86/msr: Free msr_vcpu_policy during vcpu destruction
-
-c/s 4187f79dc7 "x86/msr: introduce struct msr_vcpu_policy" introduced a
-per-vcpu memory allocation, but failed to free it in the clean vcpu
-destruction case.
- -This is XSA-253 - -Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> - -diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c -index b17468c..0ae715d 100644 ---- a/xen/arch/x86/domain.c -+++ b/xen/arch/x86/domain.c -@@ -382,6 +382,9 @@ void vcpu_destroy(struct vcpu *v) - - vcpu_destroy_fpu(v); - -+ xfree(v->arch.msr); -+ v->arch.msr = NULL; -+ - if ( !is_idle_domain(v->domain) ) - vpmu_destroy(v); - diff --git a/main/xen/xsa254-4.10.patch b/main/xen/xsa254-4.10.patch deleted file mode 100644 index deb04bf549..0000000000 --- a/main/xen/xsa254-4.10.patch +++ /dev/null @@ -1,1373 +0,0 @@ -From 910dd005da20f27f3415b7eccdf436874989506b Mon Sep 17 00:00:00 2001 -From: Andrew Cooper <andrew.cooper3@citrix.com> -Date: Wed, 17 Jan 2018 16:54:44 +0100 -Subject: [PATCH] x86/entry: Remove support for partial cpu_user_regs frames - -Save all GPRs on entry to Xen. - -The entry_int82() path is via a DPL1 gate, only usable by 32bit PV guests, so -can get away with only saving the 32bit registers. All other entrypoints can -be reached from 32 or 64bit contexts. - -This is part of XSA-254. - -Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> -Reviewed-by: Wei Liu <wei.liu2@citrix.com> -Acked-by: Jan Beulich <jbeulich@suse.com> -master commit: f9eb74789af77e985ae653193f3622263499f674 -master date: 2018-01-05 19:57:07 +0000 ---- - tools/tests/x86_emulator/x86-emulate.c | 1 - - xen/arch/x86/pv/domain.c | 1 - - xen/arch/x86/pv/emul-priv-op.c | 2 - - xen/arch/x86/x86_64/compat/entry.S | 7 ++- - xen/arch/x86/x86_64/entry.S | 12 ++-- - xen/arch/x86/x86_64/traps.c | 13 ++-- - xen/arch/x86/x86_emulate.c | 1 - - xen/arch/x86/x86_emulate/x86_emulate.c | 8 +-- - xen/common/wait.c | 1 - - xen/include/asm-x86/asm_defns.h | 105 +++------------------------------ - 10 files changed, 26 insertions(+), 125 deletions(-) - -diff --git a/tools/tests/x86_emulator/x86-emulate.c b/tools/tests/x86_emulator/x86-emulate.c -index 975ddc7..9056610 100644 ---- a/tools/tests/x86_emulator/x86-emulate.c -+++ b/tools/tests/x86_emulator/x86-emulate.c -@@ -3,7 +3,6 @@ - #include <sys/mman.h> - - #define cpu_has_amd_erratum(nr) 0 --#define mark_regs_dirty(r) ((void)(r)) - #define cpu_has_mpx false - #define read_bndcfgu() 0 - #define xstate_set_init(what) -diff --git a/xen/arch/x86/pv/domain.c b/xen/arch/x86/pv/domain.c -index 2234128..74e9e66 100644 ---- a/xen/arch/x86/pv/domain.c -+++ b/xen/arch/x86/pv/domain.c -@@ -20,7 +20,6 @@ - static void noreturn continue_nonidle_domain(struct vcpu *v) - { - check_wakeup_from_wait(); -- mark_regs_dirty(guest_cpu_user_regs()); - reset_stack_and_jump(ret_from_intr); - } - -diff --git a/xen/arch/x86/pv/emul-priv-op.c b/xen/arch/x86/pv/emul-priv-op.c -index 2f92645..5f23c2c 100644 ---- a/xen/arch/x86/pv/emul-priv-op.c -+++ b/xen/arch/x86/pv/emul-priv-op.c -@@ -337,7 +337,6 @@ static int read_io(unsigned int port, unsigned int bytes, - io_emul_stub_t *io_emul = - io_emul_stub_setup(poc, ctxt->opcode, port, bytes); - -- mark_regs_dirty(ctxt->regs); - io_emul(ctxt->regs); - return X86EMUL_DONE; - } -@@ -436,7 +435,6 @@ static int write_io(unsigned int port, unsigned int bytes, - io_emul_stub_t *io_emul = - io_emul_stub_setup(poc, ctxt->opcode, port, bytes); - -- mark_regs_dirty(ctxt->regs); - io_emul(ctxt->regs); - if ( (bytes == 1) && pv_post_outb_hook ) - pv_post_outb_hook(port, val); -diff --git a/xen/arch/x86/x86_64/compat/entry.S b/xen/arch/x86/x86_64/compat/entry.S -index ba6e941..3fea54e 100644 ---- a/xen/arch/x86/x86_64/compat/entry.S 
-+++ b/xen/arch/x86/x86_64/compat/entry.S -@@ -16,7 +16,8 @@ - ENTRY(entry_int82) - ASM_CLAC - pushq $0 -- SAVE_VOLATILE type=HYPERCALL_VECTOR compat=1 -+ movl $HYPERCALL_VECTOR, 4(%rsp) -+ SAVE_ALL compat=1 /* DPL1 gate, restricted to 32bit PV guests only. */ - CR4_PV32_RESTORE - - GET_CURRENT(bx) -@@ -60,7 +61,6 @@ compat_test_guest_events: - /* %rbx: struct vcpu */ - compat_process_softirqs: - sti -- andl $~TRAP_regs_partial,UREGS_entry_vector(%rsp) - call do_softirq - jmp compat_test_all_events - -@@ -197,7 +197,8 @@ ENTRY(cstar_enter) - pushq $FLAT_USER_CS32 - pushq %rcx - pushq $0 -- SAVE_VOLATILE TRAP_syscall -+ movl $TRAP_syscall, 4(%rsp) -+ SAVE_ALL - GET_CURRENT(bx) - movq VCPU_domain(%rbx),%rcx - cmpb $0,DOMAIN_is_32bit_pv(%rcx) -diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S -index 6066ed8..1dd9ccf 100644 ---- a/xen/arch/x86/x86_64/entry.S -+++ b/xen/arch/x86/x86_64/entry.S -@@ -98,7 +98,8 @@ ENTRY(lstar_enter) - pushq $FLAT_KERNEL_CS64 - pushq %rcx - pushq $0 -- SAVE_VOLATILE TRAP_syscall -+ movl $TRAP_syscall, 4(%rsp) -+ SAVE_ALL - GET_CURRENT(bx) - testb $TF_kernel_mode,VCPU_thread_flags(%rbx) - jz switch_to_kernel -@@ -140,7 +141,6 @@ test_guest_events: - /* %rbx: struct vcpu */ - process_softirqs: - sti -- SAVE_PRESERVED - call do_softirq - jmp test_all_events - -@@ -190,7 +190,8 @@ GLOBAL(sysenter_eflags_saved) - pushq $3 /* ring 3 null cs */ - pushq $0 /* null rip */ - pushq $0 -- SAVE_VOLATILE TRAP_syscall -+ movl $TRAP_syscall, 4(%rsp) -+ SAVE_ALL - GET_CURRENT(bx) - cmpb $0,VCPU_sysenter_disables_events(%rbx) - movq VCPU_sysenter_addr(%rbx),%rax -@@ -207,7 +208,6 @@ UNLIKELY_END(sysenter_nt_set) - leal (,%rcx,TBF_INTERRUPT),%ecx - UNLIKELY_START(z, sysenter_gpf) - movq VCPU_trap_ctxt(%rbx),%rsi -- SAVE_PRESERVED - movl $TRAP_gp_fault,UREGS_entry_vector(%rsp) - movl %eax,TRAPBOUNCE_error_code(%rdx) - movq TRAP_gp_fault * TRAPINFO_sizeof + TRAPINFO_eip(%rsi),%rax -@@ -225,7 +225,8 @@ UNLIKELY_END(sysenter_gpf) - ENTRY(int80_direct_trap) - ASM_CLAC - pushq $0 -- SAVE_VOLATILE 0x80 -+ movl $0x80, 4(%rsp) -+ SAVE_ALL - - cmpb $0,untrusted_msi(%rip) - UNLIKELY_START(ne, msi_check) -@@ -253,7 +254,6 @@ int80_slow_path: - * IDT entry with DPL==0. - */ - movl $((0x80 << 3) | X86_XEC_IDT),UREGS_error_code(%rsp) -- SAVE_PRESERVED - movl $TRAP_gp_fault,UREGS_entry_vector(%rsp) - /* A GPF wouldn't have incremented the instruction pointer. 
*/ - subq $2,UREGS_rip(%rsp) -diff --git a/xen/arch/x86/x86_64/traps.c b/xen/arch/x86/x86_64/traps.c -index 2a326be..3652f5f 100644 ---- a/xen/arch/x86/x86_64/traps.c -+++ b/xen/arch/x86/x86_64/traps.c -@@ -80,15 +80,10 @@ static void _show_registers( - regs->rbp, regs->rsp, regs->r8); - printk("r9: %016lx r10: %016lx r11: %016lx\n", - regs->r9, regs->r10, regs->r11); -- if ( !(regs->entry_vector & TRAP_regs_partial) ) -- { -- printk("r12: %016lx r13: %016lx r14: %016lx\n", -- regs->r12, regs->r13, regs->r14); -- printk("r15: %016lx cr0: %016lx cr4: %016lx\n", -- regs->r15, crs[0], crs[4]); -- } -- else -- printk("cr0: %016lx cr4: %016lx\n", crs[0], crs[4]); -+ printk("r12: %016lx r13: %016lx r14: %016lx\n", -+ regs->r12, regs->r13, regs->r14); -+ printk("r15: %016lx cr0: %016lx cr4: %016lx\n", -+ regs->r15, crs[0], crs[4]); - printk("cr3: %016lx cr2: %016lx\n", crs[3], crs[2]); - printk("fsb: %016lx gsb: %016lx gss: %016lx\n", - crs[5], crs[6], crs[7]); -diff --git a/xen/arch/x86/x86_emulate.c b/xen/arch/x86/x86_emulate.c -index cc334ca..c7ba221 100644 ---- a/xen/arch/x86/x86_emulate.c -+++ b/xen/arch/x86/x86_emulate.c -@@ -11,7 +11,6 @@ - - #include <xen/domain_page.h> - #include <asm/x86_emulate.h> --#include <asm/asm_defns.h> /* mark_regs_dirty() */ - #include <asm/processor.h> /* current_cpu_info */ - #include <asm/xstate.h> - #include <asm/amd.h> /* cpu_has_amd_erratum() */ -diff --git a/xen/arch/x86/x86_emulate/x86_emulate.c b/xen/arch/x86/x86_emulate/x86_emulate.c -index 54a2756..820495f 100644 ---- a/xen/arch/x86/x86_emulate/x86_emulate.c -+++ b/xen/arch/x86/x86_emulate/x86_emulate.c -@@ -1956,10 +1956,10 @@ decode_register( - case 9: p = ®s->r9; break; - case 10: p = ®s->r10; break; - case 11: p = ®s->r11; break; -- case 12: mark_regs_dirty(regs); p = ®s->r12; break; -- case 13: mark_regs_dirty(regs); p = ®s->r13; break; -- case 14: mark_regs_dirty(regs); p = ®s->r14; break; -- case 15: mark_regs_dirty(regs); p = ®s->r15; break; -+ case 12: p = ®s->r12; break; -+ case 13: p = ®s->r13; break; -+ case 14: p = ®s->r14; break; -+ case 15: p = ®s->r15; break; - #endif - default: BUG(); p = NULL; break; - } -diff --git a/xen/common/wait.c b/xen/common/wait.c -index 9490a17..c5fc094 100644 ---- a/xen/common/wait.c -+++ b/xen/common/wait.c -@@ -127,7 +127,6 @@ static void __prepare_to_wait(struct waitqueue_vcpu *wqv) - unsigned long dummy; - u32 entry_vector = cpu_info->guest_cpu_user_regs.entry_vector; - -- cpu_info->guest_cpu_user_regs.entry_vector &= ~TRAP_regs_partial; - ASSERT(wqv->esp == 0); - - /* Save current VCPU affinity; force wakeup on *this* CPU only. */ -diff --git a/xen/include/asm-x86/asm_defns.h b/xen/include/asm-x86/asm_defns.h -index 388fc93..98192eb 100644 ---- a/xen/include/asm-x86/asm_defns.h -+++ b/xen/include/asm-x86/asm_defns.h -@@ -17,15 +17,6 @@ - void ret_from_intr(void); - #endif - --#ifdef CONFIG_FRAME_POINTER --/* Indicate special exception stack frame by inverting the frame pointer. */ --#define SETUP_EXCEPTION_FRAME_POINTER(offs) \ -- leaq offs(%rsp),%rbp; \ -- notq %rbp --#else --#define SETUP_EXCEPTION_FRAME_POINTER(offs) --#endif -- - #ifndef NDEBUG - #define ASSERT_INTERRUPT_STATUS(x, msg) \ - pushf; \ -@@ -42,31 +33,6 @@ void ret_from_intr(void); - #define ASSERT_INTERRUPTS_DISABLED \ - ASSERT_INTERRUPT_STATUS(z, "INTERRUPTS DISABLED") - --/* -- * This flag is set in an exception frame when registers R12-R15 did not get -- * saved. 
-- */ --#define _TRAP_regs_partial 16 --#define TRAP_regs_partial (1 << _TRAP_regs_partial) --/* -- * This flag gets set in an exception frame when registers R12-R15 possibly -- * get modified from their originally saved values and hence need to be -- * restored even if the normal call flow would restore register values. -- * -- * The flag being set implies _TRAP_regs_partial to be unset. Restoring -- * R12-R15 thus is -- * - required when this flag is set, -- * - safe when _TRAP_regs_partial is unset. -- */ --#define _TRAP_regs_dirty 17 --#define TRAP_regs_dirty (1 << _TRAP_regs_dirty) -- --#define mark_regs_dirty(r) ({ \ -- struct cpu_user_regs *r__ = (r); \ -- ASSERT(!((r__)->entry_vector & TRAP_regs_partial)); \ -- r__->entry_vector |= TRAP_regs_dirty; \ --}) -- - #ifdef __ASSEMBLY__ - # define _ASM_EX(p) p-. - #else -@@ -236,7 +202,7 @@ static always_inline void stac(void) - #endif - - #ifdef __ASSEMBLY__ --.macro SAVE_ALL op -+.macro SAVE_ALL op, compat=0 - .ifeqs "\op", "CLAC" - ASM_CLAC - .else -@@ -255,40 +221,6 @@ static always_inline void stac(void) - movq %rdx,UREGS_rdx(%rsp) - movq %rcx,UREGS_rcx(%rsp) - movq %rax,UREGS_rax(%rsp) -- movq %r8,UREGS_r8(%rsp) -- movq %r9,UREGS_r9(%rsp) -- movq %r10,UREGS_r10(%rsp) -- movq %r11,UREGS_r11(%rsp) -- movq %rbx,UREGS_rbx(%rsp) -- movq %rbp,UREGS_rbp(%rsp) -- SETUP_EXCEPTION_FRAME_POINTER(UREGS_rbp) -- movq %r12,UREGS_r12(%rsp) -- movq %r13,UREGS_r13(%rsp) -- movq %r14,UREGS_r14(%rsp) -- movq %r15,UREGS_r15(%rsp) --.endm -- --/* -- * Save all registers not preserved by C code or used in entry/exit code. Mark -- * the frame as partial. -- * -- * @type: exception type -- * @compat: R8-R15 don't need saving, and the frame nevertheless is complete -- */ --.macro SAVE_VOLATILE type compat=0 --.if \compat -- movl $\type,UREGS_entry_vector-UREGS_error_code(%rsp) --.else -- movl $\type|TRAP_regs_partial,\ -- UREGS_entry_vector-UREGS_error_code(%rsp) --.endif -- addq $-(UREGS_error_code-UREGS_r15),%rsp -- cld -- movq %rdi,UREGS_rdi(%rsp) -- movq %rsi,UREGS_rsi(%rsp) -- movq %rdx,UREGS_rdx(%rsp) -- movq %rcx,UREGS_rcx(%rsp) -- movq %rax,UREGS_rax(%rsp) - .if !\compat - movq %r8,UREGS_r8(%rsp) - movq %r9,UREGS_r9(%rsp) -@@ -297,20 +229,17 @@ static always_inline void stac(void) - .endif - movq %rbx,UREGS_rbx(%rsp) - movq %rbp,UREGS_rbp(%rsp) -- SETUP_EXCEPTION_FRAME_POINTER(UREGS_rbp) --.endm -- --/* -- * Complete a frame potentially only partially saved. -- */ --.macro SAVE_PRESERVED -- btrl $_TRAP_regs_partial,UREGS_entry_vector(%rsp) -- jnc 987f -+#ifdef CONFIG_FRAME_POINTER -+/* Indicate special exception stack frame by inverting the frame pointer. 
*/ -+ leaq UREGS_rbp(%rsp), %rbp -+ notq %rbp -+#endif -+.if !\compat - movq %r12,UREGS_r12(%rsp) - movq %r13,UREGS_r13(%rsp) - movq %r14,UREGS_r14(%rsp) - movq %r15,UREGS_r15(%rsp) --987: -+.endif - .endm - - #define LOAD_ONE_REG(reg, compat) \ -@@ -330,7 +259,6 @@ static always_inline void stac(void) - */ - .macro RESTORE_ALL adj=0 compat=0 - .if !\compat -- testl $TRAP_regs_dirty,UREGS_entry_vector(%rsp) - movq UREGS_r11(%rsp),%r11 - movq UREGS_r10(%rsp),%r10 - movq UREGS_r9(%rsp),%r9 -@@ -347,33 +275,16 @@ static always_inline void stac(void) - LOAD_ONE_REG(si, \compat) - LOAD_ONE_REG(di, \compat) - .if !\compat -- jz 987f - movq UREGS_r15(%rsp),%r15 - movq UREGS_r14(%rsp),%r14 - movq UREGS_r13(%rsp),%r13 - movq UREGS_r12(%rsp),%r12 --#ifndef NDEBUG -- .subsection 1 --987: testl $TRAP_regs_partial,UREGS_entry_vector(%rsp) -- jnz 987f -- cmpq UREGS_r15(%rsp),%r15 -- jne 789f -- cmpq UREGS_r14(%rsp),%r14 -- jne 789f -- cmpq UREGS_r13(%rsp),%r13 -- jne 789f -- cmpq UREGS_r12(%rsp),%r12 -- je 987f --789: BUG /* Corruption of partial register state. */ -- .subsection 0 --#endif - .else - xor %r15, %r15 - xor %r14, %r14 - xor %r13, %r13 - xor %r12, %r12 - .endif --987: - LOAD_ONE_REG(bp, \compat) - LOAD_ONE_REG(bx, \compat) - subq $-(UREGS_error_code-UREGS_r15+\adj), %rsp --- -2.1.4 - -From 57dc197cf0d36c56ba1d9d32c6a1454bb52605bb Mon Sep 17 00:00:00 2001 -From: Andrew Cooper <andrew.cooper3@citrix.com> -Date: Wed, 17 Jan 2018 16:56:03 +0100 -Subject: [PATCH] x86/mm: Always set _PAGE_ACCESSED on L4e updates - -Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> -master commit: bd61fe94bee0556bc2f64999a4a8315b93f90f21 -master date: 2018-01-15 13:53:16 +0000 ---- - xen/arch/x86/pv/mm.h | 18 +++++++++++++++--- - 1 file changed, 15 insertions(+), 3 deletions(-) - -diff --git a/xen/arch/x86/pv/mm.h b/xen/arch/x86/pv/mm.h -index 7502d53..976209b 100644 ---- a/xen/arch/x86/pv/mm.h -+++ b/xen/arch/x86/pv/mm.h -@@ -144,9 +144,21 @@ static inline l3_pgentry_t unadjust_guest_l3e(l3_pgentry_t l3e, - static inline l4_pgentry_t adjust_guest_l4e(l4_pgentry_t l4e, - const struct domain *d) - { -- if ( likely(l4e_get_flags(l4e) & _PAGE_PRESENT) && -- likely(!is_pv_32bit_domain(d)) ) -- l4e_add_flags(l4e, _PAGE_USER); -+ /* -+ * When shadowing an L4 behind the guests back (e.g. for per-pcpu -+ * purposes), we cannot efficiently sync access bit updates from hardware -+ * (on the shadow tables) back into the guest view. -+ * -+ * We therefore unconditionally set _PAGE_ACCESSED even in the guests -+ * view. This will appear to the guest as a CPU which proactively pulls -+ * all valid L4e's into its TLB, which is compatible with the x86 ABI. -+ * -+ * At the time of writing, all PV guests set the access bit anyway, so -+ * this is no actual change in their behaviour. -+ */ -+ if ( likely(l4e_get_flags(l4e) & _PAGE_PRESENT) ) -+ l4e_add_flags(l4e, (_PAGE_ACCESSED | -+ (is_pv_32bit_domain(d) ? 0 : _PAGE_USER))); - - return l4e; - } --- -2.1.4 - -From 234f481337ea1a93db968d614649a6bdfdc8418a Mon Sep 17 00:00:00 2001 -From: Jan Beulich <jbeulich@suse.com> -Date: Wed, 17 Jan 2018 16:56:57 +0100 -Subject: [PATCH] x86: Meltdown band-aid against malicious 64-bit PV guests - -This is a very simplistic change limiting the amount of memory a running -64-bit PV guest has mapped (and hence available for attacking): Only the -mappings of stack, IDT, and TSS are being cloned from the direct map -into per-CPU page tables. 
Guest controlled parts of the page tables are -being copied into those per-CPU page tables upon entry into the guest. -Cross-vCPU synchronization of top level page table entry changes is -being effected by forcing other active vCPU-s of the guest into the -hypervisor. - -The change to context_switch() isn't strictly necessary, but there's no -reason to keep switching page tables once a PV guest is being scheduled -out. - -This isn't providing full isolation yet, but it should be covering all -pieces of information exposure of which would otherwise require an XSA. - -There is certainly much room for improvement, especially of performance, -here - first and foremost suppressing all the negative effects on AMD -systems. But in the interest of backportability (including to really old -hypervisors, which may not even have alternative patching) any such is -being left out here. - -Signed-off-by: Jan Beulich <jbeulich@suse.com> -Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com> -master commit: 5784de3e2067ed73efc2fe42e62831e8ae7f46c4 -master date: 2018-01-16 17:49:03 +0100 ---- - xen/arch/x86/domain.c | 5 + - xen/arch/x86/mm.c | 21 ++++ - xen/arch/x86/smpboot.c | 198 +++++++++++++++++++++++++++++++++++++ - xen/arch/x86/x86_64/asm-offsets.c | 2 + - xen/arch/x86/x86_64/compat/entry.S | 11 +++ - xen/arch/x86/x86_64/entry.S | 149 +++++++++++++++++++++++++++- - xen/include/asm-x86/asm_defns.h | 30 ++++++ - xen/include/asm-x86/current.h | 12 +++ - xen/include/asm-x86/processor.h | 1 + - xen/include/asm-x86/x86_64/page.h | 5 +- - 10 files changed, 428 insertions(+), 6 deletions(-) - -diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c -index b44c95b..f4a3d74 100644 ---- a/xen/arch/x86/domain.c -+++ b/xen/arch/x86/domain.c -@@ -1507,6 +1507,9 @@ void paravirt_ctxt_switch_to(struct vcpu *v) - { - unsigned long cr4; - -+ this_cpu(root_pgt)[root_table_offset(PERDOMAIN_VIRT_START)] = -+ l4e_from_page(v->domain->arch.perdomain_l3_pg, __PAGE_HYPERVISOR_RW); -+ - cr4 = pv_guest_cr4_to_real_cr4(v); - if ( unlikely(cr4 != read_cr4()) ) - write_cr4(cr4); -@@ -1676,6 +1679,8 @@ void context_switch(struct vcpu *prev, struct vcpu *next) - - ASSERT(local_irq_is_enabled()); - -+ get_cpu_info()->xen_cr3 = 0; -+ - cpumask_copy(&dirty_mask, next->vcpu_dirty_cpumask); - /* Allow at most one CPU at a time to be dirty. */ - ASSERT(cpumask_weight(&dirty_mask) <= 1); -diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c -index a7a76a7..6c7d120 100644 ---- a/xen/arch/x86/mm.c -+++ b/xen/arch/x86/mm.c -@@ -3509,6 +3509,7 @@ long do_mmu_update( - struct vcpu *curr = current, *v = curr; - struct domain *d = v->domain, *pt_owner = d, *pg_owner; - mfn_t map_mfn = INVALID_MFN; -+ bool sync_guest = false; - uint32_t xsm_needed = 0; - uint32_t xsm_checked = 0; - int rc = put_old_guest_table(curr); -@@ -3663,6 +3664,8 @@ long do_mmu_update( - case PGT_l4_page_table: - rc = mod_l4_entry(va, l4e_from_intpte(req.val), mfn, - cmd == MMU_PT_UPDATE_PRESERVE_AD, v); -+ if ( !rc ) -+ sync_guest = true; - break; - case PGT_writable_page: - perfc_incr(writable_mmu_updates); -@@ -3765,6 +3768,24 @@ long do_mmu_update( - if ( va ) - unmap_domain_page(va); - -+ if ( sync_guest ) -+ { -+ /* -+ * Force other vCPU-s of the affected guest to pick up L4 entry -+ * changes (if any). Issue a flush IPI with empty operation mask to -+ * facilitate this (including ourselves waiting for the IPI to -+ * actually have arrived). 
Utilize the fact that FLUSH_VA_VALID is -+ * meaningless without FLUSH_CACHE, but will allow to pass the no-op -+ * check in flush_area_mask(). -+ */ -+ unsigned int cpu = smp_processor_id(); -+ cpumask_t *mask = per_cpu(scratch_cpumask, cpu); -+ -+ cpumask_andnot(mask, pt_owner->domain_dirty_cpumask, cpumask_of(cpu)); -+ if ( !cpumask_empty(mask) ) -+ flush_area_mask(mask, ZERO_BLOCK_PTR, FLUSH_VA_VALID); -+ } -+ - perfc_add(num_page_updates, i); - - out: -diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c -index 1609b62..b1fbb57 100644 ---- a/xen/arch/x86/smpboot.c -+++ b/xen/arch/x86/smpboot.c -@@ -327,6 +327,9 @@ void start_secondary(void *unused) - */ - spin_debug_disable(); - -+ get_cpu_info()->xen_cr3 = 0; -+ get_cpu_info()->pv_cr3 = __pa(this_cpu(root_pgt)); -+ - load_system_tables(); - - /* Full exception support from here on in. */ -@@ -635,6 +638,187 @@ void cpu_exit_clear(unsigned int cpu) - set_cpu_state(CPU_STATE_DEAD); - } - -+static int clone_mapping(const void *ptr, root_pgentry_t *rpt) -+{ -+ unsigned long linear = (unsigned long)ptr, pfn; -+ unsigned int flags; -+ l3_pgentry_t *pl3e = l4e_to_l3e(idle_pg_table[root_table_offset(linear)]) + -+ l3_table_offset(linear); -+ l2_pgentry_t *pl2e; -+ l1_pgentry_t *pl1e; -+ -+ if ( linear < DIRECTMAP_VIRT_START ) -+ return 0; -+ -+ flags = l3e_get_flags(*pl3e); -+ ASSERT(flags & _PAGE_PRESENT); -+ if ( flags & _PAGE_PSE ) -+ { -+ pfn = (l3e_get_pfn(*pl3e) & ~((1UL << (2 * PAGETABLE_ORDER)) - 1)) | -+ (PFN_DOWN(linear) & ((1UL << (2 * PAGETABLE_ORDER)) - 1)); -+ flags &= ~_PAGE_PSE; -+ } -+ else -+ { -+ pl2e = l3e_to_l2e(*pl3e) + l2_table_offset(linear); -+ flags = l2e_get_flags(*pl2e); -+ ASSERT(flags & _PAGE_PRESENT); -+ if ( flags & _PAGE_PSE ) -+ { -+ pfn = (l2e_get_pfn(*pl2e) & ~((1UL << PAGETABLE_ORDER) - 1)) | -+ (PFN_DOWN(linear) & ((1UL << PAGETABLE_ORDER) - 1)); -+ flags &= ~_PAGE_PSE; -+ } -+ else -+ { -+ pl1e = l2e_to_l1e(*pl2e) + l1_table_offset(linear); -+ flags = l1e_get_flags(*pl1e); -+ if ( !(flags & _PAGE_PRESENT) ) -+ return 0; -+ pfn = l1e_get_pfn(*pl1e); -+ } -+ } -+ -+ if ( !(root_get_flags(rpt[root_table_offset(linear)]) & _PAGE_PRESENT) ) -+ { -+ pl3e = alloc_xen_pagetable(); -+ if ( !pl3e ) -+ return -ENOMEM; -+ clear_page(pl3e); -+ l4e_write(&rpt[root_table_offset(linear)], -+ l4e_from_paddr(__pa(pl3e), __PAGE_HYPERVISOR)); -+ } -+ else -+ pl3e = l4e_to_l3e(rpt[root_table_offset(linear)]); -+ -+ pl3e += l3_table_offset(linear); -+ -+ if ( !(l3e_get_flags(*pl3e) & _PAGE_PRESENT) ) -+ { -+ pl2e = alloc_xen_pagetable(); -+ if ( !pl2e ) -+ return -ENOMEM; -+ clear_page(pl2e); -+ l3e_write(pl3e, l3e_from_paddr(__pa(pl2e), __PAGE_HYPERVISOR)); -+ } -+ else -+ { -+ ASSERT(!(l3e_get_flags(*pl3e) & _PAGE_PSE)); -+ pl2e = l3e_to_l2e(*pl3e); -+ } -+ -+ pl2e += l2_table_offset(linear); -+ -+ if ( !(l2e_get_flags(*pl2e) & _PAGE_PRESENT) ) -+ { -+ pl1e = alloc_xen_pagetable(); -+ if ( !pl1e ) -+ return -ENOMEM; -+ clear_page(pl1e); -+ l2e_write(pl2e, l2e_from_paddr(__pa(pl1e), __PAGE_HYPERVISOR)); -+ } -+ else -+ { -+ ASSERT(!(l2e_get_flags(*pl2e) & _PAGE_PSE)); -+ pl1e = l2e_to_l1e(*pl2e); -+ } -+ -+ pl1e += l1_table_offset(linear); -+ -+ if ( l1e_get_flags(*pl1e) & _PAGE_PRESENT ) -+ { -+ ASSERT(l1e_get_pfn(*pl1e) == pfn); -+ ASSERT(l1e_get_flags(*pl1e) == flags); -+ } -+ else -+ l1e_write(pl1e, l1e_from_pfn(pfn, flags)); -+ -+ return 0; -+} -+ -+DEFINE_PER_CPU(root_pgentry_t *, root_pgt); -+ -+static int setup_cpu_root_pgt(unsigned int cpu) -+{ -+ root_pgentry_t *rpt = alloc_xen_pagetable(); -+ unsigned int 
off; -+ int rc; -+ -+ if ( !rpt ) -+ return -ENOMEM; -+ -+ clear_page(rpt); -+ per_cpu(root_pgt, cpu) = rpt; -+ -+ rpt[root_table_offset(RO_MPT_VIRT_START)] = -+ idle_pg_table[root_table_offset(RO_MPT_VIRT_START)]; -+ /* SH_LINEAR_PT inserted together with guest mappings. */ -+ /* PERDOMAIN inserted during context switch. */ -+ rpt[root_table_offset(XEN_VIRT_START)] = -+ idle_pg_table[root_table_offset(XEN_VIRT_START)]; -+ -+ /* Install direct map page table entries for stack, IDT, and TSS. */ -+ for ( off = rc = 0; !rc && off < STACK_SIZE; off += PAGE_SIZE ) -+ rc = clone_mapping(__va(__pa(stack_base[cpu])) + off, rpt); -+ -+ if ( !rc ) -+ rc = clone_mapping(idt_tables[cpu], rpt); -+ if ( !rc ) -+ rc = clone_mapping(&per_cpu(init_tss, cpu), rpt); -+ -+ return rc; -+} -+ -+static void cleanup_cpu_root_pgt(unsigned int cpu) -+{ -+ root_pgentry_t *rpt = per_cpu(root_pgt, cpu); -+ unsigned int r; -+ -+ if ( !rpt ) -+ return; -+ -+ per_cpu(root_pgt, cpu) = NULL; -+ -+ for ( r = root_table_offset(DIRECTMAP_VIRT_START); -+ r < root_table_offset(HYPERVISOR_VIRT_END); ++r ) -+ { -+ l3_pgentry_t *l3t; -+ unsigned int i3; -+ -+ if ( !(root_get_flags(rpt[r]) & _PAGE_PRESENT) ) -+ continue; -+ -+ l3t = l4e_to_l3e(rpt[r]); -+ -+ for ( i3 = 0; i3 < L3_PAGETABLE_ENTRIES; ++i3 ) -+ { -+ l2_pgentry_t *l2t; -+ unsigned int i2; -+ -+ if ( !(l3e_get_flags(l3t[i3]) & _PAGE_PRESENT) ) -+ continue; -+ -+ ASSERT(!(l3e_get_flags(l3t[i3]) & _PAGE_PSE)); -+ l2t = l3e_to_l2e(l3t[i3]); -+ -+ for ( i2 = 0; i2 < L2_PAGETABLE_ENTRIES; ++i2 ) -+ { -+ if ( !(l2e_get_flags(l2t[i2]) & _PAGE_PRESENT) ) -+ continue; -+ -+ ASSERT(!(l2e_get_flags(l2t[i2]) & _PAGE_PSE)); -+ free_xen_pagetable(l2e_to_l1e(l2t[i2])); -+ } -+ -+ free_xen_pagetable(l2t); -+ } -+ -+ free_xen_pagetable(l3t); -+ } -+ -+ free_xen_pagetable(rpt); -+} -+ - static void cpu_smpboot_free(unsigned int cpu) - { - unsigned int order, socket = cpu_to_socket(cpu); -@@ -673,6 +857,8 @@ static void cpu_smpboot_free(unsigned int cpu) - free_domheap_page(mfn_to_page(mfn)); - } - -+ cleanup_cpu_root_pgt(cpu); -+ - order = get_order_from_pages(NR_RESERVED_GDT_PAGES); - free_xenheap_pages(per_cpu(gdt_table, cpu), order); - -@@ -728,6 +914,9 @@ static int cpu_smpboot_alloc(unsigned int cpu) - set_ist(&idt_tables[cpu][TRAP_nmi], IST_NONE); - set_ist(&idt_tables[cpu][TRAP_machine_check], IST_NONE); - -+ if ( setup_cpu_root_pgt(cpu) ) -+ goto oom; -+ - for ( stub_page = 0, i = cpu & ~(STUBS_PER_PAGE - 1); - i < nr_cpu_ids && i <= (cpu | (STUBS_PER_PAGE - 1)); ++i ) - if ( cpu_online(i) && cpu_to_node(i) == node ) -@@ -783,6 +972,8 @@ static struct notifier_block cpu_smpboot_nfb = { - - void __init smp_prepare_cpus(unsigned int max_cpus) - { -+ int rc; -+ - register_cpu_notifier(&cpu_smpboot_nfb); - - mtrr_aps_sync_begin(); -@@ -796,6 +987,11 @@ void __init smp_prepare_cpus(unsigned int max_cpus) - - stack_base[0] = stack_start; - -+ rc = setup_cpu_root_pgt(0); -+ if ( rc ) -+ panic("Error %d setting up PV root page table\n", rc); -+ get_cpu_info()->pv_cr3 = __pa(per_cpu(root_pgt, 0)); -+ - set_nr_sockets(); - - socket_cpumask = xzalloc_array(cpumask_t *, nr_sockets); -@@ -864,6 +1060,8 @@ void __init smp_prepare_boot_cpu(void) - #if NR_CPUS > 2 * BITS_PER_LONG - per_cpu(scratch_cpumask, cpu) = &scratch_cpu0mask; - #endif -+ -+ get_cpu_info()->xen_cr3 = 0; - } - - static void -diff --git a/xen/arch/x86/x86_64/asm-offsets.c b/xen/arch/x86/x86_64/asm-offsets.c -index e136af6..b1a4310 100644 ---- a/xen/arch/x86/x86_64/asm-offsets.c -+++ b/xen/arch/x86/x86_64/asm-offsets.c -@@ 
-137,6 +137,8 @@ void __dummy__(void) - OFFSET(CPUINFO_processor_id, struct cpu_info, processor_id); - OFFSET(CPUINFO_current_vcpu, struct cpu_info, current_vcpu); - OFFSET(CPUINFO_cr4, struct cpu_info, cr4); -+ OFFSET(CPUINFO_xen_cr3, struct cpu_info, xen_cr3); -+ OFFSET(CPUINFO_pv_cr3, struct cpu_info, pv_cr3); - DEFINE(CPUINFO_sizeof, sizeof(struct cpu_info)); - BLANK(); - -diff --git a/xen/arch/x86/x86_64/compat/entry.S b/xen/arch/x86/x86_64/compat/entry.S -index 3fea54e..e668f00 100644 ---- a/xen/arch/x86/x86_64/compat/entry.S -+++ b/xen/arch/x86/x86_64/compat/entry.S -@@ -199,6 +199,17 @@ ENTRY(cstar_enter) - pushq $0 - movl $TRAP_syscall, 4(%rsp) - SAVE_ALL -+ -+ GET_STACK_END(bx) -+ mov STACK_CPUINFO_FIELD(xen_cr3)(%rbx), %rcx -+ neg %rcx -+ jz .Lcstar_cr3_okay -+ mov %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%rbx) -+ neg %rcx -+ write_cr3 rcx, rdi, rsi -+ movq $0, STACK_CPUINFO_FIELD(xen_cr3)(%rbx) -+.Lcstar_cr3_okay: -+ - GET_CURRENT(bx) - movq VCPU_domain(%rbx),%rcx - cmpb $0,DOMAIN_is_32bit_pv(%rcx) -diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S -index 1dd9ccf..fc38874 100644 ---- a/xen/arch/x86/x86_64/entry.S -+++ b/xen/arch/x86/x86_64/entry.S -@@ -37,6 +37,32 @@ ENTRY(switch_to_kernel) - /* %rbx: struct vcpu, interrupts disabled */ - restore_all_guest: - ASSERT_INTERRUPTS_DISABLED -+ -+ /* Copy guest mappings and switch to per-CPU root page table. */ -+ mov %cr3, %r9 -+ GET_STACK_END(dx) -+ mov STACK_CPUINFO_FIELD(pv_cr3)(%rdx), %rdi -+ movabs $PADDR_MASK & PAGE_MASK, %rsi -+ movabs $DIRECTMAP_VIRT_START, %rcx -+ mov %rdi, %rax -+ and %rsi, %rdi -+ and %r9, %rsi -+ add %rcx, %rdi -+ add %rcx, %rsi -+ mov $ROOT_PAGETABLE_FIRST_XEN_SLOT, %ecx -+ mov root_table_offset(SH_LINEAR_PT_VIRT_START)*8(%rsi), %r8 -+ mov %r8, root_table_offset(SH_LINEAR_PT_VIRT_START)*8(%rdi) -+ rep movsq -+ mov $ROOT_PAGETABLE_ENTRIES - \ -+ ROOT_PAGETABLE_LAST_XEN_SLOT - 1, %ecx -+ sub $(ROOT_PAGETABLE_FIRST_XEN_SLOT - \ -+ ROOT_PAGETABLE_LAST_XEN_SLOT - 1) * 8, %rsi -+ sub $(ROOT_PAGETABLE_FIRST_XEN_SLOT - \ -+ ROOT_PAGETABLE_LAST_XEN_SLOT - 1) * 8, %rdi -+ rep movsq -+ mov %r9, STACK_CPUINFO_FIELD(xen_cr3)(%rdx) -+ write_cr3 rax, rdi, rsi -+ - RESTORE_ALL - testw $TRAP_syscall,4(%rsp) - jz iret_exit_to_guest -@@ -71,6 +97,22 @@ iret_exit_to_guest: - ALIGN - /* No special register assumptions. */ - restore_all_xen: -+ /* -+ * Check whether we need to switch to the per-CPU page tables, in -+ * case we return to late PV exit code (from an NMI or #MC). -+ */ -+ GET_STACK_END(ax) -+ mov STACK_CPUINFO_FIELD(xen_cr3)(%rax), %rdx -+ mov STACK_CPUINFO_FIELD(pv_cr3)(%rax), %rax -+ test %rdx, %rdx -+ /* -+ * Ideally the condition would be "nsz", but such doesn't exist, -+ * so "g" will have to do. 
-+ */ -+UNLIKELY_START(g, exit_cr3) -+ write_cr3 rax, rdi, rsi -+UNLIKELY_END(exit_cr3) -+ - RESTORE_ALL adj=8 - iretq - -@@ -100,7 +142,18 @@ ENTRY(lstar_enter) - pushq $0 - movl $TRAP_syscall, 4(%rsp) - SAVE_ALL -- GET_CURRENT(bx) -+ -+ GET_STACK_END(bx) -+ mov STACK_CPUINFO_FIELD(xen_cr3)(%rbx), %rcx -+ neg %rcx -+ jz .Llstar_cr3_okay -+ mov %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%rbx) -+ neg %rcx -+ write_cr3 rcx, rdi, rsi -+ movq $0, STACK_CPUINFO_FIELD(xen_cr3)(%rbx) -+.Llstar_cr3_okay: -+ -+ __GET_CURRENT(bx) - testb $TF_kernel_mode,VCPU_thread_flags(%rbx) - jz switch_to_kernel - -@@ -192,7 +245,18 @@ GLOBAL(sysenter_eflags_saved) - pushq $0 - movl $TRAP_syscall, 4(%rsp) - SAVE_ALL -- GET_CURRENT(bx) -+ -+ GET_STACK_END(bx) -+ mov STACK_CPUINFO_FIELD(xen_cr3)(%rbx), %rcx -+ neg %rcx -+ jz .Lsyse_cr3_okay -+ mov %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%rbx) -+ neg %rcx -+ write_cr3 rcx, rdi, rsi -+ movq $0, STACK_CPUINFO_FIELD(xen_cr3)(%rbx) -+.Lsyse_cr3_okay: -+ -+ __GET_CURRENT(bx) - cmpb $0,VCPU_sysenter_disables_events(%rbx) - movq VCPU_sysenter_addr(%rbx),%rax - setne %cl -@@ -228,13 +292,23 @@ ENTRY(int80_direct_trap) - movl $0x80, 4(%rsp) - SAVE_ALL - -+ GET_STACK_END(bx) -+ mov STACK_CPUINFO_FIELD(xen_cr3)(%rbx), %rcx -+ neg %rcx -+ jz .Lint80_cr3_okay -+ mov %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%rbx) -+ neg %rcx -+ write_cr3 rcx, rdi, rsi -+ movq $0, STACK_CPUINFO_FIELD(xen_cr3)(%rbx) -+.Lint80_cr3_okay: -+ - cmpb $0,untrusted_msi(%rip) - UNLIKELY_START(ne, msi_check) - movl $0x80,%edi - call check_for_unexpected_msi - UNLIKELY_END(msi_check) - -- GET_CURRENT(bx) -+ __GET_CURRENT(bx) - - /* Check that the callback is non-null. */ - leaq VCPU_int80_bounce(%rbx),%rdx -@@ -391,9 +465,27 @@ ENTRY(dom_crash_sync_extable) - - ENTRY(common_interrupt) - SAVE_ALL CLAC -+ -+ GET_STACK_END(14) -+ mov STACK_CPUINFO_FIELD(xen_cr3)(%r14), %rcx -+ mov %rcx, %r15 -+ neg %rcx -+ jz .Lintr_cr3_okay -+ jns .Lintr_cr3_load -+ mov %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%r14) -+ neg %rcx -+.Lintr_cr3_load: -+ write_cr3 rcx, rdi, rsi -+ xor %ecx, %ecx -+ mov %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%r14) -+ testb $3, UREGS_cs(%rsp) -+ cmovnz %rcx, %r15 -+.Lintr_cr3_okay: -+ - CR4_PV32_RESTORE - movq %rsp,%rdi - callq do_IRQ -+ mov %r15, STACK_CPUINFO_FIELD(xen_cr3)(%r14) - jmp ret_from_intr - - /* No special register assumptions. */ -@@ -411,6 +503,23 @@ ENTRY(page_fault) - /* No special register assumptions. */ - GLOBAL(handle_exception) - SAVE_ALL CLAC -+ -+ GET_STACK_END(14) -+ mov STACK_CPUINFO_FIELD(xen_cr3)(%r14), %rcx -+ mov %rcx, %r15 -+ neg %rcx -+ jz .Lxcpt_cr3_okay -+ jns .Lxcpt_cr3_load -+ mov %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%r14) -+ neg %rcx -+.Lxcpt_cr3_load: -+ write_cr3 rcx, rdi, rsi -+ xor %ecx, %ecx -+ mov %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%r14) -+ testb $3, UREGS_cs(%rsp) -+ cmovnz %rcx, %r15 -+.Lxcpt_cr3_okay: -+ - handle_exception_saved: - GET_CURRENT(bx) - testb $X86_EFLAGS_IF>>8,UREGS_eflags+1(%rsp) -@@ -475,6 +584,7 @@ handle_exception_saved: - leaq exception_table(%rip),%rdx - PERFC_INCR(exceptions, %rax, %rbx) - callq *(%rdx,%rax,8) -+ mov %r15, STACK_CPUINFO_FIELD(xen_cr3)(%r14) - testb $3,UREGS_cs(%rsp) - jz restore_all_xen - leaq VCPU_trap_bounce(%rbx),%rdx -@@ -507,6 +617,7 @@ exception_with_ints_disabled: - rep; movsq # make room for ec/ev - 1: movq UREGS_error_code(%rsp),%rax # ec/ev - movq %rax,UREGS_kernel_sizeof(%rsp) -+ mov %r15, STACK_CPUINFO_FIELD(xen_cr3)(%r14) - jmp restore_all_xen # return to fixup code - - /* No special register assumptions. 
*/ -@@ -585,6 +696,17 @@ ENTRY(double_fault) - movl $TRAP_double_fault,4(%rsp) - /* Set AC to reduce chance of further SMAP faults */ - SAVE_ALL STAC -+ -+ GET_STACK_END(bx) -+ mov STACK_CPUINFO_FIELD(xen_cr3)(%rbx), %rbx -+ test %rbx, %rbx -+ jz .Ldblf_cr3_okay -+ jns .Ldblf_cr3_load -+ neg %rbx -+.Ldblf_cr3_load: -+ write_cr3 rbx, rdi, rsi -+.Ldblf_cr3_okay: -+ - movq %rsp,%rdi - call do_double_fault - BUG /* do_double_fault() shouldn't return. */ -@@ -603,10 +725,28 @@ ENTRY(nmi) - movl $TRAP_nmi,4(%rsp) - handle_ist_exception: - SAVE_ALL CLAC -+ -+ GET_STACK_END(14) -+ mov STACK_CPUINFO_FIELD(xen_cr3)(%r14), %rcx -+ mov %rcx, %r15 -+ neg %rcx -+ jz .List_cr3_okay -+ jns .List_cr3_load -+ mov %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%r14) -+ neg %rcx -+.List_cr3_load: -+ write_cr3 rcx, rdi, rsi -+ movq $0, STACK_CPUINFO_FIELD(xen_cr3)(%r14) -+.List_cr3_okay: -+ - CR4_PV32_RESTORE - testb $3,UREGS_cs(%rsp) - jz 1f -- /* Interrupted guest context. Copy the context to stack bottom. */ -+ /* -+ * Interrupted guest context. Clear the restore value for xen_cr3 -+ * and copy the context to stack bottom. -+ */ -+ xor %r15, %r15 - GET_CPUINFO_FIELD(guest_cpu_user_regs,di) - movq %rsp,%rsi - movl $UREGS_kernel_sizeof/8,%ecx -@@ -616,6 +756,7 @@ handle_ist_exception: - movzbl UREGS_entry_vector(%rsp),%eax - leaq exception_table(%rip),%rdx - callq *(%rdx,%rax,8) -+ mov %r15, STACK_CPUINFO_FIELD(xen_cr3)(%r14) - cmpb $TRAP_nmi,UREGS_entry_vector(%rsp) - jne ret_from_intr - -diff --git a/xen/include/asm-x86/asm_defns.h b/xen/include/asm-x86/asm_defns.h -index 98192eb..fb0fee9 100644 ---- a/xen/include/asm-x86/asm_defns.h -+++ b/xen/include/asm-x86/asm_defns.h -@@ -93,9 +93,30 @@ void ret_from_intr(void); - UNLIKELY_DONE(mp, tag); \ - __UNLIKELY_END(tag) - -+ .equ .Lrax, 0 -+ .equ .Lrcx, 1 -+ .equ .Lrdx, 2 -+ .equ .Lrbx, 3 -+ .equ .Lrsp, 4 -+ .equ .Lrbp, 5 -+ .equ .Lrsi, 6 -+ .equ .Lrdi, 7 -+ .equ .Lr8, 8 -+ .equ .Lr9, 9 -+ .equ .Lr10, 10 -+ .equ .Lr11, 11 -+ .equ .Lr12, 12 -+ .equ .Lr13, 13 -+ .equ .Lr14, 14 -+ .equ .Lr15, 15 -+ - #define STACK_CPUINFO_FIELD(field) (1 - CPUINFO_sizeof + CPUINFO_##field) - #define GET_STACK_END(reg) \ -+ .if .Lr##reg > 8; \ -+ movq $STACK_SIZE-1, %r##reg; \ -+ .else; \ - movl $STACK_SIZE-1, %e##reg; \ -+ .endif; \ - orq %rsp, %r##reg - - #define GET_CPUINFO_FIELD(field, reg) \ -@@ -177,6 +198,15 @@ void ret_from_intr(void); - #define ASM_STAC ASM_AC(STAC) - #define ASM_CLAC ASM_AC(CLAC) - -+.macro write_cr3 val:req, tmp1:req, tmp2:req -+ mov %cr4, %\tmp1 -+ mov %\tmp1, %\tmp2 -+ and $~X86_CR4_PGE, %\tmp1 -+ mov %\tmp1, %cr4 -+ mov %\val, %cr3 -+ mov %\tmp2, %cr4 -+.endm -+ - #define CR4_PV32_RESTORE \ - 667: ASM_NOP5; \ - .pushsection .altinstr_replacement, "ax"; \ -diff --git a/xen/include/asm-x86/current.h b/xen/include/asm-x86/current.h -index 8984992..b929c48 100644 ---- a/xen/include/asm-x86/current.h -+++ b/xen/include/asm-x86/current.h -@@ -41,6 +41,18 @@ struct cpu_info { - struct vcpu *current_vcpu; - unsigned long per_cpu_offset; - unsigned long cr4; -+ /* -+ * Of the two following fields the latter is being set to the CR3 value -+ * to be used on the given pCPU for loading whenever 64-bit PV guest -+ * context is being entered. The value never changes once set. -+ * The former is the value to restore when re-entering Xen, if any. IOW -+ * its value being zero means there's nothing to restore. 
However, its -+ * value can also be negative, indicating to the exit-to-Xen code that -+ * restoring is not necessary, but allowing any nested entry code paths -+ * to still know the value to put back into CR3. -+ */ -+ unsigned long xen_cr3; -+ unsigned long pv_cr3; - /* get_stack_bottom() must be 16-byte aligned */ - }; - -diff --git a/xen/include/asm-x86/processor.h b/xen/include/asm-x86/processor.h -index 41a8d8c..2962e83 100644 ---- a/xen/include/asm-x86/processor.h -+++ b/xen/include/asm-x86/processor.h -@@ -462,6 +462,7 @@ extern idt_entry_t idt_table[]; - extern idt_entry_t *idt_tables[]; - - DECLARE_PER_CPU(struct tss_struct, init_tss); -+DECLARE_PER_CPU(root_pgentry_t *, root_pgt); - - extern void init_int80_direct_trap(struct vcpu *v); - -diff --git a/xen/include/asm-x86/x86_64/page.h b/xen/include/asm-x86/x86_64/page.h -index 6fb7cd5..05a0334 100644 ---- a/xen/include/asm-x86/x86_64/page.h -+++ b/xen/include/asm-x86/x86_64/page.h -@@ -24,8 +24,8 @@ - /* These are architectural limits. Current CPUs support only 40-bit phys. */ - #define PADDR_BITS 52 - #define VADDR_BITS 48 --#define PADDR_MASK ((1UL << PADDR_BITS)-1) --#define VADDR_MASK ((1UL << VADDR_BITS)-1) -+#define PADDR_MASK ((_AC(1,UL) << PADDR_BITS) - 1) -+#define VADDR_MASK ((_AC(1,UL) << VADDR_BITS) - 1) - - #define VADDR_TOP_BIT (1UL << (VADDR_BITS - 1)) - #define CANONICAL_MASK (~0UL & ~VADDR_MASK) -@@ -107,6 +107,7 @@ typedef l4_pgentry_t root_pgentry_t; - : (((_s) < ROOT_PAGETABLE_FIRST_XEN_SLOT) || \ - ((_s) > ROOT_PAGETABLE_LAST_XEN_SLOT))) - -+#define root_table_offset l4_table_offset - #define root_get_pfn l4e_get_pfn - #define root_get_flags l4e_get_flags - #define root_get_intpte l4e_get_intpte --- -2.1.4 - -From 7cccd6f748ec724cf9408cec6b3ec8e54a8a2c1f Mon Sep 17 00:00:00 2001 -From: Jan Beulich <jbeulich@suse.com> -Date: Wed, 17 Jan 2018 16:57:33 +0100 -Subject: [PATCH] x86: allow Meltdown band-aid to be disabled - -First of all we don't need it on AMD systems. Additionally allow its use -to be controlled by command line option. For best backportability, this -intentionally doesn't use alternative instruction patching to achieve -the intended effect - while we likely want it, this will be later -follow-up. - -Signed-off-by: Jan Beulich <jbeulich@suse.com> -Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com> -master commit: e871e80c38547d9faefc6604532ba3e985e65873 -master date: 2018-01-16 17:50:59 +0100 ---- - docs/misc/xen-command-line.markdown | 12 ++++++++++++ - xen/arch/x86/domain.c | 7 +++++-- - xen/arch/x86/mm.c | 2 +- - xen/arch/x86/smpboot.c | 17 ++++++++++++++--- - xen/arch/x86/x86_64/entry.S | 2 ++ - 5 files changed, 34 insertions(+), 6 deletions(-) - -diff --git a/docs/misc/xen-command-line.markdown b/docs/misc/xen-command-line.markdown -index 781110d..49539b4 100644 ---- a/docs/misc/xen-command-line.markdown -+++ b/docs/misc/xen-command-line.markdown -@@ -1849,6 +1849,18 @@ In the case that x2apic is in use, this option switches between physical and - clustered mode. The default, given no hint from the **FADT**, is cluster - mode. - -+### xpti -+> `= <boolean>` -+ -+> Default: `false` on AMD hardware -+> Default: `true` everywhere else -+ -+Override default selection of whether to isolate 64-bit PV guest page -+tables. -+ -+** WARNING: Not yet a complete isolation implementation, but better than -+nothing. 
** -+ - ### xsave - > `= <boolean>` - -diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c -index f4a3d74..b357b60 100644 ---- a/xen/arch/x86/domain.c -+++ b/xen/arch/x86/domain.c -@@ -1505,10 +1505,13 @@ void paravirt_ctxt_switch_from(struct vcpu *v) - - void paravirt_ctxt_switch_to(struct vcpu *v) - { -+ root_pgentry_t *root_pgt = this_cpu(root_pgt); - unsigned long cr4; - -- this_cpu(root_pgt)[root_table_offset(PERDOMAIN_VIRT_START)] = -- l4e_from_page(v->domain->arch.perdomain_l3_pg, __PAGE_HYPERVISOR_RW); -+ if ( root_pgt ) -+ root_pgt[root_table_offset(PERDOMAIN_VIRT_START)] = -+ l4e_from_page(v->domain->arch.perdomain_l3_pg, -+ __PAGE_HYPERVISOR_RW); - - cr4 = pv_guest_cr4_to_real_cr4(v); - if ( unlikely(cr4 != read_cr4()) ) -diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c -index 6c7d120..53295f8 100644 ---- a/xen/arch/x86/mm.c -+++ b/xen/arch/x86/mm.c -@@ -3665,7 +3665,7 @@ long do_mmu_update( - rc = mod_l4_entry(va, l4e_from_intpte(req.val), mfn, - cmd == MMU_PT_UPDATE_PRESERVE_AD, v); - if ( !rc ) -- sync_guest = true; -+ sync_guest = this_cpu(root_pgt); - break; - case PGT_writable_page: - perfc_incr(writable_mmu_updates); -diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c -index b1fbb57..edf607f 100644 ---- a/xen/arch/x86/smpboot.c -+++ b/xen/arch/x86/smpboot.c -@@ -328,7 +328,7 @@ void start_secondary(void *unused) - spin_debug_disable(); - - get_cpu_info()->xen_cr3 = 0; -- get_cpu_info()->pv_cr3 = __pa(this_cpu(root_pgt)); -+ get_cpu_info()->pv_cr3 = this_cpu(root_pgt) ? __pa(this_cpu(root_pgt)) : 0; - - load_system_tables(); - -@@ -736,14 +736,20 @@ static int clone_mapping(const void *ptr, root_pgentry_t *rpt) - return 0; - } - -+static __read_mostly int8_t opt_xpti = -1; -+boolean_param("xpti", opt_xpti); - DEFINE_PER_CPU(root_pgentry_t *, root_pgt); - - static int setup_cpu_root_pgt(unsigned int cpu) - { -- root_pgentry_t *rpt = alloc_xen_pagetable(); -+ root_pgentry_t *rpt; - unsigned int off; - int rc; - -+ if ( !opt_xpti ) -+ return 0; -+ -+ rpt = alloc_xen_pagetable(); - if ( !rpt ) - return -ENOMEM; - -@@ -987,10 +993,14 @@ void __init smp_prepare_cpus(unsigned int max_cpus) - - stack_base[0] = stack_start; - -+ if ( opt_xpti < 0 ) -+ opt_xpti = boot_cpu_data.x86_vendor != X86_VENDOR_AMD; -+ - rc = setup_cpu_root_pgt(0); - if ( rc ) - panic("Error %d setting up PV root page table\n", rc); -- get_cpu_info()->pv_cr3 = __pa(per_cpu(root_pgt, 0)); -+ if ( per_cpu(root_pgt, 0) ) -+ get_cpu_info()->pv_cr3 = __pa(per_cpu(root_pgt, 0)); - - set_nr_sockets(); - -@@ -1062,6 +1072,7 @@ void __init smp_prepare_boot_cpu(void) - #endif - - get_cpu_info()->xen_cr3 = 0; -+ get_cpu_info()->pv_cr3 = 0; - } - - static void -diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S -index fc38874..a8825c8 100644 ---- a/xen/arch/x86/x86_64/entry.S -+++ b/xen/arch/x86/x86_64/entry.S -@@ -46,6 +46,7 @@ restore_all_guest: - movabs $DIRECTMAP_VIRT_START, %rcx - mov %rdi, %rax - and %rsi, %rdi -+ jz .Lrag_keep_cr3 - and %r9, %rsi - add %rcx, %rdi - add %rcx, %rsi -@@ -62,6 +63,7 @@ restore_all_guest: - rep movsq - mov %r9, STACK_CPUINFO_FIELD(xen_cr3)(%rdx) - write_cr3 rax, rdi, rsi -+.Lrag_keep_cr3: - - RESTORE_ALL - testw $TRAP_syscall,4(%rsp) --- -2.1.4 - diff --git a/main/xen/xsa255-1.patch b/main/xen/xsa255-1.patch deleted file mode 100644 index f8bba9e516..0000000000 --- a/main/xen/xsa255-1.patch +++ /dev/null @@ -1,133 +0,0 @@ -From: Jan Beulich <jbeulich@suse.com> -Subject: gnttab/ARM: don't corrupt shared GFN array - -... 
by writing status GFNs to it. Introduce a second array instead. -Also implement gnttab_status_gmfn() properly now that the information is -suitably being tracked. - -While touching it anyway, remove a misguided (but luckily benign) upper -bound check from gnttab_shared_gmfn(): We should never access beyond the -bounds of that array. - -This is part of XSA-255. - -Signed-off-by: Jan Beulich <jbeulich@suse.com> -Reviewed-by: Stefano Stabellini <sstabellini@kernel.org> -Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com> ---- -v3: Don't init the ARM GFN arrays to zero anymore, use INVALID_GFN. -v2: New. - ---- a/xen/common/grant_table.c -+++ b/xen/common/grant_table.c -@@ -3775,6 +3775,7 @@ int gnttab_map_frame(struct domain *d, u - { - int rc = 0; - struct grant_table *gt = d->grant_table; -+ bool status = false; - - grant_write_lock(gt); - -@@ -3785,6 +3786,7 @@ int gnttab_map_frame(struct domain *d, u - (idx & XENMAPIDX_grant_table_status) ) - { - idx &= ~XENMAPIDX_grant_table_status; -+ status = true; - if ( idx < nr_status_frames(gt) ) - *mfn = _mfn(virt_to_mfn(gt->status[idx])); - else -@@ -3802,7 +3804,7 @@ int gnttab_map_frame(struct domain *d, u - } - - if ( !rc ) -- gnttab_set_frame_gfn(gt, idx, gfn); -+ gnttab_set_frame_gfn(gt, status, idx, gfn); - - grant_write_unlock(gt); - ---- a/xen/include/asm-arm/grant_table.h -+++ b/xen/include/asm-arm/grant_table.h -@@ -9,7 +9,8 @@ - #define INITIAL_NR_GRANT_FRAMES 1U - - struct grant_table_arch { -- gfn_t *gfn; -+ gfn_t *shared_gfn; -+ gfn_t *status_gfn; - }; - - void gnttab_clear_flag(unsigned long nr, uint16_t *addr); -@@ -21,7 +22,6 @@ int replace_grant_host_mapping(unsigned - unsigned long new_gpaddr, unsigned int flags); - void gnttab_mark_dirty(struct domain *d, unsigned long l); - #define gnttab_create_status_page(d, t, i) do {} while (0) --#define gnttab_status_gmfn(d, t, i) (0) - #define gnttab_release_host_mappings(domain) 1 - static inline int replace_grant_supported(void) - { -@@ -42,19 +42,35 @@ static inline unsigned int gnttab_dom0_m - - #define gnttab_init_arch(gt) \ - ({ \ -- (gt)->arch.gfn = xzalloc_array(gfn_t, (gt)->max_grant_frames); \ -- ( (gt)->arch.gfn ? 0 : -ENOMEM ); \ -+ unsigned int ngf_ = (gt)->max_grant_frames; \ -+ unsigned int nsf_ = grant_to_status_frames(ngf_); \ -+ \ -+ (gt)->arch.shared_gfn = xmalloc_array(gfn_t, ngf_); \ -+ (gt)->arch.status_gfn = xmalloc_array(gfn_t, nsf_); \ -+ if ( (gt)->arch.shared_gfn && (gt)->arch.status_gfn ) \ -+ { \ -+ while ( ngf_-- ) \ -+ (gt)->arch.shared_gfn[ngf_] = INVALID_GFN; \ -+ while ( nsf_-- ) \ -+ (gt)->arch.status_gfn[nsf_] = INVALID_GFN; \ -+ } \ -+ else \ -+ gnttab_destroy_arch(gt); \ -+ (gt)->arch.shared_gfn ? 0 : -ENOMEM; \ - }) - - #define gnttab_destroy_arch(gt) \ - do { \ -- xfree((gt)->arch.gfn); \ -- (gt)->arch.gfn = NULL; \ -+ xfree((gt)->arch.shared_gfn); \ -+ (gt)->arch.shared_gfn = NULL; \ -+ xfree((gt)->arch.status_gfn); \ -+ (gt)->arch.status_gfn = NULL; \ - } while ( 0 ) - --#define gnttab_set_frame_gfn(gt, idx, gfn) \ -+#define gnttab_set_frame_gfn(gt, st, idx, gfn) \ - do { \ -- (gt)->arch.gfn[idx] = gfn; \ -+ ((st) ? (gt)->arch.status_gfn : (gt)->arch.shared_gfn)[idx] = \ -+ (gfn); \ - } while ( 0 ) - - #define gnttab_create_shared_page(d, t, i) \ -@@ -65,8 +81,10 @@ static inline unsigned int gnttab_dom0_m - } while ( 0 ) - - #define gnttab_shared_gmfn(d, t, i) \ -- ( ((i >= nr_grant_frames(t)) && \ -- (i < (t)->max_grant_frames))? 0 : gfn_x((t)->arch.gfn[i])) -+ gfn_x(((i) >= nr_grant_frames(t)) ? 
INVALID_GFN : (t)->arch.shared_gfn[i]) -+ -+#define gnttab_status_gmfn(d, t, i) \ -+ gfn_x(((i) >= nr_status_frames(t)) ? INVALID_GFN : (t)->arch.status_gfn[i]) - - #define gnttab_need_iommu_mapping(d) \ - (is_domain_direct_mapped(d) && need_iommu(d)) ---- a/xen/include/asm-x86/grant_table.h -+++ b/xen/include/asm-x86/grant_table.h -@@ -46,7 +46,7 @@ static inline unsigned int gnttab_dom0_m - - #define gnttab_init_arch(gt) 0 - #define gnttab_destroy_arch(gt) do {} while ( 0 ) --#define gnttab_set_frame_gfn(gt, idx, gfn) do {} while ( 0 ) -+#define gnttab_set_frame_gfn(gt, st, idx, gfn) do {} while ( 0 ) - - #define gnttab_create_shared_page(d, t, i) \ - do { \ diff --git a/main/xen/xsa255-2.patch b/main/xen/xsa255-2.patch deleted file mode 100644 index 402b6efe98..0000000000 --- a/main/xen/xsa255-2.patch +++ /dev/null @@ -1,167 +0,0 @@ -From: Jan Beulich <jbeulich@suse.com> -Subject: gnttab: don't blindly free status pages upon version change - -There may still be active mappings, which would trigger the respective -BUG_ON(). Split the loop into one dealing with the page attributes and -the second (when the first fully passed) freeing the pages. Return an -error if any pages still have pending references. - -This is part of XSA-255. - -Signed-off-by: Jan Beulich <jbeulich@suse.com> -Reviewed-by: Stefano Stabellini <sstabellini@kernel.org> -Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com> ---- -v4: Add gprintk(XENLOG_ERR, ...) to domain_crash() invocations. -v3: Call guest_physmap_remove_page() from gnttab_map_frame(), making the - code unconditional at the same time. Re-base over changes to first - patch. -v2: Also deal with translated guests. - ---- a/xen/common/grant_table.c -+++ b/xen/common/grant_table.c -@@ -1636,23 +1636,74 @@ status_alloc_failed: - return -ENOMEM; - } - --static void -+static int - gnttab_unpopulate_status_frames(struct domain *d, struct grant_table *gt) - { -- int i; -+ unsigned int i; - - for ( i = 0; i < nr_status_frames(gt); i++ ) - { - struct page_info *pg = virt_to_page(gt->status[i]); -+ gfn_t gfn = gnttab_get_frame_gfn(gt, true, i); -+ -+ /* -+ * For translated domains, recovering from failure after partial -+ * changes were made is more complicated than it seems worth -+ * implementing at this time. Hence respective error paths below -+ * crash the domain in such a case. -+ */ -+ if ( paging_mode_translate(d) ) -+ { -+ int rc = gfn_eq(gfn, INVALID_GFN) -+ ? 
0 -+ : guest_physmap_remove_page(d, gfn, -+ _mfn(page_to_mfn(pg)), 0); -+ -+ if ( rc ) -+ { -+ gprintk(XENLOG_ERR, -+ "Could not remove status frame %u (GFN %#lx) from P2M\n", -+ i, gfn_x(gfn)); -+ domain_crash(d); -+ return rc; -+ } -+ gnttab_set_frame_gfn(gt, true, i, INVALID_GFN); -+ } - - BUG_ON(page_get_owner(pg) != d); - if ( test_and_clear_bit(_PGC_allocated, &pg->count_info) ) - put_page(pg); -- BUG_ON(pg->count_info & ~PGC_xen_heap); -+ -+ if ( pg->count_info & ~PGC_xen_heap ) -+ { -+ if ( paging_mode_translate(d) ) -+ { -+ gprintk(XENLOG_ERR, -+ "Wrong page state %#lx of status frame %u (GFN %#lx)\n", -+ pg->count_info, i, gfn_x(gfn)); -+ domain_crash(d); -+ } -+ else -+ { -+ if ( get_page(pg, d) ) -+ set_bit(_PGC_allocated, &pg->count_info); -+ while ( i-- ) -+ gnttab_create_status_page(d, gt, i); -+ } -+ return -EBUSY; -+ } -+ -+ page_set_owner(pg, NULL); -+ } -+ -+ for ( i = 0; i < nr_status_frames(gt); i++ ) -+ { - free_xenheap_page(gt->status[i]); - gt->status[i] = NULL; - } - gt->nr_status_frames = 0; -+ -+ return 0; - } - - /* -@@ -2962,8 +3013,9 @@ gnttab_set_version(XEN_GUEST_HANDLE_PARA - break; - } - -- if ( op.version < 2 && gt->gt_version == 2 ) -- gnttab_unpopulate_status_frames(currd, gt); -+ if ( op.version < 2 && gt->gt_version == 2 && -+ (res = gnttab_unpopulate_status_frames(currd, gt)) != 0 ) -+ goto out_unlock; - - /* Make sure there's no crud left over from the old version. */ - for ( i = 0; i < nr_grant_frames(gt); i++ ) -@@ -3803,6 +3855,11 @@ int gnttab_map_frame(struct domain *d, u - rc = -EINVAL; - } - -+ if ( !rc && paging_mode_translate(d) && -+ !gfn_eq(gnttab_get_frame_gfn(gt, status, idx), INVALID_GFN) ) -+ rc = guest_physmap_remove_page(d, gnttab_get_frame_gfn(gt, status, idx), -+ *mfn, 0); -+ - if ( !rc ) - gnttab_set_frame_gfn(gt, status, idx, gfn); - ---- a/xen/include/asm-arm/grant_table.h -+++ b/xen/include/asm-arm/grant_table.h -@@ -73,6 +73,11 @@ static inline unsigned int gnttab_dom0_m - (gfn); \ - } while ( 0 ) - -+#define gnttab_get_frame_gfn(gt, st, idx) ({ \ -+ _gfn((st) ? gnttab_status_gmfn(NULL, gt, idx) \ -+ : gnttab_shared_gmfn(NULL, gt, idx)); \ -+}) -+ - #define gnttab_create_shared_page(d, t, i) \ - do { \ - share_xen_page_with_guest( \ ---- a/xen/include/asm-x86/grant_table.h -+++ b/xen/include/asm-x86/grant_table.h -@@ -47,6 +47,12 @@ static inline unsigned int gnttab_dom0_m - #define gnttab_init_arch(gt) 0 - #define gnttab_destroy_arch(gt) do {} while ( 0 ) - #define gnttab_set_frame_gfn(gt, st, idx, gfn) do {} while ( 0 ) -+#define gnttab_get_frame_gfn(gt, st, idx) ({ \ -+ unsigned long mfn_ = (st) ? gnttab_status_mfn(gt, idx) \ -+ : gnttab_shared_mfn(gt, idx); \ -+ unsigned long gpfn_ = get_gpfn_from_mfn(mfn_); \ -+ VALID_M2P(gpfn_) ? 
_gfn(gpfn_) : INVALID_GFN; \ -+}) - - #define gnttab_create_shared_page(d, t, i) \ - do { \ -@@ -63,11 +69,11 @@ static inline unsigned int gnttab_dom0_m - } while ( 0 ) - - --#define gnttab_shared_mfn(d, t, i) \ -+#define gnttab_shared_mfn(t, i) \ - ((virt_to_maddr((t)->shared_raw[i]) >> PAGE_SHIFT)) - - #define gnttab_shared_gmfn(d, t, i) \ -- (mfn_to_gmfn(d, gnttab_shared_mfn(d, t, i))) -+ (mfn_to_gmfn(d, gnttab_shared_mfn(t, i))) - - - #define gnttab_status_mfn(t, i) \ diff --git a/main/xen/xsa256.patch b/main/xen/xsa256.patch deleted file mode 100644 index 50ff24e17b..0000000000 --- a/main/xen/xsa256.patch +++ /dev/null @@ -1,40 +0,0 @@ -From: Andrew Cooper <andrew.cooper3@citrix.com> -Subject: x86/hvm: Disallow the creation of HVM domains without Local APIC emulation - -There are multiple problems, not necesserily limited to: - - * Guests which configure event channels via hvmop_set_evtchn_upcall_vector(), - or which hit %cr8 emulation will cause Xen to fall over a NULL vlapic->regs - pointer. - - * On Intel hardware, disabling the TPR_SHADOW execution control without - reenabling CR8_{LOAD,STORE} interception means that the guests %cr8 - accesses interact with the real TPR. Amongst other things, setting the - real TPR to 0xf blocks even IPIs from interrupting this CPU. - - * On hardware which sets up the use of Interrupt Posting, including - IOMMU-Posting, guests run without the appropriate non-root configuration, - which at a minimum will result in dropped interrupts. - -Whether no-LAPIC mode is of any use at all remains to be seen. - -This is XSA-256. - -Reported-by: Ian Jackson <ian.jackson@eu.citrix.com> -Reviewed-by: Roger Pau MonnĂ© <roger.pau@citrix.com> -Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> - -diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c -index f93327b..f65fc12 100644 ---- a/xen/arch/x86/domain.c -+++ b/xen/arch/x86/domain.c -@@ -413,7 +413,7 @@ static bool emulation_flags_ok(const struct domain *d, uint32_t emflags) - if ( is_hardware_domain(d) && - emflags != (XEN_X86_EMU_LAPIC|XEN_X86_EMU_IOAPIC) ) - return false; -- if ( !is_hardware_domain(d) && emflags && -+ if ( !is_hardware_domain(d) && - emflags != XEN_X86_EMU_ALL && emflags != XEN_X86_EMU_LAPIC ) - return false; - } |
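Each of the XSA fixes deleted above is included in the upstream 4.10.1 point release, which is why the patches can simply be dropped rather than rebased. A quick way to confirm that an advisory's fix actually landed between the two releases, assuming a clone of xen.git with upstream's RELEASE-x.y.z tags (stable-branch backports name the advisory in the commit message):

    for xsa in 252 253 254 255 256; do
        echo "XSA-$xsa:"
        git log --oneline --grep="XSA-$xsa" RELEASE-4.10.0..RELEASE-4.10.1
    done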