author     Leonardo Arena <rnalrd@alpinelinux.org>    2019-02-04 08:21:18 +0000
committer  Leonardo Arena <rnalrd@alpinelinux.org>    2019-02-04 08:21:42 +0000
commit     f39fc76089047b3f9b0e1ec06aecb40cc1ac1786 (patch)
tree       682b45d3e61bc3df3c9695ab9b7672d442f6fb3f /main
parent     d39be709613fbc979651b09ac2bc27c6591afd99 (diff)
main/xen: security fixes
CVE-2018-19961, CVE-2018-19962, XSA-275
CVE-2018-18883, XSA-278
CVE-2018-19965, XSA-279
CVE-2018-19966, XSA-280
CVE-2018-19967, XSA-282
Fixes #9845
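
For context, a minimal sketch of the packaging workflow behind a bump like this one, assuming the upstream XSA patches are fetched from the xenbits XSA archive (the URLs, file layout and exact abuild invocations here are assumptions, not taken from the commit itself):

    cd aports/main/xen
    # Fetch the upstream XSA patches that get listed in source= (file names
    # assumed to match the ones added by this commit).
    for p in xsa275-4.11-1 xsa275-4.11-2 xsa278-4.11 xsa279-4.9 \
             xsa280-4.10-2 xsa280-4.9-1 xsa282-2 xsa282-4.9-1; do
        curl -fsSLO "https://xenbits.xen.org/xsa/$p.patch"
    done
    # Bump pkgrel (0 -> 1) and add the patches to source= in APKBUILD, then:
    abuild checksum    # regenerate the sha512sums= block shown in the diff below
    abuild -r          # rebuild in a clean environment to confirm the patches apply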
Diffstat (limited to 'main')
-rw-r--r--  main/xen/APKBUILD             |  26
-rw-r--r--  main/xen/xsa275-4.11-1.patch  | 104
-rw-r--r--  main/xen/xsa275-4.11-2.patch  |  68
-rw-r--r--  main/xen/xsa278-4.11.patch    | 326
-rw-r--r--  main/xen/xsa279-4.9.patch     |  35
-rw-r--r--  main/xen/xsa280-4.10-2.patch  | 141
-rw-r--r--  main/xen/xsa280-4.9-1.patch   | 112
-rw-r--r--  main/xen/xsa282-2.patch       |  42
-rw-r--r--  main/xen/xsa282-4.9-1.patch   |  87
9 files changed, 940 insertions(+), 1 deletion(-)
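
Note that several of the patches below were generated against newer Xen branches (4.10/4.11) yet are applied here to the 4.9.3 tree, presumably because the affected code is close enough on 4.9 for them to apply unmodified. A cheap way to confirm that before committing is a dry run against the unpacked source; the paths and unpack step below are illustrative, and abuild normally applies source= patches with patch -p1:

    cd main/xen && abuild unpack && cd src/xen-4.9.3
    for p in ../../xsa*.patch; do
        patch -p1 --dry-run < "$p" >/dev/null || echo "NEEDS REBASE: $p"
    done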
diff --git a/main/xen/APKBUILD b/main/xen/APKBUILD index a35bff37af..b3b9171512 100644 --- a/main/xen/APKBUILD +++ b/main/xen/APKBUILD @@ -3,7 +3,7 @@ # Maintainer: William Pitcock <nenolod@dereferenced.org> pkgname=xen pkgver=4.9.3 -pkgrel=0 +pkgrel=1 pkgdesc="Xen hypervisor" url="http://www.xen.org/" arch="x86_64 armhf aarch64" @@ -130,6 +130,13 @@ options="!strip" # - CVE-2018-15468 XSA-269 # - CVE-2018-15470 XSA-272 # - CVE-2018-3620 CVE-2018-3646 XSA-273 +# 4.9.3-r1: +# - CVE-2018-19961 XSA-275 +# - CVE-2018-19962 XSA-275 +# - CVE-2018-18883 XSA-278 +# - CVE-2018-19965 XSA-279 +# - CVE-2018-19966 XSA-280 +# - CVE-2018-19967 XSA-282 case "$CARCH" in x86*) @@ -194,6 +201,15 @@ source="https://downloads.xenproject.org/release/$pkgname/$pkgver/$pkgname-$pkgv xenqemu-xattr-size-max.patch + xsa275-4.11-1.patch + xsa275-4.11-2.patch + xsa278-4.11.patch + xsa282-4.9-1.patch + xsa282-2.patch + xsa279-4.9.patch + xsa280-4.10-2.patch + xsa280-4.9-1.patch + xenstored.initd xenstored.confd xenconsoled.initd @@ -446,6 +462,14 @@ a3197d9c2455983554610031702ea95dc31f1b375b8c1291207d33c9e6114c6928417b4c8138cb53 e76816c6ad0e91dc5f81947f266da3429b20e6d976c3e8c41202c6179532eec878a3f0913921ef3ac853c5dbad8082da3c9cd53b65081910516feb492577b7fc xen-fd-is-file.c 69dfa60628ca838678862383528654ecbdf4269cbb5c9cfb6b84d976202a8dea85d711aa65a52fa1b477fb0b30604ca70cf1337192d6fb9388a08bbe7fe56077 xenstore_client_transaction_fix.patch 2094ea964fa610b2bf72fd2c7ede7e954899a75c0f5b08030cf1d74460fb759ade84866176e32f8fe29c921dfdc6dafd2b31e23ab9b0a3874d3dceeabdd1913b xenqemu-xattr-size-max.patch +158054c37d9df6b3576246ecf43505fb5417febad175650dce954151ed52b8ce27729a59ac873ce1cf210e6613c8315378fb5ac9ab1667e9b844fe0d007c776d xsa275-4.11-1.patch +6f118663e5e5c86449e05a22a84a400bb9d1e8ef6b351cbba00fafcf52932924392c44a9664d1f3d720473cc7afbb04abff0a60ec7be75109bf355f8fe95fa59 xsa275-4.11-2.patch +35c8c90b78856ce364cac0ddfd759aa807480bb57136e609a1462ad0f53e867a6a2acafbec5dad586d6d5159e2e377e5b6aa2ffe659d83a7684b7bb6fddba1a6 xsa278-4.11.patch +7050af051031c499170bb42a2060678297f6e3ff5b9079b646b84a9ad137ed478fe319ba43b9bccde56b9c4a341672403458c12d2adbf8e208995b7e09a5ca14 xsa279-4.9.patch +5eb30e29e22cf7c76a777f99e1e8035be1d6d645ddb616446a7840ef93fd4e2d2fedda9e7a3708b31e42c12b14178aa424c50b3e3f585b93052fcbc9a357f21d xsa280-4.10-2.patch +0517d9ab5dd0e1faef5126fbd012306da503a23d95143b232ca61aba2bf92a15ebced3c4a4b9bb3c5105a089ea7dff2059e861c80a82975372d78ecdbc32a4c4 xsa280-4.9-1.patch +a2cb124aab729931617e10a6a34900c21ef7f846926447a8752adb343ef7bf32f3625059f25c6487df27337eee03701da9a3009154d82a2cd1c8fb4be58cbc2e xsa282-2.patch +d5dd53d66fc45dfccd51adf81e8864b70c6c35922479002419e6e984738f4a2695d528be4d871d9aa9f4ddf60987990580a6f8ebf0a7b99e5845984f0f36755b xsa282-4.9-1.patch 52c43beb2596d645934d0f909f2d21f7587b6898ed5e5e7046799a8ed6d58f7a09c5809e1634fa26152f3fd4f3e7cfa07da7076f01b4a20cc8f5df8b9cb77e50 xenstored.initd 093f7fbd43faf0a16a226486a0776bade5dc1681d281c5946a3191c32d74f9699c6bf5d0ab8de9d1195a2461165d1660788e92a3156c9b3c7054d7b2d52d7ff0 xenstored.confd 3c86ed48fbee0af4051c65c4a3893f131fa66e47bf083caf20c9b6aa4b63fdead8832f84a58d0e27964bc49ec8397251b34e5be5c212c139f556916dc8da9523 xenconsoled.initd diff --git a/main/xen/xsa275-4.11-1.patch b/main/xen/xsa275-4.11-1.patch new file mode 100644 index 0000000000..932d8f1132 --- /dev/null +++ b/main/xen/xsa275-4.11-1.patch @@ -0,0 +1,104 @@ +From: Roger Pau Monné <roger.pau@citrix.com> +Subject: amd/iommu: fix flush checks + +Flush checking for AMD IOMMU didn't check whether the previous 
entry +was present, or whether the flags (writable/readable) changed in order +to decide whether a flush should be executed. + +Fix this by taking the writable/readable/next-level fields into account, +together with the present bit. + +Along these lines the flushing in amd_iommu_map_page() must not be +omitted for PV domains. The comment there was simply wrong: Mappings may +very well change, both their addresses and their permissions. Ultimately +this should honor iommu_dont_flush_iotlb, but to achieve this +amd_iommu_ops first needs to gain an .iotlb_flush hook. + +Also make clear_iommu_pte_present() static, to demonstrate there's no +caller omitting the (subsequent) flush. + +This is part of XSA-275. + +Reported-by: Paul Durrant <paul.durrant@citrix.com> +Signed-off-by: Roger Pau Monné <roger.pau@citrix.com> +Signed-off-by: Jan Beulich <jbeulich@suse.com> + +--- a/xen/drivers/passthrough/amd/iommu_map.c ++++ b/xen/drivers/passthrough/amd/iommu_map.c +@@ -35,7 +35,7 @@ static unsigned int pfn_to_pde_idx(unsig + return idx; + } + +-void clear_iommu_pte_present(unsigned long l1_mfn, unsigned long gfn) ++static void clear_iommu_pte_present(unsigned long l1_mfn, unsigned long gfn) + { + u64 *table, *pte; + +@@ -49,23 +49,42 @@ static bool_t set_iommu_pde_present(u32 + unsigned int next_level, + bool_t iw, bool_t ir) + { +- u64 addr_lo, addr_hi, maddr_old, maddr_next; ++ uint64_t addr_lo, addr_hi, maddr_next; + u32 entry; +- bool_t need_flush = 0; ++ bool need_flush = false, old_present; + + maddr_next = (u64)next_mfn << PAGE_SHIFT; + +- addr_hi = get_field_from_reg_u32(pde[1], +- IOMMU_PTE_ADDR_HIGH_MASK, +- IOMMU_PTE_ADDR_HIGH_SHIFT); +- addr_lo = get_field_from_reg_u32(pde[0], +- IOMMU_PTE_ADDR_LOW_MASK, +- IOMMU_PTE_ADDR_LOW_SHIFT); +- +- maddr_old = (addr_hi << 32) | (addr_lo << PAGE_SHIFT); +- +- if ( maddr_old != maddr_next ) +- need_flush = 1; ++ old_present = get_field_from_reg_u32(pde[0], IOMMU_PTE_PRESENT_MASK, ++ IOMMU_PTE_PRESENT_SHIFT); ++ if ( old_present ) ++ { ++ bool old_r, old_w; ++ unsigned int old_level; ++ uint64_t maddr_old; ++ ++ addr_hi = get_field_from_reg_u32(pde[1], ++ IOMMU_PTE_ADDR_HIGH_MASK, ++ IOMMU_PTE_ADDR_HIGH_SHIFT); ++ addr_lo = get_field_from_reg_u32(pde[0], ++ IOMMU_PTE_ADDR_LOW_MASK, ++ IOMMU_PTE_ADDR_LOW_SHIFT); ++ old_level = get_field_from_reg_u32(pde[0], ++ IOMMU_PDE_NEXT_LEVEL_MASK, ++ IOMMU_PDE_NEXT_LEVEL_SHIFT); ++ old_w = get_field_from_reg_u32(pde[1], ++ IOMMU_PTE_IO_WRITE_PERMISSION_MASK, ++ IOMMU_PTE_IO_WRITE_PERMISSION_SHIFT); ++ old_r = get_field_from_reg_u32(pde[1], ++ IOMMU_PTE_IO_READ_PERMISSION_MASK, ++ IOMMU_PTE_IO_READ_PERMISSION_SHIFT); ++ ++ maddr_old = (addr_hi << 32) | (addr_lo << PAGE_SHIFT); ++ ++ if ( maddr_old != maddr_next || iw != old_w || ir != old_r || ++ old_level != next_level ) ++ need_flush = true; ++ } + + addr_lo = maddr_next & DMA_32BIT_MASK; + addr_hi = maddr_next >> 32; +@@ -687,10 +706,7 @@ int amd_iommu_map_page(struct domain *d, + if ( !need_flush ) + goto out; + +- /* 4K mapping for PV guests never changes, +- * no need to flush if we trust non-present bits */ +- if ( is_hvm_domain(d) ) +- amd_iommu_flush_pages(d, gfn, 0); ++ amd_iommu_flush_pages(d, gfn, 0); + + for ( merge_level = IOMMU_PAGING_MODE_LEVEL_2; + merge_level <= hd->arch.paging_mode; merge_level++ ) diff --git a/main/xen/xsa275-4.11-2.patch b/main/xen/xsa275-4.11-2.patch new file mode 100644 index 0000000000..72a2d07882 --- /dev/null +++ b/main/xen/xsa275-4.11-2.patch @@ -0,0 +1,68 @@ +From: Jan Beulich <jbeulich@suse.com> +Subject: AMD/IOMMU: 
suppress PTE merging after initial table creation + +The logic is not fit for this purpose, so simply disable its use until +it can be fixed / replaced. Note that this re-enables merging for the +table creation case, which was disabled as a (perhaps unintended) side +effect of the earlier "amd/iommu: fix flush checks". It relies on no +page getting mapped more than once (with different properties) in this +process, as that would still be beyond what the merging logic can cope +with. But arch_iommu_populate_page_table() guarantees this afaict. + +This is part of XSA-275. + +Reported-by: Paul Durrant <paul.durrant@citrix.com> +Signed-off-by: Jan Beulich <jbeulich@suse.com> + +--- a/xen/drivers/passthrough/amd/iommu_map.c ++++ b/xen/drivers/passthrough/amd/iommu_map.c +@@ -702,11 +702,24 @@ int amd_iommu_map_page(struct domain *d, + !!(flags & IOMMUF_writable), + !!(flags & IOMMUF_readable)); + +- /* Do not increase pde count if io mapping has not been changed */ +- if ( !need_flush ) +- goto out; ++ if ( need_flush ) ++ { ++ amd_iommu_flush_pages(d, gfn, 0); ++ /* No further merging, as the logic doesn't cope. */ ++ hd->arch.no_merge = true; ++ } + +- amd_iommu_flush_pages(d, gfn, 0); ++ /* ++ * Suppress merging of non-R/W mappings or after initial table creation, ++ * as the merge logic does not cope with this. ++ */ ++ if ( hd->arch.no_merge || flags != (IOMMUF_writable | IOMMUF_readable) ) ++ goto out; ++ if ( d->creation_finished ) ++ { ++ hd->arch.no_merge = true; ++ goto out; ++ } + + for ( merge_level = IOMMU_PAGING_MODE_LEVEL_2; + merge_level <= hd->arch.paging_mode; merge_level++ ) +@@ -780,6 +793,10 @@ int amd_iommu_unmap_page(struct domain * + + /* mark PTE as 'page not present' */ + clear_iommu_pte_present(pt_mfn[1], gfn); ++ ++ /* No further merging in amd_iommu_map_page(), as the logic doesn't cope. */ ++ hd->arch.no_merge = true; ++ + spin_unlock(&hd->arch.mapping_lock); + + amd_iommu_flush_pages(d, gfn, 0); +--- a/xen/include/asm-x86/iommu.h ++++ b/xen/include/asm-x86/iommu.h +@@ -40,6 +40,7 @@ struct arch_iommu + + /* amd iommu support */ + int paging_mode; ++ bool no_merge; + struct page_info *root_table; + struct guest_iommu *g_iommu; + }; diff --git a/main/xen/xsa278-4.11.patch b/main/xen/xsa278-4.11.patch new file mode 100644 index 0000000000..9e1484981e --- /dev/null +++ b/main/xen/xsa278-4.11.patch @@ -0,0 +1,326 @@ +From: Andrew Cooper <andrew.cooper3@citrix.com> +Subject: x86/vvmx: Disallow the use of VT-x instructions when nested virt is disabled + +c/s ac6a4500b "vvmx: set vmxon_region_pa of vcpu out of VMX operation to an +invalid address" was a real bugfix as described, but has a very subtle bug +which results in all VT-x instructions being usable by a guest. + +The toolstack constructs a guest by issuing: + + XEN_DOMCTL_createdomain + XEN_DOMCTL_max_vcpus + +and optionally later, HVMOP_set_param to enable nested virt. + +As a result, the call to nvmx_vcpu_initialise() in hvm_vcpu_initialise() +(which is what makes the above patch look correct during review) is actually +dead code. In practice, nvmx_vcpu_initialise() first gets called when nested +virt is enabled, which is typically never. + +As a result, the zeroed memory of struct vcpu causes nvmx_vcpu_in_vmx() to +return true before nested virt is enabled for the guest. + +Fixing the order of initialisation is a work in progress for other reasons, +but not viable for security backports. 
+ +A compounding factor is that the vmexit handlers for all instructions, other +than VMXON, pass 0 into vmx_inst_check_privilege()'s vmxop_check parameter, +which skips the CR4.VMXE check. (This is one of many reasons why nested virt +isn't a supported feature yet.) + +However, the overall result is that when nested virt is not enabled by the +toolstack (i.e. the default configuration for all production guests), the VT-x +instructions (other than VMXON) are actually usable, and Xen very quickly +falls over the fact that the nvmx structure is uninitialised. + +In order to fail safe in the supported case, re-implement all the VT-x +instruction handling using a single function with a common prologue, covering +all the checks which should cause #UD or #GP faults. This deliberately +doesn't use any state from the nvmx structure, in case there are other lurking +issues. + +This is XSA-278 + +Reported-by: Sergey Dyasli <sergey.dyasli@citrix.com> +Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> +Reviewed-by: Sergey Dyasli <sergey.dyasli@citrix.com> + +diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c +index a6415f0..a4d2829 100644 +--- a/xen/arch/x86/hvm/vmx/vmx.c ++++ b/xen/arch/x86/hvm/vmx/vmx.c +@@ -3982,57 +3982,17 @@ void vmx_vmexit_handler(struct cpu_user_regs *regs) + break; + + case EXIT_REASON_VMXOFF: +- if ( nvmx_handle_vmxoff(regs) == X86EMUL_OKAY ) +- update_guest_eip(); +- break; +- + case EXIT_REASON_VMXON: +- if ( nvmx_handle_vmxon(regs) == X86EMUL_OKAY ) +- update_guest_eip(); +- break; +- + case EXIT_REASON_VMCLEAR: +- if ( nvmx_handle_vmclear(regs) == X86EMUL_OKAY ) +- update_guest_eip(); +- break; +- + case EXIT_REASON_VMPTRLD: +- if ( nvmx_handle_vmptrld(regs) == X86EMUL_OKAY ) +- update_guest_eip(); +- break; +- + case EXIT_REASON_VMPTRST: +- if ( nvmx_handle_vmptrst(regs) == X86EMUL_OKAY ) +- update_guest_eip(); +- break; +- + case EXIT_REASON_VMREAD: +- if ( nvmx_handle_vmread(regs) == X86EMUL_OKAY ) +- update_guest_eip(); +- break; +- + case EXIT_REASON_VMWRITE: +- if ( nvmx_handle_vmwrite(regs) == X86EMUL_OKAY ) +- update_guest_eip(); +- break; +- + case EXIT_REASON_VMLAUNCH: +- if ( nvmx_handle_vmlaunch(regs) == X86EMUL_OKAY ) +- update_guest_eip(); +- break; +- + case EXIT_REASON_VMRESUME: +- if ( nvmx_handle_vmresume(regs) == X86EMUL_OKAY ) +- update_guest_eip(); +- break; +- + case EXIT_REASON_INVEPT: +- if ( nvmx_handle_invept(regs) == X86EMUL_OKAY ) +- update_guest_eip(); +- break; +- + case EXIT_REASON_INVVPID: +- if ( nvmx_handle_invvpid(regs) == X86EMUL_OKAY ) ++ if ( nvmx_handle_vmx_insn(regs, exit_reason) == X86EMUL_OKAY ) + update_guest_eip(); + break; + +diff --git a/xen/arch/x86/hvm/vmx/vvmx.c b/xen/arch/x86/hvm/vmx/vvmx.c +index e97db33..88cb58c 100644 +--- a/xen/arch/x86/hvm/vmx/vvmx.c ++++ b/xen/arch/x86/hvm/vmx/vvmx.c +@@ -1470,7 +1470,7 @@ void nvmx_switch_guest(void) + * VMX instructions handling + */ + +-int nvmx_handle_vmxon(struct cpu_user_regs *regs) ++static int nvmx_handle_vmxon(struct cpu_user_regs *regs) + { + struct vcpu *v=current; + struct nestedvmx *nvmx = &vcpu_2_nvmx(v); +@@ -1522,7 +1522,7 @@ int nvmx_handle_vmxon(struct cpu_user_regs *regs) + return X86EMUL_OKAY; + } + +-int nvmx_handle_vmxoff(struct cpu_user_regs *regs) ++static int nvmx_handle_vmxoff(struct cpu_user_regs *regs) + { + struct vcpu *v=current; + struct nestedvmx *nvmx = &vcpu_2_nvmx(v); +@@ -1611,7 +1611,7 @@ static int nvmx_vmresume(struct vcpu *v, struct cpu_user_regs *regs) + return X86EMUL_OKAY; + } + +-int nvmx_handle_vmresume(struct 
cpu_user_regs *regs) ++static int nvmx_handle_vmresume(struct cpu_user_regs *regs) + { + bool_t launched; + struct vcpu *v = current; +@@ -1645,7 +1645,7 @@ int nvmx_handle_vmresume(struct cpu_user_regs *regs) + return nvmx_vmresume(v,regs); + } + +-int nvmx_handle_vmlaunch(struct cpu_user_regs *regs) ++static int nvmx_handle_vmlaunch(struct cpu_user_regs *regs) + { + bool_t launched; + struct vcpu *v = current; +@@ -1688,7 +1688,7 @@ int nvmx_handle_vmlaunch(struct cpu_user_regs *regs) + return rc; + } + +-int nvmx_handle_vmptrld(struct cpu_user_regs *regs) ++static int nvmx_handle_vmptrld(struct cpu_user_regs *regs) + { + struct vcpu *v = current; + struct vmx_inst_decoded decode; +@@ -1759,7 +1759,7 @@ int nvmx_handle_vmptrld(struct cpu_user_regs *regs) + return X86EMUL_OKAY; + } + +-int nvmx_handle_vmptrst(struct cpu_user_regs *regs) ++static int nvmx_handle_vmptrst(struct cpu_user_regs *regs) + { + struct vcpu *v = current; + struct vmx_inst_decoded decode; +@@ -1784,7 +1784,7 @@ int nvmx_handle_vmptrst(struct cpu_user_regs *regs) + return X86EMUL_OKAY; + } + +-int nvmx_handle_vmclear(struct cpu_user_regs *regs) ++static int nvmx_handle_vmclear(struct cpu_user_regs *regs) + { + struct vcpu *v = current; + struct vmx_inst_decoded decode; +@@ -1836,7 +1836,7 @@ int nvmx_handle_vmclear(struct cpu_user_regs *regs) + return X86EMUL_OKAY; + } + +-int nvmx_handle_vmread(struct cpu_user_regs *regs) ++static int nvmx_handle_vmread(struct cpu_user_regs *regs) + { + struct vcpu *v = current; + struct vmx_inst_decoded decode; +@@ -1878,7 +1878,7 @@ int nvmx_handle_vmread(struct cpu_user_regs *regs) + return X86EMUL_OKAY; + } + +-int nvmx_handle_vmwrite(struct cpu_user_regs *regs) ++static int nvmx_handle_vmwrite(struct cpu_user_regs *regs) + { + struct vcpu *v = current; + struct vmx_inst_decoded decode; +@@ -1926,7 +1926,7 @@ int nvmx_handle_vmwrite(struct cpu_user_regs *regs) + return X86EMUL_OKAY; + } + +-int nvmx_handle_invept(struct cpu_user_regs *regs) ++static int nvmx_handle_invept(struct cpu_user_regs *regs) + { + struct vmx_inst_decoded decode; + unsigned long eptp; +@@ -1954,7 +1954,7 @@ int nvmx_handle_invept(struct cpu_user_regs *regs) + return X86EMUL_OKAY; + } + +-int nvmx_handle_invvpid(struct cpu_user_regs *regs) ++static int nvmx_handle_invvpid(struct cpu_user_regs *regs) + { + struct vmx_inst_decoded decode; + unsigned long vpid; +@@ -1980,6 +1980,81 @@ int nvmx_handle_invvpid(struct cpu_user_regs *regs) + return X86EMUL_OKAY; + } + ++int nvmx_handle_vmx_insn(struct cpu_user_regs *regs, unsigned int exit_reason) ++{ ++ struct vcpu *curr = current; ++ int ret; ++ ++ if ( !(curr->arch.hvm_vcpu.guest_cr[4] & X86_CR4_VMXE) || ++ !nestedhvm_enabled(curr->domain) || ++ (vmx_guest_x86_mode(curr) < (hvm_long_mode_active(curr) ? 
8 : 2)) ) ++ { ++ hvm_inject_hw_exception(TRAP_invalid_op, X86_EVENT_NO_EC); ++ return X86EMUL_EXCEPTION; ++ } ++ ++ if ( vmx_get_cpl() > 0 ) ++ { ++ hvm_inject_hw_exception(TRAP_gp_fault, 0); ++ return X86EMUL_EXCEPTION; ++ } ++ ++ switch ( exit_reason ) ++ { ++ case EXIT_REASON_VMXOFF: ++ ret = nvmx_handle_vmxoff(regs); ++ break; ++ ++ case EXIT_REASON_VMXON: ++ ret = nvmx_handle_vmxon(regs); ++ break; ++ ++ case EXIT_REASON_VMCLEAR: ++ ret = nvmx_handle_vmclear(regs); ++ break; ++ ++ case EXIT_REASON_VMPTRLD: ++ ret = nvmx_handle_vmptrld(regs); ++ break; ++ ++ case EXIT_REASON_VMPTRST: ++ ret = nvmx_handle_vmptrst(regs); ++ break; ++ ++ case EXIT_REASON_VMREAD: ++ ret = nvmx_handle_vmread(regs); ++ break; ++ ++ case EXIT_REASON_VMWRITE: ++ ret = nvmx_handle_vmwrite(regs); ++ break; ++ ++ case EXIT_REASON_VMLAUNCH: ++ ret = nvmx_handle_vmlaunch(regs); ++ break; ++ ++ case EXIT_REASON_VMRESUME: ++ ret = nvmx_handle_vmresume(regs); ++ break; ++ ++ case EXIT_REASON_INVEPT: ++ ret = nvmx_handle_invept(regs); ++ break; ++ ++ case EXIT_REASON_INVVPID: ++ ret = nvmx_handle_invvpid(regs); ++ break; ++ ++ default: ++ ASSERT_UNREACHABLE(); ++ domain_crash(curr->domain); ++ ret = X86EMUL_UNHANDLEABLE; ++ break; ++ } ++ ++ return ret; ++} ++ + #define __emul_value(enable1, default1) \ + ((enable1 | default1) << 32 | (default1)) + +diff --git a/xen/include/asm-x86/hvm/vmx/vvmx.h b/xen/include/asm-x86/hvm/vmx/vvmx.h +index 9ea35eb..fc4a8d1 100644 +--- a/xen/include/asm-x86/hvm/vmx/vvmx.h ++++ b/xen/include/asm-x86/hvm/vmx/vvmx.h +@@ -94,9 +94,6 @@ void nvmx_domain_relinquish_resources(struct domain *d); + + bool_t nvmx_ept_enabled(struct vcpu *v); + +-int nvmx_handle_vmxon(struct cpu_user_regs *regs); +-int nvmx_handle_vmxoff(struct cpu_user_regs *regs); +- + #define EPT_TRANSLATE_SUCCEED 0 + #define EPT_TRANSLATE_VIOLATION 1 + #define EPT_TRANSLATE_MISCONFIG 2 +@@ -191,15 +188,7 @@ enum vmx_insn_errno set_vvmcs_real_safe(const struct vcpu *, u32 encoding, + uint64_t get_shadow_eptp(struct vcpu *v); + + void nvmx_destroy_vmcs(struct vcpu *v); +-int nvmx_handle_vmptrld(struct cpu_user_regs *regs); +-int nvmx_handle_vmptrst(struct cpu_user_regs *regs); +-int nvmx_handle_vmclear(struct cpu_user_regs *regs); +-int nvmx_handle_vmread(struct cpu_user_regs *regs); +-int nvmx_handle_vmwrite(struct cpu_user_regs *regs); +-int nvmx_handle_vmresume(struct cpu_user_regs *regs); +-int nvmx_handle_vmlaunch(struct cpu_user_regs *regs); +-int nvmx_handle_invept(struct cpu_user_regs *regs); +-int nvmx_handle_invvpid(struct cpu_user_regs *regs); ++int nvmx_handle_vmx_insn(struct cpu_user_regs *regs, unsigned int exit_reason); + int nvmx_msr_read_intercept(unsigned int msr, + u64 *msr_content); + diff --git a/main/xen/xsa279-4.9.patch b/main/xen/xsa279-4.9.patch new file mode 100644 index 0000000000..984b7ccb0b --- /dev/null +++ b/main/xen/xsa279-4.9.patch @@ -0,0 +1,35 @@ +From: Andrew Cooper <andrew.cooper3@citrix.com> +Subject: x86/mm: Don't perform flush after failing to update a guests L1e + +If the L1e update hasn't occured, the flush cannot do anything useful. This +skips the potentially expensive vcpumask_to_pcpumask() conversion, and +broadcast TLB shootdown. + +More importantly however, we might be in the error path due to a bad va +parameter from the guest, and this should not propagate into the TLB flushing +logic. The INVPCID instruction for example raises #GP for a non-canonical +address. + +This is XSA-279. 
+ +Reported-by: Matthew Daley <mattd@bugfuzz.com> +Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> +Reviewed-by: Jan Beulich <jbeulich@suse.com> + +--- a/xen/arch/x86/mm.c ++++ b/xen/arch/x86/mm.c +@@ -4894,6 +4894,14 @@ static int __do_update_va_mapping( + if ( pl1e ) + guest_unmap_l1e(pl1e); + ++ /* ++ * Any error at this point means that we haven't change the l1e. Skip the ++ * flush, as it won't do anything useful. Furthermore, va is guest ++ * controlled and not necesserily audited by this point. ++ */ ++ if ( rc ) ++ return rc; ++ + switch ( flags & UVMF_FLUSHTYPE_MASK ) + { + case UVMF_TLB_FLUSH: diff --git a/main/xen/xsa280-4.10-2.patch b/main/xen/xsa280-4.10-2.patch new file mode 100644 index 0000000000..d250d7645a --- /dev/null +++ b/main/xen/xsa280-4.10-2.patch @@ -0,0 +1,141 @@ +From: Jan Beulich <jbeulich@suse.com> +Subject: x86/shadow: shrink struct page_info's shadow_flags to 16 bits + +This is to avoid it overlapping the linear_pt_count field needed for PV +domains. Introduce a separate, HVM-only pagetable_dying field to replace +the sole one left in the upper 16 bits. + +Note that the accesses to ->shadow_flags in shadow_{pro,de}mote() get +switched to non-atomic, non-bitops operations, as {test,set,clear}_bit() +are not allowed on uint16_t fields and hence their use would have +required ugly casts. This is fine because all updates of the field ought +to occur with the paging lock held, and other updates of it use |= and +&= as well (i.e. using atomic operations here didn't really guard +against potentially racing updates elsewhere). + +This is part of XSA-280. + +Reported-by: Prgmr.com Security <security@prgmr.com> +Signed-off-by: Jan Beulich <jbeulich@suse.com> +Reviewed-by: Tim Deegan <tim@xen.org> + +--- a/xen/arch/x86/mm/shadow/common.c ++++ b/xen/arch/x86/mm/shadow/common.c +@@ -957,10 +957,14 @@ void shadow_promote(struct domain *d, mf + + /* Is the page already shadowed? */ + if ( !test_and_set_bit(_PGC_page_table, &page->count_info) ) ++ { + page->shadow_flags = 0; ++ if ( is_hvm_domain(d) ) ++ page->pagetable_dying = false; ++ } + +- ASSERT(!test_bit(type, &page->shadow_flags)); +- set_bit(type, &page->shadow_flags); ++ ASSERT(!(page->shadow_flags & (1u << type))); ++ page->shadow_flags |= 1u << type; + TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_PROMOTE); + } + +@@ -969,9 +973,9 @@ void shadow_demote(struct domain *d, mfn + struct page_info *page = mfn_to_page(gmfn); + + ASSERT(test_bit(_PGC_page_table, &page->count_info)); +- ASSERT(test_bit(type, &page->shadow_flags)); ++ ASSERT(page->shadow_flags & (1u << type)); + +- clear_bit(type, &page->shadow_flags); ++ page->shadow_flags &= ~(1u << type); + + if ( (page->shadow_flags & SHF_page_type_mask) == 0 ) + { +@@ -2801,7 +2805,7 @@ void sh_remove_shadows(struct domain *d, + if ( !fast && all && (pg->count_info & PGC_page_table) ) + { + SHADOW_ERROR("can't find all shadows of mfn %"PRI_mfn" " +- "(shadow_flags=%08x)\n", ++ "(shadow_flags=%04x)\n", + mfn_x(gmfn), pg->shadow_flags); + domain_crash(d); + } +--- a/xen/arch/x86/mm/shadow/multi.c ++++ b/xen/arch/x86/mm/shadow/multi.c +@@ -3328,8 +3328,8 @@ static int sh_page_fault(struct vcpu *v, + + /* Unshadow if we are writing to a toplevel pagetable that is + * flagged as a dying process, and that is not currently used. 
*/ +- if ( sh_mfn_is_a_page_table(gmfn) +- && (mfn_to_page(gmfn)->shadow_flags & SHF_pagetable_dying) ) ++ if ( sh_mfn_is_a_page_table(gmfn) && is_hvm_domain(d) && ++ mfn_to_page(gmfn)->pagetable_dying ) + { + int used = 0; + struct vcpu *tmp; +@@ -4301,9 +4301,9 @@ int sh_rm_write_access_from_sl1p(struct + ASSERT(mfn_valid(smfn)); + + /* Remember if we've been told that this process is being torn down */ +- if ( curr->domain == d ) ++ if ( curr->domain == d && is_hvm_domain(d) ) + curr->arch.paging.shadow.pagetable_dying +- = !!(mfn_to_page(gmfn)->shadow_flags & SHF_pagetable_dying); ++ = mfn_to_page(gmfn)->pagetable_dying; + + sp = mfn_to_page(smfn); + +@@ -4619,10 +4619,10 @@ static void sh_pagetable_dying(struct vc + : shadow_hash_lookup(d, mfn_x(gmfn), SH_type_l2_pae_shadow); + } + +- if ( mfn_valid(smfn) ) ++ if ( mfn_valid(smfn) && is_hvm_domain(d) ) + { + gmfn = _mfn(mfn_to_page(smfn)->v.sh.back); +- mfn_to_page(gmfn)->shadow_flags |= SHF_pagetable_dying; ++ mfn_to_page(gmfn)->pagetable_dying = true; + shadow_unhook_mappings(d, smfn, 1/* user pages only */); + flush = 1; + } +@@ -4659,9 +4659,9 @@ static void sh_pagetable_dying(struct vc + smfn = shadow_hash_lookup(d, mfn_x(gmfn), SH_type_l4_64_shadow); + #endif + +- if ( mfn_valid(smfn) ) ++ if ( mfn_valid(smfn) && is_hvm_domain(d) ) + { +- mfn_to_page(gmfn)->shadow_flags |= SHF_pagetable_dying; ++ mfn_to_page(gmfn)->pagetable_dying = true; + shadow_unhook_mappings(d, smfn, 1/* user pages only */); + /* Now flush the TLB: we removed toplevel mappings. */ + flush_tlb_mask(d->domain_dirty_cpumask); +--- a/xen/arch/x86/mm/shadow/private.h ++++ b/xen/arch/x86/mm/shadow/private.h +@@ -292,8 +292,6 @@ static inline void sh_terminate_list(str + + #endif /* (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) */ + +-#define SHF_pagetable_dying (1u<<31) +- + static inline int sh_page_has_multiple_shadows(struct page_info *pg) + { + u32 shadows; +--- a/xen/include/asm-x86/mm.h ++++ b/xen/include/asm-x86/mm.h +@@ -188,8 +188,15 @@ struct page_info + * Guest pages with a shadow. This does not conflict with + * tlbflush_timestamp since page table pages are explicitly not + * tracked for TLB-flush avoidance when a guest runs in shadow mode. ++ * ++ * pagetable_dying is used for HVM domains only. The layout here has ++ * to avoid re-use of the space used by linear_pt_count, which (only) ++ * PV guests use. + */ +- u32 shadow_flags; ++ struct { ++ uint16_t shadow_flags; ++ bool pagetable_dying; ++ }; + + /* When in use as a shadow, next shadow in this hash chain. */ + __pdx_t next_shadow; diff --git a/main/xen/xsa280-4.9-1.patch b/main/xen/xsa280-4.9-1.patch new file mode 100644 index 0000000000..32cc67c662 --- /dev/null +++ b/main/xen/xsa280-4.9-1.patch @@ -0,0 +1,112 @@ +From: Jan Beulich <jbeulich@suse.com> +Subject: x86/shadow: move OOS flag bit positions + +In preparation of reducing struct page_info's shadow_flags field to 16 +bits, lower the bit positions used for SHF_out_of_sync and +SHF_oos_may_write. + +Instead of also adjusting the open coded use in _get_page_type(), +introduce shadow_prepare_page_type_change() to contain knowledge of the +bit positions to shadow code. + +This is part of XSA-280. 
+ +Signed-off-by: Jan Beulich <jbeulich@suse.com> +Reviewed-by: Tim Deegan <tim@xen.org> + +--- a/xen/arch/x86/mm.c ++++ b/xen/arch/x86/mm.c +@@ -2799,15 +2799,8 @@ static int __get_page_type(struct page_i + { + struct domain *d = page_get_owner(page); + +- /* Normally we should never let a page go from type count 0 +- * to type count 1 when it is shadowed. One exception: +- * out-of-sync shadowed pages are allowed to become +- * writeable. */ +- if ( d && shadow_mode_enabled(d) +- && (page->count_info & PGC_page_table) +- && !((page->shadow_flags & (1u<<29)) +- && type == PGT_writable_page) ) +- shadow_remove_all_shadows(d, _mfn(page_to_mfn(page))); ++ if ( d && shadow_mode_enabled(d) ) ++ shadow_prepare_page_type_change(d, page, type); + + ASSERT(!(x & PGT_pae_xen_l2)); + if ( (x & PGT_type_mask) != type ) +--- a/xen/arch/x86/mm/shadow/common.c ++++ b/xen/arch/x86/mm/shadow/common.c +@@ -919,6 +919,9 @@ int sh_unsync(struct vcpu *v, mfn_t gmfn + || !v->domain->arch.paging.shadow.oos_active ) + return 0; + ++ BUILD_BUG_ON(!(typeof(pg->shadow_flags))SHF_out_of_sync); ++ BUILD_BUG_ON(!(typeof(pg->shadow_flags))SHF_oos_may_write); ++ + pg->shadow_flags |= SHF_out_of_sync|SHF_oos_may_write; + oos_hash_add(v, gmfn); + perfc_incr(shadow_unsync); +@@ -2810,6 +2813,26 @@ void sh_remove_shadows(struct domain *d, + paging_unlock(d); + } + ++void shadow_prepare_page_type_change(struct domain *d, struct page_info *page, ++ unsigned long new_type) ++{ ++ if ( !(page->count_info & PGC_page_table) ) ++ return; ++ ++#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) ++ /* ++ * Normally we should never let a page go from type count 0 to type ++ * count 1 when it is shadowed. One exception: out-of-sync shadowed ++ * pages are allowed to become writeable. ++ */ ++ if ( (page->shadow_flags & SHF_oos_may_write) && ++ new_type == PGT_writable_page ) ++ return; ++#endif ++ ++ shadow_remove_all_shadows(d, page_to_mfn(page)); ++} ++ + static void + sh_remove_all_shadows_and_parents(struct domain *d, mfn_t gmfn) + /* Even harsher: this is a HVM page that we thing is no longer a pagetable. +--- a/xen/arch/x86/mm/shadow/private.h ++++ b/xen/arch/x86/mm/shadow/private.h +@@ -287,8 +287,8 @@ static inline void sh_terminate_list(str + * codepath is called during that time and is sensitive to oos issues, it may + * need to use the second flag. + */ +-#define SHF_out_of_sync (1u<<30) +-#define SHF_oos_may_write (1u<<29) ++#define SHF_out_of_sync (1u << (SH_type_max_shadow + 1)) ++#define SHF_oos_may_write (1u << (SH_type_max_shadow + 2)) + + #endif /* (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) */ + +--- a/xen/include/asm-x86/shadow.h ++++ b/xen/include/asm-x86/shadow.h +@@ -81,6 +81,10 @@ void shadow_final_teardown(struct domain + + void sh_remove_shadows(struct domain *d, mfn_t gmfn, int fast, int all); + ++/* Adjust shadows ready for a guest page to change its type. */ ++void shadow_prepare_page_type_change(struct domain *d, struct page_info *page, ++ unsigned long new_type); ++ + /* Discard _all_ mappings from the domain's shadows. 
*/ + void shadow_blow_tables_per_domain(struct domain *d); + +@@ -105,6 +109,10 @@ int shadow_set_allocation(struct domain + static inline void sh_remove_shadows(struct domain *d, mfn_t gmfn, + bool_t fast, bool_t all) {} + ++static inline void shadow_prepare_page_type_change(struct domain *d, ++ struct page_info *page, ++ unsigned long new_type) {} ++ + static inline void shadow_blow_tables_per_domain(struct domain *d) {} + + static inline int shadow_domctl(struct domain *d, xen_domctl_shadow_op_t *sc, diff --git a/main/xen/xsa282-2.patch b/main/xen/xsa282-2.patch new file mode 100644 index 0000000000..4c34691db3 --- /dev/null +++ b/main/xen/xsa282-2.patch @@ -0,0 +1,42 @@ +From: Jan Beulich <jbeulich@suse.com> +Subject: x86: work around HLE host lockup erratum + +XACQUIRE prefixed accesses to the 4Mb range of memory starting at 1Gb +are liable to lock up the processor. Disallow use of this memory range. + +Unfortunately the available Core Gen7 and Gen8 spec updates are pretty +old, so I can only guess that they're similarly affected when Core Gen6 +is and the Xeon counterparts are, too. + +This is part of XSA-282. + +Signed-off-by: Jan Beulich <jbeulich@suse.com> +Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com> +--- +v2: Don't apply the workaround when running ourselves virtualized. + +--- a/xen/arch/x86/mm.c ++++ b/xen/arch/x86/mm.c +@@ -5853,6 +5853,22 @@ const struct platform_bad_page *__init g + { .mfn = 0x20138000 >> PAGE_SHIFT }, + { .mfn = 0x40004000 >> PAGE_SHIFT }, + }; ++ static const struct platform_bad_page __initconst hle_bad_page = { ++ .mfn = 0x40000000 >> PAGE_SHIFT, .order = 10 ++ }; ++ ++ switch ( cpuid_eax(1) & 0x000f3ff0 ) ++ { ++ case 0x000406e0: /* erratum SKL167 */ ++ case 0x00050650: /* erratum SKZ63 */ ++ case 0x000506e0: /* errata SKL167 / SKW159 */ ++ case 0x000806e0: /* erratum KBL??? */ ++ case 0x000906e0: /* errata KBL??? / KBW114 / CFW103 */ ++ *array_size = (cpuid_eax(0) >= 7 && ++ !(cpuid_ecx(1) & cpufeat_mask(X86_FEATURE_HYPERVISOR)) && ++ (cpuid_count_ebx(7, 0) & cpufeat_mask(X86_FEATURE_HLE))); ++ return &hle_bad_page; ++ } + + *array_size = ARRAY_SIZE(snb_bad_pages); + igd_id = pci_conf_read32(0, 0, 2, 0, 0); diff --git a/main/xen/xsa282-4.9-1.patch b/main/xen/xsa282-4.9-1.patch new file mode 100644 index 0000000000..580933d4b0 --- /dev/null +++ b/main/xen/xsa282-4.9-1.patch @@ -0,0 +1,87 @@ +From: Jan Beulich <jbeulich@suse.com> +Subject: x86: extend get_platform_badpages() interface + +Use a structure so along with an address (now frame number) an order can +also be specified. + +This is part of XSA-282. 
+ +Signed-off-by: Jan Beulich <jbeulich@suse.com> +Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com> + +--- a/xen/arch/x86/mm.c ++++ b/xen/arch/x86/mm.c +@@ -7111,23 +7111,23 @@ void arch_dump_shared_mem_info(void) + mem_sharing_get_nr_saved_mfns()); + } + +-const unsigned long *__init get_platform_badpages(unsigned int *array_size) ++const struct platform_bad_page *__init get_platform_badpages(unsigned int *array_size) + { + u32 igd_id; +- static unsigned long __initdata bad_pages[] = { +- 0x20050000, +- 0x20110000, +- 0x20130000, +- 0x20138000, +- 0x40004000, ++ static const struct platform_bad_page __initconst snb_bad_pages[] = { ++ { .mfn = 0x20050000 >> PAGE_SHIFT }, ++ { .mfn = 0x20110000 >> PAGE_SHIFT }, ++ { .mfn = 0x20130000 >> PAGE_SHIFT }, ++ { .mfn = 0x20138000 >> PAGE_SHIFT }, ++ { .mfn = 0x40004000 >> PAGE_SHIFT }, + }; + +- *array_size = ARRAY_SIZE(bad_pages); ++ *array_size = ARRAY_SIZE(snb_bad_pages); + igd_id = pci_conf_read32(0, 0, 2, 0, 0); +- if ( !IS_SNB_GFX(igd_id) ) +- return NULL; ++ if ( IS_SNB_GFX(igd_id) ) ++ return snb_bad_pages; + +- return bad_pages; ++ return NULL; + } + + void paging_invlpg(struct vcpu *v, unsigned long va) +--- a/xen/common/page_alloc.c ++++ b/xen/common/page_alloc.c +@@ -270,7 +270,7 @@ void __init init_boot_pages(paddr_t ps, + unsigned long bad_spfn, bad_epfn; + const char *p; + #ifdef CONFIG_X86 +- const unsigned long *badpage = NULL; ++ const struct platform_bad_page *badpage; + unsigned int i, array_size; + #endif + +@@ -295,8 +295,8 @@ void __init init_boot_pages(paddr_t ps, + { + for ( i = 0; i < array_size; i++ ) + { +- bootmem_region_zap(*badpage >> PAGE_SHIFT, +- (*badpage >> PAGE_SHIFT) + 1); ++ bootmem_region_zap(badpage->mfn, ++ badpage->mfn + (1U << badpage->order)); + badpage++; + } + } +--- a/xen/include/asm-x86/mm.h ++++ b/xen/include/asm-x86/mm.h +@@ -350,7 +350,13 @@ bool is_iomem_page(mfn_t mfn); + + void clear_superpage_mark(struct page_info *page); + +-const unsigned long *get_platform_badpages(unsigned int *array_size); ++struct platform_bad_page { ++ unsigned long mfn; ++ unsigned int order; ++}; ++ ++const struct platform_bad_page *get_platform_badpages(unsigned int *array_size); ++ + /* Per page locks: + * page_lock() is used for two purposes: pte serialization, and memory sharing. + * |
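
As an aside on xsa282-2.patch above: it blacklists the 4MiB of memory starting at 1GiB only on CPUs whose CPUID signature matches the listed Core/Xeon generations, and only when HLE is actually exposed and Xen is not itself running virtualized. A rough userspace approximation of that gate, reading /proc/cpuinfo (the decimal family/model numbers are a decoding of the patch's hex signatures, so treat them as an assumption):

    #!/bin/sh
    # Family-6 models decoded from the signatures in xsa282-2.patch:
    # 0x4e=78, 0x55=85, 0x5e=94, 0x8e=142, 0x9e=158 (Skylake/Kaby Lake era).
    family=$(awk -F: '$1 ~ /^cpu family/         { print $2 + 0; exit }' /proc/cpuinfo)
    model=$(awk -F:  '$1 ~ /^model[[:space:]]*$/ { print $2 + 0; exit }' /proc/cpuinfo)
    flags=$(awk -F:  '$1 ~ /^flags/              { print $2; exit }' /proc/cpuinfo)

    case "$family:$model" in
    6:78|6:85|6:94|6:142|6:158)
        if echo "$flags" | grep -qw hle && ! echo "$flags" | grep -qw hypervisor; then
            echo "host looks affected: the HLE workaround would kick in"
        else
            echo "matching model, but HLE not exposed or running virtualized"
        fi
        ;;
    *)
        echo "not one of the listed models"
        ;;
    esac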