author     Leonardo Arena <rnalrd@alpinelinux.org>  2019-02-04 08:21:18 +0000
committer  Leonardo Arena <rnalrd@alpinelinux.org>  2019-02-04 08:21:42 +0000
commit     f39fc76089047b3f9b0e1ec06aecb40cc1ac1786 (patch)
tree       682b45d3e61bc3df3c9695ab9b7672d442f6fb3f /main
parent     d39be709613fbc979651b09ac2bc27c6591afd99 (diff)
download   aports-f39fc76089047b3f9b0e1ec06aecb40cc1ac1786.tar.bz2
           aports-f39fc76089047b3f9b0e1ec06aecb40cc1ac1786.tar.xz
main/xen: security fixes
Diffstat (limited to 'main')
-rw-r--r--  main/xen/APKBUILD             |  26
-rw-r--r--  main/xen/xsa275-4.11-1.patch  | 104
-rw-r--r--  main/xen/xsa275-4.11-2.patch  |  68
-rw-r--r--  main/xen/xsa278-4.11.patch    | 326
-rw-r--r--  main/xen/xsa279-4.9.patch     |  35
-rw-r--r--  main/xen/xsa280-4.10-2.patch  | 141
-rw-r--r--  main/xen/xsa280-4.9-1.patch   | 112
-rw-r--r--  main/xen/xsa282-2.patch       |  42
-rw-r--r--  main/xen/xsa282-4.9-1.patch   |  87
9 files changed, 940 insertions(+), 1 deletion(-)
diff --git a/main/xen/APKBUILD b/main/xen/APKBUILD
index a35bff37af..b3b9171512 100644
--- a/main/xen/APKBUILD
+++ b/main/xen/APKBUILD
@@ -3,7 +3,7 @@
# Maintainer: William Pitcock <nenolod@dereferenced.org>
pkgname=xen
pkgver=4.9.3
-pkgrel=0
+pkgrel=1
pkgdesc="Xen hypervisor"
url="http://www.xen.org/"
arch="x86_64 armhf aarch64"
@@ -130,6 +130,13 @@ options="!strip"
# - CVE-2018-15468 XSA-269
# - CVE-2018-15470 XSA-272
# - CVE-2018-3620 CVE-2018-3646 XSA-273
+# 4.9.3-r1:
+# - CVE-2018-19961 XSA-275
+# - CVE-2018-19962 XSA-275
+# - CVE-2018-18883 XSA-278
+# - CVE-2018-19965 XSA-279
+# - CVE-2018-19966 XSA-280
+# - CVE-2018-19967 XSA-282
case "$CARCH" in
x86*)
@@ -194,6 +201,15 @@ source="https://downloads.xenproject.org/release/$pkgname/$pkgver/$pkgname-$pkgv
xenqemu-xattr-size-max.patch
+ xsa275-4.11-1.patch
+ xsa275-4.11-2.patch
+ xsa278-4.11.patch
+ xsa282-4.9-1.patch
+ xsa282-2.patch
+ xsa279-4.9.patch
+ xsa280-4.10-2.patch
+ xsa280-4.9-1.patch
+
xenstored.initd
xenstored.confd
xenconsoled.initd
@@ -446,6 +462,14 @@ a3197d9c2455983554610031702ea95dc31f1b375b8c1291207d33c9e6114c6928417b4c8138cb53
e76816c6ad0e91dc5f81947f266da3429b20e6d976c3e8c41202c6179532eec878a3f0913921ef3ac853c5dbad8082da3c9cd53b65081910516feb492577b7fc xen-fd-is-file.c
69dfa60628ca838678862383528654ecbdf4269cbb5c9cfb6b84d976202a8dea85d711aa65a52fa1b477fb0b30604ca70cf1337192d6fb9388a08bbe7fe56077 xenstore_client_transaction_fix.patch
2094ea964fa610b2bf72fd2c7ede7e954899a75c0f5b08030cf1d74460fb759ade84866176e32f8fe29c921dfdc6dafd2b31e23ab9b0a3874d3dceeabdd1913b xenqemu-xattr-size-max.patch
+158054c37d9df6b3576246ecf43505fb5417febad175650dce954151ed52b8ce27729a59ac873ce1cf210e6613c8315378fb5ac9ab1667e9b844fe0d007c776d xsa275-4.11-1.patch
+6f118663e5e5c86449e05a22a84a400bb9d1e8ef6b351cbba00fafcf52932924392c44a9664d1f3d720473cc7afbb04abff0a60ec7be75109bf355f8fe95fa59 xsa275-4.11-2.patch
+35c8c90b78856ce364cac0ddfd759aa807480bb57136e609a1462ad0f53e867a6a2acafbec5dad586d6d5159e2e377e5b6aa2ffe659d83a7684b7bb6fddba1a6 xsa278-4.11.patch
+7050af051031c499170bb42a2060678297f6e3ff5b9079b646b84a9ad137ed478fe319ba43b9bccde56b9c4a341672403458c12d2adbf8e208995b7e09a5ca14 xsa279-4.9.patch
+5eb30e29e22cf7c76a777f99e1e8035be1d6d645ddb616446a7840ef93fd4e2d2fedda9e7a3708b31e42c12b14178aa424c50b3e3f585b93052fcbc9a357f21d xsa280-4.10-2.patch
+0517d9ab5dd0e1faef5126fbd012306da503a23d95143b232ca61aba2bf92a15ebced3c4a4b9bb3c5105a089ea7dff2059e861c80a82975372d78ecdbc32a4c4 xsa280-4.9-1.patch
+a2cb124aab729931617e10a6a34900c21ef7f846926447a8752adb343ef7bf32f3625059f25c6487df27337eee03701da9a3009154d82a2cd1c8fb4be58cbc2e xsa282-2.patch
+d5dd53d66fc45dfccd51adf81e8864b70c6c35922479002419e6e984738f4a2695d528be4d871d9aa9f4ddf60987990580a6f8ebf0a7b99e5845984f0f36755b xsa282-4.9-1.patch
52c43beb2596d645934d0f909f2d21f7587b6898ed5e5e7046799a8ed6d58f7a09c5809e1634fa26152f3fd4f3e7cfa07da7076f01b4a20cc8f5df8b9cb77e50 xenstored.initd
093f7fbd43faf0a16a226486a0776bade5dc1681d281c5946a3191c32d74f9699c6bf5d0ab8de9d1195a2461165d1660788e92a3156c9b3c7054d7b2d52d7ff0 xenstored.confd
3c86ed48fbee0af4051c65c4a3893f131fa66e47bf083caf20c9b6aa4b63fdead8832f84a58d0e27964bc49ec8397251b34e5be5c212c139f556916dc8da9523 xenconsoled.initd
diff --git a/main/xen/xsa275-4.11-1.patch b/main/xen/xsa275-4.11-1.patch
new file mode 100644
index 0000000000..932d8f1132
--- /dev/null
+++ b/main/xen/xsa275-4.11-1.patch
@@ -0,0 +1,104 @@
+From: Roger Pau Monné <roger.pau@citrix.com>
+Subject: amd/iommu: fix flush checks
+
+Flush checking for AMD IOMMU didn't check whether the previous entry
+was present, or whether the flags (writable/readable) changed in order
+to decide whether a flush should be executed.
+
+Fix this by taking the writable/readable/next-level fields into account,
+together with the present bit.
+
+Along these lines the flushing in amd_iommu_map_page() must not be
+omitted for PV domains. The comment there was simply wrong: Mappings may
+very well change, both their addresses and their permissions. Ultimately
+this should honor iommu_dont_flush_iotlb, but to achieve this
+amd_iommu_ops first needs to gain an .iotlb_flush hook.
+
+Also make clear_iommu_pte_present() static, to demonstrate there's no
+caller omitting the (subsequent) flush.
+
+This is part of XSA-275.
+
+Reported-by: Paul Durrant <paul.durrant@citrix.com>
+Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+
+--- a/xen/drivers/passthrough/amd/iommu_map.c
++++ b/xen/drivers/passthrough/amd/iommu_map.c
+@@ -35,7 +35,7 @@ static unsigned int pfn_to_pde_idx(unsig
+ return idx;
+ }
+
+-void clear_iommu_pte_present(unsigned long l1_mfn, unsigned long gfn)
++static void clear_iommu_pte_present(unsigned long l1_mfn, unsigned long gfn)
+ {
+ u64 *table, *pte;
+
+@@ -49,23 +49,42 @@ static bool_t set_iommu_pde_present(u32
+ unsigned int next_level,
+ bool_t iw, bool_t ir)
+ {
+- u64 addr_lo, addr_hi, maddr_old, maddr_next;
++ uint64_t addr_lo, addr_hi, maddr_next;
+ u32 entry;
+- bool_t need_flush = 0;
++ bool need_flush = false, old_present;
+
+ maddr_next = (u64)next_mfn << PAGE_SHIFT;
+
+- addr_hi = get_field_from_reg_u32(pde[1],
+- IOMMU_PTE_ADDR_HIGH_MASK,
+- IOMMU_PTE_ADDR_HIGH_SHIFT);
+- addr_lo = get_field_from_reg_u32(pde[0],
+- IOMMU_PTE_ADDR_LOW_MASK,
+- IOMMU_PTE_ADDR_LOW_SHIFT);
+-
+- maddr_old = (addr_hi << 32) | (addr_lo << PAGE_SHIFT);
+-
+- if ( maddr_old != maddr_next )
+- need_flush = 1;
++ old_present = get_field_from_reg_u32(pde[0], IOMMU_PTE_PRESENT_MASK,
++ IOMMU_PTE_PRESENT_SHIFT);
++ if ( old_present )
++ {
++ bool old_r, old_w;
++ unsigned int old_level;
++ uint64_t maddr_old;
++
++ addr_hi = get_field_from_reg_u32(pde[1],
++ IOMMU_PTE_ADDR_HIGH_MASK,
++ IOMMU_PTE_ADDR_HIGH_SHIFT);
++ addr_lo = get_field_from_reg_u32(pde[0],
++ IOMMU_PTE_ADDR_LOW_MASK,
++ IOMMU_PTE_ADDR_LOW_SHIFT);
++ old_level = get_field_from_reg_u32(pde[0],
++ IOMMU_PDE_NEXT_LEVEL_MASK,
++ IOMMU_PDE_NEXT_LEVEL_SHIFT);
++ old_w = get_field_from_reg_u32(pde[1],
++ IOMMU_PTE_IO_WRITE_PERMISSION_MASK,
++ IOMMU_PTE_IO_WRITE_PERMISSION_SHIFT);
++ old_r = get_field_from_reg_u32(pde[1],
++ IOMMU_PTE_IO_READ_PERMISSION_MASK,
++ IOMMU_PTE_IO_READ_PERMISSION_SHIFT);
++
++ maddr_old = (addr_hi << 32) | (addr_lo << PAGE_SHIFT);
++
++ if ( maddr_old != maddr_next || iw != old_w || ir != old_r ||
++ old_level != next_level )
++ need_flush = true;
++ }
+
+ addr_lo = maddr_next & DMA_32BIT_MASK;
+ addr_hi = maddr_next >> 32;
+@@ -687,10 +706,7 @@ int amd_iommu_map_page(struct domain *d,
+ if ( !need_flush )
+ goto out;
+
+- /* 4K mapping for PV guests never changes,
+- * no need to flush if we trust non-present bits */
+- if ( is_hvm_domain(d) )
+- amd_iommu_flush_pages(d, gfn, 0);
++ amd_iommu_flush_pages(d, gfn, 0);
+
+ for ( merge_level = IOMMU_PAGING_MODE_LEVEL_2;
+ merge_level <= hd->arch.paging_mode; merge_level++ )
diff --git a/main/xen/xsa275-4.11-2.patch b/main/xen/xsa275-4.11-2.patch
new file mode 100644
index 0000000000..72a2d07882
--- /dev/null
+++ b/main/xen/xsa275-4.11-2.patch
@@ -0,0 +1,68 @@
+From: Jan Beulich <jbeulich@suse.com>
+Subject: AMD/IOMMU: suppress PTE merging after initial table creation
+
+The logic is not fit for this purpose, so simply disable its use until
+it can be fixed / replaced. Note that this re-enables merging for the
+table creation case, which was disabled as a (perhaps unintended) side
+effect of the earlier "amd/iommu: fix flush checks". It relies on no
+page getting mapped more than once (with different properties) in this
+process, as that would still be beyond what the merging logic can cope
+with. But arch_iommu_populate_page_table() guarantees this afaict.
+
+This is part of XSA-275.
+
+Reported-by: Paul Durrant <paul.durrant@citrix.com>
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+
+--- a/xen/drivers/passthrough/amd/iommu_map.c
++++ b/xen/drivers/passthrough/amd/iommu_map.c
+@@ -702,11 +702,24 @@ int amd_iommu_map_page(struct domain *d,
+ !!(flags & IOMMUF_writable),
+ !!(flags & IOMMUF_readable));
+
+- /* Do not increase pde count if io mapping has not been changed */
+- if ( !need_flush )
+- goto out;
++ if ( need_flush )
++ {
++ amd_iommu_flush_pages(d, gfn, 0);
++ /* No further merging, as the logic doesn't cope. */
++ hd->arch.no_merge = true;
++ }
+
+- amd_iommu_flush_pages(d, gfn, 0);
++ /*
++ * Suppress merging of non-R/W mappings or after initial table creation,
++ * as the merge logic does not cope with this.
++ */
++ if ( hd->arch.no_merge || flags != (IOMMUF_writable | IOMMUF_readable) )
++ goto out;
++ if ( d->creation_finished )
++ {
++ hd->arch.no_merge = true;
++ goto out;
++ }
+
+ for ( merge_level = IOMMU_PAGING_MODE_LEVEL_2;
+ merge_level <= hd->arch.paging_mode; merge_level++ )
+@@ -780,6 +793,10 @@ int amd_iommu_unmap_page(struct domain *
+
+ /* mark PTE as 'page not present' */
+ clear_iommu_pte_present(pt_mfn[1], gfn);
++
++ /* No further merging in amd_iommu_map_page(), as the logic doesn't cope. */
++ hd->arch.no_merge = true;
++
+ spin_unlock(&hd->arch.mapping_lock);
+
+ amd_iommu_flush_pages(d, gfn, 0);
+--- a/xen/include/asm-x86/iommu.h
++++ b/xen/include/asm-x86/iommu.h
+@@ -40,6 +40,7 @@ struct arch_iommu
+
+ /* amd iommu support */
+ int paging_mode;
++ bool no_merge;
+ struct page_info *root_table;
+ struct guest_iommu *g_iommu;
+ };
diff --git a/main/xen/xsa278-4.11.patch b/main/xen/xsa278-4.11.patch
new file mode 100644
index 0000000000..9e1484981e
--- /dev/null
+++ b/main/xen/xsa278-4.11.patch
@@ -0,0 +1,326 @@
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Subject: x86/vvmx: Disallow the use of VT-x instructions when nested virt is disabled
+
+c/s ac6a4500b "vvmx: set vmxon_region_pa of vcpu out of VMX operation to an
+invalid address" was a real bugfix as described, but has a very subtle bug
+which results in all VT-x instructions being usable by a guest.
+
+The toolstack constructs a guest by issuing:
+
+ XEN_DOMCTL_createdomain
+ XEN_DOMCTL_max_vcpus
+
+and optionally later, HVMOP_set_param to enable nested virt.
+
+As a result, the call to nvmx_vcpu_initialise() in hvm_vcpu_initialise()
+(which is what makes the above patch look correct during review) is actually
+dead code. In practice, nvmx_vcpu_initialise() first gets called when nested
+virt is enabled, which is typically never.
+
+As a result, the zeroed memory of struct vcpu causes nvmx_vcpu_in_vmx() to
+return true before nested virt is enabled for the guest.
+
+Fixing the order of initialisation is a work in progress for other reasons,
+but not viable for security backports.
+
+A compounding factor is that the vmexit handlers for all instructions, other
+than VMXON, pass 0 into vmx_inst_check_privilege()'s vmxop_check parameter,
+which skips the CR4.VMXE check. (This is one of many reasons why nested virt
+isn't a supported feature yet.)
+
+However, the overall result is that when nested virt is not enabled by the
+toolstack (i.e. the default configuration for all production guests), the VT-x
+instructions (other than VMXON) are actually usable, and Xen very quickly
+falls over the fact that the nvmx structure is uninitialised.
+
+In order to fail safe in the supported case, re-implement all the VT-x
+instruction handling using a single function with a common prologue, covering
+all the checks which should cause #UD or #GP faults. This deliberately
+doesn't use any state from the nvmx structure, in case there are other lurking
+issues.
+
+This is XSA-278
+
+Reported-by: Sergey Dyasli <sergey.dyasli@citrix.com>
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Reviewed-by: Sergey Dyasli <sergey.dyasli@citrix.com>
+
+diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
+index a6415f0..a4d2829 100644
+--- a/xen/arch/x86/hvm/vmx/vmx.c
++++ b/xen/arch/x86/hvm/vmx/vmx.c
+@@ -3982,57 +3982,17 @@ void vmx_vmexit_handler(struct cpu_user_regs *regs)
+ break;
+
+ case EXIT_REASON_VMXOFF:
+- if ( nvmx_handle_vmxoff(regs) == X86EMUL_OKAY )
+- update_guest_eip();
+- break;
+-
+ case EXIT_REASON_VMXON:
+- if ( nvmx_handle_vmxon(regs) == X86EMUL_OKAY )
+- update_guest_eip();
+- break;
+-
+ case EXIT_REASON_VMCLEAR:
+- if ( nvmx_handle_vmclear(regs) == X86EMUL_OKAY )
+- update_guest_eip();
+- break;
+-
+ case EXIT_REASON_VMPTRLD:
+- if ( nvmx_handle_vmptrld(regs) == X86EMUL_OKAY )
+- update_guest_eip();
+- break;
+-
+ case EXIT_REASON_VMPTRST:
+- if ( nvmx_handle_vmptrst(regs) == X86EMUL_OKAY )
+- update_guest_eip();
+- break;
+-
+ case EXIT_REASON_VMREAD:
+- if ( nvmx_handle_vmread(regs) == X86EMUL_OKAY )
+- update_guest_eip();
+- break;
+-
+ case EXIT_REASON_VMWRITE:
+- if ( nvmx_handle_vmwrite(regs) == X86EMUL_OKAY )
+- update_guest_eip();
+- break;
+-
+ case EXIT_REASON_VMLAUNCH:
+- if ( nvmx_handle_vmlaunch(regs) == X86EMUL_OKAY )
+- update_guest_eip();
+- break;
+-
+ case EXIT_REASON_VMRESUME:
+- if ( nvmx_handle_vmresume(regs) == X86EMUL_OKAY )
+- update_guest_eip();
+- break;
+-
+ case EXIT_REASON_INVEPT:
+- if ( nvmx_handle_invept(regs) == X86EMUL_OKAY )
+- update_guest_eip();
+- break;
+-
+ case EXIT_REASON_INVVPID:
+- if ( nvmx_handle_invvpid(regs) == X86EMUL_OKAY )
++ if ( nvmx_handle_vmx_insn(regs, exit_reason) == X86EMUL_OKAY )
+ update_guest_eip();
+ break;
+
+diff --git a/xen/arch/x86/hvm/vmx/vvmx.c b/xen/arch/x86/hvm/vmx/vvmx.c
+index e97db33..88cb58c 100644
+--- a/xen/arch/x86/hvm/vmx/vvmx.c
++++ b/xen/arch/x86/hvm/vmx/vvmx.c
+@@ -1470,7 +1470,7 @@ void nvmx_switch_guest(void)
+ * VMX instructions handling
+ */
+
+-int nvmx_handle_vmxon(struct cpu_user_regs *regs)
++static int nvmx_handle_vmxon(struct cpu_user_regs *regs)
+ {
+ struct vcpu *v=current;
+ struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
+@@ -1522,7 +1522,7 @@ int nvmx_handle_vmxon(struct cpu_user_regs *regs)
+ return X86EMUL_OKAY;
+ }
+
+-int nvmx_handle_vmxoff(struct cpu_user_regs *regs)
++static int nvmx_handle_vmxoff(struct cpu_user_regs *regs)
+ {
+ struct vcpu *v=current;
+ struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
+@@ -1611,7 +1611,7 @@ static int nvmx_vmresume(struct vcpu *v, struct cpu_user_regs *regs)
+ return X86EMUL_OKAY;
+ }
+
+-int nvmx_handle_vmresume(struct cpu_user_regs *regs)
++static int nvmx_handle_vmresume(struct cpu_user_regs *regs)
+ {
+ bool_t launched;
+ struct vcpu *v = current;
+@@ -1645,7 +1645,7 @@ int nvmx_handle_vmresume(struct cpu_user_regs *regs)
+ return nvmx_vmresume(v,regs);
+ }
+
+-int nvmx_handle_vmlaunch(struct cpu_user_regs *regs)
++static int nvmx_handle_vmlaunch(struct cpu_user_regs *regs)
+ {
+ bool_t launched;
+ struct vcpu *v = current;
+@@ -1688,7 +1688,7 @@ int nvmx_handle_vmlaunch(struct cpu_user_regs *regs)
+ return rc;
+ }
+
+-int nvmx_handle_vmptrld(struct cpu_user_regs *regs)
++static int nvmx_handle_vmptrld(struct cpu_user_regs *regs)
+ {
+ struct vcpu *v = current;
+ struct vmx_inst_decoded decode;
+@@ -1759,7 +1759,7 @@ int nvmx_handle_vmptrld(struct cpu_user_regs *regs)
+ return X86EMUL_OKAY;
+ }
+
+-int nvmx_handle_vmptrst(struct cpu_user_regs *regs)
++static int nvmx_handle_vmptrst(struct cpu_user_regs *regs)
+ {
+ struct vcpu *v = current;
+ struct vmx_inst_decoded decode;
+@@ -1784,7 +1784,7 @@ int nvmx_handle_vmptrst(struct cpu_user_regs *regs)
+ return X86EMUL_OKAY;
+ }
+
+-int nvmx_handle_vmclear(struct cpu_user_regs *regs)
++static int nvmx_handle_vmclear(struct cpu_user_regs *regs)
+ {
+ struct vcpu *v = current;
+ struct vmx_inst_decoded decode;
+@@ -1836,7 +1836,7 @@ int nvmx_handle_vmclear(struct cpu_user_regs *regs)
+ return X86EMUL_OKAY;
+ }
+
+-int nvmx_handle_vmread(struct cpu_user_regs *regs)
++static int nvmx_handle_vmread(struct cpu_user_regs *regs)
+ {
+ struct vcpu *v = current;
+ struct vmx_inst_decoded decode;
+@@ -1878,7 +1878,7 @@ int nvmx_handle_vmread(struct cpu_user_regs *regs)
+ return X86EMUL_OKAY;
+ }
+
+-int nvmx_handle_vmwrite(struct cpu_user_regs *regs)
++static int nvmx_handle_vmwrite(struct cpu_user_regs *regs)
+ {
+ struct vcpu *v = current;
+ struct vmx_inst_decoded decode;
+@@ -1926,7 +1926,7 @@ int nvmx_handle_vmwrite(struct cpu_user_regs *regs)
+ return X86EMUL_OKAY;
+ }
+
+-int nvmx_handle_invept(struct cpu_user_regs *regs)
++static int nvmx_handle_invept(struct cpu_user_regs *regs)
+ {
+ struct vmx_inst_decoded decode;
+ unsigned long eptp;
+@@ -1954,7 +1954,7 @@ int nvmx_handle_invept(struct cpu_user_regs *regs)
+ return X86EMUL_OKAY;
+ }
+
+-int nvmx_handle_invvpid(struct cpu_user_regs *regs)
++static int nvmx_handle_invvpid(struct cpu_user_regs *regs)
+ {
+ struct vmx_inst_decoded decode;
+ unsigned long vpid;
+@@ -1980,6 +1980,81 @@ int nvmx_handle_invvpid(struct cpu_user_regs *regs)
+ return X86EMUL_OKAY;
+ }
+
++int nvmx_handle_vmx_insn(struct cpu_user_regs *regs, unsigned int exit_reason)
++{
++ struct vcpu *curr = current;
++ int ret;
++
++ if ( !(curr->arch.hvm_vcpu.guest_cr[4] & X86_CR4_VMXE) ||
++ !nestedhvm_enabled(curr->domain) ||
++ (vmx_guest_x86_mode(curr) < (hvm_long_mode_active(curr) ? 8 : 2)) )
++ {
++ hvm_inject_hw_exception(TRAP_invalid_op, X86_EVENT_NO_EC);
++ return X86EMUL_EXCEPTION;
++ }
++
++ if ( vmx_get_cpl() > 0 )
++ {
++ hvm_inject_hw_exception(TRAP_gp_fault, 0);
++ return X86EMUL_EXCEPTION;
++ }
++
++ switch ( exit_reason )
++ {
++ case EXIT_REASON_VMXOFF:
++ ret = nvmx_handle_vmxoff(regs);
++ break;
++
++ case EXIT_REASON_VMXON:
++ ret = nvmx_handle_vmxon(regs);
++ break;
++
++ case EXIT_REASON_VMCLEAR:
++ ret = nvmx_handle_vmclear(regs);
++ break;
++
++ case EXIT_REASON_VMPTRLD:
++ ret = nvmx_handle_vmptrld(regs);
++ break;
++
++ case EXIT_REASON_VMPTRST:
++ ret = nvmx_handle_vmptrst(regs);
++ break;
++
++ case EXIT_REASON_VMREAD:
++ ret = nvmx_handle_vmread(regs);
++ break;
++
++ case EXIT_REASON_VMWRITE:
++ ret = nvmx_handle_vmwrite(regs);
++ break;
++
++ case EXIT_REASON_VMLAUNCH:
++ ret = nvmx_handle_vmlaunch(regs);
++ break;
++
++ case EXIT_REASON_VMRESUME:
++ ret = nvmx_handle_vmresume(regs);
++ break;
++
++ case EXIT_REASON_INVEPT:
++ ret = nvmx_handle_invept(regs);
++ break;
++
++ case EXIT_REASON_INVVPID:
++ ret = nvmx_handle_invvpid(regs);
++ break;
++
++ default:
++ ASSERT_UNREACHABLE();
++ domain_crash(curr->domain);
++ ret = X86EMUL_UNHANDLEABLE;
++ break;
++ }
++
++ return ret;
++}
++
+ #define __emul_value(enable1, default1) \
+ ((enable1 | default1) << 32 | (default1))
+
+diff --git a/xen/include/asm-x86/hvm/vmx/vvmx.h b/xen/include/asm-x86/hvm/vmx/vvmx.h
+index 9ea35eb..fc4a8d1 100644
+--- a/xen/include/asm-x86/hvm/vmx/vvmx.h
++++ b/xen/include/asm-x86/hvm/vmx/vvmx.h
+@@ -94,9 +94,6 @@ void nvmx_domain_relinquish_resources(struct domain *d);
+
+ bool_t nvmx_ept_enabled(struct vcpu *v);
+
+-int nvmx_handle_vmxon(struct cpu_user_regs *regs);
+-int nvmx_handle_vmxoff(struct cpu_user_regs *regs);
+-
+ #define EPT_TRANSLATE_SUCCEED 0
+ #define EPT_TRANSLATE_VIOLATION 1
+ #define EPT_TRANSLATE_MISCONFIG 2
+@@ -191,15 +188,7 @@ enum vmx_insn_errno set_vvmcs_real_safe(const struct vcpu *, u32 encoding,
+ uint64_t get_shadow_eptp(struct vcpu *v);
+
+ void nvmx_destroy_vmcs(struct vcpu *v);
+-int nvmx_handle_vmptrld(struct cpu_user_regs *regs);
+-int nvmx_handle_vmptrst(struct cpu_user_regs *regs);
+-int nvmx_handle_vmclear(struct cpu_user_regs *regs);
+-int nvmx_handle_vmread(struct cpu_user_regs *regs);
+-int nvmx_handle_vmwrite(struct cpu_user_regs *regs);
+-int nvmx_handle_vmresume(struct cpu_user_regs *regs);
+-int nvmx_handle_vmlaunch(struct cpu_user_regs *regs);
+-int nvmx_handle_invept(struct cpu_user_regs *regs);
+-int nvmx_handle_invvpid(struct cpu_user_regs *regs);
++int nvmx_handle_vmx_insn(struct cpu_user_regs *regs, unsigned int exit_reason);
+ int nvmx_msr_read_intercept(unsigned int msr,
+ u64 *msr_content);
+
diff --git a/main/xen/xsa279-4.9.patch b/main/xen/xsa279-4.9.patch
new file mode 100644
index 0000000000..984b7ccb0b
--- /dev/null
+++ b/main/xen/xsa279-4.9.patch
@@ -0,0 +1,35 @@
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Subject: x86/mm: Don't perform flush after failing to update a guests L1e
+
+If the L1e update hasn't occured, the flush cannot do anything useful. This
+skips the potentially expensive vcpumask_to_pcpumask() conversion, and
+broadcast TLB shootdown.
+
+More importantly however, we might be in the error path due to a bad va
+parameter from the guest, and this should not propagate into the TLB flushing
+logic. The INVPCID instruction for example raises #GP for a non-canonical
+address.
+
+This is XSA-279.
+
+Reported-by: Matthew Daley <mattd@bugfuzz.com>
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+
+--- a/xen/arch/x86/mm.c
++++ b/xen/arch/x86/mm.c
+@@ -4894,6 +4894,14 @@ static int __do_update_va_mapping(
+ if ( pl1e )
+ guest_unmap_l1e(pl1e);
+
++ /*
++ * Any error at this point means that we haven't change the l1e. Skip the
++ * flush, as it won't do anything useful. Furthermore, va is guest
++ * controlled and not necesserily audited by this point.
++ */
++ if ( rc )
++ return rc;
++
+ switch ( flags & UVMF_FLUSHTYPE_MASK )
+ {
+ case UVMF_TLB_FLUSH:
diff --git a/main/xen/xsa280-4.10-2.patch b/main/xen/xsa280-4.10-2.patch
new file mode 100644
index 0000000000..d250d7645a
--- /dev/null
+++ b/main/xen/xsa280-4.10-2.patch
@@ -0,0 +1,141 @@
+From: Jan Beulich <jbeulich@suse.com>
+Subject: x86/shadow: shrink struct page_info's shadow_flags to 16 bits
+
+This is to avoid it overlapping the linear_pt_count field needed for PV
+domains. Introduce a separate, HVM-only pagetable_dying field to replace
+the sole one left in the upper 16 bits.
+
+Note that the accesses to ->shadow_flags in shadow_{pro,de}mote() get
+switched to non-atomic, non-bitops operations, as {test,set,clear}_bit()
+are not allowed on uint16_t fields and hence their use would have
+required ugly casts. This is fine because all updates of the field ought
+to occur with the paging lock held, and other updates of it use |= and
+&= as well (i.e. using atomic operations here didn't really guard
+against potentially racing updates elsewhere).
+
+This is part of XSA-280.
+
+Reported-by: Prgmr.com Security <security@prgmr.com>
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Reviewed-by: Tim Deegan <tim@xen.org>
+
+--- a/xen/arch/x86/mm/shadow/common.c
++++ b/xen/arch/x86/mm/shadow/common.c
+@@ -957,10 +957,14 @@ void shadow_promote(struct domain *d, mf
+
+ /* Is the page already shadowed? */
+ if ( !test_and_set_bit(_PGC_page_table, &page->count_info) )
++ {
+ page->shadow_flags = 0;
++ if ( is_hvm_domain(d) )
++ page->pagetable_dying = false;
++ }
+
+- ASSERT(!test_bit(type, &page->shadow_flags));
+- set_bit(type, &page->shadow_flags);
++ ASSERT(!(page->shadow_flags & (1u << type)));
++ page->shadow_flags |= 1u << type;
+ TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_PROMOTE);
+ }
+
+@@ -969,9 +973,9 @@ void shadow_demote(struct domain *d, mfn
+ struct page_info *page = mfn_to_page(gmfn);
+
+ ASSERT(test_bit(_PGC_page_table, &page->count_info));
+- ASSERT(test_bit(type, &page->shadow_flags));
++ ASSERT(page->shadow_flags & (1u << type));
+
+- clear_bit(type, &page->shadow_flags);
++ page->shadow_flags &= ~(1u << type);
+
+ if ( (page->shadow_flags & SHF_page_type_mask) == 0 )
+ {
+@@ -2801,7 +2805,7 @@ void sh_remove_shadows(struct domain *d,
+ if ( !fast && all && (pg->count_info & PGC_page_table) )
+ {
+ SHADOW_ERROR("can't find all shadows of mfn %"PRI_mfn" "
+- "(shadow_flags=%08x)\n",
++ "(shadow_flags=%04x)\n",
+ mfn_x(gmfn), pg->shadow_flags);
+ domain_crash(d);
+ }
+--- a/xen/arch/x86/mm/shadow/multi.c
++++ b/xen/arch/x86/mm/shadow/multi.c
+@@ -3328,8 +3328,8 @@ static int sh_page_fault(struct vcpu *v,
+
+ /* Unshadow if we are writing to a toplevel pagetable that is
+ * flagged as a dying process, and that is not currently used. */
+- if ( sh_mfn_is_a_page_table(gmfn)
+- && (mfn_to_page(gmfn)->shadow_flags & SHF_pagetable_dying) )
++ if ( sh_mfn_is_a_page_table(gmfn) && is_hvm_domain(d) &&
++ mfn_to_page(gmfn)->pagetable_dying )
+ {
+ int used = 0;
+ struct vcpu *tmp;
+@@ -4301,9 +4301,9 @@ int sh_rm_write_access_from_sl1p(struct
+ ASSERT(mfn_valid(smfn));
+
+ /* Remember if we've been told that this process is being torn down */
+- if ( curr->domain == d )
++ if ( curr->domain == d && is_hvm_domain(d) )
+ curr->arch.paging.shadow.pagetable_dying
+- = !!(mfn_to_page(gmfn)->shadow_flags & SHF_pagetable_dying);
++ = mfn_to_page(gmfn)->pagetable_dying;
+
+ sp = mfn_to_page(smfn);
+
+@@ -4619,10 +4619,10 @@ static void sh_pagetable_dying(struct vc
+ : shadow_hash_lookup(d, mfn_x(gmfn), SH_type_l2_pae_shadow);
+ }
+
+- if ( mfn_valid(smfn) )
++ if ( mfn_valid(smfn) && is_hvm_domain(d) )
+ {
+ gmfn = _mfn(mfn_to_page(smfn)->v.sh.back);
+- mfn_to_page(gmfn)->shadow_flags |= SHF_pagetable_dying;
++ mfn_to_page(gmfn)->pagetable_dying = true;
+ shadow_unhook_mappings(d, smfn, 1/* user pages only */);
+ flush = 1;
+ }
+@@ -4659,9 +4659,9 @@ static void sh_pagetable_dying(struct vc
+ smfn = shadow_hash_lookup(d, mfn_x(gmfn), SH_type_l4_64_shadow);
+ #endif
+
+- if ( mfn_valid(smfn) )
++ if ( mfn_valid(smfn) && is_hvm_domain(d) )
+ {
+- mfn_to_page(gmfn)->shadow_flags |= SHF_pagetable_dying;
++ mfn_to_page(gmfn)->pagetable_dying = true;
+ shadow_unhook_mappings(d, smfn, 1/* user pages only */);
+ /* Now flush the TLB: we removed toplevel mappings. */
+ flush_tlb_mask(d->domain_dirty_cpumask);
+--- a/xen/arch/x86/mm/shadow/private.h
++++ b/xen/arch/x86/mm/shadow/private.h
+@@ -292,8 +292,6 @@ static inline void sh_terminate_list(str
+
+ #endif /* (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) */
+
+-#define SHF_pagetable_dying (1u<<31)
+-
+ static inline int sh_page_has_multiple_shadows(struct page_info *pg)
+ {
+ u32 shadows;
+--- a/xen/include/asm-x86/mm.h
++++ b/xen/include/asm-x86/mm.h
+@@ -188,8 +188,15 @@ struct page_info
+ * Guest pages with a shadow. This does not conflict with
+ * tlbflush_timestamp since page table pages are explicitly not
+ * tracked for TLB-flush avoidance when a guest runs in shadow mode.
++ *
++ * pagetable_dying is used for HVM domains only. The layout here has
++ * to avoid re-use of the space used by linear_pt_count, which (only)
++ * PV guests use.
+ */
+- u32 shadow_flags;
++ struct {
++ uint16_t shadow_flags;
++ bool pagetable_dying;
++ };
+
+ /* When in use as a shadow, next shadow in this hash chain. */
+ __pdx_t next_shadow;
diff --git a/main/xen/xsa280-4.9-1.patch b/main/xen/xsa280-4.9-1.patch
new file mode 100644
index 0000000000..32cc67c662
--- /dev/null
+++ b/main/xen/xsa280-4.9-1.patch
@@ -0,0 +1,112 @@
+From: Jan Beulich <jbeulich@suse.com>
+Subject: x86/shadow: move OOS flag bit positions
+
+In preparation of reducing struct page_info's shadow_flags field to 16
+bits, lower the bit positions used for SHF_out_of_sync and
+SHF_oos_may_write.
+
+Instead of also adjusting the open coded use in _get_page_type(),
+introduce shadow_prepare_page_type_change() to contain knowledge of the
+bit positions to shadow code.
+
+This is part of XSA-280.
+
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Reviewed-by: Tim Deegan <tim@xen.org>
+
+--- a/xen/arch/x86/mm.c
++++ b/xen/arch/x86/mm.c
+@@ -2799,15 +2799,8 @@ static int __get_page_type(struct page_i
+ {
+ struct domain *d = page_get_owner(page);
+
+- /* Normally we should never let a page go from type count 0
+- * to type count 1 when it is shadowed. One exception:
+- * out-of-sync shadowed pages are allowed to become
+- * writeable. */
+- if ( d && shadow_mode_enabled(d)
+- && (page->count_info & PGC_page_table)
+- && !((page->shadow_flags & (1u<<29))
+- && type == PGT_writable_page) )
+- shadow_remove_all_shadows(d, _mfn(page_to_mfn(page)));
++ if ( d && shadow_mode_enabled(d) )
++ shadow_prepare_page_type_change(d, page, type);
+
+ ASSERT(!(x & PGT_pae_xen_l2));
+ if ( (x & PGT_type_mask) != type )
+--- a/xen/arch/x86/mm/shadow/common.c
++++ b/xen/arch/x86/mm/shadow/common.c
+@@ -919,6 +919,9 @@ int sh_unsync(struct vcpu *v, mfn_t gmfn
+ || !v->domain->arch.paging.shadow.oos_active )
+ return 0;
+
++ BUILD_BUG_ON(!(typeof(pg->shadow_flags))SHF_out_of_sync);
++ BUILD_BUG_ON(!(typeof(pg->shadow_flags))SHF_oos_may_write);
++
+ pg->shadow_flags |= SHF_out_of_sync|SHF_oos_may_write;
+ oos_hash_add(v, gmfn);
+ perfc_incr(shadow_unsync);
+@@ -2810,6 +2813,26 @@ void sh_remove_shadows(struct domain *d,
+ paging_unlock(d);
+ }
+
++void shadow_prepare_page_type_change(struct domain *d, struct page_info *page,
++ unsigned long new_type)
++{
++ if ( !(page->count_info & PGC_page_table) )
++ return;
++
++#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
++ /*
++ * Normally we should never let a page go from type count 0 to type
++ * count 1 when it is shadowed. One exception: out-of-sync shadowed
++ * pages are allowed to become writeable.
++ */
++ if ( (page->shadow_flags & SHF_oos_may_write) &&
++ new_type == PGT_writable_page )
++ return;
++#endif
++
++ shadow_remove_all_shadows(d, page_to_mfn(page));
++}
++
+ static void
+ sh_remove_all_shadows_and_parents(struct domain *d, mfn_t gmfn)
+ /* Even harsher: this is a HVM page that we thing is no longer a pagetable.
+--- a/xen/arch/x86/mm/shadow/private.h
++++ b/xen/arch/x86/mm/shadow/private.h
+@@ -287,8 +287,8 @@ static inline void sh_terminate_list(str
+ * codepath is called during that time and is sensitive to oos issues, it may
+ * need to use the second flag.
+ */
+-#define SHF_out_of_sync (1u<<30)
+-#define SHF_oos_may_write (1u<<29)
++#define SHF_out_of_sync (1u << (SH_type_max_shadow + 1))
++#define SHF_oos_may_write (1u << (SH_type_max_shadow + 2))
+
+ #endif /* (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) */
+
+--- a/xen/include/asm-x86/shadow.h
++++ b/xen/include/asm-x86/shadow.h
+@@ -81,6 +81,10 @@ void shadow_final_teardown(struct domain
+
+ void sh_remove_shadows(struct domain *d, mfn_t gmfn, int fast, int all);
+
++/* Adjust shadows ready for a guest page to change its type. */
++void shadow_prepare_page_type_change(struct domain *d, struct page_info *page,
++ unsigned long new_type);
++
+ /* Discard _all_ mappings from the domain's shadows. */
+ void shadow_blow_tables_per_domain(struct domain *d);
+
+@@ -105,6 +109,10 @@ int shadow_set_allocation(struct domain
+ static inline void sh_remove_shadows(struct domain *d, mfn_t gmfn,
+ bool_t fast, bool_t all) {}
+
++static inline void shadow_prepare_page_type_change(struct domain *d,
++ struct page_info *page,
++ unsigned long new_type) {}
++
+ static inline void shadow_blow_tables_per_domain(struct domain *d) {}
+
+ static inline int shadow_domctl(struct domain *d, xen_domctl_shadow_op_t *sc,
diff --git a/main/xen/xsa282-2.patch b/main/xen/xsa282-2.patch
new file mode 100644
index 0000000000..4c34691db3
--- /dev/null
+++ b/main/xen/xsa282-2.patch
@@ -0,0 +1,42 @@
+From: Jan Beulich <jbeulich@suse.com>
+Subject: x86: work around HLE host lockup erratum
+
+XACQUIRE prefixed accesses to the 4Mb range of memory starting at 1Gb
+are liable to lock up the processor. Disallow use of this memory range.
+
+Unfortunately the available Core Gen7 and Gen8 spec updates are pretty
+old, so I can only guess that they're similarly affected when Core Gen6
+is and the Xeon counterparts are, too.
+
+This is part of XSA-282.
+
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
+---
+v2: Don't apply the workaround when running ourselves virtualized.
+
+--- a/xen/arch/x86/mm.c
++++ b/xen/arch/x86/mm.c
+@@ -5853,6 +5853,22 @@ const struct platform_bad_page *__init g
+ { .mfn = 0x20138000 >> PAGE_SHIFT },
+ { .mfn = 0x40004000 >> PAGE_SHIFT },
+ };
++ static const struct platform_bad_page __initconst hle_bad_page = {
++ .mfn = 0x40000000 >> PAGE_SHIFT, .order = 10
++ };
++
++ switch ( cpuid_eax(1) & 0x000f3ff0 )
++ {
++ case 0x000406e0: /* erratum SKL167 */
++ case 0x00050650: /* erratum SKZ63 */
++ case 0x000506e0: /* errata SKL167 / SKW159 */
++ case 0x000806e0: /* erratum KBL??? */
++ case 0x000906e0: /* errata KBL??? / KBW114 / CFW103 */
++ *array_size = (cpuid_eax(0) >= 7 &&
++ !(cpuid_ecx(1) & cpufeat_mask(X86_FEATURE_HYPERVISOR)) &&
++ (cpuid_count_ebx(7, 0) & cpufeat_mask(X86_FEATURE_HLE)));
++ return &hle_bad_page;
++ }
+
+ *array_size = ARRAY_SIZE(snb_bad_pages);
+ igd_id = pci_conf_read32(0, 0, 2, 0, 0);
diff --git a/main/xen/xsa282-4.9-1.patch b/main/xen/xsa282-4.9-1.patch
new file mode 100644
index 0000000000..580933d4b0
--- /dev/null
+++ b/main/xen/xsa282-4.9-1.patch
@@ -0,0 +1,87 @@
+From: Jan Beulich <jbeulich@suse.com>
+Subject: x86: extend get_platform_badpages() interface
+
+Use a structure so along with an address (now frame number) an order can
+also be specified.
+
+This is part of XSA-282.
+
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
+
+--- a/xen/arch/x86/mm.c
++++ b/xen/arch/x86/mm.c
+@@ -7111,23 +7111,23 @@ void arch_dump_shared_mem_info(void)
+ mem_sharing_get_nr_saved_mfns());
+ }
+
+-const unsigned long *__init get_platform_badpages(unsigned int *array_size)
++const struct platform_bad_page *__init get_platform_badpages(unsigned int *array_size)
+ {
+ u32 igd_id;
+- static unsigned long __initdata bad_pages[] = {
+- 0x20050000,
+- 0x20110000,
+- 0x20130000,
+- 0x20138000,
+- 0x40004000,
++ static const struct platform_bad_page __initconst snb_bad_pages[] = {
++ { .mfn = 0x20050000 >> PAGE_SHIFT },
++ { .mfn = 0x20110000 >> PAGE_SHIFT },
++ { .mfn = 0x20130000 >> PAGE_SHIFT },
++ { .mfn = 0x20138000 >> PAGE_SHIFT },
++ { .mfn = 0x40004000 >> PAGE_SHIFT },
+ };
+
+- *array_size = ARRAY_SIZE(bad_pages);
++ *array_size = ARRAY_SIZE(snb_bad_pages);
+ igd_id = pci_conf_read32(0, 0, 2, 0, 0);
+- if ( !IS_SNB_GFX(igd_id) )
+- return NULL;
++ if ( IS_SNB_GFX(igd_id) )
++ return snb_bad_pages;
+
+- return bad_pages;
++ return NULL;
+ }
+
+ void paging_invlpg(struct vcpu *v, unsigned long va)
+--- a/xen/common/page_alloc.c
++++ b/xen/common/page_alloc.c
+@@ -270,7 +270,7 @@ void __init init_boot_pages(paddr_t ps,
+ unsigned long bad_spfn, bad_epfn;
+ const char *p;
+ #ifdef CONFIG_X86
+- const unsigned long *badpage = NULL;
++ const struct platform_bad_page *badpage;
+ unsigned int i, array_size;
+ #endif
+
+@@ -295,8 +295,8 @@ void __init init_boot_pages(paddr_t ps,
+ {
+ for ( i = 0; i < array_size; i++ )
+ {
+- bootmem_region_zap(*badpage >> PAGE_SHIFT,
+- (*badpage >> PAGE_SHIFT) + 1);
++ bootmem_region_zap(badpage->mfn,
++ badpage->mfn + (1U << badpage->order));
+ badpage++;
+ }
+ }
+--- a/xen/include/asm-x86/mm.h
++++ b/xen/include/asm-x86/mm.h
+@@ -350,7 +350,13 @@ bool is_iomem_page(mfn_t mfn);
+
+ void clear_superpage_mark(struct page_info *page);
+
+-const unsigned long *get_platform_badpages(unsigned int *array_size);
++struct platform_bad_page {
++ unsigned long mfn;
++ unsigned int order;
++};
++
++const struct platform_bad_page *get_platform_badpages(unsigned int *array_size);
++
+ /* Per page locks:
+ * page_lock() is used for two purposes: pte serialization, and memory sharing.
+ *