about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Natanael Copa <ncopa@alpinelinux.org> 2014-08-26 06:22:11 +0000
committer Natanael Copa <ncopa@alpinelinux.org> 2014-08-26 06:31:06 +0000
commit 34cd3222cc7f54f16d6de20f5c5868a69c0edc4b (patch)
tree 2948f487737d0afda155b6eae43852dac9335c42
parent e2856516886b7cf35fe1cf5be0eb646ea76ca687 (diff)
download aports-34cd3222cc7f54f16d6de20f5c5868a69c0edc4b.tar.bz2
aports-34cd3222cc7f54f16d6de20f5c5868a69c0edc4b.tar.xz
main/xen: upgrade to 4.2.4 and fix XSA-97 (CVE-2014-5146,CVE-2014-5149)
fixes #3291
-rw-r--r-- main/xen/APKBUILD 48
-rw-r--r-- main/xen/xsa33-4.2-unstable.patch 21
-rw-r--r-- main/xen/xsa34-4.2.patch 30
-rw-r--r-- main/xen/xsa35-4.2-with-xsa34.patch 24
-rw-r--r-- main/xen/xsa36-4.2.patch 323
-rw-r--r-- main/xen/xsa38.patch 73
-rw-r--r-- main/xen/xsa45-4.2.patch 1133
-rw-r--r-- main/xen/xsa48-4.2.patch 114
-rw-r--r-- main/xen/xsa52-4.2-unstable.patch 46
-rw-r--r-- main/xen/xsa53-4.2.patch 57
-rw-r--r-- main/xen/xsa54.patch 24
-rw-r--r-- main/xen/xsa55.patch 3431
-rw-r--r-- main/xen/xsa56.patch 50
-rw-r--r-- main/xen/xsa57.patch 333
-rw-r--r-- main/xen/xsa58-4.2.patch 129
-rw-r--r-- main/xen/xsa61-4.2-unstable.patch 44
-rw-r--r-- main/xen/xsa75-4.2.patch 53
-rw-r--r-- main/xen/xsa97-hap-4_2-prereq.patch 466
-rw-r--r-- main/xen/xsa97-hap-4_2.patch 485
19 files changed, 958 insertions, 5926 deletions
diff --git a/main/xen/APKBUILD b/main/xen/APKBUILD
index 6d10160507..9f3221d865 100644
--- a/main/xen/APKBUILD
+++ b/main/xen/APKBUILD
@@ -2,8 +2,8 @@
# Contributor: Roger Pau Monne <roger.pau@entel.upc.edu>
# Maintainer: William Pitcock <nenolod@dereferenced.org>
pkgname=xen
-pkgver=4.2.1
-pkgrel=17
+pkgver=4.2.4
+pkgrel=0
pkgdesc="Xen hypervisor"
url="http://www.xen.org/"
arch="x86 x86_64"
@@ -20,28 +20,11 @@ source="http://bits.xensource.com/oss-xen/release/$pkgver/$pkgname-$pkgver.tar.g
qemu-xen_paths.patch
docs-Fix-generating-qemu-doc.html-with-texinfo-5.patch
- xsa33-4.2-unstable.patch
xsa41.patch
xsa41b.patch
xsa41c.patch
- xsa34-4.2.patch
- xsa35-4.2-with-xsa34.patch
- xsa36-4.2.patch
- xsa38.patch
- xsa44-4.2.patch
- xsa45-4.2.patch
- xsa46-4.2.patch
- xsa47-4.2-unstable.patch
- xsa48-4.2.patch
- xsa52-4.2-unstable.patch
- xsa53-4.2.patch
- xsa54.patch
- xsa55.patch
- xsa56.patch
- xsa57.patch
- xsa58-4.2.patch
- xsa61-4.2-unstable.patch
- xsa75-4.2.patch
+ xsa97-hap-4_2-prereq.patch
+ xsa97-hap-4_2.patch
xenstored.initd
xenstored.confd
@@ -151,33 +134,16 @@ xend() {
-exec mv '{}' "$subpkgdir"/"$sitepackages"/xen \;
}
-md5sums="0d48cbe1767b82aba12517898d4e0408 xen-4.2.1.tar.gz
+md5sums="b32be39471c93249828b117473adca9d xen-4.2.4.tar.gz
506e7ab6f9482dc95f230978d340bcd9 qemu_uclibc_configure.patch
2dc5ddf47c53ea168729975046c3c1f9 librt.patch
1ccde6b36a6f9542a16d998204dc9a22 qemu-xen_paths.patch
6dcff640268d514fa9164b4c812cc52d docs-Fix-generating-qemu-doc.html-with-texinfo-5.patch
-8aa341b27fac3f93a99113c72671c864 xsa33-4.2-unstable.patch
8ad8942000b8a4be4917599cad9209cf xsa41.patch
ed7d0399c6ca6aeee479da5d8f807fe0 xsa41b.patch
2f3dd7bdc59d104370066d6582725575 xsa41c.patch
-af10e1a3f757a184a1d79904a5ef8572 xsa34-4.2.patch
-8270dbf929e26b5e95532d10a697e404 xsa35-4.2-with-xsa34.patch
-87a54b2a1f1ea3d955017fe1fd8c0398 xsa36-4.2.patch
-47589e06d077d71282ec1b87dd4d87a9 xsa38.patch
-85239ba26395b05502ceee5eec968ea7 xsa44-4.2.patch
-9265540493f41f7d40c48d0886ec5823 xsa45-4.2.patch
-b955534323681fa461f86c69e4acec75 xsa46-4.2.patch
-c05bb12fc5b6aa64cd23f2ad623c539a xsa47-4.2-unstable.patch
-b3e3a57d189a4f86c9766eaf3b5207f4 xsa48-4.2.patch
-83a9cdd035bcd18bf035434a1ba08c38 xsa52-4.2-unstable.patch
-03a1a4ebc470ee7e638e04db2701a4f7 xsa53-4.2.patch
-a8393d1ec6b886ea72ffe624a04ee10a xsa54.patch
-42cd104f2a33d67938a63a6372cff573 xsa55.patch
-e70b9128ffc2175cea314a533a7d8457 xsa56.patch
-7475158130474ee062a4eb878259af61 xsa57.patch
-7de2cd11c10d6a554f3c81e0688c38b7 xsa58-4.2.patch
-d2b6cd997b025e55d4a9f98c6bd6839b xsa61-4.2-unstable.patch
-3a7da1a0f6c39e7e2f422c6455a97ccd xsa75-4.2.patch
+4778066a3338ca9a2263048e6a22bb6b xsa97-hap-4_2-prereq.patch
+052b4144e2eef36757a28e7011d0ac74 xsa97-hap-4_2.patch
95d8af17bf844d41a015ff32aae51ba1 xenstored.initd
b017ccdd5e1c27bbf1513e3569d4ff07 xenstored.confd
ed262f15fb880badb53575539468646c xenconsoled.initd
diff --git a/main/xen/xsa33-4.2-unstable.patch b/main/xen/xsa33-4.2-unstable.patch
deleted file mode 100644
index 369d65bba9..0000000000
--- a/main/xen/xsa33-4.2-unstable.patch
+++ /dev/null
@@ -1,21 +0,0 @@
-VT-d: fix interrupt remapping source validation for devices behind
-legacy bridges
-
-Using SVT_VERIFY_BUS here doesn't make sense; native Linux also
-uses SVT_VERIFY_SID_SQ here instead.
-
-This is XSA-33 / CVE-2012-5634.
-
-Signed-off-by: Jan Beulich <jbeulich@suse.com>
-
---- a/xen/drivers/passthrough/vtd/intremap.c
-+++ b/xen/drivers/passthrough/vtd/intremap.c
-@@ -466,7 +466,7 @@ static void set_msi_source_id(struct pci_dev *pdev, struct iremap_entry *ire)
- set_ire_sid(ire, SVT_VERIFY_BUS, SQ_ALL_16,
- (bus << 8) | pdev->bus);
- else if ( pdev_type(seg, bus, devfn) == DEV_TYPE_LEGACY_PCI_BRIDGE )
-- set_ire_sid(ire, SVT_VERIFY_BUS, SQ_ALL_16,
-+ set_ire_sid(ire, SVT_VERIFY_SID_SQ, SQ_ALL_16,
- PCI_BDF2(bus, devfn));
- }
- break;
diff --git a/main/xen/xsa34-4.2.patch b/main/xen/xsa34-4.2.patch
deleted file mode 100644
index f5328eff9f..0000000000
--- a/main/xen/xsa34-4.2.patch
+++ /dev/null
@@ -1,30 +0,0 @@
-x86_32: don't allow use of nested HVM
-
-There are (indirect) uses of map_domain_page() in the nested HVM code
-that are unsafe when not just using the 1:1 mapping.
-
-This is XSA-34 / CVE-2013-0151.
-
-Signed-off-by: Jan Beulich <jbeulich@suse.com>
-
---- a/xen/arch/x86/hvm/hvm.c
-+++ b/xen/arch/x86/hvm/hvm.c
-@@ -3926,6 +3926,10 @@ long do_hvm_op(unsigned long op, XEN_GUE
- rc = -EINVAL;
- break;
- case HVM_PARAM_NESTEDHVM:
-+#ifdef __i386__
-+ if ( a.value )
-+ rc = -EINVAL;
-+#else
- if ( a.value > 1 )
- rc = -EINVAL;
- if ( !is_hvm_domain(d) )
-@@ -3940,6 +3944,7 @@ long do_hvm_op(unsigned long op, XEN_GUE
- for_each_vcpu(d, v)
- if ( rc == 0 )
- rc = nestedhvm_vcpu_initialise(v);
-+#endif
- break;
- case HVM_PARAM_BUFIOREQ_EVTCHN:
- rc = -EINVAL;
diff --git a/main/xen/xsa35-4.2-with-xsa34.patch b/main/xen/xsa35-4.2-with-xsa34.patch
deleted file mode 100644
index 89230e2a46..0000000000
--- a/main/xen/xsa35-4.2-with-xsa34.patch
+++ /dev/null
@@ -1,24 +0,0 @@
-xen: Do not allow guests to enable nested HVM on themselves
-
-There is no reason for this and doing so exposes a memory leak to
-guests. Only toolstacks need write access to this HVM param.
-
-This is XSA-35 / CVE-2013-0152.
-
-Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
-Acked-by: Jan Beulich <JBeulich@suse.com>
-
---- a/xen/arch/x86/hvm/hvm.c
-+++ b/xen/arch/x86/hvm/hvm.c
-@@ -3862,6 +3862,11 @@ long do_hvm_op(unsigned long op, XEN_GUEST_HANDLE(void) arg)
- rc = -EINVAL;
- break;
- case HVM_PARAM_NESTEDHVM:
-+ if ( !IS_PRIV(current->domain) )
-+ {
-+ rc = -EPERM;
-+ break;
-+ }
- #ifdef __i386__
- if ( a.value )
- rc = -EINVAL;
diff --git a/main/xen/xsa36-4.2.patch b/main/xen/xsa36-4.2.patch
deleted file mode 100644
index 8477701a22..0000000000
--- a/main/xen/xsa36-4.2.patch
+++ /dev/null
@@ -1,323 +0,0 @@
-ACPI: acpi_table_parse() should return handler's error code
-
-Currently, the error code returned by acpi_table_parse()'s handler
-is ignored. This patch will propagate handler's return value to
-acpi_table_parse()'s caller.
-
-AMD,IOMMU: Clean up old entries in remapping tables when creating new
-interrupt mapping.
-
-When changing the affinity of an IRQ associated with a passed
-through PCI device, clear previous mapping.
-
-In addition, because some BIOSes may incorrectly program IVRS
-entries for IOAPIC try to check for entry's consistency. Specifically,
-if conflicting entries are found disable IOMMU if per-device
-remapping table is used. If entries refer to bogus IOAPIC IDs
-disable IOMMU unconditionally
-
-AMD,IOMMU: Disable IOMMU if SATA Combined mode is on
-
-AMD's SP5100 chipset can be placed into SATA Combined mode
-that may cause prevent dom0 from booting when IOMMU is
-enabled and per-device interrupt remapping table is used.
-While SP5100 erratum 28 requires BIOSes to disable this mode,
-some may still use it.
-
-This patch checks whether this mode is on and, if per-device
-table is in use, disables IOMMU.
-
-AMD,IOMMU: Make per-device interrupt remapping table default
-
-Using global interrupt remapping table may be insecure, as
-described by XSA-36. This patch makes per-device mode default.
-
-This is XSA-36 / CVE-2013-0153.
-
-Signed-off-by: Jan Beulich <jbeulich@suse.com>
-Signed-off-by: Boris Ostrovsky <boris.ostrovsky@amd.com>
-
---- a/xen/arch/x86/irq.c
-+++ b/xen/arch/x86/irq.c
-@@ -1942,9 +1942,6 @@ int map_domain_pirq(
- spin_lock_irqsave(&desc->lock, flags);
- set_domain_irq_pirq(d, irq, info);
- spin_unlock_irqrestore(&desc->lock, flags);
--
-- if ( opt_irq_vector_map == OPT_IRQ_VECTOR_MAP_PERDEV )
-- printk(XENLOG_INFO "Per-device vector maps for GSIs not implemented yet.\n");
- }
-
- done:
---- a/xen/drivers/acpi/tables.c
-+++ b/xen/drivers/acpi/tables.c
-@@ -267,7 +267,7 @@ acpi_table_parse_madt(enum acpi_madt_typ
- * @handler: handler to run
- *
- * Scan the ACPI System Descriptor Table (STD) for a table matching @id,
-- * run @handler on it. Return 0 if table found, return on if not.
-+ * run @handler on it.
- */
- int __init acpi_table_parse(char *id, acpi_table_handler handler)
- {
-@@ -282,8 +282,7 @@ int __init acpi_table_parse(char *id, ac
- acpi_get_table(id, 0, &table);
-
- if (table) {
-- handler(table);
-- return 0;
-+ return handler(table);
- } else
- return 1;
- }
---- a/xen/drivers/passthrough/amd/iommu_acpi.c
-+++ b/xen/drivers/passthrough/amd/iommu_acpi.c
-@@ -22,6 +22,7 @@
- #include <xen/errno.h>
- #include <xen/acpi.h>
- #include <asm/apicdef.h>
-+#include <asm/io_apic.h>
- #include <asm/amd-iommu.h>
- #include <asm/hvm/svm/amd-iommu-proto.h>
-
-@@ -635,6 +636,7 @@ static u16 __init parse_ivhd_device_spec
- u16 header_length, u16 block_length, struct amd_iommu *iommu)
- {
- u16 dev_length, bdf;
-+ int apic;
-
- dev_length = sizeof(*special);
- if ( header_length < (block_length + dev_length) )
-@@ -651,10 +653,59 @@ static u16 __init parse_ivhd_device_spec
- }
-
- add_ivrs_mapping_entry(bdf, bdf, special->header.data_setting, iommu);
-- /* set device id of ioapic */
-- ioapic_sbdf[special->handle].bdf = bdf;
-- ioapic_sbdf[special->handle].seg = seg;
-- return dev_length;
-+
-+ if ( special->variety != ACPI_IVHD_IOAPIC )
-+ {
-+ if ( special->variety != ACPI_IVHD_HPET )
-+ printk(XENLOG_ERR "Unrecognized IVHD special variety %#x\n",
-+ special->variety);
-+ return dev_length;
-+ }
-+
-+ /*
-+ * Some BIOSes have IOAPIC broken entries so we check for IVRS
-+ * consistency here --- whether entry's IOAPIC ID is valid and
-+ * whether there are conflicting/duplicated entries.
-+ */
-+ for ( apic = 0; apic < nr_ioapics; apic++ )
-+ {
-+ if ( IO_APIC_ID(apic) != special->handle )
-+ continue;
-+
-+ if ( ioapic_sbdf[special->handle].pin_setup )
-+ {
-+ if ( ioapic_sbdf[special->handle].bdf == bdf &&
-+ ioapic_sbdf[special->handle].seg == seg )
-+ AMD_IOMMU_DEBUG("IVHD Warning: Duplicate IO-APIC %#x entries\n",
-+ special->handle);
-+ else
-+ {
-+ printk(XENLOG_ERR "IVHD Error: Conflicting IO-APIC %#x entries\n",
-+ special->handle);
-+ if ( amd_iommu_perdev_intremap )
-+ return 0;
-+ }
-+ }
-+ else
-+ {
-+ /* set device id of ioapic */
-+ ioapic_sbdf[special->handle].bdf = bdf;
-+ ioapic_sbdf[special->handle].seg = seg;
-+
-+ ioapic_sbdf[special->handle].pin_setup = xzalloc_array(
-+ unsigned long, BITS_TO_LONGS(nr_ioapic_entries[apic]));
-+ if ( nr_ioapic_entries[apic] &&
-+ !ioapic_sbdf[IO_APIC_ID(apic)].pin_setup )
-+ {
-+ printk(XENLOG_ERR "IVHD Error: Out of memory\n");
-+ return 0;
-+ }
-+ }
-+ return dev_length;
-+ }
-+
-+ printk(XENLOG_ERR "IVHD Error: Invalid IO-APIC %#x\n", special->handle);
-+ return 0;
- }
-
- static int __init parse_ivhd_block(const struct acpi_ivrs_hardware *ivhd_block)
---- a/xen/drivers/passthrough/amd/iommu_init.c
-+++ b/xen/drivers/passthrough/amd/iommu_init.c
-@@ -1126,12 +1126,45 @@ static int __init amd_iommu_setup_device
- return 0;
- }
-
-+/* Check whether SP5100 SATA Combined mode is on */
-+static bool_t __init amd_sp5100_erratum28(void)
-+{
-+ u32 bus, id;
-+ u16 vendor_id, dev_id;
-+ u8 byte;
-+
-+ for (bus = 0; bus < 256; bus++)
-+ {
-+ id = pci_conf_read32(0, bus, 0x14, 0, PCI_VENDOR_ID);
-+
-+ vendor_id = id & 0xffff;
-+ dev_id = (id >> 16) & 0xffff;
-+
-+ /* SP5100 SMBus module sets Combined mode on */
-+ if (vendor_id != 0x1002 || dev_id != 0x4385)
-+ continue;
-+
-+ byte = pci_conf_read8(0, bus, 0x14, 0, 0xad);
-+ if ( (byte >> 3) & 1 )
-+ {
-+ printk(XENLOG_WARNING "AMD-Vi: SP5100 erratum 28 detected, disabling IOMMU.\n"
-+ "If possible, disable SATA Combined mode in BIOS or contact your vendor for BIOS update.\n");
-+ return 1;
-+ }
-+ }
-+
-+ return 0;
-+}
-+
- int __init amd_iommu_init(void)
- {
- struct amd_iommu *iommu;
-
- BUG_ON( !iommu_found() );
-
-+ if ( amd_iommu_perdev_intremap && amd_sp5100_erratum28() )
-+ goto error_out;
-+
- ivrs_bdf_entries = amd_iommu_get_ivrs_dev_entries();
-
- if ( !ivrs_bdf_entries )
---- a/xen/drivers/passthrough/amd/iommu_intr.c
-+++ b/xen/drivers/passthrough/amd/iommu_intr.c
-@@ -99,12 +99,12 @@ static void update_intremap_entry(u32* e
- static void update_intremap_entry_from_ioapic(
- int bdf,
- struct amd_iommu *iommu,
-- struct IO_APIC_route_entry *ioapic_rte)
-+ const struct IO_APIC_route_entry *rte,
-+ const struct IO_APIC_route_entry *old_rte)
- {
- unsigned long flags;
- u32* entry;
- u8 delivery_mode, dest, vector, dest_mode;
-- struct IO_APIC_route_entry *rte = ioapic_rte;
- int req_id;
- spinlock_t *lock;
- int offset;
-@@ -120,6 +120,14 @@ static void update_intremap_entry_from_i
- spin_lock_irqsave(lock, flags);
-
- offset = get_intremap_offset(vector, delivery_mode);
-+ if ( old_rte )
-+ {
-+ int old_offset = get_intremap_offset(old_rte->vector,
-+ old_rte->delivery_mode);
-+
-+ if ( offset != old_offset )
-+ free_intremap_entry(iommu->seg, bdf, old_offset);
-+ }
- entry = (u32*)get_intremap_entry(iommu->seg, req_id, offset);
- update_intremap_entry(entry, vector, delivery_mode, dest_mode, dest);
-
-@@ -188,6 +196,7 @@ int __init amd_iommu_setup_ioapic_remapp
- amd_iommu_flush_intremap(iommu, req_id);
- spin_unlock_irqrestore(&iommu->lock, flags);
- }
-+ set_bit(pin, ioapic_sbdf[IO_APIC_ID(apic)].pin_setup);
- }
- }
- return 0;
-@@ -199,6 +208,7 @@ void amd_iommu_ioapic_update_ire(
- struct IO_APIC_route_entry old_rte = { 0 };
- struct IO_APIC_route_entry new_rte = { 0 };
- unsigned int rte_lo = (reg & 1) ? reg - 1 : reg;
-+ unsigned int pin = (reg - 0x10) / 2;
- int saved_mask, seg, bdf;
- struct amd_iommu *iommu;
-
-@@ -236,6 +246,14 @@ void amd_iommu_ioapic_update_ire(
- *(((u32 *)&new_rte) + 1) = value;
- }
-
-+ if ( new_rte.mask &&
-+ !test_bit(pin, ioapic_sbdf[IO_APIC_ID(apic)].pin_setup) )
-+ {
-+ ASSERT(saved_mask);
-+ __io_apic_write(apic, reg, value);
-+ return;
-+ }
-+
- /* mask the interrupt while we change the intremap table */
- if ( !saved_mask )
- {
-@@ -244,7 +262,11 @@ void amd_iommu_ioapic_update_ire(
- }
-
- /* Update interrupt remapping entry */
-- update_intremap_entry_from_ioapic(bdf, iommu, &new_rte);
-+ update_intremap_entry_from_ioapic(
-+ bdf, iommu, &new_rte,
-+ test_and_set_bit(pin,
-+ ioapic_sbdf[IO_APIC_ID(apic)].pin_setup) ? &old_rte
-+ : NULL);
-
- /* Forward write access to IO-APIC RTE */
- __io_apic_write(apic, reg, value);
-@@ -354,6 +376,12 @@ void amd_iommu_msi_msg_update_ire(
- return;
- }
-
-+ if ( msi_desc->remap_index >= 0 )
-+ update_intremap_entry_from_msi_msg(iommu, pdev, msi_desc, NULL);
-+
-+ if ( !msg )
-+ return;
-+
- update_intremap_entry_from_msi_msg(iommu, pdev, msi_desc, msg);
- }
-
---- a/xen/drivers/passthrough/amd/pci_amd_iommu.c
-+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c
-@@ -205,6 +205,8 @@ int __init amd_iov_detect(void)
- {
- printk("AMD-Vi: Not overriding irq_vector_map setting\n");
- }
-+ if ( !amd_iommu_perdev_intremap )
-+ printk(XENLOG_WARNING "AMD-Vi: Using global interrupt remap table is not recommended (see XSA-36)!\n");
- return scan_pci_devices();
- }
-
---- a/xen/drivers/passthrough/iommu.c
-+++ b/xen/drivers/passthrough/iommu.c
-@@ -52,7 +52,7 @@ bool_t __read_mostly iommu_qinval = 1;
- bool_t __read_mostly iommu_intremap = 1;
- bool_t __read_mostly iommu_hap_pt_share = 1;
- bool_t __read_mostly iommu_debug;
--bool_t __read_mostly amd_iommu_perdev_intremap;
-+bool_t __read_mostly amd_iommu_perdev_intremap = 1;
-
- DEFINE_PER_CPU(bool_t, iommu_dont_flush_iotlb);
-
---- a/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
-+++ b/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
-@@ -100,6 +100,7 @@ void amd_iommu_read_msi_from_ire(
-
- extern struct ioapic_sbdf {
- u16 bdf, seg;
-+ unsigned long *pin_setup;
- } ioapic_sbdf[MAX_IO_APICS];
- extern void *shared_intremap_table;
-
diff --git a/main/xen/xsa38.patch b/main/xen/xsa38.patch
deleted file mode 100644
index f4a5dc0881..0000000000
--- a/main/xen/xsa38.patch
+++ /dev/null
@@ -1,73 +0,0 @@
-diff --git a/tools/ocaml/libs/xb/partial.ml b/tools/ocaml/libs/xb/partial.ml
-index 3558889..d4d1c7b 100644
---- a/tools/ocaml/libs/xb/partial.ml
-+++ b/tools/ocaml/libs/xb/partial.ml
-@@ -27,8 +27,15 @@ external header_size: unit -> int = "stub_header_size"
- external header_of_string_internal: string -> int * int * int * int
- = "stub_header_of_string"
-
-+let xenstore_payload_max = 4096 (* xen/include/public/io/xs_wire.h *)
-+
- let of_string s =
- let tid, rid, opint, dlen = header_of_string_internal s in
-+ (* A packet which is bigger than xenstore_payload_max is illegal.
-+ This will leave the guest connection is a bad state and will
-+ be hard to recover from without restarting the connection
-+ (ie rebooting the guest) *)
-+ let dlen = min xenstore_payload_max dlen in
- {
- tid = tid;
- rid = rid;
-@@ -38,6 +45,7 @@ let of_string s =
- }
-
- let append pkt s sz =
-+ if pkt.len > 4096 then failwith "Buffer.add: cannot grow buffer";
- Buffer.add_string pkt.buf (String.sub s 0 sz)
-
- let to_complete pkt =
-diff --git a/tools/ocaml/libs/xb/xs_ring_stubs.c b/tools/ocaml/libs/xb/xs_ring_stubs.c
-index 00414c5..4888ac5 100644
---- a/tools/ocaml/libs/xb/xs_ring_stubs.c
-+++ b/tools/ocaml/libs/xb/xs_ring_stubs.c
-@@ -39,21 +39,23 @@ static int xs_ring_read(struct mmap_interface *interface,
- char *buffer, int len)
- {
- struct xenstore_domain_interface *intf = interface->addr;
-- XENSTORE_RING_IDX cons, prod;
-+ XENSTORE_RING_IDX cons, prod; /* offsets only */
- int to_read;
-
-- cons = intf->req_cons;
-- prod = intf->req_prod;
-+ cons = *(volatile uint32*)&intf->req_cons;
-+ prod = *(volatile uint32*)&intf->req_prod;
- xen_mb();
-+ cons = MASK_XENSTORE_IDX(cons);
-+ prod = MASK_XENSTORE_IDX(prod);
- if (prod == cons)
- return 0;
-- if (MASK_XENSTORE_IDX(prod) > MASK_XENSTORE_IDX(cons))
-+ if (prod > cons)
- to_read = prod - cons;
- else
-- to_read = XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(cons);
-+ to_read = XENSTORE_RING_SIZE - cons;
- if (to_read < len)
- len = to_read;
-- memcpy(buffer, intf->req + MASK_XENSTORE_IDX(cons), len);
-+ memcpy(buffer, intf->req + cons, len);
- xen_mb();
- intf->req_cons += len;
- return len;
-@@ -66,8 +68,8 @@ static int xs_ring_write(struct mmap_interface *interface,
- XENSTORE_RING_IDX cons, prod;
- int can_write;
-
-- cons = intf->rsp_cons;
-- prod = intf->rsp_prod;
-+ cons = *(volatile uint32*)&intf->rsp_cons;
-+ prod = *(volatile uint32*)&intf->rsp_prod;
- xen_mb();
- if ( (prod - cons) >= XENSTORE_RING_SIZE )
- return 0;
diff --git a/main/xen/xsa45-4.2.patch b/main/xen/xsa45-4.2.patch
deleted file mode 100644
index dfdfdea64b..0000000000
--- a/main/xen/xsa45-4.2.patch
+++ /dev/null
@@ -1,1133 +0,0 @@
-diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
-index 26a7f12..b97ac6d 100644
---- a/xen/arch/x86/domain.c
-+++ b/xen/arch/x86/domain.c
-@@ -73,8 +73,6 @@ void (*dead_idle) (void) __read_mostly = default_dead_idle;
- static void paravirt_ctxt_switch_from(struct vcpu *v);
- static void paravirt_ctxt_switch_to(struct vcpu *v);
-
--static void vcpu_destroy_pagetables(struct vcpu *v);
--
- static void default_idle(void)
- {
- local_irq_disable();
-@@ -860,6 +858,9 @@ int arch_set_info_guest(
-
- if ( !v->is_initialised )
- {
-+ if ( !compat && !(flags & VGCF_in_kernel) && !c.nat->ctrlreg[1] )
-+ return -EINVAL;
-+
- v->arch.pv_vcpu.ldt_base = c(ldt_base);
- v->arch.pv_vcpu.ldt_ents = c(ldt_ents);
- }
-@@ -957,24 +958,44 @@ int arch_set_info_guest(
- if ( rc != 0 )
- return rc;
-
-+ set_bit(_VPF_in_reset, &v->pause_flags);
-+
- if ( !compat )
-- {
- cr3_gfn = xen_cr3_to_pfn(c.nat->ctrlreg[3]);
-- cr3_page = get_page_from_gfn(d, cr3_gfn, NULL, P2M_ALLOC);
--
-- if ( !cr3_page )
-- {
-- destroy_gdt(v);
-- return -EINVAL;
-- }
-- if ( !paging_mode_refcounts(d)
-- && !get_page_type(cr3_page, PGT_base_page_table) )
-- {
-- put_page(cr3_page);
-- destroy_gdt(v);
-- return -EINVAL;
-- }
-+#ifdef CONFIG_COMPAT
-+ else
-+ cr3_gfn = compat_cr3_to_pfn(c.cmp->ctrlreg[3]);
-+#endif
-+ cr3_page = get_page_from_gfn(d, cr3_gfn, NULL, P2M_ALLOC);
-
-+ if ( !cr3_page )
-+ rc = -EINVAL;
-+ else if ( paging_mode_refcounts(d) )
-+ /* nothing */;
-+ else if ( cr3_page == v->arch.old_guest_table )
-+ {
-+ v->arch.old_guest_table = NULL;
-+ put_page(cr3_page);
-+ }
-+ else
-+ {
-+ /*
-+ * Since v->arch.guest_table{,_user} are both NULL, this effectively
-+ * is just a call to put_old_guest_table().
-+ */
-+ if ( !compat )
-+ rc = vcpu_destroy_pagetables(v);
-+ if ( !rc )
-+ rc = get_page_type_preemptible(cr3_page,
-+ !compat ? PGT_root_page_table
-+ : PGT_l3_page_table);
-+ if ( rc == -EINTR )
-+ rc = -EAGAIN;
-+ }
-+ if ( rc )
-+ /* handled below */;
-+ else if ( !compat )
-+ {
- v->arch.guest_table = pagetable_from_page(cr3_page);
- #ifdef __x86_64__
- if ( c.nat->ctrlreg[1] )
-@@ -982,56 +1003,44 @@ int arch_set_info_guest(
- cr3_gfn = xen_cr3_to_pfn(c.nat->ctrlreg[1]);
- cr3_page = get_page_from_gfn(d, cr3_gfn, NULL, P2M_ALLOC);
-
-- if ( !cr3_page ||
-- (!paging_mode_refcounts(d)
-- && !get_page_type(cr3_page, PGT_base_page_table)) )
-+ if ( !cr3_page )
-+ rc = -EINVAL;
-+ else if ( !paging_mode_refcounts(d) )
- {
-- if (cr3_page)
-- put_page(cr3_page);
-- cr3_page = pagetable_get_page(v->arch.guest_table);
-- v->arch.guest_table = pagetable_null();
-- if ( paging_mode_refcounts(d) )
-- put_page(cr3_page);
-- else
-- put_page_and_type(cr3_page);
-- destroy_gdt(v);
-- return -EINVAL;
-+ rc = get_page_type_preemptible(cr3_page, PGT_root_page_table);
-+ switch ( rc )
-+ {
-+ case -EINTR:
-+ rc = -EAGAIN;
-+ case -EAGAIN:
-+ v->arch.old_guest_table =
-+ pagetable_get_page(v->arch.guest_table);
-+ v->arch.guest_table = pagetable_null();
-+ break;
-+ }
- }
--
-- v->arch.guest_table_user = pagetable_from_page(cr3_page);
-- }
-- else if ( !(flags & VGCF_in_kernel) )
-- {
-- destroy_gdt(v);
-- return -EINVAL;
-+ if ( !rc )
-+ v->arch.guest_table_user = pagetable_from_page(cr3_page);
- }
- }
- else
- {
- l4_pgentry_t *l4tab;
-
-- cr3_gfn = compat_cr3_to_pfn(c.cmp->ctrlreg[3]);
-- cr3_page = get_page_from_gfn(d, cr3_gfn, NULL, P2M_ALLOC);
--
-- if ( !cr3_page)
-- {
-- destroy_gdt(v);
-- return -EINVAL;
-- }
--
-- if (!paging_mode_refcounts(d)
-- && !get_page_type(cr3_page, PGT_l3_page_table) )
-- {
-- put_page(cr3_page);
-- destroy_gdt(v);
-- return -EINVAL;
-- }
--
- l4tab = __va(pagetable_get_paddr(v->arch.guest_table));
- *l4tab = l4e_from_pfn(page_to_mfn(cr3_page),
- _PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_ACCESSED);
- #endif
- }
-+ if ( rc )
-+ {
-+ if ( cr3_page )
-+ put_page(cr3_page);
-+ destroy_gdt(v);
-+ return rc;
-+ }
-+
-+ clear_bit(_VPF_in_reset, &v->pause_flags);
-
- if ( v->vcpu_id == 0 )
- update_domain_wallclock_time(d);
-@@ -1053,17 +1062,16 @@ int arch_set_info_guest(
- #undef c
- }
-
--void arch_vcpu_reset(struct vcpu *v)
-+int arch_vcpu_reset(struct vcpu *v)
- {
- if ( !is_hvm_vcpu(v) )
- {
- destroy_gdt(v);
-- vcpu_destroy_pagetables(v);
-- }
-- else
-- {
-- vcpu_end_shutdown_deferral(v);
-+ return vcpu_destroy_pagetables(v);
- }
-+
-+ vcpu_end_shutdown_deferral(v);
-+ return 0;
- }
-
- /*
-@@ -2069,63 +2077,6 @@ static int relinquish_memory(
- return ret;
- }
-
--static void vcpu_destroy_pagetables(struct vcpu *v)
--{
-- struct domain *d = v->domain;
-- unsigned long pfn;
--
--#ifdef __x86_64__
-- if ( is_pv_32on64_vcpu(v) )
-- {
-- pfn = l4e_get_pfn(*(l4_pgentry_t *)
-- __va(pagetable_get_paddr(v->arch.guest_table)));
--
-- if ( pfn != 0 )
-- {
-- if ( paging_mode_refcounts(d) )
-- put_page(mfn_to_page(pfn));
-- else
-- put_page_and_type(mfn_to_page(pfn));
-- }
--
-- l4e_write(
-- (l4_pgentry_t *)__va(pagetable_get_paddr(v->arch.guest_table)),
-- l4e_empty());
--
-- v->arch.cr3 = 0;
-- return;
-- }
--#endif
--
-- pfn = pagetable_get_pfn(v->arch.guest_table);
-- if ( pfn != 0 )
-- {
-- if ( paging_mode_refcounts(d) )
-- put_page(mfn_to_page(pfn));
-- else
-- put_page_and_type(mfn_to_page(pfn));
-- v->arch.guest_table = pagetable_null();
-- }
--
--#ifdef __x86_64__
-- /* Drop ref to guest_table_user (from MMUEXT_NEW_USER_BASEPTR) */
-- pfn = pagetable_get_pfn(v->arch.guest_table_user);
-- if ( pfn != 0 )
-- {
-- if ( !is_pv_32bit_vcpu(v) )
-- {
-- if ( paging_mode_refcounts(d) )
-- put_page(mfn_to_page(pfn));
-- else
-- put_page_and_type(mfn_to_page(pfn));
-- }
-- v->arch.guest_table_user = pagetable_null();
-- }
--#endif
--
-- v->arch.cr3 = 0;
--}
--
- int domain_relinquish_resources(struct domain *d)
- {
- int ret;
-@@ -2143,7 +2094,11 @@ int domain_relinquish_resources(struct domain *d)
-
- /* Drop the in-use references to page-table bases. */
- for_each_vcpu ( d, v )
-- vcpu_destroy_pagetables(v);
-+ {
-+ ret = vcpu_destroy_pagetables(v);
-+ if ( ret )
-+ return ret;
-+ }
-
- if ( !is_hvm_domain(d) )
- {
-diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
-index 3d471a5..efacc98 100644
---- a/xen/arch/x86/hvm/hvm.c
-+++ b/xen/arch/x86/hvm/hvm.c
-@@ -3509,8 +3509,11 @@ static void hvm_s3_suspend(struct domain *d)
-
- for_each_vcpu ( d, v )
- {
-+ int rc;
-+
- vlapic_reset(vcpu_vlapic(v));
-- vcpu_reset(v);
-+ rc = vcpu_reset(v);
-+ ASSERT(!rc);
- }
-
- vpic_reset(d);
-diff --git a/xen/arch/x86/hvm/vlapic.c b/xen/arch/x86/hvm/vlapic.c
-index 52d111b..7778342 100644
---- a/xen/arch/x86/hvm/vlapic.c
-+++ b/xen/arch/x86/hvm/vlapic.c
-@@ -252,10 +252,13 @@ static void vlapic_init_sipi_action(unsigned long _vcpu)
- {
- case APIC_DM_INIT: {
- bool_t fpu_initialised;
-+ int rc;
-+
- domain_lock(target->domain);
- /* Reset necessary VCPU state. This does not include FPU state. */
- fpu_initialised = target->fpu_initialised;
-- vcpu_reset(target);
-+ rc = vcpu_reset(target);
-+ ASSERT(!rc);
- target->fpu_initialised = fpu_initialised;
- vlapic_reset(vcpu_vlapic(target));
- domain_unlock(target->domain);
-diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
-index 8444610..055f307 100644
---- a/xen/arch/x86/mm.c
-+++ b/xen/arch/x86/mm.c
-@@ -1241,7 +1241,16 @@ static int put_page_from_l3e(l3_pgentry_t l3e, unsigned long pfn,
- #endif
-
- if ( unlikely(partial > 0) )
-+ {
-+ ASSERT(preemptible >= 0);
- return __put_page_type(l3e_get_page(l3e), preemptible);
-+ }
-+
-+ if ( preemptible < 0 )
-+ {
-+ current->arch.old_guest_table = l3e_get_page(l3e);
-+ return 0;
-+ }
-
- return put_page_and_type_preemptible(l3e_get_page(l3e), preemptible);
- }
-@@ -1254,7 +1263,17 @@ static int put_page_from_l4e(l4_pgentry_t l4e, unsigned long pfn,
- (l4e_get_pfn(l4e) != pfn) )
- {
- if ( unlikely(partial > 0) )
-+ {
-+ ASSERT(preemptible >= 0);
- return __put_page_type(l4e_get_page(l4e), preemptible);
-+ }
-+
-+ if ( preemptible < 0 )
-+ {
-+ current->arch.old_guest_table = l4e_get_page(l4e);
-+ return 0;
-+ }
-+
- return put_page_and_type_preemptible(l4e_get_page(l4e), preemptible);
- }
- return 1;
-@@ -1549,12 +1568,17 @@ static int alloc_l3_table(struct page_info *page, int preemptible)
- if ( rc < 0 && rc != -EAGAIN && rc != -EINTR )
- {
- MEM_LOG("Failure in alloc_l3_table: entry %d", i);
-+ if ( i )
-+ {
-+ page->nr_validated_ptes = i;
-+ page->partial_pte = 0;
-+ current->arch.old_guest_table = page;
-+ }
- while ( i-- > 0 )
- {
- if ( !is_guest_l3_slot(i) )
- continue;
- unadjust_guest_l3e(pl3e[i], d);
-- put_page_from_l3e(pl3e[i], pfn, 0, 0);
- }
- }
-
-@@ -1584,22 +1608,24 @@ static int alloc_l4_table(struct page_info *page, int preemptible)
- page->nr_validated_ptes = i;
- page->partial_pte = partial ?: 1;
- }
-- else if ( rc == -EINTR )
-+ else if ( rc < 0 )
- {
-+ if ( rc != -EINTR )
-+ MEM_LOG("Failure in alloc_l4_table: entry %d", i);
- if ( i )
- {
- page->nr_validated_ptes = i;
- page->partial_pte = 0;
-- rc = -EAGAIN;
-+ if ( rc == -EINTR )
-+ rc = -EAGAIN;
-+ else
-+ {
-+ if ( current->arch.old_guest_table )
-+ page->nr_validated_ptes++;
-+ current->arch.old_guest_table = page;
-+ }
- }
- }
-- else if ( rc < 0 )
-- {
-- MEM_LOG("Failure in alloc_l4_table: entry %d", i);
-- while ( i-- > 0 )
-- if ( is_guest_l4_slot(d, i) )
-- put_page_from_l4e(pl4e[i], pfn, 0, 0);
-- }
- if ( rc < 0 )
- return rc;
-
-@@ -2047,7 +2073,7 @@ static int mod_l3_entry(l3_pgentry_t *pl3e,
- pae_flush_pgd(pfn, pgentry_ptr_to_slot(pl3e), nl3e);
- }
-
-- put_page_from_l3e(ol3e, pfn, 0, 0);
-+ put_page_from_l3e(ol3e, pfn, 0, -preemptible);
- return rc;
- }
-
-@@ -2110,7 +2136,7 @@ static int mod_l4_entry(l4_pgentry_t *pl4e,
- return -EFAULT;
- }
-
-- put_page_from_l4e(ol4e, pfn, 0, 0);
-+ put_page_from_l4e(ol4e, pfn, 0, -preemptible);
- return rc;
- }
-
-@@ -2268,7 +2294,15 @@ static int alloc_page_type(struct page_info *page, unsigned long type,
- PRtype_info ": caf=%08lx taf=%" PRtype_info,
- page_to_mfn(page), get_gpfn_from_mfn(page_to_mfn(page)),
- type, page->count_info, page->u.inuse.type_info);
-- page->u.inuse.type_info = 0;
-+ if ( page != current->arch.old_guest_table )
-+ page->u.inuse.type_info = 0;
-+ else
-+ {
-+ ASSERT((page->u.inuse.type_info &
-+ (PGT_count_mask | PGT_validated)) == 1);
-+ get_page_light(page);
-+ page->u.inuse.type_info |= PGT_partial;
-+ }
- }
- else
- {
-@@ -2808,49 +2842,150 @@ static void put_superpage(unsigned long mfn)
-
- #endif
-
-+static int put_old_guest_table(struct vcpu *v)
-+{
-+ int rc;
-+
-+ if ( !v->arch.old_guest_table )
-+ return 0;
-+
-+ switch ( rc = put_page_and_type_preemptible(v->arch.old_guest_table, 1) )
-+ {
-+ case -EINTR:
-+ case -EAGAIN:
-+ return -EAGAIN;
-+ }
-+
-+ v->arch.old_guest_table = NULL;
-+
-+ return rc;
-+}
-+
-+int vcpu_destroy_pagetables(struct vcpu *v)
-+{
-+ unsigned long mfn = pagetable_get_pfn(v->arch.guest_table);
-+ struct page_info *page;
-+ int rc = put_old_guest_table(v);
-+
-+ if ( rc )
-+ return rc;
-+
-+#ifdef __x86_64__
-+ if ( is_pv_32on64_vcpu(v) )
-+ mfn = l4e_get_pfn(*(l4_pgentry_t *)mfn_to_virt(mfn));
-+#endif
-+
-+ if ( mfn )
-+ {
-+ page = mfn_to_page(mfn);
-+ if ( paging_mode_refcounts(v->domain) )
-+ put_page(page);
-+ else
-+ rc = put_page_and_type_preemptible(page, 1);
-+ }
-+
-+#ifdef __x86_64__
-+ if ( is_pv_32on64_vcpu(v) )
-+ {
-+ if ( !rc )
-+ l4e_write(
-+ (l4_pgentry_t *)__va(pagetable_get_paddr(v->arch.guest_table)),
-+ l4e_empty());
-+ }
-+ else
-+#endif
-+ if ( !rc )
-+ {
-+ v->arch.guest_table = pagetable_null();
-+
-+#ifdef __x86_64__
-+ /* Drop ref to guest_table_user (from MMUEXT_NEW_USER_BASEPTR) */
-+ mfn = pagetable_get_pfn(v->arch.guest_table_user);
-+ if ( mfn )
-+ {
-+ page = mfn_to_page(mfn);
-+ if ( paging_mode_refcounts(v->domain) )
-+ put_page(page);
-+ else
-+ rc = put_page_and_type_preemptible(page, 1);
-+ }
-+ if ( !rc )
-+ v->arch.guest_table_user = pagetable_null();
-+#endif
-+ }
-+
-+ v->arch.cr3 = 0;
-+
-+ return rc;
-+}
-
- int new_guest_cr3(unsigned long mfn)
- {
- struct vcpu *curr = current;
- struct domain *d = curr->domain;
-- int okay;
-+ int rc;
- unsigned long old_base_mfn;
-
- #ifdef __x86_64__
- if ( is_pv_32on64_domain(d) )
- {
-- okay = paging_mode_refcounts(d)
-- ? 0 /* Old code was broken, but what should it be? */
-- : mod_l4_entry(
-+ rc = paging_mode_refcounts(d)
-+ ? -EINVAL /* Old code was broken, but what should it be? */
-+ : mod_l4_entry(
- __va(pagetable_get_paddr(curr->arch.guest_table)),
- l4e_from_pfn(
- mfn,
- (_PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_ACCESSED)),
-- pagetable_get_pfn(curr->arch.guest_table), 0, 0, curr) == 0;
-- if ( unlikely(!okay) )
-+ pagetable_get_pfn(curr->arch.guest_table), 0, 1, curr);
-+ switch ( rc )
- {
-+ case 0:
-+ break;
-+ case -EINTR:
-+ case -EAGAIN:
-+ return -EAGAIN;
-+ default:
- MEM_LOG("Error while installing new compat baseptr %lx", mfn);
-- return 0;
-+ return rc;
- }
-
- invalidate_shadow_ldt(curr, 0);
- write_ptbase(curr);
-
-- return 1;
-+ return 0;
- }
- #endif
-- okay = paging_mode_refcounts(d)
-- ? get_page_from_pagenr(mfn, d)
-- : !get_page_and_type_from_pagenr(mfn, PGT_root_page_table, d, 0, 0);
-- if ( unlikely(!okay) )
-+ rc = put_old_guest_table(curr);
-+ if ( unlikely(rc) )
-+ return rc;
-+
-+ old_base_mfn = pagetable_get_pfn(curr->arch.guest_table);
-+ /*
-+ * This is particularly important when getting restarted after the
-+ * previous attempt got preempted in the put-old-MFN phase.
-+ */
-+ if ( old_base_mfn == mfn )
- {
-- MEM_LOG("Error while installing new baseptr %lx", mfn);
-+ write_ptbase(curr);
- return 0;
- }
-
-- invalidate_shadow_ldt(curr, 0);
-+ rc = paging_mode_refcounts(d)
-+ ? (get_page_from_pagenr(mfn, d) ? 0 : -EINVAL)
-+ : get_page_and_type_from_pagenr(mfn, PGT_root_page_table, d, 0, 1);
-+ switch ( rc )
-+ {
-+ case 0:
-+ break;
-+ case -EINTR:
-+ case -EAGAIN:
-+ return -EAGAIN;
-+ default:
-+ MEM_LOG("Error while installing new baseptr %lx", mfn);
-+ return rc;
-+ }
-
-- old_base_mfn = pagetable_get_pfn(curr->arch.guest_table);
-+ invalidate_shadow_ldt(curr, 0);
-
- curr->arch.guest_table = pagetable_from_pfn(mfn);
- update_cr3(curr);
-@@ -2859,13 +2994,25 @@ int new_guest_cr3(unsigned long mfn)
-
- if ( likely(old_base_mfn != 0) )
- {
-+ struct page_info *page = mfn_to_page(old_base_mfn);
-+
- if ( paging_mode_refcounts(d) )
-- put_page(mfn_to_page(old_base_mfn));
-+ put_page(page);
- else
-- put_page_and_type(mfn_to_page(old_base_mfn));
-+ switch ( rc = put_page_and_type_preemptible(page, 1) )
-+ {
-+ case -EINTR:
-+ rc = -EAGAIN;
-+ case -EAGAIN:
-+ curr->arch.old_guest_table = page;
-+ break;
-+ default:
-+ BUG_ON(rc);
-+ break;
-+ }
- }
-
-- return 1;
-+ return rc;
- }
-
- static struct domain *get_pg_owner(domid_t domid)
-@@ -2994,12 +3141,29 @@ long do_mmuext_op(
- unsigned int foreigndom)
- {
- struct mmuext_op op;
-- int rc = 0, i = 0, okay;
- unsigned long type;
-- unsigned int done = 0;
-+ unsigned int i = 0, done = 0;
- struct vcpu *curr = current;
- struct domain *d = curr->domain;
- struct domain *pg_owner;
-+ int okay, rc = put_old_guest_table(curr);
-+
-+ if ( unlikely(rc) )
-+ {
-+ if ( likely(rc == -EAGAIN) )
-+ rc = hypercall_create_continuation(
-+ __HYPERVISOR_mmuext_op, "hihi", uops, count, pdone,
-+ foreigndom);
-+ return rc;
-+ }
-+
-+ if ( unlikely(count == MMU_UPDATE_PREEMPTED) &&
-+ likely(guest_handle_is_null(uops)) )
-+ {
-+ /* See the curr->arch.old_guest_table related
-+ * hypercall_create_continuation() below. */
-+ return (int)foreigndom;
-+ }
-
- if ( unlikely(count & MMU_UPDATE_PREEMPTED) )
- {
-@@ -3024,7 +3188,7 @@ long do_mmuext_op(
-
- for ( i = 0; i < count; i++ )
- {
-- if ( hypercall_preempt_check() )
-+ if ( curr->arch.old_guest_table || hypercall_preempt_check() )
- {
- rc = -EAGAIN;
- break;
-@@ -3088,21 +3252,17 @@ long do_mmuext_op(
- }
-
- if ( (rc = xsm_memory_pin_page(d, pg_owner, page)) != 0 )
-- {
-- put_page_and_type(page);
- okay = 0;
-- break;
-- }
--
-- if ( unlikely(test_and_set_bit(_PGT_pinned,
-- &page->u.inuse.type_info)) )
-+ else if ( unlikely(test_and_set_bit(_PGT_pinned,
-+ &page->u.inuse.type_info)) )
- {
- MEM_LOG("Mfn %lx already pinned", page_to_mfn(page));
-- put_page_and_type(page);
- okay = 0;
-- break;
- }
-
-+ if ( unlikely(!okay) )
-+ goto pin_drop;
-+
- /* A page is dirtied when its pin status is set. */
- paging_mark_dirty(pg_owner, page_to_mfn(page));
-
-@@ -3116,7 +3276,13 @@ long do_mmuext_op(
- &page->u.inuse.type_info));
- spin_unlock(&pg_owner->page_alloc_lock);
- if ( drop_ref )
-- put_page_and_type(page);
-+ {
-+ pin_drop:
-+ if ( type == PGT_l1_page_table )
-+ put_page_and_type(page);
-+ else
-+ curr->arch.old_guest_table = page;
-+ }
- }
-
- break;
-@@ -3144,7 +3310,17 @@ long do_mmuext_op(
- break;
- }
-
-- put_page_and_type(page);
-+ switch ( rc = put_page_and_type_preemptible(page, 1) )
-+ {
-+ case -EINTR:
-+ case -EAGAIN:
-+ curr->arch.old_guest_table = page;
-+ rc = 0;
-+ break;
-+ default:
-+ BUG_ON(rc);
-+ break;
-+ }
- put_page(page);
-
- /* A page is dirtied when its pin status is cleared. */
-@@ -3154,8 +3330,13 @@ long do_mmuext_op(
- }
-
- case MMUEXT_NEW_BASEPTR:
-- okay = (!paging_mode_translate(d)
-- && new_guest_cr3(op.arg1.mfn));
-+ if ( paging_mode_translate(d) )
-+ okay = 0;
-+ else
-+ {
-+ rc = new_guest_cr3(op.arg1.mfn);
-+ okay = !rc;
-+ }
- break;
-
-
-@@ -3169,29 +3350,56 @@ long do_mmuext_op(
- break;
- }
-
-+ old_mfn = pagetable_get_pfn(curr->arch.guest_table_user);
-+ /*
-+ * This is particularly important when getting restarted after the
-+ * previous attempt got preempted in the put-old-MFN phase.
-+ */
-+ if ( old_mfn == op.arg1.mfn )
-+ break;
-+
- if ( op.arg1.mfn != 0 )
- {
- if ( paging_mode_refcounts(d) )
- okay = get_page_from_pagenr(op.arg1.mfn, d);
- else
-- okay = !get_page_and_type_from_pagenr(
-- op.arg1.mfn, PGT_root_page_table, d, 0, 0);
-+ {
-+ rc = get_page_and_type_from_pagenr(
-+ op.arg1.mfn, PGT_root_page_table, d, 0, 1);
-+ okay = !rc;
-+ }
- if ( unlikely(!okay) )
- {
-- MEM_LOG("Error while installing new mfn %lx", op.arg1.mfn);
-+ if ( rc == -EINTR )
-+ rc = -EAGAIN;
-+ else if ( rc != -EAGAIN )
-+ MEM_LOG("Error while installing new mfn %lx",
-+ op.arg1.mfn);
- break;
- }
- }
-
-- old_mfn = pagetable_get_pfn(curr->arch.guest_table_user);
- curr->arch.guest_table_user = pagetable_from_pfn(op.arg1.mfn);
-
- if ( old_mfn != 0 )
- {
-+ struct page_info *page = mfn_to_page(old_mfn);
-+
- if ( paging_mode_refcounts(d) )
-- put_page(mfn_to_page(old_mfn));
-+ put_page(page);
- else
-- put_page_and_type(mfn_to_page(old_mfn));
-+ switch ( rc = put_page_and_type_preemptible(page, 1) )
-+ {
-+ case -EINTR:
-+ rc = -EAGAIN;
-+ case -EAGAIN:
-+ curr->arch.old_guest_table = page;
-+ okay = 0;
-+ break;
-+ default:
-+ BUG_ON(rc);
-+ break;
-+ }
- }
-
- break;
-@@ -3433,9 +3641,27 @@ long do_mmuext_op(
- }
-
- if ( rc == -EAGAIN )
-+ {
-+ ASSERT(i < count);
- rc = hypercall_create_continuation(
- __HYPERVISOR_mmuext_op, "hihi",
- uops, (count - i) | MMU_UPDATE_PREEMPTED, pdone, foreigndom);
-+ }
-+ else if ( curr->arch.old_guest_table )
-+ {
-+ XEN_GUEST_HANDLE(void) null;
-+
-+ ASSERT(rc || i == count);
-+ set_xen_guest_handle(null, NULL);
-+ /*
-+ * In order to have a way to communicate the final return value to
-+ * our continuation, we pass this in place of "foreigndom", building
-+ * on the fact that this argument isn't needed anymore.
-+ */
-+ rc = hypercall_create_continuation(
-+ __HYPERVISOR_mmuext_op, "hihi", null,
-+ MMU_UPDATE_PREEMPTED, null, rc);
-+ }
-
- put_pg_owner(pg_owner);
-
-@@ -3462,11 +3688,28 @@ long do_mmu_update(
- void *va;
- unsigned long gpfn, gmfn, mfn;
- struct page_info *page;
-- int rc = 0, i = 0;
-- unsigned int cmd, done = 0, pt_dom;
-- struct vcpu *v = current;
-+ unsigned int cmd, i = 0, done = 0, pt_dom;
-+ struct vcpu *curr = current, *v = curr;
- struct domain *d = v->domain, *pt_owner = d, *pg_owner;
- struct domain_mmap_cache mapcache;
-+ int rc = put_old_guest_table(curr);
-+
-+ if ( unlikely(rc) )
-+ {
-+ if ( likely(rc == -EAGAIN) )
-+ rc = hypercall_create_continuation(
-+ __HYPERVISOR_mmu_update, "hihi", ureqs, count, pdone,
-+ foreigndom);
-+ return rc;
-+ }
-+
-+ if ( unlikely(count == MMU_UPDATE_PREEMPTED) &&
-+ likely(guest_handle_is_null(ureqs)) )
-+ {
-+ /* See the curr->arch.old_guest_table related
-+ * hypercall_create_continuation() below. */
-+ return (int)foreigndom;
-+ }
-
- if ( unlikely(count & MMU_UPDATE_PREEMPTED) )
- {
-@@ -3515,7 +3758,7 @@ long do_mmu_update(
-
- for ( i = 0; i < count; i++ )
- {
-- if ( hypercall_preempt_check() )
-+ if ( curr->arch.old_guest_table || hypercall_preempt_check() )
- {
- rc = -EAGAIN;
- break;
-@@ -3696,9 +3939,27 @@ long do_mmu_update(
- }
-
- if ( rc == -EAGAIN )
-+ {
-+ ASSERT(i < count);
- rc = hypercall_create_continuation(
- __HYPERVISOR_mmu_update, "hihi",
- ureqs, (count - i) | MMU_UPDATE_PREEMPTED, pdone, foreigndom);
-+ }
-+ else if ( curr->arch.old_guest_table )
-+ {
-+ XEN_GUEST_HANDLE(void) null;
-+
-+ ASSERT(rc || i == count);
-+ set_xen_guest_handle(null, NULL);
-+ /*
-+ * In order to have a way to communicate the final return value to
-+ * our continuation, we pass this in place of "foreigndom", building
-+ * on the fact that this argument isn't needed anymore.
-+ */
-+ rc = hypercall_create_continuation(
-+ __HYPERVISOR_mmu_update, "hihi", null,
-+ MMU_UPDATE_PREEMPTED, null, rc);
-+ }
-
- put_pg_owner(pg_owner);
-
-diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c
-index 692281a..eada470 100644
---- a/xen/arch/x86/traps.c
-+++ b/xen/arch/x86/traps.c
-@@ -2407,12 +2407,23 @@ static int emulate_privileged_op(struct cpu_user_regs *regs)
- #endif
- }
- page = get_page_from_gfn(v->domain, gfn, NULL, P2M_ALLOC);
-- rc = page ? new_guest_cr3(page_to_mfn(page)) : 0;
- if ( page )
-+ {
-+ rc = new_guest_cr3(page_to_mfn(page));
- put_page(page);
-+ }
-+ else
-+ rc = -EINVAL;
- domain_unlock(v->domain);
-- if ( rc == 0 ) /* not okay */
-+ switch ( rc )
-+ {
-+ case 0:
-+ break;
-+ case -EAGAIN: /* retry after preemption */
-+ goto skip;
-+ default: /* not okay */
- goto fail;
-+ }
- break;
- }
-
-diff --git a/xen/arch/x86/x86_64/compat/mm.c b/xen/arch/x86/x86_64/compat/mm.c
-index fb7baca..ef7822b 100644
---- a/xen/arch/x86/x86_64/compat/mm.c
-+++ b/xen/arch/x86/x86_64/compat/mm.c
-@@ -268,6 +268,13 @@ int compat_mmuext_op(XEN_GUEST_HANDLE(mmuext_op_compat_t) cmp_uops,
- int rc = 0;
- XEN_GUEST_HANDLE(mmuext_op_t) nat_ops;
-
-+ if ( unlikely(count == MMU_UPDATE_PREEMPTED) &&
-+ likely(guest_handle_is_null(cmp_uops)) )
-+ {
-+ set_xen_guest_handle(nat_ops, NULL);
-+ return do_mmuext_op(nat_ops, count, pdone, foreigndom);
-+ }
-+
- preempt_mask = count & MMU_UPDATE_PREEMPTED;
- count ^= preempt_mask;
-
-@@ -365,17 +372,23 @@ int compat_mmuext_op(XEN_GUEST_HANDLE(mmuext_op_compat_t) cmp_uops,
- : mcs->call.args[1];
- unsigned int left = arg1 & ~MMU_UPDATE_PREEMPTED;
-
-- BUG_ON(left == arg1);
-+ BUG_ON(left == arg1 && left != i);
- BUG_ON(left > count);
- guest_handle_add_offset(nat_ops, i - left);
- guest_handle_subtract_offset(cmp_uops, left);
- left = 1;
-- BUG_ON(!hypercall_xlat_continuation(&left, 0x01, nat_ops, cmp_uops));
-- BUG_ON(left != arg1);
-- if (!test_bit(_MCSF_in_multicall, &mcs->flags))
-- regs->_ecx += count - i;
-+ if ( arg1 != MMU_UPDATE_PREEMPTED )
-+ {
-+ BUG_ON(!hypercall_xlat_continuation(&left, 0x01, nat_ops,
-+ cmp_uops));
-+ if ( !test_bit(_MCSF_in_multicall, &mcs->flags) )
-+ regs->_ecx += count - i;
-+ else
-+ mcs->compat_call.args[1] += count - i;
-+ }
- else
-- mcs->compat_call.args[1] += count - i;
-+ BUG_ON(hypercall_xlat_continuation(&left, 0));
-+ BUG_ON(left != arg1);
- }
- else
- BUG_ON(err > 0);
-diff --git a/xen/common/compat/domain.c b/xen/common/compat/domain.c
-index 40a0287..9ddaa38 100644
---- a/xen/common/compat/domain.c
-+++ b/xen/common/compat/domain.c
-@@ -50,6 +50,10 @@ int compat_vcpu_op(int cmd, int vcpuid, XEN_GUEST_HANDLE(void) arg)
- rc = v->is_initialised ? -EEXIST : arch_set_info_guest(v, cmp_ctxt);
- domain_unlock(d);
-
-+ if ( rc == -EAGAIN )
-+ rc = hypercall_create_continuation(__HYPERVISOR_vcpu_op, "iih",
-+ cmd, vcpuid, arg);
-+
- xfree(cmp_ctxt);
- break;
- }
-diff --git a/xen/common/domain.c b/xen/common/domain.c
-index c09fb73..89ab922 100644
---- a/xen/common/domain.c
-+++ b/xen/common/domain.c
-@@ -779,14 +779,18 @@ void domain_unpause_by_systemcontroller(struct domain *d)
- domain_unpause(d);
- }
-
--void vcpu_reset(struct vcpu *v)
-+int vcpu_reset(struct vcpu *v)
- {
- struct domain *d = v->domain;
-+ int rc;
-
- vcpu_pause(v);
- domain_lock(d);
-
-- arch_vcpu_reset(v);
-+ set_bit(_VPF_in_reset, &v->pause_flags);
-+ rc = arch_vcpu_reset(v);
-+ if ( rc )
-+ goto out_unlock;
-
- set_bit(_VPF_down, &v->pause_flags);
-
-@@ -802,9 +806,13 @@ void vcpu_reset(struct vcpu *v)
- #endif
- cpumask_clear(v->cpu_affinity_tmp);
- clear_bit(_VPF_blocked, &v->pause_flags);
-+ clear_bit(_VPF_in_reset, &v->pause_flags);
-
-+ out_unlock:
- domain_unlock(v->domain);
- vcpu_unpause(v);
-+
-+ return rc;
- }
-
-
-@@ -841,6 +849,11 @@ long do_vcpu_op(int cmd, int vcpuid, XEN_GUEST_HANDLE(void) arg)
- domain_unlock(d);
-
- free_vcpu_guest_context(ctxt);
-+
-+ if ( rc == -EAGAIN )
-+ rc = hypercall_create_continuation(__HYPERVISOR_vcpu_op, "iih",
-+ cmd, vcpuid, arg);
-+
- break;
-
- case VCPUOP_up: {
-diff --git a/xen/common/domctl.c b/xen/common/domctl.c
-index cbc8146..b3bfb38 100644
---- a/xen/common/domctl.c
-+++ b/xen/common/domctl.c
-@@ -307,8 +307,10 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domctl_t) u_domctl)
-
- if ( guest_handle_is_null(op->u.vcpucontext.ctxt) )
- {
-- vcpu_reset(v);
-- ret = 0;
-+ ret = vcpu_reset(v);
-+ if ( ret == -EAGAIN )
-+ ret = hypercall_create_continuation(
-+ __HYPERVISOR_domctl, "h", u_domctl);
- goto svc_out;
- }
-
-@@ -337,6 +339,10 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domctl_t) u_domctl)
- domain_pause(d);
- ret = arch_set_info_guest(v, c);
- domain_unpause(d);
-+
-+ if ( ret == -EAGAIN )
-+ ret = hypercall_create_continuation(
-+ __HYPERVISOR_domctl, "h", u_domctl);
- }
-
- svc_out:
-diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h
-index aecee68..898f63a 100644
---- a/xen/include/asm-x86/domain.h
-+++ b/xen/include/asm-x86/domain.h
-@@ -464,6 +464,7 @@ struct arch_vcpu
- pagetable_t guest_table_user; /* (MFN) x86/64 user-space pagetable */
- #endif
- pagetable_t guest_table; /* (MFN) guest notion of cr3 */
-+ struct page_info *old_guest_table; /* partially destructed pagetable */
- /* guest_table holds a ref to the page, and also a type-count unless
- * shadow refcounts are in use */
- pagetable_t shadow_table[4]; /* (MFN) shadow(s) of guest */
-diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h
-index ba92568..82cdde6 100644
---- a/xen/include/asm-x86/mm.h
-+++ b/xen/include/asm-x86/mm.h
-@@ -605,6 +605,7 @@ void audit_domains(void);
- int new_guest_cr3(unsigned long pfn);
- void make_cr3(struct vcpu *v, unsigned long mfn);
- void update_cr3(struct vcpu *v);
-+int vcpu_destroy_pagetables(struct vcpu *);
- void propagate_page_fault(unsigned long addr, u16 error_code);
- void *do_page_walk(struct vcpu *v, unsigned long addr);
-
-diff --git a/xen/include/xen/domain.h b/xen/include/xen/domain.h
-index d4ac50f..504a70f 100644
---- a/xen/include/xen/domain.h
-+++ b/xen/include/xen/domain.h
-@@ -13,7 +13,7 @@ typedef union {
- struct vcpu *alloc_vcpu(
- struct domain *d, unsigned int vcpu_id, unsigned int cpu_id);
- struct vcpu *alloc_dom0_vcpu0(void);
--void vcpu_reset(struct vcpu *v);
-+int vcpu_reset(struct vcpu *);
-
- struct xen_domctl_getdomaininfo;
- void getdomaininfo(struct domain *d, struct xen_domctl_getdomaininfo *info);
-@@ -67,7 +67,7 @@ void arch_dump_vcpu_info(struct vcpu *v);
-
- void arch_dump_domain_info(struct domain *d);
-
--void arch_vcpu_reset(struct vcpu *v);
-+int arch_vcpu_reset(struct vcpu *);
-
- extern spinlock_t vcpu_alloc_lock;
- bool_t domctl_lock_acquire(void);
-diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
-index b619269..b0715cb 100644
---- a/xen/include/xen/sched.h
-+++ b/xen/include/xen/sched.h
-@@ -644,6 +644,9 @@ static inline struct domain *next_domain_in_cpupool(
- /* VCPU is blocked due to missing mem_sharing ring. */
- #define _VPF_mem_sharing 6
- #define VPF_mem_sharing (1UL<<_VPF_mem_sharing)
-+ /* VCPU is being reset. */
-+#define _VPF_in_reset 7
-+#define VPF_in_reset (1UL<<_VPF_in_reset)
-
- static inline int vcpu_runnable(struct vcpu *v)
- {
diff --git a/main/xen/xsa48-4.2.patch b/main/xen/xsa48-4.2.patch
deleted file mode 100644
index 998dbcb1d5..0000000000
--- a/main/xen/xsa48-4.2.patch
+++ /dev/null
@@ -1,114 +0,0 @@
-Add -f FMT / --format FMT arg to qemu-nbd
-
-From: "Daniel P. Berrange" <berrange@redhat.com>
-
-Currently the qemu-nbd program will auto-detect the format of
-any disk it is given. This behaviour is known to be insecure.
-For example, if qemu-nbd initially exposes a 'raw' file to an
-unprivileged app, and that app runs
-
- 'qemu-img create -f qcow2 -o backing_file=/etc/shadow /dev/nbd0'
-
-then the next time the app is started, the qemu-nbd will now
-detect it as a 'qcow2' file and expose /etc/shadow to the
-unprivileged app.
-
-The only way to avoid this is to explicitly tell qemu-nbd what
-disk format to use on the command line, completely disabling
-auto-detection. This patch adds a '-f' / '--format' arg for
-this purpose, mirroring what is already available via qemu-img
-and qemu commands.
-
- qemu-nbd --format raw -p 9000 evil.img
-
-will now always use raw, regardless of what format 'evil.img'
-looks like it contains
-
-Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
-[Use errx, not err. - Paolo]
-Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
-Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
-
-[ This is a security issue, CVE-2013-1922 / XSA-48. ]
-
-diff --git a/qemu-nbd.c b/qemu-nbd.c
-index 291cba2..8fbe2cf 100644
---- a/tools/qemu-xen/qemu-nbd.c
-+++ b/tools/qemu-xen/qemu-nbd.c
-@@ -247,6 +247,7 @@ out:
- int main(int argc, char **argv)
- {
- BlockDriverState *bs;
-+ BlockDriver *drv;
- off_t dev_offset = 0;
- off_t offset = 0;
- uint32_t nbdflags = 0;
-@@ -256,7 +257,7 @@ int main(int argc, char **argv)
- struct sockaddr_in addr;
- socklen_t addr_len = sizeof(addr);
- off_t fd_size;
-- const char *sopt = "hVb:o:p:rsnP:c:dvk:e:t";
-+ const char *sopt = "hVb:o:p:rsnP:c:dvk:e:f:t";
- struct option lopt[] = {
- { "help", 0, NULL, 'h' },
- { "version", 0, NULL, 'V' },
-@@ -271,6 +272,7 @@ int main(int argc, char **argv)
- { "snapshot", 0, NULL, 's' },
- { "nocache", 0, NULL, 'n' },
- { "shared", 1, NULL, 'e' },
-+ { "format", 1, NULL, 'f' },
- { "persistent", 0, NULL, 't' },
- { "verbose", 0, NULL, 'v' },
- { NULL, 0, NULL, 0 }
-@@ -292,6 +294,7 @@ int main(int argc, char **argv)
- int max_fd;
- int persistent = 0;
- pthread_t client_thread;
-+ const char *fmt = NULL;
-
- /* The client thread uses SIGTERM to interrupt the server. A signal
- * handler ensures that "qemu-nbd -v -c" exits with a nice status code.
-@@ -368,6 +371,9 @@ int main(int argc, char **argv)
- errx(EXIT_FAILURE, "Shared device number must be greater than 0\n");
- }
- break;
-+ case 'f':
-+ fmt = optarg;
-+ break;
- case 't':
- persistent = 1;
- break;
-@@ -478,9 +484,19 @@ int main(int argc, char **argv)
- bdrv_init();
- atexit(bdrv_close_all);
-
-+ if (fmt) {
-+ drv = bdrv_find_format(fmt);
-+ if (!drv) {
-+ errx(EXIT_FAILURE, "Unknown file format '%s'", fmt);
-+ }
-+ } else {
-+ drv = NULL;
-+ }
-+
- bs = bdrv_new("hda");
- srcpath = argv[optind];
-- if ((ret = bdrv_open(bs, srcpath, flags, NULL)) < 0) {
-+ ret = bdrv_open(bs, srcpath, flags, drv);
-+ if (ret < 0) {
- errno = -ret;
- err(EXIT_FAILURE, "Failed to bdrv_open '%s'", argv[optind]);
- }
-diff --git a/qemu-nbd.texi b/qemu-nbd.texi
-index 44996cc..f56c68e 100644
---- a/tools/qemu-xen/qemu-nbd.texi
-+++ b/tools/qemu-xen/qemu-nbd.texi
-@@ -36,6 +36,8 @@ Export Qemu disk image using NBD protocol.
- disconnect the specified device
- @item -e, --shared=@var{num}
- device can be shared by @var{num} clients (default @samp{1})
-+@item -f, --format=@var{fmt}
-+ force block driver for format @var{fmt} instead of auto-detecting
- @item -t, --persistent
- don't exit on the last connection
- @item -v, --verbose
diff --git a/main/xen/xsa52-4.2-unstable.patch b/main/xen/xsa52-4.2-unstable.patch
deleted file mode 100644
index 14db8a8a7f..0000000000
--- a/main/xen/xsa52-4.2-unstable.patch
+++ /dev/null
@@ -1,46 +0,0 @@
-x86/xsave: fix information leak on AMD CPUs
-
-Just like for FXSAVE/FXRSTOR, XSAVE/XRSTOR also don't save/restore the
-last instruction and operand pointers as well as the last opcode if
-there's no pending unmasked exception (see CVE-2006-1056 and commit
-9747:4d667a139318).
-
-While the FXSR solution sits in the save path, I prefer to have this in
-the restore path because there the handling is simpler (namely in the
-context of the pending changes to properly save the selector values for
-32-bit guest code).
-
-Also this is using FFREE instead of EMMS, as it doesn't seem unlikely
-that in the future we may see CPUs with x87 and SSE/AVX but no MMX
-support. The goal here anyway is just to avoid an FPU stack overflow.
-I would have preferred to use FFREEP instead of FFREE (freeing two
-stack slots at once), but AMD doesn't document that instruction.
-
-This is CVE-2013-2076 / XSA-52.
-
-Signed-off-by: Jan Beulich <jbeulich@suse.com>
-
---- a/xen/arch/x86/xstate.c
-+++ b/xen/arch/x86/xstate.c
-@@ -78,6 +78,21 @@ void xrstor(struct vcpu *v, uint64_t mas
-
- struct xsave_struct *ptr = v->arch.xsave_area;
-
-+ /*
-+ * AMD CPUs don't save/restore FDP/FIP/FOP unless an exception
-+ * is pending. Clear the x87 state here by setting it to fixed
-+ * values. The hypervisor data segment can be sometimes 0 and
-+ * sometimes new user value. Both should be ok. Use the FPU saved
-+ * data block as a safe address because it should be in L1.
-+ */
-+ if ( (mask & ptr->xsave_hdr.xstate_bv & XSTATE_FP) &&
-+ !(ptr->fpu_sse.fsw & 0x0080) &&
-+ boot_cpu_data.x86_vendor == X86_VENDOR_AMD )
-+ asm volatile ( "fnclex\n\t" /* clear exceptions */
-+ "ffree %%st(7)\n\t" /* clear stack tag */
-+ "fildl %0" /* load to clear state */
-+ : : "m" (ptr->fpu_sse) );
-+
- asm volatile (
- ".byte " REX_PREFIX "0x0f,0xae,0x2f"
- :
diff --git a/main/xen/xsa53-4.2.patch b/main/xen/xsa53-4.2.patch
deleted file mode 100644
index eb8e79bed2..0000000000
--- a/main/xen/xsa53-4.2.patch
+++ /dev/null
@@ -1,57 +0,0 @@
-x86/xsave: recover from faults on XRSTOR
-
-Just like FXRSTOR, XRSTOR can raise #GP if bad content is being passed
-to it in the memory block (i.e. aspects not under the control of the
-hypervisor, other than e.g. proper alignment of the block).
-
-Also correct the comment explaining why FXRSTOR needs exception
-recovery code to not wrongly state that this can only be a result of
-the control tools passing a bad image.
-
-This is CVE-2013-2077 / XSA-53.
-
-Signed-off-by: Jan Beulich <jbeulich@suse.com>
-
---- a/xen/arch/x86/i387.c
-+++ b/xen/arch/x86/i387.c
-@@ -53,7 +53,7 @@ static inline void fpu_fxrstor(struct vc
- /*
- * FXRSTOR can fault if passed a corrupted data block. We handle this
- * possibility, which may occur if the block was passed to us by control
-- * tools, by silently clearing the block.
-+ * tools or through VCPUOP_initialise, by silently clearing the block.
- */
- asm volatile (
- #ifdef __i386__
---- a/xen/arch/x86/xstate.c
-+++ b/xen/arch/x86/xstate.c
-@@ -93,10 +93,25 @@ void xrstor(struct vcpu *v, uint64_t mas
- "fildl %0" /* load to clear state */
- : : "m" (ptr->fpu_sse) );
-
-- asm volatile (
-- ".byte " REX_PREFIX "0x0f,0xae,0x2f"
-- :
-- : "m" (*ptr), "a" (lmask), "d" (hmask), "D"(ptr) );
-+ /*
-+ * XRSTOR can fault if passed a corrupted data block. We handle this
-+ * possibility, which may occur if the block was passed to us by control
-+ * tools or through VCPUOP_initialise, by silently clearing the block.
-+ */
-+ asm volatile ( "1: .byte " REX_PREFIX "0x0f,0xae,0x2f\n"
-+ ".section .fixup,\"ax\"\n"
-+ "2: mov %5,%%ecx \n"
-+ " xor %1,%1 \n"
-+ " rep stosb \n"
-+ " lea %2,%0 \n"
-+ " mov %3,%1 \n"
-+ " jmp 1b \n"
-+ ".previous \n"
-+ _ASM_EXTABLE(1b, 2b)
-+ : "+&D" (ptr), "+&a" (lmask)
-+ : "m" (*ptr), "g" (lmask), "d" (hmask),
-+ "m" (xsave_cntxt_size)
-+ : "ecx" );
- }
-
- bool_t xsave_enabled(const struct vcpu *v)
diff --git a/main/xen/xsa54.patch b/main/xen/xsa54.patch
deleted file mode 100644
index 83c8993d6a..0000000000
--- a/main/xen/xsa54.patch
+++ /dev/null
@@ -1,24 +0,0 @@
-x86/xsave: properly check guest input to XSETBV
-
-Other than the HVM emulation path, the PV case so far failed to check
-that YMM state requires SSE state to be enabled, allowing for a #GP to
-occur upon passing the inputs to XSETBV inside the hypervisor.
-
-This is CVE-2013-2078 / XSA-54.
-
-Signed-off-by: Jan Beulich <jbeulich@suse.com>
-
---- a/xen/arch/x86/traps.c
-+++ b/xen/arch/x86/traps.c
-@@ -2205,6 +2205,11 @@ static int emulate_privileged_op(struct
- if ( !(new_xfeature & XSTATE_FP) || (new_xfeature & ~xfeature_mask) )
- goto fail;
-
-+ /* YMM state takes SSE state as prerequisite. */
-+ if ( (xfeature_mask & new_xfeature & XSTATE_YMM) &&
-+ !(new_xfeature & XSTATE_SSE) )
-+ goto fail;
-+
- v->arch.xcr0 = new_xfeature;
- v->arch.xcr0_accum |= new_xfeature;
- set_xcr0(new_xfeature);
diff --git a/main/xen/xsa55.patch b/main/xen/xsa55.patch
deleted file mode 100644
index 35fe7afd06..0000000000
--- a/main/xen/xsa55.patch
+++ /dev/null
@@ -1,3431 +0,0 @@
-diff --git a/tools/libxc/Makefile b/tools/libxc/Makefile
-index 876c148..1a5249c 100644
---- a/tools/libxc/Makefile
-+++ b/tools/libxc/Makefile
-@@ -52,8 +52,13 @@ endif
- vpath %.c ../../xen/common/libelf
- CFLAGS += -I../../xen/common/libelf
-
--GUEST_SRCS-y += libelf-tools.c libelf-loader.c
--GUEST_SRCS-y += libelf-dominfo.c libelf-relocate.c
-+ELF_SRCS-y += libelf-tools.c libelf-loader.c
-+ELF_SRCS-y += libelf-dominfo.c
-+
-+GUEST_SRCS-y += $(ELF_SRCS-y)
-+
-+$(patsubst %.c,%.o,$(ELF_SRCS-y)): CFLAGS += -Wno-pointer-sign
-+$(patsubst %.c,%.opic,$(ELF_SRCS-y)): CFLAGS += -Wno-pointer-sign
-
- # new domain builder
- GUEST_SRCS-y += xc_dom_core.c xc_dom_boot.c
-diff --git a/tools/libxc/ia64/xc_ia64_dom_fwloader.c b/tools/libxc/ia64/xc_ia64_dom_fwloader.c
-index cdf3333..dbd3349 100644
---- a/tools/libxc/ia64/xc_ia64_dom_fwloader.c
-+++ b/tools/libxc/ia64/xc_ia64_dom_fwloader.c
-@@ -60,6 +60,8 @@ static int xc_dom_load_fw_kernel(struct xc_dom_image *dom)
- unsigned long i;
-
- dest = xc_dom_vaddr_to_ptr(dom, dom->kernel_seg.vstart);
-+ if ( dest == NULL )
-+ return -1;
- memcpy(dest, dom->kernel_blob, FW_SIZE);
-
- /* Synchronize cache. */
-diff --git a/tools/libxc/xc_cpuid_x86.c b/tools/libxc/xc_cpuid_x86.c
-index 0882ce6..da435ce 100644
---- a/tools/libxc/xc_cpuid_x86.c
-+++ b/tools/libxc/xc_cpuid_x86.c
-@@ -589,6 +589,8 @@ static int xc_cpuid_do_domctl(
- static char *alloc_str(void)
- {
- char *s = malloc(33);
-+ if ( s == NULL )
-+ return s;
- memset(s, 0, 33);
- return s;
- }
-@@ -600,6 +602,8 @@ void xc_cpuid_to_str(const unsigned int *regs, char **strs)
- for ( i = 0; i < 4; i++ )
- {
- strs[i] = alloc_str();
-+ if ( strs[i] == NULL )
-+ continue;
- for ( j = 0; j < 32; j++ )
- strs[i][j] = !!((regs[i] & (1U << (31 - j)))) ? '1' : '0';
- }
-@@ -680,7 +684,7 @@ int xc_cpuid_check(
- const char **config,
- char **config_transformed)
- {
-- int i, j;
-+ int i, j, rc;
- unsigned int regs[4];
-
- memset(config_transformed, 0, 4 * sizeof(*config_transformed));
-@@ -692,6 +696,11 @@ int xc_cpuid_check(
- if ( config[i] == NULL )
- continue;
- config_transformed[i] = alloc_str();
-+ if ( config_transformed[i] == NULL )
-+ {
-+ rc = -ENOMEM;
-+ goto fail_rc;
-+ }
- for ( j = 0; j < 32; j++ )
- {
- unsigned char val = !!((regs[i] & (1U << (31 - j))));
-@@ -708,12 +717,14 @@ int xc_cpuid_check(
- return 0;
-
- fail:
-+ rc = -EPERM;
-+ fail_rc:
- for ( i = 0; i < 4; i++ )
- {
- free(config_transformed[i]);
- config_transformed[i] = NULL;
- }
-- return -EPERM;
-+ return rc;
- }
-
- /*
-@@ -758,6 +769,11 @@ int xc_cpuid_set(
- }
-
- config_transformed[i] = alloc_str();
-+ if ( config_transformed[i] == NULL )
-+ {
-+ rc = -ENOMEM;
-+ goto fail;
-+ }
-
- for ( j = 0; j < 32; j++ )
- {
-diff --git a/tools/libxc/xc_dom.h b/tools/libxc/xc_dom.h
-index 6a72aa9..d801f66 100644
---- a/tools/libxc/xc_dom.h
-+++ b/tools/libxc/xc_dom.h
-@@ -140,9 +140,10 @@ struct xc_dom_image {
-
- struct xc_dom_loader {
- char *name;
-- int (*probe) (struct xc_dom_image * dom);
-- int (*parser) (struct xc_dom_image * dom);
-- int (*loader) (struct xc_dom_image * dom);
-+ /* Sadly the error returns from these functions are not consistent: */
-+ elf_negerrnoval (*probe) (struct xc_dom_image * dom);
-+ elf_negerrnoval (*parser) (struct xc_dom_image * dom);
-+ elf_errorstatus (*loader) (struct xc_dom_image * dom);
-
- struct xc_dom_loader *next;
- };
-@@ -275,27 +276,50 @@ int xc_dom_alloc_segment(struct xc_dom_image *dom,
-
- void *xc_dom_pfn_to_ptr(struct xc_dom_image *dom, xen_pfn_t first,
- xen_pfn_t count);
-+void *xc_dom_pfn_to_ptr_retcount(struct xc_dom_image *dom, xen_pfn_t first,
-+ xen_pfn_t count, xen_pfn_t *count_out);
- void xc_dom_unmap_one(struct xc_dom_image *dom, xen_pfn_t pfn);
- void xc_dom_unmap_all(struct xc_dom_image *dom);
-
--static inline void *xc_dom_seg_to_ptr(struct xc_dom_image *dom,
-- struct xc_dom_seg *seg)
-+static inline void *xc_dom_seg_to_ptr_pages(struct xc_dom_image *dom,
-+ struct xc_dom_seg *seg,
-+ xen_pfn_t *pages_out)
- {
- xen_vaddr_t segsize = seg->vend - seg->vstart;
- unsigned int page_size = XC_DOM_PAGE_SIZE(dom);
- xen_pfn_t pages = (segsize + page_size - 1) / page_size;
-+ void *retval;
-+
-+ retval = xc_dom_pfn_to_ptr(dom, seg->pfn, pages);
-
-- return xc_dom_pfn_to_ptr(dom, seg->pfn, pages);
-+ *pages_out = retval ? pages : 0;
-+ return retval;
-+}
-+
-+static inline void *xc_dom_seg_to_ptr(struct xc_dom_image *dom,
-+ struct xc_dom_seg *seg)
-+{
-+ xen_pfn_t dummy;
-+
-+ return xc_dom_seg_to_ptr_pages(dom, seg, &dummy);
- }
-
- static inline void *xc_dom_vaddr_to_ptr(struct xc_dom_image *dom,
-- xen_vaddr_t vaddr)
-+ xen_vaddr_t vaddr,
-+ size_t *safe_region_out)
- {
- unsigned int page_size = XC_DOM_PAGE_SIZE(dom);
- xen_pfn_t page = (vaddr - dom->parms.virt_base) / page_size;
- unsigned int offset = (vaddr - dom->parms.virt_base) % page_size;
-- void *ptr = xc_dom_pfn_to_ptr(dom, page, 0);
-- return (ptr ? (ptr + offset) : NULL);
-+ xen_pfn_t safe_region_count;
-+ void *ptr;
-+
-+ *safe_region_out = 0;
-+ ptr = xc_dom_pfn_to_ptr_retcount(dom, page, 0, &safe_region_count);
-+ if ( ptr == NULL )
-+ return ptr;
-+ *safe_region_out = (safe_region_count << XC_DOM_PAGE_SHIFT(dom)) - offset;
-+ return ptr;
- }
-
- static inline int xc_dom_feature_translated(struct xc_dom_image *dom)
-@@ -307,6 +331,8 @@ static inline xen_pfn_t xc_dom_p2m_host(struct xc_dom_image *dom, xen_pfn_t pfn)
- {
- if (dom->shadow_enabled)
- return pfn;
-+ if (pfn >= dom->total_pages)
-+ return INVALID_MFN;
- return dom->p2m_host[pfn];
- }
-
-@@ -315,6 +341,8 @@ static inline xen_pfn_t xc_dom_p2m_guest(struct xc_dom_image *dom,
- {
- if (xc_dom_feature_translated(dom))
- return pfn;
-+ if (pfn >= dom->total_pages)
-+ return INVALID_MFN;
- return dom->p2m_host[pfn];
- }
-
-diff --git a/tools/libxc/xc_dom_binloader.c b/tools/libxc/xc_dom_binloader.c
-index 769e97d..553b366 100644
---- a/tools/libxc/xc_dom_binloader.c
-+++ b/tools/libxc/xc_dom_binloader.c
-@@ -123,10 +123,13 @@ static struct xen_bin_image_table *find_table(struct xc_dom_image *dom)
- uint32_t *probe_ptr;
- uint32_t *probe_end;
-
-+ if ( dom->kernel_size < sizeof(*table) )
-+ return NULL;
- probe_ptr = dom->kernel_blob;
-- probe_end = dom->kernel_blob + dom->kernel_size - sizeof(*table);
-- if ( (void*)probe_end > (dom->kernel_blob + 8192) )
-+ if ( dom->kernel_size > (8192 + sizeof(*table)) )
- probe_end = dom->kernel_blob + 8192;
-+ else
-+ probe_end = dom->kernel_blob + dom->kernel_size - sizeof(*table);
-
- for ( table = NULL; probe_ptr < probe_end; probe_ptr++ )
- {
-@@ -249,6 +252,7 @@ static int xc_dom_load_bin_kernel(struct xc_dom_image *dom)
- char *image = dom->kernel_blob;
- char *dest;
- size_t image_size = dom->kernel_size;
-+ size_t dest_size;
- uint32_t start_addr;
- uint32_t load_end_addr;
- uint32_t bss_end_addr;
-@@ -272,7 +276,29 @@ static int xc_dom_load_bin_kernel(struct xc_dom_image *dom)
- DOMPRINTF(" text_size: 0x%" PRIx32 "", text_size);
- DOMPRINTF(" bss_size: 0x%" PRIx32 "", bss_size);
-
-- dest = xc_dom_vaddr_to_ptr(dom, dom->kernel_seg.vstart);
-+ dest = xc_dom_vaddr_to_ptr(dom, dom->kernel_seg.vstart, &dest_size);
-+ if ( dest == NULL )
-+ {
-+ DOMPRINTF("%s: xc_dom_vaddr_to_ptr(dom, dom->kernel_seg.vstart)"
-+ " => NULL", __FUNCTION__);
-+ return -EINVAL;
-+ }
-+
-+ if ( dest_size < text_size ||
-+ dest_size - text_size < bss_size )
-+ {
-+ DOMPRINTF("%s: mapped region is too small for image", __FUNCTION__);
-+ return -EINVAL;
-+ }
-+
-+ if ( image_size < skip ||
-+ image_size - skip < text_size )
-+ {
-+ DOMPRINTF("%s: image is too small for declared text size",
-+ __FUNCTION__);
-+ return -EINVAL;
-+ }
-+
- memcpy(dest, image + skip, text_size);
- memset(dest + text_size, 0, bss_size);
-
-diff --git a/tools/libxc/xc_dom_core.c b/tools/libxc/xc_dom_core.c
-index 2a01d7c..e79e38d 100644
---- a/tools/libxc/xc_dom_core.c
-+++ b/tools/libxc/xc_dom_core.c
-@@ -120,9 +120,17 @@ void *xc_dom_malloc(struct xc_dom_image *dom, size_t size)
- {
- struct xc_dom_mem *block;
-
-+ if ( size > SIZE_MAX - sizeof(*block) )
-+ {
-+ DOMPRINTF("%s: unreasonable allocation size", __FUNCTION__);
-+ return NULL;
-+ }
- block = malloc(sizeof(*block) + size);
- if ( block == NULL )
-+ {
-+ DOMPRINTF("%s: allocation failed", __FUNCTION__);
- return NULL;
-+ }
- memset(block, 0, sizeof(*block) + size);
- block->next = dom->memblocks;
- dom->memblocks = block;
-@@ -138,7 +146,10 @@ void *xc_dom_malloc_page_aligned(struct xc_dom_image *dom, size_t size)
-
- block = malloc(sizeof(*block));
- if ( block == NULL )
-+ {
-+ DOMPRINTF("%s: allocation failed", __FUNCTION__);
- return NULL;
-+ }
- memset(block, 0, sizeof(*block));
- block->mmap_len = size;
- block->mmap_ptr = mmap(NULL, block->mmap_len,
-@@ -146,6 +157,7 @@ void *xc_dom_malloc_page_aligned(struct xc_dom_image *dom, size_t size)
- -1, 0);
- if ( block->mmap_ptr == MAP_FAILED )
- {
-+ DOMPRINTF("%s: mmap failed", __FUNCTION__);
- free(block);
- return NULL;
- }
-@@ -202,6 +214,7 @@ void *xc_dom_malloc_filemap(struct xc_dom_image *dom,
- close(fd);
- if ( block != NULL )
- free(block);
-+ DOMPRINTF("%s: failed (on file `%s')", __FUNCTION__, filename);
- return NULL;
- }
-
-@@ -271,6 +284,11 @@ size_t xc_dom_check_gzip(xc_interface *xch, void *blob, size_t ziplen)
- unsigned char *gzlen;
- size_t unziplen;
-
-+ if ( ziplen < 6 )
-+ /* Too small. We need (i.e. the subsequent code relies on)
-+ * 2 bytes for the magic number plus 4 bytes length. */
-+ return 0;
-+
- if ( strncmp(blob, "\037\213", 2) )
- /* not gzipped */
- return 0;
-@@ -351,10 +369,19 @@ int xc_dom_try_gunzip(struct xc_dom_image *dom, void **blob, size_t * size)
- void *xc_dom_pfn_to_ptr(struct xc_dom_image *dom, xen_pfn_t pfn,
- xen_pfn_t count)
- {
-+ xen_pfn_t count_out_dummy;
-+ return xc_dom_pfn_to_ptr_retcount(dom, pfn, count, &count_out_dummy);
-+}
-+
-+void *xc_dom_pfn_to_ptr_retcount(struct xc_dom_image *dom, xen_pfn_t pfn,
-+ xen_pfn_t count, xen_pfn_t *count_out)
-+{
- struct xc_dom_phys *phys;
- unsigned int page_shift = XC_DOM_PAGE_SHIFT(dom);
- char *mode = "unset";
-
-+ *count_out = 0;
-+
- if ( pfn > dom->total_pages || /* multiple checks to avoid overflows */
- count > dom->total_pages ||
- pfn > dom->total_pages - count )
-@@ -384,6 +411,7 @@ void *xc_dom_pfn_to_ptr(struct xc_dom_image *dom, xen_pfn_t pfn,
- phys->count);
- return NULL;
- }
-+ *count_out = count;
- }
- else
- {
-@@ -391,6 +419,9 @@ void *xc_dom_pfn_to_ptr(struct xc_dom_image *dom, xen_pfn_t pfn,
- just hand out a pointer to it */
- if ( pfn < phys->first )
- continue;
-+ if ( pfn >= phys->first + phys->count )
-+ continue;
-+ *count_out = phys->count - (pfn - phys->first);
- }
- return phys->ptr + ((pfn - phys->first) << page_shift);
- }
-@@ -478,7 +509,8 @@ int xc_dom_alloc_segment(struct xc_dom_image *dom,
- seg->vstart = start;
- seg->pfn = (seg->vstart - dom->parms.virt_base) / page_size;
-
-- if ( pages > dom->total_pages || /* double test avoids overflow probs */
-+ if ( pages > dom->total_pages || /* multiple test avoids overflow probs */
-+ seg->pfn > dom->total_pages ||
- pages > dom->total_pages - seg->pfn)
- {
- xc_dom_panic(dom->xch, XC_OUT_OF_MEMORY,
-@@ -855,6 +887,12 @@ int xc_dom_build_image(struct xc_dom_image *dom)
- ramdisklen) != 0 )
- goto err;
- ramdiskmap = xc_dom_seg_to_ptr(dom, &dom->ramdisk_seg);
-+ if ( ramdiskmap == NULL )
-+ {
-+ DOMPRINTF("%s: xc_dom_seg_to_ptr(dom, &dom->ramdisk_seg) => NULL",
-+ __FUNCTION__);
-+ goto err;
-+ }
- if ( unziplen )
- {
- if ( xc_dom_do_gunzip(dom->xch,
-diff --git a/tools/libxc/xc_dom_elfloader.c b/tools/libxc/xc_dom_elfloader.c
-index 2e69559..be58276 100644
---- a/tools/libxc/xc_dom_elfloader.c
-+++ b/tools/libxc/xc_dom_elfloader.c
-@@ -28,13 +28,14 @@
-
- #include "xg_private.h"
- #include "xc_dom.h"
-+#include "xc_bitops.h"
-
- #define XEN_VER "xen-3.0"
-
- /* ------------------------------------------------------------------------ */
-
- static void log_callback(struct elf_binary *elf, void *caller_data,
-- int iserr, const char *fmt, va_list al) {
-+ bool iserr, const char *fmt, va_list al) {
- xc_interface *xch = caller_data;
-
- xc_reportv(xch,
-@@ -46,7 +47,7 @@ static void log_callback(struct elf_binary *elf, void *caller_data,
-
- void xc_elf_set_logfile(xc_interface *xch, struct elf_binary *elf,
- int verbose) {
-- elf_set_log(elf, log_callback, xch, verbose);
-+ elf_set_log(elf, log_callback, xch, verbose /* convert to bool */);
- }
-
- /* ------------------------------------------------------------------------ */
-@@ -84,7 +85,7 @@ static char *xc_dom_guest_type(struct xc_dom_image *dom,
- /* ------------------------------------------------------------------------ */
- /* parse elf binary */
-
--static int check_elf_kernel(struct xc_dom_image *dom, int verbose)
-+static elf_negerrnoval check_elf_kernel(struct xc_dom_image *dom, bool verbose)
- {
- if ( dom->kernel_blob == NULL )
- {
-@@ -95,7 +96,7 @@ static int check_elf_kernel(struct xc_dom_image *dom, int verbose)
- return -EINVAL;
- }
-
-- if ( !elf_is_elfbinary(dom->kernel_blob) )
-+ if ( !elf_is_elfbinary(dom->kernel_blob, dom->kernel_size) )
- {
- if ( verbose )
- xc_dom_panic(dom->xch,
-@@ -106,20 +107,21 @@ static int check_elf_kernel(struct xc_dom_image *dom, int verbose)
- return 0;
- }
-
--static int xc_dom_probe_elf_kernel(struct xc_dom_image *dom)
-+static elf_negerrnoval xc_dom_probe_elf_kernel(struct xc_dom_image *dom)
- {
- return check_elf_kernel(dom, 0);
- }
-
--static int xc_dom_load_elf_symtab(struct xc_dom_image *dom,
-- struct elf_binary *elf, int load)
-+static elf_errorstatus xc_dom_load_elf_symtab(struct xc_dom_image *dom,
-+ struct elf_binary *elf, bool load)
- {
- struct elf_binary syms;
-- const elf_shdr *shdr, *shdr2;
-+ ELF_HANDLE_DECL(elf_shdr) shdr; ELF_HANDLE_DECL(elf_shdr) shdr2;
- xen_vaddr_t symtab, maxaddr;
-- char *hdr;
-+ elf_ptrval hdr;
- size_t size;
-- int h, count, type, i, tables = 0;
-+ unsigned h, count, type, i, tables = 0;
-+ unsigned long *strtab_referenced = NULL;
-
- if ( elf_swap(elf) )
- {
-@@ -130,31 +132,48 @@ static int xc_dom_load_elf_symtab(struct xc_dom_image *dom,
-
- if ( load )
- {
-+ char *hdr_ptr;
-+ size_t allow_size;
-+
- if ( !dom->bsd_symtab_start )
- return 0;
- size = dom->kernel_seg.vend - dom->bsd_symtab_start;
-- hdr = xc_dom_vaddr_to_ptr(dom, dom->bsd_symtab_start);
-- *(int *)hdr = size - sizeof(int);
-+ hdr_ptr = xc_dom_vaddr_to_ptr(dom, dom->bsd_symtab_start, &allow_size);
-+ if ( hdr_ptr == NULL )
-+ {
-+ DOMPRINTF("%s/load: xc_dom_vaddr_to_ptr(dom,dom->bsd_symtab_start"
-+ " => NULL", __FUNCTION__);
-+ return -1;
-+ }
-+ elf->caller_xdest_base = hdr_ptr;
-+ elf->caller_xdest_size = allow_size;
-+ hdr = ELF_REALPTR2PTRVAL(hdr_ptr);
-+ elf_store_val(elf, unsigned, hdr, size - sizeof(unsigned));
- }
- else
- {
-- size = sizeof(int) + elf_size(elf, elf->ehdr) +
-+ char *hdr_ptr;
-+
-+ size = sizeof(unsigned) + elf_size(elf, elf->ehdr) +
- elf_shdr_count(elf) * elf_size(elf, shdr);
-- hdr = xc_dom_malloc(dom, size);
-- if ( hdr == NULL )
-+ hdr_ptr = xc_dom_malloc(dom, size);
-+ if ( hdr_ptr == NULL )
- return 0;
-- dom->bsd_symtab_start = elf_round_up(&syms, dom->kernel_seg.vend);
-+ elf->caller_xdest_base = hdr_ptr;
-+ elf->caller_xdest_size = size;
-+ hdr = ELF_REALPTR2PTRVAL(hdr_ptr);
-+ dom->bsd_symtab_start = elf_round_up(elf, dom->kernel_seg.vend);
- }
-
-- memcpy(hdr + sizeof(int),
-- elf->image,
-+ elf_memcpy_safe(elf, hdr + sizeof(unsigned),
-+ ELF_IMAGE_BASE(elf),
- elf_size(elf, elf->ehdr));
-- memcpy(hdr + sizeof(int) + elf_size(elf, elf->ehdr),
-- elf->image + elf_uval(elf, elf->ehdr, e_shoff),
-+ elf_memcpy_safe(elf, hdr + sizeof(unsigned) + elf_size(elf, elf->ehdr),
-+ ELF_IMAGE_BASE(elf) + elf_uval(elf, elf->ehdr, e_shoff),
- elf_shdr_count(elf) * elf_size(elf, shdr));
- if ( elf_64bit(elf) )
- {
-- Elf64_Ehdr *ehdr = (Elf64_Ehdr *)(hdr + sizeof(int));
-+ Elf64_Ehdr *ehdr = (Elf64_Ehdr *)(hdr + sizeof(unsigned));
- ehdr->e_phoff = 0;
- ehdr->e_phentsize = 0;
- ehdr->e_phnum = 0;
-@@ -163,19 +182,42 @@ static int xc_dom_load_elf_symtab(struct xc_dom_image *dom,
- }
- else
- {
-- Elf32_Ehdr *ehdr = (Elf32_Ehdr *)(hdr + sizeof(int));
-+ Elf32_Ehdr *ehdr = (Elf32_Ehdr *)(hdr + sizeof(unsigned));
- ehdr->e_phoff = 0;
- ehdr->e_phentsize = 0;
- ehdr->e_phnum = 0;
- ehdr->e_shoff = elf_size(elf, elf->ehdr);
- ehdr->e_shstrndx = SHN_UNDEF;
- }
-- if ( elf_init(&syms, hdr + sizeof(int), size - sizeof(int)) )
-+ if ( elf->caller_xdest_size < sizeof(unsigned) )
-+ {
-+ DOMPRINTF("%s/%s: header size %"PRIx64" too small",
-+ __FUNCTION__, load ? "load" : "parse",
-+ (uint64_t)elf->caller_xdest_size);
-+ return -1;
-+ }
-+ if ( elf_init(&syms, elf->caller_xdest_base + sizeof(unsigned),
-+ elf->caller_xdest_size - sizeof(unsigned)) )
- return -1;
-
-+ /*
-+ * The caller_xdest_{base,size} and dest_{base,size} need to
-+ * remain valid so long as each struct elf_image does. The
-+ * principle we adopt is that these values are set when the
-+ * memory is allocated or mapped, and cleared when (and if)
-+ * they are unmapped.
-+ *
-+ * Mappings of the guest are normally undone by xc_dom_unmap_all
-+ * (directly or via xc_dom_release). We do not explicitly clear
-+ * these because in fact that happens only at the end of
-+ * xc_dom_boot_image, at which time all of these ELF loading
-+ * functions have returned. No relevant struct elf_binary*
-+ * escapes this file.
-+ */
-+
- xc_elf_set_logfile(dom->xch, &syms, 1);
-
-- symtab = dom->bsd_symtab_start + sizeof(int);
-+ symtab = dom->bsd_symtab_start + sizeof(unsigned);
- maxaddr = elf_round_up(&syms, symtab + elf_size(&syms, syms.ehdr) +
- elf_shdr_count(&syms) * elf_size(&syms, shdr));
-
-@@ -186,27 +228,40 @@ static int xc_dom_load_elf_symtab(struct xc_dom_image *dom,
- symtab, maxaddr);
-
- count = elf_shdr_count(&syms);
-+ /* elf_shdr_count guarantees that count is reasonable */
-+
-+ strtab_referenced = xc_dom_malloc(dom, bitmap_size(count));
-+ if ( strtab_referenced == NULL )
-+ return -1;
-+ bitmap_clear(strtab_referenced, count);
-+ /* Note the symtabs @h linked to by any strtab @i. */
-+ for ( i = 0; i < count; i++ )
-+ {
-+ shdr2 = elf_shdr_by_index(&syms, i);
-+ if ( elf_uval(&syms, shdr2, sh_type) == SHT_SYMTAB )
-+ {
-+ h = elf_uval(&syms, shdr2, sh_link);
-+ if (h < count)
-+ set_bit(h, strtab_referenced);
-+ }
-+ }
-+
- for ( h = 0; h < count; h++ )
- {
- shdr = elf_shdr_by_index(&syms, h);
-+ if ( !elf_access_ok(elf, ELF_HANDLE_PTRVAL(shdr), 1) )
-+ /* input has an insane section header count field */
-+ break;
- type = elf_uval(&syms, shdr, sh_type);
- if ( type == SHT_STRTAB )
- {
-- /* Look for a strtab @i linked to symtab @h. */
-- for ( i = 0; i < count; i++ )
-- {
-- shdr2 = elf_shdr_by_index(&syms, i);
-- if ( (elf_uval(&syms, shdr2, sh_type) == SHT_SYMTAB) &&
-- (elf_uval(&syms, shdr2, sh_link) == h) )
-- break;
-- }
- /* Skip symtab @h if we found no corresponding strtab @i. */
-- if ( i == count )
-+ if ( !test_bit(h, strtab_referenced) )
- {
- if ( elf_64bit(&syms) )
-- *(Elf64_Off*)(&shdr->e64.sh_offset) = 0;
-+ elf_store_field(elf, shdr, e64.sh_offset, 0);
- else
-- *(Elf32_Off*)(&shdr->e32.sh_offset) = 0;
-+ elf_store_field(elf, shdr, e32.sh_offset, 0);
- continue;
- }
- }
-@@ -215,13 +270,13 @@ static int xc_dom_load_elf_symtab(struct xc_dom_image *dom,
- {
- /* Mangled to be based on ELF header location. */
- if ( elf_64bit(&syms) )
-- *(Elf64_Off*)(&shdr->e64.sh_offset) = maxaddr - symtab;
-+ elf_store_field(elf, shdr, e64.sh_offset, maxaddr - symtab);
- else
-- *(Elf32_Off*)(&shdr->e32.sh_offset) = maxaddr - symtab;
-+ elf_store_field(elf, shdr, e32.sh_offset, maxaddr - symtab);
- size = elf_uval(&syms, shdr, sh_size);
- maxaddr = elf_round_up(&syms, maxaddr + size);
- tables++;
-- DOMPRINTF("%s: h=%d %s, size=0x%zx, maxaddr=0x%" PRIx64 "",
-+ DOMPRINTF("%s: h=%u %s, size=0x%zx, maxaddr=0x%" PRIx64 "",
- __FUNCTION__, h,
- type == SHT_SYMTAB ? "symtab" : "strtab",
- size, maxaddr);
-@@ -229,7 +284,7 @@ static int xc_dom_load_elf_symtab(struct xc_dom_image *dom,
- if ( load )
- {
- shdr2 = elf_shdr_by_index(elf, h);
-- memcpy((void*)elf_section_start(&syms, shdr),
-+ elf_memcpy_safe(elf, elf_section_start(&syms, shdr),
- elf_section_start(elf, shdr2),
- size);
- }
-@@ -237,11 +292,18 @@ static int xc_dom_load_elf_symtab(struct xc_dom_image *dom,
-
- /* Name is NULL. */
- if ( elf_64bit(&syms) )
-- *(Elf64_Word*)(&shdr->e64.sh_name) = 0;
-+ elf_store_field(elf, shdr, e64.sh_name, 0);
- else
-- *(Elf32_Word*)(&shdr->e32.sh_name) = 0;
-+ elf_store_field(elf, shdr, e32.sh_name, 0);
- }
-
-+ if ( elf_check_broken(&syms) )
-+ DOMPRINTF("%s: symbols ELF broken: %s", __FUNCTION__,
-+ elf_check_broken(&syms));
-+ if ( elf_check_broken(elf) )
-+ DOMPRINTF("%s: ELF broken: %s", __FUNCTION__,
-+ elf_check_broken(elf));
-+
- if ( tables == 0 )
- {
- DOMPRINTF("%s: no symbol table present", __FUNCTION__);
-@@ -253,16 +315,22 @@ static int xc_dom_load_elf_symtab(struct xc_dom_image *dom,
- return 0;
- }
-
--static int xc_dom_parse_elf_kernel(struct xc_dom_image *dom)
-+static elf_errorstatus xc_dom_parse_elf_kernel(struct xc_dom_image *dom)
-+ /*
-+ * This function sometimes returns -1 for error and sometimes
-+ * an errno value. ?!?!
-+ */
- {
- struct elf_binary *elf;
-- int rc;
-+ elf_errorstatus rc;
-
- rc = check_elf_kernel(dom, 1);
- if ( rc != 0 )
- return rc;
-
- elf = xc_dom_malloc(dom, sizeof(*elf));
-+ if ( elf == NULL )
-+ return -1;
- dom->private_loader = elf;
- rc = elf_init(elf, dom->kernel_blob, dom->kernel_size);
- xc_elf_set_logfile(dom->xch, elf, 1);
-@@ -274,23 +342,27 @@ static int xc_dom_parse_elf_kernel(struct xc_dom_image *dom)
- }
-
- /* Find the section-header strings table. */
-- if ( elf->sec_strtab == NULL )
-+ if ( ELF_PTRVAL_INVALID(elf->sec_strtab) )
- {
- xc_dom_panic(dom->xch, XC_INVALID_KERNEL, "%s: ELF image"
- " has no shstrtab", __FUNCTION__);
-- return -EINVAL;
-+ rc = -EINVAL;
-+ goto out;
- }
-
- /* parse binary and get xen meta info */
- elf_parse_binary(elf);
- if ( (rc = elf_xen_parse(elf, &dom->parms)) != 0 )
-- return rc;
-+ {
-+ goto out;
-+ }
-
- if ( elf_xen_feature_get(XENFEAT_dom0, dom->parms.f_required) )
- {
- xc_dom_panic(dom->xch, XC_INVALID_KERNEL, "%s: Kernel does not"
- " support unprivileged (DomU) operation", __FUNCTION__);
-- return -EINVAL;
-+ rc = -EINVAL;
-+ goto out;
- }
-
- /* find kernel segment */
-@@ -304,15 +376,30 @@ static int xc_dom_parse_elf_kernel(struct xc_dom_image *dom)
- DOMPRINTF("%s: %s: 0x%" PRIx64 " -> 0x%" PRIx64 "",
- __FUNCTION__, dom->guest_type,
- dom->kernel_seg.vstart, dom->kernel_seg.vend);
-- return 0;
-+ rc = 0;
-+out:
-+ if ( elf_check_broken(elf) )
-+ DOMPRINTF("%s: ELF broken: %s", __FUNCTION__,
-+ elf_check_broken(elf));
-+
-+ return rc;
- }
-
--static int xc_dom_load_elf_kernel(struct xc_dom_image *dom)
-+static elf_errorstatus xc_dom_load_elf_kernel(struct xc_dom_image *dom)
- {
- struct elf_binary *elf = dom->private_loader;
-- int rc;
-+ elf_errorstatus rc;
-+ xen_pfn_t pages;
-+
-+ elf->dest_base = xc_dom_seg_to_ptr_pages(dom, &dom->kernel_seg, &pages);
-+ if ( elf->dest_base == NULL )
-+ {
-+ DOMPRINTF("%s: xc_dom_vaddr_to_ptr(dom,dom->kernel_seg)"
-+ " => NULL", __FUNCTION__);
-+ return -1;
-+ }
-+ elf->dest_size = pages * XC_DOM_PAGE_SIZE(dom);
-
-- elf->dest = xc_dom_seg_to_ptr(dom, &dom->kernel_seg);
- rc = elf_load_binary(elf);
- if ( rc < 0 )
- {
-diff --git a/tools/libxc/xc_dom_ia64.c b/tools/libxc/xc_dom_ia64.c
-index dcd1523..076821c 100644
---- a/tools/libxc/xc_dom_ia64.c
-+++ b/tools/libxc/xc_dom_ia64.c
-@@ -60,6 +60,12 @@ int start_info_ia64(struct xc_dom_image *dom)
-
- DOMPRINTF_CALLED(dom->xch);
-
-+ if ( start_info == NULL )
-+ {
-+ DOMPRINTF("%s: xc_dom_pfn_to_ptr failed on start_info", __FUNCTION__);
-+ return -1; /* our caller throws away our return value :-/ */
-+ }
-+
- memset(start_info, 0, sizeof(*start_info));
- sprintf(start_info->magic, dom->guest_type);
- start_info->flags = dom->flags;
-@@ -182,6 +188,12 @@ int arch_setup_meminit(struct xc_dom_image *dom)
-
- /* setup initial p2m */
- dom->p2m_host = xc_dom_malloc(dom, sizeof(xen_pfn_t) * nbr);
-+ if ( dom->p2m_host == NULL )
-+ {
-+ DOMPRINTF("%s: xc_dom_malloc failed for p2m_host",
-+ __FUNCTION__);
-+ return -1;
-+ }
- for ( pfn = 0; pfn < nbr; pfn++ )
- dom->p2m_host[pfn] = start + pfn;
-
-diff --git a/tools/libxc/xc_dom_x86.c b/tools/libxc/xc_dom_x86.c
-index 0cf1687..448d9a1 100644
---- a/tools/libxc/xc_dom_x86.c
-+++ b/tools/libxc/xc_dom_x86.c
-@@ -144,6 +144,9 @@ static int setup_pgtables_x86_32(struct xc_dom_image *dom)
- xen_vaddr_t addr;
- xen_pfn_t pgpfn;
-
-+ if ( l2tab == NULL )
-+ goto pfn_error;
-+
- for ( addr = dom->parms.virt_base; addr < dom->virt_pgtab_end;
- addr += PAGE_SIZE_X86 )
- {
-@@ -151,6 +154,8 @@ static int setup_pgtables_x86_32(struct xc_dom_image *dom)
- {
- /* get L1 tab, make L2 entry */
- l1tab = xc_dom_pfn_to_ptr(dom, l1pfn, 1);
-+ if ( l1tab == NULL )
-+ goto pfn_error;
- l2off = l2_table_offset_i386(addr);
- l2tab[l2off] =
- pfn_to_paddr(xc_dom_p2m_guest(dom, l1pfn)) | L2_PROT;
-@@ -169,6 +174,11 @@ static int setup_pgtables_x86_32(struct xc_dom_image *dom)
- l1tab = NULL;
- }
- return 0;
-+
-+pfn_error:
-+ xc_dom_panic(dom->xch, XC_INTERNAL_ERROR,
-+ "%s: xc_dom_pfn_to_ptr failed", __FUNCTION__);
-+ return -EINVAL;
- }
-
- /*
-@@ -219,6 +229,12 @@ static xen_pfn_t move_l3_below_4G(struct xc_dom_image *dom,
- goto out;
-
- l3tab = xc_dom_pfn_to_ptr(dom, l3pfn, 1);
-+ if ( l3tab == NULL )
-+ {
-+ DOMPRINTF("%s: xc_dom_pfn_to_ptr(dom, l3pfn, 1) => NULL",
-+ __FUNCTION__);
-+ return l3mfn; /* our one call site will call xc_dom_panic and fail */
-+ }
- memset(l3tab, 0, XC_DOM_PAGE_SIZE(dom));
-
- DOMPRINTF("%s: successfully relocated L3 below 4G. "
-@@ -262,6 +278,8 @@ static int setup_pgtables_x86_32_pae(struct xc_dom_image *dom)
- }
-
- l3tab = xc_dom_pfn_to_ptr(dom, l3pfn, 1);
-+ if ( l3tab == NULL )
-+ goto pfn_error;
-
- for ( addr = dom->parms.virt_base; addr < dom->virt_pgtab_end;
- addr += PAGE_SIZE_X86 )
-@@ -270,6 +288,8 @@ static int setup_pgtables_x86_32_pae(struct xc_dom_image *dom)
- {
- /* get L2 tab, make L3 entry */
- l2tab = xc_dom_pfn_to_ptr(dom, l2pfn, 1);
-+ if ( l2tab == NULL )
-+ goto pfn_error;
- l3off = l3_table_offset_pae(addr);
- l3tab[l3off] =
- pfn_to_paddr(xc_dom_p2m_guest(dom, l2pfn)) | L3_PROT;
-@@ -280,6 +300,8 @@ static int setup_pgtables_x86_32_pae(struct xc_dom_image *dom)
- {
- /* get L1 tab, make L2 entry */
- l1tab = xc_dom_pfn_to_ptr(dom, l1pfn, 1);
-+ if ( l1tab == NULL )
-+ goto pfn_error;
- l2off = l2_table_offset_pae(addr);
- l2tab[l2off] =
- pfn_to_paddr(xc_dom_p2m_guest(dom, l1pfn)) | L2_PROT;
-@@ -306,6 +328,11 @@ static int setup_pgtables_x86_32_pae(struct xc_dom_image *dom)
- l3tab[3] = pfn_to_paddr(xc_dom_p2m_guest(dom, l2pfn)) | L3_PROT;
- }
- return 0;
-+
-+pfn_error:
-+ xc_dom_panic(dom->xch, XC_INTERNAL_ERROR,
-+ "%s: xc_dom_pfn_to_ptr failed", __FUNCTION__);
-+ return -EINVAL;
- }
-
- #undef L1_PROT
-@@ -344,6 +371,9 @@ static int setup_pgtables_x86_64(struct xc_dom_image *dom)
- uint64_t addr;
- xen_pfn_t pgpfn;
-
-+ if ( l4tab == NULL )
-+ goto pfn_error;
-+
- for ( addr = dom->parms.virt_base; addr < dom->virt_pgtab_end;
- addr += PAGE_SIZE_X86 )
- {
-@@ -351,6 +381,8 @@ static int setup_pgtables_x86_64(struct xc_dom_image *dom)
- {
- /* get L3 tab, make L4 entry */
- l3tab = xc_dom_pfn_to_ptr(dom, l3pfn, 1);
-+ if ( l3tab == NULL )
-+ goto pfn_error;
- l4off = l4_table_offset_x86_64(addr);
- l4tab[l4off] =
- pfn_to_paddr(xc_dom_p2m_guest(dom, l3pfn)) | L4_PROT;
-@@ -361,6 +393,8 @@ static int setup_pgtables_x86_64(struct xc_dom_image *dom)
- {
- /* get L2 tab, make L3 entry */
- l2tab = xc_dom_pfn_to_ptr(dom, l2pfn, 1);
-+ if ( l2tab == NULL )
-+ goto pfn_error;
- l3off = l3_table_offset_x86_64(addr);
- l3tab[l3off] =
- pfn_to_paddr(xc_dom_p2m_guest(dom, l2pfn)) | L3_PROT;
-@@ -373,6 +407,8 @@ static int setup_pgtables_x86_64(struct xc_dom_image *dom)
- {
- /* get L1 tab, make L2 entry */
- l1tab = xc_dom_pfn_to_ptr(dom, l1pfn, 1);
-+ if ( l1tab == NULL )
-+ goto pfn_error;
- l2off = l2_table_offset_x86_64(addr);
- l2tab[l2off] =
- pfn_to_paddr(xc_dom_p2m_guest(dom, l1pfn)) | L2_PROT;
-@@ -393,6 +429,11 @@ static int setup_pgtables_x86_64(struct xc_dom_image *dom)
- l1tab = NULL;
- }
- return 0;
-+
-+pfn_error:
-+ xc_dom_panic(dom->xch, XC_INTERNAL_ERROR,
-+ "%s: xc_dom_pfn_to_ptr failed", __FUNCTION__);
-+ return -EINVAL;
- }
-
- #undef L1_PROT
-@@ -410,6 +451,8 @@ static int alloc_magic_pages(struct xc_dom_image *dom)
- if ( xc_dom_alloc_segment(dom, &dom->p2m_seg, "phys2mach", 0, p2m_size) )
- return -1;
- dom->p2m_guest = xc_dom_seg_to_ptr(dom, &dom->p2m_seg);
-+ if ( dom->p2m_guest == NULL )
-+ return -1;
-
- /* allocate special pages */
- dom->start_info_pfn = xc_dom_alloc_page(dom, "start info");
-@@ -434,6 +477,12 @@ static int start_info_x86_32(struct xc_dom_image *dom)
-
- DOMPRINTF_CALLED(dom->xch);
-
-+ if ( start_info == NULL )
-+ {
-+ DOMPRINTF("%s: xc_dom_pfn_to_ptr failed on start_info", __FUNCTION__);
-+ return -1; /* our caller throws away our return value :-/ */
-+ }
-+
- memset(start_info, 0, sizeof(*start_info));
- strncpy(start_info->magic, dom->guest_type, sizeof(start_info->magic));
- start_info->magic[sizeof(start_info->magic) - 1] = '\0';
-@@ -474,6 +523,12 @@ static int start_info_x86_64(struct xc_dom_image *dom)
-
- DOMPRINTF_CALLED(dom->xch);
-
-+ if ( start_info == NULL )
-+ {
-+ DOMPRINTF("%s: xc_dom_pfn_to_ptr failed on start_info", __FUNCTION__);
-+ return -1; /* our caller throws away our return value :-/ */
-+ }
-+
- memset(start_info, 0, sizeof(*start_info));
- strncpy(start_info->magic, dom->guest_type, sizeof(start_info->magic));
- start_info->magic[sizeof(start_info->magic) - 1] = '\0';
-@@ -725,6 +780,9 @@ int arch_setup_meminit(struct xc_dom_image *dom)
- }
-
- dom->p2m_host = xc_dom_malloc(dom, sizeof(xen_pfn_t) * dom->total_pages);
-+ if ( dom->p2m_host == NULL )
-+ return -EINVAL;
-+
- if ( dom->superpages )
- {
- int count = dom->total_pages >> SUPERPAGE_PFN_SHIFT;
-diff --git a/tools/libxc/xc_domain_restore.c b/tools/libxc/xc_domain_restore.c
-index b4c0b10..f9ed6b2 100644
---- a/tools/libxc/xc_domain_restore.c
-+++ b/tools/libxc/xc_domain_restore.c
-@@ -1180,6 +1180,11 @@ static int apply_batch(xc_interface *xch, uint32_t dom, struct restore_ctx *ctx,
-
- /* Map relevant mfns */
- pfn_err = calloc(j, sizeof(*pfn_err));
-+ if ( pfn_err == NULL )
-+ {
-+ PERROR("allocation for pfn_err failed");
-+ return -1;
-+ }
- region_base = xc_map_foreign_bulk(
- xch, dom, PROT_WRITE, region_mfn, pfn_err, j);
-
-@@ -1556,6 +1561,12 @@ int xc_domain_restore(xc_interface *xch, int io_fd, uint32_t dom,
- mfn = ctx->p2m[pfn];
- buf = xc_map_foreign_range(xch, dom, PAGE_SIZE,
- PROT_READ | PROT_WRITE, mfn);
-+ if ( buf == NULL )
-+ {
-+ ERROR("xc_map_foreign_range for generation id"
-+ " buffer failed");
-+ goto out;
-+ }
-
- generationid = *(unsigned long long *)(buf + offset);
- *(unsigned long long *)(buf + offset) = generationid + 1;
-@@ -1713,6 +1724,11 @@ int xc_domain_restore(xc_interface *xch, int io_fd, uint32_t dom,
- l3tab = (uint64_t *)
- xc_map_foreign_range(xch, dom, PAGE_SIZE,
- PROT_READ, ctx->p2m[i]);
-+ if ( l3tab == NULL )
-+ {
-+ PERROR("xc_map_foreign_range failed (for l3tab)");
-+ goto out;
-+ }
-
- for ( j = 0; j < 4; j++ )
- l3ptes[j] = l3tab[j];
-@@ -1739,6 +1755,11 @@ int xc_domain_restore(xc_interface *xch, int io_fd, uint32_t dom,
- l3tab = (uint64_t *)
- xc_map_foreign_range(xch, dom, PAGE_SIZE,
- PROT_READ | PROT_WRITE, ctx->p2m[i]);
-+ if ( l3tab == NULL )
-+ {
-+ PERROR("xc_map_foreign_range failed (for l3tab, 2nd)");
-+ goto out;
-+ }
-
- for ( j = 0; j < 4; j++ )
- l3tab[j] = l3ptes[j];
-@@ -1909,6 +1930,12 @@ int xc_domain_restore(xc_interface *xch, int io_fd, uint32_t dom,
- SET_FIELD(ctxt, user_regs.edx, mfn);
- start_info = xc_map_foreign_range(
- xch, dom, PAGE_SIZE, PROT_READ | PROT_WRITE, mfn);
-+ if ( start_info == NULL )
-+ {
-+ PERROR("xc_map_foreign_range failed (for start_info)");
-+ goto out;
-+ }
-+
- SET_FIELD(start_info, nr_pages, dinfo->p2m_size);
- SET_FIELD(start_info, shared_info, shared_info_frame<<PAGE_SHIFT);
- SET_FIELD(start_info, flags, 0);
-@@ -2056,6 +2083,11 @@ int xc_domain_restore(xc_interface *xch, int io_fd, uint32_t dom,
- /* Restore contents of shared-info page. No checking needed. */
- new_shared_info = xc_map_foreign_range(
- xch, dom, PAGE_SIZE, PROT_WRITE, shared_info_frame);
-+ if ( new_shared_info == NULL )
-+ {
-+ PERROR("xc_map_foreign_range failed (for new_shared_info)");
-+ goto out;
-+ }
-
- /* restore saved vcpu_info and arch specific info */
- MEMCPY_FIELD(new_shared_info, old_shared_info, vcpu_info);
-diff --git a/tools/libxc/xc_hvm_build_x86.c b/tools/libxc/xc_hvm_build_x86.c
-index cf5d7fb..8165287 100644
---- a/tools/libxc/xc_hvm_build_x86.c
-+++ b/tools/libxc/xc_hvm_build_x86.c
-@@ -104,21 +104,23 @@ static int loadelfimage(
- for ( i = 0; i < pages; i++ )
- entries[i].mfn = parray[(elf->pstart >> PAGE_SHIFT) + i];
-
-- elf->dest = xc_map_foreign_ranges(
-+ elf->dest_base = xc_map_foreign_ranges(
- xch, dom, pages << PAGE_SHIFT, PROT_READ | PROT_WRITE, 1 << PAGE_SHIFT,
- entries, pages);
-- if ( elf->dest == NULL )
-+ if ( elf->dest_base == NULL )
- goto err;
-+ elf->dest_size = pages * PAGE_SIZE;
-
-- elf->dest += elf->pstart & (PAGE_SIZE - 1);
-+ ELF_ADVANCE_DEST(elf, elf->pstart & (PAGE_SIZE - 1));
-
- /* Load the initial elf image. */
- rc = elf_load_binary(elf);
- if ( rc < 0 )
- PERROR("Failed to load elf binary\n");
-
-- munmap(elf->dest, pages << PAGE_SHIFT);
-- elf->dest = NULL;
-+ munmap(elf->dest_base, pages << PAGE_SHIFT);
-+ elf->dest_base = NULL;
-+ elf->dest_size = 0;
-
- err:
- free(entries);
-@@ -401,11 +403,16 @@ static int setup_guest(xc_interface *xch,
- munmap(page0, PAGE_SIZE);
- }
-
-+ if ( elf_check_broken(&elf) )
-+ ERROR("HVM ELF broken: %s", elf_check_broken(&elf));
-+
- free(page_array);
- return 0;
-
- error_out:
- free(page_array);
-+ if ( elf_check_broken(&elf) )
-+ ERROR("HVM ELF broken, failing: %s", elf_check_broken(&elf));
- return -1;
- }
-
-diff --git a/tools/libxc/xc_linux_osdep.c b/tools/libxc/xc_linux_osdep.c
-index 787e742..98e041c 100644
---- a/tools/libxc/xc_linux_osdep.c
-+++ b/tools/libxc/xc_linux_osdep.c
-@@ -378,6 +378,8 @@ static void *linux_privcmd_map_foreign_range(xc_interface *xch, xc_osdep_handle
-
- num = (size + XC_PAGE_SIZE - 1) >> XC_PAGE_SHIFT;
- arr = calloc(num, sizeof(xen_pfn_t));
-+ if ( arr == NULL )
-+ return NULL;
-
- for ( i = 0; i < num; i++ )
- arr[i] = mfn + i;
-@@ -402,6 +404,8 @@ static void *linux_privcmd_map_foreign_ranges(xc_interface *xch, xc_osdep_handle
- num_per_entry = chunksize >> XC_PAGE_SHIFT;
- num = num_per_entry * nentries;
- arr = calloc(num, sizeof(xen_pfn_t));
-+ if ( arr == NULL )
-+ return NULL;
-
- for ( i = 0; i < nentries; i++ )
- for ( j = 0; j < num_per_entry; j++ )
-diff --git a/tools/libxc/xc_offline_page.c b/tools/libxc/xc_offline_page.c
-index 089a361..36b9812 100644
---- a/tools/libxc/xc_offline_page.c
-+++ b/tools/libxc/xc_offline_page.c
-@@ -714,6 +714,11 @@ int xc_exchange_page(xc_interface *xch, int domid, xen_pfn_t mfn)
-
- new_p = xc_map_foreign_range(xch, domid, PAGE_SIZE,
- PROT_READ|PROT_WRITE, new_mfn);
-+ if ( new_p == NULL )
-+ {
-+ ERROR("failed to map new_p for copy, guest may be broken?");
-+ goto failed;
-+ }
- memcpy(new_p, backup, PAGE_SIZE);
- munmap(new_p, PAGE_SIZE);
- mops.arg1.mfn = new_mfn;
-diff --git a/tools/libxc/xc_private.c b/tools/libxc/xc_private.c
-index 3e03a91..848ceed 100644
---- a/tools/libxc/xc_private.c
-+++ b/tools/libxc/xc_private.c
-@@ -771,6 +771,8 @@ const char *xc_strerror(xc_interface *xch, int errcode)
- errbuf = pthread_getspecific(errbuf_pkey);
- if (errbuf == NULL) {
- errbuf = malloc(XS_BUFSIZE);
-+ if ( errbuf == NULL )
-+ return "(failed to allocate errbuf)";
- pthread_setspecific(errbuf_pkey, errbuf);
- }
-
-diff --git a/tools/libxc/xenctrl.h b/tools/libxc/xenctrl.h
-index b7741ca..8952048 100644
---- a/tools/libxc/xenctrl.h
-+++ b/tools/libxc/xenctrl.h
-@@ -1778,7 +1778,7 @@ int xc_cpuid_set(xc_interface *xch,
- int xc_cpuid_apply_policy(xc_interface *xch,
- domid_t domid);
- void xc_cpuid_to_str(const unsigned int *regs,
-- char **strs);
-+ char **strs); /* some strs[] may be NULL if ENOMEM */
- int xc_mca_op(xc_interface *xch, struct xen_mc *mc);
- #endif
-
-diff --git a/tools/xcutils/readnotes.c b/tools/xcutils/readnotes.c
-index c926186..5fa445e 100644
---- a/tools/xcutils/readnotes.c
-+++ b/tools/xcutils/readnotes.c
-@@ -61,51 +61,56 @@ struct setup_header {
- } __attribute__((packed));
-
- static void print_string_note(const char *prefix, struct elf_binary *elf,
-- const elf_note *note)
-+ ELF_HANDLE_DECL(elf_note) note)
- {
-- printf("%s: %s\n", prefix, (char*)elf_note_desc(elf, note));
-+ printf("%s: %s\n", prefix, elf_strfmt(elf, elf_note_desc(elf, note)));
- }
-
- static void print_numeric_note(const char *prefix, struct elf_binary *elf,
-- const elf_note *note)
-+ ELF_HANDLE_DECL(elf_note) note)
- {
- uint64_t value = elf_note_numeric(elf, note);
-- int descsz = elf_uval(elf, note, descsz);
-+ unsigned descsz = elf_uval(elf, note, descsz);
-
- printf("%s: %#*" PRIx64 " (%d bytes)\n",
- prefix, 2+2*descsz, value, descsz);
- }
-
- static void print_l1_mfn_valid_note(const char *prefix, struct elf_binary *elf,
-- const elf_note *note)
-+ ELF_HANDLE_DECL(elf_note) note)
- {
-- int descsz = elf_uval(elf, note, descsz);
-- const uint32_t *desc32 = elf_note_desc(elf, note);
-- const uint64_t *desc64 = elf_note_desc(elf, note);
-+ unsigned descsz = elf_uval(elf, note, descsz);
-+ elf_ptrval desc = elf_note_desc(elf, note);
-
- /* XXX should be able to cope with a list of values. */
- switch ( descsz / 2 )
- {
- case 8:
- printf("%s: mask=%#"PRIx64" value=%#"PRIx64"\n", prefix,
-- desc64[0], desc64[1]);
-+ elf_access_unsigned(elf, desc, 0, 8),
-+ elf_access_unsigned(elf, desc, 8, 8));
- break;
- case 4:
- printf("%s: mask=%#"PRIx32" value=%#"PRIx32"\n", prefix,
-- desc32[0],desc32[1]);
-+ (uint32_t)elf_access_unsigned(elf, desc, 0, 4),
-+ (uint32_t)elf_access_unsigned(elf, desc, 4, 4));
- break;
- }
-
- }
-
--static int print_notes(struct elf_binary *elf, const elf_note *start, const elf_note *end)
-+static unsigned print_notes(struct elf_binary *elf, ELF_HANDLE_DECL(elf_note) start, ELF_HANDLE_DECL(elf_note) end)
- {
-- const elf_note *note;
-- int notes_found = 0;
-+ ELF_HANDLE_DECL(elf_note) note;
-+ unsigned notes_found = 0;
-+ const char *this_note_name;
-
-- for ( note = start; note < end; note = elf_note_next(elf, note) )
-+ for ( note = start; ELF_HANDLE_PTRVAL(note) < ELF_HANDLE_PTRVAL(end); note = elf_note_next(elf, note) )
- {
-- if (0 != strcmp(elf_note_name(elf, note), "Xen"))
-+ this_note_name = elf_note_name(elf, note);
-+ if (NULL == this_note_name)
-+ continue;
-+ if (0 != strcmp(this_note_name, "Xen"))
- continue;
-
- notes_found++;
-@@ -156,7 +161,7 @@ static int print_notes(struct elf_binary *elf, const elf_note *start, const elf_
- break;
- default:
- printf("unknown note type %#x\n",
-- (int)elf_uval(elf, note, type));
-+ (unsigned)elf_uval(elf, note, type));
- break;
- }
- }
-@@ -166,12 +171,13 @@ static int print_notes(struct elf_binary *elf, const elf_note *start, const elf_
- int main(int argc, char **argv)
- {
- const char *f;
-- int fd,h,size,usize,count;
-+ int fd;
-+ unsigned h,size,usize,count;
- void *image,*tmp;
- struct stat st;
- struct elf_binary elf;
-- const elf_shdr *shdr;
-- int notes_found = 0;
-+ ELF_HANDLE_DECL(elf_shdr) shdr;
-+ unsigned notes_found = 0;
-
- struct setup_header *hdr;
- uint64_t payload_offset, payload_length;
-@@ -257,7 +263,7 @@ int main(int argc, char **argv)
- count = elf_phdr_count(&elf);
- for ( h=0; h < count; h++)
- {
-- const elf_phdr *phdr;
-+ ELF_HANDLE_DECL(elf_phdr) phdr;
- phdr = elf_phdr_by_index(&elf, h);
- if (elf_uval(&elf, phdr, p_type) != PT_NOTE)
- continue;
-@@ -269,8 +275,8 @@ int main(int argc, char **argv)
- continue;
-
- notes_found = print_notes(&elf,
-- elf_segment_start(&elf, phdr),
-- elf_segment_end(&elf, phdr));
-+ ELF_MAKE_HANDLE(elf_note, elf_segment_start(&elf, phdr)),
-+ ELF_MAKE_HANDLE(elf_note, elf_segment_end(&elf, phdr)));
- }
-
- if ( notes_found == 0 )
-@@ -278,13 +284,13 @@ int main(int argc, char **argv)
- count = elf_shdr_count(&elf);
- for ( h=0; h < count; h++)
- {
-- const elf_shdr *shdr;
-+ ELF_HANDLE_DECL(elf_shdr) shdr;
- shdr = elf_shdr_by_index(&elf, h);
- if (elf_uval(&elf, shdr, sh_type) != SHT_NOTE)
- continue;
- notes_found = print_notes(&elf,
-- elf_section_start(&elf, shdr),
-- elf_section_end(&elf, shdr));
-+ ELF_MAKE_HANDLE(elf_note, elf_section_start(&elf, shdr)),
-+ ELF_MAKE_HANDLE(elf_note, elf_section_end(&elf, shdr)));
- if ( notes_found )
- fprintf(stderr, "using notes from SHT_NOTE section\n");
-
-@@ -292,8 +298,12 @@ int main(int argc, char **argv)
- }
-
- shdr = elf_shdr_by_name(&elf, "__xen_guest");
-- if (shdr)
-- printf("__xen_guest: %s\n", (char*)elf_section_start(&elf, shdr));
-+ if (ELF_HANDLE_VALID(shdr))
-+ printf("__xen_guest: %s\n",
-+ elf_strfmt(&elf, elf_section_start(&elf, shdr)));
-+
-+ if (elf_check_broken(&elf))
-+ printf("warning: broken ELF: %s\n", elf_check_broken(&elf));
-
- return 0;
- }
-diff --git a/xen/arch/arm/kernel.c b/xen/arch/arm/kernel.c
-index 2d56130..dec0519 100644
---- a/xen/arch/arm/kernel.c
-+++ b/xen/arch/arm/kernel.c
-@@ -146,6 +146,8 @@ static int kernel_try_elf_prepare(struct kernel_info *info)
- {
- int rc;
-
-+ memset(&info->elf.elf, 0, sizeof(info->elf.elf));
-+
- info->kernel_order = get_order_from_bytes(KERNEL_FLASH_SIZE);
- info->kernel_img = alloc_xenheap_pages(info->kernel_order, 0);
- if ( info->kernel_img == NULL )
-@@ -160,7 +162,7 @@ static int kernel_try_elf_prepare(struct kernel_info *info)
- #endif
- elf_parse_binary(&info->elf.elf);
- if ( (rc = elf_xen_parse(&info->elf.elf, &info->elf.parms)) != 0 )
-- return rc;
-+ goto err;
-
- /*
- * TODO: can the ELF header be used to find the physical address
-@@ -169,7 +171,18 @@ static int kernel_try_elf_prepare(struct kernel_info *info)
- info->entry = info->elf.parms.virt_entry;
- info->load = kernel_elf_load;
-
-+ if ( elf_check_broken(&info->elf.elf) )
-+ printk("Xen: warning: ELF kernel broken: %s\n",
-+ elf_check_broken(&info->elf.elf));
-+
- return 0;
-+
-+err:
-+ if ( elf_check_broken(&info->elf.elf) )
-+ printk("Xen: ELF kernel broken: %s\n",
-+ elf_check_broken(&info->elf.elf));
-+
-+ return rc;
- }
-
- int kernel_prepare(struct kernel_info *info)
-diff --git a/xen/arch/x86/bzimage.c b/xen/arch/x86/bzimage.c
-index 5adc223..3600dca 100644
---- a/xen/arch/x86/bzimage.c
-+++ b/xen/arch/x86/bzimage.c
-@@ -220,7 +220,7 @@ unsigned long __init bzimage_headroom(char *image_start,
- image_length = hdr->payload_length;
- }
-
-- if ( elf_is_elfbinary(image_start) )
-+ if ( elf_is_elfbinary(image_start, image_length) )
- return 0;
-
- orig_image_len = image_length;
-@@ -251,7 +251,7 @@ int __init bzimage_parse(char *image_base, char **image_start, unsigned long *im
- *image_len = hdr->payload_length;
- }
-
-- if ( elf_is_elfbinary(*image_start) )
-+ if ( elf_is_elfbinary(*image_start, *image_len) )
- return 0;
-
- BUG_ON(!(image_base < *image_start));
-diff --git a/xen/arch/x86/domain_build.c b/xen/arch/x86/domain_build.c
-index 469d363..0dbec96 100644
---- a/xen/arch/x86/domain_build.c
-+++ b/xen/arch/x86/domain_build.c
-@@ -374,7 +374,7 @@ int __init construct_dom0(
- #endif
- elf_parse_binary(&elf);
- if ( (rc = elf_xen_parse(&elf, &parms)) != 0 )
-- return rc;
-+ goto out;
-
- /* compatibility check */
- compatible = 0;
-@@ -413,14 +413,16 @@ int __init construct_dom0(
- if ( !compatible )
- {
- printk("Mismatch between Xen and DOM0 kernel\n");
-- return -EINVAL;
-+ rc = -EINVAL;
-+ goto out;
- }
-
- if ( parms.elf_notes[XEN_ELFNOTE_SUPPORTED_FEATURES].type != XEN_ENT_NONE &&
- !test_bit(XENFEAT_dom0, parms.f_supported) )
- {
- printk("Kernel does not support Dom0 operation\n");
-- return -EINVAL;
-+ rc = -EINVAL;
-+ goto out;
- }
-
- #if defined(__x86_64__)
-@@ -734,7 +736,8 @@ int __init construct_dom0(
- (v_end > HYPERVISOR_COMPAT_VIRT_START(d)) )
- {
- printk("DOM0 image overlaps with Xen private area.\n");
-- return -EINVAL;
-+ rc = -EINVAL;
-+ goto out;
- }
-
- if ( is_pv_32on64_domain(d) )
-@@ -908,12 +911,13 @@ int __init construct_dom0(
- write_ptbase(v);
-
- /* Copy the OS image and free temporary buffer. */
-- elf.dest = (void*)vkern_start;
-+ elf.dest_base = (void*)vkern_start;
-+ elf.dest_size = vkern_end - vkern_start;
- rc = elf_load_binary(&elf);
- if ( rc < 0 )
- {
- printk("Failed to load the kernel binary\n");
-- return rc;
-+ goto out;
- }
- bootstrap_map(NULL);
-
-@@ -924,7 +928,8 @@ int __init construct_dom0(
- {
- write_ptbase(current);
- printk("Invalid HYPERCALL_PAGE field in ELF notes.\n");
-- return -1;
-+ rc = -1;
-+ goto out;
- }
- hypercall_page_initialise(
- d, (void *)(unsigned long)parms.virt_hypercall);
-@@ -1271,9 +1276,19 @@ int __init construct_dom0(
-
- BUG_ON(rc != 0);
-
-- iommu_dom0_init(dom0);
-+ if ( elf_check_broken(&elf) )
-+ printk(" Xen warning: dom0 kernel broken ELF: %s\n",
-+ elf_check_broken(&elf));
-
-+ iommu_dom0_init(dom0);
- return 0;
-+
-+out:
-+ if ( elf_check_broken(&elf) )
-+ printk(" Xen dom0 kernel broken ELF: %s\n",
-+ elf_check_broken(&elf));
-+
-+ return rc;
- }
-
- /*
-diff --git a/xen/common/libelf/Makefile b/xen/common/libelf/Makefile
-index 18dc8e2..5bf8f76 100644
---- a/xen/common/libelf/Makefile
-+++ b/xen/common/libelf/Makefile
-@@ -2,6 +2,8 @@ obj-bin-y := libelf.o
-
- SECTIONS := text data $(SPECIAL_DATA_SECTIONS)
-
-+CFLAGS += -Wno-pointer-sign
-+
- libelf.o: libelf-temp.o Makefile
- $(OBJCOPY) $(foreach s,$(SECTIONS),--rename-section .$(s)=.init.$(s)) $< $@
-
-diff --git a/xen/common/libelf/libelf-dominfo.c b/xen/common/libelf/libelf-dominfo.c
-index 523837f..412ea70 100644
---- a/xen/common/libelf/libelf-dominfo.c
-+++ b/xen/common/libelf/libelf-dominfo.c
-@@ -29,22 +29,22 @@ static const char *const elf_xen_feature_names[] = {
- [XENFEAT_pae_pgdir_above_4gb] = "pae_pgdir_above_4gb",
- [XENFEAT_dom0] = "dom0"
- };
--static const int elf_xen_features =
-+static const unsigned elf_xen_features =
- sizeof(elf_xen_feature_names) / sizeof(elf_xen_feature_names[0]);
-
--int elf_xen_parse_features(const char *features,
-+elf_errorstatus elf_xen_parse_features(const char *features,
- uint32_t *supported,
- uint32_t *required)
- {
-- char feature[64];
-- int pos, len, i;
-+ unsigned char feature[64];
-+ unsigned pos, len, i;
-
- if ( features == NULL )
- return 0;
-
- for ( pos = 0; features[pos] != '\0'; pos += len )
- {
-- memset(feature, 0, sizeof(feature));
-+ elf_memset_unchecked(feature, 0, sizeof(feature));
- for ( len = 0;; len++ )
- {
- if ( len >= sizeof(feature)-1 )
-@@ -94,14 +94,14 @@ int elf_xen_parse_features(const char *features,
- /* ------------------------------------------------------------------------ */
- /* xen elf notes */
-
--int elf_xen_parse_note(struct elf_binary *elf,
-+elf_errorstatus elf_xen_parse_note(struct elf_binary *elf,
- struct elf_dom_parms *parms,
-- const elf_note *note)
-+ ELF_HANDLE_DECL(elf_note) note)
- {
- /* *INDENT-OFF* */
- static const struct {
- char *name;
-- int str;
-+ bool str;
- } note_desc[] = {
- [XEN_ELFNOTE_ENTRY] = { "ENTRY", 0},
- [XEN_ELFNOTE_HYPERCALL_PAGE] = { "HYPERCALL_PAGE", 0},
-@@ -125,7 +125,7 @@ int elf_xen_parse_note(struct elf_binary *elf,
- const char *str = NULL;
- uint64_t val = 0;
- unsigned int i;
-- int type = elf_uval(elf, note, type);
-+ unsigned type = elf_uval(elf, note, type);
-
- if ( (type >= sizeof(note_desc) / sizeof(note_desc[0])) ||
- (note_desc[type].name == NULL) )
-@@ -137,7 +137,10 @@ int elf_xen_parse_note(struct elf_binary *elf,
-
- if ( note_desc[type].str )
- {
-- str = elf_note_desc(elf, note);
-+ str = elf_strval(elf, elf_note_desc(elf, note));
-+ if (str == NULL)
-+ /* elf_strval will mark elf broken if it fails so no need to log */
-+ return 0;
- elf_msg(elf, "%s: %s = \"%s\"\n", __FUNCTION__,
- note_desc[type].name, str);
- parms->elf_notes[type].type = XEN_ENT_STR;
-@@ -213,23 +216,37 @@ int elf_xen_parse_note(struct elf_binary *elf,
- return 0;
- }
-
--static int elf_xen_parse_notes(struct elf_binary *elf,
-+#define ELF_NOTE_INVALID (~0U)
-+
-+static unsigned elf_xen_parse_notes(struct elf_binary *elf,
- struct elf_dom_parms *parms,
-- const void *start, const void *end)
-+ elf_ptrval start,
-+ elf_ptrval end,
-+ unsigned *total_note_count)
- {
-- int xen_elfnotes = 0;
-- const elf_note *note;
-+ unsigned xen_elfnotes = 0;
-+ ELF_HANDLE_DECL(elf_note) note;
-+ const char *note_name;
-
- parms->elf_note_start = start;
- parms->elf_note_end = end;
-- for ( note = parms->elf_note_start;
-- (void *)note < parms->elf_note_end;
-+ for ( note = ELF_MAKE_HANDLE(elf_note, parms->elf_note_start);
-+ ELF_HANDLE_PTRVAL(note) < parms->elf_note_end;
- note = elf_note_next(elf, note) )
- {
-- if ( strcmp(elf_note_name(elf, note), "Xen") )
-+ if ( *total_note_count >= ELF_MAX_TOTAL_NOTE_COUNT )
-+ {
-+ elf_mark_broken(elf, "too many ELF notes");
-+ break;
-+ }
-+ (*total_note_count)++;
-+ note_name = elf_note_name(elf, note);
-+ if ( note_name == NULL )
-+ continue;
-+ if ( strcmp(note_name, "Xen") )
- continue;
- if ( elf_xen_parse_note(elf, parms, note) )
-- return -1;
-+ return ELF_NOTE_INVALID;
- xen_elfnotes++;
- }
- return xen_elfnotes;
-@@ -238,48 +255,49 @@ static int elf_xen_parse_notes(struct elf_binary *elf,
- /* ------------------------------------------------------------------------ */
- /* __xen_guest section */
-
--int elf_xen_parse_guest_info(struct elf_binary *elf,
-+elf_errorstatus elf_xen_parse_guest_info(struct elf_binary *elf,
- struct elf_dom_parms *parms)
- {
-- const char *h;
-- char name[32], value[128];
-- int len;
-+ elf_ptrval h;
-+ unsigned char name[32], value[128];
-+ unsigned len;
-
- h = parms->guest_info;
-- while ( *h )
-+#define STAR(h) (elf_access_unsigned(elf, (h), 0, 1))
-+ while ( STAR(h) )
- {
-- memset(name, 0, sizeof(name));
-- memset(value, 0, sizeof(value));
-+ elf_memset_unchecked(name, 0, sizeof(name));
-+ elf_memset_unchecked(value, 0, sizeof(value));
- for ( len = 0;; len++, h++ )
- {
- if ( len >= sizeof(name)-1 )
- break;
-- if ( *h == '\0' )
-+ if ( STAR(h) == '\0' )
- break;
-- if ( *h == ',' )
-+ if ( STAR(h) == ',' )
- {
- h++;
- break;
- }
-- if ( *h == '=' )
-+ if ( STAR(h) == '=' )
- {
- h++;
- for ( len = 0;; len++, h++ )
- {
- if ( len >= sizeof(value)-1 )
- break;
-- if ( *h == '\0' )
-+ if ( STAR(h) == '\0' )
- break;
-- if ( *h == ',' )
-+ if ( STAR(h) == ',' )
- {
- h++;
- break;
- }
-- value[len] = *h;
-+ value[len] = STAR(h);
- }
- break;
- }
-- name[len] = *h;
-+ name[len] = STAR(h);
- }
- elf_msg(elf, "%s: %s=\"%s\"\n", __FUNCTION__, name, value);
-
-@@ -325,12 +343,13 @@ int elf_xen_parse_guest_info(struct elf_binary *elf,
- /* ------------------------------------------------------------------------ */
- /* sanity checks */
-
--static int elf_xen_note_check(struct elf_binary *elf,
-+static elf_errorstatus elf_xen_note_check(struct elf_binary *elf,
- struct elf_dom_parms *parms)
- {
-- if ( (parms->elf_note_start == NULL) && (parms->guest_info == NULL) )
-+ if ( (ELF_PTRVAL_INVALID(parms->elf_note_start)) &&
-+ (ELF_PTRVAL_INVALID(parms->guest_info)) )
- {
-- int machine = elf_uval(elf, elf->ehdr, e_machine);
-+ unsigned machine = elf_uval(elf, elf->ehdr, e_machine);
- if ( (machine == EM_386) || (machine == EM_X86_64) )
- {
- elf_err(elf, "%s: ERROR: Not a Xen-ELF image: "
-@@ -368,7 +387,7 @@ static int elf_xen_note_check(struct elf_binary *elf,
- return 0;
- }
-
--static int elf_xen_addr_calc_check(struct elf_binary *elf,
-+static elf_errorstatus elf_xen_addr_calc_check(struct elf_binary *elf,
- struct elf_dom_parms *parms)
- {
- if ( (parms->elf_paddr_offset != UNSET_ADDR) &&
-@@ -454,15 +473,16 @@ static int elf_xen_addr_calc_check(struct elf_binary *elf,
- /* ------------------------------------------------------------------------ */
- /* glue it all together ... */
-
--int elf_xen_parse(struct elf_binary *elf,
-+elf_errorstatus elf_xen_parse(struct elf_binary *elf,
- struct elf_dom_parms *parms)
- {
-- const elf_shdr *shdr;
-- const elf_phdr *phdr;
-- int xen_elfnotes = 0;
-- int i, count, rc;
-+ ELF_HANDLE_DECL(elf_shdr) shdr;
-+ ELF_HANDLE_DECL(elf_phdr) phdr;
-+ unsigned xen_elfnotes = 0;
-+ unsigned i, count, more_notes;
-+ unsigned total_note_count = 0;
-
-- memset(parms, 0, sizeof(*parms));
-+ elf_memset_unchecked(parms, 0, sizeof(*parms));
- parms->virt_base = UNSET_ADDR;
- parms->virt_entry = UNSET_ADDR;
- parms->virt_hypercall = UNSET_ADDR;
-@@ -475,6 +495,9 @@ int elf_xen_parse(struct elf_binary *elf,
- for ( i = 0; i < count; i++ )
- {
- phdr = elf_phdr_by_index(elf, i);
-+ if ( !elf_access_ok(elf, ELF_HANDLE_PTRVAL(phdr), 1) )
-+ /* input has an insane program header count field */
-+ break;
- if ( elf_uval(elf, phdr, p_type) != PT_NOTE )
- continue;
-
-@@ -485,13 +508,14 @@ int elf_xen_parse(struct elf_binary *elf,
- if (elf_uval(elf, phdr, p_offset) == 0)
- continue;
-
-- rc = elf_xen_parse_notes(elf, parms,
-+ more_notes = elf_xen_parse_notes(elf, parms,
- elf_segment_start(elf, phdr),
-- elf_segment_end(elf, phdr));
-- if ( rc == -1 )
-+ elf_segment_end(elf, phdr),
-+ &total_note_count);
-+ if ( more_notes == ELF_NOTE_INVALID )
- return -1;
-
-- xen_elfnotes += rc;
-+ xen_elfnotes += more_notes;
- }
-
- /*
-@@ -504,21 +528,25 @@ int elf_xen_parse(struct elf_binary *elf,
- for ( i = 0; i < count; i++ )
- {
- shdr = elf_shdr_by_index(elf, i);
-+ if ( !elf_access_ok(elf, ELF_HANDLE_PTRVAL(shdr), 1) )
-+ /* input has an insane section header count field */
-+ break;
-
- if ( elf_uval(elf, shdr, sh_type) != SHT_NOTE )
- continue;
-
-- rc = elf_xen_parse_notes(elf, parms,
-+ more_notes = elf_xen_parse_notes(elf, parms,
- elf_section_start(elf, shdr),
-- elf_section_end(elf, shdr));
-+ elf_section_end(elf, shdr),
-+ &total_note_count);
-
-- if ( rc == -1 )
-+ if ( more_notes == ELF_NOTE_INVALID )
- return -1;
-
-- if ( xen_elfnotes == 0 && rc > 0 )
-+ if ( xen_elfnotes == 0 && more_notes > 0 )
- elf_msg(elf, "%s: using notes from SHT_NOTE section\n", __FUNCTION__);
-
-- xen_elfnotes += rc;
-+ xen_elfnotes += more_notes;
- }
-
- }
-@@ -528,20 +556,15 @@ int elf_xen_parse(struct elf_binary *elf,
- */
- if ( xen_elfnotes == 0 )
- {
-- count = elf_shdr_count(elf);
-- for ( i = 0; i < count; i++ )
-+ shdr = elf_shdr_by_name(elf, "__xen_guest");
-+ if ( ELF_HANDLE_VALID(shdr) )
- {
-- shdr = elf_shdr_by_name(elf, "__xen_guest");
-- if ( shdr )
-- {
-- parms->guest_info = elf_section_start(elf, shdr);
-- parms->elf_note_start = NULL;
-- parms->elf_note_end = NULL;
-- elf_msg(elf, "%s: __xen_guest: \"%s\"\n", __FUNCTION__,
-- parms->guest_info);
-- elf_xen_parse_guest_info(elf, parms);
-- break;
-- }
-+ parms->guest_info = elf_section_start(elf, shdr);
-+ parms->elf_note_start = ELF_INVALID_PTRVAL;
-+ parms->elf_note_end = ELF_INVALID_PTRVAL;
-+ elf_msg(elf, "%s: __xen_guest: \"%s\"\n", __FUNCTION__,
-+ elf_strfmt(elf, parms->guest_info));
-+ elf_xen_parse_guest_info(elf, parms);
- }
- }
-
-diff --git a/xen/common/libelf/libelf-loader.c b/xen/common/libelf/libelf-loader.c
-index ab58b8b..e2e75af 100644
---- a/xen/common/libelf/libelf-loader.c
-+++ b/xen/common/libelf/libelf-loader.c
-@@ -16,27 +16,33 @@
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-+#ifdef __XEN__
-+#include <asm/guest_access.h>
-+#endif
-+
- #include "libelf-private.h"
-
- /* ------------------------------------------------------------------------ */
-
--int elf_init(struct elf_binary *elf, const char *image, size_t size)
-+elf_errorstatus elf_init(struct elf_binary *elf, const char *image_input, size_t size)
- {
-- const elf_shdr *shdr;
-+ ELF_HANDLE_DECL(elf_shdr) shdr;
- uint64_t i, count, section, offset;
-
-- if ( !elf_is_elfbinary(image) )
-+ if ( !elf_is_elfbinary(image_input, size) )
- {
- elf_err(elf, "%s: not an ELF binary\n", __FUNCTION__);
- return -1;
- }
-
-- memset(elf, 0, sizeof(*elf));
-- elf->image = image;
-+ elf_memset_unchecked(elf, 0, sizeof(*elf));
-+ elf->image_base = image_input;
- elf->size = size;
-- elf->ehdr = (elf_ehdr *)image;
-- elf->class = elf->ehdr->e32.e_ident[EI_CLASS];
-- elf->data = elf->ehdr->e32.e_ident[EI_DATA];
-+ elf->ehdr = ELF_MAKE_HANDLE(elf_ehdr, (elf_ptrval)image_input);
-+ elf->class = elf_uval_3264(elf, elf->ehdr, e32.e_ident[EI_CLASS]);
-+ elf->data = elf_uval_3264(elf, elf->ehdr, e32.e_ident[EI_DATA]);
-+ elf->caller_xdest_base = NULL;
-+ elf->caller_xdest_size = 0;
-
- /* Sanity check phdr. */
- offset = elf_uval(elf, elf->ehdr, e_phoff) +
-@@ -61,7 +67,7 @@ int elf_init(struct elf_binary *elf, const char *image, size_t size)
- /* Find section string table. */
- section = elf_uval(elf, elf->ehdr, e_shstrndx);
- shdr = elf_shdr_by_index(elf, section);
-- if ( shdr != NULL )
-+ if ( ELF_HANDLE_VALID(shdr) )
- elf->sec_strtab = elf_section_start(elf, shdr);
-
- /* Find symbol table and symbol string table. */
-@@ -69,13 +75,16 @@ int elf_init(struct elf_binary *elf, const char *image, size_t size)
- for ( i = 0; i < count; i++ )
- {
- shdr = elf_shdr_by_index(elf, i);
-+ if ( !elf_access_ok(elf, ELF_HANDLE_PTRVAL(shdr), 1) )
-+ /* input has an insane section header count field */
-+ break;
- if ( elf_uval(elf, shdr, sh_type) != SHT_SYMTAB )
- continue;
- elf->sym_tab = shdr;
- shdr = elf_shdr_by_index(elf, elf_uval(elf, shdr, sh_link));
-- if ( shdr == NULL )
-+ if ( !ELF_HANDLE_VALID(shdr) )
- {
-- elf->sym_tab = NULL;
-+ elf->sym_tab = ELF_INVALID_HANDLE(elf_shdr);
- continue;
- }
- elf->sym_strtab = elf_section_start(elf, shdr);
-@@ -86,7 +95,7 @@ int elf_init(struct elf_binary *elf, const char *image, size_t size)
- }
-
- #ifndef __XEN__
--void elf_call_log_callback(struct elf_binary *elf, int iserr,
-+void elf_call_log_callback(struct elf_binary *elf, bool iserr,
- const char *fmt,...) {
- va_list al;
-
-@@ -101,36 +110,39 @@ void elf_call_log_callback(struct elf_binary *elf, int iserr,
- }
-
- void elf_set_log(struct elf_binary *elf, elf_log_callback *log_callback,
-- void *log_caller_data, int verbose)
-+ void *log_caller_data, bool verbose)
- {
- elf->log_callback = log_callback;
- elf->log_caller_data = log_caller_data;
- elf->verbose = verbose;
- }
-
--static int elf_load_image(void *dst, const void *src, uint64_t filesz, uint64_t memsz)
-+static elf_errorstatus elf_load_image(struct elf_binary *elf,
-+ elf_ptrval dst, elf_ptrval src,
-+ uint64_t filesz, uint64_t memsz)
- {
-- memcpy(dst, src, filesz);
-- memset(dst + filesz, 0, memsz - filesz);
-+ elf_memcpy_safe(elf, dst, src, filesz);
-+ elf_memset_safe(elf, dst + filesz, 0, memsz - filesz);
- return 0;
- }
- #else
--#include <asm/guest_access.h>
-
- void elf_set_verbose(struct elf_binary *elf)
- {
- elf->verbose = 1;
- }
-
--static int elf_load_image(void *dst, const void *src, uint64_t filesz, uint64_t memsz)
-+static elf_errorstatus elf_load_image(struct elf_binary *elf, elf_ptrval dst, elf_ptrval src, uint64_t filesz, uint64_t memsz)
- {
-- int rc;
-+ elf_errorstatus rc;
- if ( filesz > ULONG_MAX || memsz > ULONG_MAX )
- return -1;
-- rc = raw_copy_to_guest(dst, src, filesz);
-+ /* We trust the dom0 kernel image completely, so we don't care
-+ * about overruns etc. here. */
-+ rc = raw_copy_to_guest(ELF_UNSAFE_PTR(dst), ELF_UNSAFE_PTR(src), filesz);
- if ( rc != 0 )
- return -1;
-- rc = raw_clear_guest(dst + filesz, memsz - filesz);
-+ rc = raw_clear_guest(ELF_UNSAFE_PTR(dst + filesz), memsz - filesz);
- if ( rc != 0 )
- return -1;
- return 0;
-@@ -141,10 +153,10 @@ static int elf_load_image(void *dst, const void *src, uint64_t filesz, uint64_t
- void elf_parse_bsdsyms(struct elf_binary *elf, uint64_t pstart)
- {
- uint64_t sz;
-- const elf_shdr *shdr;
-- int i, type;
-+ ELF_HANDLE_DECL(elf_shdr) shdr;
-+ unsigned i, type;
-
-- if ( !elf->sym_tab )
-+ if ( !ELF_HANDLE_VALID(elf->sym_tab) )
- return;
-
- pstart = elf_round_up(elf, pstart);
-@@ -161,7 +173,10 @@ void elf_parse_bsdsyms(struct elf_binary *elf, uint64_t pstart)
- for ( i = 0; i < elf_shdr_count(elf); i++ )
- {
- shdr = elf_shdr_by_index(elf, i);
-- type = elf_uval(elf, (elf_shdr *)shdr, sh_type);
-+ if ( !elf_access_ok(elf, ELF_HANDLE_PTRVAL(shdr), 1) )
-+ /* input has an insane section header count field */
-+ break;
-+ type = elf_uval(elf, shdr, sh_type);
- if ( (type == SHT_STRTAB) || (type == SHT_SYMTAB) )
- sz = elf_round_up(elf, sz + elf_uval(elf, shdr, sh_size));
- }
-@@ -172,11 +187,13 @@ void elf_parse_bsdsyms(struct elf_binary *elf, uint64_t pstart)
-
- static void elf_load_bsdsyms(struct elf_binary *elf)
- {
-- elf_ehdr *sym_ehdr;
-+ ELF_HANDLE_DECL(elf_ehdr) sym_ehdr;
- unsigned long sz;
-- char *maxva, *symbase, *symtab_addr;
-- elf_shdr *shdr;
-- int i, type;
-+ elf_ptrval maxva;
-+ elf_ptrval symbase;
-+ elf_ptrval symtab_addr;
-+ ELF_HANDLE_DECL(elf_shdr) shdr;
-+ unsigned i, type;
-
- if ( !elf->bsd_symtab_pstart )
- return;
-@@ -184,18 +201,18 @@ static void elf_load_bsdsyms(struct elf_binary *elf)
- #define elf_hdr_elm(_elf, _hdr, _elm, _val) \
- do { \
- if ( elf_64bit(_elf) ) \
-- (_hdr)->e64._elm = _val; \
-+ elf_store_field(_elf, _hdr, e64._elm, _val); \
- else \
-- (_hdr)->e32._elm = _val; \
-+ elf_store_field(_elf, _hdr, e32._elm, _val); \
- } while ( 0 )
-
- symbase = elf_get_ptr(elf, elf->bsd_symtab_pstart);
- symtab_addr = maxva = symbase + sizeof(uint32_t);
-
- /* Set up Elf header. */
-- sym_ehdr = (elf_ehdr *)symtab_addr;
-+ sym_ehdr = ELF_MAKE_HANDLE(elf_ehdr, symtab_addr);
- sz = elf_uval(elf, elf->ehdr, e_ehsize);
-- memcpy(sym_ehdr, elf->ehdr, sz);
-+ elf_memcpy_safe(elf, ELF_HANDLE_PTRVAL(sym_ehdr), ELF_HANDLE_PTRVAL(elf->ehdr), sz);
- maxva += sz; /* no round up */
-
- elf_hdr_elm(elf, sym_ehdr, e_phoff, 0);
-@@ -204,37 +221,50 @@ do { \
- elf_hdr_elm(elf, sym_ehdr, e_phnum, 0);
-
- /* Copy Elf section headers. */
-- shdr = (elf_shdr *)maxva;
-+ shdr = ELF_MAKE_HANDLE(elf_shdr, maxva);
- sz = elf_shdr_count(elf) * elf_uval(elf, elf->ehdr, e_shentsize);
-- memcpy(shdr, elf->image + elf_uval(elf, elf->ehdr, e_shoff), sz);
-- maxva = (char *)(long)elf_round_up(elf, (long)maxva + sz);
-+ elf_memcpy_safe(elf, ELF_HANDLE_PTRVAL(shdr),
-+ ELF_IMAGE_BASE(elf) + elf_uval(elf, elf->ehdr, e_shoff),
-+ sz);
-+ maxva = elf_round_up(elf, (unsigned long)maxva + sz);
-
- for ( i = 0; i < elf_shdr_count(elf); i++ )
- {
-+ elf_ptrval old_shdr_p;
-+ elf_ptrval new_shdr_p;
-+
- type = elf_uval(elf, shdr, sh_type);
- if ( (type == SHT_STRTAB) || (type == SHT_SYMTAB) )
- {
-- elf_msg(elf, "%s: shdr %i at 0x%p -> 0x%p\n", __func__, i,
-+ elf_msg(elf, "%s: shdr %i at 0x%"ELF_PRPTRVAL" -> 0x%"ELF_PRPTRVAL"\n", __func__, i,
- elf_section_start(elf, shdr), maxva);
- sz = elf_uval(elf, shdr, sh_size);
-- memcpy(maxva, elf_section_start(elf, shdr), sz);
-+ elf_memcpy_safe(elf, maxva, elf_section_start(elf, shdr), sz);
- /* Mangled to be based on ELF header location. */
- elf_hdr_elm(elf, shdr, sh_offset, maxva - symtab_addr);
-- maxva = (char *)(long)elf_round_up(elf, (long)maxva + sz);
-+ maxva = elf_round_up(elf, (unsigned long)maxva + sz);
- }
-- shdr = (elf_shdr *)((long)shdr +
-- (long)elf_uval(elf, elf->ehdr, e_shentsize));
-+ old_shdr_p = ELF_HANDLE_PTRVAL(shdr);
-+ new_shdr_p = old_shdr_p + elf_uval(elf, elf->ehdr, e_shentsize);
-+ if ( new_shdr_p <= old_shdr_p ) /* wrapped or stuck */
-+ {
-+ elf_mark_broken(elf, "bad section header length");
-+ break;
-+ }
-+ if ( !elf_access_ok(elf, new_shdr_p, 1) ) /* outside image */
-+ break;
-+ shdr = ELF_MAKE_HANDLE(elf_shdr, new_shdr_p);
- }
-
- /* Write down the actual sym size. */
-- *(uint32_t *)symbase = maxva - symtab_addr;
-+ elf_store_val(elf, uint32_t, symbase, maxva - symtab_addr);
-
- #undef elf_ehdr_elm
- }
-
- void elf_parse_binary(struct elf_binary *elf)
- {
-- const elf_phdr *phdr;
-+ ELF_HANDLE_DECL(elf_phdr) phdr;
- uint64_t low = -1;
- uint64_t high = 0;
- uint64_t i, count, paddr, memsz;
-@@ -243,6 +273,9 @@ void elf_parse_binary(struct elf_binary *elf)
- for ( i = 0; i < count; i++ )
- {
- phdr = elf_phdr_by_index(elf, i);
-+ if ( !elf_access_ok(elf, ELF_HANDLE_PTRVAL(phdr), 1) )
-+ /* input has an insane program header count field */
-+ break;
- if ( !elf_phdr_is_loadable(elf, phdr) )
- continue;
- paddr = elf_uval(elf, phdr, p_paddr);
-@@ -260,16 +293,25 @@ void elf_parse_binary(struct elf_binary *elf)
- __FUNCTION__, elf->pstart, elf->pend);
- }
-
--int elf_load_binary(struct elf_binary *elf)
-+elf_errorstatus elf_load_binary(struct elf_binary *elf)
- {
-- const elf_phdr *phdr;
-+ ELF_HANDLE_DECL(elf_phdr) phdr;
- uint64_t i, count, paddr, offset, filesz, memsz;
-- char *dest;
-+ elf_ptrval dest;
-+ /*
-+ * Let bizarre ELFs write the output image up to twice; this
-+ * calculation is just to ensure our copying loop is no worse than
-+ * O(domain_size).
-+ */
-+ uint64_t remain_allow_copy = (uint64_t)elf->dest_size * 2;
-
- count = elf_uval(elf, elf->ehdr, e_phnum);
- for ( i = 0; i < count; i++ )
- {
- phdr = elf_phdr_by_index(elf, i);
-+ if ( !elf_access_ok(elf, ELF_HANDLE_PTRVAL(phdr), 1) )
-+ /* input has an insane program header count field */
-+ break;
- if ( !elf_phdr_is_loadable(elf, phdr) )
- continue;
- paddr = elf_uval(elf, phdr, p_paddr);
-@@ -277,9 +319,23 @@ int elf_load_binary(struct elf_binary *elf)
- filesz = elf_uval(elf, phdr, p_filesz);
- memsz = elf_uval(elf, phdr, p_memsz);
- dest = elf_get_ptr(elf, paddr);
-- elf_msg(elf, "%s: phdr %" PRIu64 " at 0x%p -> 0x%p\n",
-- __func__, i, dest, dest + filesz);
-- if ( elf_load_image(dest, elf->image + offset, filesz, memsz) != 0 )
-+
-+ /*
-+ * We need to check that the input image doesn't have us copy
-+ * the whole image zillions of times, as that could lead to
-+ * O(n^2) time behaviour and possible DoS by a malicous ELF.
-+ */
-+ if ( remain_allow_copy < memsz )
-+ {
-+ elf_mark_broken(elf, "program segments total to more"
-+ " than the input image size");
-+ break;
-+ }
-+ remain_allow_copy -= memsz;
-+
-+ elf_msg(elf, "%s: phdr %" PRIu64 " at 0x%"ELF_PRPTRVAL" -> 0x%"ELF_PRPTRVAL"\n",
-+ __func__, i, dest, (elf_ptrval)(dest + filesz));
-+ if ( elf_load_image(elf, dest, ELF_IMAGE_BASE(elf) + offset, filesz, memsz) != 0 )
- return -1;
- }
-
-@@ -287,18 +343,18 @@ int elf_load_binary(struct elf_binary *elf)
- return 0;
- }
-
--void *elf_get_ptr(struct elf_binary *elf, unsigned long addr)
-+elf_ptrval elf_get_ptr(struct elf_binary *elf, unsigned long addr)
- {
-- return elf->dest + addr - elf->pstart;
-+ return ELF_REALPTR2PTRVAL(elf->dest_base) + addr - elf->pstart;
- }
-
- uint64_t elf_lookup_addr(struct elf_binary * elf, const char *symbol)
- {
-- const elf_sym *sym;
-+ ELF_HANDLE_DECL(elf_sym) sym;
- uint64_t value;
-
- sym = elf_sym_by_name(elf, symbol);
-- if ( sym == NULL )
-+ if ( !ELF_HANDLE_VALID(sym) )
- {
- elf_err(elf, "%s: not found: %s\n", __FUNCTION__, symbol);
- return -1;
-diff --git a/xen/common/libelf/libelf-private.h b/xen/common/libelf/libelf-private.h
-index 3ef753c..277be04 100644
---- a/xen/common/libelf/libelf-private.h
-+++ b/xen/common/libelf/libelf-private.h
-@@ -77,7 +77,7 @@
- #define elf_err(elf, fmt, args ... ) \
- elf_call_log_callback(elf, 1, fmt , ## args );
-
--void elf_call_log_callback(struct elf_binary*, int iserr, const char *fmt,...);
-+void elf_call_log_callback(struct elf_binary*, bool iserr, const char *fmt,...);
-
- #define safe_strcpy(d,s) \
- do { strncpy((d),(s),sizeof((d))-1); \
-@@ -86,6 +86,19 @@ do { strncpy((d),(s),sizeof((d))-1); \
-
- #endif
-
-+#undef memcpy
-+#undef memset
-+#undef memmove
-+#undef strcpy
-+
-+#define memcpy MISTAKE_unspecified_memcpy
-+#define memset MISTAKE_unspecified_memset
-+#define memmove MISTAKE_unspecified_memmove
-+#define strcpy MISTAKE_unspecified_strcpy
-+ /* This prevents libelf from using these undecorated versions
-+ * of memcpy, memset, memmove and strcpy. Every call site
-+ * must either use elf_mem*_unchecked, or elf_mem*_safe. */
-+
- #endif /* __LIBELF_PRIVATE_H_ */
-
- /*
-diff --git a/xen/common/libelf/libelf-relocate.c b/xen/common/libelf/libelf-relocate.c
-deleted file mode 100644
-index 7ef4b01..0000000
---- a/xen/common/libelf/libelf-relocate.c
-+++ /dev/null
-@@ -1,372 +0,0 @@
--/*
-- * ELF relocation code (not used by xen kernel right now).
-- *
-- * This library is free software; you can redistribute it and/or
-- * modify it under the terms of the GNU Lesser General Public
-- * License as published by the Free Software Foundation;
-- * version 2.1 of the License.
-- *
-- * This library is distributed in the hope that it will be useful,
-- * but WITHOUT ANY WARRANTY; without even the implied warranty of
-- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-- * Lesser General Public License for more details.
-- *
-- * You should have received a copy of the GNU Lesser General Public
-- * License along with this library; if not, write to the Free Software
-- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-- */
--
--#include "libelf-private.h"
--
--/* ------------------------------------------------------------------------ */
--
--static const char *rel_names_i386[] = {
-- "R_386_NONE",
-- "R_386_32",
-- "R_386_PC32",
-- "R_386_GOT32",
-- "R_386_PLT32",
-- "R_386_COPY",
-- "R_386_GLOB_DAT",
-- "R_386_JMP_SLOT",
-- "R_386_RELATIVE",
-- "R_386_GOTOFF",
-- "R_386_GOTPC",
-- "R_386_32PLT",
-- "R_386_TLS_TPOFF",
-- "R_386_TLS_IE",
-- "R_386_TLS_GOTIE",
-- "R_386_TLS_LE",
-- "R_386_TLS_GD",
-- "R_386_TLS_LDM",
-- "R_386_16",
-- "R_386_PC16",
-- "R_386_8",
-- "R_386_PC8",
-- "R_386_TLS_GD_32",
-- "R_386_TLS_GD_PUSH",
-- "R_386_TLS_GD_CALL",
-- "R_386_TLS_GD_POP",
-- "R_386_TLS_LDM_32",
-- "R_386_TLS_LDM_PUSH",
-- "R_386_TLS_LDM_CALL",
-- "R_386_TLS_LDM_POP",
-- "R_386_TLS_LDO_32",
-- "R_386_TLS_IE_32",
-- "R_386_TLS_LE_32",
-- "R_386_TLS_DTPMOD32",
-- "R_386_TLS_DTPOFF32",
-- "R_386_TLS_TPOFF32",
--};
--
--static int elf_reloc_i386(struct elf_binary *elf, int type,
-- uint64_t addr, uint64_t value)
--{
-- void *ptr = elf_get_ptr(elf, addr);
-- uint32_t *u32;
--
-- switch ( type )
-- {
-- case 1 /* R_386_32 */ :
-- u32 = ptr;
-- *u32 += elf->reloc_offset;
-- break;
-- case 2 /* R_386_PC32 */ :
-- /* nothing */
-- break;
-- default:
-- return -1;
-- }
-- return 0;
--}
--
--/* ------------------------------------------------------------------------ */
--
--static const char *rel_names_x86_64[] = {
-- "R_X86_64_NONE",
-- "R_X86_64_64",
-- "R_X86_64_PC32",
-- "R_X86_64_GOT32",
-- "R_X86_64_PLT32",
-- "R_X86_64_COPY",
-- "R_X86_64_GLOB_DAT",
-- "R_X86_64_JUMP_SLOT",
-- "R_X86_64_RELATIVE",
-- "R_X86_64_GOTPCREL",
-- "R_X86_64_32",
-- "R_X86_64_32S",
-- "R_X86_64_16",
-- "R_X86_64_PC16",
-- "R_X86_64_8",
-- "R_X86_64_PC8",
-- "R_X86_64_DTPMOD64",
-- "R_X86_64_DTPOFF64",
-- "R_X86_64_TPOFF64",
-- "R_X86_64_TLSGD",
-- "R_X86_64_TLSLD",
-- "R_X86_64_DTPOFF32",
-- "R_X86_64_GOTTPOFF",
-- "R_X86_64_TPOFF32",
--};
--
--static int elf_reloc_x86_64(struct elf_binary *elf, int type,
-- uint64_t addr, uint64_t value)
--{
-- void *ptr = elf_get_ptr(elf, addr);
-- uint64_t *u64;
-- uint32_t *u32;
-- int32_t *s32;
--
-- switch ( type )
-- {
-- case 1 /* R_X86_64_64 */ :
-- u64 = ptr;
-- value += elf->reloc_offset;
-- *u64 = value;
-- break;
-- case 2 /* R_X86_64_PC32 */ :
-- u32 = ptr;
-- *u32 = value - addr;
-- if ( *u32 != (uint32_t)(value - addr) )
-- {
-- elf_err(elf, "R_X86_64_PC32 overflow: 0x%" PRIx32
-- " != 0x%" PRIx32 "\n",
-- *u32, (uint32_t) (value - addr));
-- return -1;
-- }
-- break;
-- case 10 /* R_X86_64_32 */ :
-- u32 = ptr;
-- value += elf->reloc_offset;
-- *u32 = value;
-- if ( *u32 != value )
-- {
-- elf_err(elf, "R_X86_64_32 overflow: 0x%" PRIx32
-- " != 0x%" PRIx64 "\n",
-- *u32, value);
-- return -1;
-- }
-- break;
-- case 11 /* R_X86_64_32S */ :
-- s32 = ptr;
-- value += elf->reloc_offset;
-- *s32 = value;
-- if ( *s32 != (int64_t) value )
-- {
-- elf_err(elf, "R_X86_64_32S overflow: 0x%" PRIx32
-- " != 0x%" PRIx64 "\n",
-- *s32, (int64_t) value);
-- return -1;
-- }
-- break;
-- default:
-- return -1;
-- }
-- return 0;
--}
--
--/* ------------------------------------------------------------------------ */
--
--static struct relocs {
-- const char **names;
-- int count;
-- int (*func) (struct elf_binary * elf, int type, uint64_t addr,
-- uint64_t value);
--} relocs[] =
--/* *INDENT-OFF* */
--{
-- [EM_386] = {
-- .names = rel_names_i386,
-- .count = sizeof(rel_names_i386) / sizeof(rel_names_i386[0]),
-- .func = elf_reloc_i386,
-- },
-- [EM_X86_64] = {
-- .names = rel_names_x86_64,
-- .count = sizeof(rel_names_x86_64) / sizeof(rel_names_x86_64[0]),
-- .func = elf_reloc_x86_64,
-- }
--};
--/* *INDENT-ON* */
--
--/* ------------------------------------------------------------------------ */
--
--static const char *rela_name(int machine, int type)
--{
-- if ( machine > sizeof(relocs) / sizeof(relocs[0]) )
-- return "unknown mach";
-- if ( !relocs[machine].names )
-- return "unknown mach";
-- if ( type > relocs[machine].count )
-- return "unknown rela";
-- return relocs[machine].names[type];
--}
--
--static int elf_reloc_section(struct elf_binary *elf,
-- const elf_shdr * rels,
-- const elf_shdr * sect, const elf_shdr * syms)
--{
-- const void *ptr, *end;
-- const elf_shdr *shdr;
-- const elf_rela *rela;
-- const elf_rel *rel;
-- const elf_sym *sym;
-- uint64_t s_type;
-- uint64_t r_offset;
-- uint64_t r_info;
-- uint64_t r_addend;
-- int r_type, r_sym;
-- size_t rsize;
-- uint64_t shndx, sbase, addr, value;
-- const char *sname;
-- int machine;
--
-- machine = elf_uval(elf, elf->ehdr, e_machine);
-- if ( (machine >= (sizeof(relocs) / sizeof(relocs[0]))) ||
-- (relocs[machine].func == NULL) )
-- {
-- elf_err(elf, "%s: can't handle machine %d\n",
-- __FUNCTION__, machine);
-- return -1;
-- }
-- if ( elf_swap(elf) )
-- {
-- elf_err(elf, "%s: non-native byte order, relocation not supported\n",
-- __FUNCTION__);
-- return -1;
-- }
--
-- s_type = elf_uval(elf, rels, sh_type);
-- rsize = (SHT_REL == s_type) ? elf_size(elf, rel) : elf_size(elf, rela);
-- ptr = elf_section_start(elf, rels);
-- end = elf_section_end(elf, rels);
--
-- for ( ; ptr < end; ptr += rsize )
-- {
-- switch ( s_type )
-- {
-- case SHT_REL:
-- rel = ptr;
-- r_offset = elf_uval(elf, rel, r_offset);
-- r_info = elf_uval(elf, rel, r_info);
-- r_addend = 0;
-- break;
-- case SHT_RELA:
-- rela = ptr;
-- r_offset = elf_uval(elf, rela, r_offset);
-- r_info = elf_uval(elf, rela, r_info);
-- r_addend = elf_uval(elf, rela, r_addend);
-- break;
-- default:
-- /* can't happen */
-- return -1;
-- }
-- if ( elf_64bit(elf) )
-- {
-- r_type = ELF64_R_TYPE(r_info);
-- r_sym = ELF64_R_SYM(r_info);
-- }
-- else
-- {
-- r_type = ELF32_R_TYPE(r_info);
-- r_sym = ELF32_R_SYM(r_info);
-- }
--
-- sym = elf_sym_by_index(elf, r_sym);
-- shndx = elf_uval(elf, sym, st_shndx);
-- switch ( shndx )
-- {
-- case SHN_UNDEF:
-- sname = "*UNDEF*";
-- sbase = 0;
-- break;
-- case SHN_COMMON:
-- elf_err(elf, "%s: invalid section: %" PRId64 "\n",
-- __FUNCTION__, shndx);
-- return -1;
-- case SHN_ABS:
-- sname = "*ABS*";
-- sbase = 0;
-- break;
-- default:
-- shdr = elf_shdr_by_index(elf, shndx);
-- if ( shdr == NULL )
-- {
-- elf_err(elf, "%s: invalid section: %" PRId64 "\n",
-- __FUNCTION__, shndx);
-- return -1;
-- }
-- sname = elf_section_name(elf, shdr);
-- sbase = elf_uval(elf, shdr, sh_addr);
-- }
--
-- addr = r_offset;
-- value = elf_uval(elf, sym, st_value);
-- value += r_addend;
--
-- if ( elf->log_callback && (elf->verbose > 1) )
-- {
-- uint64_t st_name = elf_uval(elf, sym, st_name);
-- const char *name = st_name ? elf->sym_strtab + st_name : "*NONE*";
--
-- elf_msg(elf,
-- "%s: type %s [%d], off 0x%" PRIx64 ", add 0x%" PRIx64 ","
-- " sym %s [0x%" PRIx64 "], sec %s [0x%" PRIx64 "]"
-- " -> addr 0x%" PRIx64 " value 0x%" PRIx64 "\n",
-- __FUNCTION__, rela_name(machine, r_type), r_type, r_offset,
-- r_addend, name, elf_uval(elf, sym, st_value), sname, sbase,
-- addr, value);
-- }
--
-- if ( relocs[machine].func(elf, r_type, addr, value) == -1 )
-- {
-- elf_err(elf, "%s: unknown/unsupported reloc type %s [%d]\n",
-- __FUNCTION__, rela_name(machine, r_type), r_type);
-- return -1;
-- }
-- }
-- return 0;
--}
--
--int elf_reloc(struct elf_binary *elf)
--{
-- const elf_shdr *rels, *sect, *syms;
-- uint64_t i, count, type;
--
-- count = elf_shdr_count(elf);
-- for ( i = 0; i < count; i++ )
-- {
-- rels = elf_shdr_by_index(elf, i);
-- type = elf_uval(elf, rels, sh_type);
-- if ( (type != SHT_REL) && (type != SHT_RELA) )
-- continue;
--
-- sect = elf_shdr_by_index(elf, elf_uval(elf, rels, sh_info));
-- syms = elf_shdr_by_index(elf, elf_uval(elf, rels, sh_link));
-- if ( NULL == sect || NULL == syms )
-- continue;
--
-- if ( !(elf_uval(elf, sect, sh_flags) & SHF_ALLOC) )
-- {
-- elf_msg(elf, "%s: relocations for %s, skipping\n",
-- __FUNCTION__, elf_section_name(elf, sect));
-- continue;
-- }
--
-- elf_msg(elf, "%s: relocations for %s @ 0x%" PRIx64 "\n",
-- __FUNCTION__, elf_section_name(elf, sect),
-- elf_uval(elf, sect, sh_addr));
-- if ( elf_reloc_section(elf, rels, sect, syms) != 0 )
-- return -1;
-- }
-- return 0;
--}
--
--/*
-- * Local variables:
-- * mode: C
-- * c-set-style: "BSD"
-- * c-basic-offset: 4
-- * tab-width: 4
-- * indent-tabs-mode: nil
-- * End:
-- */
-diff --git a/xen/common/libelf/libelf-tools.c b/xen/common/libelf/libelf-tools.c
-index cb97908..e202249 100644
---- a/xen/common/libelf/libelf-tools.c
-+++ b/xen/common/libelf/libelf-tools.c
-@@ -20,201 +20,292 @@
-
- /* ------------------------------------------------------------------------ */
-
--uint64_t elf_access_unsigned(struct elf_binary * elf, const void *ptr,
-- uint64_t offset, size_t size)
-+void elf_mark_broken(struct elf_binary *elf, const char *msg)
- {
-- int need_swap = elf_swap(elf);
-+ if ( elf->broken == NULL )
-+ elf->broken = msg;
-+}
-+
-+const char *elf_check_broken(const struct elf_binary *elf)
-+{
-+ return elf->broken;
-+}
-+
-+static bool elf_ptrval_in_range(elf_ptrval ptrval, uint64_t size,
-+ const void *region, uint64_t regionsize)
-+ /*
-+ * Returns true if the putative memory area [ptrval,ptrval+size>
-+ * is completely inside the region [region,region+regionsize>.
-+ *
-+ * ptrval and size are the untrusted inputs to be checked.
-+ * region and regionsize are trusted and must be correct and valid,
-+ * although it is OK for region to perhaps be maliciously NULL
-+ * (but not some other malicious value).
-+ */
-+{
-+ elf_ptrval regionp = (elf_ptrval)region;
-+
-+ if ( (region == NULL) ||
-+ (ptrval < regionp) || /* start is before region */
-+ (ptrval > regionp + regionsize) || /* start is after region */
-+ (size > regionsize - (ptrval - regionp)) ) /* too big */
-+ return 0;
-+ return 1;
-+}
-+
-+bool elf_access_ok(struct elf_binary * elf,
-+ uint64_t ptrval, size_t size)
-+{
-+ if ( elf_ptrval_in_range(ptrval, size, elf->image_base, elf->size) )
-+ return 1;
-+ if ( elf_ptrval_in_range(ptrval, size, elf->dest_base, elf->dest_size) )
-+ return 1;
-+ if ( elf_ptrval_in_range(ptrval, size,
-+ elf->caller_xdest_base, elf->caller_xdest_size) )
-+ return 1;
-+ elf_mark_broken(elf, "out of range access");
-+ return 0;
-+}
-+
-+void elf_memcpy_safe(struct elf_binary *elf, elf_ptrval dst,
-+ elf_ptrval src, size_t size)
-+{
-+ if ( elf_access_ok(elf, dst, size) &&
-+ elf_access_ok(elf, src, size) )
-+ {
-+ /* use memmove because these checks do not prove that the
-+ * regions don't overlap and overlapping regions grant
-+ * permission for compiler malice */
-+ elf_memmove_unchecked(ELF_UNSAFE_PTR(dst), ELF_UNSAFE_PTR(src), size);
-+ }
-+}
-+
-+void elf_memset_safe(struct elf_binary *elf, elf_ptrval dst, int c, size_t size)
-+{
-+ if ( elf_access_ok(elf, dst, size) )
-+ {
-+ elf_memset_unchecked(ELF_UNSAFE_PTR(dst), c, size);
-+ }
-+}
-+
-+uint64_t elf_access_unsigned(struct elf_binary * elf, elf_ptrval base,
-+ uint64_t moreoffset, size_t size)
-+{
-+ elf_ptrval ptrval = base + moreoffset;
-+ bool need_swap = elf_swap(elf);
- const uint8_t *u8;
- const uint16_t *u16;
- const uint32_t *u32;
- const uint64_t *u64;
-
-+ if ( !elf_access_ok(elf, ptrval, size) )
-+ return 0;
-+
- switch ( size )
- {
- case 1:
-- u8 = ptr + offset;
-+ u8 = (const void*)ptrval;
- return *u8;
- case 2:
-- u16 = ptr + offset;
-+ u16 = (const void*)ptrval;
- return need_swap ? bswap_16(*u16) : *u16;
- case 4:
-- u32 = ptr + offset;
-+ u32 = (const void*)ptrval;
- return need_swap ? bswap_32(*u32) : *u32;
- case 8:
-- u64 = ptr + offset;
-+ u64 = (const void*)ptrval;
- return need_swap ? bswap_64(*u64) : *u64;
- default:
- return 0;
- }
- }
-
--int64_t elf_access_signed(struct elf_binary *elf, const void *ptr,
-- uint64_t offset, size_t size)
--{
-- int need_swap = elf_swap(elf);
-- const int8_t *s8;
-- const int16_t *s16;
-- const int32_t *s32;
-- const int64_t *s64;
--
-- switch ( size )
-- {
-- case 1:
-- s8 = ptr + offset;
-- return *s8;
-- case 2:
-- s16 = ptr + offset;
-- return need_swap ? bswap_16(*s16) : *s16;
-- case 4:
-- s32 = ptr + offset;
-- return need_swap ? bswap_32(*s32) : *s32;
-- case 8:
-- s64 = ptr + offset;
-- return need_swap ? bswap_64(*s64) : *s64;
-- default:
-- return 0;
-- }
--}
--
- uint64_t elf_round_up(struct elf_binary *elf, uint64_t addr)
- {
-- int elf_round = (elf_64bit(elf) ? 8 : 4) - 1;
-+ uint64_t elf_round = (elf_64bit(elf) ? 8 : 4) - 1;
-
- return (addr + elf_round) & ~elf_round;
- }
-
- /* ------------------------------------------------------------------------ */
-
--int elf_shdr_count(struct elf_binary *elf)
-+unsigned elf_shdr_count(struct elf_binary *elf)
- {
-- return elf_uval(elf, elf->ehdr, e_shnum);
-+ unsigned count = elf_uval(elf, elf->ehdr, e_shnum);
-+ uint64_t max = elf->size / sizeof(Elf32_Shdr);
-+ if (max > ~(unsigned)0)
-+ max = ~(unsigned)0; /* Xen doesn't have limits.h :-/ */
-+ if (count > max)
-+ {
-+ elf_mark_broken(elf, "far too many section headers");
-+ count = max;
-+ }
-+ return count;
- }
-
--int elf_phdr_count(struct elf_binary *elf)
-+unsigned elf_phdr_count(struct elf_binary *elf)
- {
- return elf_uval(elf, elf->ehdr, e_phnum);
- }
-
--const elf_shdr *elf_shdr_by_name(struct elf_binary *elf, const char *name)
-+ELF_HANDLE_DECL(elf_shdr) elf_shdr_by_name(struct elf_binary *elf, const char *name)
- {
- uint64_t count = elf_shdr_count(elf);
-- const elf_shdr *shdr;
-+ ELF_HANDLE_DECL(elf_shdr) shdr;
- const char *sname;
-- int i;
-+ unsigned i;
-
- for ( i = 0; i < count; i++ )
- {
- shdr = elf_shdr_by_index(elf, i);
-+ if ( !elf_access_ok(elf, ELF_HANDLE_PTRVAL(shdr), 1) )
-+ /* input has an insane section header count field */
-+ break;
- sname = elf_section_name(elf, shdr);
- if ( sname && !strcmp(sname, name) )
- return shdr;
- }
-- return NULL;
-+ return ELF_INVALID_HANDLE(elf_shdr);
- }
-
--const elf_shdr *elf_shdr_by_index(struct elf_binary *elf, int index)
-+ELF_HANDLE_DECL(elf_shdr) elf_shdr_by_index(struct elf_binary *elf, unsigned index)
- {
- uint64_t count = elf_shdr_count(elf);
-- const void *ptr;
-+ elf_ptrval ptr;
-
- if ( index >= count )
-- return NULL;
-+ return ELF_INVALID_HANDLE(elf_shdr);
-
-- ptr = (elf->image
-+ ptr = (ELF_IMAGE_BASE(elf)
- + elf_uval(elf, elf->ehdr, e_shoff)
- + elf_uval(elf, elf->ehdr, e_shentsize) * index);
-- return ptr;
-+ return ELF_MAKE_HANDLE(elf_shdr, ptr);
- }
-
--const elf_phdr *elf_phdr_by_index(struct elf_binary *elf, int index)
-+ELF_HANDLE_DECL(elf_phdr) elf_phdr_by_index(struct elf_binary *elf, unsigned index)
- {
- uint64_t count = elf_uval(elf, elf->ehdr, e_phnum);
-- const void *ptr;
-+ elf_ptrval ptr;
-
- if ( index >= count )
-- return NULL;
-+ return ELF_INVALID_HANDLE(elf_phdr);
-
-- ptr = (elf->image
-+ ptr = (ELF_IMAGE_BASE(elf)
- + elf_uval(elf, elf->ehdr, e_phoff)
- + elf_uval(elf, elf->ehdr, e_phentsize) * index);
-- return ptr;
-+ return ELF_MAKE_HANDLE(elf_phdr, ptr);
- }
-
--const char *elf_section_name(struct elf_binary *elf, const elf_shdr * shdr)
-+
-+const char *elf_section_name(struct elf_binary *elf,
-+ ELF_HANDLE_DECL(elf_shdr) shdr)
- {
-- if ( elf->sec_strtab == NULL )
-+ if ( ELF_PTRVAL_INVALID(elf->sec_strtab) )
- return "unknown";
-- return elf->sec_strtab + elf_uval(elf, shdr, sh_name);
-+
-+ return elf_strval(elf, elf->sec_strtab + elf_uval(elf, shdr, sh_name));
- }
-
--const void *elf_section_start(struct elf_binary *elf, const elf_shdr * shdr)
-+const char *elf_strval(struct elf_binary *elf, elf_ptrval start)
- {
-- return elf->image + elf_uval(elf, shdr, sh_offset);
-+ uint64_t length;
-+
-+ for ( length = 0; ; length++ ) {
-+ if ( !elf_access_ok(elf, start + length, 1) )
-+ return NULL;
-+ if ( !elf_access_unsigned(elf, start, length, 1) )
-+ /* ok */
-+ return ELF_UNSAFE_PTR(start);
-+ if ( length >= ELF_MAX_STRING_LENGTH )
-+ {
-+ elf_mark_broken(elf, "excessively long string");
-+ return NULL;
-+ }
-+ }
- }
-
--const void *elf_section_end(struct elf_binary *elf, const elf_shdr * shdr)
-+const char *elf_strfmt(struct elf_binary *elf, elf_ptrval start)
- {
-- return elf->image
-+ const char *str = elf_strval(elf, start);
-+
-+ if ( str == NULL )
-+ return "(invalid)";
-+ return str;
-+}
-+
-+elf_ptrval elf_section_start(struct elf_binary *elf, ELF_HANDLE_DECL(elf_shdr) shdr)
-+{
-+ return ELF_IMAGE_BASE(elf) + elf_uval(elf, shdr, sh_offset);
-+}
-+
-+elf_ptrval elf_section_end(struct elf_binary *elf, ELF_HANDLE_DECL(elf_shdr) shdr)
-+{
-+ return ELF_IMAGE_BASE(elf)
- + elf_uval(elf, shdr, sh_offset) + elf_uval(elf, shdr, sh_size);
- }
-
--const void *elf_segment_start(struct elf_binary *elf, const elf_phdr * phdr)
-+elf_ptrval elf_segment_start(struct elf_binary *elf, ELF_HANDLE_DECL(elf_phdr) phdr)
- {
-- return elf->image + elf_uval(elf, phdr, p_offset);
-+ return ELF_IMAGE_BASE(elf)
-+ + elf_uval(elf, phdr, p_offset);
- }
-
--const void *elf_segment_end(struct elf_binary *elf, const elf_phdr * phdr)
-+elf_ptrval elf_segment_end(struct elf_binary *elf, ELF_HANDLE_DECL(elf_phdr) phdr)
- {
-- return elf->image
-+ return ELF_IMAGE_BASE(elf)
- + elf_uval(elf, phdr, p_offset) + elf_uval(elf, phdr, p_filesz);
- }
-
--const elf_sym *elf_sym_by_name(struct elf_binary *elf, const char *symbol)
-+ELF_HANDLE_DECL(elf_sym) elf_sym_by_name(struct elf_binary *elf, const char *symbol)
- {
-- const void *ptr = elf_section_start(elf, elf->sym_tab);
-- const void *end = elf_section_end(elf, elf->sym_tab);
-- const elf_sym *sym;
-+ elf_ptrval ptr = elf_section_start(elf, elf->sym_tab);
-+ elf_ptrval end = elf_section_end(elf, elf->sym_tab);
-+ ELF_HANDLE_DECL(elf_sym) sym;
- uint64_t info, name;
-+ const char *sym_name;
-
- for ( ; ptr < end; ptr += elf_size(elf, sym) )
- {
-- sym = ptr;
-+ sym = ELF_MAKE_HANDLE(elf_sym, ptr);
- info = elf_uval(elf, sym, st_info);
- name = elf_uval(elf, sym, st_name);
- if ( ELF32_ST_BIND(info) != STB_GLOBAL )
- continue;
-- if ( strcmp(elf->sym_strtab + name, symbol) )
-+ sym_name = elf_strval(elf, elf->sym_strtab + name);
-+ if ( sym_name == NULL ) /* out of range, oops */
-+ return ELF_INVALID_HANDLE(elf_sym);
-+ if ( strcmp(sym_name, symbol) )
- continue;
- return sym;
- }
-- return NULL;
-+ return ELF_INVALID_HANDLE(elf_sym);
- }
-
--const elf_sym *elf_sym_by_index(struct elf_binary *elf, int index)
-+ELF_HANDLE_DECL(elf_sym) elf_sym_by_index(struct elf_binary *elf, unsigned index)
- {
-- const void *ptr = elf_section_start(elf, elf->sym_tab);
-- const elf_sym *sym;
-+ elf_ptrval ptr = elf_section_start(elf, elf->sym_tab);
-+ ELF_HANDLE_DECL(elf_sym) sym;
-
-- sym = ptr + index * elf_size(elf, sym);
-+ sym = ELF_MAKE_HANDLE(elf_sym, ptr + index * elf_size(elf, sym));
- return sym;
- }
-
--const char *elf_note_name(struct elf_binary *elf, const elf_note * note)
-+const char *elf_note_name(struct elf_binary *elf, ELF_HANDLE_DECL(elf_note) note)
- {
-- return (void *)note + elf_size(elf, note);
-+ return elf_strval(elf, ELF_HANDLE_PTRVAL(note) + elf_size(elf, note));
- }
-
--const void *elf_note_desc(struct elf_binary *elf, const elf_note * note)
-+elf_ptrval elf_note_desc(struct elf_binary *elf, ELF_HANDLE_DECL(elf_note) note)
- {
-- int namesz = (elf_uval(elf, note, namesz) + 3) & ~3;
-+ unsigned namesz = (elf_uval(elf, note, namesz) + 3) & ~3;
-
-- return (void *)note + elf_size(elf, note) + namesz;
-+ return ELF_HANDLE_PTRVAL(note) + elf_size(elf, note) + namesz;
- }
-
--uint64_t elf_note_numeric(struct elf_binary *elf, const elf_note * note)
-+uint64_t elf_note_numeric(struct elf_binary *elf, ELF_HANDLE_DECL(elf_note) note)
- {
-- const void *desc = elf_note_desc(elf, note);
-- int descsz = elf_uval(elf, note, descsz);
-+ elf_ptrval desc = elf_note_desc(elf, note);
-+ unsigned descsz = elf_uval(elf, note, descsz);
-
- switch (descsz)
- {
-@@ -228,11 +319,11 @@ uint64_t elf_note_numeric(struct elf_binary *elf, const elf_note * note)
- }
- }
-
--uint64_t elf_note_numeric_array(struct elf_binary *elf, const elf_note *note,
-+uint64_t elf_note_numeric_array(struct elf_binary *elf, ELF_HANDLE_DECL(elf_note) note,
- unsigned int unitsz, unsigned int idx)
- {
-- const void *desc = elf_note_desc(elf, note);
-- int descsz = elf_uval(elf, note, descsz);
-+ elf_ptrval desc = elf_note_desc(elf, note);
-+ unsigned descsz = elf_uval(elf, note, descsz);
-
- if ( descsz % unitsz || idx >= descsz / unitsz )
- return 0;
-@@ -248,24 +339,34 @@ uint64_t elf_note_numeric_array(struct elf_binary *elf, const elf_note *note,
- }
- }
-
--const elf_note *elf_note_next(struct elf_binary *elf, const elf_note * note)
-+ELF_HANDLE_DECL(elf_note) elf_note_next(struct elf_binary *elf, ELF_HANDLE_DECL(elf_note) note)
- {
-- int namesz = (elf_uval(elf, note, namesz) + 3) & ~3;
-- int descsz = (elf_uval(elf, note, descsz) + 3) & ~3;
-+ unsigned namesz = (elf_uval(elf, note, namesz) + 3) & ~3;
-+ unsigned descsz = (elf_uval(elf, note, descsz) + 3) & ~3;
-+
-+ elf_ptrval ptrval = ELF_HANDLE_PTRVAL(note)
-+ + elf_size(elf, note) + namesz + descsz;
-
-- return (void *)note + elf_size(elf, note) + namesz + descsz;
-+ if ( ( ptrval <= ELF_HANDLE_PTRVAL(note) || /* wrapped or stuck */
-+ !elf_access_ok(elf, ELF_HANDLE_PTRVAL(note), 1) ) )
-+ ptrval = ELF_MAX_PTRVAL; /* terminate caller's loop */
-+
-+ return ELF_MAKE_HANDLE(elf_note, ptrval);
- }
-
- /* ------------------------------------------------------------------------ */
-
--int elf_is_elfbinary(const void *image)
-+bool elf_is_elfbinary(const void *image_start, size_t image_size)
- {
-- const Elf32_Ehdr *ehdr = image;
-+ const Elf32_Ehdr *ehdr = image_start;
-+
-+ if ( image_size < sizeof(*ehdr) )
-+ return 0;
-
- return IS_ELF(*ehdr);
- }
-
--int elf_phdr_is_loadable(struct elf_binary *elf, const elf_phdr * phdr)
-+bool elf_phdr_is_loadable(struct elf_binary *elf, ELF_HANDLE_DECL(elf_phdr) phdr)
- {
- uint64_t p_type = elf_uval(elf, phdr, p_type);
- uint64_t p_flags = elf_uval(elf, phdr, p_flags);
-diff --git a/xen/include/xen/libelf.h b/xen/include/xen/libelf.h
-index e8f6508..174f8da 100644
---- a/xen/include/xen/libelf.h
-+++ b/xen/include/xen/libelf.h
-@@ -29,6 +29,11 @@
- #error define architectural endianness
- #endif
-
-+#include <stdbool.h>
-+
-+typedef int elf_errorstatus; /* 0: ok; -ve (normally -1): error */
-+typedef int elf_negerrnoval; /* 0: ok; -EFOO: error */
-+
- #undef ELFSIZE
- #include "elfstructs.h"
- #ifdef __XEN__
-@@ -42,12 +47,98 @@
-
- struct elf_binary;
- typedef void elf_log_callback(struct elf_binary*, void *caller_data,
-- int iserr, const char *fmt, va_list al);
-+ bool iserr, const char *fmt, va_list al);
-+
-+#endif
-+
-+#define ELF_MAX_STRING_LENGTH 4096
-+#define ELF_MAX_TOTAL_NOTE_COUNT 65536
-+
-+/* ------------------------------------------------------------------------ */
-+
-+/* Macros for accessing the input image and output area. */
-+
-+/*
-+ * We abstract away the pointerness of these pointers, replacing
-+ * various void*, char* and struct* with the following:
-+ * elf_ptrval A pointer to a byte; one can do pointer arithmetic
-+ * on this.
-+ * HANDLE A pointer to a struct. There is one of these types
-+ * for each pointer type - that is, for each "structname".
-+ * In the arguments to the various HANDLE macros, structname
-+ * must be a single identifier which is a typedef.
-+ * It is not permitted to do arithmetic on these
-+ * pointers. In the current code attempts to do so will
-+ * compile, but in the next patch this will become a
-+ * compile error.
-+ */
-+
-+typedef uintptr_t elf_ptrval;
-+
-+#define ELF_REALPTR2PTRVAL(realpointer) ((elf_ptrval)(realpointer))
-+ /* Converts an actual C pointer into a PTRVAL */
-+
-+#define ELF_HANDLE_DECL(structname) structname##_handle
-+ /* Provides a type declaration for a HANDLE. */
-
-+#ifdef __XEN__
-+# define ELF_PRPTRVAL "lu"
-+ /*
-+ * PRIuPTR is misdefined in xen/include/xen/inttypes.h, on 32-bit,
-+ * to "u", when in fact uintptr_t is an unsigned long.
-+ */
-+#else
-+# define ELF_PRPTRVAL PRIuPTR
- #endif
-+ /* printf format a la PRId... for a PTRVAL */
-+
-+#define ELF_DEFINE_HANDLE(structname) \
-+ typedef union { \
-+ elf_ptrval ptrval; \
-+ const structname *typeonly; /* for sizeof, offsetof, &c only */ \
-+ } structname##_handle;
-+ /*
-+ * This must be invoked for each HANDLE type to define
-+ * the actual C type used for that kind of HANDLE.
-+ */
-+
-+#define ELF_MAKE_HANDLE(structname, ptrval) ((structname##_handle){ ptrval })
-+ /* Converts a PTRVAL to a HANDLE */
-+
-+#define ELF_IMAGE_BASE(elf) ((elf_ptrval)(elf)->image_base)
-+ /* Returns the base of the image as a PTRVAL. */
-+
-+#define ELF_HANDLE_PTRVAL(handleval) ((handleval).ptrval)
-+ /* Converts a HANDLE to a PTRVAL. */
-+
-+#define ELF_UNSAFE_PTR(ptrval) ((void*)(elf_ptrval)(ptrval))
-+ /*
-+ * Turns a PTRVAL into an actual C pointer. Before this is done
-+ * the caller must have ensured that the PTRVAL does in fact point
-+ * to a permissible location.
-+ */
-+
-+/* PTRVALs can be INVALID (ie, NULL). */
-+#define ELF_INVALID_PTRVAL ((elf_ptrval)0) /* returns NULL PTRVAL */
-+#define ELF_INVALID_HANDLE(structname) /* returns NULL handle */ \
-+ ELF_MAKE_HANDLE(structname, ELF_INVALID_PTRVAL)
-+#define ELF_PTRVAL_VALID(ptrval) (!!(ptrval)) /* } */
-+#define ELF_HANDLE_VALID(handleval) (!!(handleval).ptrval) /* } predicates */
-+#define ELF_PTRVAL_INVALID(ptrval) (!ELF_PTRVAL_VALID((ptrval))) /* } */
-+
-+#define ELF_MAX_PTRVAL (~(elf_ptrval)0)
-+ /* PTRVAL value guaranteed to compare > to any valid PTRVAL */
-+
-+/* For internal use by other macros here */
-+#define ELF__HANDLE_FIELD_TYPE(handleval, elm) \
-+ typeof((handleval).typeonly->elm)
-+#define ELF__HANDLE_FIELD_OFFSET(handleval, elm) \
-+ offsetof(typeof(*(handleval).typeonly),elm)
-+
-
- /* ------------------------------------------------------------------------ */
-
-+
- typedef union {
- Elf32_Ehdr e32;
- Elf64_Ehdr e64;
-@@ -83,20 +174,32 @@ typedef union {
- Elf64_Note e64;
- } elf_note;
-
-+ELF_DEFINE_HANDLE(elf_ehdr)
-+ELF_DEFINE_HANDLE(elf_shdr)
-+ELF_DEFINE_HANDLE(elf_phdr)
-+ELF_DEFINE_HANDLE(elf_sym)
-+ELF_DEFINE_HANDLE(elf_note)
-+
- struct elf_binary {
- /* elf binary */
-- const char *image;
-+ const void *image_base;
- size_t size;
- char class;
- char data;
-
-- const elf_ehdr *ehdr;
-- const char *sec_strtab;
-- const elf_shdr *sym_tab;
-- const char *sym_strtab;
-+ ELF_HANDLE_DECL(elf_ehdr) ehdr;
-+ elf_ptrval sec_strtab;
-+ ELF_HANDLE_DECL(elf_shdr) sym_tab;
-+ uint64_t sym_strtab;
-
- /* loaded to */
-- char *dest;
-+ /*
-+ * dest_base and dest_size are trusted and must be correct;
-+ * whenever dest_size is not 0, both of these must be valid
-+ * so long as the struct elf_binary is in use.
-+ */
-+ char *dest_base;
-+ size_t dest_size;
- uint64_t pstart;
- uint64_t pend;
- uint64_t reloc_offset;
-@@ -104,12 +207,22 @@ struct elf_binary {
- uint64_t bsd_symtab_pstart;
- uint64_t bsd_symtab_pend;
-
-+ /*
-+ * caller's other acceptable destination
-+ *
-+ * Again, these are trusted and must be valid (or 0) so long
-+ * as the struct elf_binary is in use.
-+ */
-+ void *caller_xdest_base;
-+ uint64_t caller_xdest_size;
-+
- #ifndef __XEN__
- /* misc */
- elf_log_callback *log_callback;
- void *log_caller_data;
- #endif
-- int verbose;
-+ bool verbose;
-+ const char *broken;
- };
-
- /* ------------------------------------------------------------------------ */
-@@ -127,88 +240,145 @@ struct elf_binary {
- #define elf_lsb(elf) (ELFDATA2LSB == (elf)->data)
- #define elf_swap(elf) (NATIVE_ELFDATA != (elf)->data)
-
--#define elf_uval(elf, str, elem) \
-- ((ELFCLASS64 == (elf)->class) \
-- ? elf_access_unsigned((elf), (str), \
-- offsetof(typeof(*(str)),e64.elem), \
-- sizeof((str)->e64.elem)) \
-- : elf_access_unsigned((elf), (str), \
-- offsetof(typeof(*(str)),e32.elem), \
-- sizeof((str)->e32.elem)))
--
--#define elf_sval(elf, str, elem) \
-- ((ELFCLASS64 == (elf)->class) \
-- ? elf_access_signed((elf), (str), \
-- offsetof(typeof(*(str)),e64.elem), \
-- sizeof((str)->e64.elem)) \
-- : elf_access_signed((elf), (str), \
-- offsetof(typeof(*(str)),e32.elem), \
-- sizeof((str)->e32.elem)))
--
--#define elf_size(elf, str) \
-- ((ELFCLASS64 == (elf)->class) \
-- ? sizeof((str)->e64) : sizeof((str)->e32))
-+#define elf_uval_3264(elf, handle, elem) \
-+ elf_access_unsigned((elf), (handle).ptrval, \
-+ offsetof(typeof(*(handle).typeonly),elem), \
-+ sizeof((handle).typeonly->elem))
-+
-+#define elf_uval(elf, handle, elem) \
-+ ((ELFCLASS64 == (elf)->class) \
-+ ? elf_uval_3264(elf, handle, e64.elem) \
-+ : elf_uval_3264(elf, handle, e32.elem))
-+ /*
-+ * Reads an unsigned field in a header structure in the ELF.
-+ * str is a HANDLE, and elem is the field name in it.
-+ */
-
--uint64_t elf_access_unsigned(struct elf_binary *elf, const void *ptr,
-+
-+#define elf_size(elf, handle_or_handletype) ({ \
-+ typeof(handle_or_handletype) elf_size__dummy; \
-+ ((ELFCLASS64 == (elf)->class) \
-+ ? sizeof(elf_size__dummy.typeonly->e64) \
-+ : sizeof(elf_size__dummy.typeonly->e32)); \
-+})
-+ /*
-+ * Returns the size of the substructure for the appropriate 32/64-bitness.
-+ * str should be a HANDLE.
-+ */
-+
-+uint64_t elf_access_unsigned(struct elf_binary *elf, elf_ptrval ptr,
- uint64_t offset, size_t size);
--int64_t elf_access_signed(struct elf_binary *elf, const void *ptr,
-- uint64_t offset, size_t size);
-+ /* Reads a field at arbitrary offset and alignemnt */
-
- uint64_t elf_round_up(struct elf_binary *elf, uint64_t addr);
-
-+const char *elf_strval(struct elf_binary *elf, elf_ptrval start);
-+ /* may return NULL if the string is out of range etc. */
-+
-+const char *elf_strfmt(struct elf_binary *elf, elf_ptrval start);
-+ /* like elf_strval but returns "(invalid)" instead of NULL */
-+
-+void elf_memcpy_safe(struct elf_binary*, elf_ptrval dst, elf_ptrval src, size_t);
-+void elf_memset_safe(struct elf_binary*, elf_ptrval dst, int c, size_t);
-+ /*
-+ * Versions of memcpy and memset which arrange never to write
-+ * outside permitted areas.
-+ */
-+
-+bool elf_access_ok(struct elf_binary * elf,
-+ uint64_t ptrval, size_t size);
-+
-+#define elf_store_val(elf, type, ptr, val) \
-+ ({ \
-+ typeof(type) elf_store__val = (val); \
-+ elf_ptrval elf_store__targ = ptr; \
-+ if (elf_access_ok((elf), elf_store__targ, \
-+ sizeof(elf_store__val))) { \
-+ elf_memcpy_unchecked((void*)elf_store__targ, &elf_store__val, \
-+ sizeof(elf_store__val)); \
-+ } \
-+ }) \
-+ /* Stores a value at a particular PTRVAL. */
-+
-+#define elf_store_field(elf, hdr, elm, val) \
-+ (elf_store_val((elf), ELF__HANDLE_FIELD_TYPE(hdr, elm), \
-+ ELF_HANDLE_PTRVAL(hdr) + ELF__HANDLE_FIELD_OFFSET(hdr, elm), \
-+ (val)))
-+ /* Stores a 32/64-bit field. hdr is a HANDLE and elm is the field name. */
-+
-+
- /* ------------------------------------------------------------------------ */
- /* xc_libelf_tools.c */
-
--int elf_shdr_count(struct elf_binary *elf);
--int elf_phdr_count(struct elf_binary *elf);
-+unsigned elf_shdr_count(struct elf_binary *elf);
-+unsigned elf_phdr_count(struct elf_binary *elf);
-
--const elf_shdr *elf_shdr_by_name(struct elf_binary *elf, const char *name);
--const elf_shdr *elf_shdr_by_index(struct elf_binary *elf, int index);
--const elf_phdr *elf_phdr_by_index(struct elf_binary *elf, int index);
-+ELF_HANDLE_DECL(elf_shdr) elf_shdr_by_name(struct elf_binary *elf, const char *name);
-+ELF_HANDLE_DECL(elf_shdr) elf_shdr_by_index(struct elf_binary *elf, unsigned index);
-+ELF_HANDLE_DECL(elf_phdr) elf_phdr_by_index(struct elf_binary *elf, unsigned index);
-
--const char *elf_section_name(struct elf_binary *elf, const elf_shdr * shdr);
--const void *elf_section_start(struct elf_binary *elf, const elf_shdr * shdr);
--const void *elf_section_end(struct elf_binary *elf, const elf_shdr * shdr);
-+const char *elf_section_name(struct elf_binary *elf, ELF_HANDLE_DECL(elf_shdr) shdr); /* might return NULL if inputs are invalid */
-+elf_ptrval elf_section_start(struct elf_binary *elf, ELF_HANDLE_DECL(elf_shdr) shdr);
-+elf_ptrval elf_section_end(struct elf_binary *elf, ELF_HANDLE_DECL(elf_shdr) shdr);
-
--const void *elf_segment_start(struct elf_binary *elf, const elf_phdr * phdr);
--const void *elf_segment_end(struct elf_binary *elf, const elf_phdr * phdr);
-+elf_ptrval elf_segment_start(struct elf_binary *elf, ELF_HANDLE_DECL(elf_phdr) phdr);
-+elf_ptrval elf_segment_end(struct elf_binary *elf, ELF_HANDLE_DECL(elf_phdr) phdr);
-
--const elf_sym *elf_sym_by_name(struct elf_binary *elf, const char *symbol);
--const elf_sym *elf_sym_by_index(struct elf_binary *elf, int index);
-+ELF_HANDLE_DECL(elf_sym) elf_sym_by_name(struct elf_binary *elf, const char *symbol);
-+ELF_HANDLE_DECL(elf_sym) elf_sym_by_index(struct elf_binary *elf, unsigned index);
-
--const char *elf_note_name(struct elf_binary *elf, const elf_note * note);
--const void *elf_note_desc(struct elf_binary *elf, const elf_note * note);
--uint64_t elf_note_numeric(struct elf_binary *elf, const elf_note * note);
--uint64_t elf_note_numeric_array(struct elf_binary *, const elf_note *,
-+const char *elf_note_name(struct elf_binary *elf, ELF_HANDLE_DECL(elf_note) note); /* may return NULL */
-+elf_ptrval elf_note_desc(struct elf_binary *elf, ELF_HANDLE_DECL(elf_note) note);
-+uint64_t elf_note_numeric(struct elf_binary *elf, ELF_HANDLE_DECL(elf_note) note);
-+uint64_t elf_note_numeric_array(struct elf_binary *, ELF_HANDLE_DECL(elf_note),
- unsigned int unitsz, unsigned int idx);
--const elf_note *elf_note_next(struct elf_binary *elf, const elf_note * note);
-
--int elf_is_elfbinary(const void *image);
--int elf_phdr_is_loadable(struct elf_binary *elf, const elf_phdr * phdr);
-+/*
-+ * If you use elf_note_next in a loop, you must put a nontrivial upper
-+ * bound on the returned value as part of your loop condition. In
-+ * some cases elf_note_next will substitute ELF_PTRVAL_MAX as return
-+ * value to indicate that the iteration isn't going well (for example,
-+ * the putative "next" value would be earlier in memory). In this
-+ * case the caller's loop must terminate. Checking against the
-+ * end of the notes segment with a strict inequality is sufficient.
-+ */
-+ELF_HANDLE_DECL(elf_note) elf_note_next(struct elf_binary *elf, ELF_HANDLE_DECL(elf_note) note);
-+
-+/* (Only) checks that the image has the right magic number. */
-+bool elf_is_elfbinary(const void *image_start, size_t image_size);
-+
-+bool elf_phdr_is_loadable(struct elf_binary *elf, ELF_HANDLE_DECL(elf_phdr) phdr);
-
- /* ------------------------------------------------------------------------ */
- /* xc_libelf_loader.c */
-
--int elf_init(struct elf_binary *elf, const char *image, size_t size);
-+elf_errorstatus elf_init(struct elf_binary *elf, const char *image, size_t size);
-+ /*
-+ * image and size must be correct. They will be recorded in
-+ * *elf, and must remain valid while the elf is in use.
-+ */
- #ifdef __XEN__
- void elf_set_verbose(struct elf_binary *elf);
- #else
- void elf_set_log(struct elf_binary *elf, elf_log_callback*,
-- void *log_caller_pointer, int verbose);
-+ void *log_caller_pointer, bool verbose);
- #endif
-
- void elf_parse_binary(struct elf_binary *elf);
--int elf_load_binary(struct elf_binary *elf);
-+elf_errorstatus elf_load_binary(struct elf_binary *elf);
-
--void *elf_get_ptr(struct elf_binary *elf, unsigned long addr);
-+elf_ptrval elf_get_ptr(struct elf_binary *elf, unsigned long addr);
- uint64_t elf_lookup_addr(struct elf_binary *elf, const char *symbol);
-
- void elf_parse_bsdsyms(struct elf_binary *elf, uint64_t pstart); /* private */
-
-+void elf_mark_broken(struct elf_binary *elf, const char *msg);
-+const char *elf_check_broken(const struct elf_binary *elf); /* NULL means OK */
-+
- /* ------------------------------------------------------------------------ */
- /* xc_libelf_relocate.c */
-
--int elf_reloc(struct elf_binary *elf);
-+elf_errorstatus elf_reloc(struct elf_binary *elf);
-
- /* ------------------------------------------------------------------------ */
- /* xc_libelf_dominfo.c */
-@@ -232,9 +402,9 @@ struct xen_elfnote {
-
- struct elf_dom_parms {
- /* raw */
-- const char *guest_info;
-- const void *elf_note_start;
-- const void *elf_note_end;
-+ elf_ptrval guest_info;
-+ elf_ptrval elf_note_start;
-+ elf_ptrval elf_note_end;
- struct xen_elfnote elf_notes[XEN_ELFNOTE_MAX + 1];
-
- /* parsed */
-@@ -242,8 +412,8 @@ struct elf_dom_parms {
- char guest_ver[16];
- char xen_ver[16];
- char loader[16];
-- int pae;
-- int bsd_symtab;
-+ int pae; /* some kind of enum apparently */
-+ bool bsd_symtab;
- uint64_t virt_base;
- uint64_t virt_entry;
- uint64_t virt_hypercall;
-@@ -273,10 +443,44 @@ int elf_xen_parse_features(const char *features,
- uint32_t *required);
- int elf_xen_parse_note(struct elf_binary *elf,
- struct elf_dom_parms *parms,
-- const elf_note *note);
-+ ELF_HANDLE_DECL(elf_note) note);
- int elf_xen_parse_guest_info(struct elf_binary *elf,
- struct elf_dom_parms *parms);
- int elf_xen_parse(struct elf_binary *elf,
- struct elf_dom_parms *parms);
-
-+static inline void *elf_memcpy_unchecked(void *dest, const void *src, size_t n)
-+ { return memcpy(dest, src, n); }
-+static inline void *elf_memmove_unchecked(void *dest, const void *src, size_t n)
-+ { return memmove(dest, src, n); }
-+static inline void *elf_memset_unchecked(void *s, int c, size_t n)
-+ { return memset(s, c, n); }
-+ /*
-+ * Unsafe versions of memcpy, memmove memset which take actual C
-+ * pointers. These are just like the real functions.
-+ * We provide these so that in libelf-private.h we can #define
-+ * memcpy, memset and memmove to undefined MISTAKE things.
-+ */
-+
-+
-+/* Advances past amount bytes of the current destination area. */
-+static inline void ELF_ADVANCE_DEST(struct elf_binary *elf, uint64_t amount)
-+{
-+ if ( elf->dest_base == NULL )
-+ {
-+ elf_mark_broken(elf, "advancing in null image");
-+ }
-+ else if ( elf->dest_size >= amount )
-+ {
-+ elf->dest_base += amount;
-+ elf->dest_size -= amount;
-+ }
-+ else
-+ {
-+ elf->dest_size = 0;
-+ elf_mark_broken(elf, "advancing past end (image very short?)");
-+ }
-+}
-+
-+
- #endif /* __XEN_LIBELF_H__ */
diff --git a/main/xen/xsa56.patch b/main/xen/xsa56.patch
deleted file mode 100644
index 1368ac3514..0000000000
--- a/main/xen/xsa56.patch
+++ /dev/null
@@ -1,50 +0,0 @@
-libxc: limit cpu values when setting vcpu affinity
-
-When support for pinning more than 64 cpus was added, the check for
-out-of-range cpu values was removed. This can lead to subsequent
-out-of-bounds cpumap array accesses in case the cpu number is higher
-than the actual count.
-
-This patch restores the check.
-
-This is CVE-2013-2072 / XSA-56
-
-Signed-off-by: Petr Matousek <pmatouse@redhat.com>
-
-diff --git a/tools/python/xen/lowlevel/xc/xc.c b/tools/python/xen/lowlevel/xc/xc.c
-index e220f68..e611b24 100644
---- a/tools/python/xen/lowlevel/xc/xc.c
-+++ b/tools/python/xen/lowlevel/xc/xc.c
-@@ -228,6 +228,7 @@ static PyObject *pyxc_vcpu_setaffinity(XcObject *self,
- int vcpu = 0, i;
- xc_cpumap_t cpumap;
- PyObject *cpulist = NULL;
-+ int nr_cpus;
-
- static char *kwd_list[] = { "domid", "vcpu", "cpumap", NULL };
-
-@@ -235,6 +236,10 @@ static PyObject *pyxc_vcpu_setaffinity(XcObject *self,
- &dom, &vcpu, &cpulist) )
- return NULL;
-
-+ nr_cpus = xc_get_max_cpus(self->xc_handle);
-+ if ( nr_cpus == 0 )
-+ return pyxc_error_to_exception(self->xc_handle);
-+
- cpumap = xc_cpumap_alloc(self->xc_handle);
- if(cpumap == NULL)
- return pyxc_error_to_exception(self->xc_handle);
-@@ -244,6 +249,13 @@ static PyObject *pyxc_vcpu_setaffinity(XcObject *self,
- for ( i = 0; i < PyList_Size(cpulist); i++ )
- {
- long cpu = PyInt_AsLong(PyList_GetItem(cpulist, i));
-+ if ( cpu < 0 || cpu >= nr_cpus )
-+ {
-+ free(cpumap);
-+ errno = EINVAL;
-+ PyErr_SetFromErrno(xc_error_obj);
-+ return NULL;
-+ }
- cpumap[cpu / 8] |= 1 << (cpu % 8);
- }
- }
diff --git a/main/xen/xsa57.patch b/main/xen/xsa57.patch
deleted file mode 100644
index 178b818890..0000000000
--- a/main/xen/xsa57.patch
+++ /dev/null
@@ -1,333 +0,0 @@
-libxl: Restrict permissions on PV console device xenstore nodes
-
-Matthew Daley has observed that the PV console protocol places sensitive host
-state into guest-writeable xenstore locations. This includes:
-
- - The pty used to communicate between the console backend daemon and its
- client, allowing the guest administrator to read and write arbitrary host
- files.
- - The output file, allowing the guest administrator to write arbitrary host
- files or to target arbitrary qemu chardevs which include sockets, udp, pty,
- pipes etc (see -chardev in qemu(1) for a more complete list).
- - The maximum buffer size, allowing the guest administrator to consume more
- resources than the host administrator has configured.
- - The backend to use (qemu vs xenconsoled), potentially allowing the guest
- administrator to confuse host software.
-
-So we arrange to make the sensitive keys in the xenstore frontend directory
-read only for the guest. This is safe since the xenstore permissions model,
-unlike POSIX directory permissions, does not allow the guest to remove and
-recreate a node if it has write access to the containing directory.
-
-There are a few associated wrinkles:
-
- - The primary PV console is "special". Its xenstore node is not under the
- usual /devices/ subtree and it does not use the customary xenstore state
- machine protocol. Unfortunately its directory is used for other things,
- including the vnc-port node, which we do not want the guest to be able to
- write to. Rather than trying to track down all the possible secondary uses
- of this directory just make it r/o to the guest. All newly created
- subdirectories inherit these permissions and so are now safe by default.
-
- - The other serial consoles do use the customary xenstore state machine and
- therefore need write access to at least the "protocol" and "state" nodes,
- however they may also want to use arbitrary "feature-foo" nodes (although
- I'm not aware of any) and therefore we cannot simply lock down the entire
- frontend directory. Instead we add support to libxl__device_generic_add for
- frontend keys which are explicitly read only and use that to lock down the
- sensitive keys.
-
- - Minios' console frontend wants to write the "type" node, which it has no
- business doing since this is a host/toolstack level decision. This fails
- now that the node has become read only to the PV guest. Since the toolstack
- already writes this node just remove the attempt to set it.
-
-This is CVE-XXXX-XXX / XSA-57
-
-Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
-
-Conflicts:
- tools/libxl/libxl.c (no vtpm, free front_ro on error in
- libxl__device_console_add)
-
-diff --git a/extras/mini-os/console/xenbus.c b/extras/mini-os/console/xenbus.c
-index 77de82a..e65baf7 100644
---- a/extras/mini-os/console/xenbus.c
-+++ b/extras/mini-os/console/xenbus.c
-@@ -122,12 +122,6 @@ again:
- goto abort_transaction;
- }
-
-- err = xenbus_printf(xbt, nodename, "type", "%s", "ioemu");
-- if (err) {
-- message = "writing type";
-- goto abort_transaction;
-- }
--
- snprintf(path, sizeof(path), "%s/state", nodename);
- err = xenbus_switch_state(xbt, path, XenbusStateConnected);
- if (err) {
-diff --git a/tools/libxl/libxl.c b/tools/libxl/libxl.c
-index a6e9601..32d788a 100644
---- a/tools/libxl/libxl.c
-+++ b/tools/libxl/libxl.c
-@@ -1920,8 +1920,9 @@ static void device_disk_add(libxl__egc *egc, uint32_t domid,
- flexarray_append(front, disk->is_cdrom ? "cdrom" : "disk");
-
- libxl__device_generic_add(gc, t, device,
-- libxl__xs_kvs_of_flexarray(gc, back, back->count),
-- libxl__xs_kvs_of_flexarray(gc, front, front->count));
-+ libxl__xs_kvs_of_flexarray(gc, back, back->count),
-+ libxl__xs_kvs_of_flexarray(gc, front, front->count),
-+ NULL);
-
- rc = libxl__xs_transaction_commit(gc, &t);
- if (!rc) break;
-@@ -2633,8 +2634,9 @@ void libxl__device_nic_add(libxl__egc *egc, uint32_t domid,
- flexarray_append(front, libxl__sprintf(gc,
- LIBXL_MAC_FMT, LIBXL_MAC_BYTES(nic->mac)));
- libxl__device_generic_add(gc, XBT_NULL, device,
-- libxl__xs_kvs_of_flexarray(gc, back, back->count),
-- libxl__xs_kvs_of_flexarray(gc, front, front->count));
-+ libxl__xs_kvs_of_flexarray(gc, back, back->count),
-+ libxl__xs_kvs_of_flexarray(gc, front, front->count),
-+ NULL);
-
- aodev->dev = device;
- aodev->action = DEVICE_CONNECT;
-@@ -2830,7 +2832,7 @@ int libxl__device_console_add(libxl__gc *gc, uint32_t domid,
- libxl__device_console *console,
- libxl__domain_build_state *state)
- {
-- flexarray_t *front;
-+ flexarray_t *front, *ro_front;
- flexarray_t *back;
- libxl__device device;
- int rc;
-@@ -2845,6 +2847,11 @@ int libxl__device_console_add(libxl__gc *gc, uint32_t domid,
- rc = ERROR_NOMEM;
- goto out;
- }
-+ ro_front = flexarray_make(16, 1);
-+ if (!ro_front) {
-+ rc = ERROR_NOMEM;
-+ goto out;
-+ }
- back = flexarray_make(16, 1);
- if (!back) {
- rc = ERROR_NOMEM;
-@@ -2871,21 +2878,24 @@ int libxl__device_console_add(libxl__gc *gc, uint32_t domid,
-
- flexarray_append(front, "backend-id");
- flexarray_append(front, libxl__sprintf(gc, "%d", console->backend_domid));
-- flexarray_append(front, "limit");
-- flexarray_append(front, libxl__sprintf(gc, "%d", LIBXL_XENCONSOLE_LIMIT));
-- flexarray_append(front, "type");
-+
-+ flexarray_append(ro_front, "limit");
-+ flexarray_append(ro_front, libxl__sprintf(gc, "%d", LIBXL_XENCONSOLE_LIMIT));
-+ flexarray_append(ro_front, "type");
- if (console->consback == LIBXL__CONSOLE_BACKEND_XENCONSOLED)
-- flexarray_append(front, "xenconsoled");
-+ flexarray_append(ro_front, "xenconsoled");
- else
-- flexarray_append(front, "ioemu");
-- flexarray_append(front, "output");
-- flexarray_append(front, console->output);
-+ flexarray_append(ro_front, "ioemu");
-+ flexarray_append(ro_front, "output");
-+ flexarray_append(ro_front, console->output);
-+ flexarray_append(ro_front, "tty");
-+ flexarray_append(ro_front, "");
-
- if (state) {
-- flexarray_append(front, "port");
-- flexarray_append(front, libxl__sprintf(gc, "%"PRIu32, state->console_port));
-- flexarray_append(front, "ring-ref");
-- flexarray_append(front, libxl__sprintf(gc, "%lu", state->console_mfn));
-+ flexarray_append(ro_front, "port");
-+ flexarray_append(ro_front, libxl__sprintf(gc, "%"PRIu32, state->console_port));
-+ flexarray_append(ro_front, "ring-ref");
-+ flexarray_append(ro_front, libxl__sprintf(gc, "%lu", state->console_mfn));
- } else {
- flexarray_append(front, "state");
- flexarray_append(front, libxl__sprintf(gc, "%d", 1));
-@@ -2894,11 +2904,13 @@ int libxl__device_console_add(libxl__gc *gc, uint32_t domid,
- }
-
- libxl__device_generic_add(gc, XBT_NULL, &device,
-- libxl__xs_kvs_of_flexarray(gc, back, back->count),
-- libxl__xs_kvs_of_flexarray(gc, front, front->count));
-+ libxl__xs_kvs_of_flexarray(gc, back, back->count),
-+ libxl__xs_kvs_of_flexarray(gc, front, front->count),
-+ libxl__xs_kvs_of_flexarray(gc, ro_front, ro_front->count));
- rc = 0;
- out_free:
- flexarray_free(back);
-+ flexarray_free(ro_front);
- flexarray_free(front);
- out:
- return rc;
-@@ -2982,8 +2994,9 @@ int libxl__device_vkb_add(libxl__gc *gc, uint32_t domid,
- flexarray_append(front, libxl__sprintf(gc, "%d", 1));
-
- libxl__device_generic_add(gc, XBT_NULL, &device,
-- libxl__xs_kvs_of_flexarray(gc, back, back->count),
-- libxl__xs_kvs_of_flexarray(gc, front, front->count));
-+ libxl__xs_kvs_of_flexarray(gc, back, back->count),
-+ libxl__xs_kvs_of_flexarray(gc, front, front->count),
-+ NULL);
- rc = 0;
- out_free:
- flexarray_free(back);
-@@ -3096,8 +3109,9 @@ int libxl__device_vfb_add(libxl__gc *gc, uint32_t domid, libxl_device_vfb *vfb)
- flexarray_append_pair(front, "state", libxl__sprintf(gc, "%d", 1));
-
- libxl__device_generic_add(gc, XBT_NULL, &device,
-- libxl__xs_kvs_of_flexarray(gc, back, back->count),
-- libxl__xs_kvs_of_flexarray(gc, front, front->count));
-+ libxl__xs_kvs_of_flexarray(gc, back, back->count),
-+ libxl__xs_kvs_of_flexarray(gc, front, front->count),
-+ NULL);
- rc = 0;
- out_free:
- flexarray_free(front);
-diff --git a/tools/libxl/libxl_device.c b/tools/libxl/libxl_device.c
-index c3283f1..1c04a21 100644
---- a/tools/libxl/libxl_device.c
-+++ b/tools/libxl/libxl_device.c
-@@ -84,11 +84,12 @@ out:
- }
-
- int libxl__device_generic_add(libxl__gc *gc, xs_transaction_t t,
-- libxl__device *device, char **bents, char **fents)
-+ libxl__device *device, char **bents, char **fents, char **ro_fents)
- {
- libxl_ctx *ctx = libxl__gc_owner(gc);
- char *frontend_path, *backend_path;
- struct xs_permissions frontend_perms[2];
-+ struct xs_permissions ro_frontend_perms[2];
- struct xs_permissions backend_perms[2];
- int create_transaction = t == XBT_NULL;
-
-@@ -100,22 +101,37 @@ int libxl__device_generic_add(libxl__gc *gc, xs_transaction_t t,
- frontend_perms[1].id = device->backend_domid;
- frontend_perms[1].perms = XS_PERM_READ;
-
-- backend_perms[0].id = device->backend_domid;
-- backend_perms[0].perms = XS_PERM_NONE;
-- backend_perms[1].id = device->domid;
-- backend_perms[1].perms = XS_PERM_READ;
-+ ro_frontend_perms[0].id = backend_perms[0].id = device->backend_domid;
-+ ro_frontend_perms[0].perms = backend_perms[0].perms = XS_PERM_NONE;
-+ ro_frontend_perms[1].id = backend_perms[1].id = device->domid;
-+ ro_frontend_perms[1].perms = backend_perms[1].perms = XS_PERM_READ;
-
- retry_transaction:
- if (create_transaction)
- t = xs_transaction_start(ctx->xsh);
- /* FIXME: read frontend_path and check state before removing stuff */
-
-- if (fents) {
-+ if (fents || ro_fents) {
- xs_rm(ctx->xsh, t, frontend_path);
- xs_mkdir(ctx->xsh, t, frontend_path);
-- xs_set_permissions(ctx->xsh, t, frontend_path, frontend_perms, ARRAY_SIZE(frontend_perms));
-+ /* Console 0 is a special case. It doesn't use the regular PV
-+ * state machine but also the frontend directory has
-+ * historically contained other information, such as the
-+ * vnc-port, which we don't want the guest fiddling with.
-+ */
-+ if (device->kind == LIBXL__DEVICE_KIND_CONSOLE && device->devid == 0)
-+ xs_set_permissions(ctx->xsh, t, frontend_path,
-+ ro_frontend_perms, ARRAY_SIZE(ro_frontend_perms));
-+ else
-+ xs_set_permissions(ctx->xsh, t, frontend_path,
-+ frontend_perms, ARRAY_SIZE(frontend_perms));
- xs_write(ctx->xsh, t, libxl__sprintf(gc, "%s/backend", frontend_path), backend_path, strlen(backend_path));
-- libxl__xs_writev(gc, t, frontend_path, fents);
-+ if (fents)
-+ libxl__xs_writev_perms(gc, t, frontend_path, fents,
-+ frontend_perms, ARRAY_SIZE(frontend_perms));
-+ if (ro_fents)
-+ libxl__xs_writev_perms(gc, t, frontend_path, ro_fents,
-+ ro_frontend_perms, ARRAY_SIZE(ro_frontend_perms));
- }
-
- if (bents) {
-diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
-index 13fa509..ae96a74 100644
---- a/tools/libxl/libxl_internal.h
-+++ b/tools/libxl/libxl_internal.h
-@@ -516,6 +516,11 @@ _hidden char **libxl__xs_kvs_of_flexarray(libxl__gc *gc, flexarray_t *array, int
- /* treats kvs as pairs of keys and values and writes each to dir. */
- _hidden int libxl__xs_writev(libxl__gc *gc, xs_transaction_t t,
- const char *dir, char **kvs);
-+/* as writev but also sets the permissions on each path */
-+_hidden int libxl__xs_writev_perms(libxl__gc *gc, xs_transaction_t t,
-+ const char *dir, char *kvs[],
-+ struct xs_permissions *perms,
-+ unsigned int num_perms);
- /* _atonce creates a transaction and writes all keys at once */
- _hidden int libxl__xs_writev_atonce(libxl__gc *gc,
- const char *dir, char **kvs);
-@@ -930,7 +935,7 @@ _hidden int libxl__device_console_add(libxl__gc *gc, uint32_t domid,
- libxl__domain_build_state *state);
-
- _hidden int libxl__device_generic_add(libxl__gc *gc, xs_transaction_t t,
-- libxl__device *device, char **bents, char **fents);
-+ libxl__device *device, char **bents, char **fents, char **ro_fents);
- _hidden char *libxl__device_backend_path(libxl__gc *gc, libxl__device *device);
- _hidden char *libxl__device_frontend_path(libxl__gc *gc, libxl__device *device);
- _hidden int libxl__parse_backend_path(libxl__gc *gc, const char *path,
-diff --git a/tools/libxl/libxl_pci.c b/tools/libxl/libxl_pci.c
-index 48986f3..d373b4d 100644
---- a/tools/libxl/libxl_pci.c
-+++ b/tools/libxl/libxl_pci.c
-@@ -106,7 +106,8 @@ int libxl__create_pci_backend(libxl__gc *gc, uint32_t domid,
-
- libxl__device_generic_add(gc, XBT_NULL, &device,
- libxl__xs_kvs_of_flexarray(gc, back, back->count),
-- libxl__xs_kvs_of_flexarray(gc, front, front->count));
-+ libxl__xs_kvs_of_flexarray(gc, front, front->count),
-+ NULL);
-
- out:
- if (back)
-diff --git a/tools/libxl/libxl_xshelp.c b/tools/libxl/libxl_xshelp.c
-index 52af484..d7eaa66 100644
---- a/tools/libxl/libxl_xshelp.c
-+++ b/tools/libxl/libxl_xshelp.c
-@@ -41,8 +41,10 @@ char **libxl__xs_kvs_of_flexarray(libxl__gc *gc, flexarray_t *array, int length)
- return kvs;
- }
-
--int libxl__xs_writev(libxl__gc *gc, xs_transaction_t t,
-- const char *dir, char *kvs[])
-+int libxl__xs_writev_perms(libxl__gc *gc, xs_transaction_t t,
-+ const char *dir, char *kvs[],
-+ struct xs_permissions *perms,
-+ unsigned int num_perms)
- {
- libxl_ctx *ctx = libxl__gc_owner(gc);
- char *path;
-@@ -56,11 +58,19 @@ int libxl__xs_writev(libxl__gc *gc, xs_transaction_t t,
- if (path && kvs[i + 1]) {
- int length = strlen(kvs[i + 1]);
- xs_write(ctx->xsh, t, path, kvs[i + 1], length);
-+ if (perms)
-+ xs_set_permissions(ctx->xsh, t, path, perms, num_perms);
- }
- }
- return 0;
- }
-
-+int libxl__xs_writev(libxl__gc *gc, xs_transaction_t t,
-+ const char *dir, char *kvs[])
-+{
-+ return libxl__xs_writev_perms(gc, t, dir, kvs, NULL, 0);
-+}
-+
- int libxl__xs_writev_atonce(libxl__gc *gc,
- const char *dir, char *kvs[])
- {
diff --git a/main/xen/xsa58-4.2.patch b/main/xen/xsa58-4.2.patch
deleted file mode 100644
index 1ea3aaa97d..0000000000
--- a/main/xen/xsa58-4.2.patch
+++ /dev/null
@@ -1,129 +0,0 @@
-x86: fix page refcount handling in page table pin error path
-
-In the original patch 7 of the series addressing XSA-45 I mistakenly
-took the addition of the call to get_page_light() in alloc_page_type()
-to cover two decrements that would happen: One for the PGT_partial bit
-that is getting set along with the call, and the other for the page
-reference the caller holds (and would be dropping on its error path).
-But of course the additional page reference is tied to the PGT_partial
-bit, and hence any caller of a function that may leave
-->arch.old_guest_table non-NULL for error cleanup purposes has to make
-sure a respective page reference gets retained.
-
-Similar issues were then also spotted elsewhere: In effect all callers
-of get_page_type_preemptible() need to deal with errors in similar
-ways. To make sure error handling can work this way without leaking
-page references, a respective assertion gets added to that function.
-
-This is CVE-2013-1432 / XSA-58.
-
-Reported-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Signed-off-by: Jan Beulich <jbeulich@suse.com>
-Tested-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Reviewed-by: Tim Deegan <tim@xen.org>
-
---- a/xen/arch/x86/domain.c
-+++ b/xen/arch/x86/domain.c
-@@ -941,6 +941,10 @@ int arch_set_info_guest(
- if ( v->vcpu_id == 0 )
- d->vm_assist = c(vm_assist);
-
-+ rc = put_old_guest_table(current);
-+ if ( rc )
-+ return rc;
-+
- if ( !compat )
- rc = (int)set_gdt(v, c.nat->gdt_frames, c.nat->gdt_ents);
- #ifdef CONFIG_COMPAT
-@@ -980,18 +984,24 @@ int arch_set_info_guest(
- }
- else
- {
-- /*
-- * Since v->arch.guest_table{,_user} are both NULL, this effectively
-- * is just a call to put_old_guest_table().
-- */
- if ( !compat )
-- rc = vcpu_destroy_pagetables(v);
-+ rc = put_old_guest_table(v);
- if ( !rc )
- rc = get_page_type_preemptible(cr3_page,
- !compat ? PGT_root_page_table
- : PGT_l3_page_table);
-- if ( rc == -EINTR )
-+ switch ( rc )
-+ {
-+ case -EINTR:
- rc = -EAGAIN;
-+ case -EAGAIN:
-+ case 0:
-+ break;
-+ default:
-+ if ( cr3_page == current->arch.old_guest_table )
-+ cr3_page = NULL;
-+ break;
-+ }
- }
- if ( rc )
- /* handled below */;
-@@ -1018,6 +1028,11 @@ int arch_set_info_guest(
- pagetable_get_page(v->arch.guest_table);
- v->arch.guest_table = pagetable_null();
- break;
-+ default:
-+ if ( cr3_page == current->arch.old_guest_table )
-+ cr3_page = NULL;
-+ case 0:
-+ break;
- }
- }
- if ( !rc )
---- a/xen/arch/x86/mm.c
-+++ b/xen/arch/x86/mm.c
-@@ -718,7 +718,8 @@ static int get_page_and_type_from_pagenr
- get_page_type_preemptible(page, type) :
- (get_page_type(page, type) ? 0 : -EINVAL));
-
-- if ( unlikely(rc) && partial >= 0 )
-+ if ( unlikely(rc) && partial >= 0 &&
-+ (!preemptible || page != current->arch.old_guest_table) )
- put_page(page);
-
- return rc;
-@@ -2638,6 +2639,7 @@ int put_page_type_preemptible(struct pag
-
- int get_page_type_preemptible(struct page_info *page, unsigned long type)
- {
-+ ASSERT(!current->arch.old_guest_table);
- return __get_page_type(page, type, 1);
- }
-
-@@ -2848,7 +2850,7 @@ static void put_superpage(unsigned long
-
- #endif
-
--static int put_old_guest_table(struct vcpu *v)
-+int put_old_guest_table(struct vcpu *v)
- {
- int rc;
-
-@@ -3253,7 +3255,8 @@ long do_mmuext_op(
- rc = -EAGAIN;
- else if ( rc != -EAGAIN )
- MEM_LOG("Error while pinning mfn %lx", page_to_mfn(page));
-- put_page(page);
-+ if ( page != curr->arch.old_guest_table )
-+ put_page(page);
- break;
- }
-
---- a/xen/include/asm-x86/mm.h
-+++ b/xen/include/asm-x86/mm.h
-@@ -374,6 +374,7 @@ void put_page_type(struct page_info *pag
- int get_page_type(struct page_info *page, unsigned long type);
- int put_page_type_preemptible(struct page_info *page);
- int get_page_type_preemptible(struct page_info *page, unsigned long type);
-+int put_old_guest_table(struct vcpu *);
- int get_page_from_l1e(
- l1_pgentry_t l1e, struct domain *l1e_owner, struct domain *pg_owner);
- void put_page_from_l1e(l1_pgentry_t l1e, struct domain *l1e_owner);
diff --git a/main/xen/xsa61-4.2-unstable.patch b/main/xen/xsa61-4.2-unstable.patch
deleted file mode 100644
index 87fbf3239f..0000000000
--- a/main/xen/xsa61-4.2-unstable.patch
+++ /dev/null
@@ -1,44 +0,0 @@
-libxl: suppress device assignment to HVM guest when there is no IOMMU
-
-This in effect copies similar logic from xend: While there's no way to
-check whether a device is assigned to a particular guest,
-XEN_DOMCTL_test_assign_device at least allows checking whether an
-IOMMU is there and whether a device has been assigned to _some_
-guest.
-
-For the time being, this should be enough to cover for the missing
-error checking/recovery in other parts of libxl's device assignment
-paths.
-
-There remains a (functionality-, but not security-related) race in
-that the iommu should be set up earlier, but this is too risky a
-change for this stage of the 4.3 release.
-
-This is a security issue, XSA-61.
-
-Signed-off-by: Jan Beulich <jbeulich@suse.com>
-Tested-by: George Dunlap <george.dunlap@eu.citrix.com>
-Acked-by: George Dunlap <george.dunlap@eu.citrix.com>
-Acked-by: Ian Jackson <ian.jackson@eu.citrix.com>
-
---- a/tools/libxl/libxl_pci.c
-+++ b/tools/libxl/libxl_pci.c
-@@ -1036,6 +1036,18 @@ int libxl__device_pci_add(libxl__gc *gc, uint32_t domid, libxl_device_pci *pcide
- int num_assigned, i, rc;
- int stubdomid = 0;
-
-+ if (libxl__domain_type(gc, domid) == LIBXL_DOMAIN_TYPE_HVM) {
-+ rc = xc_test_assign_device(ctx->xch, domid, pcidev_encode_bdf(pcidev));
-+ if (rc) {
-+ LIBXL__LOG(ctx, LIBXL__LOG_ERROR,
-+ "PCI device %04x:%02x:%02x.%u %s?",
-+ pcidev->domain, pcidev->bus, pcidev->dev, pcidev->func,
-+ errno == ENOSYS ? "cannot be assigned - no IOMMU"
-+ : "already assigned to a different guest");
-+ goto out;
-+ }
-+ }
-+
- rc = libxl__device_pci_setdefault(gc, pcidev);
- if (rc) goto out;
-
diff --git a/main/xen/xsa75-4.2.patch b/main/xen/xsa75-4.2.patch
deleted file mode 100644
index c171562e4d..0000000000
--- a/main/xen/xsa75-4.2.patch
+++ /dev/null
@@ -1,53 +0,0 @@
-nested VMX: VMLAUNCH/VMRESUME emulation must check permission first thing
-
-Otherwise uninitialized data may be used, leading to crashes.
-
-This is XSA-75.
-
-Reported-and-tested-by: Jeff Zimmerman <Jeff_Zimmerman@McAfee.com>
-Signed-off-by: Jan Beulich <jbeulich@suse.com>
-Reviewed-and-tested-by: Andrew Cooper <andrew.cooper3@citrix.com>
-
---- a/xen/arch/x86/hvm/vmx/vvmx.c
-+++ b/xen/arch/x86/hvm/vmx/vvmx.c
-@@ -1075,15 +1075,10 @@ int nvmx_handle_vmxoff(struct cpu_user_r
- return X86EMUL_OKAY;
- }
-
--int nvmx_vmresume(struct vcpu *v, struct cpu_user_regs *regs)
-+static int nvmx_vmresume(struct vcpu *v, struct cpu_user_regs *regs)
- {
- struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
- struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
-- int rc;
--
-- rc = vmx_inst_check_privilege(regs, 0);
-- if ( rc != X86EMUL_OKAY )
-- return rc;
-
- /* check VMCS is valid and IO BITMAP is set */
- if ( (nvcpu->nv_vvmcxaddr != VMCX_EADDR) &&
-@@ -1100,6 +1095,10 @@ int nvmx_handle_vmresume(struct cpu_user
- {
- int launched;
- struct vcpu *v = current;
-+ int rc = vmx_inst_check_privilege(regs, 0);
-+
-+ if ( rc != X86EMUL_OKAY )
-+ return rc;
-
- if ( vcpu_nestedhvm(v).nv_vvmcxaddr == VMCX_EADDR )
- {
-@@ -1119,8 +1118,11 @@ int nvmx_handle_vmresume(struct cpu_user
- int nvmx_handle_vmlaunch(struct cpu_user_regs *regs)
- {
- int launched;
-- int rc;
- struct vcpu *v = current;
-+ int rc = vmx_inst_check_privilege(regs, 0);
-+
-+ if ( rc != X86EMUL_OKAY )
-+ return rc;
-
- if ( vcpu_nestedhvm(v).nv_vvmcxaddr == VMCX_EADDR )
- {
diff --git a/main/xen/xsa97-hap-4_2-prereq.patch b/main/xen/xsa97-hap-4_2-prereq.patch
new file mode 100644
index 0000000000..ce2240aec8
--- /dev/null
+++ b/main/xen/xsa97-hap-4_2-prereq.patch
@@ -0,0 +1,466 @@
+x86/mm/hap: Adjust vram tracking to play nicely with log-dirty.
+
+The previous code assumed the guest would be in one of three mutually exclusive
+modes for bookkeeping dirty pages: (1) shadow, (2) hap utilizing the log dirty
+bitmap to support functionality such as live migrate, (3) hap utilizing the
+log dirty bitmap to track dirty vram pages.
+Races arose when a guest attempted to track dirty vram while performing live
+migrate. (The dispatch table managed by paging_log_dirty_init() might change
+in the middle of a log dirty or a vram tracking function.)
+
+This change allows hap log dirty and hap vram tracking to be concurrent.
+Vram tracking no longer uses the log dirty bitmap. Instead it detects
+dirty vram pages by examining their p2m type. The log dirty bitmap is only
+used by the log dirty code. Because the two operations use different
+mechanisms, they are no longer mutually exclusive.
+
+Signed-Off-By: Robert Phillips <robert.phillips@citrix.com>
+Acked-by: Tim Deegan <tim@xen.org>
+
+Minor whitespace changes to conform with coding style
+Signed-off-by: Tim Deegan <tim@xen.org>
+
+Committed-by: Tim Deegan <tim@xen.org>
+master commit: fd91a2a662bc59677e0f217423a7a155d5465886
+master date: 2012-12-13 12:10:14 +0000
+
+--- a/xen/arch/x86/mm/hap/hap.c
++++ b/xen/arch/x86/mm/hap/hap.c
+@@ -56,132 +56,110 @@
+ /* HAP VRAM TRACKING SUPPORT */
+ /************************************************/
+
+-static int hap_enable_vram_tracking(struct domain *d)
+-{
+- struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
+-
+- if ( !dirty_vram )
+- return -EINVAL;
+-
+- /* turn on PG_log_dirty bit in paging mode */
+- paging_lock(d);
+- d->arch.paging.mode |= PG_log_dirty;
+- paging_unlock(d);
+-
+- /* set l1e entries of P2M table to be read-only. */
+- p2m_change_type_range(d, dirty_vram->begin_pfn, dirty_vram->end_pfn,
+- p2m_ram_rw, p2m_ram_logdirty);
+-
+- flush_tlb_mask(d->domain_dirty_cpumask);
+- return 0;
+-}
+-
+-static int hap_disable_vram_tracking(struct domain *d)
+-{
+- struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
+-
+- if ( !dirty_vram )
+- return -EINVAL;
+-
+- paging_lock(d);
+- d->arch.paging.mode &= ~PG_log_dirty;
+- paging_unlock(d);
+-
+- /* set l1e entries of P2M table with normal mode */
+- p2m_change_type_range(d, dirty_vram->begin_pfn, dirty_vram->end_pfn,
+- p2m_ram_logdirty, p2m_ram_rw);
+-
+- flush_tlb_mask(d->domain_dirty_cpumask);
+- return 0;
+-}
+-
+-static void hap_clean_vram_tracking(struct domain *d)
+-{
+- struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
+-
+- if ( !dirty_vram )
+- return;
+-
+- /* set l1e entries of P2M table to be read-only. */
+- p2m_change_type_range(d, dirty_vram->begin_pfn, dirty_vram->end_pfn,
+- p2m_ram_rw, p2m_ram_logdirty);
+-
+- flush_tlb_mask(d->domain_dirty_cpumask);
+-}
+-
+-static void hap_vram_tracking_init(struct domain *d)
+-{
+- paging_log_dirty_init(d, hap_enable_vram_tracking,
+- hap_disable_vram_tracking,
+- hap_clean_vram_tracking);
+-}
++/*
++ * hap_track_dirty_vram()
++ * Create the domain's dv_dirty_vram struct on demand.
++ * Create a dirty vram range on demand when some [begin_pfn:begin_pfn+nr] is
++ * first encountered.
++ * Collect the guest_dirty bitmask, a bit mask of the dirty vram pages, by
++ * calling paging_log_dirty_range(), which interrogates each vram
++ * page's p2m type looking for pages that have been made writable.
++ */
+
+ int hap_track_dirty_vram(struct domain *d,
+ unsigned long begin_pfn,
+ unsigned long nr,
+- XEN_GUEST_HANDLE_64(uint8) dirty_bitmap)
++ XEN_GUEST_HANDLE_64(uint8) guest_dirty_bitmap)
+ {
+ long rc = 0;
+- struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
++ struct sh_dirty_vram *dirty_vram;
++ uint8_t *dirty_bitmap = NULL;
+
+ if ( nr )
+ {
+- if ( paging_mode_log_dirty(d) && dirty_vram )
++ int size = (nr + BITS_PER_BYTE - 1) / BITS_PER_BYTE;
++
++ if ( !paging_mode_log_dirty(d) )
+ {
+- if ( begin_pfn != dirty_vram->begin_pfn ||
+- begin_pfn + nr != dirty_vram->end_pfn )
+- {
+- paging_log_dirty_disable(d);
+- dirty_vram->begin_pfn = begin_pfn;
+- dirty_vram->end_pfn = begin_pfn + nr;
+- rc = paging_log_dirty_enable(d);
+- if (rc != 0)
+- goto param_fail;
+- }
++ hap_logdirty_init(d);
++ rc = paging_log_dirty_enable(d);
++ if ( rc )
++ goto out;
+ }
+- else if ( !paging_mode_log_dirty(d) && !dirty_vram )
++
++ rc = -ENOMEM;
++ dirty_bitmap = xzalloc_bytes(size);
++ if ( !dirty_bitmap )
++ goto out;
++
++ paging_lock(d);
++
++ dirty_vram = d->arch.hvm_domain.dirty_vram;
++ if ( !dirty_vram )
+ {
+ rc = -ENOMEM;
+- if ( (dirty_vram = xmalloc(struct sh_dirty_vram)) == NULL )
+- goto param_fail;
++ if ( (dirty_vram = xzalloc(struct sh_dirty_vram)) == NULL )
++ {
++ paging_unlock(d);
++ goto out;
++ }
+
++ d->arch.hvm_domain.dirty_vram = dirty_vram;
++ }
++
++ if ( begin_pfn != dirty_vram->begin_pfn ||
++ begin_pfn + nr != dirty_vram->end_pfn )
++ {
+ dirty_vram->begin_pfn = begin_pfn;
+ dirty_vram->end_pfn = begin_pfn + nr;
+- d->arch.hvm_domain.dirty_vram = dirty_vram;
+- hap_vram_tracking_init(d);
+- rc = paging_log_dirty_enable(d);
+- if (rc != 0)
+- goto param_fail;
++
++ paging_unlock(d);
++
++ /* set l1e entries of range within P2M table to be read-only. */
++ p2m_change_type_range(d, begin_pfn, begin_pfn + nr,
++ p2m_ram_rw, p2m_ram_logdirty);
++
++ flush_tlb_mask(d->domain_dirty_cpumask);
++
++ memset(dirty_bitmap, 0xff, size); /* consider all pages dirty */
+ }
+ else
+ {
+- if ( !paging_mode_log_dirty(d) && dirty_vram )
+- rc = -EINVAL;
+- else
+- rc = -ENODATA;
+- goto param_fail;
++ paging_unlock(d);
++
++ domain_pause(d);
++
++ /* get the bitmap */
++ paging_log_dirty_range(d, begin_pfn, nr, dirty_bitmap);
++
++ domain_unpause(d);
+ }
+- /* get the bitmap */
+- rc = paging_log_dirty_range(d, begin_pfn, nr, dirty_bitmap);
++
++ rc = -EFAULT;
++ if ( copy_to_guest(guest_dirty_bitmap, dirty_bitmap, size) == 0 )
++ rc = 0;
+ }
+ else
+ {
+- if ( paging_mode_log_dirty(d) && dirty_vram ) {
+- rc = paging_log_dirty_disable(d);
+- xfree(dirty_vram);
+- dirty_vram = d->arch.hvm_domain.dirty_vram = NULL;
+- } else
+- rc = 0;
+- }
++ paging_lock(d);
+
+- return rc;
++ dirty_vram = d->arch.hvm_domain.dirty_vram;
++ if ( dirty_vram )
++ {
++ /*
++ * If zero pages specified while tracking dirty vram
++ * then stop tracking
++ */
++ xfree(dirty_vram);
++ d->arch.hvm_domain.dirty_vram = NULL;
++ }
+
+-param_fail:
+- if ( dirty_vram )
+- {
+- xfree(dirty_vram);
+- dirty_vram = d->arch.hvm_domain.dirty_vram = NULL;
++ paging_unlock(d);
+ }
++out:
++ if ( dirty_bitmap )
++ xfree(dirty_bitmap);
++
+ return rc;
+ }
+
+@@ -223,13 +201,6 @@ static void hap_clean_dirty_bitmap(struc
+
+ void hap_logdirty_init(struct domain *d)
+ {
+- struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
+- if ( paging_mode_log_dirty(d) && dirty_vram )
+- {
+- paging_log_dirty_disable(d);
+- xfree(dirty_vram);
+- dirty_vram = d->arch.hvm_domain.dirty_vram = NULL;
+- }
+
+ /* Reinitialize logdirty mechanism */
+ paging_log_dirty_init(d, hap_enable_log_dirty,
+--- a/xen/arch/x86/mm/paging.c
++++ b/xen/arch/x86/mm/paging.c
+@@ -447,157 +447,38 @@ int paging_log_dirty_op(struct domain *d
+ return rv;
+ }
+
+-int paging_log_dirty_range(struct domain *d,
+- unsigned long begin_pfn,
+- unsigned long nr,
+- XEN_GUEST_HANDLE_64(uint8) dirty_bitmap)
+-{
+- int rv = 0;
+- unsigned long pages = 0;
+- mfn_t *l4, *l3, *l2;
+- unsigned long *l1;
+- int b1, b2, b3, b4;
+- int i2, i3, i4;
+-
+- d->arch.paging.log_dirty.clean_dirty_bitmap(d);
+- paging_lock(d);
+-
+- PAGING_DEBUG(LOGDIRTY, "log-dirty-range: dom %u faults=%u dirty=%u\n",
+- d->domain_id,
+- d->arch.paging.log_dirty.fault_count,
+- d->arch.paging.log_dirty.dirty_count);
+-
+- if ( unlikely(d->arch.paging.log_dirty.failed_allocs) ) {
+- printk("%s: %d failed page allocs while logging dirty pages\n",
+- __FUNCTION__, d->arch.paging.log_dirty.failed_allocs);
+- rv = -ENOMEM;
+- goto out;
+- }
++void paging_log_dirty_range(struct domain *d,
++ unsigned long begin_pfn,
++ unsigned long nr,
++ uint8_t *dirty_bitmap)
++{
++ struct p2m_domain *p2m = p2m_get_hostp2m(d);
++ int i;
++ unsigned long pfn;
++
++ /*
++ * Set l1e entries of P2M table to be read-only.
++ *
++ * On first write, it page faults, its entry is changed to read-write,
++ * and on retry the write succeeds.
++ *
++ * We populate dirty_bitmap by looking for entries that have been
++ * switched to read-write.
++ */
+
+- if ( !d->arch.paging.log_dirty.fault_count &&
+- !d->arch.paging.log_dirty.dirty_count ) {
+- unsigned int size = BITS_TO_LONGS(nr);
+-
+- if ( clear_guest(dirty_bitmap, size * BYTES_PER_LONG) != 0 )
+- rv = -EFAULT;
+- goto out;
+- }
+- d->arch.paging.log_dirty.fault_count = 0;
+- d->arch.paging.log_dirty.dirty_count = 0;
++ p2m_lock(p2m);
+
+- b1 = L1_LOGDIRTY_IDX(begin_pfn);
+- b2 = L2_LOGDIRTY_IDX(begin_pfn);
+- b3 = L3_LOGDIRTY_IDX(begin_pfn);
+- b4 = L4_LOGDIRTY_IDX(begin_pfn);
+- l4 = paging_map_log_dirty_bitmap(d);
+-
+- for ( i4 = b4;
+- (pages < nr) && (i4 < LOGDIRTY_NODE_ENTRIES);
+- i4++ )
++ for ( i = 0, pfn = begin_pfn; pfn < begin_pfn + nr; i++, pfn++ )
+ {
+- l3 = (l4 && mfn_valid(l4[i4])) ? map_domain_page(mfn_x(l4[i4])) : NULL;
+- for ( i3 = b3;
+- (pages < nr) && (i3 < LOGDIRTY_NODE_ENTRIES);
+- i3++ )
+- {
+- l2 = ((l3 && mfn_valid(l3[i3])) ?
+- map_domain_page(mfn_x(l3[i3])) : NULL);
+- for ( i2 = b2;
+- (pages < nr) && (i2 < LOGDIRTY_NODE_ENTRIES);
+- i2++ )
+- {
+- unsigned int bytes = PAGE_SIZE;
+- uint8_t *s;
+- l1 = ((l2 && mfn_valid(l2[i2])) ?
+- map_domain_page(mfn_x(l2[i2])) : NULL);
+-
+- s = ((uint8_t*)l1) + (b1 >> 3);
+- bytes -= b1 >> 3;
+-
+- if ( likely(((nr - pages + 7) >> 3) < bytes) )
+- bytes = (unsigned int)((nr - pages + 7) >> 3);
+-
+- if ( !l1 )
+- {
+- if ( clear_guest_offset(dirty_bitmap, pages >> 3,
+- bytes) != 0 )
+- {
+- rv = -EFAULT;
+- goto out;
+- }
+- }
+- /* begin_pfn is not 32K aligned, hence we have to bit
+- * shift the bitmap */
+- else if ( b1 & 0x7 )
+- {
+- int i, j;
+- uint32_t *l = (uint32_t*) s;
+- int bits = b1 & 0x7;
+- int bitmask = (1 << bits) - 1;
+- int size = (bytes + BYTES_PER_LONG - 1) / BYTES_PER_LONG;
+- unsigned long bitmap[size];
+- static unsigned long printed = 0;
+-
+- if ( printed != begin_pfn )
+- {
+- dprintk(XENLOG_DEBUG, "%s: begin_pfn %lx is not 32K aligned!\n",
+- __FUNCTION__, begin_pfn);
+- printed = begin_pfn;
+- }
+-
+- for ( i = 0; i < size - 1; i++, l++ ) {
+- bitmap[i] = ((*l) >> bits) |
+- (((*((uint8_t*)(l + 1))) & bitmask) << (sizeof(*l) * 8 - bits));
+- }
+- s = (uint8_t*) l;
+- size = BYTES_PER_LONG - ((b1 >> 3) & 0x3);
+- bitmap[i] = 0;
+- for ( j = 0; j < size; j++, s++ )
+- bitmap[i] |= (*s) << (j * 8);
+- bitmap[i] = (bitmap[i] >> bits) | (bitmask << (size * 8 - bits));
+- if ( copy_to_guest_offset(dirty_bitmap, (pages >> 3),
+- (uint8_t*) bitmap, bytes) != 0 )
+- {
+- rv = -EFAULT;
+- goto out;
+- }
+- }
+- else
+- {
+- if ( copy_to_guest_offset(dirty_bitmap, pages >> 3,
+- s, bytes) != 0 )
+- {
+- rv = -EFAULT;
+- goto out;
+- }
+- }
+-
+- pages += bytes << 3;
+- if ( l1 )
+- {
+- clear_page(l1);
+- unmap_domain_page(l1);
+- }
+- b1 = b1 & 0x7;
+- }
+- b2 = 0;
+- if ( l2 )
+- unmap_domain_page(l2);
+- }
+- b3 = 0;
+- if ( l3 )
+- unmap_domain_page(l3);
++ p2m_type_t pt;
++ pt = p2m_change_type(d, pfn, p2m_ram_rw, p2m_ram_logdirty);
++ if ( pt == p2m_ram_rw )
++ dirty_bitmap[i >> 3] |= (1 << (i & 7));
+ }
+- if ( l4 )
+- unmap_domain_page(l4);
+-
+- paging_unlock(d);
+
+- return rv;
++ p2m_unlock(p2m);
+
+- out:
+- paging_unlock(d);
+- return rv;
++ flush_tlb_mask(d->domain_dirty_cpumask);
+ }
+
+ /* Note that this function takes three function pointers. Callers must supply
+--- a/xen/include/asm-x86/config.h
++++ b/xen/include/asm-x86/config.h
+@@ -17,6 +17,7 @@
+
+ #define BYTES_PER_LONG (1 << LONG_BYTEORDER)
+ #define BITS_PER_LONG (BYTES_PER_LONG << 3)
++#define BITS_PER_BYTE 8
+
+ #define CONFIG_X86 1
+ #define CONFIG_X86_HT 1
+--- a/xen/include/asm-x86/paging.h
++++ b/xen/include/asm-x86/paging.h
+@@ -145,10 +145,10 @@ struct paging_mode {
+ void paging_free_log_dirty_bitmap(struct domain *d);
+
+ /* get the dirty bitmap for a specific range of pfns */
+-int paging_log_dirty_range(struct domain *d,
+- unsigned long begin_pfn,
+- unsigned long nr,
+- XEN_GUEST_HANDLE_64(uint8) dirty_bitmap);
++void paging_log_dirty_range(struct domain *d,
++ unsigned long begin_pfn,
++ unsigned long nr,
++ uint8_t *dirty_bitmap);
+
+ /* enable log dirty */
+ int paging_log_dirty_enable(struct domain *d);
diff --git a/main/xen/xsa97-hap-4_2.patch b/main/xen/xsa97-hap-4_2.patch
new file mode 100644
index 0000000000..5f89b58b7b
--- /dev/null
+++ b/main/xen/xsa97-hap-4_2.patch
@@ -0,0 +1,485 @@
+x86/paging: make log-dirty operations preemptible
+
+Both the freeing and the inspection of the bitmap get done in (nested)
+loops which - besides having a rather high iteration count in general,
+albeit that would be covered by XSA-77 - have the number of non-trivial
+iterations they need to perform (indirectly) controllable by both the
+guest they are for and any domain controlling the guest (including the
+one running qemu for it).
+
+This is XSA-97.
+
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Reviewed-by: Tim Deegan <tim@xen.org>
+
+--- a/xen/arch/x86/domain.c
++++ b/xen/arch/x86/domain.c
+@@ -2136,7 +2136,9 @@ int domain_relinquish_resources(struct d
+ pci_release_devices(d);
+
+ /* Tear down paging-assistance stuff. */
+- paging_teardown(d);
++ ret = paging_teardown(d);
++ if ( ret )
++ return ret;
+
+ /* Drop the in-use references to page-table bases. */
+ for_each_vcpu ( d, v )
+--- a/xen/arch/x86/domctl.c
++++ b/xen/arch/x86/domctl.c
+@@ -66,6 +66,9 @@ long arch_do_domctl(
+ &domctl->u.shadow_op,
+ guest_handle_cast(u_domctl, void));
+ rcu_unlock_domain(d);
++ if ( ret == -EAGAIN )
++ return hypercall_create_continuation(__HYPERVISOR_domctl,
++ "h", u_domctl);
+ copy_to_guest(u_domctl, domctl, 1);
+ }
+ }
+--- a/xen/arch/x86/mm/hap/hap.c
++++ b/xen/arch/x86/mm/hap/hap.c
+@@ -678,8 +678,7 @@ int hap_domctl(struct domain *d, xen_dom
+ paging_unlock(d);
+ if ( preempted )
+ /* Not finished. Set up to re-run the call. */
+- rc = hypercall_create_continuation(__HYPERVISOR_domctl, "h",
+- u_domctl);
++ rc = -EAGAIN;
+ else
+ /* Finished. Return the new allocation */
+ sc->mb = hap_get_allocation(d);
+--- a/xen/arch/x86/mm/paging.c
++++ b/xen/arch/x86/mm/paging.c
+@@ -26,6 +26,7 @@
+ #include <asm/shadow.h>
+ #include <asm/p2m.h>
+ #include <asm/hap.h>
++#include <asm/event.h>
+ #include <asm/hvm/nestedhvm.h>
+ #include <xen/numa.h>
+ #include <xsm/xsm.h>
+@@ -116,26 +117,46 @@ static void paging_free_log_dirty_page(s
+ d->arch.paging.free_page(d, mfn_to_page(mfn));
+ }
+
+-void paging_free_log_dirty_bitmap(struct domain *d)
++static int paging_free_log_dirty_bitmap(struct domain *d, int rc)
+ {
+ mfn_t *l4, *l3, *l2;
+ int i4, i3, i2;
+
++ paging_lock(d);
++
+ if ( !mfn_valid(d->arch.paging.log_dirty.top) )
+- return;
++ {
++ paging_unlock(d);
++ return 0;
++ }
+
+- paging_lock(d);
++ if ( !d->arch.paging.preempt.vcpu )
++ {
++ memset(&d->arch.paging.preempt.log_dirty, 0,
++ sizeof(d->arch.paging.preempt.log_dirty));
++ ASSERT(rc <= 0);
++ d->arch.paging.preempt.log_dirty.done = -rc;
++ }
++ else if ( d->arch.paging.preempt.vcpu != current ||
++ d->arch.paging.preempt.op != XEN_DOMCTL_SHADOW_OP_OFF )
++ {
++ paging_unlock(d);
++ return -EBUSY;
++ }
+
+ l4 = map_domain_page(mfn_x(d->arch.paging.log_dirty.top));
++ i4 = d->arch.paging.preempt.log_dirty.i4;
++ i3 = d->arch.paging.preempt.log_dirty.i3;
++ rc = 0;
+
+- for ( i4 = 0; i4 < LOGDIRTY_NODE_ENTRIES; i4++ )
++ for ( ; i4 < LOGDIRTY_NODE_ENTRIES; i4++, i3 = 0 )
+ {
+ if ( !mfn_valid(l4[i4]) )
+ continue;
+
+ l3 = map_domain_page(mfn_x(l4[i4]));
+
+- for ( i3 = 0; i3 < LOGDIRTY_NODE_ENTRIES; i3++ )
++ for ( ; i3 < LOGDIRTY_NODE_ENTRIES; i3++ )
+ {
+ if ( !mfn_valid(l3[i3]) )
+ continue;
+@@ -148,20 +169,54 @@ void paging_free_log_dirty_bitmap(struct
+
+ unmap_domain_page(l2);
+ paging_free_log_dirty_page(d, l3[i3]);
++ l3[i3] = _mfn(INVALID_MFN);
++
++ if ( i3 < LOGDIRTY_NODE_ENTRIES - 1 && hypercall_preempt_check() )
++ {
++ d->arch.paging.preempt.log_dirty.i3 = i3 + 1;
++ d->arch.paging.preempt.log_dirty.i4 = i4;
++ rc = -EAGAIN;
++ break;
++ }
+ }
+
+ unmap_domain_page(l3);
++ if ( rc )
++ break;
+ paging_free_log_dirty_page(d, l4[i4]);
++ l4[i4] = _mfn(INVALID_MFN);
++
++ if ( i4 < LOGDIRTY_NODE_ENTRIES - 1 && hypercall_preempt_check() )
++ {
++ d->arch.paging.preempt.log_dirty.i3 = 0;
++ d->arch.paging.preempt.log_dirty.i4 = i4 + 1;
++ rc = -EAGAIN;
++ break;
++ }
+ }
+
+ unmap_domain_page(l4);
+- paging_free_log_dirty_page(d, d->arch.paging.log_dirty.top);
+- d->arch.paging.log_dirty.top = _mfn(INVALID_MFN);
+
+- ASSERT(d->arch.paging.log_dirty.allocs == 0);
+- d->arch.paging.log_dirty.failed_allocs = 0;
++ if ( !rc )
++ {
++ paging_free_log_dirty_page(d, d->arch.paging.log_dirty.top);
++ d->arch.paging.log_dirty.top = _mfn(INVALID_MFN);
++
++ ASSERT(d->arch.paging.log_dirty.allocs == 0);
++ d->arch.paging.log_dirty.failed_allocs = 0;
++
++ rc = -d->arch.paging.preempt.log_dirty.done;
++ d->arch.paging.preempt.vcpu = NULL;
++ }
++ else
++ {
++ d->arch.paging.preempt.vcpu = current;
++ d->arch.paging.preempt.op = XEN_DOMCTL_SHADOW_OP_OFF;
++ }
+
+ paging_unlock(d);
++
++ return rc;
+ }
+
+ int paging_log_dirty_enable(struct domain *d)
+@@ -178,15 +233,25 @@ int paging_log_dirty_enable(struct domai
+ return ret;
+ }
+
+-int paging_log_dirty_disable(struct domain *d)
++static int paging_log_dirty_disable(struct domain *d, bool_t resuming)
+ {
+- int ret;
++ int ret = 1;
++
++ if ( !resuming )
++ {
++ domain_pause(d);
++ /* Safe because the domain is paused. */
++ ret = d->arch.paging.log_dirty.disable_log_dirty(d);
++ ASSERT(ret <= 0);
++ }
+
+- domain_pause(d);
+- /* Safe because the domain is paused. */
+- ret = d->arch.paging.log_dirty.disable_log_dirty(d);
+ if ( !paging_mode_log_dirty(d) )
+- paging_free_log_dirty_bitmap(d);
++ {
++ ret = paging_free_log_dirty_bitmap(d, ret);
++ if ( ret == -EAGAIN )
++ return ret;
++ }
++
+ domain_unpause(d);
+
+ return ret;
+@@ -326,7 +391,9 @@ int paging_mfn_is_dirty(struct domain *d
+
+ /* Read a domain's log-dirty bitmap and stats. If the operation is a CLEAN,
+ * clear the bitmap and stats as well. */
+-int paging_log_dirty_op(struct domain *d, struct xen_domctl_shadow_op *sc)
++static int paging_log_dirty_op(struct domain *d,
++ struct xen_domctl_shadow_op *sc,
++ bool_t resuming)
+ {
+ int rv = 0, clean = 0, peek = 1;
+ unsigned long pages = 0;
+@@ -334,9 +401,22 @@ int paging_log_dirty_op(struct domain *d
+ unsigned long *l1 = NULL;
+ int i4, i3, i2;
+
+- domain_pause(d);
++ if ( !resuming )
++ domain_pause(d);
+ paging_lock(d);
+
++ if ( !d->arch.paging.preempt.vcpu )
++ memset(&d->arch.paging.preempt.log_dirty, 0,
++ sizeof(d->arch.paging.preempt.log_dirty));
++ else if ( d->arch.paging.preempt.vcpu != current ||
++ d->arch.paging.preempt.op != sc->op )
++ {
++ paging_unlock(d);
++ ASSERT(!resuming);
++ domain_unpause(d);
++ return -EBUSY;
++ }
++
+ clean = (sc->op == XEN_DOMCTL_SHADOW_OP_CLEAN);
+
+ PAGING_DEBUG(LOGDIRTY, "log-dirty %s: dom %u faults=%u dirty=%u\n",
+@@ -365,17 +445,15 @@ int paging_log_dirty_op(struct domain *d
+ goto out;
+ }
+
+- pages = 0;
+ l4 = paging_map_log_dirty_bitmap(d);
++ i4 = d->arch.paging.preempt.log_dirty.i4;
++ i3 = d->arch.paging.preempt.log_dirty.i3;
++ pages = d->arch.paging.preempt.log_dirty.done;
+
+- for ( i4 = 0;
+- (pages < sc->pages) && (i4 < LOGDIRTY_NODE_ENTRIES);
+- i4++ )
++ for ( ; (pages < sc->pages) && (i4 < LOGDIRTY_NODE_ENTRIES); i4++, i3 = 0 )
+ {
+ l3 = (l4 && mfn_valid(l4[i4])) ? map_domain_page(mfn_x(l4[i4])) : NULL;
+- for ( i3 = 0;
+- (pages < sc->pages) && (i3 < LOGDIRTY_NODE_ENTRIES);
+- i3++ )
++ for ( ; (pages < sc->pages) && (i3 < LOGDIRTY_NODE_ENTRIES); i3++ )
+ {
+ l2 = ((l3 && mfn_valid(l3[i3])) ?
+ map_domain_page(mfn_x(l3[i3])) : NULL);
+@@ -410,18 +488,51 @@ int paging_log_dirty_op(struct domain *d
+ }
+ if ( l2 )
+ unmap_domain_page(l2);
++
++ if ( i3 < LOGDIRTY_NODE_ENTRIES - 1 && hypercall_preempt_check() )
++ {
++ d->arch.paging.preempt.log_dirty.i4 = i4;
++ d->arch.paging.preempt.log_dirty.i3 = i3 + 1;
++ rv = -EAGAIN;
++ break;
++ }
+ }
+ if ( l3 )
+ unmap_domain_page(l3);
++
++ if ( !rv && i4 < LOGDIRTY_NODE_ENTRIES - 1 &&
++ hypercall_preempt_check() )
++ {
++ d->arch.paging.preempt.log_dirty.i4 = i4 + 1;
++ d->arch.paging.preempt.log_dirty.i3 = 0;
++ rv = -EAGAIN;
++ }
++ if ( rv )
++ break;
+ }
+ if ( l4 )
+ unmap_domain_page(l4);
+
+- if ( pages < sc->pages )
+- sc->pages = pages;
++ if ( !rv )
++ d->arch.paging.preempt.vcpu = NULL;
++ else
++ {
++ d->arch.paging.preempt.vcpu = current;
++ d->arch.paging.preempt.op = sc->op;
++ d->arch.paging.preempt.log_dirty.done = pages;
++ }
+
+ paging_unlock(d);
+
++ if ( rv )
++ {
++ /* Never leave the domain paused for other errors. */
++ ASSERT(rv == -EAGAIN);
++ return rv;
++ }
++
++ if ( pages < sc->pages )
++ sc->pages = pages;
+ if ( clean )
+ {
+ /* We need to further call clean_dirty_bitmap() functions of specific
+@@ -432,6 +543,7 @@ int paging_log_dirty_op(struct domain *d
+ return rv;
+
+ out:
++ d->arch.paging.preempt.vcpu = NULL;
+ paging_unlock(d);
+ domain_unpause(d);
+
+@@ -498,12 +610,6 @@ void paging_log_dirty_init(struct domain
+ d->arch.paging.log_dirty.clean_dirty_bitmap = clean_dirty_bitmap;
+ }
+
+-/* This function fress log dirty bitmap resources. */
+-static void paging_log_dirty_teardown(struct domain*d)
+-{
+- paging_free_log_dirty_bitmap(d);
+-}
+-
+ /************************************************/
+ /* CODE FOR PAGING SUPPORT */
+ /************************************************/
+@@ -547,6 +653,7 @@ void paging_vcpu_init(struct vcpu *v)
+ int paging_domctl(struct domain *d, xen_domctl_shadow_op_t *sc,
+ XEN_GUEST_HANDLE(void) u_domctl)
+ {
++ bool_t resuming = 0;
+ int rc;
+
+ if ( unlikely(d == current->domain) )
+@@ -569,6 +676,20 @@ int paging_domctl(struct domain *d, xen_
+ return -EINVAL;
+ }
+
++ if ( d->arch.paging.preempt.vcpu )
++ {
++ if ( d->arch.paging.preempt.vcpu != current ||
++ d->arch.paging.preempt.op != sc->op )
++ {
++ printk(XENLOG_G_DEBUG
++ "d%d:v%d: Paging op %#x on Dom%u with unfinished prior op %#x\n",
++ current->domain->domain_id, current->vcpu_id,
++ sc->op, d->domain_id, d->arch.paging.preempt.op);
++ return -EBUSY;
++ }
++ resuming = 1;
++ }
++
+ rc = xsm_shadow_control(d, sc->op);
+ if ( rc )
+ return rc;
+@@ -594,13 +714,13 @@ int paging_domctl(struct domain *d, xen_
+
+ case XEN_DOMCTL_SHADOW_OP_OFF:
+ if ( paging_mode_log_dirty(d) )
+- if ( (rc = paging_log_dirty_disable(d)) != 0 )
++ if ( (rc = paging_log_dirty_disable(d, resuming)) != 0 )
+ return rc;
+ break;
+
+ case XEN_DOMCTL_SHADOW_OP_CLEAN:
+ case XEN_DOMCTL_SHADOW_OP_PEEK:
+- return paging_log_dirty_op(d, sc);
++ return paging_log_dirty_op(d, sc, resuming);
+ }
+
+ /* Here, dispatch domctl to the appropriate paging code */
+@@ -611,18 +731,24 @@ int paging_domctl(struct domain *d, xen_
+ }
+
+ /* Call when destroying a domain */
+-void paging_teardown(struct domain *d)
++int paging_teardown(struct domain *d)
+ {
++ int rc;
++
+ if ( hap_enabled(d) )
+ hap_teardown(d);
+ else
+ shadow_teardown(d);
+
+ /* clean up log dirty resources. */
+- paging_log_dirty_teardown(d);
++ rc = paging_free_log_dirty_bitmap(d, 0);
++ if ( rc == -EAGAIN )
++ return rc;
+
+ /* Move populate-on-demand cache back to domain_list for destruction */
+ p2m_pod_empty_cache(d);
++
++ return rc;
+ }
+
+ /* Call once all of the references to the domain have gone away */
+--- a/xen/arch/x86/mm/shadow/common.c
++++ b/xen/arch/x86/mm/shadow/common.c
+@@ -3829,8 +3829,7 @@ int shadow_domctl(struct domain *d,
+ paging_unlock(d);
+ if ( preempted )
+ /* Not finished. Set up to re-run the call. */
+- rc = hypercall_create_continuation(
+- __HYPERVISOR_domctl, "h", u_domctl);
++ rc = -EAGAIN;
+ else
+ /* Finished. Return the new allocation */
+ sc->mb = shadow_get_allocation(d);
+--- a/xen/common/domain.c
++++ b/xen/common/domain.c
+@@ -479,7 +479,6 @@ int domain_kill(struct domain *d)
+ rc = domain_relinquish_resources(d);
+ if ( rc != 0 )
+ {
+- BUG_ON(rc != -EAGAIN);
+ break;
+ }
+ if ( sched_move_domain(d, cpupool0) )
+--- a/xen/include/asm-x86/domain.h
++++ b/xen/include/asm-x86/domain.h
+@@ -193,6 +193,20 @@ struct paging_domain {
+ struct hap_domain hap;
+ /* log dirty support */
+ struct log_dirty_domain log_dirty;
++
++ /* preemption handling */
++ struct {
++ struct vcpu *vcpu;
++ unsigned int op;
++ union {
++ struct {
++ unsigned long done:PADDR_BITS - PAGE_SHIFT;
++ unsigned long i4:PAGETABLE_ORDER;
++ unsigned long i3:PAGETABLE_ORDER;
++ } log_dirty;
++ };
++ } preempt;
++
+ /* alloc/free pages from the pool for paging-assistance structures
+ * (used by p2m and log-dirty code for their tries) */
+ struct page_info * (*alloc_page)(struct domain *d);
+--- a/xen/include/asm-x86/paging.h
++++ b/xen/include/asm-x86/paging.h
+@@ -141,9 +141,6 @@ struct paging_mode {
+ /*****************************************************************************
+ * Log dirty code */
+
+-/* free log dirty bitmap resource */
+-void paging_free_log_dirty_bitmap(struct domain *d);
+-
+ /* get the dirty bitmap for a specific range of pfns */
+ void paging_log_dirty_range(struct domain *d,
+ unsigned long begin_pfn,
+@@ -153,9 +150,6 @@ void paging_log_dirty_range(struct domai
+ /* enable log dirty */
+ int paging_log_dirty_enable(struct domain *d);
+
+-/* disable log dirty */
+-int paging_log_dirty_disable(struct domain *d);
+-
+ /* log dirty initialization */
+ void paging_log_dirty_init(struct domain *d,
+ int (*enable_log_dirty)(struct domain *d),
+@@ -218,7 +212,7 @@ int paging_domctl(struct domain *d, xen_
+ XEN_GUEST_HANDLE(void) u_domctl);
+
+ /* Call when destroying a domain */
+-void paging_teardown(struct domain *d);
++int paging_teardown(struct domain *d);
+
+ /* Call once all of the references to the domain have gone away */
+ void paging_final_teardown(struct domain *d);