 main/xen/APKBUILD                   |  17 +-
 main/xen/xsa104.patch               |  44 +
 main/xen/xsa105.patch               |  37 +
 main/xen/xsa106.patch               |  23 +
 main/xen/xsa108.patch               |  36 +
 main/xen/xsa97-hap-4_2-prereq.patch | 466 -
 main/xen/xsa97-hap-4_2.patch        | 485 -
 7 files changed, 151 insertions(+), 957 deletions(-)
diff --git a/main/xen/APKBUILD b/main/xen/APKBUILD
index 9f3221d865..169649b1e7 100644
--- a/main/xen/APKBUILD
+++ b/main/xen/APKBUILD
@@ -2,7 +2,7 @@
 # Contributor: Roger Pau Monne <roger.pau@entel.upc.edu>
 # Maintainer: William Pitcock <nenolod@dereferenced.org>
 pkgname=xen
-pkgver=4.2.4
+pkgver=4.2.5
 pkgrel=0
 pkgdesc="Xen hypervisor"
 url="http://www.xen.org/"
@@ -23,8 +23,11 @@ source="http://bits.xensource.com/oss-xen/release/$pkgver/$pkgname-$pkgver.tar.g
 	xsa41.patch
 	xsa41b.patch
 	xsa41c.patch
-	xsa97-hap-4_2-prereq.patch
-	xsa97-hap-4_2.patch
+
+	xsa104.patch
+	xsa105.patch
+	xsa106.patch
+	xsa108.patch
 
 	xenstored.initd
 	xenstored.confd
@@ -134,7 +137,7 @@ xend() {
 		-exec mv '{}' "$subpkgdir"/"$sitepackages"/xen \;
 }
 
-md5sums="b32be39471c93249828b117473adca9d  xen-4.2.4.tar.gz
+md5sums="d8a589be1558496f8c639f7c890bcffc  xen-4.2.5.tar.gz
 506e7ab6f9482dc95f230978d340bcd9  qemu_uclibc_configure.patch
 2dc5ddf47c53ea168729975046c3c1f9  librt.patch
 1ccde6b36a6f9542a16d998204dc9a22  qemu-xen_paths.patch
@@ -142,8 +145,10 @@ md5sums="b32be39471c93249828b117473adca9d  xen-4.2.4.tar.gz
 8ad8942000b8a4be4917599cad9209cf  xsa41.patch
 ed7d0399c6ca6aeee479da5d8f807fe0  xsa41b.patch
 2f3dd7bdc59d104370066d6582725575  xsa41c.patch
-4778066a3338ca9a2263048e6a22bb6b  xsa97-hap-4_2-prereq.patch
-052b4144e2eef36757a28e7011d0ac74  xsa97-hap-4_2.patch
+1cc14dc8cc1a42aa93a6ea1508931d98  xsa104.patch
+cdc40a86a58fc864ebb7b1dbf90d2352  xsa105.patch
+f58b915ad62aef72bde99f8d04f9a7a4  xsa106.patch
+1f66f6c52941309c825f60e1bf144987  xsa108.patch
 95d8af17bf844d41a015ff32aae51ba1  xenstored.initd
 b017ccdd5e1c27bbf1513e3569d4ff07  xenstored.confd
 ed262f15fb880badb53575539468646c  xenconsoled.initd
diff --git a/main/xen/xsa104.patch b/main/xen/xsa104.patch
new file mode 100644
index 0000000000..2c5b39ee9b
--- /dev/null
+++ b/main/xen/xsa104.patch
@@ -0,0 +1,44 @@
+x86/shadow: fix race condition sampling the dirty vram state
+
+d->arch.hvm_domain.dirty_vram must be read with the domain's paging lock held.
+
+If not, two concurrent hypercalls could both end up attempting to free
+dirty_vram (the second of which will free a wild pointer), or both end up
+allocating a new dirty_vram structure (the first of which will be leaked).
+
+This is XSA-104.
+
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Reviewed-by: Tim Deegan <tim@xen.org>
+
+--- a/xen/arch/x86/mm/shadow/common.c
++++ b/xen/arch/x86/mm/shadow/common.c
+@@ -3485,7 +3485,7 @@ int shadow_track_dirty_vram(struct domai
+     int flush_tlb = 0;
+     unsigned long i;
+     p2m_type_t t;
+-    struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
++    struct sh_dirty_vram *dirty_vram;
+     struct p2m_domain *p2m = p2m_get_hostp2m(d);
+ 
+     if ( end_pfn < begin_pfn || end_pfn > p2m->max_mapped_pfn + 1 )
+@@ -3495,6 +3495,8 @@ int shadow_track_dirty_vram(struct domai
+     p2m_lock(p2m_get_hostp2m(d));
+     paging_lock(d);
+ 
++    dirty_vram = d->arch.hvm_domain.dirty_vram;
++
+     if ( dirty_vram && (!nr ||
+          ( begin_pfn != dirty_vram->begin_pfn
+            || end_pfn != dirty_vram->end_pfn )) )
+--- a/xen/include/asm-x86/hvm/domain.h
++++ b/xen/include/asm-x86/hvm/domain.h
+@@ -112,7 +112,7 @@ struct hvm_domain {
+     /* Memory ranges with pinned cache attributes. */
+     struct list_head pinned_cacheattr_ranges;
+ 
+-    /* VRAM dirty support. */
++    /* VRAM dirty support.  Protect with the domain paging lock. */
+     struct sh_dirty_vram *dirty_vram;
+ 
+     /* If one of vcpus of this domain is in no_fill_mode or
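The bug xsa104.patch closes is a classic check-then-act race: the pointer is sampled before the lock that guards it is taken. A minimal user-space sketch of the same shape, using pthreads; the tracker structure and function names are illustrative stand-ins, not Xen code:

```c
/* Illustrative sketch only -- simplified from the pattern XSA-104 fixes;
 * struct/function names are hypothetical, not Xen's. */
#include <pthread.h>
#include <stdlib.h>

struct tracker { unsigned long begin, end; };

static struct tracker *dirty_vram;   /* guarded by paging_lock */
static pthread_mutex_t paging_lock = PTHREAD_MUTEX_INITIALIZER;

/* Buggy shape: the pointer is read before the lock is taken, so two
 * callers can both see the same non-NULL pointer and both free it
 * (double free); the leak on the allocation path is analogous. */
void track_buggy(int disable)
{
    struct tracker *t = dirty_vram;   /* unlocked read: the bug */
    pthread_mutex_lock(&paging_lock);
    if ( disable && t )
    {
        free(t);
        dirty_vram = NULL;
    }
    pthread_mutex_unlock(&paging_lock);
}

/* Fixed shape, mirroring the hunk above: sample the pointer only
 * after acquiring the lock that protects it. */
void track_fixed(int disable)
{
    pthread_mutex_lock(&paging_lock);
    struct tracker *t = dirty_vram;   /* locked read */
    if ( disable && t )
    {
        free(t);
        dirty_vram = NULL;
    }
    pthread_mutex_unlock(&paging_lock);
}
```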
diff --git a/main/xen/xsa105.patch b/main/xen/xsa105.patch
new file mode 100644
index 0000000000..cc7cafddd6
--- /dev/null
+++ b/main/xen/xsa105.patch
@@ -0,0 +1,37 @@
+x86/emulate: check cpl for all privileged instructions
+
+Without this, it is possible for userspace to load its own IDT or GDT.
+
+This is XSA-105.
+
+Reported-by: Andrei LUTAS <vlutas@bitdefender.com>
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+Tested-by: Andrei LUTAS <vlutas@bitdefender.com>
+
+--- a/xen/arch/x86/x86_emulate/x86_emulate.c
++++ b/xen/arch/x86/x86_emulate/x86_emulate.c
+@@ -3314,6 +3314,7 @@ x86_emulate(
+         goto swint;
+ 
+     case 0xf4: /* hlt */
++        generate_exception_if(!mode_ring0(), EXC_GP, 0);
+         ctxt->retire.flags.hlt = 1;
+         break;
+ 
+@@ -3710,6 +3711,7 @@ x86_emulate(
+             break;
+         case 2: /* lgdt */
+         case 3: /* lidt */
++            generate_exception_if(!mode_ring0(), EXC_GP, 0);
+             generate_exception_if(ea.type != OP_MEM, EXC_UD, -1);
+             fail_if(ops->write_segment == NULL);
+             memset(&reg, 0, sizeof(reg));
+@@ -3738,6 +3740,7 @@ x86_emulate(
+         case 6: /* lmsw */
+             fail_if(ops->read_cr == NULL);
+             fail_if(ops->write_cr == NULL);
++            generate_exception_if(!mode_ring0(), EXC_GP, 0);
+             if ( (rc = ops->read_cr(0, &cr0, ctxt)) )
+                 goto done;
+             if ( ea.type == OP_REG )
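xsa105.patch works because hlt, lgdt, lidt and lmsw are privileged instructions: hardware raises #GP(0) unless the current privilege level is 0, and an instruction emulator has to mirror that check or it becomes a privilege-escalation path. A minimal sketch of the rule being restored, with hypothetical helper names (Xen's actual code uses mode_ring0() and generate_exception_if()):

```c
/* Sketch of the hardware rule the patch restores in the emulator.
 * Helper names are hypothetical, not Xen's. */
#include <stdbool.h>
#include <stdint.h>

#define EXC_GP 13  /* #GP: general protection fault vector */

/* CPL is the low two bits (RPL field) of the CS selector. */
static inline unsigned int cpl_of(uint16_t cs_selector)
{
    return cs_selector & 3;
}

/* hlt, lgdt, lidt and lmsw are privileged: anything but ring 0 must
 * take #GP(0) instead of having the instruction emulated. */
static inline bool privileged_op_allowed(uint16_t cs_selector)
{
    return cpl_of(cs_selector) == 0;
}
```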
diff --git a/main/xen/xsa106.patch b/main/xen/xsa106.patch
new file mode 100644
index 0000000000..436724dbc1
--- /dev/null
+++ b/main/xen/xsa106.patch
@@ -0,0 +1,23 @@
+x86emul: only emulate software interrupt injection for real mode
+
+Protected mode emulation currently lacks proper privilege checking of
+the referenced IDT entry, and there's currently no legitimate way for
+any of the respective instructions to reach the emulator when the guest
+is in protected mode.
+
+This is XSA-106.
+
+Reported-by: Andrei LUTAS <vlutas@bitdefender.com>
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Acked-by: Keir Fraser <keir@xen.org>
+
+--- a/xen/arch/x86/x86_emulate/x86_emulate.c
++++ b/xen/arch/x86/x86_emulate/x86_emulate.c
+@@ -2634,6 +2634,7 @@ x86_emulate(
+     case 0xcd: /* int imm8 */
+         src.val = insn_fetch_type(uint8_t);
+     swint:
++        fail_if(!in_realmode(ctxt, ops)); /* XSA-106 */
+         fail_if(ops->inject_sw_interrupt == NULL);
+         rc = ops->inject_sw_interrupt(src.val, _regs.eip - ctxt->regs->eip,
+                                       ctxt) ? : X86EMUL_EXCEPTION;
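Correctly emulating `int imm8` in protected mode would require the IDT privilege check the commit message alludes to; since no legitimate path reaches the emulator in protected mode anyway, restricting injection to real mode sidesteps the problem. A sketch of the check that was missing, with hypothetical types (not the emulator's real structures):

```c
/* Sketch of the architectural rule for INT n in protected mode that
 * the emulator lacked: the referenced IDT gate must be present and
 * its DPL must be >= CPL, else #GP. Field names are hypothetical. */
#include <stdbool.h>
#include <stdint.h>

struct idt_gate {
    uint8_t dpl;      /* descriptor privilege level of the gate */
    bool    present;
};

static bool swint_allowed(const struct idt_gate *g, unsigned int cpl)
{
    /* Without this check, ring-3 code could vector through gates
     * reserved for the kernel. */
    return g->present && cpl <= g->dpl;
}
```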
diff --git a/main/xen/xsa108.patch b/main/xen/xsa108.patch
new file mode 100644
index 0000000000..e162185789
--- /dev/null
+++ b/main/xen/xsa108.patch
@@ -0,0 +1,36 @@
+x86/HVM: properly bound x2APIC MSR range
+
+While the write path change appears to be purely cosmetic (but still
+gets done here for consistency), the read side mistake permitted
+accesses beyond the virtual APIC page.
+
+Note that while this isn't fully in line with the specification
+(digesting MSRs 0x800-0xBFF for the x2APIC), this is the minimal
+possible fix addressing the security issue and getting x2APIC related
+code into a consistent shape (elsewhere a 256 rather than 1024 wide
+window is being used too). This will be dealt with subsequently.
+
+This is XSA-108.
+
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+
+--- a/xen/arch/x86/hvm/hvm.c
++++ b/xen/arch/x86/hvm/hvm.c
+@@ -4380,7 +4380,7 @@ int hvm_msr_read_intercept(unsigned int
+         *msr_content = vcpu_vlapic(v)->hw.apic_base_msr;
+         break;
+ 
+-    case MSR_IA32_APICBASE_MSR ... MSR_IA32_APICBASE_MSR + 0x3ff:
++    case MSR_IA32_APICBASE_MSR ... MSR_IA32_APICBASE_MSR + 0xff:
+         if ( hvm_x2apic_msr_read(v, msr, msr_content) )
+             goto gp_fault;
+         break;
+@@ -4506,7 +4506,7 @@ int hvm_msr_write_intercept(unsigned int
+         vlapic_tdt_msr_set(vcpu_vlapic(v), msr_content);
+         break;
+ 
+-    case MSR_IA32_APICBASE_MSR ... MSR_IA32_APICBASE_MSR + 0x3ff:
++    case MSR_IA32_APICBASE_MSR ... MSR_IA32_APICBASE_MSR + 0xff:
+         if ( hvm_x2apic_msr_write(v, msr, msr_content) )
+             goto gp_fault;
+         break;
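The new bound matters because each x2APIC MSR is backed by a 16-byte slot in the single 4 KiB virtual APIC page, so only 256 MSRs can be backed by it. A small standalone program showing the arithmetic (the constants are taken from the architecture, not from Xen's sources):

```c
/* Why 0xff: each x2APIC MSR maps to a 16-byte slot in the 4 KiB
 * virtual APIC page, so 4096/16 = 256 MSRs (offsets 0x00-0xff from
 * MSR 0x800) fit. The old 0x3ff bound admitted 1024 MSRs, letting
 * reads run 12 KiB past the end of the page. */
#include <stdio.h>

#define APIC_PAGE_SIZE    4096u
#define X2APIC_REG_STRIDE 16u   /* APIC reg offset = (msr & 0xff) * 16 */

int main(void)
{
    unsigned int ok_msrs  = APIC_PAGE_SIZE / X2APIC_REG_STRIDE; /* 256 */
    unsigned int old_span = (0x3ff + 1) * X2APIC_REG_STRIDE;    /* 16384 */

    printf("MSRs backed by one APIC page: %u (0x800..0x8ff)\n", ok_msrs);
    printf("old bound covered %u bytes, overrunning the page by %u\n",
           old_span, old_span - APIC_PAGE_SIZE);
    return 0;
}
```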
diff --git a/main/xen/xsa97-hap-4_2-prereq.patch b/main/xen/xsa97-hap-4_2-prereq.patch
deleted file mode 100644
index ce2240aec8..0000000000
--- a/main/xen/xsa97-hap-4_2-prereq.patch
+++ /dev/null
@@ -1,466 +0,0 @@
-x86/mm/hap: Adjust vram tracking to play nicely with log-dirty.
-
-The previous code assumed the guest would be in one of three mutually exclusive
-modes for bookkeeping dirty pages: (1) shadow, (2) hap utilizing the log dirty
-bitmap to support functionality such as live migrate, (3) hap utilizing the
-log dirty bitmap to track dirty vram pages.
-Races arose when a guest attempted to track dirty vram while performing live
-migrate. (The dispatch table managed by paging_log_dirty_init() might change
-in the middle of a log dirty or a vram tracking function.)
-
-This change allows hap log dirty and hap vram tracking to be concurrent.
-Vram tracking no longer uses the log dirty bitmap. Instead it detects
-dirty vram pages by examining their p2m type. The log dirty bitmap is only
-used by the log dirty code. Because the two operations use different
-mechanisms, they are no longer mutually exclusive.
-
-Signed-Off-By: Robert Phillips <robert.phillips@citrix.com>
-Acked-by: Tim Deegan <tim@xen.org>
-
-Minor whitespace changes to conform with coding style
-Signed-off-by: Tim Deegan <tim@xen.org>
-
-Committed-by: Tim Deegan <tim@xen.org>
-master commit: fd91a2a662bc59677e0f217423a7a155d5465886
-master date: 2012-12-13 12:10:14 +0000
-
---- a/xen/arch/x86/mm/hap/hap.c
-+++ b/xen/arch/x86/mm/hap/hap.c
-@@ -56,132 +56,110 @@
- /* HAP VRAM TRACKING SUPPORT */
- /************************************************/
- 
--static int hap_enable_vram_tracking(struct domain *d)
--{
--    struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
--
--    if ( !dirty_vram )
--        return -EINVAL;
--
--    /* turn on PG_log_dirty bit in paging mode */
--    paging_lock(d);
--    d->arch.paging.mode |= PG_log_dirty;
--    paging_unlock(d);
--
--    /* set l1e entries of P2M table to be read-only. */
--    p2m_change_type_range(d, dirty_vram->begin_pfn, dirty_vram->end_pfn,
--                          p2m_ram_rw, p2m_ram_logdirty);
--
--    flush_tlb_mask(d->domain_dirty_cpumask);
--    return 0;
--}
--
--static int hap_disable_vram_tracking(struct domain *d)
--{
--    struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
--
--    if ( !dirty_vram )
--        return -EINVAL;
--
--    paging_lock(d);
--    d->arch.paging.mode &= ~PG_log_dirty;
--    paging_unlock(d);
--
--    /* set l1e entries of P2M table with normal mode */
--    p2m_change_type_range(d, dirty_vram->begin_pfn, dirty_vram->end_pfn,
--                          p2m_ram_logdirty, p2m_ram_rw);
--
--    flush_tlb_mask(d->domain_dirty_cpumask);
--    return 0;
--}
--
--static void hap_clean_vram_tracking(struct domain *d)
--{
--    struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
--
--    if ( !dirty_vram )
--        return;
--
--    /* set l1e entries of P2M table to be read-only. */
--    p2m_change_type_range(d, dirty_vram->begin_pfn, dirty_vram->end_pfn,
--                          p2m_ram_rw, p2m_ram_logdirty);
--
--    flush_tlb_mask(d->domain_dirty_cpumask);
--}
--
--static void hap_vram_tracking_init(struct domain *d)
--{
--    paging_log_dirty_init(d, hap_enable_vram_tracking,
--                          hap_disable_vram_tracking,
--                          hap_clean_vram_tracking);
--}
-+/*
-+ * hap_track_dirty_vram()
-+ * Create the domain's dv_dirty_vram struct on demand.
-+ * Create a dirty vram range on demand when some [begin_pfn:begin_pfn+nr] is
-+ * first encountered.
-+ * Collect the guest_dirty bitmask, a bit mask of the dirty vram pages, by
-+ * calling paging_log_dirty_range(), which interrogates each vram
-+ * page's p2m type looking for pages that have been made writable.
-+ */
- 
- int hap_track_dirty_vram(struct domain *d,
-                          unsigned long begin_pfn,
-                          unsigned long nr,
--                         XEN_GUEST_HANDLE_64(uint8) dirty_bitmap)
-+                         XEN_GUEST_HANDLE_64(uint8) guest_dirty_bitmap)
- {
-     long rc = 0;
--    struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
-+    struct sh_dirty_vram *dirty_vram;
-+    uint8_t *dirty_bitmap = NULL;
- 
-     if ( nr )
-     {
--        if ( paging_mode_log_dirty(d) && dirty_vram )
-+        int size = (nr + BITS_PER_BYTE - 1) / BITS_PER_BYTE;
-+
-+        if ( !paging_mode_log_dirty(d) )
-         {
--            if ( begin_pfn != dirty_vram->begin_pfn ||
--                 begin_pfn + nr != dirty_vram->end_pfn )
--            {
--                paging_log_dirty_disable(d);
--                dirty_vram->begin_pfn = begin_pfn;
--                dirty_vram->end_pfn = begin_pfn + nr;
--                rc = paging_log_dirty_enable(d);
--                if (rc != 0)
--                    goto param_fail;
--            }
-+            hap_logdirty_init(d);
-+            rc = paging_log_dirty_enable(d);
-+            if ( rc )
-+                goto out;
-         }
--        else if ( !paging_mode_log_dirty(d) && !dirty_vram )
-+
-+        rc = -ENOMEM;
-+        dirty_bitmap = xzalloc_bytes(size);
-+        if ( !dirty_bitmap )
-+            goto out;
-+
-+        paging_lock(d);
-+
-+        dirty_vram = d->arch.hvm_domain.dirty_vram;
-+        if ( !dirty_vram )
-         {
-             rc = -ENOMEM;
--            if ( (dirty_vram = xmalloc(struct sh_dirty_vram)) == NULL )
--                goto param_fail;
-+            if ( (dirty_vram = xzalloc(struct sh_dirty_vram)) == NULL )
-+            {
-+                paging_unlock(d);
-+                goto out;
-+            }
- 
-+            d->arch.hvm_domain.dirty_vram = dirty_vram;
-+        }
-+
-+        if ( begin_pfn != dirty_vram->begin_pfn ||
-+             begin_pfn + nr != dirty_vram->end_pfn )
-+        {
-             dirty_vram->begin_pfn = begin_pfn;
-             dirty_vram->end_pfn = begin_pfn + nr;
--            d->arch.hvm_domain.dirty_vram = dirty_vram;
--            hap_vram_tracking_init(d);
--            rc = paging_log_dirty_enable(d);
--            if (rc != 0)
--                goto param_fail;
-+
-+            paging_unlock(d);
-+
-+            /* set l1e entries of range within P2M table to be read-only. */
-+            p2m_change_type_range(d, begin_pfn, begin_pfn + nr,
-+                                  p2m_ram_rw, p2m_ram_logdirty);
-+
-+            flush_tlb_mask(d->domain_dirty_cpumask);
-+
-+            memset(dirty_bitmap, 0xff, size); /* consider all pages dirty */
-         }
-         else
-         {
--            if ( !paging_mode_log_dirty(d) && dirty_vram )
--                rc = -EINVAL;
--            else
--                rc = -ENODATA;
--            goto param_fail;
-+            paging_unlock(d);
-+
-+            domain_pause(d);
-+
-+            /* get the bitmap */
-+            paging_log_dirty_range(d, begin_pfn, nr, dirty_bitmap);
-+
-+            domain_unpause(d);
-         }
--        /* get the bitmap */
--        rc = paging_log_dirty_range(d, begin_pfn, nr, dirty_bitmap);
-+
-+        rc = -EFAULT;
-+        if ( copy_to_guest(guest_dirty_bitmap, dirty_bitmap, size) == 0 )
-+            rc = 0;
-     }
-     else
-     {
--        if ( paging_mode_log_dirty(d) && dirty_vram ) {
--            rc = paging_log_dirty_disable(d);
--            xfree(dirty_vram);
--            dirty_vram = d->arch.hvm_domain.dirty_vram = NULL;
--        } else
--            rc = 0;
--    }
-+        paging_lock(d);
- 
--    return rc;
-+        dirty_vram = d->arch.hvm_domain.dirty_vram;
-+        if ( dirty_vram )
-+        {
-+            /*
-+             * If zero pages specified while tracking dirty vram
-+             * then stop tracking
-+             */
-+            xfree(dirty_vram);
-+            d->arch.hvm_domain.dirty_vram = NULL;
-+        }
- 
---param_fail:
--    if ( dirty_vram )
--    {
--        xfree(dirty_vram);
--        dirty_vram = d->arch.hvm_domain.dirty_vram = NULL;
-+        paging_unlock(d);
-     }
-+out:
-+    if ( dirty_bitmap )
-+        xfree(dirty_bitmap);
-+
-     return rc;
- }
- 
-@@ -223,13 +201,6 @@ static void hap_clean_dirty_bitmap(struc
- 
- void hap_logdirty_init(struct domain *d)
- {
--    struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
--    if ( paging_mode_log_dirty(d) && dirty_vram )
--    {
--        paging_log_dirty_disable(d);
--        xfree(dirty_vram);
--        dirty_vram = d->arch.hvm_domain.dirty_vram = NULL;
--    }
- 
-     /* Reinitialize logdirty mechanism */
-     paging_log_dirty_init(d, hap_enable_log_dirty,
---- a/xen/arch/x86/mm/paging.c
-+++ b/xen/arch/x86/mm/paging.c
-@@ -447,157 +447,38 @@ int paging_log_dirty_op(struct domain *d
-     return rv;
- }
- 
--int paging_log_dirty_range(struct domain *d,
--                           unsigned long begin_pfn,
--                           unsigned long nr,
--                           XEN_GUEST_HANDLE_64(uint8) dirty_bitmap)
--{
--    int rv = 0;
--    unsigned long pages = 0;
--    mfn_t *l4, *l3, *l2;
--    unsigned long *l1;
--    int b1, b2, b3, b4;
--    int i2, i3, i4;
--
--    d->arch.paging.log_dirty.clean_dirty_bitmap(d);
--    paging_lock(d);
--
--    PAGING_DEBUG(LOGDIRTY, "log-dirty-range: dom %u faults=%u dirty=%u\n",
--                 d->domain_id,
--                 d->arch.paging.log_dirty.fault_count,
--                 d->arch.paging.log_dirty.dirty_count);
--
--    if ( unlikely(d->arch.paging.log_dirty.failed_allocs) ) {
--        printk("%s: %d failed page allocs while logging dirty pages\n",
--               __FUNCTION__, d->arch.paging.log_dirty.failed_allocs);
--        rv = -ENOMEM;
--        goto out;
--    }
-+void paging_log_dirty_range(struct domain *d,
-+                            unsigned long begin_pfn,
-+                            unsigned long nr,
-+                            uint8_t *dirty_bitmap)
-+{
-+    struct p2m_domain *p2m = p2m_get_hostp2m(d);
-+    int i;
-+    unsigned long pfn;
-+
-+    /*
-+     * Set l1e entries of P2M table to be read-only.
-+     *
-+     * On first write, it page faults, its entry is changed to read-write,
-+     * and on retry the write succeeds.
-+     *
-+     * We populate dirty_bitmap by looking for entries that have been
-+     * switched to read-write.
-+     */
- 
--    if ( !d->arch.paging.log_dirty.fault_count &&
--         !d->arch.paging.log_dirty.dirty_count ) {
--        unsigned int size = BITS_TO_LONGS(nr);
--
--        if ( clear_guest(dirty_bitmap, size * BYTES_PER_LONG) != 0 )
--            rv = -EFAULT;
--        goto out;
--    }
--    d->arch.paging.log_dirty.fault_count = 0;
--    d->arch.paging.log_dirty.dirty_count = 0;
-+    p2m_lock(p2m);
- 
--    b1 = L1_LOGDIRTY_IDX(begin_pfn);
--    b2 = L2_LOGDIRTY_IDX(begin_pfn);
--    b3 = L3_LOGDIRTY_IDX(begin_pfn);
--    b4 = L4_LOGDIRTY_IDX(begin_pfn);
--    l4 = paging_map_log_dirty_bitmap(d);
--
--    for ( i4 = b4;
--          (pages < nr) && (i4 < LOGDIRTY_NODE_ENTRIES);
--          i4++ )
-+    for ( i = 0, pfn = begin_pfn; pfn < begin_pfn + nr; i++, pfn++ )
-     {
--        l3 = (l4 && mfn_valid(l4[i4])) ? map_domain_page(mfn_x(l4[i4])) : NULL;
--        for ( i3 = b3;
--              (pages < nr) && (i3 < LOGDIRTY_NODE_ENTRIES);
--              i3++ )
--        {
--            l2 = ((l3 && mfn_valid(l3[i3])) ?
--                  map_domain_page(mfn_x(l3[i3])) : NULL);
--            for ( i2 = b2;
--                  (pages < nr) && (i2 < LOGDIRTY_NODE_ENTRIES);
--                  i2++ )
--            {
--                unsigned int bytes = PAGE_SIZE;
--                uint8_t *s;
--                l1 = ((l2 && mfn_valid(l2[i2])) ?
--                      map_domain_page(mfn_x(l2[i2])) : NULL);
--
--                s = ((uint8_t*)l1) + (b1 >> 3);
--                bytes -= b1 >> 3;
--
--                if ( likely(((nr - pages + 7) >> 3) < bytes) )
--                    bytes = (unsigned int)((nr - pages + 7) >> 3);
--
--                if ( !l1 )
--                {
--                    if ( clear_guest_offset(dirty_bitmap, pages >> 3,
--                                            bytes) != 0 )
--                    {
--                        rv = -EFAULT;
--                        goto out;
--                    }
--                }
--                /* begin_pfn is not 32K aligned, hence we have to bit
--                 * shift the bitmap */
--                else if ( b1 & 0x7 )
--                {
--                    int i, j;
--                    uint32_t *l = (uint32_t*) s;
--                    int bits = b1 & 0x7;
--                    int bitmask = (1 << bits) - 1;
--                    int size = (bytes + BYTES_PER_LONG - 1) / BYTES_PER_LONG;
--                    unsigned long bitmap[size];
--                    static unsigned long printed = 0;
--
--                    if ( printed != begin_pfn )
--                    {
--                        dprintk(XENLOG_DEBUG, "%s: begin_pfn %lx is not 32K aligned!\n",
--                                __FUNCTION__, begin_pfn);
--                        printed = begin_pfn;
--                    }
--
--                    for ( i = 0; i < size - 1; i++, l++ ) {
--                        bitmap[i] = ((*l) >> bits) |
--                            (((*((uint8_t*)(l + 1))) & bitmask) << (sizeof(*l) * 8 - bits));
--                    }
--                    s = (uint8_t*) l;
--                    size = BYTES_PER_LONG - ((b1 >> 3) & 0x3);
--                    bitmap[i] = 0;
--                    for ( j = 0; j < size; j++, s++ )
--                        bitmap[i] |= (*s) << (j * 8);
--                    bitmap[i] = (bitmap[i] >> bits) | (bitmask << (size * 8 - bits));
--                    if ( copy_to_guest_offset(dirty_bitmap, (pages >> 3),
--                                              (uint8_t*) bitmap, bytes) != 0 )
--                    {
--                        rv = -EFAULT;
--                        goto out;
--                    }
--                }
--                else
--                {
--                    if ( copy_to_guest_offset(dirty_bitmap, pages >> 3,
--                                              s, bytes) != 0 )
--                    {
--                        rv = -EFAULT;
--                        goto out;
--                    }
--                }
--
--                pages += bytes << 3;
--                if ( l1 )
--                {
--                    clear_page(l1);
--                    unmap_domain_page(l1);
--                }
--                b1 = b1 & 0x7;
--            }
--            b2 = 0;
--            if ( l2 )
--                unmap_domain_page(l2);
--        }
--        b3 = 0;
--        if ( l3 )
--            unmap_domain_page(l3);
-+        p2m_type_t pt;
-+        pt = p2m_change_type(d, pfn, p2m_ram_rw, p2m_ram_logdirty);
-+        if ( pt == p2m_ram_rw )
-+            dirty_bitmap[i >> 3] |= (1 << (i & 7));
-     }
--    if ( l4 )
--        unmap_domain_page(l4);
--
--    paging_unlock(d);
- 
--    return rv;
-+    p2m_unlock(p2m);
- 
--  out:
--    paging_unlock(d);
--    return rv;
-+    flush_tlb_mask(d->domain_dirty_cpumask);
- }
- 
- /* Note that this function takes three function pointers.  Callers must supply
---- a/xen/include/asm-x86/config.h
-+++ b/xen/include/asm-x86/config.h
-@@ -17,6 +17,7 @@
- 
- #define BYTES_PER_LONG (1 << LONG_BYTEORDER)
- #define BITS_PER_LONG (BYTES_PER_LONG << 3)
-+#define BITS_PER_BYTE 8
- 
- #define CONFIG_X86 1
- #define CONFIG_X86_HT 1
---- a/xen/include/asm-x86/paging.h
-+++ b/xen/include/asm-x86/paging.h
-@@ -145,10 +145,10 @@ struct paging_mode {
- void paging_free_log_dirty_bitmap(struct domain *d);
- 
- /* get the dirty bitmap for a specific range of pfns */
--int paging_log_dirty_range(struct domain *d,
--                           unsigned long begin_pfn,
--                           unsigned long nr,
--                           XEN_GUEST_HANDLE_64(uint8) dirty_bitmap);
-+void paging_log_dirty_range(struct domain *d,
-+                            unsigned long begin_pfn,
-+                            unsigned long nr,
-+                            uint8_t *dirty_bitmap);
- 
- /* enable log dirty */
- int paging_log_dirty_enable(struct domain *d);
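The rewritten paging_log_dirty_range() above infers dirtiness from p2m types rather than a shared bitmap: a guest write to a read-only (logdirty) page promotes it to read-write, and the next scan both reports that page as dirty and demotes it again. A toy model of that loop; the enum and the page_types[] array are stand-ins, not Xen structures:

```c
/* Toy model of the p2m-type-based dirty scan introduced by the
 * prereq patch. All names are illustrative. */
#include <stdint.h>
#include <string.h>

enum p2m_type { RAM_RO_LOGDIRTY, RAM_RW };

#define NR_PFNS 64
static enum p2m_type page_types[NR_PFNS];

/* bitmap must hold at least (nr + 7) / 8 bytes. */
static void log_dirty_range(unsigned long begin, unsigned long nr,
                            uint8_t *bitmap)
{
    memset(bitmap, 0, (nr + 7) / 8);
    for ( unsigned long i = 0; i < nr; i++ )
    {
        unsigned long pfn = begin + i;
        if ( page_types[pfn] == RAM_RW )        /* guest wrote it */
        {
            bitmap[i >> 3] |= 1 << (i & 7);     /* report dirty */
            page_types[pfn] = RAM_RO_LOGDIRTY;  /* re-arm for next pass */
        }
    }
}
```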
diff --git a/main/xen/xsa97-hap-4_2.patch b/main/xen/xsa97-hap-4_2.patch
deleted file mode 100644
index 5f89b58b7b..0000000000
--- a/main/xen/xsa97-hap-4_2.patch
+++ /dev/null
@@ -1,485 +0,0 @@
-x86/paging: make log-dirty operations preemptible
-
-Both the freeing and the inspection of the bitmap get done in (nested)
-loops which - besides having a rather high iteration count in general,
-albeit that would be covered by XSA-77 - have the number of non-trivial
-iterations they need to perform (indirectly) controllable by both the
-guest they are for and any domain controlling the guest (including the
-one running qemu for it).
-
-This is XSA-97.
-
-Signed-off-by: Jan Beulich <jbeulich@suse.com>
-Reviewed-by: Tim Deegan <tim@xen.org>
-
---- a/xen/arch/x86/domain.c
-+++ b/xen/arch/x86/domain.c
-@@ -2136,7 +2136,9 @@ int domain_relinquish_resources(struct d
-         pci_release_devices(d);
- 
-         /* Tear down paging-assistance stuff. */
--        paging_teardown(d);
-+        ret = paging_teardown(d);
-+        if ( ret )
-+            return ret;
- 
-         /* Drop the in-use references to page-table bases. */
-         for_each_vcpu ( d, v )
---- a/xen/arch/x86/domctl.c
-+++ b/xen/arch/x86/domctl.c
-@@ -66,6 +66,9 @@ long arch_do_domctl(
-                                       &domctl->u.shadow_op,
-                                       guest_handle_cast(u_domctl, void));
-             rcu_unlock_domain(d);
-+            if ( ret == -EAGAIN )
-+                return hypercall_create_continuation(__HYPERVISOR_domctl,
-+                                                     "h", u_domctl);
-             copy_to_guest(u_domctl, domctl, 1);
-         }
-     }
---- a/xen/arch/x86/mm/hap/hap.c
-+++ b/xen/arch/x86/mm/hap/hap.c
-@@ -678,8 +678,7 @@ int hap_domctl(struct domain *d, xen_dom
-         paging_unlock(d);
-         if ( preempted )
-             /* Not finished.  Set up to re-run the call. */
--            rc = hypercall_create_continuation(__HYPERVISOR_domctl, "h",
--                                               u_domctl);
-+            rc = -EAGAIN;
-         else
-             /* Finished.  Return the new allocation */
-             sc->mb = hap_get_allocation(d);
---- a/xen/arch/x86/mm/paging.c
-+++ b/xen/arch/x86/mm/paging.c
-@@ -26,6 +26,7 @@
- #include <asm/shadow.h>
- #include <asm/p2m.h>
- #include <asm/hap.h>
-+#include <asm/event.h>
- #include <asm/hvm/nestedhvm.h>
- #include <xen/numa.h>
- #include <xsm/xsm.h>
-@@ -116,26 +117,46 @@ static void paging_free_log_dirty_page(s
-     d->arch.paging.free_page(d, mfn_to_page(mfn));
- }
- 
--void paging_free_log_dirty_bitmap(struct domain *d)
-+static int paging_free_log_dirty_bitmap(struct domain *d, int rc)
- {
-     mfn_t *l4, *l3, *l2;
-     int i4, i3, i2;
- 
-+    paging_lock(d);
-+
-     if ( !mfn_valid(d->arch.paging.log_dirty.top) )
--        return;
-+    {
-+        paging_unlock(d);
-+        return 0;
-+    }
- 
--    paging_lock(d);
-+    if ( !d->arch.paging.preempt.vcpu )
-+    {
-+        memset(&d->arch.paging.preempt.log_dirty, 0,
-+               sizeof(d->arch.paging.preempt.log_dirty));
-+        ASSERT(rc <= 0);
-+        d->arch.paging.preempt.log_dirty.done = -rc;
-+    }
-+    else if ( d->arch.paging.preempt.vcpu != current ||
-+              d->arch.paging.preempt.op != XEN_DOMCTL_SHADOW_OP_OFF )
-+    {
-+        paging_unlock(d);
-+        return -EBUSY;
-+    }
- 
-     l4 = map_domain_page(mfn_x(d->arch.paging.log_dirty.top));
-+    i4 = d->arch.paging.preempt.log_dirty.i4;
-+    i3 = d->arch.paging.preempt.log_dirty.i3;
-+    rc = 0;
- 
--    for ( i4 = 0; i4 < LOGDIRTY_NODE_ENTRIES; i4++ )
-+    for ( ; i4 < LOGDIRTY_NODE_ENTRIES; i4++, i3 = 0 )
-     {
-         if ( !mfn_valid(l4[i4]) )
-             continue;
- 
-         l3 = map_domain_page(mfn_x(l4[i4]));
- 
--        for ( i3 = 0; i3 < LOGDIRTY_NODE_ENTRIES; i3++ )
-+        for ( ; i3 < LOGDIRTY_NODE_ENTRIES; i3++ )
-         {
-             if ( !mfn_valid(l3[i3]) )
-                 continue;
-@@ -148,20 +169,54 @@ void paging_free_log_dirty_bitmap(struct
- 
-             unmap_domain_page(l2);
-             paging_free_log_dirty_page(d, l3[i3]);
-+            l3[i3] = _mfn(INVALID_MFN);
-+
-+            if ( i3 < LOGDIRTY_NODE_ENTRIES - 1 && hypercall_preempt_check() )
-+            {
-+                d->arch.paging.preempt.log_dirty.i3 = i3 + 1;
-+                d->arch.paging.preempt.log_dirty.i4 = i4;
-+                rc = -EAGAIN;
-+                break;
-+            }
-         }
- 
-         unmap_domain_page(l3);
-+        if ( rc )
-+            break;
-         paging_free_log_dirty_page(d, l4[i4]);
-+        l4[i4] = _mfn(INVALID_MFN);
-+
-+        if ( i4 < LOGDIRTY_NODE_ENTRIES - 1 && hypercall_preempt_check() )
-+        {
-+            d->arch.paging.preempt.log_dirty.i3 = 0;
-+            d->arch.paging.preempt.log_dirty.i4 = i4 + 1;
-+            rc = -EAGAIN;
-+            break;
-+        }
-     }
- 
-     unmap_domain_page(l4);
--    paging_free_log_dirty_page(d, d->arch.paging.log_dirty.top);
--    d->arch.paging.log_dirty.top = _mfn(INVALID_MFN);
- 
--    ASSERT(d->arch.paging.log_dirty.allocs == 0);
--    d->arch.paging.log_dirty.failed_allocs = 0;
-+    if ( !rc )
-+    {
-+        paging_free_log_dirty_page(d, d->arch.paging.log_dirty.top);
-+        d->arch.paging.log_dirty.top = _mfn(INVALID_MFN);
-+
-+        ASSERT(d->arch.paging.log_dirty.allocs == 0);
-+        d->arch.paging.log_dirty.failed_allocs = 0;
-+
-+        rc = -d->arch.paging.preempt.log_dirty.done;
-+        d->arch.paging.preempt.vcpu = NULL;
-+    }
-+    else
-+    {
-+        d->arch.paging.preempt.vcpu = current;
-+        d->arch.paging.preempt.op = XEN_DOMCTL_SHADOW_OP_OFF;
-+    }
- 
-     paging_unlock(d);
-+
-+    return rc;
- }
- 
- int paging_log_dirty_enable(struct domain *d)
-@@ -178,15 +233,25 @@ int paging_log_dirty_enable(struct domai
-     return ret;
- }
- 
--int paging_log_dirty_disable(struct domain *d)
-+static int paging_log_dirty_disable(struct domain *d, bool_t resuming)
- {
--    int ret;
-+    int ret = 1;
-+
-+    if ( !resuming )
-+    {
-+        domain_pause(d);
-+        /* Safe because the domain is paused. */
-+        ret = d->arch.paging.log_dirty.disable_log_dirty(d);
-+        ASSERT(ret <= 0);
-+    }
- 
--    domain_pause(d);
--    /* Safe because the domain is paused. */
--    ret = d->arch.paging.log_dirty.disable_log_dirty(d);
-     if ( !paging_mode_log_dirty(d) )
--        paging_free_log_dirty_bitmap(d);
-+    {
-+        ret = paging_free_log_dirty_bitmap(d, ret);
-+        if ( ret == -EAGAIN )
-+            return ret;
-+    }
-+
-     domain_unpause(d);
- 
-     return ret;
-@@ -326,7 +391,9 @@ int paging_mfn_is_dirty(struct domain *d
- 
- /* Read a domain's log-dirty bitmap and stats.  If the operation is a CLEAN,
-  * clear the bitmap and stats as well. */
--int paging_log_dirty_op(struct domain *d, struct xen_domctl_shadow_op *sc)
-+static int paging_log_dirty_op(struct domain *d,
-+                               struct xen_domctl_shadow_op *sc,
-+                               bool_t resuming)
- {
-     int rv = 0, clean = 0, peek = 1;
-     unsigned long pages = 0;
-@@ -334,9 +401,22 @@ int paging_log_dirty_op(struct domain *d
-     unsigned long *l1 = NULL;
-     int i4, i3, i2;
- 
--    domain_pause(d);
-+    if ( !resuming )
-+        domain_pause(d);
-     paging_lock(d);
- 
-+    if ( !d->arch.paging.preempt.vcpu )
-+        memset(&d->arch.paging.preempt.log_dirty, 0,
-+               sizeof(d->arch.paging.preempt.log_dirty));
-+    else if ( d->arch.paging.preempt.vcpu != current ||
-+              d->arch.paging.preempt.op != sc->op )
-+    {
-+        paging_unlock(d);
-+        ASSERT(!resuming);
-+        domain_unpause(d);
-+        return -EBUSY;
-+    }
-+
-     clean = (sc->op == XEN_DOMCTL_SHADOW_OP_CLEAN);
- 
-     PAGING_DEBUG(LOGDIRTY, "log-dirty %s: dom %u faults=%u dirty=%u\n",
-@@ -365,17 +445,15 @@ int paging_log_dirty_op(struct domain *d
-         goto out;
-     }
- 
--    pages = 0;
-     l4 = paging_map_log_dirty_bitmap(d);
-+    i4 = d->arch.paging.preempt.log_dirty.i4;
-+    i3 = d->arch.paging.preempt.log_dirty.i3;
-+    pages = d->arch.paging.preempt.log_dirty.done;
- 
--    for ( i4 = 0;
--          (pages < sc->pages) && (i4 < LOGDIRTY_NODE_ENTRIES);
--          i4++ )
-+    for ( ; (pages < sc->pages) && (i4 < LOGDIRTY_NODE_ENTRIES); i4++, i3 = 0 )
-     {
-         l3 = (l4 && mfn_valid(l4[i4])) ? map_domain_page(mfn_x(l4[i4])) : NULL;
--        for ( i3 = 0;
--              (pages < sc->pages) && (i3 < LOGDIRTY_NODE_ENTRIES);
--              i3++ )
-+        for ( ; (pages < sc->pages) && (i3 < LOGDIRTY_NODE_ENTRIES); i3++ )
-         {
-             l2 = ((l3 && mfn_valid(l3[i3])) ?
-                   map_domain_page(mfn_x(l3[i3])) : NULL);
-@@ -410,18 +488,51 @@ int paging_log_dirty_op(struct domain *d
-             }
-             if ( l2 )
-                 unmap_domain_page(l2);
-+
-+            if ( i3 < LOGDIRTY_NODE_ENTRIES - 1 && hypercall_preempt_check() )
-+            {
-+                d->arch.paging.preempt.log_dirty.i4 = i4;
-+                d->arch.paging.preempt.log_dirty.i3 = i3 + 1;
-+                rv = -EAGAIN;
-+                break;
-+            }
-         }
-         if ( l3 )
-             unmap_domain_page(l3);
-+
-+        if ( !rv && i4 < LOGDIRTY_NODE_ENTRIES - 1 &&
-+             hypercall_preempt_check() )
-+        {
-+            d->arch.paging.preempt.log_dirty.i4 = i4 + 1;
-+            d->arch.paging.preempt.log_dirty.i3 = 0;
-+            rv = -EAGAIN;
-+        }
-+        if ( rv )
-+            break;
-     }
-     if ( l4 )
-         unmap_domain_page(l4);
- 
--    if ( pages < sc->pages )
--        sc->pages = pages;
-+    if ( !rv )
-+        d->arch.paging.preempt.vcpu = NULL;
-+    else
-+    {
-+        d->arch.paging.preempt.vcpu = current;
-+        d->arch.paging.preempt.op = sc->op;
-+        d->arch.paging.preempt.log_dirty.done = pages;
-+    }
- 
-     paging_unlock(d);
- 
-+    if ( rv )
-+    {
-+        /* Never leave the domain paused for other errors. */
-+        ASSERT(rv == -EAGAIN);
-+        return rv;
-+    }
-+
-+    if ( pages < sc->pages )
-+        sc->pages = pages;
-     if ( clean )
-     {
-         /* We need to further call clean_dirty_bitmap() functions of specific
-@@ -432,6 +543,7 @@ int paging_log_dirty_op(struct domain *d
-     return rv;
- 
-  out:
-+    d->arch.paging.preempt.vcpu = NULL;
-     paging_unlock(d);
-     domain_unpause(d);
- 
-@@ -498,12 +610,6 @@ void paging_log_dirty_init(struct domain
-     d->arch.paging.log_dirty.clean_dirty_bitmap = clean_dirty_bitmap;
- }
- 
--/* This function fress log dirty bitmap resources. */
--static void paging_log_dirty_teardown(struct domain*d)
--{
--    paging_free_log_dirty_bitmap(d);
--}
--
- /************************************************/
- /* CODE FOR PAGING SUPPORT */
- /************************************************/
-@@ -547,6 +653,7 @@ void paging_vcpu_init(struct vcpu *v)
- int paging_domctl(struct domain *d, xen_domctl_shadow_op_t *sc,
-                   XEN_GUEST_HANDLE(void) u_domctl)
- {
-+    bool_t resuming = 0;
-     int rc;
- 
-     if ( unlikely(d == current->domain) )
-@@ -569,6 +676,20 @@ int paging_domctl(struct domain *d, xen_
-         return -EINVAL;
-     }
- 
-+    if ( d->arch.paging.preempt.vcpu )
-+    {
-+        if ( d->arch.paging.preempt.vcpu != current ||
-+             d->arch.paging.preempt.op != sc->op )
-+        {
-+            printk(XENLOG_G_DEBUG
-+                   "d%d:v%d: Paging op %#x on Dom%u with unfinished prior op %#x\n",
-+                   current->domain->domain_id, current->vcpu_id,
-+                   sc->op, d->domain_id, d->arch.paging.preempt.op);
-+            return -EBUSY;
-+        }
-+        resuming = 1;
-+    }
-+
-     rc = xsm_shadow_control(d, sc->op);
-     if ( rc )
-         return rc;
-@@ -594,13 +714,13 @@ int paging_domctl(struct domain *d, xen_
- 
-     case XEN_DOMCTL_SHADOW_OP_OFF:
-         if ( paging_mode_log_dirty(d) )
--            if ( (rc = paging_log_dirty_disable(d)) != 0 )
-+            if ( (rc = paging_log_dirty_disable(d, resuming)) != 0 )
-                 return rc;
-         break;
- 
-     case XEN_DOMCTL_SHADOW_OP_CLEAN:
-     case XEN_DOMCTL_SHADOW_OP_PEEK:
--        return paging_log_dirty_op(d, sc);
-+        return paging_log_dirty_op(d, sc, resuming);
-     }
- 
-     /* Here, dispatch domctl to the appropriate paging code */
-@@ -611,18 +731,24 @@ int paging_domctl(struct domain *d, xen_
- }
- 
- /* Call when destroying a domain */
--void paging_teardown(struct domain *d)
-+int paging_teardown(struct domain *d)
- {
-+    int rc;
-+
-     if ( hap_enabled(d) )
-         hap_teardown(d);
-     else
-         shadow_teardown(d);
- 
-     /* clean up log dirty resources. */
--    paging_log_dirty_teardown(d);
-+    rc = paging_free_log_dirty_bitmap(d, 0);
-+    if ( rc == -EAGAIN )
-+        return rc;
- 
-     /* Move populate-on-demand cache back to domain_list for destruction */
-     p2m_pod_empty_cache(d);
-+
-+    return rc;
- }
- 
- /* Call once all of the references to the domain have gone away */
---- a/xen/arch/x86/mm/shadow/common.c
-+++ b/xen/arch/x86/mm/shadow/common.c
-@@ -3829,8 +3829,7 @@ int shadow_domctl(struct domain *d,
-         paging_unlock(d);
-         if ( preempted )
-             /* Not finished.  Set up to re-run the call. */
--            rc = hypercall_create_continuation(
--                __HYPERVISOR_domctl, "h", u_domctl);
-+            rc = -EAGAIN;
-         else
-             /* Finished.  Return the new allocation */
-             sc->mb = shadow_get_allocation(d);
---- a/xen/common/domain.c
-+++ b/xen/common/domain.c
-@@ -479,7 +479,6 @@ int domain_kill(struct domain *d)
-         rc = domain_relinquish_resources(d);
-         if ( rc != 0 )
-         {
--            BUG_ON(rc != -EAGAIN);
-             break;
-         }
-         if ( sched_move_domain(d, cpupool0) )
---- a/xen/include/asm-x86/domain.h
-+++ b/xen/include/asm-x86/domain.h
-@@ -193,6 +193,20 @@ struct paging_domain {
-     struct hap_domain hap;
-     /* log dirty support */
-     struct log_dirty_domain log_dirty;
-+
-+    /* preemption handling */
-+    struct {
-+        struct vcpu *vcpu;
-+        unsigned int op;
-+        union {
-+            struct {
-+                unsigned long done:PADDR_BITS - PAGE_SHIFT;
-+                unsigned long i4:PAGETABLE_ORDER;
-+                unsigned long i3:PAGETABLE_ORDER;
-+            } log_dirty;
-+        };
-+    } preempt;
-+
-     /* alloc/free pages from the pool for paging-assistance structures
-      * (used by p2m and log-dirty code for their tries) */
-     struct page_info * (*alloc_page)(struct domain *d);
---- a/xen/include/asm-x86/paging.h
-+++ b/xen/include/asm-x86/paging.h
-@@ -141,9 +141,6 @@ struct paging_mode {
- /*****************************************************************************
-  * Log dirty code */
- 
--/* free log dirty bitmap resource */
--void paging_free_log_dirty_bitmap(struct domain *d);
--
- /* get the dirty bitmap for a specific range of pfns */
- void paging_log_dirty_range(struct domain *d,
-                             unsigned long begin_pfn,
-                             unsigned long nr,
-                             uint8_t *dirty_bitmap);
- 
-@@ -153,9 +150,6 @@ void paging_log_dirty_range(struct domai
- /* enable log dirty */
- int paging_log_dirty_enable(struct domain *d);
- 
--/* disable log dirty */
--int paging_log_dirty_disable(struct domain *d);
--
- /* log dirty initialization */
- void paging_log_dirty_init(struct domain *d,
-                            int (*enable_log_dirty)(struct domain *d),
-@@ -218,7 +212,7 @@ int paging_domctl(struct domain *d, xen_
-                   XEN_GUEST_HANDLE(void) u_domctl);
- 
- /* Call when destroying a domain */
--void paging_teardown(struct domain *d);
-+int paging_teardown(struct domain *d);
- 
- /* Call once all of the references to the domain have gone away */
- void paging_final_teardown(struct domain *d);
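All of the xsa97 hunks implement one pattern: a long loop periodically checks whether it should yield, checkpoints its position, returns -EAGAIN, and the domctl dispatcher turns that into a hypercall continuation so the operation resumes from the checkpoint. A generic, self-contained sketch of that shape; none of these names are Xen's:

```c
/* Shape of the preemptible-operation pattern xsa97 introduces:
 * checkpoint, bail out with -EAGAIN, resume on the continued call.
 * All names here are illustrative stand-ins, not Xen's. */
#include <errno.h>
#include <stdbool.h>

struct checkpoint { unsigned long i; };

/* Stand-in for hypercall_preempt_check(): "is other work pending?" */
static bool preempt_check(void) { return false; }

static void process_entry(unsigned long i) { (void)i; }

int long_operation(struct checkpoint *cp, unsigned long nr_entries)
{
    for ( unsigned long i = cp->i; i < nr_entries; i++ )
    {
        process_entry(i);
        if ( i + 1 < nr_entries && preempt_check() )
        {
            cp->i = i + 1;    /* resume here when the call is retried */
            return -EAGAIN;   /* caller turns this into a continuation */
        }
    }
    cp->i = 0;                /* finished: reset for the next user */
    return 0;
}
```

This is also why the patch replaces the direct hypercall_create_continuation() calls in hap_domctl() and shadow_domctl() with plain -EAGAIN returns: only the top-level dispatcher, which owns the checkpoint state, decides whether to continue or to report -EBUSY to a competing caller.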