aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorNatanael Copa <ncopa@alpinelinux.org>2013-07-02 09:52:41 +0000
committerNatanael Copa <ncopa@alpinelinux.org>2013-07-02 09:54:33 +0000
commit14e8058dddb5be40c29deb267ffbc23171991c7a (patch)
tree26a1c27117adbfe6980444978a3b18e1e48f8e52
parent142cf745af9329dae1913280158d183956942c1a (diff)
downloadaports-14e8058dddb5be40c29deb267ffbc23171991c7a.tar.bz2
aports-14e8058dddb5be40c29deb267ffbc23171991c7a.tar.xz
main/xen: fix xsa45 and xsa58 (CVE-2013-1918,CVE-2013-1432)
ref #2123 fixes #2127
-rw-r--r--main/xen/APKBUILD6
-rw-r--r--main/xen/xsa45-4.1.patch1113
-rw-r--r--main/xen/xsa58-4.1.patch119
3 files changed, 1237 insertions, 1 deletions
diff --git a/main/xen/APKBUILD b/main/xen/APKBUILD
index 4f44f4d0a6..4d95a09909 100644
--- a/main/xen/APKBUILD
+++ b/main/xen/APKBUILD
@@ -2,7 +2,7 @@
# Maintainer: William Pitcock <nenolod@dereferenced.org>
pkgname=xen
pkgver=4.1.4
-pkgrel=5
+pkgrel=6
pkgdesc="Xen hypervisor"
url="http://www.xen.org/"
arch="x86 x86_64"
@@ -23,12 +23,14 @@ source="http://bits.xensource.com/oss-xen/release/$pkgver/$pkgname-$pkgver.tar.g
busybox-sed.patch
xsa33-4.1.patch
xsa41.patch
+ xsa45-4.1.patch
xsa52-4.1.patch
xsa53-4.1.patch
xsa54.patch
xsa55-4.1.patch
xsa56.patch
xsa57-4.1.patch
+ xsa58-4.1.patch
xencommons.initd
xend.initd
@@ -106,12 +108,14 @@ fa06495a175571f4aa3b6cb88937953e librt.patch
1bea3543ddc712330527b62fd9ff6520 busybox-sed.patch
25ba4efc5eee29daa12855fbadce84f8 xsa33-4.1.patch
ce56f00762139cd611dfc3332b7571cf xsa41.patch
+09c675a4a28ee00dd9abeacc07426edd xsa45-4.1.patch
db1e5a92547c8c8ed2e1872efed99ab0 xsa52-4.1.patch
e11ae888997d11fcb91b431ebf609d4e xsa53-4.1.patch
a8393d1ec6b886ea72ffe624a04ee10a xsa54.patch
391d90e3851df0b42b2971e8b860e19a xsa55-4.1.patch
e70b9128ffc2175cea314a533a7d8457 xsa56.patch
a065178b8f5ed028b97fa51db97e41e2 xsa57-4.1.patch
+dd46795cf7bb7bb9baa50bfdf4813d4f xsa58-4.1.patch
0b62c1fbe2699a32e745724fd301db5b xencommons.initd
5ee6a16ec70dfbcd4944ded71b393fa2 xend.initd
a2b5234483f1b5892d22e9315d9c307f xendomains.initd"
diff --git a/main/xen/xsa45-4.1.patch b/main/xen/xsa45-4.1.patch
new file mode 100644
index 0000000000..6dbf1f4456
--- /dev/null
+++ b/main/xen/xsa45-4.1.patch
@@ -0,0 +1,1113 @@
+diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
+index 4009a60..9a34488 100644
+--- a/xen/arch/x86/domain.c
++++ b/xen/arch/x86/domain.c
+@@ -70,8 +70,6 @@ void (*dead_idle) (void) __read_mostly = default_dead_idle;
+ static void paravirt_ctxt_switch_from(struct vcpu *v);
+ static void paravirt_ctxt_switch_to(struct vcpu *v);
+
+-static void vcpu_destroy_pagetables(struct vcpu *v);
+-
+ static void continue_idle_domain(struct vcpu *v)
+ {
+ reset_stack_and_jump(idle_loop);
+@@ -678,6 +676,7 @@ int arch_set_info_guest(
+ {
+ struct domain *d = v->domain;
+ unsigned long cr3_pfn = INVALID_MFN;
++ struct page_info *cr3_page;
+ unsigned long flags, cr4;
+ int i, rc = 0, compat;
+
+@@ -817,72 +816,103 @@ int arch_set_info_guest(
+ if ( rc != 0 )
+ return rc;
+
++ set_bit(_VPF_in_reset, &v->pause_flags);
++
+ if ( !compat )
+- {
+ cr3_pfn = gmfn_to_mfn(d, xen_cr3_to_pfn(c.nat->ctrlreg[3]));
++#ifdef __x86_64__
++ else
++ cr3_pfn = gmfn_to_mfn(d, compat_cr3_to_pfn(c.cmp->ctrlreg[3]));
++#endif
++ cr3_page = mfn_to_page(cr3_pfn);
+
+- if ( !mfn_valid(cr3_pfn) ||
+- (paging_mode_refcounts(d)
+- ? !get_page(mfn_to_page(cr3_pfn), d)
+- : !get_page_and_type(mfn_to_page(cr3_pfn), d,
+- PGT_base_page_table)) )
+- {
+- destroy_gdt(v);
+- return -EINVAL;
+- }
++ if ( !mfn_valid(cr3_pfn) || !get_page(cr3_page, d) )
++ {
++ cr3_page = NULL;
++ rc = -EINVAL;
++ }
++ else if ( paging_mode_refcounts(d) )
++ /* nothing */;
++ else if ( cr3_page == v->arch.old_guest_table )
++ {
++ v->arch.old_guest_table = NULL;
++ put_page(cr3_page);
++ }
++ else
++ {
++ /*
++ * Since v->arch.guest_table{,_user} are both NULL, this effectively
++ * is just a call to put_old_guest_table().
++ */
++ if ( !compat )
++ rc = vcpu_destroy_pagetables(v);
++ if ( !rc )
++ rc = get_page_type_preemptible(cr3_page,
++ !compat ? PGT_root_page_table
++ : PGT_l3_page_table);
++ if ( rc == -EINTR )
++ rc = -EAGAIN;
++ }
+
++ if ( rc )
++ /* handled below */;
++ else if ( !compat )
++ {
+ v->arch.guest_table = pagetable_from_pfn(cr3_pfn);
+
+ #ifdef __x86_64__
+ if ( c.nat->ctrlreg[1] )
+ {
+ cr3_pfn = gmfn_to_mfn(d, xen_cr3_to_pfn(c.nat->ctrlreg[1]));
++ cr3_page = mfn_to_page(cr3_pfn);
+
+- if ( !mfn_valid(cr3_pfn) ||
+- (paging_mode_refcounts(d)
+- ? !get_page(mfn_to_page(cr3_pfn), d)
+- : !get_page_and_type(mfn_to_page(cr3_pfn), d,
+- PGT_base_page_table)) )
++ if ( !mfn_valid(cr3_pfn) || !get_page(cr3_page, d) )
+ {
+- cr3_pfn = pagetable_get_pfn(v->arch.guest_table);
+- v->arch.guest_table = pagetable_null();
+- if ( paging_mode_refcounts(d) )
+- put_page(mfn_to_page(cr3_pfn));
+- else
+- put_page_and_type(mfn_to_page(cr3_pfn));
+- destroy_gdt(v);
+- return -EINVAL;
++ cr3_page = NULL;
++ rc = -EINVAL;
++ }
++ else if ( !paging_mode_refcounts(d) )
++ {
++ rc = get_page_type_preemptible(cr3_page, PGT_root_page_table);
++ switch ( rc )
++ {
++ case -EINTR:
++ rc = -EAGAIN;
++ case -EAGAIN:
++ v->arch.old_guest_table =
++ pagetable_get_page(v->arch.guest_table);
++ v->arch.guest_table = pagetable_null();
++ break;
++ }
+ }
+
+- v->arch.guest_table_user = pagetable_from_pfn(cr3_pfn);
++ if ( !rc )
++ v->arch.guest_table_user = pagetable_from_pfn(cr3_pfn);
+ }
+ else if ( !(flags & VGCF_in_kernel) )
+ {
+- destroy_gdt(v);
+- return -EINVAL;
++ cr3_page = NULL;
++ rc = -EINVAL;
+ }
+ }
+ else
+ {
+ l4_pgentry_t *l4tab;
+
+- cr3_pfn = gmfn_to_mfn(d, compat_cr3_to_pfn(c.cmp->ctrlreg[3]));
+-
+- if ( !mfn_valid(cr3_pfn) ||
+- (paging_mode_refcounts(d)
+- ? !get_page(mfn_to_page(cr3_pfn), d)
+- : !get_page_and_type(mfn_to_page(cr3_pfn), d,
+- PGT_l3_page_table)) )
+- {
+- destroy_gdt(v);
+- return -EINVAL;
+- }
+-
+ l4tab = __va(pagetable_get_paddr(v->arch.guest_table));
+ *l4tab = l4e_from_pfn(
+ cr3_pfn, _PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_ACCESSED);
+ #endif
+ }
++ if ( rc )
++ {
++ if ( cr3_page )
++ put_page(cr3_page);
++ destroy_gdt(v);
++ return rc;
++ }
++
++ clear_bit(_VPF_in_reset, &v->pause_flags);
+
+ if ( v->vcpu_id == 0 )
+ update_domain_wallclock_time(d);
+@@ -904,17 +934,16 @@ int arch_set_info_guest(
+ #undef c
+ }
+
+-void arch_vcpu_reset(struct vcpu *v)
++int arch_vcpu_reset(struct vcpu *v)
+ {
+ if ( !is_hvm_vcpu(v) )
+ {
+ destroy_gdt(v);
+- vcpu_destroy_pagetables(v);
+- }
+- else
+- {
+- vcpu_end_shutdown_deferral(v);
++ return vcpu_destroy_pagetables(v);
+ }
++
++ vcpu_end_shutdown_deferral(v);
++ return 0;
+ }
+
+ /*
+@@ -1917,63 +1946,6 @@ static int relinquish_memory(
+ return ret;
+ }
+
+-static void vcpu_destroy_pagetables(struct vcpu *v)
+-{
+- struct domain *d = v->domain;
+- unsigned long pfn;
+-
+-#ifdef __x86_64__
+- if ( is_pv_32on64_vcpu(v) )
+- {
+- pfn = l4e_get_pfn(*(l4_pgentry_t *)
+- __va(pagetable_get_paddr(v->arch.guest_table)));
+-
+- if ( pfn != 0 )
+- {
+- if ( paging_mode_refcounts(d) )
+- put_page(mfn_to_page(pfn));
+- else
+- put_page_and_type(mfn_to_page(pfn));
+- }
+-
+- l4e_write(
+- (l4_pgentry_t *)__va(pagetable_get_paddr(v->arch.guest_table)),
+- l4e_empty());
+-
+- v->arch.cr3 = 0;
+- return;
+- }
+-#endif
+-
+- pfn = pagetable_get_pfn(v->arch.guest_table);
+- if ( pfn != 0 )
+- {
+- if ( paging_mode_refcounts(d) )
+- put_page(mfn_to_page(pfn));
+- else
+- put_page_and_type(mfn_to_page(pfn));
+- v->arch.guest_table = pagetable_null();
+- }
+-
+-#ifdef __x86_64__
+- /* Drop ref to guest_table_user (from MMUEXT_NEW_USER_BASEPTR) */
+- pfn = pagetable_get_pfn(v->arch.guest_table_user);
+- if ( pfn != 0 )
+- {
+- if ( !is_pv_32bit_vcpu(v) )
+- {
+- if ( paging_mode_refcounts(d) )
+- put_page(mfn_to_page(pfn));
+- else
+- put_page_and_type(mfn_to_page(pfn));
+- }
+- v->arch.guest_table_user = pagetable_null();
+- }
+-#endif
+-
+- v->arch.cr3 = 0;
+-}
+-
+ int domain_relinquish_resources(struct domain *d)
+ {
+ int ret;
+@@ -1992,7 +1964,9 @@ int domain_relinquish_resources(struct domain *d)
+ for_each_vcpu ( d, v )
+ {
+ /* Drop the in-use references to page-table bases. */
+- vcpu_destroy_pagetables(v);
++ ret = vcpu_destroy_pagetables(v);
++ if ( ret )
++ return ret;
+
+ /*
+ * Relinquish GDT mappings. No need for explicit unmapping of the
+diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
+index 9f53728..140e70c 100644
+--- a/xen/arch/x86/hvm/hvm.c
++++ b/xen/arch/x86/hvm/hvm.c
+@@ -3083,8 +3083,11 @@ static void hvm_s3_suspend(struct domain *d)
+
+ for_each_vcpu ( d, v )
+ {
++ int rc;
++
+ vlapic_reset(vcpu_vlapic(v));
+- vcpu_reset(v);
++ rc = vcpu_reset(v);
++ ASSERT(!rc);
+ }
+
+ vpic_reset(d);
+diff --git a/xen/arch/x86/hvm/vlapic.c b/xen/arch/x86/hvm/vlapic.c
+index 3af41cc..8d47bd0 100644
+--- a/xen/arch/x86/hvm/vlapic.c
++++ b/xen/arch/x86/hvm/vlapic.c
+@@ -252,10 +252,13 @@ static void vlapic_init_sipi_action(unsigned long _vcpu)
+ {
+ case APIC_DM_INIT: {
+ bool_t fpu_initialised;
++ int rc;
++
+ domain_lock(target->domain);
+ /* Reset necessary VCPU state. This does not include FPU state. */
+ fpu_initialised = target->fpu_initialised;
+- vcpu_reset(target);
++ rc = vcpu_reset(target);
++ ASSERT(!rc);
+ target->fpu_initialised = fpu_initialised;
+ vlapic_reset(vcpu_vlapic(target));
+ domain_unlock(target->domain);
+diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
+index 30d281d..ceeb998 100644
+--- a/xen/arch/x86/mm.c
++++ b/xen/arch/x86/mm.c
+@@ -1182,7 +1182,16 @@ static int put_page_from_l3e(l3_pgentry_t l3e, unsigned long pfn,
+ #endif
+
+ if ( unlikely(partial > 0) )
++ {
++ ASSERT(preemptible >= 0);
+ return __put_page_type(l3e_get_page(l3e), preemptible);
++ }
++
++ if ( preemptible < 0 )
++ {
++ current->arch.old_guest_table = l3e_get_page(l3e);
++ return 0;
++ }
+
+ return put_page_and_type_preemptible(l3e_get_page(l3e), preemptible);
+ }
+@@ -1195,7 +1204,17 @@ static int put_page_from_l4e(l4_pgentry_t l4e, unsigned long pfn,
+ (l4e_get_pfn(l4e) != pfn) )
+ {
+ if ( unlikely(partial > 0) )
++ {
++ ASSERT(preemptible >= 0);
+ return __put_page_type(l4e_get_page(l4e), preemptible);
++ }
++
++ if ( preemptible < 0 )
++ {
++ current->arch.old_guest_table = l4e_get_page(l4e);
++ return 0;
++ }
++
+ return put_page_and_type_preemptible(l4e_get_page(l4e), preemptible);
+ }
+ return 1;
+@@ -1485,12 +1504,17 @@ static int alloc_l3_table(struct page_info *page, int preemptible)
+ if ( rc < 0 && rc != -EAGAIN && rc != -EINTR )
+ {
+ MEM_LOG("Failure in alloc_l3_table: entry %d", i);
++ if ( i )
++ {
++ page->nr_validated_ptes = i;
++ page->partial_pte = 0;
++ current->arch.old_guest_table = page;
++ }
+ while ( i-- > 0 )
+ {
+ if ( !is_guest_l3_slot(i) )
+ continue;
+ unadjust_guest_l3e(pl3e[i], d);
+- put_page_from_l3e(pl3e[i], pfn, 0, 0);
+ }
+ }
+
+@@ -1520,22 +1544,24 @@ static int alloc_l4_table(struct page_info *page, int preemptible)
+ page->nr_validated_ptes = i;
+ page->partial_pte = partial ?: 1;
+ }
+- else if ( rc == -EINTR )
++ else if ( rc < 0 )
+ {
++ if ( rc != -EINTR )
++ MEM_LOG("Failure in alloc_l4_table: entry %d", i);
+ if ( i )
+ {
+ page->nr_validated_ptes = i;
+ page->partial_pte = 0;
+- rc = -EAGAIN;
++ if ( rc == -EINTR )
++ rc = -EAGAIN;
++ else
++ {
++ if ( current->arch.old_guest_table )
++ page->nr_validated_ptes++;
++ current->arch.old_guest_table = page;
++ }
+ }
+ }
+- else if ( rc < 0 )
+- {
+- MEM_LOG("Failure in alloc_l4_table: entry %d", i);
+- while ( i-- > 0 )
+- if ( is_guest_l4_slot(d, i) )
+- put_page_from_l4e(pl4e[i], pfn, 0, 0);
+- }
+ if ( rc < 0 )
+ return rc;
+
+@@ -1965,7 +1991,7 @@ static int mod_l3_entry(l3_pgentry_t *pl3e,
+ pae_flush_pgd(pfn, pgentry_ptr_to_slot(pl3e), nl3e);
+ }
+
+- put_page_from_l3e(ol3e, pfn, 0, 0);
++ put_page_from_l3e(ol3e, pfn, 0, -preemptible);
+ return rc;
+ }
+
+@@ -2028,7 +2054,7 @@ static int mod_l4_entry(l4_pgentry_t *pl4e,
+ return -EFAULT;
+ }
+
+- put_page_from_l4e(ol4e, pfn, 0, 0);
++ put_page_from_l4e(ol4e, pfn, 0, -preemptible);
+ return rc;
+ }
+
+@@ -2186,7 +2212,15 @@ static int alloc_page_type(struct page_info *page, unsigned long type,
+ PRtype_info ": caf=%08lx taf=%" PRtype_info,
+ page_to_mfn(page), get_gpfn_from_mfn(page_to_mfn(page)),
+ type, page->count_info, page->u.inuse.type_info);
+- page->u.inuse.type_info = 0;
++ if ( page != current->arch.old_guest_table )
++ page->u.inuse.type_info = 0;
++ else
++ {
++ ASSERT((page->u.inuse.type_info &
++ (PGT_count_mask | PGT_validated)) == 1);
++ get_page_light(page);
++ page->u.inuse.type_info |= PGT_partial;
++ }
+ }
+ else
+ {
+@@ -2724,49 +2758,150 @@ static void put_superpage(unsigned long mfn)
+
+ #endif
+
++static int put_old_guest_table(struct vcpu *v)
++{
++ int rc;
++
++ if ( !v->arch.old_guest_table )
++ return 0;
++
++ switch ( rc = put_page_and_type_preemptible(v->arch.old_guest_table, 1) )
++ {
++ case -EINTR:
++ case -EAGAIN:
++ return -EAGAIN;
++ }
++
++ v->arch.old_guest_table = NULL;
++
++ return rc;
++}
++
++int vcpu_destroy_pagetables(struct vcpu *v)
++{
++ unsigned long mfn = pagetable_get_pfn(v->arch.guest_table);
++ struct page_info *page;
++ int rc = put_old_guest_table(v);
++
++ if ( rc )
++ return rc;
++
++#ifdef __x86_64__
++ if ( is_pv_32on64_vcpu(v) )
++ mfn = l4e_get_pfn(*(l4_pgentry_t *)mfn_to_virt(mfn));
++#endif
++
++ if ( mfn )
++ {
++ page = mfn_to_page(mfn);
++ if ( paging_mode_refcounts(v->domain) )
++ put_page(page);
++ else
++ rc = put_page_and_type_preemptible(page, 1);
++ }
++
++#ifdef __x86_64__
++ if ( is_pv_32on64_vcpu(v) )
++ {
++ if ( !rc )
++ l4e_write(
++ (l4_pgentry_t *)__va(pagetable_get_paddr(v->arch.guest_table)),
++ l4e_empty());
++ }
++ else
++#endif
++ if ( !rc )
++ {
++ v->arch.guest_table = pagetable_null();
++
++#ifdef __x86_64__
++ /* Drop ref to guest_table_user (from MMUEXT_NEW_USER_BASEPTR) */
++ mfn = pagetable_get_pfn(v->arch.guest_table_user);
++ if ( mfn )
++ {
++ page = mfn_to_page(mfn);
++ if ( paging_mode_refcounts(v->domain) )
++ put_page(page);
++ else
++ rc = put_page_and_type_preemptible(page, 1);
++ }
++ if ( !rc )
++ v->arch.guest_table_user = pagetable_null();
++#endif
++ }
++
++ v->arch.cr3 = 0;
++
++ return rc;
++}
+
+ int new_guest_cr3(unsigned long mfn)
+ {
+ struct vcpu *curr = current;
+ struct domain *d = curr->domain;
+- int okay;
++ int rc;
+ unsigned long old_base_mfn;
+
+ #ifdef __x86_64__
+ if ( is_pv_32on64_domain(d) )
+ {
+- okay = paging_mode_refcounts(d)
+- ? 0 /* Old code was broken, but what should it be? */
+- : mod_l4_entry(
++ rc = paging_mode_refcounts(d)
++ ? -EINVAL /* Old code was broken, but what should it be? */
++ : mod_l4_entry(
+ __va(pagetable_get_paddr(curr->arch.guest_table)),
+ l4e_from_pfn(
+ mfn,
+ (_PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_ACCESSED)),
+- pagetable_get_pfn(curr->arch.guest_table), 0, 0, curr) == 0;
+- if ( unlikely(!okay) )
++ pagetable_get_pfn(curr->arch.guest_table), 0, 1, curr);
++ switch ( rc )
+ {
++ case 0:
++ break;
++ case -EINTR:
++ case -EAGAIN:
++ return -EAGAIN;
++ default:
+ MEM_LOG("Error while installing new compat baseptr %lx", mfn);
+- return 0;
++ return rc;
+ }
+
+ invalidate_shadow_ldt(curr, 0);
+ write_ptbase(curr);
+
+- return 1;
++ return 0;
+ }
+ #endif
+- okay = paging_mode_refcounts(d)
+- ? get_page_from_pagenr(mfn, d)
+- : !get_page_and_type_from_pagenr(mfn, PGT_root_page_table, d, 0, 0);
+- if ( unlikely(!okay) )
++ rc = put_old_guest_table(curr);
++ if ( unlikely(rc) )
++ return rc;
++
++ old_base_mfn = pagetable_get_pfn(curr->arch.guest_table);
++ /*
++ * This is particularly important when getting restarted after the
++ * previous attempt got preempted in the put-old-MFN phase.
++ */
++ if ( old_base_mfn == mfn )
+ {
+- MEM_LOG("Error while installing new baseptr %lx", mfn);
++ write_ptbase(curr);
+ return 0;
+ }
+
+- invalidate_shadow_ldt(curr, 0);
++ rc = paging_mode_refcounts(d)
++ ? (get_page_from_pagenr(mfn, d) ? 0 : -EINVAL)
++ : get_page_and_type_from_pagenr(mfn, PGT_root_page_table, d, 0, 1);
++ switch ( rc )
++ {
++ case 0:
++ break;
++ case -EINTR:
++ case -EAGAIN:
++ return -EAGAIN;
++ default:
++ MEM_LOG("Error while installing new baseptr %lx", mfn);
++ return rc;
++ }
+
+- old_base_mfn = pagetable_get_pfn(curr->arch.guest_table);
++ invalidate_shadow_ldt(curr, 0);
+
+ curr->arch.guest_table = pagetable_from_pfn(mfn);
+ update_cr3(curr);
+@@ -2775,13 +2910,25 @@ int new_guest_cr3(unsigned long mfn)
+
+ if ( likely(old_base_mfn != 0) )
+ {
++ struct page_info *page = mfn_to_page(old_base_mfn);
++
+ if ( paging_mode_refcounts(d) )
+- put_page(mfn_to_page(old_base_mfn));
++ put_page(page);
+ else
+- put_page_and_type(mfn_to_page(old_base_mfn));
++ switch ( rc = put_page_and_type_preemptible(page, 1) )
++ {
++ case -EINTR:
++ rc = -EAGAIN;
++ case -EAGAIN:
++ curr->arch.old_guest_table = page;
++ break;
++ default:
++ BUG_ON(rc);
++ break;
++ }
+ }
+
+- return 1;
++ return rc;
+ }
+
+ static struct domain *get_pg_owner(domid_t domid)
+@@ -2910,12 +3057,29 @@ long do_mmuext_op(
+ unsigned int foreigndom)
+ {
+ struct mmuext_op op;
+- int rc = 0, i = 0, okay;
+ unsigned long type;
+- unsigned int done = 0;
++ unsigned int i = 0, done = 0;
+ struct vcpu *curr = current;
+ struct domain *d = curr->domain;
+ struct domain *pg_owner;
++ int okay, rc = put_old_guest_table(curr);
++
++ if ( unlikely(rc) )
++ {
++ if ( likely(rc == -EAGAIN) )
++ rc = hypercall_create_continuation(
++ __HYPERVISOR_mmuext_op, "hihi", uops, count, pdone,
++ foreigndom);
++ return rc;
++ }
++
++ if ( unlikely(count == MMU_UPDATE_PREEMPTED) &&
++ likely(guest_handle_is_null(uops)) )
++ {
++ /* See the curr->arch.old_guest_table related
++ * hypercall_create_continuation() below. */
++ return (int)foreigndom;
++ }
+
+ if ( unlikely(count & MMU_UPDATE_PREEMPTED) )
+ {
+@@ -2940,7 +3104,7 @@ long do_mmuext_op(
+
+ for ( i = 0; i < count; i++ )
+ {
+- if ( hypercall_preempt_check() )
++ if ( curr->arch.old_guest_table || hypercall_preempt_check() )
+ {
+ rc = -EAGAIN;
+ break;
+@@ -3000,21 +3164,17 @@ long do_mmuext_op(
+ page = mfn_to_page(mfn);
+
+ if ( (rc = xsm_memory_pin_page(d, page)) != 0 )
+- {
+- put_page_and_type(page);
+ okay = 0;
+- break;
+- }
+-
+- if ( unlikely(test_and_set_bit(_PGT_pinned,
+- &page->u.inuse.type_info)) )
++ else if ( unlikely(test_and_set_bit(_PGT_pinned,
++ &page->u.inuse.type_info)) )
+ {
+ MEM_LOG("Mfn %lx already pinned", mfn);
+- put_page_and_type(page);
+ okay = 0;
+- break;
+ }
+
++ if ( unlikely(!okay) )
++ goto pin_drop;
++
+ /* A page is dirtied when its pin status is set. */
+ paging_mark_dirty(pg_owner, mfn);
+
+@@ -3028,7 +3188,13 @@ long do_mmuext_op(
+ &page->u.inuse.type_info));
+ spin_unlock(&pg_owner->page_alloc_lock);
+ if ( drop_ref )
+- put_page_and_type(page);
++ {
++ pin_drop:
++ if ( type == PGT_l1_page_table )
++ put_page_and_type(page);
++ else
++ curr->arch.old_guest_table = page;
++ }
+ }
+
+ break;
+@@ -3058,7 +3224,17 @@ long do_mmuext_op(
+ break;
+ }
+
+- put_page_and_type(page);
++ switch ( rc = put_page_and_type_preemptible(page, 1) )
++ {
++ case -EINTR:
++ case -EAGAIN:
++ curr->arch.old_guest_table = page;
++ rc = 0;
++ break;
++ default:
++ BUG_ON(rc);
++ break;
++ }
+ put_page(page);
+
+ /* A page is dirtied when its pin status is cleared. */
+@@ -3068,7 +3244,8 @@ long do_mmuext_op(
+ }
+
+ case MMUEXT_NEW_BASEPTR:
+- okay = new_guest_cr3(gmfn_to_mfn(d, op.arg1.mfn));
++ rc = new_guest_cr3(gmfn_to_mfn(d, op.arg1.mfn));
++ okay = !rc;
+ break;
+
+ #ifdef __x86_64__
+@@ -3076,29 +3253,55 @@ long do_mmuext_op(
+ unsigned long old_mfn, mfn;
+
+ mfn = gmfn_to_mfn(d, op.arg1.mfn);
++ old_mfn = pagetable_get_pfn(curr->arch.guest_table_user);
++ /*
++ * This is particularly important when getting restarted after the
++ * previous attempt got preempted in the put-old-MFN phase.
++ */
++ if ( old_mfn == mfn )
++ break;
++
+ if ( mfn != 0 )
+ {
+ if ( paging_mode_refcounts(d) )
+ okay = get_page_from_pagenr(mfn, d);
+ else
+- okay = !get_page_and_type_from_pagenr(
+- mfn, PGT_root_page_table, d, 0, 0);
++ {
++ rc = get_page_and_type_from_pagenr(
++ mfn, PGT_root_page_table, d, 0, 1);
++ okay = !rc;
++ }
+ if ( unlikely(!okay) )
+ {
+- MEM_LOG("Error while installing new mfn %lx", mfn);
++ if ( rc == -EINTR )
++ rc = -EAGAIN;
++ else if ( rc != -EAGAIN )
++ MEM_LOG("Error while installing new mfn %lx", mfn);
+ break;
+ }
+ }
+
+- old_mfn = pagetable_get_pfn(curr->arch.guest_table_user);
+ curr->arch.guest_table_user = pagetable_from_pfn(mfn);
+
+ if ( old_mfn != 0 )
+ {
++ struct page_info *page = mfn_to_page(old_mfn);
++
+ if ( paging_mode_refcounts(d) )
+- put_page(mfn_to_page(old_mfn));
++ put_page(page);
+ else
+- put_page_and_type(mfn_to_page(old_mfn));
++ switch ( rc = put_page_and_type_preemptible(page, 1) )
++ {
++ case -EINTR:
++ rc = -EAGAIN;
++ case -EAGAIN:
++ curr->arch.old_guest_table = page;
++ okay = 0;
++ break;
++ default:
++ BUG_ON(rc);
++ break;
++ }
+ }
+
+ break;
+@@ -3337,9 +3540,27 @@ long do_mmuext_op(
+ }
+
+ if ( rc == -EAGAIN )
++ {
++ ASSERT(i < count);
+ rc = hypercall_create_continuation(
+ __HYPERVISOR_mmuext_op, "hihi",
+ uops, (count - i) | MMU_UPDATE_PREEMPTED, pdone, foreigndom);
++ }
++ else if ( curr->arch.old_guest_table )
++ {
++ XEN_GUEST_HANDLE(void) null;
++
++ ASSERT(rc || i == count);
++ set_xen_guest_handle(null, NULL);
++ /*
++ * In order to have a way to communicate the final return value to
++ * our continuation, we pass this in place of "foreigndom", building
++ * on the fact that this argument isn't needed anymore.
++ */
++ rc = hypercall_create_continuation(
++ __HYPERVISOR_mmuext_op, "hihi", null,
++ MMU_UPDATE_PREEMPTED, null, rc);
++ }
+
+ put_pg_owner(pg_owner);
+
+@@ -3366,11 +3587,28 @@ long do_mmu_update(
+ void *va;
+ unsigned long gpfn, gmfn, mfn;
+ struct page_info *page;
+- int rc = 0, okay = 1, i = 0;
+- unsigned int cmd, done = 0, pt_dom;
+- struct vcpu *v = current;
++ unsigned int cmd, i = 0, done = 0, pt_dom;
++ struct vcpu *curr = current, *v = curr;
+ struct domain *d = v->domain, *pt_owner = d, *pg_owner;
+ struct domain_mmap_cache mapcache;
++ int rc = put_old_guest_table(curr), okay = 1;
++
++ if ( unlikely(rc) )
++ {
++ if ( likely(rc == -EAGAIN) )
++ rc = hypercall_create_continuation(
++ __HYPERVISOR_mmu_update, "hihi", ureqs, count, pdone,
++ foreigndom);
++ return rc;
++ }
++
++ if ( unlikely(count == MMU_UPDATE_PREEMPTED) &&
++ likely(guest_handle_is_null(ureqs)) )
++ {
++ /* See the curr->arch.old_guest_table related
++ * hypercall_create_continuation() below. */
++ return (int)foreigndom;
++ }
+
+ if ( unlikely(count & MMU_UPDATE_PREEMPTED) )
+ {
+@@ -3419,7 +3657,7 @@ long do_mmu_update(
+
+ for ( i = 0; i < count; i++ )
+ {
+- if ( hypercall_preempt_check() )
++ if ( curr->arch.old_guest_table || hypercall_preempt_check() )
+ {
+ rc = -EAGAIN;
+ break;
+@@ -3684,9 +3922,27 @@ long do_mmu_update(
+ }
+
+ if ( rc == -EAGAIN )
++ {
++ ASSERT(i < count);
+ rc = hypercall_create_continuation(
+ __HYPERVISOR_mmu_update, "hihi",
+ ureqs, (count - i) | MMU_UPDATE_PREEMPTED, pdone, foreigndom);
++ }
++ else if ( curr->arch.old_guest_table )
++ {
++ XEN_GUEST_HANDLE(void) null;
++
++ ASSERT(rc || i == count);
++ set_xen_guest_handle(null, NULL);
++ /*
++ * In order to have a way to communicate the final return value to
++ * our continuation, we pass this in place of "foreigndom", building
++ * on the fact that this argument isn't needed anymore.
++ */
++ rc = hypercall_create_continuation(
++ __HYPERVISOR_mmu_update, "hihi", null,
++ MMU_UPDATE_PREEMPTED, null, rc);
++ }
+
+ put_pg_owner(pg_owner);
+
+diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c
+index 234d9ac..e336439 100644
+--- a/xen/arch/x86/traps.c
++++ b/xen/arch/x86/traps.c
+@@ -2317,8 +2317,15 @@ static int emulate_privileged_op(struct cpu_user_regs *regs)
+ rc = new_guest_cr3(gmfn_to_mfn(v->domain, compat_cr3_to_pfn(*reg)));
+ #endif
+ domain_unlock(v->domain);
+- if ( rc == 0 ) /* not okay */
++ switch ( rc )
++ {
++ case 0:
++ break;
++ case -EAGAIN: /* retry after preemption */
++ goto skip;
++ default: /* not okay */
+ goto fail;
++ }
+ break;
+
+ case 4: /* Write CR4 */
+diff --git a/xen/arch/x86/x86_64/compat/mm.c b/xen/arch/x86/x86_64/compat/mm.c
+index 3ef08a5..6ad41d4 100644
+--- a/xen/arch/x86/x86_64/compat/mm.c
++++ b/xen/arch/x86/x86_64/compat/mm.c
+@@ -222,6 +222,13 @@ int compat_mmuext_op(XEN_GUEST_HANDLE(mmuext_op_compat_t) cmp_uops,
+ int rc = 0;
+ XEN_GUEST_HANDLE(mmuext_op_t) nat_ops;
+
++ if ( unlikely(count == MMU_UPDATE_PREEMPTED) &&
++ likely(guest_handle_is_null(cmp_uops)) )
++ {
++ set_xen_guest_handle(nat_ops, NULL);
++ return do_mmuext_op(nat_ops, count, pdone, foreigndom);
++ }
++
+ preempt_mask = count & MMU_UPDATE_PREEMPTED;
+ count ^= preempt_mask;
+
+@@ -319,17 +326,23 @@ int compat_mmuext_op(XEN_GUEST_HANDLE(mmuext_op_compat_t) cmp_uops,
+ : mcs->call.args[1];
+ unsigned int left = arg1 & ~MMU_UPDATE_PREEMPTED;
+
+- BUG_ON(left == arg1);
++ BUG_ON(left == arg1 && left != i);
+ BUG_ON(left > count);
+ guest_handle_add_offset(nat_ops, i - left);
+ guest_handle_subtract_offset(cmp_uops, left);
+ left = 1;
+- BUG_ON(!hypercall_xlat_continuation(&left, 0x01, nat_ops, cmp_uops));
+- BUG_ON(left != arg1);
+- if (!test_bit(_MCSF_in_multicall, &mcs->flags))
+- regs->_ecx += count - i;
++ if ( arg1 != MMU_UPDATE_PREEMPTED )
++ {
++ BUG_ON(!hypercall_xlat_continuation(&left, 0x01, nat_ops,
++ cmp_uops));
++ if ( !test_bit(_MCSF_in_multicall, &mcs->flags) )
++ regs->_ecx += count - i;
++ else
++ mcs->compat_call.args[1] += count - i;
++ }
+ else
+- mcs->compat_call.args[1] += count - i;
++ BUG_ON(hypercall_xlat_continuation(&left, 0));
++ BUG_ON(left != arg1);
+ }
+ else
+ BUG_ON(err > 0);
+diff --git a/xen/common/compat/domain.c b/xen/common/compat/domain.c
+index 67e0e5e..5fe393f 100644
+--- a/xen/common/compat/domain.c
++++ b/xen/common/compat/domain.c
+@@ -52,6 +52,10 @@ int compat_vcpu_op(int cmd, int vcpuid, XEN_GUEST_HANDLE(void) arg)
+ rc = boot_vcpu(d, vcpuid, cmp_ctxt);
+ domain_unlock(d);
+
++ if ( rc == -EAGAIN )
++ rc = hypercall_create_continuation(__HYPERVISOR_vcpu_op, "iih",
++ cmd, vcpuid, arg);
++
+ xfree(cmp_ctxt);
+ break;
+ }
+diff --git a/xen/common/domain.c b/xen/common/domain.c
+index 054f7c4..5fa045b 100644
+--- a/xen/common/domain.c
++++ b/xen/common/domain.c
+@@ -770,14 +770,18 @@ int boot_vcpu(struct domain *d, int vcpuid, vcpu_guest_context_u ctxt)
+ return arch_set_info_guest(v, ctxt);
+ }
+
+-void vcpu_reset(struct vcpu *v)
++int vcpu_reset(struct vcpu *v)
+ {
+ struct domain *d = v->domain;
++ int rc;
+
+ vcpu_pause(v);
+ domain_lock(d);
+
+- arch_vcpu_reset(v);
++ set_bit(_VPF_in_reset, &v->pause_flags);
++ rc = arch_vcpu_reset(v);
++ if ( rc )
++ goto out_unlock;
+
+ set_bit(_VPF_down, &v->pause_flags);
+
+@@ -793,9 +797,13 @@ void vcpu_reset(struct vcpu *v)
+ #endif
+ cpus_clear(v->cpu_affinity_tmp);
+ clear_bit(_VPF_blocked, &v->pause_flags);
++ clear_bit(_VPF_in_reset, &v->pause_flags);
+
++ out_unlock:
+ domain_unlock(v->domain);
+ vcpu_unpause(v);
++
++ return rc;
+ }
+
+
+@@ -834,6 +842,11 @@ long do_vcpu_op(int cmd, int vcpuid, XEN_GUEST_HANDLE(void) arg)
+ domain_unlock(d);
+
+ xfree(ctxt);
++
++ if ( rc == -EAGAIN )
++ rc = hypercall_create_continuation(__HYPERVISOR_vcpu_op, "iih",
++ cmd, vcpuid, arg);
++
+ break;
+
+ case VCPUOP_up:
+diff --git a/xen/common/domctl.c b/xen/common/domctl.c
+index 981cb1a..faac366 100644
+--- a/xen/common/domctl.c
++++ b/xen/common/domctl.c
+@@ -286,8 +286,10 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domctl_t) u_domctl)
+
+ if ( guest_handle_is_null(op->u.vcpucontext.ctxt) )
+ {
+- vcpu_reset(v);
+- ret = 0;
++ ret = vcpu_reset(v);
++ if ( ret == -EAGAIN )
++ ret = hypercall_create_continuation(
++ __HYPERVISOR_domctl, "h", u_domctl);
+ goto svc_out;
+ }
+
+@@ -316,6 +318,10 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domctl_t) u_domctl)
+ domain_pause(d);
+ ret = arch_set_info_guest(v, c);
+ domain_unpause(d);
++
++ if ( ret == -EAGAIN )
++ ret = hypercall_create_continuation(
++ __HYPERVISOR_domctl, "h", u_domctl);
+ }
+
+ svc_out:
+diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h
+index fe1459d..a387862 100644
+--- a/xen/include/asm-x86/domain.h
++++ b/xen/include/asm-x86/domain.h
+@@ -405,6 +405,7 @@ struct arch_vcpu
+ pagetable_t guest_table_user; /* (MFN) x86/64 user-space pagetable */
+ #endif
+ pagetable_t guest_table; /* (MFN) guest notion of cr3 */
++ struct page_info *old_guest_table; /* partially destructed pagetable */
+ /* guest_table holds a ref to the page, and also a type-count unless
+ * shadow refcounts are in use */
+ pagetable_t shadow_table[4]; /* (MFN) shadow(s) of guest */
+diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h
+index c93a022..2498007 100644
+--- a/xen/include/asm-x86/mm.h
++++ b/xen/include/asm-x86/mm.h
+@@ -554,6 +554,7 @@ void audit_domains(void);
+ int new_guest_cr3(unsigned long pfn);
+ void make_cr3(struct vcpu *v, unsigned long mfn);
+ void update_cr3(struct vcpu *v);
++int vcpu_destroy_pagetables(struct vcpu *);
+ void propagate_page_fault(unsigned long addr, u16 error_code);
+ void *do_page_walk(struct vcpu *v, unsigned long addr);
+
+diff --git a/xen/include/xen/domain.h b/xen/include/xen/domain.h
+index edffd1f..5175ef7 100644
+--- a/xen/include/xen/domain.h
++++ b/xen/include/xen/domain.h
+@@ -15,7 +15,7 @@ struct vcpu *alloc_vcpu(
+ int boot_vcpu(
+ struct domain *d, int vcpuid, vcpu_guest_context_u ctxt);
+ struct vcpu *alloc_dom0_vcpu0(void);
+-void vcpu_reset(struct vcpu *v);
++int vcpu_reset(struct vcpu *);
+
+ struct xen_domctl_getdomaininfo;
+ void getdomaininfo(struct domain *d, struct xen_domctl_getdomaininfo *info);
+@@ -57,7 +57,7 @@ void arch_dump_vcpu_info(struct vcpu *v);
+
+ void arch_dump_domain_info(struct domain *d);
+
+-void arch_vcpu_reset(struct vcpu *v);
++int arch_vcpu_reset(struct vcpu *);
+
+ bool_t domctl_lock_acquire(void);
+ void domctl_lock_release(void);
+diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
+index 35c3a7f..c04b25d 100644
+--- a/xen/include/xen/sched.h
++++ b/xen/include/xen/sched.h
+@@ -592,6 +592,9 @@ extern struct domain *domain_list;
+ /* VCPU is blocked on memory-event ring. */
+ #define _VPF_mem_event 4
+ #define VPF_mem_event (1UL<<_VPF_mem_event)
++ /* VCPU is being reset. */
++#define _VPF_in_reset 7
++#define VPF_in_reset (1UL<<_VPF_in_reset)
+
+ static inline int vcpu_runnable(struct vcpu *v)
+ {
diff --git a/main/xen/xsa58-4.1.patch b/main/xen/xsa58-4.1.patch
new file mode 100644
index 0000000000..3c982a35e0
--- /dev/null
+++ b/main/xen/xsa58-4.1.patch
@@ -0,0 +1,119 @@
+x86: fix page refcount handling in page table pin error path
+
+In the original patch 7 of the series addressing XSA-45 I mistakenly
+took the addition of the call to get_page_light() in alloc_page_type()
+to cover two decrements that would happen: One for the PGT_partial bit
+that is getting set along with the call, and the other for the page
+reference the caller holds (and would be dropping on its error path).
+But of course the additional page reference is tied to the PGT_partial
+bit, and hence any caller of a function that may leave
+->arch.old_guest_table non-NULL for error cleanup purposes has to make
+sure a respective page reference gets retained.
+
+Similar issues were then also spotted elsewhere: In effect all callers
+of get_page_type_preemptible() need to deal with errors in similar
+ways. To make sure error handling can work this way without leaking
+page references, a respective assertion gets added to that function.
+
+This is CVE-2013-1432 / XSA-58.
+
+Reported-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Tested-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Reviewed-by: Tim Deegan <tim@xen.org>
+
+--- a/xen/arch/x86/domain.c
++++ b/xen/arch/x86/domain.c
+@@ -798,6 +798,10 @@ int arch_set_info_guest(
+ if ( v->vcpu_id == 0 )
+ d->vm_assist = c(vm_assist);
+
++ rc = put_old_guest_table(current);
++ if ( rc )
++ return rc;
++
+ if ( !compat )
+ rc = (int)set_gdt(v, c.nat->gdt_frames, c.nat->gdt_ents);
+ #ifdef CONFIG_COMPAT
+@@ -840,18 +844,24 @@ int arch_set_info_guest(
+ }
+ else
+ {
+- /*
+- * Since v->arch.guest_table{,_user} are both NULL, this effectively
+- * is just a call to put_old_guest_table().
+- */
+ if ( !compat )
+- rc = vcpu_destroy_pagetables(v);
++ rc = put_old_guest_table(v);
+ if ( !rc )
+ rc = get_page_type_preemptible(cr3_page,
+ !compat ? PGT_root_page_table
+ : PGT_l3_page_table);
+- if ( rc == -EINTR )
++ switch ( rc )
++ {
++ case -EINTR:
+ rc = -EAGAIN;
++ case -EAGAIN:
++ case 0:
++ break;
++ default:
++ if ( cr3_page == current->arch.old_guest_table )
++ cr3_page = NULL;
++ break;
++ }
+ }
+
+ if ( rc )
+@@ -883,6 +893,11 @@ int arch_set_info_guest(
+ pagetable_get_page(v->arch.guest_table);
+ v->arch.guest_table = pagetable_null();
+ break;
++ default:
++ if ( cr3_page == current->arch.old_guest_table )
++ cr3_page = NULL;
++ case 0:
++ break;
+ }
+ }
+
+--- a/xen/arch/x86/mm.c
++++ b/xen/arch/x86/mm.c
+@@ -682,7 +682,8 @@ static int get_page_and_type_from_pagenr
+ get_page_type_preemptible(page, type) :
+ (get_page_type(page, type) ? 0 : -EINVAL));
+
+- if ( unlikely(rc) && partial >= 0 )
++ if ( unlikely(rc) && partial >= 0 &&
++ (!preemptible || page != current->arch.old_guest_table) )
+ put_page(page);
+
+ return rc;
+@@ -2555,6 +2556,7 @@ int put_page_type_preemptible(struct pag
+
+ int get_page_type_preemptible(struct page_info *page, unsigned long type)
+ {
++ ASSERT(!current->arch.old_guest_table);
+ return __get_page_type(page, type, 1);
+ }
+
+@@ -2765,7 +2767,7 @@ static void put_superpage(unsigned long
+
+ #endif
+
+-static int put_old_guest_table(struct vcpu *v)
++int put_old_guest_table(struct vcpu *v)
+ {
+ int rc;
+
+--- a/xen/include/asm-x86/mm.h
++++ b/xen/include/asm-x86/mm.h
+@@ -337,6 +337,7 @@ void put_page_type(struct page_info *pag
+ int get_page_type(struct page_info *page, unsigned long type);
+ int put_page_type_preemptible(struct page_info *page);
+ int get_page_type_preemptible(struct page_info *page, unsigned long type);
++int put_old_guest_table(struct vcpu *);
+ int get_page_from_l1e(
+ l1_pgentry_t l1e, struct domain *l1e_owner, struct domain *pg_owner);
+ void put_page_from_l1e(l1_pgentry_t l1e, struct domain *l1e_owner);