diff options
author | Natanael Copa <ncopa@alpinelinux.org> | 2013-07-02 09:52:41 +0000 |
---|---|---|
committer | Natanael Copa <ncopa@alpinelinux.org> | 2013-07-02 09:54:33 +0000 |
commit | 14e8058dddb5be40c29deb267ffbc23171991c7a (patch) | |
tree | 26a1c27117adbfe6980444978a3b18e1e48f8e52 | |
parent | 142cf745af9329dae1913280158d183956942c1a (diff) | |
download | aports-14e8058dddb5be40c29deb267ffbc23171991c7a.tar.bz2 aports-14e8058dddb5be40c29deb267ffbc23171991c7a.tar.xz |
main/xen: fix xsa45 and xsa58 (CVE-2013-1918,CVE-2013-1432)
-rw-r--r-- | main/xen/APKBUILD | 6 | ||||
-rw-r--r-- | main/xen/xsa45-4.1.patch | 1113 | ||||
-rw-r--r-- | main/xen/xsa58-4.1.patch | 119 |
3 files changed, 1237 insertions, 1 deletions
diff --git a/main/xen/APKBUILD b/main/xen/APKBUILD index 4f44f4d0a6..4d95a09909 100644 --- a/main/xen/APKBUILD +++ b/main/xen/APKBUILD @@ -2,7 +2,7 @@ # Maintainer: William Pitcock <nenolod@dereferenced.org> pkgname=xen pkgver=4.1.4 -pkgrel=5 +pkgrel=6 pkgdesc="Xen hypervisor" url="http://www.xen.org/" arch="x86 x86_64" @@ -23,12 +23,14 @@ source="http://bits.xensource.com/oss-xen/release/$pkgver/$pkgname-$pkgver.tar.g busybox-sed.patch xsa33-4.1.patch xsa41.patch + xsa45-4.1.patch xsa52-4.1.patch xsa53-4.1.patch xsa54.patch xsa55-4.1.patch xsa56.patch xsa57-4.1.patch + xsa58-4.1.patch xencommons.initd xend.initd @@ -106,12 +108,14 @@ fa06495a175571f4aa3b6cb88937953e librt.patch 1bea3543ddc712330527b62fd9ff6520 busybox-sed.patch 25ba4efc5eee29daa12855fbadce84f8 xsa33-4.1.patch ce56f00762139cd611dfc3332b7571cf xsa41.patch +09c675a4a28ee00dd9abeacc07426edd xsa45-4.1.patch db1e5a92547c8c8ed2e1872efed99ab0 xsa52-4.1.patch e11ae888997d11fcb91b431ebf609d4e xsa53-4.1.patch a8393d1ec6b886ea72ffe624a04ee10a xsa54.patch 391d90e3851df0b42b2971e8b860e19a xsa55-4.1.patch e70b9128ffc2175cea314a533a7d8457 xsa56.patch a065178b8f5ed028b97fa51db97e41e2 xsa57-4.1.patch +dd46795cf7bb7bb9baa50bfdf4813d4f xsa58-4.1.patch 0b62c1fbe2699a32e745724fd301db5b xencommons.initd 5ee6a16ec70dfbcd4944ded71b393fa2 xend.initd a2b5234483f1b5892d22e9315d9c307f xendomains.initd" diff --git a/main/xen/xsa45-4.1.patch b/main/xen/xsa45-4.1.patch new file mode 100644 index 0000000000..6dbf1f4456 --- /dev/null +++ b/main/xen/xsa45-4.1.patch @@ -0,0 +1,1113 @@ +diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c +index 4009a60..9a34488 100644 +--- a/xen/arch/x86/domain.c ++++ b/xen/arch/x86/domain.c +@@ -70,8 +70,6 @@ void (*dead_idle) (void) __read_mostly = default_dead_idle; + static void paravirt_ctxt_switch_from(struct vcpu *v); + static void paravirt_ctxt_switch_to(struct vcpu *v); + +-static void vcpu_destroy_pagetables(struct vcpu *v); +- + static void continue_idle_domain(struct vcpu *v) + { + 
reset_stack_and_jump(idle_loop); +@@ -678,6 +676,7 @@ int arch_set_info_guest( + { + struct domain *d = v->domain; + unsigned long cr3_pfn = INVALID_MFN; ++ struct page_info *cr3_page; + unsigned long flags, cr4; + int i, rc = 0, compat; + +@@ -817,72 +816,103 @@ int arch_set_info_guest( + if ( rc != 0 ) + return rc; + ++ set_bit(_VPF_in_reset, &v->pause_flags); ++ + if ( !compat ) +- { + cr3_pfn = gmfn_to_mfn(d, xen_cr3_to_pfn(c.nat->ctrlreg[3])); ++#ifdef __x86_64__ ++ else ++ cr3_pfn = gmfn_to_mfn(d, compat_cr3_to_pfn(c.cmp->ctrlreg[3])); ++#endif ++ cr3_page = mfn_to_page(cr3_pfn); + +- if ( !mfn_valid(cr3_pfn) || +- (paging_mode_refcounts(d) +- ? !get_page(mfn_to_page(cr3_pfn), d) +- : !get_page_and_type(mfn_to_page(cr3_pfn), d, +- PGT_base_page_table)) ) +- { +- destroy_gdt(v); +- return -EINVAL; +- } ++ if ( !mfn_valid(cr3_pfn) || !get_page(cr3_page, d) ) ++ { ++ cr3_page = NULL; ++ rc = -EINVAL; ++ } ++ else if ( paging_mode_refcounts(d) ) ++ /* nothing */; ++ else if ( cr3_page == v->arch.old_guest_table ) ++ { ++ v->arch.old_guest_table = NULL; ++ put_page(cr3_page); ++ } ++ else ++ { ++ /* ++ * Since v->arch.guest_table{,_user} are both NULL, this effectively ++ * is just a call to put_old_guest_table(). ++ */ ++ if ( !compat ) ++ rc = vcpu_destroy_pagetables(v); ++ if ( !rc ) ++ rc = get_page_type_preemptible(cr3_page, ++ !compat ? PGT_root_page_table ++ : PGT_l3_page_table); ++ if ( rc == -EINTR ) ++ rc = -EAGAIN; ++ } + ++ if ( rc ) ++ /* handled below */; ++ else if ( !compat ) ++ { + v->arch.guest_table = pagetable_from_pfn(cr3_pfn); + + #ifdef __x86_64__ + if ( c.nat->ctrlreg[1] ) + { + cr3_pfn = gmfn_to_mfn(d, xen_cr3_to_pfn(c.nat->ctrlreg[1])); ++ cr3_page = mfn_to_page(cr3_pfn); + +- if ( !mfn_valid(cr3_pfn) || +- (paging_mode_refcounts(d) +- ? 
!get_page(mfn_to_page(cr3_pfn), d) +- : !get_page_and_type(mfn_to_page(cr3_pfn), d, +- PGT_base_page_table)) ) ++ if ( !mfn_valid(cr3_pfn) || !get_page(cr3_page, d) ) + { +- cr3_pfn = pagetable_get_pfn(v->arch.guest_table); +- v->arch.guest_table = pagetable_null(); +- if ( paging_mode_refcounts(d) ) +- put_page(mfn_to_page(cr3_pfn)); +- else +- put_page_and_type(mfn_to_page(cr3_pfn)); +- destroy_gdt(v); +- return -EINVAL; ++ cr3_page = NULL; ++ rc = -EINVAL; ++ } ++ else if ( !paging_mode_refcounts(d) ) ++ { ++ rc = get_page_type_preemptible(cr3_page, PGT_root_page_table); ++ switch ( rc ) ++ { ++ case -EINTR: ++ rc = -EAGAIN; ++ case -EAGAIN: ++ v->arch.old_guest_table = ++ pagetable_get_page(v->arch.guest_table); ++ v->arch.guest_table = pagetable_null(); ++ break; ++ } + } + +- v->arch.guest_table_user = pagetable_from_pfn(cr3_pfn); ++ if ( !rc ) ++ v->arch.guest_table_user = pagetable_from_pfn(cr3_pfn); + } + else if ( !(flags & VGCF_in_kernel) ) + { +- destroy_gdt(v); +- return -EINVAL; ++ cr3_page = NULL; ++ rc = -EINVAL; + } + } + else + { + l4_pgentry_t *l4tab; + +- cr3_pfn = gmfn_to_mfn(d, compat_cr3_to_pfn(c.cmp->ctrlreg[3])); +- +- if ( !mfn_valid(cr3_pfn) || +- (paging_mode_refcounts(d) +- ? 
!get_page(mfn_to_page(cr3_pfn), d) +- : !get_page_and_type(mfn_to_page(cr3_pfn), d, +- PGT_l3_page_table)) ) +- { +- destroy_gdt(v); +- return -EINVAL; +- } +- + l4tab = __va(pagetable_get_paddr(v->arch.guest_table)); + *l4tab = l4e_from_pfn( + cr3_pfn, _PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_ACCESSED); + #endif + } ++ if ( rc ) ++ { ++ if ( cr3_page ) ++ put_page(cr3_page); ++ destroy_gdt(v); ++ return rc; ++ } ++ ++ clear_bit(_VPF_in_reset, &v->pause_flags); + + if ( v->vcpu_id == 0 ) + update_domain_wallclock_time(d); +@@ -904,17 +934,16 @@ int arch_set_info_guest( + #undef c + } + +-void arch_vcpu_reset(struct vcpu *v) ++int arch_vcpu_reset(struct vcpu *v) + { + if ( !is_hvm_vcpu(v) ) + { + destroy_gdt(v); +- vcpu_destroy_pagetables(v); +- } +- else +- { +- vcpu_end_shutdown_deferral(v); ++ return vcpu_destroy_pagetables(v); + } ++ ++ vcpu_end_shutdown_deferral(v); ++ return 0; + } + + /* +@@ -1917,63 +1946,6 @@ static int relinquish_memory( + return ret; + } + +-static void vcpu_destroy_pagetables(struct vcpu *v) +-{ +- struct domain *d = v->domain; +- unsigned long pfn; +- +-#ifdef __x86_64__ +- if ( is_pv_32on64_vcpu(v) ) +- { +- pfn = l4e_get_pfn(*(l4_pgentry_t *) +- __va(pagetable_get_paddr(v->arch.guest_table))); +- +- if ( pfn != 0 ) +- { +- if ( paging_mode_refcounts(d) ) +- put_page(mfn_to_page(pfn)); +- else +- put_page_and_type(mfn_to_page(pfn)); +- } +- +- l4e_write( +- (l4_pgentry_t *)__va(pagetable_get_paddr(v->arch.guest_table)), +- l4e_empty()); +- +- v->arch.cr3 = 0; +- return; +- } +-#endif +- +- pfn = pagetable_get_pfn(v->arch.guest_table); +- if ( pfn != 0 ) +- { +- if ( paging_mode_refcounts(d) ) +- put_page(mfn_to_page(pfn)); +- else +- put_page_and_type(mfn_to_page(pfn)); +- v->arch.guest_table = pagetable_null(); +- } +- +-#ifdef __x86_64__ +- /* Drop ref to guest_table_user (from MMUEXT_NEW_USER_BASEPTR) */ +- pfn = pagetable_get_pfn(v->arch.guest_table_user); +- if ( pfn != 0 ) +- { +- if ( !is_pv_32bit_vcpu(v) ) +- { +- if ( 
paging_mode_refcounts(d) ) +- put_page(mfn_to_page(pfn)); +- else +- put_page_and_type(mfn_to_page(pfn)); +- } +- v->arch.guest_table_user = pagetable_null(); +- } +-#endif +- +- v->arch.cr3 = 0; +-} +- + int domain_relinquish_resources(struct domain *d) + { + int ret; +@@ -1992,7 +1964,9 @@ int domain_relinquish_resources(struct domain *d) + for_each_vcpu ( d, v ) + { + /* Drop the in-use references to page-table bases. */ +- vcpu_destroy_pagetables(v); ++ ret = vcpu_destroy_pagetables(v); ++ if ( ret ) ++ return ret; + + /* + * Relinquish GDT mappings. No need for explicit unmapping of the +diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c +index 9f53728..140e70c 100644 +--- a/xen/arch/x86/hvm/hvm.c ++++ b/xen/arch/x86/hvm/hvm.c +@@ -3083,8 +3083,11 @@ static void hvm_s3_suspend(struct domain *d) + + for_each_vcpu ( d, v ) + { ++ int rc; ++ + vlapic_reset(vcpu_vlapic(v)); +- vcpu_reset(v); ++ rc = vcpu_reset(v); ++ ASSERT(!rc); + } + + vpic_reset(d); +diff --git a/xen/arch/x86/hvm/vlapic.c b/xen/arch/x86/hvm/vlapic.c +index 3af41cc..8d47bd0 100644 +--- a/xen/arch/x86/hvm/vlapic.c ++++ b/xen/arch/x86/hvm/vlapic.c +@@ -252,10 +252,13 @@ static void vlapic_init_sipi_action(unsigned long _vcpu) + { + case APIC_DM_INIT: { + bool_t fpu_initialised; ++ int rc; ++ + domain_lock(target->domain); + /* Reset necessary VCPU state. This does not include FPU state. 
*/ + fpu_initialised = target->fpu_initialised; +- vcpu_reset(target); ++ rc = vcpu_reset(target); ++ ASSERT(!rc); + target->fpu_initialised = fpu_initialised; + vlapic_reset(vcpu_vlapic(target)); + domain_unlock(target->domain); +diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c +index 30d281d..ceeb998 100644 +--- a/xen/arch/x86/mm.c ++++ b/xen/arch/x86/mm.c +@@ -1182,7 +1182,16 @@ static int put_page_from_l3e(l3_pgentry_t l3e, unsigned long pfn, + #endif + + if ( unlikely(partial > 0) ) ++ { ++ ASSERT(preemptible >= 0); + return __put_page_type(l3e_get_page(l3e), preemptible); ++ } ++ ++ if ( preemptible < 0 ) ++ { ++ current->arch.old_guest_table = l3e_get_page(l3e); ++ return 0; ++ } + + return put_page_and_type_preemptible(l3e_get_page(l3e), preemptible); + } +@@ -1195,7 +1204,17 @@ static int put_page_from_l4e(l4_pgentry_t l4e, unsigned long pfn, + (l4e_get_pfn(l4e) != pfn) ) + { + if ( unlikely(partial > 0) ) ++ { ++ ASSERT(preemptible >= 0); + return __put_page_type(l4e_get_page(l4e), preemptible); ++ } ++ ++ if ( preemptible < 0 ) ++ { ++ current->arch.old_guest_table = l4e_get_page(l4e); ++ return 0; ++ } ++ + return put_page_and_type_preemptible(l4e_get_page(l4e), preemptible); + } + return 1; +@@ -1485,12 +1504,17 @@ static int alloc_l3_table(struct page_info *page, int preemptible) + if ( rc < 0 && rc != -EAGAIN && rc != -EINTR ) + { + MEM_LOG("Failure in alloc_l3_table: entry %d", i); ++ if ( i ) ++ { ++ page->nr_validated_ptes = i; ++ page->partial_pte = 0; ++ current->arch.old_guest_table = page; ++ } + while ( i-- > 0 ) + { + if ( !is_guest_l3_slot(i) ) + continue; + unadjust_guest_l3e(pl3e[i], d); +- put_page_from_l3e(pl3e[i], pfn, 0, 0); + } + } + +@@ -1520,22 +1544,24 @@ static int alloc_l4_table(struct page_info *page, int preemptible) + page->nr_validated_ptes = i; + page->partial_pte = partial ?: 1; + } +- else if ( rc == -EINTR ) ++ else if ( rc < 0 ) + { ++ if ( rc != -EINTR ) ++ MEM_LOG("Failure in alloc_l4_table: entry %d", i); + if ( i 
) + { + page->nr_validated_ptes = i; + page->partial_pte = 0; +- rc = -EAGAIN; ++ if ( rc == -EINTR ) ++ rc = -EAGAIN; ++ else ++ { ++ if ( current->arch.old_guest_table ) ++ page->nr_validated_ptes++; ++ current->arch.old_guest_table = page; ++ } + } + } +- else if ( rc < 0 ) +- { +- MEM_LOG("Failure in alloc_l4_table: entry %d", i); +- while ( i-- > 0 ) +- if ( is_guest_l4_slot(d, i) ) +- put_page_from_l4e(pl4e[i], pfn, 0, 0); +- } + if ( rc < 0 ) + return rc; + +@@ -1965,7 +1991,7 @@ static int mod_l3_entry(l3_pgentry_t *pl3e, + pae_flush_pgd(pfn, pgentry_ptr_to_slot(pl3e), nl3e); + } + +- put_page_from_l3e(ol3e, pfn, 0, 0); ++ put_page_from_l3e(ol3e, pfn, 0, -preemptible); + return rc; + } + +@@ -2028,7 +2054,7 @@ static int mod_l4_entry(l4_pgentry_t *pl4e, + return -EFAULT; + } + +- put_page_from_l4e(ol4e, pfn, 0, 0); ++ put_page_from_l4e(ol4e, pfn, 0, -preemptible); + return rc; + } + +@@ -2186,7 +2212,15 @@ static int alloc_page_type(struct page_info *page, unsigned long type, + PRtype_info ": caf=%08lx taf=%" PRtype_info, + page_to_mfn(page), get_gpfn_from_mfn(page_to_mfn(page)), + type, page->count_info, page->u.inuse.type_info); +- page->u.inuse.type_info = 0; ++ if ( page != current->arch.old_guest_table ) ++ page->u.inuse.type_info = 0; ++ else ++ { ++ ASSERT((page->u.inuse.type_info & ++ (PGT_count_mask | PGT_validated)) == 1); ++ get_page_light(page); ++ page->u.inuse.type_info |= PGT_partial; ++ } + } + else + { +@@ -2724,49 +2758,150 @@ static void put_superpage(unsigned long mfn) + + #endif + ++static int put_old_guest_table(struct vcpu *v) ++{ ++ int rc; ++ ++ if ( !v->arch.old_guest_table ) ++ return 0; ++ ++ switch ( rc = put_page_and_type_preemptible(v->arch.old_guest_table, 1) ) ++ { ++ case -EINTR: ++ case -EAGAIN: ++ return -EAGAIN; ++ } ++ ++ v->arch.old_guest_table = NULL; ++ ++ return rc; ++} ++ ++int vcpu_destroy_pagetables(struct vcpu *v) ++{ ++ unsigned long mfn = pagetable_get_pfn(v->arch.guest_table); ++ struct page_info *page; ++ 
int rc = put_old_guest_table(v); ++ ++ if ( rc ) ++ return rc; ++ ++#ifdef __x86_64__ ++ if ( is_pv_32on64_vcpu(v) ) ++ mfn = l4e_get_pfn(*(l4_pgentry_t *)mfn_to_virt(mfn)); ++#endif ++ ++ if ( mfn ) ++ { ++ page = mfn_to_page(mfn); ++ if ( paging_mode_refcounts(v->domain) ) ++ put_page(page); ++ else ++ rc = put_page_and_type_preemptible(page, 1); ++ } ++ ++#ifdef __x86_64__ ++ if ( is_pv_32on64_vcpu(v) ) ++ { ++ if ( !rc ) ++ l4e_write( ++ (l4_pgentry_t *)__va(pagetable_get_paddr(v->arch.guest_table)), ++ l4e_empty()); ++ } ++ else ++#endif ++ if ( !rc ) ++ { ++ v->arch.guest_table = pagetable_null(); ++ ++#ifdef __x86_64__ ++ /* Drop ref to guest_table_user (from MMUEXT_NEW_USER_BASEPTR) */ ++ mfn = pagetable_get_pfn(v->arch.guest_table_user); ++ if ( mfn ) ++ { ++ page = mfn_to_page(mfn); ++ if ( paging_mode_refcounts(v->domain) ) ++ put_page(page); ++ else ++ rc = put_page_and_type_preemptible(page, 1); ++ } ++ if ( !rc ) ++ v->arch.guest_table_user = pagetable_null(); ++#endif ++ } ++ ++ v->arch.cr3 = 0; ++ ++ return rc; ++} + + int new_guest_cr3(unsigned long mfn) + { + struct vcpu *curr = current; + struct domain *d = curr->domain; +- int okay; ++ int rc; + unsigned long old_base_mfn; + + #ifdef __x86_64__ + if ( is_pv_32on64_domain(d) ) + { +- okay = paging_mode_refcounts(d) +- ? 0 /* Old code was broken, but what should it be? */ +- : mod_l4_entry( ++ rc = paging_mode_refcounts(d) ++ ? -EINVAL /* Old code was broken, but what should it be? 
*/ ++ : mod_l4_entry( + __va(pagetable_get_paddr(curr->arch.guest_table)), + l4e_from_pfn( + mfn, + (_PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_ACCESSED)), +- pagetable_get_pfn(curr->arch.guest_table), 0, 0, curr) == 0; +- if ( unlikely(!okay) ) ++ pagetable_get_pfn(curr->arch.guest_table), 0, 1, curr); ++ switch ( rc ) + { ++ case 0: ++ break; ++ case -EINTR: ++ case -EAGAIN: ++ return -EAGAIN; ++ default: + MEM_LOG("Error while installing new compat baseptr %lx", mfn); +- return 0; ++ return rc; + } + + invalidate_shadow_ldt(curr, 0); + write_ptbase(curr); + +- return 1; ++ return 0; + } + #endif +- okay = paging_mode_refcounts(d) +- ? get_page_from_pagenr(mfn, d) +- : !get_page_and_type_from_pagenr(mfn, PGT_root_page_table, d, 0, 0); +- if ( unlikely(!okay) ) ++ rc = put_old_guest_table(curr); ++ if ( unlikely(rc) ) ++ return rc; ++ ++ old_base_mfn = pagetable_get_pfn(curr->arch.guest_table); ++ /* ++ * This is particularly important when getting restarted after the ++ * previous attempt got preempted in the put-old-MFN phase. ++ */ ++ if ( old_base_mfn == mfn ) + { +- MEM_LOG("Error while installing new baseptr %lx", mfn); ++ write_ptbase(curr); + return 0; + } + +- invalidate_shadow_ldt(curr, 0); ++ rc = paging_mode_refcounts(d) ++ ? (get_page_from_pagenr(mfn, d) ? 
0 : -EINVAL) ++ : get_page_and_type_from_pagenr(mfn, PGT_root_page_table, d, 0, 1); ++ switch ( rc ) ++ { ++ case 0: ++ break; ++ case -EINTR: ++ case -EAGAIN: ++ return -EAGAIN; ++ default: ++ MEM_LOG("Error while installing new baseptr %lx", mfn); ++ return rc; ++ } + +- old_base_mfn = pagetable_get_pfn(curr->arch.guest_table); ++ invalidate_shadow_ldt(curr, 0); + + curr->arch.guest_table = pagetable_from_pfn(mfn); + update_cr3(curr); +@@ -2775,13 +2910,25 @@ int new_guest_cr3(unsigned long mfn) + + if ( likely(old_base_mfn != 0) ) + { ++ struct page_info *page = mfn_to_page(old_base_mfn); ++ + if ( paging_mode_refcounts(d) ) +- put_page(mfn_to_page(old_base_mfn)); ++ put_page(page); + else +- put_page_and_type(mfn_to_page(old_base_mfn)); ++ switch ( rc = put_page_and_type_preemptible(page, 1) ) ++ { ++ case -EINTR: ++ rc = -EAGAIN; ++ case -EAGAIN: ++ curr->arch.old_guest_table = page; ++ break; ++ default: ++ BUG_ON(rc); ++ break; ++ } + } + +- return 1; ++ return rc; + } + + static struct domain *get_pg_owner(domid_t domid) +@@ -2910,12 +3057,29 @@ long do_mmuext_op( + unsigned int foreigndom) + { + struct mmuext_op op; +- int rc = 0, i = 0, okay; + unsigned long type; +- unsigned int done = 0; ++ unsigned int i = 0, done = 0; + struct vcpu *curr = current; + struct domain *d = curr->domain; + struct domain *pg_owner; ++ int okay, rc = put_old_guest_table(curr); ++ ++ if ( unlikely(rc) ) ++ { ++ if ( likely(rc == -EAGAIN) ) ++ rc = hypercall_create_continuation( ++ __HYPERVISOR_mmuext_op, "hihi", uops, count, pdone, ++ foreigndom); ++ return rc; ++ } ++ ++ if ( unlikely(count == MMU_UPDATE_PREEMPTED) && ++ likely(guest_handle_is_null(uops)) ) ++ { ++ /* See the curr->arch.old_guest_table related ++ * hypercall_create_continuation() below. 
*/ ++ return (int)foreigndom; ++ } + + if ( unlikely(count & MMU_UPDATE_PREEMPTED) ) + { +@@ -2940,7 +3104,7 @@ long do_mmuext_op( + + for ( i = 0; i < count; i++ ) + { +- if ( hypercall_preempt_check() ) ++ if ( curr->arch.old_guest_table || hypercall_preempt_check() ) + { + rc = -EAGAIN; + break; +@@ -3000,21 +3164,17 @@ long do_mmuext_op( + page = mfn_to_page(mfn); + + if ( (rc = xsm_memory_pin_page(d, page)) != 0 ) +- { +- put_page_and_type(page); + okay = 0; +- break; +- } +- +- if ( unlikely(test_and_set_bit(_PGT_pinned, +- &page->u.inuse.type_info)) ) ++ else if ( unlikely(test_and_set_bit(_PGT_pinned, ++ &page->u.inuse.type_info)) ) + { + MEM_LOG("Mfn %lx already pinned", mfn); +- put_page_and_type(page); + okay = 0; +- break; + } + ++ if ( unlikely(!okay) ) ++ goto pin_drop; ++ + /* A page is dirtied when its pin status is set. */ + paging_mark_dirty(pg_owner, mfn); + +@@ -3028,7 +3188,13 @@ long do_mmuext_op( + &page->u.inuse.type_info)); + spin_unlock(&pg_owner->page_alloc_lock); + if ( drop_ref ) +- put_page_and_type(page); ++ { ++ pin_drop: ++ if ( type == PGT_l1_page_table ) ++ put_page_and_type(page); ++ else ++ curr->arch.old_guest_table = page; ++ } + } + + break; +@@ -3058,7 +3224,17 @@ long do_mmuext_op( + break; + } + +- put_page_and_type(page); ++ switch ( rc = put_page_and_type_preemptible(page, 1) ) ++ { ++ case -EINTR: ++ case -EAGAIN: ++ curr->arch.old_guest_table = page; ++ rc = 0; ++ break; ++ default: ++ BUG_ON(rc); ++ break; ++ } + put_page(page); + + /* A page is dirtied when its pin status is cleared. 
*/ +@@ -3068,7 +3244,8 @@ long do_mmuext_op( + } + + case MMUEXT_NEW_BASEPTR: +- okay = new_guest_cr3(gmfn_to_mfn(d, op.arg1.mfn)); ++ rc = new_guest_cr3(gmfn_to_mfn(d, op.arg1.mfn)); ++ okay = !rc; + break; + + #ifdef __x86_64__ +@@ -3076,29 +3253,55 @@ long do_mmuext_op( + unsigned long old_mfn, mfn; + + mfn = gmfn_to_mfn(d, op.arg1.mfn); ++ old_mfn = pagetable_get_pfn(curr->arch.guest_table_user); ++ /* ++ * This is particularly important when getting restarted after the ++ * previous attempt got preempted in the put-old-MFN phase. ++ */ ++ if ( old_mfn == mfn ) ++ break; ++ + if ( mfn != 0 ) + { + if ( paging_mode_refcounts(d) ) + okay = get_page_from_pagenr(mfn, d); + else +- okay = !get_page_and_type_from_pagenr( +- mfn, PGT_root_page_table, d, 0, 0); ++ { ++ rc = get_page_and_type_from_pagenr( ++ mfn, PGT_root_page_table, d, 0, 1); ++ okay = !rc; ++ } + if ( unlikely(!okay) ) + { +- MEM_LOG("Error while installing new mfn %lx", mfn); ++ if ( rc == -EINTR ) ++ rc = -EAGAIN; ++ else if ( rc != -EAGAIN ) ++ MEM_LOG("Error while installing new mfn %lx", mfn); + break; + } + } + +- old_mfn = pagetable_get_pfn(curr->arch.guest_table_user); + curr->arch.guest_table_user = pagetable_from_pfn(mfn); + + if ( old_mfn != 0 ) + { ++ struct page_info *page = mfn_to_page(old_mfn); ++ + if ( paging_mode_refcounts(d) ) +- put_page(mfn_to_page(old_mfn)); ++ put_page(page); + else +- put_page_and_type(mfn_to_page(old_mfn)); ++ switch ( rc = put_page_and_type_preemptible(page, 1) ) ++ { ++ case -EINTR: ++ rc = -EAGAIN; ++ case -EAGAIN: ++ curr->arch.old_guest_table = page; ++ okay = 0; ++ break; ++ default: ++ BUG_ON(rc); ++ break; ++ } + } + + break; +@@ -3337,9 +3540,27 @@ long do_mmuext_op( + } + + if ( rc == -EAGAIN ) ++ { ++ ASSERT(i < count); + rc = hypercall_create_continuation( + __HYPERVISOR_mmuext_op, "hihi", + uops, (count - i) | MMU_UPDATE_PREEMPTED, pdone, foreigndom); ++ } ++ else if ( curr->arch.old_guest_table ) ++ { ++ XEN_GUEST_HANDLE(void) null; ++ ++ 
ASSERT(rc || i == count); ++ set_xen_guest_handle(null, NULL); ++ /* ++ * In order to have a way to communicate the final return value to ++ * our continuation, we pass this in place of "foreigndom", building ++ * on the fact that this argument isn't needed anymore. ++ */ ++ rc = hypercall_create_continuation( ++ __HYPERVISOR_mmuext_op, "hihi", null, ++ MMU_UPDATE_PREEMPTED, null, rc); ++ } + + put_pg_owner(pg_owner); + +@@ -3366,11 +3587,28 @@ long do_mmu_update( + void *va; + unsigned long gpfn, gmfn, mfn; + struct page_info *page; +- int rc = 0, okay = 1, i = 0; +- unsigned int cmd, done = 0, pt_dom; +- struct vcpu *v = current; ++ unsigned int cmd, i = 0, done = 0, pt_dom; ++ struct vcpu *curr = current, *v = curr; + struct domain *d = v->domain, *pt_owner = d, *pg_owner; + struct domain_mmap_cache mapcache; ++ int rc = put_old_guest_table(curr), okay = 1; ++ ++ if ( unlikely(rc) ) ++ { ++ if ( likely(rc == -EAGAIN) ) ++ rc = hypercall_create_continuation( ++ __HYPERVISOR_mmu_update, "hihi", ureqs, count, pdone, ++ foreigndom); ++ return rc; ++ } ++ ++ if ( unlikely(count == MMU_UPDATE_PREEMPTED) && ++ likely(guest_handle_is_null(ureqs)) ) ++ { ++ /* See the curr->arch.old_guest_table related ++ * hypercall_create_continuation() below. 
*/ ++ return (int)foreigndom; ++ } + + if ( unlikely(count & MMU_UPDATE_PREEMPTED) ) + { +@@ -3419,7 +3657,7 @@ long do_mmu_update( + + for ( i = 0; i < count; i++ ) + { +- if ( hypercall_preempt_check() ) ++ if ( curr->arch.old_guest_table || hypercall_preempt_check() ) + { + rc = -EAGAIN; + break; +@@ -3684,9 +3922,27 @@ long do_mmu_update( + } + + if ( rc == -EAGAIN ) ++ { ++ ASSERT(i < count); + rc = hypercall_create_continuation( + __HYPERVISOR_mmu_update, "hihi", + ureqs, (count - i) | MMU_UPDATE_PREEMPTED, pdone, foreigndom); ++ } ++ else if ( curr->arch.old_guest_table ) ++ { ++ XEN_GUEST_HANDLE(void) null; ++ ++ ASSERT(rc || i == count); ++ set_xen_guest_handle(null, NULL); ++ /* ++ * In order to have a way to communicate the final return value to ++ * our continuation, we pass this in place of "foreigndom", building ++ * on the fact that this argument isn't needed anymore. ++ */ ++ rc = hypercall_create_continuation( ++ __HYPERVISOR_mmu_update, "hihi", null, ++ MMU_UPDATE_PREEMPTED, null, rc); ++ } + + put_pg_owner(pg_owner); + +diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c +index 234d9ac..e336439 100644 +--- a/xen/arch/x86/traps.c ++++ b/xen/arch/x86/traps.c +@@ -2317,8 +2317,15 @@ static int emulate_privileged_op(struct cpu_user_regs *regs) + rc = new_guest_cr3(gmfn_to_mfn(v->domain, compat_cr3_to_pfn(*reg))); + #endif + domain_unlock(v->domain); +- if ( rc == 0 ) /* not okay */ ++ switch ( rc ) ++ { ++ case 0: ++ break; ++ case -EAGAIN: /* retry after preemption */ ++ goto skip; ++ default: /* not okay */ + goto fail; ++ } + break; + + case 4: /* Write CR4 */ +diff --git a/xen/arch/x86/x86_64/compat/mm.c b/xen/arch/x86/x86_64/compat/mm.c +index 3ef08a5..6ad41d4 100644 +--- a/xen/arch/x86/x86_64/compat/mm.c ++++ b/xen/arch/x86/x86_64/compat/mm.c +@@ -222,6 +222,13 @@ int compat_mmuext_op(XEN_GUEST_HANDLE(mmuext_op_compat_t) cmp_uops, + int rc = 0; + XEN_GUEST_HANDLE(mmuext_op_t) nat_ops; + ++ if ( unlikely(count == MMU_UPDATE_PREEMPTED) && ++ 
likely(guest_handle_is_null(cmp_uops)) ) ++ { ++ set_xen_guest_handle(nat_ops, NULL); ++ return do_mmuext_op(nat_ops, count, pdone, foreigndom); ++ } ++ + preempt_mask = count & MMU_UPDATE_PREEMPTED; + count ^= preempt_mask; + +@@ -319,17 +326,23 @@ int compat_mmuext_op(XEN_GUEST_HANDLE(mmuext_op_compat_t) cmp_uops, + : mcs->call.args[1]; + unsigned int left = arg1 & ~MMU_UPDATE_PREEMPTED; + +- BUG_ON(left == arg1); ++ BUG_ON(left == arg1 && left != i); + BUG_ON(left > count); + guest_handle_add_offset(nat_ops, i - left); + guest_handle_subtract_offset(cmp_uops, left); + left = 1; +- BUG_ON(!hypercall_xlat_continuation(&left, 0x01, nat_ops, cmp_uops)); +- BUG_ON(left != arg1); +- if (!test_bit(_MCSF_in_multicall, &mcs->flags)) +- regs->_ecx += count - i; ++ if ( arg1 != MMU_UPDATE_PREEMPTED ) ++ { ++ BUG_ON(!hypercall_xlat_continuation(&left, 0x01, nat_ops, ++ cmp_uops)); ++ if ( !test_bit(_MCSF_in_multicall, &mcs->flags) ) ++ regs->_ecx += count - i; ++ else ++ mcs->compat_call.args[1] += count - i; ++ } + else +- mcs->compat_call.args[1] += count - i; ++ BUG_ON(hypercall_xlat_continuation(&left, 0)); ++ BUG_ON(left != arg1); + } + else + BUG_ON(err > 0); +diff --git a/xen/common/compat/domain.c b/xen/common/compat/domain.c +index 67e0e5e..5fe393f 100644 +--- a/xen/common/compat/domain.c ++++ b/xen/common/compat/domain.c +@@ -52,6 +52,10 @@ int compat_vcpu_op(int cmd, int vcpuid, XEN_GUEST_HANDLE(void) arg) + rc = boot_vcpu(d, vcpuid, cmp_ctxt); + domain_unlock(d); + ++ if ( rc == -EAGAIN ) ++ rc = hypercall_create_continuation(__HYPERVISOR_vcpu_op, "iih", ++ cmd, vcpuid, arg); ++ + xfree(cmp_ctxt); + break; + } +diff --git a/xen/common/domain.c b/xen/common/domain.c +index 054f7c4..5fa045b 100644 +--- a/xen/common/domain.c ++++ b/xen/common/domain.c +@@ -770,14 +770,18 @@ int boot_vcpu(struct domain *d, int vcpuid, vcpu_guest_context_u ctxt) + return arch_set_info_guest(v, ctxt); + } + +-void vcpu_reset(struct vcpu *v) ++int vcpu_reset(struct vcpu *v) + { + 
struct domain *d = v->domain; ++ int rc; + + vcpu_pause(v); + domain_lock(d); + +- arch_vcpu_reset(v); ++ set_bit(_VPF_in_reset, &v->pause_flags); ++ rc = arch_vcpu_reset(v); ++ if ( rc ) ++ goto out_unlock; + + set_bit(_VPF_down, &v->pause_flags); + +@@ -793,9 +797,13 @@ void vcpu_reset(struct vcpu *v) + #endif + cpus_clear(v->cpu_affinity_tmp); + clear_bit(_VPF_blocked, &v->pause_flags); ++ clear_bit(_VPF_in_reset, &v->pause_flags); + ++ out_unlock: + domain_unlock(v->domain); + vcpu_unpause(v); ++ ++ return rc; + } + + +@@ -834,6 +842,11 @@ long do_vcpu_op(int cmd, int vcpuid, XEN_GUEST_HANDLE(void) arg) + domain_unlock(d); + + xfree(ctxt); ++ ++ if ( rc == -EAGAIN ) ++ rc = hypercall_create_continuation(__HYPERVISOR_vcpu_op, "iih", ++ cmd, vcpuid, arg); ++ + break; + + case VCPUOP_up: +diff --git a/xen/common/domctl.c b/xen/common/domctl.c +index 981cb1a..faac366 100644 +--- a/xen/common/domctl.c ++++ b/xen/common/domctl.c +@@ -286,8 +286,10 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domctl_t) u_domctl) + + if ( guest_handle_is_null(op->u.vcpucontext.ctxt) ) + { +- vcpu_reset(v); +- ret = 0; ++ ret = vcpu_reset(v); ++ if ( ret == -EAGAIN ) ++ ret = hypercall_create_continuation( ++ __HYPERVISOR_domctl, "h", u_domctl); + goto svc_out; + } + +@@ -316,6 +318,10 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domctl_t) u_domctl) + domain_pause(d); + ret = arch_set_info_guest(v, c); + domain_unpause(d); ++ ++ if ( ret == -EAGAIN ) ++ ret = hypercall_create_continuation( ++ __HYPERVISOR_domctl, "h", u_domctl); + } + + svc_out: +diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h +index fe1459d..a387862 100644 +--- a/xen/include/asm-x86/domain.h ++++ b/xen/include/asm-x86/domain.h +@@ -405,6 +405,7 @@ struct arch_vcpu + pagetable_t guest_table_user; /* (MFN) x86/64 user-space pagetable */ + #endif + pagetable_t guest_table; /* (MFN) guest notion of cr3 */ ++ struct page_info *old_guest_table; /* partially destructed pagetable */ + /* guest_table holds a ref to 
the page, and also a type-count unless + * shadow refcounts are in use */ + pagetable_t shadow_table[4]; /* (MFN) shadow(s) of guest */ +diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h +index c93a022..2498007 100644 +--- a/xen/include/asm-x86/mm.h ++++ b/xen/include/asm-x86/mm.h +@@ -554,6 +554,7 @@ void audit_domains(void); + int new_guest_cr3(unsigned long pfn); + void make_cr3(struct vcpu *v, unsigned long mfn); + void update_cr3(struct vcpu *v); ++int vcpu_destroy_pagetables(struct vcpu *); + void propagate_page_fault(unsigned long addr, u16 error_code); + void *do_page_walk(struct vcpu *v, unsigned long addr); + +diff --git a/xen/include/xen/domain.h b/xen/include/xen/domain.h +index edffd1f..5175ef7 100644 +--- a/xen/include/xen/domain.h ++++ b/xen/include/xen/domain.h +@@ -15,7 +15,7 @@ struct vcpu *alloc_vcpu( + int boot_vcpu( + struct domain *d, int vcpuid, vcpu_guest_context_u ctxt); + struct vcpu *alloc_dom0_vcpu0(void); +-void vcpu_reset(struct vcpu *v); ++int vcpu_reset(struct vcpu *); + + struct xen_domctl_getdomaininfo; + void getdomaininfo(struct domain *d, struct xen_domctl_getdomaininfo *info); +@@ -57,7 +57,7 @@ void arch_dump_vcpu_info(struct vcpu *v); + + void arch_dump_domain_info(struct domain *d); + +-void arch_vcpu_reset(struct vcpu *v); ++int arch_vcpu_reset(struct vcpu *); + + bool_t domctl_lock_acquire(void); + void domctl_lock_release(void); +diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h +index 35c3a7f..c04b25d 100644 +--- a/xen/include/xen/sched.h ++++ b/xen/include/xen/sched.h +@@ -592,6 +592,9 @@ extern struct domain *domain_list; + /* VCPU is blocked on memory-event ring. */ + #define _VPF_mem_event 4 + #define VPF_mem_event (1UL<<_VPF_mem_event) ++ /* VCPU is being reset. 
*/ ++#define _VPF_in_reset 7 ++#define VPF_in_reset (1UL<<_VPF_in_reset) + + static inline int vcpu_runnable(struct vcpu *v) + { diff --git a/main/xen/xsa58-4.1.patch b/main/xen/xsa58-4.1.patch new file mode 100644 index 0000000000..3c982a35e0 --- /dev/null +++ b/main/xen/xsa58-4.1.patch @@ -0,0 +1,119 @@ +x86: fix page refcount handling in page table pin error path + +In the original patch 7 of the series addressing XSA-45 I mistakenly +took the addition of the call to get_page_light() in alloc_page_type() +to cover two decrements that would happen: One for the PGT_partial bit +that is getting set along with the call, and the other for the page +reference the caller hold (and would be dropping on its error path). +But of course the additional page reference is tied to the PGT_partial +bit, and hence any caller of a function that may leave +->arch.old_guest_table non-NULL for error cleanup purposes has to make +sure a respective page reference gets retained. + +Similar issues were then also spotted elsewhere: In effect all callers +of get_page_type_preemptible() need to deal with errors in similar +ways. To make sure error handling can work this way without leaking +page references, a respective assertion gets added to that function. + +This is CVE-2013-1432 / XSA-58. 
+ +Reported-by: Andrew Cooper <andrew.cooper3@citrix.com> +Signed-off-by: Jan Beulich <jbeulich@suse.com> +Tested-by: Andrew Cooper <andrew.cooper3@citrix.com> +Reviewed-by: Tim Deegan <tim@xen.org> + +--- a/xen/arch/x86/domain.c ++++ b/xen/arch/x86/domain.c +@@ -798,6 +798,10 @@ int arch_set_info_guest( + if ( v->vcpu_id == 0 ) + d->vm_assist = c(vm_assist); + ++ rc = put_old_guest_table(current); ++ if ( rc ) ++ return rc; ++ + if ( !compat ) + rc = (int)set_gdt(v, c.nat->gdt_frames, c.nat->gdt_ents); + #ifdef CONFIG_COMPAT +@@ -840,18 +844,24 @@ int arch_set_info_guest( + } + else + { +- /* +- * Since v->arch.guest_table{,_user} are both NULL, this effectively +- * is just a call to put_old_guest_table(). +- */ + if ( !compat ) +- rc = vcpu_destroy_pagetables(v); ++ rc = put_old_guest_table(v); + if ( !rc ) + rc = get_page_type_preemptible(cr3_page, + !compat ? PGT_root_page_table + : PGT_l3_page_table); +- if ( rc == -EINTR ) ++ switch ( rc ) ++ { ++ case -EINTR: + rc = -EAGAIN; ++ case -EAGAIN: ++ case 0: ++ break; ++ default: ++ if ( cr3_page == current->arch.old_guest_table ) ++ cr3_page = NULL; ++ break; ++ } + } + + if ( rc ) +@@ -883,6 +893,11 @@ int arch_set_info_guest( + pagetable_get_page(v->arch.guest_table); + v->arch.guest_table = pagetable_null(); + break; ++ default: ++ if ( cr3_page == current->arch.old_guest_table ) ++ cr3_page = NULL; ++ case 0: ++ break; + } + } + +--- a/xen/arch/x86/mm.c ++++ b/xen/arch/x86/mm.c +@@ -682,7 +682,8 @@ static int get_page_and_type_from_pagenr + get_page_type_preemptible(page, type) : + (get_page_type(page, type) ? 
0 : -EINVAL)); + +- if ( unlikely(rc) && partial >= 0 ) ++ if ( unlikely(rc) && partial >= 0 && ++ (!preemptible || page != current->arch.old_guest_table) ) + put_page(page); + + return rc; +@@ -2555,6 +2556,7 @@ int put_page_type_preemptible(struct pag + + int get_page_type_preemptible(struct page_info *page, unsigned long type) + { ++ ASSERT(!current->arch.old_guest_table); + return __get_page_type(page, type, 1); + } + +@@ -2765,7 +2767,7 @@ static void put_superpage(unsigned long + + #endif + +-static int put_old_guest_table(struct vcpu *v) ++int put_old_guest_table(struct vcpu *v) + { + int rc; + +--- a/xen/include/asm-x86/mm.h ++++ b/xen/include/asm-x86/mm.h +@@ -337,6 +337,7 @@ void put_page_type(struct page_info *pag + int get_page_type(struct page_info *page, unsigned long type); + int put_page_type_preemptible(struct page_info *page); + int get_page_type_preemptible(struct page_info *page, unsigned long type); ++int put_old_guest_table(struct vcpu *); + int get_page_from_l1e( + l1_pgentry_t l1e, struct domain *l1e_owner, struct domain *pg_owner); + void put_page_from_l1e(l1_pgentry_t l1e, struct domain *l1e_owner); |