diff options
Diffstat (limited to 'main/xen/xsa45-4.2.patch')
-rw-r--r-- | main/xen/xsa45-4.2.patch | 1133 |
1 files changed, 1133 insertions, 0 deletions
diff --git a/main/xen/xsa45-4.2.patch b/main/xen/xsa45-4.2.patch new file mode 100644 index 0000000000..dfdfdea64b --- /dev/null +++ b/main/xen/xsa45-4.2.patch @@ -0,0 +1,1133 @@ +diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c +index 26a7f12..b97ac6d 100644 +--- a/xen/arch/x86/domain.c ++++ b/xen/arch/x86/domain.c +@@ -73,8 +73,6 @@ void (*dead_idle) (void) __read_mostly = default_dead_idle; + static void paravirt_ctxt_switch_from(struct vcpu *v); + static void paravirt_ctxt_switch_to(struct vcpu *v); + +-static void vcpu_destroy_pagetables(struct vcpu *v); +- + static void default_idle(void) + { + local_irq_disable(); +@@ -860,6 +858,9 @@ int arch_set_info_guest( + + if ( !v->is_initialised ) + { ++ if ( !compat && !(flags & VGCF_in_kernel) && !c.nat->ctrlreg[1] ) ++ return -EINVAL; ++ + v->arch.pv_vcpu.ldt_base = c(ldt_base); + v->arch.pv_vcpu.ldt_ents = c(ldt_ents); + } +@@ -957,24 +958,44 @@ int arch_set_info_guest( + if ( rc != 0 ) + return rc; + ++ set_bit(_VPF_in_reset, &v->pause_flags); ++ + if ( !compat ) +- { + cr3_gfn = xen_cr3_to_pfn(c.nat->ctrlreg[3]); +- cr3_page = get_page_from_gfn(d, cr3_gfn, NULL, P2M_ALLOC); +- +- if ( !cr3_page ) +- { +- destroy_gdt(v); +- return -EINVAL; +- } +- if ( !paging_mode_refcounts(d) +- && !get_page_type(cr3_page, PGT_base_page_table) ) +- { +- put_page(cr3_page); +- destroy_gdt(v); +- return -EINVAL; +- } ++#ifdef CONFIG_COMPAT ++ else ++ cr3_gfn = compat_cr3_to_pfn(c.cmp->ctrlreg[3]); ++#endif ++ cr3_page = get_page_from_gfn(d, cr3_gfn, NULL, P2M_ALLOC); + ++ if ( !cr3_page ) ++ rc = -EINVAL; ++ else if ( paging_mode_refcounts(d) ) ++ /* nothing */; ++ else if ( cr3_page == v->arch.old_guest_table ) ++ { ++ v->arch.old_guest_table = NULL; ++ put_page(cr3_page); ++ } ++ else ++ { ++ /* ++ * Since v->arch.guest_table{,_user} are both NULL, this effectively ++ * is just a call to put_old_guest_table(). ++ */ ++ if ( !compat ) ++ rc = vcpu_destroy_pagetables(v); ++ if ( !rc ) ++ rc = get_page_type_preemptible(cr3_page, ++ !compat ? PGT_root_page_table ++ : PGT_l3_page_table); ++ if ( rc == -EINTR ) ++ rc = -EAGAIN; ++ } ++ if ( rc ) ++ /* handled below */; ++ else if ( !compat ) ++ { + v->arch.guest_table = pagetable_from_page(cr3_page); + #ifdef __x86_64__ + if ( c.nat->ctrlreg[1] ) +@@ -982,56 +1003,44 @@ int arch_set_info_guest( + cr3_gfn = xen_cr3_to_pfn(c.nat->ctrlreg[1]); + cr3_page = get_page_from_gfn(d, cr3_gfn, NULL, P2M_ALLOC); + +- if ( !cr3_page || +- (!paging_mode_refcounts(d) +- && !get_page_type(cr3_page, PGT_base_page_table)) ) ++ if ( !cr3_page ) ++ rc = -EINVAL; ++ else if ( !paging_mode_refcounts(d) ) + { +- if (cr3_page) +- put_page(cr3_page); +- cr3_page = pagetable_get_page(v->arch.guest_table); +- v->arch.guest_table = pagetable_null(); +- if ( paging_mode_refcounts(d) ) +- put_page(cr3_page); +- else +- put_page_and_type(cr3_page); +- destroy_gdt(v); +- return -EINVAL; ++ rc = get_page_type_preemptible(cr3_page, PGT_root_page_table); ++ switch ( rc ) ++ { ++ case -EINTR: ++ rc = -EAGAIN; ++ case -EAGAIN: ++ v->arch.old_guest_table = ++ pagetable_get_page(v->arch.guest_table); ++ v->arch.guest_table = pagetable_null(); ++ break; ++ } + } +- +- v->arch.guest_table_user = pagetable_from_page(cr3_page); +- } +- else if ( !(flags & VGCF_in_kernel) ) +- { +- destroy_gdt(v); +- return -EINVAL; ++ if ( !rc ) ++ v->arch.guest_table_user = pagetable_from_page(cr3_page); + } + } + else + { + l4_pgentry_t *l4tab; + +- cr3_gfn = compat_cr3_to_pfn(c.cmp->ctrlreg[3]); +- cr3_page = get_page_from_gfn(d, cr3_gfn, NULL, P2M_ALLOC); +- +- if ( !cr3_page) +- { +- destroy_gdt(v); +- return -EINVAL; +- } +- +- if (!paging_mode_refcounts(d) +- && !get_page_type(cr3_page, PGT_l3_page_table) ) +- { +- put_page(cr3_page); +- destroy_gdt(v); +- return -EINVAL; +- } +- + l4tab = __va(pagetable_get_paddr(v->arch.guest_table)); + *l4tab = l4e_from_pfn(page_to_mfn(cr3_page), + _PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_ACCESSED); + #endif + } ++ if ( rc ) ++ { ++ if ( cr3_page ) ++ put_page(cr3_page); ++ destroy_gdt(v); ++ return rc; ++ } ++ ++ clear_bit(_VPF_in_reset, &v->pause_flags); + + if ( v->vcpu_id == 0 ) + update_domain_wallclock_time(d); +@@ -1053,17 +1062,16 @@ int arch_set_info_guest( + #undef c + } + +-void arch_vcpu_reset(struct vcpu *v) ++int arch_vcpu_reset(struct vcpu *v) + { + if ( !is_hvm_vcpu(v) ) + { + destroy_gdt(v); +- vcpu_destroy_pagetables(v); +- } +- else +- { +- vcpu_end_shutdown_deferral(v); ++ return vcpu_destroy_pagetables(v); + } ++ ++ vcpu_end_shutdown_deferral(v); ++ return 0; + } + + /* +@@ -2069,63 +2077,6 @@ static int relinquish_memory( + return ret; + } + +-static void vcpu_destroy_pagetables(struct vcpu *v) +-{ +- struct domain *d = v->domain; +- unsigned long pfn; +- +-#ifdef __x86_64__ +- if ( is_pv_32on64_vcpu(v) ) +- { +- pfn = l4e_get_pfn(*(l4_pgentry_t *) +- __va(pagetable_get_paddr(v->arch.guest_table))); +- +- if ( pfn != 0 ) +- { +- if ( paging_mode_refcounts(d) ) +- put_page(mfn_to_page(pfn)); +- else +- put_page_and_type(mfn_to_page(pfn)); +- } +- +- l4e_write( +- (l4_pgentry_t *)__va(pagetable_get_paddr(v->arch.guest_table)), +- l4e_empty()); +- +- v->arch.cr3 = 0; +- return; +- } +-#endif +- +- pfn = pagetable_get_pfn(v->arch.guest_table); +- if ( pfn != 0 ) +- { +- if ( paging_mode_refcounts(d) ) +- put_page(mfn_to_page(pfn)); +- else +- put_page_and_type(mfn_to_page(pfn)); +- v->arch.guest_table = pagetable_null(); +- } +- +-#ifdef __x86_64__ +- /* Drop ref to guest_table_user (from MMUEXT_NEW_USER_BASEPTR) */ +- pfn = pagetable_get_pfn(v->arch.guest_table_user); +- if ( pfn != 0 ) +- { +- if ( !is_pv_32bit_vcpu(v) ) +- { +- if ( paging_mode_refcounts(d) ) +- put_page(mfn_to_page(pfn)); +- else +- put_page_and_type(mfn_to_page(pfn)); +- } +- v->arch.guest_table_user = pagetable_null(); +- } +-#endif +- +- v->arch.cr3 = 0; +-} +- + int domain_relinquish_resources(struct domain *d) + { + int ret; +@@ -2143,7 +2094,11 @@ int domain_relinquish_resources(struct domain *d) + + /* Drop the in-use references to page-table bases. */ + for_each_vcpu ( d, v ) +- vcpu_destroy_pagetables(v); ++ { ++ ret = vcpu_destroy_pagetables(v); ++ if ( ret ) ++ return ret; ++ } + + if ( !is_hvm_domain(d) ) + { +diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c +index 3d471a5..efacc98 100644 +--- a/xen/arch/x86/hvm/hvm.c ++++ b/xen/arch/x86/hvm/hvm.c +@@ -3509,8 +3509,11 @@ static void hvm_s3_suspend(struct domain *d) + + for_each_vcpu ( d, v ) + { ++ int rc; ++ + vlapic_reset(vcpu_vlapic(v)); +- vcpu_reset(v); ++ rc = vcpu_reset(v); ++ ASSERT(!rc); + } + + vpic_reset(d); +diff --git a/xen/arch/x86/hvm/vlapic.c b/xen/arch/x86/hvm/vlapic.c +index 52d111b..7778342 100644 +--- a/xen/arch/x86/hvm/vlapic.c ++++ b/xen/arch/x86/hvm/vlapic.c +@@ -252,10 +252,13 @@ static void vlapic_init_sipi_action(unsigned long _vcpu) + { + case APIC_DM_INIT: { + bool_t fpu_initialised; ++ int rc; ++ + domain_lock(target->domain); + /* Reset necessary VCPU state. This does not include FPU state. */ + fpu_initialised = target->fpu_initialised; +- vcpu_reset(target); ++ rc = vcpu_reset(target); ++ ASSERT(!rc); + target->fpu_initialised = fpu_initialised; + vlapic_reset(vcpu_vlapic(target)); + domain_unlock(target->domain); +diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c +index 8444610..055f307 100644 +--- a/xen/arch/x86/mm.c ++++ b/xen/arch/x86/mm.c +@@ -1241,7 +1241,16 @@ static int put_page_from_l3e(l3_pgentry_t l3e, unsigned long pfn, + #endif + + if ( unlikely(partial > 0) ) ++ { ++ ASSERT(preemptible >= 0); + return __put_page_type(l3e_get_page(l3e), preemptible); ++ } ++ ++ if ( preemptible < 0 ) ++ { ++ current->arch.old_guest_table = l3e_get_page(l3e); ++ return 0; ++ } + + return put_page_and_type_preemptible(l3e_get_page(l3e), preemptible); + } +@@ -1254,7 +1263,17 @@ static int put_page_from_l4e(l4_pgentry_t l4e, unsigned long pfn, + (l4e_get_pfn(l4e) != pfn) ) + { + if ( unlikely(partial > 0) ) ++ { ++ ASSERT(preemptible >= 0); + return __put_page_type(l4e_get_page(l4e), preemptible); ++ } ++ ++ if ( preemptible < 0 ) ++ { ++ current->arch.old_guest_table = l4e_get_page(l4e); ++ return 0; ++ } ++ + return put_page_and_type_preemptible(l4e_get_page(l4e), preemptible); + } + return 1; +@@ -1549,12 +1568,17 @@ static int alloc_l3_table(struct page_info *page, int preemptible) + if ( rc < 0 && rc != -EAGAIN && rc != -EINTR ) + { + MEM_LOG("Failure in alloc_l3_table: entry %d", i); ++ if ( i ) ++ { ++ page->nr_validated_ptes = i; ++ page->partial_pte = 0; ++ current->arch.old_guest_table = page; ++ } + while ( i-- > 0 ) + { + if ( !is_guest_l3_slot(i) ) + continue; + unadjust_guest_l3e(pl3e[i], d); +- put_page_from_l3e(pl3e[i], pfn, 0, 0); + } + } + +@@ -1584,22 +1608,24 @@ static int alloc_l4_table(struct page_info *page, int preemptible) + page->nr_validated_ptes = i; + page->partial_pte = partial ?: 1; + } +- else if ( rc == -EINTR ) ++ else if ( rc < 0 ) + { ++ if ( rc != -EINTR ) ++ MEM_LOG("Failure in alloc_l4_table: entry %d", i); + if ( i ) + { + page->nr_validated_ptes = i; + page->partial_pte = 0; +- rc = -EAGAIN; ++ if ( rc == -EINTR ) ++ rc = -EAGAIN; ++ else ++ { ++ if ( current->arch.old_guest_table ) ++ page->nr_validated_ptes++; ++ current->arch.old_guest_table = page; ++ } + } + } +- else if ( rc < 0 ) +- { +- MEM_LOG("Failure in alloc_l4_table: entry %d", i); +- while ( i-- > 0 ) +- if ( is_guest_l4_slot(d, i) ) +- put_page_from_l4e(pl4e[i], pfn, 0, 0); +- } + if ( rc < 0 ) + return rc; + +@@ -2047,7 +2073,7 @@ static int mod_l3_entry(l3_pgentry_t *pl3e, + pae_flush_pgd(pfn, pgentry_ptr_to_slot(pl3e), nl3e); + } + +- put_page_from_l3e(ol3e, pfn, 0, 0); ++ put_page_from_l3e(ol3e, pfn, 0, -preemptible); + return rc; + } + +@@ -2110,7 +2136,7 @@ static int mod_l4_entry(l4_pgentry_t *pl4e, + return -EFAULT; + } + +- put_page_from_l4e(ol4e, pfn, 0, 0); ++ put_page_from_l4e(ol4e, pfn, 0, -preemptible); + return rc; + } + +@@ -2268,7 +2294,15 @@ static int alloc_page_type(struct page_info *page, unsigned long type, + PRtype_info ": caf=%08lx taf=%" PRtype_info, + page_to_mfn(page), get_gpfn_from_mfn(page_to_mfn(page)), + type, page->count_info, page->u.inuse.type_info); +- page->u.inuse.type_info = 0; ++ if ( page != current->arch.old_guest_table ) ++ page->u.inuse.type_info = 0; ++ else ++ { ++ ASSERT((page->u.inuse.type_info & ++ (PGT_count_mask | PGT_validated)) == 1); ++ get_page_light(page); ++ page->u.inuse.type_info |= PGT_partial; ++ } + } + else + { +@@ -2808,49 +2842,150 @@ static void put_superpage(unsigned long mfn) + + #endif + ++static int put_old_guest_table(struct vcpu *v) ++{ ++ int rc; ++ ++ if ( !v->arch.old_guest_table ) ++ return 0; ++ ++ switch ( rc = put_page_and_type_preemptible(v->arch.old_guest_table, 1) ) ++ { ++ case -EINTR: ++ case -EAGAIN: ++ return -EAGAIN; ++ } ++ ++ v->arch.old_guest_table = NULL; ++ ++ return rc; ++} ++ ++int vcpu_destroy_pagetables(struct vcpu *v) ++{ ++ unsigned long mfn = pagetable_get_pfn(v->arch.guest_table); ++ struct page_info *page; ++ int rc = put_old_guest_table(v); ++ ++ if ( rc ) ++ return rc; ++ ++#ifdef __x86_64__ ++ if ( is_pv_32on64_vcpu(v) ) ++ mfn = l4e_get_pfn(*(l4_pgentry_t *)mfn_to_virt(mfn)); ++#endif ++ ++ if ( mfn ) ++ { ++ page = mfn_to_page(mfn); ++ if ( paging_mode_refcounts(v->domain) ) ++ put_page(page); ++ else ++ rc = put_page_and_type_preemptible(page, 1); ++ } ++ ++#ifdef __x86_64__ ++ if ( is_pv_32on64_vcpu(v) ) ++ { ++ if ( !rc ) ++ l4e_write( ++ (l4_pgentry_t *)__va(pagetable_get_paddr(v->arch.guest_table)), ++ l4e_empty()); ++ } ++ else ++#endif ++ if ( !rc ) ++ { ++ v->arch.guest_table = pagetable_null(); ++ ++#ifdef __x86_64__ ++ /* Drop ref to guest_table_user (from MMUEXT_NEW_USER_BASEPTR) */ ++ mfn = pagetable_get_pfn(v->arch.guest_table_user); ++ if ( mfn ) ++ { ++ page = mfn_to_page(mfn); ++ if ( paging_mode_refcounts(v->domain) ) ++ put_page(page); ++ else ++ rc = put_page_and_type_preemptible(page, 1); ++ } ++ if ( !rc ) ++ v->arch.guest_table_user = pagetable_null(); ++#endif ++ } ++ ++ v->arch.cr3 = 0; ++ ++ return rc; ++} + + int new_guest_cr3(unsigned long mfn) + { + struct vcpu *curr = current; + struct domain *d = curr->domain; +- int okay; ++ int rc; + unsigned long old_base_mfn; + + #ifdef __x86_64__ + if ( is_pv_32on64_domain(d) ) + { +- okay = paging_mode_refcounts(d) +- ? 0 /* Old code was broken, but what should it be? */ +- : mod_l4_entry( ++ rc = paging_mode_refcounts(d) ++ ? -EINVAL /* Old code was broken, but what should it be? */ ++ : mod_l4_entry( + __va(pagetable_get_paddr(curr->arch.guest_table)), + l4e_from_pfn( + mfn, + (_PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_ACCESSED)), +- pagetable_get_pfn(curr->arch.guest_table), 0, 0, curr) == 0; +- if ( unlikely(!okay) ) ++ pagetable_get_pfn(curr->arch.guest_table), 0, 1, curr); ++ switch ( rc ) + { ++ case 0: ++ break; ++ case -EINTR: ++ case -EAGAIN: ++ return -EAGAIN; ++ default: + MEM_LOG("Error while installing new compat baseptr %lx", mfn); +- return 0; ++ return rc; + } + + invalidate_shadow_ldt(curr, 0); + write_ptbase(curr); + +- return 1; ++ return 0; + } + #endif +- okay = paging_mode_refcounts(d) +- ? get_page_from_pagenr(mfn, d) +- : !get_page_and_type_from_pagenr(mfn, PGT_root_page_table, d, 0, 0); +- if ( unlikely(!okay) ) ++ rc = put_old_guest_table(curr); ++ if ( unlikely(rc) ) ++ return rc; ++ ++ old_base_mfn = pagetable_get_pfn(curr->arch.guest_table); ++ /* ++ * This is particularly important when getting restarted after the ++ * previous attempt got preempted in the put-old-MFN phase. ++ */ ++ if ( old_base_mfn == mfn ) + { +- MEM_LOG("Error while installing new baseptr %lx", mfn); ++ write_ptbase(curr); + return 0; + } + +- invalidate_shadow_ldt(curr, 0); ++ rc = paging_mode_refcounts(d) ++ ? (get_page_from_pagenr(mfn, d) ? 0 : -EINVAL) ++ : get_page_and_type_from_pagenr(mfn, PGT_root_page_table, d, 0, 1); ++ switch ( rc ) ++ { ++ case 0: ++ break; ++ case -EINTR: ++ case -EAGAIN: ++ return -EAGAIN; ++ default: ++ MEM_LOG("Error while installing new baseptr %lx", mfn); ++ return rc; ++ } + +- old_base_mfn = pagetable_get_pfn(curr->arch.guest_table); ++ invalidate_shadow_ldt(curr, 0); + + curr->arch.guest_table = pagetable_from_pfn(mfn); + update_cr3(curr); +@@ -2859,13 +2994,25 @@ int new_guest_cr3(unsigned long mfn) + + if ( likely(old_base_mfn != 0) ) + { ++ struct page_info *page = mfn_to_page(old_base_mfn); ++ + if ( paging_mode_refcounts(d) ) +- put_page(mfn_to_page(old_base_mfn)); ++ put_page(page); + else +- put_page_and_type(mfn_to_page(old_base_mfn)); ++ switch ( rc = put_page_and_type_preemptible(page, 1) ) ++ { ++ case -EINTR: ++ rc = -EAGAIN; ++ case -EAGAIN: ++ curr->arch.old_guest_table = page; ++ break; ++ default: ++ BUG_ON(rc); ++ break; ++ } + } + +- return 1; ++ return rc; + } + + static struct domain *get_pg_owner(domid_t domid) +@@ -2994,12 +3141,29 @@ long do_mmuext_op( + unsigned int foreigndom) + { + struct mmuext_op op; +- int rc = 0, i = 0, okay; + unsigned long type; +- unsigned int done = 0; ++ unsigned int i = 0, done = 0; + struct vcpu *curr = current; + struct domain *d = curr->domain; + struct domain *pg_owner; ++ int okay, rc = put_old_guest_table(curr); ++ ++ if ( unlikely(rc) ) ++ { ++ if ( likely(rc == -EAGAIN) ) ++ rc = hypercall_create_continuation( ++ __HYPERVISOR_mmuext_op, "hihi", uops, count, pdone, ++ foreigndom); ++ return rc; ++ } ++ ++ if ( unlikely(count == MMU_UPDATE_PREEMPTED) && ++ likely(guest_handle_is_null(uops)) ) ++ { ++ /* See the curr->arch.old_guest_table related ++ * hypercall_create_continuation() below. */ ++ return (int)foreigndom; ++ } + + if ( unlikely(count & MMU_UPDATE_PREEMPTED) ) + { +@@ -3024,7 +3188,7 @@ long do_mmuext_op( + + for ( i = 0; i < count; i++ ) + { +- if ( hypercall_preempt_check() ) ++ if ( curr->arch.old_guest_table || hypercall_preempt_check() ) + { + rc = -EAGAIN; + break; +@@ -3088,21 +3252,17 @@ long do_mmuext_op( + } + + if ( (rc = xsm_memory_pin_page(d, pg_owner, page)) != 0 ) +- { +- put_page_and_type(page); + okay = 0; +- break; +- } +- +- if ( unlikely(test_and_set_bit(_PGT_pinned, +- &page->u.inuse.type_info)) ) ++ else if ( unlikely(test_and_set_bit(_PGT_pinned, ++ &page->u.inuse.type_info)) ) + { + MEM_LOG("Mfn %lx already pinned", page_to_mfn(page)); +- put_page_and_type(page); + okay = 0; +- break; + } + ++ if ( unlikely(!okay) ) ++ goto pin_drop; ++ + /* A page is dirtied when its pin status is set. */ + paging_mark_dirty(pg_owner, page_to_mfn(page)); + +@@ -3116,7 +3276,13 @@ long do_mmuext_op( + &page->u.inuse.type_info)); + spin_unlock(&pg_owner->page_alloc_lock); + if ( drop_ref ) +- put_page_and_type(page); ++ { ++ pin_drop: ++ if ( type == PGT_l1_page_table ) ++ put_page_and_type(page); ++ else ++ curr->arch.old_guest_table = page; ++ } + } + + break; +@@ -3144,7 +3310,17 @@ long do_mmuext_op( + break; + } + +- put_page_and_type(page); ++ switch ( rc = put_page_and_type_preemptible(page, 1) ) ++ { ++ case -EINTR: ++ case -EAGAIN: ++ curr->arch.old_guest_table = page; ++ rc = 0; ++ break; ++ default: ++ BUG_ON(rc); ++ break; ++ } + put_page(page); + + /* A page is dirtied when its pin status is cleared. */ +@@ -3154,8 +3330,13 @@ long do_mmuext_op( + } + + case MMUEXT_NEW_BASEPTR: +- okay = (!paging_mode_translate(d) +- && new_guest_cr3(op.arg1.mfn)); ++ if ( paging_mode_translate(d) ) ++ okay = 0; ++ else ++ { ++ rc = new_guest_cr3(op.arg1.mfn); ++ okay = !rc; ++ } + break; + + +@@ -3169,29 +3350,56 @@ long do_mmuext_op( + break; + } + ++ old_mfn = pagetable_get_pfn(curr->arch.guest_table_user); ++ /* ++ * This is particularly important when getting restarted after the ++ * previous attempt got preempted in the put-old-MFN phase. ++ */ ++ if ( old_mfn == op.arg1.mfn ) ++ break; ++ + if ( op.arg1.mfn != 0 ) + { + if ( paging_mode_refcounts(d) ) + okay = get_page_from_pagenr(op.arg1.mfn, d); + else +- okay = !get_page_and_type_from_pagenr( +- op.arg1.mfn, PGT_root_page_table, d, 0, 0); ++ { ++ rc = get_page_and_type_from_pagenr( ++ op.arg1.mfn, PGT_root_page_table, d, 0, 1); ++ okay = !rc; ++ } + if ( unlikely(!okay) ) + { +- MEM_LOG("Error while installing new mfn %lx", op.arg1.mfn); ++ if ( rc == -EINTR ) ++ rc = -EAGAIN; ++ else if ( rc != -EAGAIN ) ++ MEM_LOG("Error while installing new mfn %lx", ++ op.arg1.mfn); + break; + } + } + +- old_mfn = pagetable_get_pfn(curr->arch.guest_table_user); + curr->arch.guest_table_user = pagetable_from_pfn(op.arg1.mfn); + + if ( old_mfn != 0 ) + { ++ struct page_info *page = mfn_to_page(old_mfn); ++ + if ( paging_mode_refcounts(d) ) +- put_page(mfn_to_page(old_mfn)); ++ put_page(page); + else +- put_page_and_type(mfn_to_page(old_mfn)); ++ switch ( rc = put_page_and_type_preemptible(page, 1) ) ++ { ++ case -EINTR: ++ rc = -EAGAIN; ++ case -EAGAIN: ++ curr->arch.old_guest_table = page; ++ okay = 0; ++ break; ++ default: ++ BUG_ON(rc); ++ break; ++ } + } + + break; +@@ -3433,9 +3641,27 @@ long do_mmuext_op( + } + + if ( rc == -EAGAIN ) ++ { ++ ASSERT(i < count); + rc = hypercall_create_continuation( + __HYPERVISOR_mmuext_op, "hihi", + uops, (count - i) | MMU_UPDATE_PREEMPTED, pdone, foreigndom); ++ } ++ else if ( curr->arch.old_guest_table ) ++ { ++ XEN_GUEST_HANDLE(void) null; ++ ++ ASSERT(rc || i == count); ++ set_xen_guest_handle(null, NULL); ++ /* ++ * In order to have a way to communicate the final return value to ++ * our continuation, we pass this in place of "foreigndom", building ++ * on the fact that this argument isn't needed anymore. ++ */ ++ rc = hypercall_create_continuation( ++ __HYPERVISOR_mmuext_op, "hihi", null, ++ MMU_UPDATE_PREEMPTED, null, rc); ++ } + + put_pg_owner(pg_owner); + +@@ -3462,11 +3688,28 @@ long do_mmu_update( + void *va; + unsigned long gpfn, gmfn, mfn; + struct page_info *page; +- int rc = 0, i = 0; +- unsigned int cmd, done = 0, pt_dom; +- struct vcpu *v = current; ++ unsigned int cmd, i = 0, done = 0, pt_dom; ++ struct vcpu *curr = current, *v = curr; + struct domain *d = v->domain, *pt_owner = d, *pg_owner; + struct domain_mmap_cache mapcache; ++ int rc = put_old_guest_table(curr); ++ ++ if ( unlikely(rc) ) ++ { ++ if ( likely(rc == -EAGAIN) ) ++ rc = hypercall_create_continuation( ++ __HYPERVISOR_mmu_update, "hihi", ureqs, count, pdone, ++ foreigndom); ++ return rc; ++ } ++ ++ if ( unlikely(count == MMU_UPDATE_PREEMPTED) && ++ likely(guest_handle_is_null(ureqs)) ) ++ { ++ /* See the curr->arch.old_guest_table related ++ * hypercall_create_continuation() below. */ ++ return (int)foreigndom; ++ } + + if ( unlikely(count & MMU_UPDATE_PREEMPTED) ) + { +@@ -3515,7 +3758,7 @@ long do_mmu_update( + + for ( i = 0; i < count; i++ ) + { +- if ( hypercall_preempt_check() ) ++ if ( curr->arch.old_guest_table || hypercall_preempt_check() ) + { + rc = -EAGAIN; + break; +@@ -3696,9 +3939,27 @@ long do_mmu_update( + } + + if ( rc == -EAGAIN ) ++ { ++ ASSERT(i < count); + rc = hypercall_create_continuation( + __HYPERVISOR_mmu_update, "hihi", + ureqs, (count - i) | MMU_UPDATE_PREEMPTED, pdone, foreigndom); ++ } ++ else if ( curr->arch.old_guest_table ) ++ { ++ XEN_GUEST_HANDLE(void) null; ++ ++ ASSERT(rc || i == count); ++ set_xen_guest_handle(null, NULL); ++ /* ++ * In order to have a way to communicate the final return value to ++ * our continuation, we pass this in place of "foreigndom", building ++ * on the fact that this argument isn't needed anymore. ++ */ ++ rc = hypercall_create_continuation( ++ __HYPERVISOR_mmu_update, "hihi", null, ++ MMU_UPDATE_PREEMPTED, null, rc); ++ } + + put_pg_owner(pg_owner); + +diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c +index 692281a..eada470 100644 +--- a/xen/arch/x86/traps.c ++++ b/xen/arch/x86/traps.c +@@ -2407,12 +2407,23 @@ static int emulate_privileged_op(struct cpu_user_regs *regs) + #endif + } + page = get_page_from_gfn(v->domain, gfn, NULL, P2M_ALLOC); +- rc = page ? new_guest_cr3(page_to_mfn(page)) : 0; + if ( page ) ++ { ++ rc = new_guest_cr3(page_to_mfn(page)); + put_page(page); ++ } ++ else ++ rc = -EINVAL; + domain_unlock(v->domain); +- if ( rc == 0 ) /* not okay */ ++ switch ( rc ) ++ { ++ case 0: ++ break; ++ case -EAGAIN: /* retry after preemption */ ++ goto skip; ++ default: /* not okay */ + goto fail; ++ } + break; + } + +diff --git a/xen/arch/x86/x86_64/compat/mm.c b/xen/arch/x86/x86_64/compat/mm.c +index fb7baca..ef7822b 100644 +--- a/xen/arch/x86/x86_64/compat/mm.c ++++ b/xen/arch/x86/x86_64/compat/mm.c +@@ -268,6 +268,13 @@ int compat_mmuext_op(XEN_GUEST_HANDLE(mmuext_op_compat_t) cmp_uops, + int rc = 0; + XEN_GUEST_HANDLE(mmuext_op_t) nat_ops; + ++ if ( unlikely(count == MMU_UPDATE_PREEMPTED) && ++ likely(guest_handle_is_null(cmp_uops)) ) ++ { ++ set_xen_guest_handle(nat_ops, NULL); ++ return do_mmuext_op(nat_ops, count, pdone, foreigndom); ++ } ++ + preempt_mask = count & MMU_UPDATE_PREEMPTED; + count ^= preempt_mask; + +@@ -365,17 +372,23 @@ int compat_mmuext_op(XEN_GUEST_HANDLE(mmuext_op_compat_t) cmp_uops, + : mcs->call.args[1]; + unsigned int left = arg1 & ~MMU_UPDATE_PREEMPTED; + +- BUG_ON(left == arg1); ++ BUG_ON(left == arg1 && left != i); + BUG_ON(left > count); + guest_handle_add_offset(nat_ops, i - left); + guest_handle_subtract_offset(cmp_uops, left); + left = 1; +- BUG_ON(!hypercall_xlat_continuation(&left, 0x01, nat_ops, cmp_uops)); +- BUG_ON(left != arg1); +- if (!test_bit(_MCSF_in_multicall, &mcs->flags)) +- regs->_ecx += count - i; ++ if ( arg1 != MMU_UPDATE_PREEMPTED ) ++ { ++ BUG_ON(!hypercall_xlat_continuation(&left, 0x01, nat_ops, ++ cmp_uops)); ++ if ( !test_bit(_MCSF_in_multicall, &mcs->flags) ) ++ regs->_ecx += count - i; ++ else ++ mcs->compat_call.args[1] += count - i; ++ } + else +- mcs->compat_call.args[1] += count - i; ++ BUG_ON(hypercall_xlat_continuation(&left, 0)); ++ BUG_ON(left != arg1); + } + else + BUG_ON(err > 0); +diff --git a/xen/common/compat/domain.c b/xen/common/compat/domain.c +index 40a0287..9ddaa38 100644 +--- a/xen/common/compat/domain.c ++++ b/xen/common/compat/domain.c +@@ -50,6 +50,10 @@ int compat_vcpu_op(int cmd, int vcpuid, XEN_GUEST_HANDLE(void) arg) + rc = v->is_initialised ? -EEXIST : arch_set_info_guest(v, cmp_ctxt); + domain_unlock(d); + ++ if ( rc == -EAGAIN ) ++ rc = hypercall_create_continuation(__HYPERVISOR_vcpu_op, "iih", ++ cmd, vcpuid, arg); ++ + xfree(cmp_ctxt); + break; + } +diff --git a/xen/common/domain.c b/xen/common/domain.c +index c09fb73..89ab922 100644 +--- a/xen/common/domain.c ++++ b/xen/common/domain.c +@@ -779,14 +779,18 @@ void domain_unpause_by_systemcontroller(struct domain *d) + domain_unpause(d); + } + +-void vcpu_reset(struct vcpu *v) ++int vcpu_reset(struct vcpu *v) + { + struct domain *d = v->domain; ++ int rc; + + vcpu_pause(v); + domain_lock(d); + +- arch_vcpu_reset(v); ++ set_bit(_VPF_in_reset, &v->pause_flags); ++ rc = arch_vcpu_reset(v); ++ if ( rc ) ++ goto out_unlock; + + set_bit(_VPF_down, &v->pause_flags); + +@@ -802,9 +806,13 @@ void vcpu_reset(struct vcpu *v) + #endif + cpumask_clear(v->cpu_affinity_tmp); + clear_bit(_VPF_blocked, &v->pause_flags); ++ clear_bit(_VPF_in_reset, &v->pause_flags); + ++ out_unlock: + domain_unlock(v->domain); + vcpu_unpause(v); ++ ++ return rc; + } + + +@@ -841,6 +849,11 @@ long do_vcpu_op(int cmd, int vcpuid, XEN_GUEST_HANDLE(void) arg) + domain_unlock(d); + + free_vcpu_guest_context(ctxt); ++ ++ if ( rc == -EAGAIN ) ++ rc = hypercall_create_continuation(__HYPERVISOR_vcpu_op, "iih", ++ cmd, vcpuid, arg); ++ + break; + + case VCPUOP_up: { +diff --git a/xen/common/domctl.c b/xen/common/domctl.c +index cbc8146..b3bfb38 100644 +--- a/xen/common/domctl.c ++++ b/xen/common/domctl.c +@@ -307,8 +307,10 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domctl_t) u_domctl) + + if ( guest_handle_is_null(op->u.vcpucontext.ctxt) ) + { +- vcpu_reset(v); +- ret = 0; ++ ret = vcpu_reset(v); ++ if ( ret == -EAGAIN ) ++ ret = hypercall_create_continuation( ++ __HYPERVISOR_domctl, "h", u_domctl); + goto svc_out; + } + +@@ -337,6 +339,10 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domctl_t) u_domctl) + domain_pause(d); + ret = arch_set_info_guest(v, c); + domain_unpause(d); ++ ++ if ( ret == -EAGAIN ) ++ ret = hypercall_create_continuation( ++ __HYPERVISOR_domctl, "h", u_domctl); + } + + svc_out: +diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h +index aecee68..898f63a 100644 +--- a/xen/include/asm-x86/domain.h ++++ b/xen/include/asm-x86/domain.h +@@ -464,6 +464,7 @@ struct arch_vcpu + pagetable_t guest_table_user; /* (MFN) x86/64 user-space pagetable */ + #endif + pagetable_t guest_table; /* (MFN) guest notion of cr3 */ ++ struct page_info *old_guest_table; /* partially destructed pagetable */ + /* guest_table holds a ref to the page, and also a type-count unless + * shadow refcounts are in use */ + pagetable_t shadow_table[4]; /* (MFN) shadow(s) of guest */ +diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h +index ba92568..82cdde6 100644 +--- a/xen/include/asm-x86/mm.h ++++ b/xen/include/asm-x86/mm.h +@@ -605,6 +605,7 @@ void audit_domains(void); + int new_guest_cr3(unsigned long pfn); + void make_cr3(struct vcpu *v, unsigned long mfn); + void update_cr3(struct vcpu *v); ++int vcpu_destroy_pagetables(struct vcpu *); + void propagate_page_fault(unsigned long addr, u16 error_code); + void *do_page_walk(struct vcpu *v, unsigned long addr); + +diff --git a/xen/include/xen/domain.h b/xen/include/xen/domain.h +index d4ac50f..504a70f 100644 +--- a/xen/include/xen/domain.h ++++ b/xen/include/xen/domain.h +@@ -13,7 +13,7 @@ typedef union { + struct vcpu *alloc_vcpu( + struct domain *d, unsigned int vcpu_id, unsigned int cpu_id); + struct vcpu *alloc_dom0_vcpu0(void); +-void vcpu_reset(struct vcpu *v); ++int vcpu_reset(struct vcpu *); + + struct xen_domctl_getdomaininfo; + void getdomaininfo(struct domain *d, struct xen_domctl_getdomaininfo *info); +@@ -67,7 +67,7 @@ void arch_dump_vcpu_info(struct vcpu *v); + + void arch_dump_domain_info(struct domain *d); + +-void arch_vcpu_reset(struct vcpu *v); ++int arch_vcpu_reset(struct vcpu *); + + extern spinlock_t vcpu_alloc_lock; + bool_t domctl_lock_acquire(void); +diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h +index b619269..b0715cb 100644 +--- a/xen/include/xen/sched.h ++++ b/xen/include/xen/sched.h +@@ -644,6 +644,9 @@ static inline struct domain *next_domain_in_cpupool( + /* VCPU is blocked due to missing mem_sharing ring. */ + #define _VPF_mem_sharing 6 + #define VPF_mem_sharing (1UL<<_VPF_mem_sharing) ++ /* VCPU is being reset. */ ++#define _VPF_in_reset 7 ++#define VPF_in_reset (1UL<<_VPF_in_reset) + + static inline int vcpu_runnable(struct vcpu *v) + { |