diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c index 26a7f12..b97ac6d 100644 --- a/xen/arch/x86/domain.c +++ b/xen/arch/x86/domain.c @@ -73,8 +73,6 @@ void (*dead_idle) (void) __read_mostly = default_dead_idle; static void paravirt_ctxt_switch_from(struct vcpu *v); static void paravirt_ctxt_switch_to(struct vcpu *v); -static void vcpu_destroy_pagetables(struct vcpu *v); - static void default_idle(void) { local_irq_disable(); @@ -860,6 +858,9 @@ int arch_set_info_guest( if ( !v->is_initialised ) { + if ( !compat && !(flags & VGCF_in_kernel) && !c.nat->ctrlreg[1] ) + return -EINVAL; + v->arch.pv_vcpu.ldt_base = c(ldt_base); v->arch.pv_vcpu.ldt_ents = c(ldt_ents); } @@ -957,24 +958,44 @@ int arch_set_info_guest( if ( rc != 0 ) return rc; + set_bit(_VPF_in_reset, &v->pause_flags); + if ( !compat ) - { cr3_gfn = xen_cr3_to_pfn(c.nat->ctrlreg[3]); - cr3_page = get_page_from_gfn(d, cr3_gfn, NULL, P2M_ALLOC); - - if ( !cr3_page ) - { - destroy_gdt(v); - return -EINVAL; - } - if ( !paging_mode_refcounts(d) - && !get_page_type(cr3_page, PGT_base_page_table) ) - { - put_page(cr3_page); - destroy_gdt(v); - return -EINVAL; - } +#ifdef CONFIG_COMPAT + else + cr3_gfn = compat_cr3_to_pfn(c.cmp->ctrlreg[3]); +#endif + cr3_page = get_page_from_gfn(d, cr3_gfn, NULL, P2M_ALLOC); + if ( !cr3_page ) + rc = -EINVAL; + else if ( paging_mode_refcounts(d) ) + /* nothing */; + else if ( cr3_page == v->arch.old_guest_table ) + { + v->arch.old_guest_table = NULL; + put_page(cr3_page); + } + else + { + /* + * Since v->arch.guest_table{,_user} are both NULL, this effectively + * is just a call to put_old_guest_table(). + */ + if ( !compat ) + rc = vcpu_destroy_pagetables(v); + if ( !rc ) + rc = get_page_type_preemptible(cr3_page, + !compat ? PGT_root_page_table + : PGT_l3_page_table); + if ( rc == -EINTR ) + rc = -EAGAIN; + } + if ( rc ) + /* handled below */; + else if ( !compat ) + { v->arch.guest_table = pagetable_from_page(cr3_page); #ifdef __x86_64__ if ( c.nat->ctrlreg[1] ) @@ -982,56 +1003,44 @@ int arch_set_info_guest( cr3_gfn = xen_cr3_to_pfn(c.nat->ctrlreg[1]); cr3_page = get_page_from_gfn(d, cr3_gfn, NULL, P2M_ALLOC); - if ( !cr3_page || - (!paging_mode_refcounts(d) - && !get_page_type(cr3_page, PGT_base_page_table)) ) + if ( !cr3_page ) + rc = -EINVAL; + else if ( !paging_mode_refcounts(d) ) { - if (cr3_page) - put_page(cr3_page); - cr3_page = pagetable_get_page(v->arch.guest_table); - v->arch.guest_table = pagetable_null(); - if ( paging_mode_refcounts(d) ) - put_page(cr3_page); - else - put_page_and_type(cr3_page); - destroy_gdt(v); - return -EINVAL; + rc = get_page_type_preemptible(cr3_page, PGT_root_page_table); + switch ( rc ) + { + case -EINTR: + rc = -EAGAIN; + case -EAGAIN: + v->arch.old_guest_table = + pagetable_get_page(v->arch.guest_table); + v->arch.guest_table = pagetable_null(); + break; + } } - - v->arch.guest_table_user = pagetable_from_page(cr3_page); - } - else if ( !(flags & VGCF_in_kernel) ) - { - destroy_gdt(v); - return -EINVAL; + if ( !rc ) + v->arch.guest_table_user = pagetable_from_page(cr3_page); } } else { l4_pgentry_t *l4tab; - cr3_gfn = compat_cr3_to_pfn(c.cmp->ctrlreg[3]); - cr3_page = get_page_from_gfn(d, cr3_gfn, NULL, P2M_ALLOC); - - if ( !cr3_page) - { - destroy_gdt(v); - return -EINVAL; - } - - if (!paging_mode_refcounts(d) - && !get_page_type(cr3_page, PGT_l3_page_table) ) - { - put_page(cr3_page); - destroy_gdt(v); - return -EINVAL; - } - l4tab = __va(pagetable_get_paddr(v->arch.guest_table)); *l4tab = l4e_from_pfn(page_to_mfn(cr3_page), _PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_ACCESSED); #endif } + if ( rc ) + { + if ( cr3_page ) + put_page(cr3_page); + destroy_gdt(v); + return rc; + } + + clear_bit(_VPF_in_reset, &v->pause_flags); if ( v->vcpu_id == 0 ) update_domain_wallclock_time(d); @@ -1053,17 +1062,16 @@ int arch_set_info_guest( #undef c } -void arch_vcpu_reset(struct vcpu *v) +int arch_vcpu_reset(struct vcpu *v) { if ( !is_hvm_vcpu(v) ) { destroy_gdt(v); - vcpu_destroy_pagetables(v); - } - else - { - vcpu_end_shutdown_deferral(v); + return vcpu_destroy_pagetables(v); } + + vcpu_end_shutdown_deferral(v); + return 0; } /* @@ -2069,63 +2077,6 @@ static int relinquish_memory( return ret; } -static void vcpu_destroy_pagetables(struct vcpu *v) -{ - struct domain *d = v->domain; - unsigned long pfn; - -#ifdef __x86_64__ - if ( is_pv_32on64_vcpu(v) ) - { - pfn = l4e_get_pfn(*(l4_pgentry_t *) - __va(pagetable_get_paddr(v->arch.guest_table))); - - if ( pfn != 0 ) - { - if ( paging_mode_refcounts(d) ) - put_page(mfn_to_page(pfn)); - else - put_page_and_type(mfn_to_page(pfn)); - } - - l4e_write( - (l4_pgentry_t *)__va(pagetable_get_paddr(v->arch.guest_table)), - l4e_empty()); - - v->arch.cr3 = 0; - return; - } -#endif - - pfn = pagetable_get_pfn(v->arch.guest_table); - if ( pfn != 0 ) - { - if ( paging_mode_refcounts(d) ) - put_page(mfn_to_page(pfn)); - else - put_page_and_type(mfn_to_page(pfn)); - v->arch.guest_table = pagetable_null(); - } - -#ifdef __x86_64__ - /* Drop ref to guest_table_user (from MMUEXT_NEW_USER_BASEPTR) */ - pfn = pagetable_get_pfn(v->arch.guest_table_user); - if ( pfn != 0 ) - { - if ( !is_pv_32bit_vcpu(v) ) - { - if ( paging_mode_refcounts(d) ) - put_page(mfn_to_page(pfn)); - else - put_page_and_type(mfn_to_page(pfn)); - } - v->arch.guest_table_user = pagetable_null(); - } -#endif - - v->arch.cr3 = 0; -} - int domain_relinquish_resources(struct domain *d) { int ret; @@ -2143,7 +2094,11 @@ int domain_relinquish_resources(struct domain *d) /* Drop the in-use references to page-table bases. */ for_each_vcpu ( d, v ) - vcpu_destroy_pagetables(v); + { + ret = vcpu_destroy_pagetables(v); + if ( ret ) + return ret; + } if ( !is_hvm_domain(d) ) { diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c index 3d471a5..efacc98 100644 --- a/xen/arch/x86/hvm/hvm.c +++ b/xen/arch/x86/hvm/hvm.c @@ -3509,8 +3509,11 @@ static void hvm_s3_suspend(struct domain *d) for_each_vcpu ( d, v ) { + int rc; + vlapic_reset(vcpu_vlapic(v)); - vcpu_reset(v); + rc = vcpu_reset(v); + ASSERT(!rc); } vpic_reset(d); diff --git a/xen/arch/x86/hvm/vlapic.c b/xen/arch/x86/hvm/vlapic.c index 52d111b..7778342 100644 --- a/xen/arch/x86/hvm/vlapic.c +++ b/xen/arch/x86/hvm/vlapic.c @@ -252,10 +252,13 @@ static void vlapic_init_sipi_action(unsigned long _vcpu) { case APIC_DM_INIT: { bool_t fpu_initialised; + int rc; + domain_lock(target->domain); /* Reset necessary VCPU state. This does not include FPU state. */ fpu_initialised = target->fpu_initialised; - vcpu_reset(target); + rc = vcpu_reset(target); + ASSERT(!rc); target->fpu_initialised = fpu_initialised; vlapic_reset(vcpu_vlapic(target)); domain_unlock(target->domain); diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c index 8444610..055f307 100644 --- a/xen/arch/x86/mm.c +++ b/xen/arch/x86/mm.c @@ -1241,7 +1241,16 @@ static int put_page_from_l3e(l3_pgentry_t l3e, unsigned long pfn, #endif if ( unlikely(partial > 0) ) + { + ASSERT(preemptible >= 0); return __put_page_type(l3e_get_page(l3e), preemptible); + } + + if ( preemptible < 0 ) + { + current->arch.old_guest_table = l3e_get_page(l3e); + return 0; + } return put_page_and_type_preemptible(l3e_get_page(l3e), preemptible); } @@ -1254,7 +1263,17 @@ static int put_page_from_l4e(l4_pgentry_t l4e, unsigned long pfn, (l4e_get_pfn(l4e) != pfn) ) { if ( unlikely(partial > 0) ) + { + ASSERT(preemptible >= 0); return __put_page_type(l4e_get_page(l4e), preemptible); + } + + if ( preemptible < 0 ) + { + current->arch.old_guest_table = l4e_get_page(l4e); + return 0; + } + return put_page_and_type_preemptible(l4e_get_page(l4e), preemptible); } return 1; @@ -1549,12 +1568,17 @@ static int alloc_l3_table(struct page_info *page, int preemptible) if ( rc < 0 && rc != -EAGAIN && rc != -EINTR ) { MEM_LOG("Failure in alloc_l3_table: entry %d", i); + if ( i ) + { + page->nr_validated_ptes = i; + page->partial_pte = 0; + current->arch.old_guest_table = page; + } while ( i-- > 0 ) { if ( !is_guest_l3_slot(i) ) continue; unadjust_guest_l3e(pl3e[i], d); - put_page_from_l3e(pl3e[i], pfn, 0, 0); } } @@ -1584,22 +1608,24 @@ static int alloc_l4_table(struct page_info *page, int preemptible) page->nr_validated_ptes = i; page->partial_pte = partial ?: 1; } - else if ( rc == -EINTR ) + else if ( rc < 0 ) { + if ( rc != -EINTR ) + MEM_LOG("Failure in alloc_l4_table: entry %d", i); if ( i ) { page->nr_validated_ptes = i; page->partial_pte = 0; - rc = -EAGAIN; + if ( rc == -EINTR ) + rc = -EAGAIN; + else + { + if ( current->arch.old_guest_table ) + page->nr_validated_ptes++; + current->arch.old_guest_table = page; + } } } - else if ( rc < 0 ) - { - MEM_LOG("Failure in alloc_l4_table: entry %d", i); - while ( i-- > 0 ) - if ( is_guest_l4_slot(d, i) ) - put_page_from_l4e(pl4e[i], pfn, 0, 0); - } if ( rc < 0 ) return rc; @@ -2047,7 +2073,7 @@ static int mod_l3_entry(l3_pgentry_t *pl3e, pae_flush_pgd(pfn, pgentry_ptr_to_slot(pl3e), nl3e); } - put_page_from_l3e(ol3e, pfn, 0, 0); + put_page_from_l3e(ol3e, pfn, 0, -preemptible); return rc; } @@ -2110,7 +2136,7 @@ static int mod_l4_entry(l4_pgentry_t *pl4e, return -EFAULT; } - put_page_from_l4e(ol4e, pfn, 0, 0); + put_page_from_l4e(ol4e, pfn, 0, -preemptible); return rc; } @@ -2268,7 +2294,15 @@ static int alloc_page_type(struct page_info *page, unsigned long type, PRtype_info ": caf=%08lx taf=%" PRtype_info, page_to_mfn(page), get_gpfn_from_mfn(page_to_mfn(page)), type, page->count_info, page->u.inuse.type_info); - page->u.inuse.type_info = 0; + if ( page != current->arch.old_guest_table ) + page->u.inuse.type_info = 0; + else + { + ASSERT((page->u.inuse.type_info & + (PGT_count_mask | PGT_validated)) == 1); + get_page_light(page); + page->u.inuse.type_info |= PGT_partial; + } } else { @@ -2808,49 +2842,150 @@ static void put_superpage(unsigned long mfn) #endif +static int put_old_guest_table(struct vcpu *v) +{ + int rc; + + if ( !v->arch.old_guest_table ) + return 0; + + switch ( rc = put_page_and_type_preemptible(v->arch.old_guest_table, 1) ) + { + case -EINTR: + case -EAGAIN: + return -EAGAIN; + } + + v->arch.old_guest_table = NULL; + + return rc; +} + +int vcpu_destroy_pagetables(struct vcpu *v) +{ + unsigned long mfn = pagetable_get_pfn(v->arch.guest_table); + struct page_info *page; + int rc = put_old_guest_table(v); + + if ( rc ) + return rc; + +#ifdef __x86_64__ + if ( is_pv_32on64_vcpu(v) ) + mfn = l4e_get_pfn(*(l4_pgentry_t *)mfn_to_virt(mfn)); +#endif + + if ( mfn ) + { + page = mfn_to_page(mfn); + if ( paging_mode_refcounts(v->domain) ) + put_page(page); + else + rc = put_page_and_type_preemptible(page, 1); + } + +#ifdef __x86_64__ + if ( is_pv_32on64_vcpu(v) ) + { + if ( !rc ) + l4e_write( + (l4_pgentry_t *)__va(pagetable_get_paddr(v->arch.guest_table)), + l4e_empty()); + } + else +#endif + if ( !rc ) + { + v->arch.guest_table = pagetable_null(); + +#ifdef __x86_64__ + /* Drop ref to guest_table_user (from MMUEXT_NEW_USER_BASEPTR) */ + mfn = pagetable_get_pfn(v->arch.guest_table_user); + if ( mfn ) + { + page = mfn_to_page(mfn); + if ( paging_mode_refcounts(v->domain) ) + put_page(page); + else + rc = put_page_and_type_preemptible(page, 1); + } + if ( !rc ) + v->arch.guest_table_user = pagetable_null(); +#endif + } + + v->arch.cr3 = 0; + + return rc; +} int new_guest_cr3(unsigned long mfn) { struct vcpu *curr = current; struct domain *d = curr->domain; - int okay; + int rc; unsigned long old_base_mfn; #ifdef __x86_64__ if ( is_pv_32on64_domain(d) ) { - okay = paging_mode_refcounts(d) - ? 0 /* Old code was broken, but what should it be? */ - : mod_l4_entry( + rc = paging_mode_refcounts(d) + ? -EINVAL /* Old code was broken, but what should it be? */ + : mod_l4_entry( __va(pagetable_get_paddr(curr->arch.guest_table)), l4e_from_pfn( mfn, (_PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_ACCESSED)), - pagetable_get_pfn(curr->arch.guest_table), 0, 0, curr) == 0; - if ( unlikely(!okay) ) + pagetable_get_pfn(curr->arch.guest_table), 0, 1, curr); + switch ( rc ) { + case 0: + break; + case -EINTR: + case -EAGAIN: + return -EAGAIN; + default: MEM_LOG("Error while installing new compat baseptr %lx", mfn); - return 0; + return rc; } invalidate_shadow_ldt(curr, 0); write_ptbase(curr); - return 1; + return 0; } #endif - okay = paging_mode_refcounts(d) - ? get_page_from_pagenr(mfn, d) - : !get_page_and_type_from_pagenr(mfn, PGT_root_page_table, d, 0, 0); - if ( unlikely(!okay) ) + rc = put_old_guest_table(curr); + if ( unlikely(rc) ) + return rc; + + old_base_mfn = pagetable_get_pfn(curr->arch.guest_table); + /* + * This is particularly important when getting restarted after the + * previous attempt got preempted in the put-old-MFN phase. + */ + if ( old_base_mfn == mfn ) { - MEM_LOG("Error while installing new baseptr %lx", mfn); + write_ptbase(curr); return 0; } - invalidate_shadow_ldt(curr, 0); + rc = paging_mode_refcounts(d) + ? (get_page_from_pagenr(mfn, d) ? 0 : -EINVAL) + : get_page_and_type_from_pagenr(mfn, PGT_root_page_table, d, 0, 1); + switch ( rc ) + { + case 0: + break; + case -EINTR: + case -EAGAIN: + return -EAGAIN; + default: + MEM_LOG("Error while installing new baseptr %lx", mfn); + return rc; + } - old_base_mfn = pagetable_get_pfn(curr->arch.guest_table); + invalidate_shadow_ldt(curr, 0); curr->arch.guest_table = pagetable_from_pfn(mfn); update_cr3(curr); @@ -2859,13 +2994,25 @@ int new_guest_cr3(unsigned long mfn) if ( likely(old_base_mfn != 0) ) { + struct page_info *page = mfn_to_page(old_base_mfn); + if ( paging_mode_refcounts(d) ) - put_page(mfn_to_page(old_base_mfn)); + put_page(page); else - put_page_and_type(mfn_to_page(old_base_mfn)); + switch ( rc = put_page_and_type_preemptible(page, 1) ) + { + case -EINTR: + rc = -EAGAIN; + case -EAGAIN: + curr->arch.old_guest_table = page; + break; + default: + BUG_ON(rc); + break; + } } - return 1; + return rc; } static struct domain *get_pg_owner(domid_t domid) @@ -2994,12 +3141,29 @@ long do_mmuext_op( unsigned int foreigndom) { struct mmuext_op op; - int rc = 0, i = 0, okay; unsigned long type; - unsigned int done = 0; + unsigned int i = 0, done = 0; struct vcpu *curr = current; struct domain *d = curr->domain; struct domain *pg_owner; + int okay, rc = put_old_guest_table(curr); + + if ( unlikely(rc) ) + { + if ( likely(rc == -EAGAIN) ) + rc = hypercall_create_continuation( + __HYPERVISOR_mmuext_op, "hihi", uops, count, pdone, + foreigndom); + return rc; + } + + if ( unlikely(count == MMU_UPDATE_PREEMPTED) && + likely(guest_handle_is_null(uops)) ) + { + /* See the curr->arch.old_guest_table related + * hypercall_create_continuation() below. */ + return (int)foreigndom; + } if ( unlikely(count & MMU_UPDATE_PREEMPTED) ) { @@ -3024,7 +3188,7 @@ long do_mmuext_op( for ( i = 0; i < count; i++ ) { - if ( hypercall_preempt_check() ) + if ( curr->arch.old_guest_table || hypercall_preempt_check() ) { rc = -EAGAIN; break; @@ -3088,21 +3252,17 @@ long do_mmuext_op( } if ( (rc = xsm_memory_pin_page(d, pg_owner, page)) != 0 ) - { - put_page_and_type(page); okay = 0; - break; - } - - if ( unlikely(test_and_set_bit(_PGT_pinned, - &page->u.inuse.type_info)) ) + else if ( unlikely(test_and_set_bit(_PGT_pinned, + &page->u.inuse.type_info)) ) { MEM_LOG("Mfn %lx already pinned", page_to_mfn(page)); - put_page_and_type(page); okay = 0; - break; } + if ( unlikely(!okay) ) + goto pin_drop; + /* A page is dirtied when its pin status is set. */ paging_mark_dirty(pg_owner, page_to_mfn(page)); @@ -3116,7 +3276,13 @@ long do_mmuext_op( &page->u.inuse.type_info)); spin_unlock(&pg_owner->page_alloc_lock); if ( drop_ref ) - put_page_and_type(page); + { + pin_drop: + if ( type == PGT_l1_page_table ) + put_page_and_type(page); + else + curr->arch.old_guest_table = page; + } } break; @@ -3144,7 +3310,17 @@ long do_mmuext_op( break; } - put_page_and_type(page); + switch ( rc = put_page_and_type_preemptible(page, 1) ) + { + case -EINTR: + case -EAGAIN: + curr->arch.old_guest_table = page; + rc = 0; + break; + default: + BUG_ON(rc); + break; + } put_page(page); /* A page is dirtied when its pin status is cleared. */ @@ -3154,8 +3330,13 @@ long do_mmuext_op( } case MMUEXT_NEW_BASEPTR: - okay = (!paging_mode_translate(d) - && new_guest_cr3(op.arg1.mfn)); + if ( paging_mode_translate(d) ) + okay = 0; + else + { + rc = new_guest_cr3(op.arg1.mfn); + okay = !rc; + } break; @@ -3169,29 +3350,56 @@ long do_mmuext_op( break; } + old_mfn = pagetable_get_pfn(curr->arch.guest_table_user); + /* + * This is particularly important when getting restarted after the + * previous attempt got preempted in the put-old-MFN phase. + */ + if ( old_mfn == op.arg1.mfn ) + break; + if ( op.arg1.mfn != 0 ) { if ( paging_mode_refcounts(d) ) okay = get_page_from_pagenr(op.arg1.mfn, d); else - okay = !get_page_and_type_from_pagenr( - op.arg1.mfn, PGT_root_page_table, d, 0, 0); + { + rc = get_page_and_type_from_pagenr( + op.arg1.mfn, PGT_root_page_table, d, 0, 1); + okay = !rc; + } if ( unlikely(!okay) ) { - MEM_LOG("Error while installing new mfn %lx", op.arg1.mfn); + if ( rc == -EINTR ) + rc = -EAGAIN; + else if ( rc != -EAGAIN ) + MEM_LOG("Error while installing new mfn %lx", + op.arg1.mfn); break; } } - old_mfn = pagetable_get_pfn(curr->arch.guest_table_user); curr->arch.guest_table_user = pagetable_from_pfn(op.arg1.mfn); if ( old_mfn != 0 ) { + struct page_info *page = mfn_to_page(old_mfn); + if ( paging_mode_refcounts(d) ) - put_page(mfn_to_page(old_mfn)); + put_page(page); else - put_page_and_type(mfn_to_page(old_mfn)); + switch ( rc = put_page_and_type_preemptible(page, 1) ) + { + case -EINTR: + rc = -EAGAIN; + case -EAGAIN: + curr->arch.old_guest_table = page; + okay = 0; + break; + default: + BUG_ON(rc); + break; + } } break; @@ -3433,9 +3641,27 @@ long do_mmuext_op( } if ( rc == -EAGAIN ) + { + ASSERT(i < count); rc = hypercall_create_continuation( __HYPERVISOR_mmuext_op, "hihi", uops, (count - i) | MMU_UPDATE_PREEMPTED, pdone, foreigndom); + } + else if ( curr->arch.old_guest_table ) + { + XEN_GUEST_HANDLE(void) null; + + ASSERT(rc || i == count); + set_xen_guest_handle(null, NULL); + /* + * In order to have a way to communicate the final return value to + * our continuation, we pass this in place of "foreigndom", building + * on the fact that this argument isn't needed anymore. + */ + rc = hypercall_create_continuation( + __HYPERVISOR_mmuext_op, "hihi", null, + MMU_UPDATE_PREEMPTED, null, rc); + } put_pg_owner(pg_owner); @@ -3462,11 +3688,28 @@ long do_mmu_update( void *va; unsigned long gpfn, gmfn, mfn; struct page_info *page; - int rc = 0, i = 0; - unsigned int cmd, done = 0, pt_dom; - struct vcpu *v = current; + unsigned int cmd, i = 0, done = 0, pt_dom; + struct vcpu *curr = current, *v = curr; struct domain *d = v->domain, *pt_owner = d, *pg_owner; struct domain_mmap_cache mapcache; + int rc = put_old_guest_table(curr); + + if ( unlikely(rc) ) + { + if ( likely(rc == -EAGAIN) ) + rc = hypercall_create_continuation( + __HYPERVISOR_mmu_update, "hihi", ureqs, count, pdone, + foreigndom); + return rc; + } + + if ( unlikely(count == MMU_UPDATE_PREEMPTED) && + likely(guest_handle_is_null(ureqs)) ) + { + /* See the curr->arch.old_guest_table related + * hypercall_create_continuation() below. */ + return (int)foreigndom; + } if ( unlikely(count & MMU_UPDATE_PREEMPTED) ) { @@ -3515,7 +3758,7 @@ long do_mmu_update( for ( i = 0; i < count; i++ ) { - if ( hypercall_preempt_check() ) + if ( curr->arch.old_guest_table || hypercall_preempt_check() ) { rc = -EAGAIN; break; @@ -3696,9 +3939,27 @@ long do_mmu_update( } if ( rc == -EAGAIN ) + { + ASSERT(i < count); rc = hypercall_create_continuation( __HYPERVISOR_mmu_update, "hihi", ureqs, (count - i) | MMU_UPDATE_PREEMPTED, pdone, foreigndom); + } + else if ( curr->arch.old_guest_table ) + { + XEN_GUEST_HANDLE(void) null; + + ASSERT(rc || i == count); + set_xen_guest_handle(null, NULL); + /* + * In order to have a way to communicate the final return value to + * our continuation, we pass this in place of "foreigndom", building + * on the fact that this argument isn't needed anymore. + */ + rc = hypercall_create_continuation( + __HYPERVISOR_mmu_update, "hihi", null, + MMU_UPDATE_PREEMPTED, null, rc); + } put_pg_owner(pg_owner); diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c index 692281a..eada470 100644 --- a/xen/arch/x86/traps.c +++ b/xen/arch/x86/traps.c @@ -2407,12 +2407,23 @@ static int emulate_privileged_op(struct cpu_user_regs *regs) #endif } page = get_page_from_gfn(v->domain, gfn, NULL, P2M_ALLOC); - rc = page ? new_guest_cr3(page_to_mfn(page)) : 0; if ( page ) + { + rc = new_guest_cr3(page_to_mfn(page)); put_page(page); + } + else + rc = -EINVAL; domain_unlock(v->domain); - if ( rc == 0 ) /* not okay */ + switch ( rc ) + { + case 0: + break; + case -EAGAIN: /* retry after preemption */ + goto skip; + default: /* not okay */ goto fail; + } break; } diff --git a/xen/arch/x86/x86_64/compat/mm.c b/xen/arch/x86/x86_64/compat/mm.c index fb7baca..ef7822b 100644 --- a/xen/arch/x86/x86_64/compat/mm.c +++ b/xen/arch/x86/x86_64/compat/mm.c @@ -268,6 +268,13 @@ int compat_mmuext_op(XEN_GUEST_HANDLE(mmuext_op_compat_t) cmp_uops, int rc = 0; XEN_GUEST_HANDLE(mmuext_op_t) nat_ops; + if ( unlikely(count == MMU_UPDATE_PREEMPTED) && + likely(guest_handle_is_null(cmp_uops)) ) + { + set_xen_guest_handle(nat_ops, NULL); + return do_mmuext_op(nat_ops, count, pdone, foreigndom); + } + preempt_mask = count & MMU_UPDATE_PREEMPTED; count ^= preempt_mask; @@ -365,17 +372,23 @@ int compat_mmuext_op(XEN_GUEST_HANDLE(mmuext_op_compat_t) cmp_uops, : mcs->call.args[1]; unsigned int left = arg1 & ~MMU_UPDATE_PREEMPTED; - BUG_ON(left == arg1); + BUG_ON(left == arg1 && left != i); BUG_ON(left > count); guest_handle_add_offset(nat_ops, i - left); guest_handle_subtract_offset(cmp_uops, left); left = 1; - BUG_ON(!hypercall_xlat_continuation(&left, 0x01, nat_ops, cmp_uops)); - BUG_ON(left != arg1); - if (!test_bit(_MCSF_in_multicall, &mcs->flags)) - regs->_ecx += count - i; + if ( arg1 != MMU_UPDATE_PREEMPTED ) + { + BUG_ON(!hypercall_xlat_continuation(&left, 0x01, nat_ops, + cmp_uops)); + if ( !test_bit(_MCSF_in_multicall, &mcs->flags) ) + regs->_ecx += count - i; + else + mcs->compat_call.args[1] += count - i; + } else - mcs->compat_call.args[1] += count - i; + BUG_ON(hypercall_xlat_continuation(&left, 0)); + BUG_ON(left != arg1); } else BUG_ON(err > 0); diff --git a/xen/common/compat/domain.c b/xen/common/compat/domain.c index 40a0287..9ddaa38 100644 --- a/xen/common/compat/domain.c +++ b/xen/common/compat/domain.c @@ -50,6 +50,10 @@ int compat_vcpu_op(int cmd, int vcpuid, XEN_GUEST_HANDLE(void) arg) rc = v->is_initialised ? -EEXIST : arch_set_info_guest(v, cmp_ctxt); domain_unlock(d); + if ( rc == -EAGAIN ) + rc = hypercall_create_continuation(__HYPERVISOR_vcpu_op, "iih", + cmd, vcpuid, arg); + xfree(cmp_ctxt); break; } diff --git a/xen/common/domain.c b/xen/common/domain.c index c09fb73..89ab922 100644 --- a/xen/common/domain.c +++ b/xen/common/domain.c @@ -779,14 +779,18 @@ void domain_unpause_by_systemcontroller(struct domain *d) domain_unpause(d); } -void vcpu_reset(struct vcpu *v) +int vcpu_reset(struct vcpu *v) { struct domain *d = v->domain; + int rc; vcpu_pause(v); domain_lock(d); - arch_vcpu_reset(v); + set_bit(_VPF_in_reset, &v->pause_flags); + rc = arch_vcpu_reset(v); + if ( rc ) + goto out_unlock; set_bit(_VPF_down, &v->pause_flags); @@ -802,9 +806,13 @@ void vcpu_reset(struct vcpu *v) #endif cpumask_clear(v->cpu_affinity_tmp); clear_bit(_VPF_blocked, &v->pause_flags); + clear_bit(_VPF_in_reset, &v->pause_flags); + out_unlock: domain_unlock(v->domain); vcpu_unpause(v); + + return rc; } @@ -841,6 +849,11 @@ long do_vcpu_op(int cmd, int vcpuid, XEN_GUEST_HANDLE(void) arg) domain_unlock(d); free_vcpu_guest_context(ctxt); + + if ( rc == -EAGAIN ) + rc = hypercall_create_continuation(__HYPERVISOR_vcpu_op, "iih", + cmd, vcpuid, arg); + break; case VCPUOP_up: { diff --git a/xen/common/domctl.c b/xen/common/domctl.c index cbc8146..b3bfb38 100644 --- a/xen/common/domctl.c +++ b/xen/common/domctl.c @@ -307,8 +307,10 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domctl_t) u_domctl) if ( guest_handle_is_null(op->u.vcpucontext.ctxt) ) { - vcpu_reset(v); - ret = 0; + ret = vcpu_reset(v); + if ( ret == -EAGAIN ) + ret = hypercall_create_continuation( + __HYPERVISOR_domctl, "h", u_domctl); goto svc_out; } @@ -337,6 +339,10 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domctl_t) u_domctl) domain_pause(d); ret = arch_set_info_guest(v, c); domain_unpause(d); + + if ( ret == -EAGAIN ) + ret = hypercall_create_continuation( + __HYPERVISOR_domctl, "h", u_domctl); } svc_out: diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h index aecee68..898f63a 100644 --- a/xen/include/asm-x86/domain.h +++ b/xen/include/asm-x86/domain.h @@ -464,6 +464,7 @@ struct arch_vcpu pagetable_t guest_table_user; /* (MFN) x86/64 user-space pagetable */ #endif pagetable_t guest_table; /* (MFN) guest notion of cr3 */ + struct page_info *old_guest_table; /* partially destructed pagetable */ /* guest_table holds a ref to the page, and also a type-count unless * shadow refcounts are in use */ pagetable_t shadow_table[4]; /* (MFN) shadow(s) of guest */ diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h index ba92568..82cdde6 100644 --- a/xen/include/asm-x86/mm.h +++ b/xen/include/asm-x86/mm.h @@ -605,6 +605,7 @@ void audit_domains(void); int new_guest_cr3(unsigned long pfn); void make_cr3(struct vcpu *v, unsigned long mfn); void update_cr3(struct vcpu *v); +int vcpu_destroy_pagetables(struct vcpu *); void propagate_page_fault(unsigned long addr, u16 error_code); void *do_page_walk(struct vcpu *v, unsigned long addr); diff --git a/xen/include/xen/domain.h b/xen/include/xen/domain.h index d4ac50f..504a70f 100644 --- a/xen/include/xen/domain.h +++ b/xen/include/xen/domain.h @@ -13,7 +13,7 @@ typedef union { struct vcpu *alloc_vcpu( struct domain *d, unsigned int vcpu_id, unsigned int cpu_id); struct vcpu *alloc_dom0_vcpu0(void); -void vcpu_reset(struct vcpu *v); +int vcpu_reset(struct vcpu *); struct xen_domctl_getdomaininfo; void getdomaininfo(struct domain *d, struct xen_domctl_getdomaininfo *info); @@ -67,7 +67,7 @@ void arch_dump_vcpu_info(struct vcpu *v); void arch_dump_domain_info(struct domain *d); -void arch_vcpu_reset(struct vcpu *v); +int arch_vcpu_reset(struct vcpu *); extern spinlock_t vcpu_alloc_lock; bool_t domctl_lock_acquire(void); diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h index b619269..b0715cb 100644 --- a/xen/include/xen/sched.h +++ b/xen/include/xen/sched.h @@ -644,6 +644,9 @@ static inline struct domain *next_domain_in_cpupool( /* VCPU is blocked due to missing mem_sharing ring. */ #define _VPF_mem_sharing 6 #define VPF_mem_sharing (1UL<<_VPF_mem_sharing) + /* VCPU is being reset. */ +#define _VPF_in_reset 7 +#define VPF_in_reset (1UL<<_VPF_in_reset) static inline int vcpu_runnable(struct vcpu *v) {