x86: fix page refcount handling in page table pin error path

In the original patch 7 of the series addressing XSA-45 I mistakenly
took the addition of the call to get_page_light() in alloc_page_type()
to cover two decrements that would happen: One for the PGT_partial bit
that is getting set along with the call, and the other for the page
reference the caller holds (and would be dropping on its error path).
But of course the additional page reference is tied to the PGT_partial
bit, and hence any caller of a function that may leave
->arch.old_guest_table non-NULL for error cleanup purposes has to make
sure a respective page reference gets retained.

Similar issues were then also spotted elsewhere: In effect all callers
of get_page_type_preemptible() need to deal with errors in similar
ways. To make sure error handling can work this way without leaking
page references, a respective assertion gets added to that function.

This is CVE-2013-1432 / XSA-58.

Reported-by: Andrew Cooper
Signed-off-by: Jan Beulich
Tested-by: Andrew Cooper
Reviewed-by: Tim Deegan

--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -941,6 +941,10 @@ int arch_set_info_guest(
     if ( v->vcpu_id == 0 )
         d->vm_assist = c(vm_assist);
 
+    rc = put_old_guest_table(current);
+    if ( rc )
+        return rc;
+
     if ( !compat )
         rc = (int)set_gdt(v, c.nat->gdt_frames, c.nat->gdt_ents);
 #ifdef CONFIG_COMPAT
@@ -980,18 +984,24 @@ int arch_set_info_guest(
     }
     else
     {
-        /*
-         * Since v->arch.guest_table{,_user} are both NULL, this effectively
-         * is just a call to put_old_guest_table().
-         */
         if ( !compat )
-            rc = vcpu_destroy_pagetables(v);
+            rc = put_old_guest_table(v);
         if ( !rc )
             rc = get_page_type_preemptible(cr3_page,
                                            !compat ? PGT_root_page_table
                                                    : PGT_l3_page_table);
-        if ( rc == -EINTR )
+        switch ( rc )
+        {
+        case -EINTR:
             rc = -EAGAIN;
+        case -EAGAIN:
+        case 0:
+            break;
+        default:
+            if ( cr3_page == current->arch.old_guest_table )
+                cr3_page = NULL;
+            break;
+        }
     }
     if ( rc )
         /* handled below */;
@@ -1018,6 +1028,11 @@ int arch_set_info_guest(
                 pagetable_get_page(v->arch.guest_table);
             v->arch.guest_table = pagetable_null();
             break;
+        default:
+            if ( cr3_page == current->arch.old_guest_table )
+                cr3_page = NULL;
+        case 0:
+            break;
         }
     }
     if ( !rc )
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -718,7 +718,8 @@ static int get_page_and_type_from_pagenr
           get_page_type_preemptible(page, type) :
           (get_page_type(page, type) ? 0 : -EINVAL));
 
-    if ( unlikely(rc) && partial >= 0 )
+    if ( unlikely(rc) && partial >= 0 &&
+         (!preemptible || page != current->arch.old_guest_table) )
         put_page(page);
 
     return rc;
@@ -2638,6 +2639,7 @@ int put_page_type_preemptible(struct pag
 
 int get_page_type_preemptible(struct page_info *page, unsigned long type)
 {
+    ASSERT(!current->arch.old_guest_table);
     return __get_page_type(page, type, 1);
 }
 
@@ -2848,7 +2850,7 @@ static void put_superpage(unsigned long
 
 #endif
 
-static int put_old_guest_table(struct vcpu *v)
+int put_old_guest_table(struct vcpu *v)
 {
     int rc;
 
@@ -3253,7 +3255,8 @@ long do_mmuext_op(
                 rc = -EAGAIN;
             else if ( rc != -EAGAIN )
                 MEM_LOG("Error while pinning mfn %lx",
                         page_to_mfn(page));
-            put_page(page);
+            if ( page != curr->arch.old_guest_table )
+                put_page(page);
             break;
         }
--- a/xen/include/asm-x86/mm.h
+++ b/xen/include/asm-x86/mm.h
@@ -374,6 +374,7 @@ void put_page_type(struct page_info *pag
 int get_page_type(struct page_info *page, unsigned long type);
 int put_page_type_preemptible(struct page_info *page);
 int get_page_type_preemptible(struct page_info *page, unsigned long type);
+int put_old_guest_table(struct vcpu *);
 int get_page_from_l1e(
     l1_pgentry_t l1e, struct domain *l1e_owner, struct domain *pg_owner);
 void put_page_from_l1e(l1_pgentry_t l1e, struct domain *l1e_owner);