main/xen: security fixes

fixes #7732 CVE-2017-12135 XSA-226 CVE-2017-12137 XSA-227 CVE-2017-12136 XSA-228 CVE-2017-12855 XSA-230
author: Daniel Sabogal <dsabogalcc@gmail.com> 2017-08-22 23:11:53 -0400
committer: Natanael Copa <ncopa@alpinelinux.org> 2017-08-24 22:05:11 +0000
commit: a9878fc57738d1b777eb47e5fdee082f009e2c38 (patch)
tree: 4133d5d8ddf5b5c8d87d44fabcb09318d4dd81d6 /main
parent: 5bae8450f755f729ff4fe76ab3999a538322cad4 (diff)
download: aports-a9878fc57738d1b777eb47e5fdee082f009e2c38.tar.bz2
aports-a9878fc57738d1b777eb47e5fdee082f009e2c38.tar.xz
6 files changed, 719 insertions, 1 deletions
diff --git a/main/xen/APKBUILD b/main/xen/APKBUILD
index 374292e7f6..1274b35f4a 100644
--- a/main/xen/APKBUILD
+++ b/main/xen/APKBUILD
@@ -3,7 +3,7 @@
 # Maintainer: William Pitcock <nenolod@dereferenced.org>
 pkgname=xen
 pkgver=4.9.0
-pkgrel=0
+pkgrel=1
 pkgdesc="Xen hypervisor"
 url="http://www.xen.org/"
 arch="x86_64 armhf"
@@ -73,6 +73,11 @@ options="!strip"
 #     - CVE-2017-10921 XSA-224
 #     - CVE-2017-10922 XSA-224
 #     - CVE-2017-10923 XSA-225
+#   4.9.0-r1:
+#     - CVE-2017-12135 XSA-226
+#     - CVE-2017-12137 XSA-227
+#     - CVE-2017-12136 XSA-228
+#     - CVE-2017-12855 XSA-230
 
 case "$CARCH" in
 x86*)
@@ -117,6 +122,12 @@ source="https://downloads.xenproject.org/release/$pkgname/$pkgver/$pkgname-$pkgv
 	http://xenbits.xen.org/xen-extfiles/zlib-$_ZLIB_VERSION.tar.gz
 	http://xenbits.xen.org/xen-extfiles/ipxe-git-$_IPXE_GIT_TAG.tar.gz
 
+	xsa226-1.patch
+	xsa226-2.patch
+	xsa227.patch
+	xsa228.patch
+	xsa230.patch
+
 	qemu-coroutine-gthread.patch
 	qemu-xen_paths.patch
 
@@ -366,6 +377,11 @@ c2bc9ffc8583aeae71cee9ddcc4418969768d4e3764d47307da54f93981c0109fb07d84b061b3a36
 4928b5b82f57645be9408362706ff2c4d9baa635b21b0d41b1c82930e8c60a759b1ea4fa74d7e6c7cae1b7692d006aa5cb72df0c3b88bf049779aa2b566f9d35  tpm_emulator-0.7.4.tar.gz
 021b958fcd0d346c4ba761bcf0cc40f3522de6186cf5a0a6ea34a70504ce9622b1c2626fce40675bc8282cf5f5ade18473656abc38050f72f5d6480507a2106e  zlib-1.2.3.tar.gz
 82ba65e1c676d32b29c71e6395c9506cab952c8f8b03f692e2b50133be8f0c0146d0f22c223262d81a4df579986fde5abc6507869f4965be4846297ef7b4b890  ipxe-git-827dd1bfee67daa683935ce65316f7e0f057fe1c.tar.gz
+e934ba5be6a526d164cb4c8bb71a679f2fedeaddb82d8f5ebbbbe3cbfaa6dd639c4e94662c6b7a9d066195f2a59e8d14dc3ee55dc94c09b4475d455d881b2741  xsa226-1.patch
+4d1e729c592efefd705233b49484991801606b2122a64ff14abbf994bb3e77ec75c4989d43753ce2043cc4fe13d34fb1cef7ee1adb291ff16625bb3b125e5508  xsa226-2.patch
+7d66494e833d46f8a213af0f2b107a12617d5e8b45c3b07daee229c75bd6aad98284bc0e19f15706d044b58273cc7f0c193ef8553faa22fadeae349689e763c8  xsa227.patch
+d406f14531af707325790909d08ce299ac2f2cb4b87f9a8ddb0fba10bd83bed84cc1633e07632cc2f841c50bc1a9af6240c89539a2e6ba6028cb127e218f86fc  xsa228.patch
+df174a1675f74b73e78bc3cb1c9f16536199dfd1922c0cc545a807e92bc24941a816891838258e118f477109548487251a7eaccb2d1dd9b6994c8c76fc5b058f  xsa230.patch
 c3c46f232f0bd9f767b232af7e8ce910a6166b126bd5427bb8dc325aeb2c634b956de3fc225cab5af72649070c8205cc8e1cab7689fc266c204f525086f1a562  qemu-coroutine-gthread.patch
 1936ab39a1867957fa640eb81c4070214ca4856a2743ba7e49c0cd017917071a9680d015f002c57fa7b9600dbadd29dcea5887f50e6c133305df2669a7a933f3  qemu-xen_paths.patch
 f095ea373f36381491ad36f0662fb4f53665031973721256b23166e596318581da7cbb0146d0beb2446729adfdb321e01468e377793f6563a67d68b8b0f7ffe3  hotplug-vif-vtrill.patch
diff --git a/main/xen/xsa226-1.patch b/main/xen/xsa226-1.patch
new file mode 100644
index 0000000000..7711d3f888
--- /dev/null
+++ b/main/xen/xsa226-1.patch
@@ -0,0 +1,134 @@
+From: Jan Beulich <jbeulich@suse.com>
+Subject: gnttab: don't use possibly unbounded tail calls
+
+There is no guarantee that the compiler would actually translate them
+to branches instead of calls, so only ones with a known recursion limit
+are okay:
+- __release_grant_for_copy() can call itself only once, as
+  __acquire_grant_for_copy() won't permit use of multi-level transitive
+  grants,
+- __acquire_grant_for_copy() is fine to call itself with the last
+  argument false, as that prevents further recursion,
+- __acquire_grant_for_copy() must not call itself to recover from an
+  observed change to the active entry's pin count
+
+This is part of CVE-2017-12135 / XSA-226.
+
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+
+--- a/xen/common/grant_table.c
++++ b/xen/common/grant_table.c
+@@ -2103,8 +2103,10 @@ __release_grant_for_copy(
+ 
+     if ( td != rd )
+     {
+-        /* Recursive calls, but they're tail calls, so it's
+-           okay. */
++        /*
++         * Recursive calls, but they're bounded (acquire permits only a single
++         * level of transitivity), so it's okay.
++         */
+         if ( released_write )
+             __release_grant_for_copy(td, trans_gref, 0);
+         else if ( released_read )
+@@ -2255,10 +2257,11 @@ __acquire_grant_for_copy(
+                 return rc;
+             }
+ 
+-            /* We dropped the lock, so we have to check that nobody
+-               else tried to pin (or, for that matter, unpin) the
+-               reference in *this* domain.  If they did, just give up
+-               and try again. */
++            /*
++             * We dropped the lock, so we have to check that nobody else tried
++             * to pin (or, for that matter, unpin) the reference in *this*
++             * domain.  If they did, just give up and tell the caller to retry.
++             */
+             if ( act->pin != old_pin )
+             {
+                 __fixup_status_for_copy_pin(act, status);
+@@ -2266,9 +2269,8 @@ __acquire_grant_for_copy(
+                 active_entry_release(act);
+                 grant_read_unlock(rgt);
+                 put_page(*page);
+-                return __acquire_grant_for_copy(rd, gref, ldom, readonly,
+-                                                frame, page, page_off, length,
+-                                                allow_transitive);
++                *page = NULL;
++                return ERESTART;
+             }
+ 
+             /* The actual remote remote grant may or may not be a
+@@ -2574,7 +2576,7 @@ static int gnttab_copy_one(const struct
+     {
+         gnttab_copy_release_buf(src);
+         rc = gnttab_copy_claim_buf(op, &op->source, src, GNTCOPY_source_gref);
+-        if ( rc < 0 )
++        if ( rc )
+             goto out;
+     }
+ 
+@@ -2584,7 +2586,7 @@ static int gnttab_copy_one(const struct
+     {
+         gnttab_copy_release_buf(dest);
+         rc = gnttab_copy_claim_buf(op, &op->dest, dest, GNTCOPY_dest_gref);
+-        if ( rc < 0 )
++        if ( rc )
+             goto out;
+     }
+ 
+@@ -2593,6 +2595,14 @@ static int gnttab_copy_one(const struct
+     return rc;
+ }
+ 
++/*
++ * gnttab_copy(), other than the various other helpers of
++ * do_grant_table_op(), returns (besides possible error indicators)
++ * "count - i" rather than "i" to ensure that even if no progress
++ * was made at all (perhaps due to gnttab_copy_one() returning a
++ * positive value) a non-zero value is being handed back (zero needs
++ * to be avoided, as that means "success, all done").
++ */
+ static long gnttab_copy(
+     XEN_GUEST_HANDLE_PARAM(gnttab_copy_t) uop, unsigned int count)
+ {
+@@ -2606,7 +2616,7 @@ static long gnttab_copy(
+     {
+         if ( i && hypercall_preempt_check() )
+         {
+-            rc = i;
++            rc = count - i;
+             break;
+         }
+ 
+@@ -2616,13 +2626,20 @@ static long gnttab_copy(
+             break;
+         }
+ 
+-        op.status = gnttab_copy_one(&op, &dest, &src);
+-        if ( op.status != GNTST_okay )
++        rc = gnttab_copy_one(&op, &dest, &src);
++        if ( rc > 0 )
++        {
++            rc = count - i;
++            break;
++        }
++        if ( rc != GNTST_okay )
+         {
+             gnttab_copy_release_buf(&src);
+             gnttab_copy_release_buf(&dest);
+         }
+ 
++        op.status = rc;
++        rc = 0;
+         if ( unlikely(__copy_field_to_guest(uop, &op, status)) )
+         {
+             rc = -EFAULT;
+@@ -3160,6 +3177,7 @@ do_grant_table_op(
+         rc = gnttab_copy(copy, count);
+         if ( rc > 0 )
+         {
++            rc = count - rc;
+             guest_handle_add_offset(copy, rc);
+             uop = guest_handle_cast(copy, void);
+         }
diff --git a/main/xen/xsa226-2.patch b/main/xen/xsa226-2.patch
new file mode 100644
index 0000000000..2cf93bdd29
--- /dev/null
+++ b/main/xen/xsa226-2.patch
@@ -0,0 +1,280 @@
+From: Jan Beulich <jbeulich@suse.com>
+Subject: gnttab: fix transitive grant handling
+
+Processing of transitive grants must not use the fast path, or else
+reference counting breaks due to the skipped recursive call to
+__acquire_grant_for_copy() (its __release_grant_for_copy()
+counterpart occurs independent of original pin count). Furthermore
+after re-acquiring temporarily dropped locks we need to verify no grant
+properties changed if the original pin count was non-zero; checking
+just the pin counts is sufficient only for well-behaved guests. As a
+result, __release_grant_for_copy() needs to mirror that new behavior.
+
+Furthermore a __release_grant_for_copy() invocation was missing on the
+retry path of __acquire_grant_for_copy(), and gnttab_set_version() also
+needs to bail out upon encountering a transitive grant.
+
+This is part of CVE-2017-12135 / XSA-226.
+
+Reported-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
+
+--- a/xen/common/grant_table.c
++++ b/xen/common/grant_table.c
+@@ -2050,13 +2050,8 @@ __release_grant_for_copy(
+     unsigned long r_frame;
+     uint16_t *status;
+     grant_ref_t trans_gref;
+-    int released_read;
+-    int released_write;
+     struct domain *td;
+ 
+-    released_read = 0;
+-    released_write = 0;
+-
+     grant_read_lock(rgt);
+ 
+     act = active_entry_acquire(rgt, gref);
+@@ -2086,17 +2081,11 @@ __release_grant_for_copy(
+ 
+         act->pin -= GNTPIN_hstw_inc;
+         if ( !(act->pin & (GNTPIN_devw_mask|GNTPIN_hstw_mask)) )
+-        {
+-            released_write = 1;
+             gnttab_clear_flag(_GTF_writing, status);
+-        }
+     }
+ 
+     if ( !act->pin )
+-    {
+         gnttab_clear_flag(_GTF_reading, status);
+-        released_read = 1;
+-    }
+ 
+     active_entry_release(act);
+     grant_read_unlock(rgt);
+@@ -2104,13 +2093,10 @@ __release_grant_for_copy(
+     if ( td != rd )
+     {
+         /*
+-         * Recursive calls, but they're bounded (acquire permits only a single
++         * Recursive call, but it is bounded (acquire permits only a single
+          * level of transitivity), so it's okay.
+          */
+-        if ( released_write )
+-            __release_grant_for_copy(td, trans_gref, 0);
+-        else if ( released_read )
+-            __release_grant_for_copy(td, trans_gref, 1);
++        __release_grant_for_copy(td, trans_gref, readonly);
+ 
+         rcu_unlock_domain(td);
+     }
+@@ -2184,8 +2170,108 @@ __acquire_grant_for_copy(
+                  act->domid, ldom, act->pin);
+ 
+     old_pin = act->pin;
+-    if ( !act->pin ||
+-         (!readonly && !(act->pin & (GNTPIN_devw_mask|GNTPIN_hstw_mask))) )
++    if ( sha2 && (shah->flags & GTF_type_mask) == GTF_transitive )
++    {
++        if ( (!old_pin || (!readonly &&
++                           !(old_pin & (GNTPIN_devw_mask|GNTPIN_hstw_mask)))) &&
++             (rc = _set_status_v2(ldom, readonly, 0, shah, act,
++                                  status)) != GNTST_okay )
++            goto unlock_out;
++
++        if ( !allow_transitive )
++            PIN_FAIL(unlock_out_clear, GNTST_general_error,
++                     "transitive grant when transitivity not allowed\n");
++
++        trans_domid = sha2->transitive.trans_domid;
++        trans_gref = sha2->transitive.gref;
++        barrier(); /* Stop the compiler from re-loading
++                      trans_domid from shared memory */
++        if ( trans_domid == rd->domain_id )
++            PIN_FAIL(unlock_out_clear, GNTST_general_error,
++                     "transitive grants cannot be self-referential\n");
++
++        /*
++         * We allow the trans_domid == ldom case, which corresponds to a
++         * grant being issued by one domain, sent to another one, and then
++         * transitively granted back to the original domain.  Allowing it
++         * is easy, and means that you don't need to go out of your way to
++         * avoid it in the guest.
++         */
++
++        /* We need to leave the rrd locked during the grant copy. */
++        td = rcu_lock_domain_by_id(trans_domid);
++        if ( td == NULL )
++            PIN_FAIL(unlock_out_clear, GNTST_general_error,
++                     "transitive grant referenced bad domain %d\n",
++                     trans_domid);
++
++        /*
++         * __acquire_grant_for_copy() could take the lock on the
++         * remote table (if rd == td), so we have to drop the lock
++         * here and reacquire.
++         */
++        active_entry_release(act);
++        grant_read_unlock(rgt);
++
++        rc = __acquire_grant_for_copy(td, trans_gref, rd->domain_id,
++                                      readonly, &grant_frame, page,
++                                      &trans_page_off, &trans_length, 0);
++
++        grant_read_lock(rgt);
++        act = active_entry_acquire(rgt, gref);
++
++        if ( rc != GNTST_okay )
++        {
++            __fixup_status_for_copy_pin(act, status);
++            rcu_unlock_domain(td);
++            active_entry_release(act);
++            grant_read_unlock(rgt);
++            return rc;
++        }
++
++        /*
++         * We dropped the lock, so we have to check that the grant didn't
++         * change, and that nobody else tried to pin/unpin it. If anything
++         * changed, just give up and tell the caller to retry.
++         */
++        if ( rgt->gt_version != 2 ||
++             act->pin != old_pin ||
++             (old_pin && (act->domid != ldom || act->frame != grant_frame ||
++                          act->start != trans_page_off ||
++                          act->length != trans_length ||
++                          act->trans_domain != td ||
++                          act->trans_gref != trans_gref ||
++                          !act->is_sub_page)) )
++        {
++            __release_grant_for_copy(td, trans_gref, readonly);
++            __fixup_status_for_copy_pin(act, status);
++            rcu_unlock_domain(td);
++            active_entry_release(act);
++            grant_read_unlock(rgt);
++            put_page(*page);
++            *page = NULL;
++            return ERESTART;
++        }
++
++        if ( !old_pin )
++        {
++            act->domid = ldom;
++            act->start = trans_page_off;
++            act->length = trans_length;
++            act->trans_domain = td;
++            act->trans_gref = trans_gref;
++            act->frame = grant_frame;
++            act->gfn = -1ul;
++            /*
++             * The actual remote remote grant may or may not be a sub-page,
++             * but we always treat it as one because that blocks mappings of
++             * transitive grants.
++             */
++            act->is_sub_page = 1;
++        }
++    }
++    else if ( !old_pin ||
++              (!readonly && !(old_pin & (GNTPIN_devw_mask|GNTPIN_hstw_mask))) )
+     {
+         if ( (rc = _set_status(rgt->gt_version, ldom,
+                                readonly, 0, shah, act,
+@@ -2206,79 +2292,6 @@ __acquire_grant_for_copy(
+             trans_page_off = 0;
+             trans_length = PAGE_SIZE;
+         }
+-        else if ( (shah->flags & GTF_type_mask) == GTF_transitive )
+-        {
+-            if ( !allow_transitive )
+-                PIN_FAIL(unlock_out_clear, GNTST_general_error,
+-                         "transitive grant when transitivity not allowed\n");
+-
+-            trans_domid = sha2->transitive.trans_domid;
+-            trans_gref = sha2->transitive.gref;
+-            barrier(); /* Stop the compiler from re-loading
+-                          trans_domid from shared memory */
+-            if ( trans_domid == rd->domain_id )
+-                PIN_FAIL(unlock_out_clear, GNTST_general_error,
+-                         "transitive grants cannot be self-referential\n");
+-
+-            /* We allow the trans_domid == ldom case, which
+-               corresponds to a grant being issued by one domain, sent
+-               to another one, and then transitively granted back to
+-               the original domain.  Allowing it is easy, and means
+-               that you don't need to go out of your way to avoid it
+-               in the guest. */
+-
+-            /* We need to leave the rrd locked during the grant copy */
+-            td = rcu_lock_domain_by_id(trans_domid);
+-            if ( td == NULL )
+-                PIN_FAIL(unlock_out_clear, GNTST_general_error,
+-                         "transitive grant referenced bad domain %d\n",
+-                         trans_domid);
+-
+-            /*
+-             * __acquire_grant_for_copy() could take the lock on the
+-             * remote table (if rd == td), so we have to drop the lock
+-             * here and reacquire
+-             */
+-            active_entry_release(act);
+-            grant_read_unlock(rgt);
+-
+-            rc = __acquire_grant_for_copy(td, trans_gref, rd->domain_id,
+-                                          readonly, &grant_frame, page,
+-                                          &trans_page_off, &trans_length, 0);
+-
+-            grant_read_lock(rgt);
+-            act = active_entry_acquire(rgt, gref);
+-
+-            if ( rc != GNTST_okay ) {
+-                __fixup_status_for_copy_pin(act, status);
+-                rcu_unlock_domain(td);
+-                active_entry_release(act);
+-                grant_read_unlock(rgt);
+-                return rc;
+-            }
+-
+-            /*
+-             * We dropped the lock, so we have to check that nobody else tried
+-             * to pin (or, for that matter, unpin) the reference in *this*
+-             * domain.  If they did, just give up and tell the caller to retry.
+-             */
+-            if ( act->pin != old_pin )
+-            {
+-                __fixup_status_for_copy_pin(act, status);
+-                rcu_unlock_domain(td);
+-                active_entry_release(act);
+-                grant_read_unlock(rgt);
+-                put_page(*page);
+-                *page = NULL;
+-                return ERESTART;
+-            }
+-
+-            /* The actual remote remote grant may or may not be a
+-               sub-page, but we always treat it as one because that
+-               blocks mappings of transitive grants. */
+-            is_sub_page = 1;
+-            act->gfn = -1ul;
+-        }
+         else if ( !(sha2->hdr.flags & GTF_sub_page) )
+         {
+             rc = __get_paged_frame(sha2->full_page.frame, &grant_frame, page, readonly, rd);
+@@ -2710,10 +2723,13 @@ gnttab_set_version(XEN_GUEST_HANDLE_PARA
+     case 2:
+         for ( i = 0; i < GNTTAB_NR_RESERVED_ENTRIES; i++ )
+         {
+-            if ( ((shared_entry_v2(gt, i).hdr.flags & GTF_type_mask) ==
+-                  GTF_permit_access) &&
+-                 (shared_entry_v2(gt, i).full_page.frame >> 32) )
++            switch ( shared_entry_v2(gt, i).hdr.flags & GTF_type_mask )
+             {
++            case GTF_permit_access:
++                 if ( !(shared_entry_v2(gt, i).full_page.frame >> 32) )
++                     break;
++                 /* fall through */
++            case GTF_transitive:
+                 gdprintk(XENLOG_WARNING,
+                          "tried to change grant table version to 1 with non-representable entries\n");
+                 res = -ERANGE;
diff --git a/main/xen/xsa227.patch b/main/xen/xsa227.patch
new file mode 100644
index 0000000000..86aa41e2d4
--- /dev/null
+++ b/main/xen/xsa227.patch
@@ -0,0 +1,52 @@
+From fa7268b94f8a0a7792ee12d5b8e23a60e52a3a84 Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Date: Tue, 20 Jun 2017 19:18:54 +0100
+Subject: [PATCH] x86/grant: Disallow misaligned PTEs
+
+Pagetable entries must be aligned to function correctly.  Disallow attempts
+from the guest to have a grant PTE created at a misaligned address, which
+would result in corruption of the L1 table with largely-guest-controlled
+values.
+
+This is XSA-227
+
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+---
+ xen/arch/x86/mm.c | 13 +++++++++++++
+ 1 file changed, 13 insertions(+)
+
+diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
+index 97b3b4b..00f517a 100644
+--- a/xen/arch/x86/mm.c
++++ b/xen/arch/x86/mm.c
+@@ -3763,6 +3763,9 @@ static int create_grant_pte_mapping(
+     l1_pgentry_t ol1e;
+     struct domain *d = v->domain;
+ 
++    if ( !IS_ALIGNED(pte_addr, sizeof(nl1e)) )
++        return GNTST_general_error;
++
+     adjust_guest_l1e(nl1e, d);
+ 
+     gmfn = pte_addr >> PAGE_SHIFT;
+@@ -3819,6 +3822,16 @@ static int destroy_grant_pte_mapping(
+     struct page_info *page;
+     l1_pgentry_t ol1e;
+ 
++    /*
++     * addr comes from Xen's active_entry tracking so isn't guest controlled,
++     * but it had still better be PTE-aligned.
++     */
++    if ( !IS_ALIGNED(addr, sizeof(ol1e)) )
++    {
++        ASSERT_UNREACHABLE();
++        return GNTST_general_error;
++    }
++
+     gmfn = addr >> PAGE_SHIFT;
+     page = get_page_from_gfn(d, gmfn, NULL, P2M_ALLOC);
+ 
+-- 
+2.1.4
+
diff --git a/main/xen/xsa228.patch b/main/xen/xsa228.patch
new file mode 100644
index 0000000000..65add3a588
--- /dev/null
+++ b/main/xen/xsa228.patch
@@ -0,0 +1,198 @@
+From 9a52c78eb4ff7836bf7ac9ecd918b289cead1f3f Mon Sep 17 00:00:00 2001
+From: Jan Beulich <jbeulich@suse.com>
+Date: Mon, 31 Jul 2017 15:17:56 +0100
+Subject: [PATCH] gnttab: split maptrack lock to make it fulfill its purpose
+ again
+
+The way the lock is currently being used in get_maptrack_handle(), it
+protects only the maptrack limit: The function acts on current's list
+only, so races on list accesses are impossible even without the lock.
+
+Otoh list access races are possible between __get_maptrack_handle() and
+put_maptrack_handle(), due to the invocation of the former for other
+than current from steal_maptrack_handle(). Introduce a per-vCPU lock
+for list accesses to become race free again. This lock will be
+uncontended except when it becomes necessary to take the steal path,
+i.e. in the common case there should be no meaningful performance
+impact.
+
+When in get_maptrack_handle adds a stolen entry to a fresh, empty,
+freelist, we think that there is probably no concurrency.  However,
+this is not a fast path and adding the locking there makes the code
+clearly correct.
+
+Also, while we are here: the stolen maptrack_entry's tail pointer was
+not properly set.  Set it.
+
+This is XSA-228.
+
+Reported-by: Ian Jackson <ian.jackson@eu.citrix.com>
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Signed-off-by: Ian Jackson <Ian.Jackson@eu.citrix.com>
+---
+ docs/misc/grant-tables.txt    |  7 ++++++-
+ xen/common/grant_table.c      | 30 ++++++++++++++++++++++++------
+ xen/include/xen/grant_table.h |  2 +-
+ xen/include/xen/sched.h       |  1 +
+ 4 files changed, 32 insertions(+), 8 deletions(-)
+
+diff --git a/docs/misc/grant-tables.txt b/docs/misc/grant-tables.txt
+index 417ce2d..64da5cf 100644
+--- a/docs/misc/grant-tables.txt
++++ b/docs/misc/grant-tables.txt
+@@ -87,7 +87,8 @@ is complete.
+                                inconsistent grant table state such as current
+                                version, partially initialized active table pages,
+                                etc.
+-  grant_table->maptrack_lock : spinlock used to protect the maptrack free list
++  grant_table->maptrack_lock : spinlock used to protect the maptrack limit
++  v->maptrack_freelist_lock  : spinlock used to protect the maptrack free list
+   active_grant_entry->lock   : spinlock used to serialize modifications to
+                                active entries
+ 
+@@ -102,6 +103,10 @@ is complete.
+  The maptrack free list is protected by its own spinlock. The maptrack
+  lock may be locked while holding the grant table lock.
+ 
++ The maptrack_freelist_lock is an innermost lock.  It may be locked
++ while holding other locks, but no other locks may be acquired within
++ it.
++
+  Active entries are obtained by calling active_entry_acquire(gt, ref).
+  This function returns a pointer to the active entry after locking its
+  spinlock. The caller must hold the grant table read lock before
+diff --git a/xen/common/grant_table.c b/xen/common/grant_table.c
+index ae34547..ee33bd8 100644
+--- a/xen/common/grant_table.c
++++ b/xen/common/grant_table.c
+@@ -304,11 +304,16 @@ __get_maptrack_handle(
+ {
+     unsigned int head, next, prev_head;
+ 
++    spin_lock(&v->maptrack_freelist_lock);
++
+     do {
+         /* No maptrack pages allocated for this VCPU yet? */
+         head = read_atomic(&v->maptrack_head);
+         if ( unlikely(head == MAPTRACK_TAIL) )
++        {
++            spin_unlock(&v->maptrack_freelist_lock);
+             return -1;
++        }
+ 
+         /*
+          * Always keep one entry in the free list to make it easier to
+@@ -316,12 +321,17 @@ __get_maptrack_handle(
+          */
+         next = read_atomic(&maptrack_entry(t, head).ref);
+         if ( unlikely(next == MAPTRACK_TAIL) )
++        {
++            spin_unlock(&v->maptrack_freelist_lock);
+             return -1;
++        }
+ 
+         prev_head = head;
+         head = cmpxchg(&v->maptrack_head, prev_head, next);
+     } while ( head != prev_head );
+ 
++    spin_unlock(&v->maptrack_freelist_lock);
++
+     return head;
+ }
+ 
+@@ -380,6 +390,8 @@ put_maptrack_handle(
+     /* 2. Add entry to the tail of the list on the original VCPU. */
+     v = currd->vcpu[maptrack_entry(t, handle).vcpu];
+ 
++    spin_lock(&v->maptrack_freelist_lock);
++
+     cur_tail = read_atomic(&v->maptrack_tail);
+     do {
+         prev_tail = cur_tail;
+@@ -388,6 +400,8 @@ put_maptrack_handle(
+ 
+     /* 3. Update the old tail entry to point to the new entry. */
+     write_atomic(&maptrack_entry(t, prev_tail).ref, handle);
++
++    spin_unlock(&v->maptrack_freelist_lock);
+ }
+ 
+ static inline int
+@@ -411,10 +425,6 @@ get_maptrack_handle(
+      */
+     if ( nr_maptrack_frames(lgt) >= max_maptrack_frames )
+     {
+-        /*
+-         * Can drop the lock since no other VCPU can be adding a new
+-         * frame once they've run out.
+-         */
+         spin_unlock(&lgt->maptrack_lock);
+ 
+         /*
+@@ -426,8 +436,12 @@ get_maptrack_handle(
+             handle = steal_maptrack_handle(lgt, curr);
+             if ( handle == -1 )
+                 return -1;
++            spin_lock(&curr->maptrack_freelist_lock);
++            maptrack_entry(lgt, handle).ref = MAPTRACK_TAIL;
+             curr->maptrack_tail = handle;
+-            write_atomic(&curr->maptrack_head, handle);
++            if ( curr->maptrack_head == MAPTRACK_TAIL )
++                write_atomic(&curr->maptrack_head, handle);
++            spin_unlock(&curr->maptrack_freelist_lock);
+         }
+         return steal_maptrack_handle(lgt, curr);
+     }
+@@ -460,12 +474,15 @@ get_maptrack_handle(
+     smp_wmb();
+     lgt->maptrack_limit += MAPTRACK_PER_PAGE;
+ 
++    spin_unlock(&lgt->maptrack_lock);
++    spin_lock(&curr->maptrack_freelist_lock);
++
+     do {
+         new_mt[i - 1].ref = read_atomic(&curr->maptrack_head);
+         head = cmpxchg(&curr->maptrack_head, new_mt[i - 1].ref, handle + 1);
+     } while ( head != new_mt[i - 1].ref );
+ 
+-    spin_unlock(&lgt->maptrack_lock);
++    spin_unlock(&curr->maptrack_freelist_lock);
+ 
+     return handle;
+ }
+@@ -3475,6 +3492,7 @@ grant_table_destroy(
+ 
+ void grant_table_init_vcpu(struct vcpu *v)
+ {
++    spin_lock_init(&v->maptrack_freelist_lock);
+     v->maptrack_head = MAPTRACK_TAIL;
+     v->maptrack_tail = MAPTRACK_TAIL;
+ }
+diff --git a/xen/include/xen/grant_table.h b/xen/include/xen/grant_table.h
+index 4e77899..100f2b3 100644
+--- a/xen/include/xen/grant_table.h
++++ b/xen/include/xen/grant_table.h
+@@ -78,7 +78,7 @@ struct grant_table {
+     /* Mapping tracking table per vcpu. */
+     struct grant_mapping **maptrack;
+     unsigned int          maptrack_limit;
+-    /* Lock protecting the maptrack page list, head, and limit */
++    /* Lock protecting the maptrack limit */
+     spinlock_t            maptrack_lock;
+     /* The defined versions are 1 and 2.  Set to 0 if we don't know
+        what version to use yet. */
+diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
+index 6673b27..8690f29 100644
+--- a/xen/include/xen/sched.h
++++ b/xen/include/xen/sched.h
+@@ -230,6 +230,7 @@ struct vcpu
+     int              controller_pause_count;
+ 
+     /* Grant table map tracking. */
++    spinlock_t       maptrack_freelist_lock;
+     unsigned int     maptrack_head;
+     unsigned int     maptrack_tail;
+ 
+-- 
+2.1.4
+
diff --git a/main/xen/xsa230.patch b/main/xen/xsa230.patch
new file mode 100644
index 0000000000..c3b50c8aaa
--- /dev/null
+++ b/main/xen/xsa230.patch
@@ -0,0 +1,38 @@
+From: Jan Beulich <jbeulich@suse.com>
+Subject: gnttab: correct pin status fixup for copy
+
+Regardless of copy operations only setting GNTPIN_hst*, GNTPIN_dev*
+also need to be taken into account when deciding whether to clear
+_GTF_{read,writ}ing. At least for consistency with code elsewhere the
+read part better doesn't use any mask at all.
+
+This is XSA-230.
+
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
+diff --git a/xen/common/grant_table.c b/xen/common/grant_table.c
+index ae34547..9c9d33c 100644
+--- a/xen/common/grant_table.c
++++ b/xen/common/grant_table.c
+@@ -2107,10 +2107,10 @@ __release_grant_for_copy(
+ static void __fixup_status_for_copy_pin(const struct active_grant_entry *act,
+                                    uint16_t *status)
+ {
+-    if ( !(act->pin & GNTPIN_hstw_mask) )
++    if ( !(act->pin & (GNTPIN_hstw_mask | GNTPIN_devw_mask)) )
+         gnttab_clear_flag(_GTF_writing, status);
+ 
+-    if ( !(act->pin & GNTPIN_hstr_mask) )
++    if ( !act->pin )
+         gnttab_clear_flag(_GTF_reading, status);
+ }
+ 
+@@ -2318,7 +2318,7 @@ __acquire_grant_for_copy(
+  
+  unlock_out_clear:
+     if ( !(readonly) &&
+-         !(act->pin & GNTPIN_hstw_mask) )
++         !(act->pin & (GNTPIN_hstw_mask | GNTPIN_devw_mask)) )
+         gnttab_clear_flag(_GTF_writing, status);
+ 
+     if ( !act->pin )
author	Daniel Sabogal <dsabogalcc@gmail.com>	2017-08-22 23:11:53 -0400
committer	Natanael Copa <ncopa@alpinelinux.org>	2017-08-24 22:05:11 +0000
commit	a9878fc57738d1b777eb47e5fdee082f009e2c38 (patch)
tree	4133d5d8ddf5b5c8d87d44fabcb09318d4dd81d6 /main
parent	5bae8450f755f729ff4fe76ab3999a538322cad4 (diff)
download	aports-a9878fc57738d1b777eb47e5fdee082f009e2c38.tar.bz2 aports-a9878fc57738d1b777eb47e5fdee082f009e2c38.tar.xz