diff options
author | Daniel Sabogal <dsabogalcc@gmail.com> | 2017-11-26 19:05:42 -0500 |
---|---|---|
committer | Natanael Copa <ncopa@alpinelinux.org> | 2017-11-27 11:07:24 +0000 |
commit | ae852e430b290c505942dde52442793ff69171fa (patch) | |
tree | 802598335433847ff48165bc38dbfbff30875a3a /main | |
parent | 58c2060efdb5496c74ff9112062196b371f70d75 (diff) | |
download | aports-ae852e430b290c505942dde52442793ff69171fa.tar.bz2 aports-ae852e430b290c505942dde52442793ff69171fa.tar.xz |
main/xen: upgrade to 4.9.1
Security fixes for all applicable XSAs up to (and including) XSA-245
Upstream rombios makefile now enforces non-PIC/PIE mode
Diffstat (limited to 'main')
29 files changed, 3 insertions, 2756 deletions
diff --git a/main/xen/APKBUILD b/main/xen/APKBUILD index 19d4fbd78e..e128a57be4 100644 --- a/main/xen/APKBUILD +++ b/main/xen/APKBUILD @@ -2,8 +2,8 @@ # Contributor: Roger Pau Monne <roger.pau@entel.upc.edu> # Maintainer: William Pitcock <nenolod@dereferenced.org> pkgname=xen -pkgver=4.9.0 -pkgrel=8 +pkgver=4.9.1 +pkgrel=0 pkgdesc="Xen hypervisor" url="http://www.xen.org/" arch="x86_64 armhf aarch64" @@ -145,39 +145,10 @@ source="https://downloads.xenproject.org/release/$pkgname/$pkgver/$pkgname-$pkgv http://xenbits.xen.org/xen-extfiles/zlib-$_ZLIB_VERSION.tar.gz http://xenbits.xen.org/xen-extfiles/ipxe-git-$_IPXE_GIT_TAG.tar.gz - xsa226-1.patch - xsa226-2.patch - xsa227.patch - xsa228.patch - xsa230.patch - xsa231-4.9.patch - xsa232.patch - xsa233.patch - xsa234-4.9.patch - xsa235-4.9.patch - xsa236-4.9.patch - xsa237-1.patch - xsa237-2.patch - xsa237-3.patch - xsa237-4.patch - xsa237-5.patch - xsa238.patch - xsa239.patch - xsa240-1.patch - xsa240-2.patch - xsa241-4.9.patch - xsa242-4.9.patch - xsa243-1.patch - xsa243-2.patch - xsa244.patch - xsa245-1.patch - xsa245-2.patch - qemu-coroutine-gthread.patch qemu-xen_paths.patch hotplug-vif-vtrill.patch - rombios-no-pie.patch musl-support.patch musl-hvmloader-fix-stdint.patch @@ -422,7 +393,7 @@ EOF } -sha512sums="97f8075c49ef9ec0adbe95106c0cff4f9379578fd568777697565476c3fd948335d72ddcacf8be65fd9db219c0a35dcdc007f355f7e5874dd950fd4c0a0f966f xen-4.9.0.tar.gz +sha512sums="9d22f0aa5dcd01a1c105d17c14bce570cc597e884ddb9b4a46b80a72f647625b76ae5213cede423d0458c14e1906983595a9269bb6e6ff2e9e7e4dea840f4274 xen-4.9.1.tar.gz 2e0b0fd23e6f10742a5517981e5171c6e88b0a93c83da701b296f5c0861d72c19782daab589a7eac3f9032152a0fc7eff7f5362db8fccc4859564a9aa82329cf gmp-4.3.2.tar.bz2 c2bc9ffc8583aeae71cee9ddcc4418969768d4e3764d47307da54f93981c0109fb07d84b061b3a3628bd00ba4d14a54742bc04848110eb3ae8ca25dbfbaabadb grub-0.97.tar.gz 1465b58279af1647f909450e394fe002ca165f0ff4a0254bfa9fe0e64316f50facdde2729d79a4e632565b4500cf4d6c74192ac0dd3bc9fe09129bbd67ba089d lwip-1.3.0.tar.gz @@ -432,37 +403,9 @@ c2bc9ffc8583aeae71cee9ddcc4418969768d4e3764d47307da54f93981c0109fb07d84b061b3a36 4928b5b82f57645be9408362706ff2c4d9baa635b21b0d41b1c82930e8c60a759b1ea4fa74d7e6c7cae1b7692d006aa5cb72df0c3b88bf049779aa2b566f9d35 tpm_emulator-0.7.4.tar.gz 021b958fcd0d346c4ba761bcf0cc40f3522de6186cf5a0a6ea34a70504ce9622b1c2626fce40675bc8282cf5f5ade18473656abc38050f72f5d6480507a2106e zlib-1.2.3.tar.gz 82ba65e1c676d32b29c71e6395c9506cab952c8f8b03f692e2b50133be8f0c0146d0f22c223262d81a4df579986fde5abc6507869f4965be4846297ef7b4b890 ipxe-git-827dd1bfee67daa683935ce65316f7e0f057fe1c.tar.gz -45fed43bbdcf63fc3ded0a2629e27a5d58306a244dba2e005cf8814aa50cde962c41e5e72075a1d678eb9c18af17e1cbf078884214fd29df0ad551977c9880c2 xsa226-1.patch -4d1e729c592efefd705233b49484991801606b2122a64ff14abbf994bb3e77ec75c4989d43753ce2043cc4fe13d34fb1cef7ee1adb291ff16625bb3b125e5508 xsa226-2.patch -7d66494e833d46f8a213af0f2b107a12617d5e8b45c3b07daee229c75bd6aad98284bc0e19f15706d044b58273cc7f0c193ef8553faa22fadeae349689e763c8 xsa227.patch -d406f14531af707325790909d08ce299ac2f2cb4b87f9a8ddb0fba10bd83bed84cc1633e07632cc2f841c50bc1a9af6240c89539a2e6ba6028cb127e218f86fc xsa228.patch -df174a1675f74b73e78bc3cb1c9f16536199dfd1922c0cc545a807e92bc24941a816891838258e118f477109548487251a7eaccb2d1dd9b6994c8c76fc5b058f xsa230.patch -7ef6637112c3d24a3541d40ca79b8d5ed8a152401c8b7bfa4cde3e2e264544ba22d27c199d58f24616af2228f5e4075c2af2744d21a70b8cb4d4b1f1a7feedde xsa231-4.9.patch -fb742225a4f3dbf2a574c4a6e3ef61a5da0c91aaeed77a2247023bdefcd4e0b6c08f1c9ffb42eaac3d38739c401443c3cf7aebb507b1d779c415b6cbffabbc10 xsa232.patch -a322ac6c5ac2f858a59096108032fd42974eaaeeebd8f4966119149665f32bed281e333e743136e79add2e6f3844d88b6a3e4d5a685c2808702fd3a9e6396cd4 xsa233.patch -cafeef137cd82cefc3e974b42b974c6562e822c9b359efb654ac374e663d9fc123be210eec17b278f40eabb77c93d3bf0ff03e445607159ad0712808a609a906 xsa234-4.9.patch -8bab6e59577b51f0c6b8a547c9a37a257bd0460e7219512e899d25f80a74084745d2a4c54e55ad12526663d40f218cb8f833b71350220d36e3750d002ff43d29 xsa235-4.9.patch -a951c3d29a6b05b42021bd49419becff51123a245256659240a3af5701bbf51e7d3c1a79835a7cc9a5fdf7c1c6aa330a35a586cb56d69d847c256642f0fc8e55 xsa236-4.9.patch -a447b4f0a5379da46b5f0eb5b77eab07c3cfe8d303af6e116e03c7d88a9fc9ea154043165631d29248c07516ab8fdfd5de4da1ccf0ab7358d90fb7f9c87bf221 xsa237-1.patch -10f2d84f783fb8bae5a39c463a32f4ac5d4d2614a7eecf109dcccd5418b8ec5e523691e79b3578d9c7b113f368a94d360acb9534808c440852a91c36369f88fd xsa237-2.patch -50607fca2e02eed322927e0288c77e7a6c541794fa2c70c78ada0c2fa762b5ad0f3b5108ecb9f01d8826f89dab492d56c502236c70234e6ba741e94a39356ea3 xsa237-3.patch -c8aab545cd4118e74cbb2010f0a844e88608d44bd3117e765221445a8007a0dfa9ee0f4d5c10c3cd7f307a8b7718d896b928c4e06449dc63f7cfec7a10d356bb xsa237-4.patch -d3dfb9208deb6fc2be949e34fc1be84fbed1269bd99b8b31c176da34cc916c50955c7768b4aac75dbe4377634ae5d8390a03e0ff5ffa2ad00b9fab788449deaa xsa237-5.patch -b154c0925bbceab40e8f3b689e2d1fb321b42c685fdcb6bd29b0411ccd856731480a2fbb8025c633f9edf34cec938e5d8888cc71e8158212c078bb595d07a29d xsa238.patch -8b09cd12c7adfef69a02a2965cda22ef6499fd42c8a84a20a6af231f422a6e8a0e597501c327532e1580c1067ee4bf35579e3cf98dee9302ed34ba87f74bf6d2 xsa239.patch -e209e629757b3471eae415913c34c662882172daad634083ee29823c2cb3f00e98886352085c1afc5d0f622781e65fae7b41ebfcbe6fd5e37c428337f5d2506c xsa240-1.patch -344519cd83ad13245de0e183b7afe564597b30d20756e44f98c0a00df55020d7ef85b92e71701c9791842a48cec93e0fcb9bfba2443313b3aafd8d21ea36abf4 xsa240-2.patch -560d8062b5683b6533a67eebafdd81e6a9d2c9194cc9f9b1404544503238d4f1d98bccb1afac477f6a55ffbc67cf9629a43fd67a745ca9093e2adc0478dd0ddb xsa241-4.9.patch -86aa763949ca36a36120a40eafbdf3a8e8bc04acd32ee6bc1e3ae90b189b86b9b166b81a9e0a4f86a7eb1fcc8723ae8ba6bd0f84fa9732e7e4e1ccea45d0b7c1 xsa242-4.9.patch -9f269e262aa67ff9a304ed6fc64ee9c5c9f6fd606d520fc2614cd173cddc9735ad42f91a97b91f1b9c5368d54d514820937edd0ce302dc3839b426398dc6b700 xsa243-1.patch -8aaf0599259b1ff34171684467089da4a26af8fe67eedf22066955b34b2460c45abdf0f19a5a5e3dd3231b944674c62b9d3112ad7d765afc4bdbcdcfbad226e1 xsa243-2.patch -0fd35e74be6f049f1f376aa8295b14f57b92f5e45e7487e5b485c2b8f6faa2950d0fe7d8a863692b3dab8a3a7ef1d9dd94be2c6b55d01802b0d86c84d2fa9e29 xsa244.patch -b19197934e8685fc2af73f404b5c8cbed66d9241e5ff902d1a77fdc227e001a13b775a53d6e303d5f27419f5590561c84ec69409152d9773a5e6050c16e92f1b xsa245-1.patch -75369673232b2107b59dc0242d6fc224c016b9dcbf3299eab90a1d7c365d617fbc91f7b25075b394fee92782db37ce83c416387fa5ad4c4fcd51d0775a8a754f xsa245-2.patch c3c46f232f0bd9f767b232af7e8ce910a6166b126bd5427bb8dc325aeb2c634b956de3fc225cab5af72649070c8205cc8e1cab7689fc266c204f525086f1a562 qemu-coroutine-gthread.patch 1936ab39a1867957fa640eb81c4070214ca4856a2743ba7e49c0cd017917071a9680d015f002c57fa7b9600dbadd29dcea5887f50e6c133305df2669a7a933f3 qemu-xen_paths.patch f095ea373f36381491ad36f0662fb4f53665031973721256b23166e596318581da7cbb0146d0beb2446729adfdb321e01468e377793f6563a67d68b8b0f7ffe3 hotplug-vif-vtrill.patch -5514d7697c87f7d54d64723d44446b9bd84f6c984e763bd21d4eeaf502bf0c5b765f7b2180f8ca496b3baf97e7efd600b1cc1fdd1284b6ecbffe9846190ca069 rombios-no-pie.patch e635cf27ca022ca5bc829e089b5e9a3ce9e566d4701d06bc38a22e356de45a71bc33e170d6db333d4efe8389144419cc27834a2eee0bcae9118d4ca9aff64306 musl-support.patch 77b08e9655e091b0352e4630d520b54c6ca6d659d1d38fbb4b3bfc9ff3e66db433a2e194ead32bb10ff962c382d800a670e82b7a62835b238e294b22808290ea musl-hvmloader-fix-stdint.patch 8c3b57eab8641bcee3dbdc1937ea7874f77b9722a5a0aa3ddb8dff8cc0ced7e19703ef5d998621b3809bea7c16f3346cfa47610ec9ab014ad0de12651c94e5ff stdint_local.h diff --git a/main/xen/rombios-no-pie.patch b/main/xen/rombios-no-pie.patch deleted file mode 100644 index 6387f4ef47..0000000000 --- a/main/xen/rombios-no-pie.patch +++ /dev/null @@ -1,22 +0,0 @@ ---- ./tools/firmware/rombios/32bit/tcgbios/Makefile.orig -+++ ./tools/firmware/rombios/32bit/tcgbios/Makefile -@@ -3,7 +3,7 @@ - - TARGET = tcgbiosext.o - --CFLAGS += $(CFLAGS_xeninclude) -I.. -I../.. -I../../../../libacpi -+CFLAGS += $(CFLAGS_xeninclude) -I.. -I../.. -I../../../../libacpi -fno-pie - - .PHONY: all - all: $(TARGET) ---- ./tools/firmware/rombios/32bit/Makefile.orig -+++ ./tools/firmware/rombios/32bit/Makefile -@@ -3,7 +3,7 @@ - - TARGET = 32bitbios_flat.h - --CFLAGS += $(CFLAGS_xeninclude) -I.. -I../../../libacpi -+CFLAGS += $(CFLAGS_xeninclude) -I.. -I../../../libacpi -fno-pie - - SUBDIRS = tcgbios - diff --git a/main/xen/xsa226-1.patch b/main/xen/xsa226-1.patch deleted file mode 100644 index d60bbe2db1..0000000000 --- a/main/xen/xsa226-1.patch +++ /dev/null @@ -1,149 +0,0 @@ -From: Jan Beulich <jbeulich@suse.com> -Subject: gnttab: don't use possibly unbounded tail calls - -There is no guarantee that the compiler would actually translate them -to branches instead of calls, so only ones with a known recursion limit -are okay: -- __release_grant_for_copy() can call itself only once, as - __acquire_grant_for_copy() won't permit use of multi-level transitive - grants, -- __acquire_grant_for_copy() is fine to call itself with the last - argument false, as that prevents further recursion, -- __acquire_grant_for_copy() must not call itself to recover from an - observed change to the active entry's pin count - -This is part of CVE-2017-12135 / XSA-226. - -Signed-off-by: Jan Beulich <jbeulich@suse.com> - ---- a/xen/common/compat/grant_table.c -+++ b/xen/common/compat/grant_table.c -@@ -258,9 +258,9 @@ int compat_grant_table_op(unsigned int cmd, - rc = gnttab_copy(guest_handle_cast(nat.uop, gnttab_copy_t), n); - if ( rc > 0 ) - { -- ASSERT(rc < n); -- i -= n - rc; -- n = rc; -+ ASSERT(rc <= n); -+ i -= rc; -+ n -= rc; - } - if ( rc >= 0 ) - { ---- a/xen/common/grant_table.c -+++ b/xen/common/grant_table.c -@@ -2103,8 +2103,10 @@ __release_grant_for_copy( - - if ( td != rd ) - { -- /* Recursive calls, but they're tail calls, so it's -- okay. */ -+ /* -+ * Recursive calls, but they're bounded (acquire permits only a single -+ * level of transitivity), so it's okay. -+ */ - if ( released_write ) - __release_grant_for_copy(td, trans_gref, 0); - else if ( released_read ) -@@ -2255,10 +2257,11 @@ __acquire_grant_for_copy( - return rc; - } - -- /* We dropped the lock, so we have to check that nobody -- else tried to pin (or, for that matter, unpin) the -- reference in *this* domain. If they did, just give up -- and try again. */ -+ /* -+ * We dropped the lock, so we have to check that nobody else tried -+ * to pin (or, for that matter, unpin) the reference in *this* -+ * domain. If they did, just give up and tell the caller to retry. -+ */ - if ( act->pin != old_pin ) - { - __fixup_status_for_copy_pin(act, status); -@@ -2266,9 +2269,8 @@ __acquire_grant_for_copy( - active_entry_release(act); - grant_read_unlock(rgt); - put_page(*page); -- return __acquire_grant_for_copy(rd, gref, ldom, readonly, -- frame, page, page_off, length, -- allow_transitive); -+ *page = NULL; -+ return ERESTART; - } - - /* The actual remote remote grant may or may not be a -@@ -2574,7 +2576,7 @@ static int gnttab_copy_one(const struct - { - gnttab_copy_release_buf(src); - rc = gnttab_copy_claim_buf(op, &op->source, src, GNTCOPY_source_gref); -- if ( rc < 0 ) -+ if ( rc ) - goto out; - } - -@@ -2584,7 +2586,7 @@ static int gnttab_copy_one(const struct - { - gnttab_copy_release_buf(dest); - rc = gnttab_copy_claim_buf(op, &op->dest, dest, GNTCOPY_dest_gref); -- if ( rc < 0 ) -+ if ( rc ) - goto out; - } - -@@ -2593,6 +2595,14 @@ static int gnttab_copy_one(const struct - return rc; - } - -+/* -+ * gnttab_copy(), other than the various other helpers of -+ * do_grant_table_op(), returns (besides possible error indicators) -+ * "count - i" rather than "i" to ensure that even if no progress -+ * was made at all (perhaps due to gnttab_copy_one() returning a -+ * positive value) a non-zero value is being handed back (zero needs -+ * to be avoided, as that means "success, all done"). -+ */ - static long gnttab_copy( - XEN_GUEST_HANDLE_PARAM(gnttab_copy_t) uop, unsigned int count) - { -@@ -2606,7 +2616,7 @@ static long gnttab_copy( - { - if ( i && hypercall_preempt_check() ) - { -- rc = i; -+ rc = count - i; - break; - } - -@@ -2616,13 +2626,20 @@ static long gnttab_copy( - break; - } - -- op.status = gnttab_copy_one(&op, &dest, &src); -- if ( op.status != GNTST_okay ) -+ rc = gnttab_copy_one(&op, &dest, &src); -+ if ( rc > 0 ) -+ { -+ rc = count - i; -+ break; -+ } -+ if ( rc != GNTST_okay ) - { - gnttab_copy_release_buf(&src); - gnttab_copy_release_buf(&dest); - } - -+ op.status = rc; -+ rc = 0; - if ( unlikely(__copy_field_to_guest(uop, &op, status)) ) - { - rc = -EFAULT; -@@ -3160,6 +3177,7 @@ do_grant_table_op( - rc = gnttab_copy(copy, count); - if ( rc > 0 ) - { -+ rc = count - rc; - guest_handle_add_offset(copy, rc); - uop = guest_handle_cast(copy, void); - } diff --git a/main/xen/xsa226-2.patch b/main/xen/xsa226-2.patch deleted file mode 100644 index 2cf93bdd29..0000000000 --- a/main/xen/xsa226-2.patch +++ /dev/null @@ -1,280 +0,0 @@ -From: Jan Beulich <jbeulich@suse.com> -Subject: gnttab: fix transitive grant handling - -Processing of transitive grants must not use the fast path, or else -reference counting breaks due to the skipped recursive call to -__acquire_grant_for_copy() (its __release_grant_for_copy() -counterpart occurs independent of original pin count). Furthermore -after re-acquiring temporarily dropped locks we need to verify no grant -properties changed if the original pin count was non-zero; checking -just the pin counts is sufficient only for well-behaved guests. As a -result, __release_grant_for_copy() needs to mirror that new behavior. - -Furthermore a __release_grant_for_copy() invocation was missing on the -retry path of __acquire_grant_for_copy(), and gnttab_set_version() also -needs to bail out upon encountering a transitive grant. - -This is part of CVE-2017-12135 / XSA-226. - -Reported-by: Andrew Cooper <andrew.cooper3@citrix.com> -Signed-off-by: Jan Beulich <jbeulich@suse.com> -Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com> - ---- a/xen/common/grant_table.c -+++ b/xen/common/grant_table.c -@@ -2050,13 +2050,8 @@ __release_grant_for_copy( - unsigned long r_frame; - uint16_t *status; - grant_ref_t trans_gref; -- int released_read; -- int released_write; - struct domain *td; - -- released_read = 0; -- released_write = 0; -- - grant_read_lock(rgt); - - act = active_entry_acquire(rgt, gref); -@@ -2086,17 +2081,11 @@ __release_grant_for_copy( - - act->pin -= GNTPIN_hstw_inc; - if ( !(act->pin & (GNTPIN_devw_mask|GNTPIN_hstw_mask)) ) -- { -- released_write = 1; - gnttab_clear_flag(_GTF_writing, status); -- } - } - - if ( !act->pin ) -- { - gnttab_clear_flag(_GTF_reading, status); -- released_read = 1; -- } - - active_entry_release(act); - grant_read_unlock(rgt); -@@ -2104,13 +2093,10 @@ __release_grant_for_copy( - if ( td != rd ) - { - /* -- * Recursive calls, but they're bounded (acquire permits only a single -+ * Recursive call, but it is bounded (acquire permits only a single - * level of transitivity), so it's okay. - */ -- if ( released_write ) -- __release_grant_for_copy(td, trans_gref, 0); -- else if ( released_read ) -- __release_grant_for_copy(td, trans_gref, 1); -+ __release_grant_for_copy(td, trans_gref, readonly); - - rcu_unlock_domain(td); - } -@@ -2184,8 +2170,108 @@ __acquire_grant_for_copy( - act->domid, ldom, act->pin); - - old_pin = act->pin; -- if ( !act->pin || -- (!readonly && !(act->pin & (GNTPIN_devw_mask|GNTPIN_hstw_mask))) ) -+ if ( sha2 && (shah->flags & GTF_type_mask) == GTF_transitive ) -+ { -+ if ( (!old_pin || (!readonly && -+ !(old_pin & (GNTPIN_devw_mask|GNTPIN_hstw_mask)))) && -+ (rc = _set_status_v2(ldom, readonly, 0, shah, act, -+ status)) != GNTST_okay ) -+ goto unlock_out; -+ -+ if ( !allow_transitive ) -+ PIN_FAIL(unlock_out_clear, GNTST_general_error, -+ "transitive grant when transitivity not allowed\n"); -+ -+ trans_domid = sha2->transitive.trans_domid; -+ trans_gref = sha2->transitive.gref; -+ barrier(); /* Stop the compiler from re-loading -+ trans_domid from shared memory */ -+ if ( trans_domid == rd->domain_id ) -+ PIN_FAIL(unlock_out_clear, GNTST_general_error, -+ "transitive grants cannot be self-referential\n"); -+ -+ /* -+ * We allow the trans_domid == ldom case, which corresponds to a -+ * grant being issued by one domain, sent to another one, and then -+ * transitively granted back to the original domain. Allowing it -+ * is easy, and means that you don't need to go out of your way to -+ * avoid it in the guest. -+ */ -+ -+ /* We need to leave the rrd locked during the grant copy. */ -+ td = rcu_lock_domain_by_id(trans_domid); -+ if ( td == NULL ) -+ PIN_FAIL(unlock_out_clear, GNTST_general_error, -+ "transitive grant referenced bad domain %d\n", -+ trans_domid); -+ -+ /* -+ * __acquire_grant_for_copy() could take the lock on the -+ * remote table (if rd == td), so we have to drop the lock -+ * here and reacquire. -+ */ -+ active_entry_release(act); -+ grant_read_unlock(rgt); -+ -+ rc = __acquire_grant_for_copy(td, trans_gref, rd->domain_id, -+ readonly, &grant_frame, page, -+ &trans_page_off, &trans_length, 0); -+ -+ grant_read_lock(rgt); -+ act = active_entry_acquire(rgt, gref); -+ -+ if ( rc != GNTST_okay ) -+ { -+ __fixup_status_for_copy_pin(act, status); -+ rcu_unlock_domain(td); -+ active_entry_release(act); -+ grant_read_unlock(rgt); -+ return rc; -+ } -+ -+ /* -+ * We dropped the lock, so we have to check that the grant didn't -+ * change, and that nobody else tried to pin/unpin it. If anything -+ * changed, just give up and tell the caller to retry. -+ */ -+ if ( rgt->gt_version != 2 || -+ act->pin != old_pin || -+ (old_pin && (act->domid != ldom || act->frame != grant_frame || -+ act->start != trans_page_off || -+ act->length != trans_length || -+ act->trans_domain != td || -+ act->trans_gref != trans_gref || -+ !act->is_sub_page)) ) -+ { -+ __release_grant_for_copy(td, trans_gref, readonly); -+ __fixup_status_for_copy_pin(act, status); -+ rcu_unlock_domain(td); -+ active_entry_release(act); -+ grant_read_unlock(rgt); -+ put_page(*page); -+ *page = NULL; -+ return ERESTART; -+ } -+ -+ if ( !old_pin ) -+ { -+ act->domid = ldom; -+ act->start = trans_page_off; -+ act->length = trans_length; -+ act->trans_domain = td; -+ act->trans_gref = trans_gref; -+ act->frame = grant_frame; -+ act->gfn = -1ul; -+ /* -+ * The actual remote remote grant may or may not be a sub-page, -+ * but we always treat it as one because that blocks mappings of -+ * transitive grants. -+ */ -+ act->is_sub_page = 1; -+ } -+ } -+ else if ( !old_pin || -+ (!readonly && !(old_pin & (GNTPIN_devw_mask|GNTPIN_hstw_mask))) ) - { - if ( (rc = _set_status(rgt->gt_version, ldom, - readonly, 0, shah, act, -@@ -2206,79 +2292,6 @@ __acquire_grant_for_copy( - trans_page_off = 0; - trans_length = PAGE_SIZE; - } -- else if ( (shah->flags & GTF_type_mask) == GTF_transitive ) -- { -- if ( !allow_transitive ) -- PIN_FAIL(unlock_out_clear, GNTST_general_error, -- "transitive grant when transitivity not allowed\n"); -- -- trans_domid = sha2->transitive.trans_domid; -- trans_gref = sha2->transitive.gref; -- barrier(); /* Stop the compiler from re-loading -- trans_domid from shared memory */ -- if ( trans_domid == rd->domain_id ) -- PIN_FAIL(unlock_out_clear, GNTST_general_error, -- "transitive grants cannot be self-referential\n"); -- -- /* We allow the trans_domid == ldom case, which -- corresponds to a grant being issued by one domain, sent -- to another one, and then transitively granted back to -- the original domain. Allowing it is easy, and means -- that you don't need to go out of your way to avoid it -- in the guest. */ -- -- /* We need to leave the rrd locked during the grant copy */ -- td = rcu_lock_domain_by_id(trans_domid); -- if ( td == NULL ) -- PIN_FAIL(unlock_out_clear, GNTST_general_error, -- "transitive grant referenced bad domain %d\n", -- trans_domid); -- -- /* -- * __acquire_grant_for_copy() could take the lock on the -- * remote table (if rd == td), so we have to drop the lock -- * here and reacquire -- */ -- active_entry_release(act); -- grant_read_unlock(rgt); -- -- rc = __acquire_grant_for_copy(td, trans_gref, rd->domain_id, -- readonly, &grant_frame, page, -- &trans_page_off, &trans_length, 0); -- -- grant_read_lock(rgt); -- act = active_entry_acquire(rgt, gref); -- -- if ( rc != GNTST_okay ) { -- __fixup_status_for_copy_pin(act, status); -- rcu_unlock_domain(td); -- active_entry_release(act); -- grant_read_unlock(rgt); -- return rc; -- } -- -- /* -- * We dropped the lock, so we have to check that nobody else tried -- * to pin (or, for that matter, unpin) the reference in *this* -- * domain. If they did, just give up and tell the caller to retry. -- */ -- if ( act->pin != old_pin ) -- { -- __fixup_status_for_copy_pin(act, status); -- rcu_unlock_domain(td); -- active_entry_release(act); -- grant_read_unlock(rgt); -- put_page(*page); -- *page = NULL; -- return ERESTART; -- } -- -- /* The actual remote remote grant may or may not be a -- sub-page, but we always treat it as one because that -- blocks mappings of transitive grants. */ -- is_sub_page = 1; -- act->gfn = -1ul; -- } - else if ( !(sha2->hdr.flags & GTF_sub_page) ) - { - rc = __get_paged_frame(sha2->full_page.frame, &grant_frame, page, readonly, rd); -@@ -2710,10 +2723,13 @@ gnttab_set_version(XEN_GUEST_HANDLE_PARA - case 2: - for ( i = 0; i < GNTTAB_NR_RESERVED_ENTRIES; i++ ) - { -- if ( ((shared_entry_v2(gt, i).hdr.flags & GTF_type_mask) == -- GTF_permit_access) && -- (shared_entry_v2(gt, i).full_page.frame >> 32) ) -+ switch ( shared_entry_v2(gt, i).hdr.flags & GTF_type_mask ) - { -+ case GTF_permit_access: -+ if ( !(shared_entry_v2(gt, i).full_page.frame >> 32) ) -+ break; -+ /* fall through */ -+ case GTF_transitive: - gdprintk(XENLOG_WARNING, - "tried to change grant table version to 1 with non-representable entries\n"); - res = -ERANGE; diff --git a/main/xen/xsa227.patch b/main/xen/xsa227.patch deleted file mode 100644 index 86aa41e2d4..0000000000 --- a/main/xen/xsa227.patch +++ /dev/null @@ -1,52 +0,0 @@ -From fa7268b94f8a0a7792ee12d5b8e23a60e52a3a84 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper <andrew.cooper3@citrix.com> -Date: Tue, 20 Jun 2017 19:18:54 +0100 -Subject: [PATCH] x86/grant: Disallow misaligned PTEs - -Pagetable entries must be aligned to function correctly. Disallow attempts -from the guest to have a grant PTE created at a misaligned address, which -would result in corruption of the L1 table with largely-guest-controlled -values. - -This is XSA-227 - -Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> ---- - xen/arch/x86/mm.c | 13 +++++++++++++ - 1 file changed, 13 insertions(+) - -diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c -index 97b3b4b..00f517a 100644 ---- a/xen/arch/x86/mm.c -+++ b/xen/arch/x86/mm.c -@@ -3763,6 +3763,9 @@ static int create_grant_pte_mapping( - l1_pgentry_t ol1e; - struct domain *d = v->domain; - -+ if ( !IS_ALIGNED(pte_addr, sizeof(nl1e)) ) -+ return GNTST_general_error; -+ - adjust_guest_l1e(nl1e, d); - - gmfn = pte_addr >> PAGE_SHIFT; -@@ -3819,6 +3822,16 @@ static int destroy_grant_pte_mapping( - struct page_info *page; - l1_pgentry_t ol1e; - -+ /* -+ * addr comes from Xen's active_entry tracking so isn't guest controlled, -+ * but it had still better be PTE-aligned. -+ */ -+ if ( !IS_ALIGNED(addr, sizeof(ol1e)) ) -+ { -+ ASSERT_UNREACHABLE(); -+ return GNTST_general_error; -+ } -+ - gmfn = addr >> PAGE_SHIFT; - page = get_page_from_gfn(d, gmfn, NULL, P2M_ALLOC); - --- -2.1.4 - diff --git a/main/xen/xsa228.patch b/main/xen/xsa228.patch deleted file mode 100644 index 65add3a588..0000000000 --- a/main/xen/xsa228.patch +++ /dev/null @@ -1,198 +0,0 @@ -From 9a52c78eb4ff7836bf7ac9ecd918b289cead1f3f Mon Sep 17 00:00:00 2001 -From: Jan Beulich <jbeulich@suse.com> -Date: Mon, 31 Jul 2017 15:17:56 +0100 -Subject: [PATCH] gnttab: split maptrack lock to make it fulfill its purpose - again - -The way the lock is currently being used in get_maptrack_handle(), it -protects only the maptrack limit: The function acts on current's list -only, so races on list accesses are impossible even without the lock. - -Otoh list access races are possible between __get_maptrack_handle() and -put_maptrack_handle(), due to the invocation of the former for other -than current from steal_maptrack_handle(). Introduce a per-vCPU lock -for list accesses to become race free again. This lock will be -uncontended except when it becomes necessary to take the steal path, -i.e. in the common case there should be no meaningful performance -impact. - -When in get_maptrack_handle adds a stolen entry to a fresh, empty, -freelist, we think that there is probably no concurrency. However, -this is not a fast path and adding the locking there makes the code -clearly correct. - -Also, while we are here: the stolen maptrack_entry's tail pointer was -not properly set. Set it. - -This is XSA-228. - -Reported-by: Ian Jackson <ian.jackson@eu.citrix.com> -Signed-off-by: Jan Beulich <jbeulich@suse.com> -Signed-off-by: Ian Jackson <Ian.Jackson@eu.citrix.com> ---- - docs/misc/grant-tables.txt | 7 ++++++- - xen/common/grant_table.c | 30 ++++++++++++++++++++++++------ - xen/include/xen/grant_table.h | 2 +- - xen/include/xen/sched.h | 1 + - 4 files changed, 32 insertions(+), 8 deletions(-) - -diff --git a/docs/misc/grant-tables.txt b/docs/misc/grant-tables.txt -index 417ce2d..64da5cf 100644 ---- a/docs/misc/grant-tables.txt -+++ b/docs/misc/grant-tables.txt -@@ -87,7 +87,8 @@ is complete. - inconsistent grant table state such as current - version, partially initialized active table pages, - etc. -- grant_table->maptrack_lock : spinlock used to protect the maptrack free list -+ grant_table->maptrack_lock : spinlock used to protect the maptrack limit -+ v->maptrack_freelist_lock : spinlock used to protect the maptrack free list - active_grant_entry->lock : spinlock used to serialize modifications to - active entries - -@@ -102,6 +103,10 @@ is complete. - The maptrack free list is protected by its own spinlock. The maptrack - lock may be locked while holding the grant table lock. - -+ The maptrack_freelist_lock is an innermost lock. It may be locked -+ while holding other locks, but no other locks may be acquired within -+ it. -+ - Active entries are obtained by calling active_entry_acquire(gt, ref). - This function returns a pointer to the active entry after locking its - spinlock. The caller must hold the grant table read lock before -diff --git a/xen/common/grant_table.c b/xen/common/grant_table.c -index ae34547..ee33bd8 100644 ---- a/xen/common/grant_table.c -+++ b/xen/common/grant_table.c -@@ -304,11 +304,16 @@ __get_maptrack_handle( - { - unsigned int head, next, prev_head; - -+ spin_lock(&v->maptrack_freelist_lock); -+ - do { - /* No maptrack pages allocated for this VCPU yet? */ - head = read_atomic(&v->maptrack_head); - if ( unlikely(head == MAPTRACK_TAIL) ) -+ { -+ spin_unlock(&v->maptrack_freelist_lock); - return -1; -+ } - - /* - * Always keep one entry in the free list to make it easier to -@@ -316,12 +321,17 @@ __get_maptrack_handle( - */ - next = read_atomic(&maptrack_entry(t, head).ref); - if ( unlikely(next == MAPTRACK_TAIL) ) -+ { -+ spin_unlock(&v->maptrack_freelist_lock); - return -1; -+ } - - prev_head = head; - head = cmpxchg(&v->maptrack_head, prev_head, next); - } while ( head != prev_head ); - -+ spin_unlock(&v->maptrack_freelist_lock); -+ - return head; - } - -@@ -380,6 +390,8 @@ put_maptrack_handle( - /* 2. Add entry to the tail of the list on the original VCPU. */ - v = currd->vcpu[maptrack_entry(t, handle).vcpu]; - -+ spin_lock(&v->maptrack_freelist_lock); -+ - cur_tail = read_atomic(&v->maptrack_tail); - do { - prev_tail = cur_tail; -@@ -388,6 +400,8 @@ put_maptrack_handle( - - /* 3. Update the old tail entry to point to the new entry. */ - write_atomic(&maptrack_entry(t, prev_tail).ref, handle); -+ -+ spin_unlock(&v->maptrack_freelist_lock); - } - - static inline int -@@ -411,10 +425,6 @@ get_maptrack_handle( - */ - if ( nr_maptrack_frames(lgt) >= max_maptrack_frames ) - { -- /* -- * Can drop the lock since no other VCPU can be adding a new -- * frame once they've run out. -- */ - spin_unlock(&lgt->maptrack_lock); - - /* -@@ -426,8 +436,12 @@ get_maptrack_handle( - handle = steal_maptrack_handle(lgt, curr); - if ( handle == -1 ) - return -1; -+ spin_lock(&curr->maptrack_freelist_lock); -+ maptrack_entry(lgt, handle).ref = MAPTRACK_TAIL; - curr->maptrack_tail = handle; -- write_atomic(&curr->maptrack_head, handle); -+ if ( curr->maptrack_head == MAPTRACK_TAIL ) -+ write_atomic(&curr->maptrack_head, handle); -+ spin_unlock(&curr->maptrack_freelist_lock); - } - return steal_maptrack_handle(lgt, curr); - } -@@ -460,12 +474,15 @@ get_maptrack_handle( - smp_wmb(); - lgt->maptrack_limit += MAPTRACK_PER_PAGE; - -+ spin_unlock(&lgt->maptrack_lock); -+ spin_lock(&curr->maptrack_freelist_lock); -+ - do { - new_mt[i - 1].ref = read_atomic(&curr->maptrack_head); - head = cmpxchg(&curr->maptrack_head, new_mt[i - 1].ref, handle + 1); - } while ( head != new_mt[i - 1].ref ); - -- spin_unlock(&lgt->maptrack_lock); -+ spin_unlock(&curr->maptrack_freelist_lock); - - return handle; - } -@@ -3475,6 +3492,7 @@ grant_table_destroy( - - void grant_table_init_vcpu(struct vcpu *v) - { -+ spin_lock_init(&v->maptrack_freelist_lock); - v->maptrack_head = MAPTRACK_TAIL; - v->maptrack_tail = MAPTRACK_TAIL; - } -diff --git a/xen/include/xen/grant_table.h b/xen/include/xen/grant_table.h -index 4e77899..100f2b3 100644 ---- a/xen/include/xen/grant_table.h -+++ b/xen/include/xen/grant_table.h -@@ -78,7 +78,7 @@ struct grant_table { - /* Mapping tracking table per vcpu. */ - struct grant_mapping **maptrack; - unsigned int maptrack_limit; -- /* Lock protecting the maptrack page list, head, and limit */ -+ /* Lock protecting the maptrack limit */ - spinlock_t maptrack_lock; - /* The defined versions are 1 and 2. Set to 0 if we don't know - what version to use yet. */ -diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h -index 6673b27..8690f29 100644 ---- a/xen/include/xen/sched.h -+++ b/xen/include/xen/sched.h -@@ -230,6 +230,7 @@ struct vcpu - int controller_pause_count; - - /* Grant table map tracking. */ -+ spinlock_t maptrack_freelist_lock; - unsigned int maptrack_head; - unsigned int maptrack_tail; - --- -2.1.4 - diff --git a/main/xen/xsa230.patch b/main/xen/xsa230.patch deleted file mode 100644 index c3b50c8aaa..0000000000 --- a/main/xen/xsa230.patch +++ /dev/null @@ -1,38 +0,0 @@ -From: Jan Beulich <jbeulich@suse.com> -Subject: gnttab: correct pin status fixup for copy - -Regardless of copy operations only setting GNTPIN_hst*, GNTPIN_dev* -also need to be taken into account when deciding whether to clear -_GTF_{read,writ}ing. At least for consistency with code elsewhere the -read part better doesn't use any mask at all. - -This is XSA-230. - -Signed-off-by: Jan Beulich <jbeulich@suse.com> -Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com> -diff --git a/xen/common/grant_table.c b/xen/common/grant_table.c -index ae34547..9c9d33c 100644 ---- a/xen/common/grant_table.c -+++ b/xen/common/grant_table.c -@@ -2107,10 +2107,10 @@ __release_grant_for_copy( - static void __fixup_status_for_copy_pin(const struct active_grant_entry *act, - uint16_t *status) - { -- if ( !(act->pin & GNTPIN_hstw_mask) ) -+ if ( !(act->pin & (GNTPIN_hstw_mask | GNTPIN_devw_mask)) ) - gnttab_clear_flag(_GTF_writing, status); - -- if ( !(act->pin & GNTPIN_hstr_mask) ) -+ if ( !act->pin ) - gnttab_clear_flag(_GTF_reading, status); - } - -@@ -2318,7 +2318,7 @@ __acquire_grant_for_copy( - - unlock_out_clear: - if ( !(readonly) && -- !(act->pin & GNTPIN_hstw_mask) ) -+ !(act->pin & (GNTPIN_hstw_mask | GNTPIN_devw_mask)) ) - gnttab_clear_flag(_GTF_writing, status); - - if ( !act->pin ) diff --git a/main/xen/xsa231-4.9.patch b/main/xen/xsa231-4.9.patch deleted file mode 100644 index 251165e6bd..0000000000 --- a/main/xen/xsa231-4.9.patch +++ /dev/null @@ -1,108 +0,0 @@ -From: George Dunlap <george.dunlap@citrix.com> -Subject: xen/mm: make sure node is less than MAX_NUMNODES - -The output of MEMF_get_node(memflags) can be as large as nodeid_t can -hold (currently 255). This is then used as an index to arrays of size -MAX_NUMNODE, which is 64 on x86 and 1 on ARM, can be passed in by an -untrusted guest (via memory_exchange and increase_reservation) and is -not currently bounds-checked. - -Check the value in page_alloc.c before using it, and also check the -value in the hypercall call sites and return -EINVAL if appropriate. -Don't permit domains other than the hardware or control domain to -allocate node-constrained memory. - -This is XSA-231. - -Reported-by: Matthew Daley <mattd@bugfuzz.com> -Signed-off-by: George Dunlap <george.dunlap@citrix.com> -Signed-off-by: Jan Beulich <jbeulich@suse.com> -Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com> - ---- a/xen/common/memory.c -+++ b/xen/common/memory.c -@@ -411,6 +411,31 @@ static void decrease_reservation(struct - a->nr_done = i; - } - -+static bool propagate_node(unsigned int xmf, unsigned int *memflags) -+{ -+ const struct domain *currd = current->domain; -+ -+ BUILD_BUG_ON(XENMEMF_get_node(0) != NUMA_NO_NODE); -+ BUILD_BUG_ON(MEMF_get_node(0) != NUMA_NO_NODE); -+ -+ if ( XENMEMF_get_node(xmf) == NUMA_NO_NODE ) -+ return true; -+ -+ if ( is_hardware_domain(currd) || is_control_domain(currd) ) -+ { -+ if ( XENMEMF_get_node(xmf) >= MAX_NUMNODES ) -+ return false; -+ -+ *memflags |= MEMF_node(XENMEMF_get_node(xmf)); -+ if ( xmf & XENMEMF_exact_node_request ) -+ *memflags |= MEMF_exact_node; -+ } -+ else if ( xmf & XENMEMF_exact_node_request ) -+ return false; -+ -+ return true; -+} -+ - static long memory_exchange(XEN_GUEST_HANDLE_PARAM(xen_memory_exchange_t) arg) - { - struct xen_memory_exchange exch; -@@ -483,6 +508,12 @@ static long memory_exchange(XEN_GUEST_HA - } - } - -+ if ( unlikely(!propagate_node(exch.out.mem_flags, &memflags)) ) -+ { -+ rc = -EINVAL; -+ goto fail_early; -+ } -+ - d = rcu_lock_domain_by_any_id(exch.in.domid); - if ( d == NULL ) - { -@@ -501,7 +532,6 @@ static long memory_exchange(XEN_GUEST_HA - d, - XENMEMF_get_address_bits(exch.out.mem_flags) ? : - (BITS_PER_LONG+PAGE_SHIFT))); -- memflags |= MEMF_node(XENMEMF_get_node(exch.out.mem_flags)); - - for ( i = (exch.nr_exchanged >> in_chunk_order); - i < (exch.in.nr_extents >> in_chunk_order); -@@ -864,12 +894,8 @@ static int construct_memop_from_reservat - } - read_unlock(&d->vnuma_rwlock); - } -- else -- { -- a->memflags |= MEMF_node(XENMEMF_get_node(r->mem_flags)); -- if ( r->mem_flags & XENMEMF_exact_node_request ) -- a->memflags |= MEMF_exact_node; -- } -+ else if ( unlikely(!propagate_node(r->mem_flags, &a->memflags)) ) -+ return -EINVAL; - - return 0; - } ---- a/xen/common/page_alloc.c -+++ b/xen/common/page_alloc.c -@@ -706,9 +706,13 @@ static struct page_info *alloc_heap_page - if ( node >= MAX_NUMNODES ) - node = cpu_to_node(smp_processor_id()); - } -+ else if ( unlikely(node >= MAX_NUMNODES) ) -+ { -+ ASSERT_UNREACHABLE(); -+ return NULL; -+ } - first_node = node; - -- ASSERT(node < MAX_NUMNODES); - ASSERT(zone_lo <= zone_hi); - ASSERT(zone_hi < NR_ZONES); - diff --git a/main/xen/xsa232.patch b/main/xen/xsa232.patch deleted file mode 100644 index 9e5f35c7d6..0000000000 --- a/main/xen/xsa232.patch +++ /dev/null @@ -1,23 +0,0 @@ -From: Andrew Cooper <andrew.cooper3@citrix.com> -Subject: grant_table: fix GNTTABOP_cache_flush handling - -Don't fall over a NULL grant_table pointer when the owner of the domain -is a system domain (DOMID_{XEN,IO} etc). - -This is XSA-232. - -Reported-by: Matthew Daley <mattd@bugfuzz.com> -Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> - ---- a/xen/common/grant_table.c -+++ b/xen/common/grant_table.c -@@ -3053,7 +3053,7 @@ static int cache_flush(gnttab_cache_flus - - page = mfn_to_page(mfn); - owner = page_get_owner_and_reference(page); -- if ( !owner ) -+ if ( !owner || !owner->grant_table ) - { - rcu_unlock_domain(d); - return -EPERM; diff --git a/main/xen/xsa233.patch b/main/xen/xsa233.patch deleted file mode 100644 index 6013c52b41..0000000000 --- a/main/xen/xsa233.patch +++ /dev/null @@ -1,52 +0,0 @@ -From: Juergen Gross <jgross@suse.com> -Subject: tools/xenstore: dont unlink connection object twice - -A connection object of a domain with associated stubdom has two -parents: the domain and the stubdom. When cleaning up the list of -active domains in domain_cleanup() make sure not to unlink the -connection twice from the same domain. This could happen when the -domain and its stubdom are being destroyed at the same time leading -to the domain loop being entered twice. - -Additionally don't use talloc_free() in this case as it will remove -a random parent link, leading eventually to a memory leak. Use -talloc_unlink() instead specifying the context from which the -connection object should be removed. - -This is XSA-233. - -Reported-by: Eric Chanudet <chanudete@ainfosec.com> -Signed-off-by: Juergen Gross <jgross@suse.com> -Reviewed-by: Ian Jackson <ian.jackson@eu.citrix.com> - ---- a/tools/xenstore/xenstored_domain.c -+++ b/tools/xenstore/xenstored_domain.c -@@ -221,10 +221,11 @@ static int destroy_domain(void *_domain) - static void domain_cleanup(void) - { - xc_dominfo_t dominfo; -- struct domain *domain, *tmp; -+ struct domain *domain; - int notify = 0; - -- list_for_each_entry_safe(domain, tmp, &domains, list) { -+ again: -+ list_for_each_entry(domain, &domains, list) { - if (xc_domain_getinfo(*xc_handle, domain->domid, 1, - &dominfo) == 1 && - dominfo.domid == domain->domid) { -@@ -236,8 +237,12 @@ static void domain_cleanup(void) - if (!dominfo.dying) - continue; - } -- talloc_free(domain->conn); -- notify = 0; /* destroy_domain() fires the watch */ -+ if (domain->conn) { -+ talloc_unlink(talloc_autofree_context(), domain->conn); -+ domain->conn = NULL; -+ notify = 0; /* destroy_domain() fires the watch */ -+ goto again; -+ } - } - - if (notify) diff --git a/main/xen/xsa234-4.9.patch b/main/xen/xsa234-4.9.patch deleted file mode 100644 index 8dbf401720..0000000000 --- a/main/xen/xsa234-4.9.patch +++ /dev/null @@ -1,192 +0,0 @@ -From: Jan Beulich <jbeulich@suse.com> -Subject: gnttab: also validate PTE permissions upon destroy/replace - -In order for PTE handling to match up with the reference counting done -by common code, presence and writability of grant mapping PTEs must -also be taken into account; validating just the frame number is not -enough. This is in particular relevant if a guest fiddles with grant -PTEs via non-grant hypercalls. - -Note that the flags being passed to replace_grant_host_mapping() -already happen to be those of the existing mapping, so no new function -parameter is needed. - -This is XSA-234. - -Reported-by: Andrew Cooper <andrew.cooper3@citrix.com> -Signed-off-by: Jan Beulich <jbeulich@suse.com> -Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com> - ---- a/xen/arch/x86/mm.c -+++ b/xen/arch/x86/mm.c -@@ -4058,7 +4058,8 @@ static int create_grant_pte_mapping( - } - - static int destroy_grant_pte_mapping( -- uint64_t addr, unsigned long frame, struct domain *d) -+ uint64_t addr, unsigned long frame, unsigned int grant_pte_flags, -+ struct domain *d) - { - int rc = GNTST_okay; - void *va; -@@ -4104,17 +4105,29 @@ static int destroy_grant_pte_mapping( - - ol1e = *(l1_pgentry_t *)va; - -- /* Check that the virtual address supplied is actually mapped to frame. */ -- if ( unlikely(l1e_get_pfn(ol1e) != frame) ) -+ /* -+ * Check that the PTE supplied actually maps frame (with appropriate -+ * permissions). -+ */ -+ if ( unlikely(l1e_get_pfn(ol1e) != frame) || -+ unlikely((l1e_get_flags(ol1e) ^ grant_pte_flags) & -+ (_PAGE_PRESENT | _PAGE_RW)) ) - { - page_unlock(page); -- gdprintk(XENLOG_WARNING, -- "PTE entry %"PRIpte" for address %"PRIx64" doesn't match frame %lx\n", -- l1e_get_intpte(ol1e), addr, frame); -+ gdprintk(XENLOG_ERR, -+ "PTE %"PRIpte" at %"PRIx64" doesn't match grant (%"PRIpte")\n", -+ l1e_get_intpte(ol1e), addr, -+ l1e_get_intpte(l1e_from_pfn(frame, grant_pte_flags))); - rc = GNTST_general_error; - goto failed; - } - -+ if ( unlikely((l1e_get_flags(ol1e) ^ grant_pte_flags) & -+ ~(_PAGE_AVAIL | PAGE_CACHE_ATTRS)) ) -+ gdprintk(XENLOG_WARNING, -+ "PTE flags %x at %"PRIx64" don't match grant (%x)\n", -+ l1e_get_flags(ol1e), addr, grant_pte_flags); -+ - /* Delete pagetable entry. */ - if ( unlikely(!UPDATE_ENTRY - (l1, -@@ -4123,7 +4136,8 @@ static int destroy_grant_pte_mapping( - 0)) ) - { - page_unlock(page); -- gdprintk(XENLOG_WARNING, "Cannot delete PTE entry at %p\n", va); -+ gdprintk(XENLOG_WARNING, "Cannot delete PTE entry at %"PRIx64"\n", -+ addr); - rc = GNTST_general_error; - goto failed; - } -@@ -4191,7 +4205,8 @@ static int create_grant_va_mapping( - } - - static int replace_grant_va_mapping( -- unsigned long addr, unsigned long frame, l1_pgentry_t nl1e, struct vcpu *v) -+ unsigned long addr, unsigned long frame, unsigned int grant_pte_flags, -+ l1_pgentry_t nl1e, struct vcpu *v) - { - l1_pgentry_t *pl1e, ol1e; - unsigned long gl1mfn; -@@ -4227,20 +4242,33 @@ static int replace_grant_va_mapping( - - ol1e = *pl1e; - -- /* Check that the virtual address supplied is actually mapped to frame. */ -- if ( unlikely(l1e_get_pfn(ol1e) != frame) ) -- { -- gdprintk(XENLOG_WARNING, -- "PTE entry %lx for address %lx doesn't match frame %lx\n", -- l1e_get_pfn(ol1e), addr, frame); -+ /* -+ * Check that the virtual address supplied is actually mapped to frame -+ * (with appropriate permissions). -+ */ -+ if ( unlikely(l1e_get_pfn(ol1e) != frame) || -+ unlikely((l1e_get_flags(ol1e) ^ grant_pte_flags) & -+ (_PAGE_PRESENT | _PAGE_RW)) ) -+ { -+ gdprintk(XENLOG_ERR, -+ "PTE %"PRIpte" for %lx doesn't match grant (%"PRIpte")\n", -+ l1e_get_intpte(ol1e), addr, -+ l1e_get_intpte(l1e_from_pfn(frame, grant_pte_flags))); - rc = GNTST_general_error; - goto unlock_and_out; - } - -+ if ( unlikely((l1e_get_flags(ol1e) ^ grant_pte_flags) & -+ ~(_PAGE_AVAIL | PAGE_CACHE_ATTRS)) ) -+ gdprintk(XENLOG_WARNING, -+ "PTE flags %x for %"PRIx64" don't match grant (%x)\n", -+ l1e_get_flags(ol1e), addr, grant_pte_flags); -+ - /* Delete pagetable entry. */ - if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, v, 0)) ) - { -- gdprintk(XENLOG_WARNING, "Cannot delete PTE entry at %p\n", pl1e); -+ gdprintk(XENLOG_WARNING, "Cannot delete PTE entry for %"PRIx64"\n", -+ addr); - rc = GNTST_general_error; - goto unlock_and_out; - } -@@ -4254,9 +4282,11 @@ static int replace_grant_va_mapping( - } - - static int destroy_grant_va_mapping( -- unsigned long addr, unsigned long frame, struct vcpu *v) -+ unsigned long addr, unsigned long frame, unsigned int grant_pte_flags, -+ struct vcpu *v) - { -- return replace_grant_va_mapping(addr, frame, l1e_empty(), v); -+ return replace_grant_va_mapping(addr, frame, grant_pte_flags, -+ l1e_empty(), v); - } - - static int create_grant_p2m_mapping(uint64_t addr, unsigned long frame, -@@ -4351,20 +4381,39 @@ int replace_grant_host_mapping( - unsigned long gl1mfn; - struct page_info *l1pg; - int rc; -+ unsigned int grant_pte_flags; - - if ( paging_mode_external(current->domain) ) - return replace_grant_p2m_mapping(addr, frame, new_addr, flags); - -+ grant_pte_flags = -+ _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_GNTTAB | _PAGE_NX; -+ -+ if ( flags & GNTMAP_application_map ) -+ grant_pte_flags |= _PAGE_USER; -+ if ( !(flags & GNTMAP_readonly) ) -+ grant_pte_flags |= _PAGE_RW; -+ /* -+ * On top of the explicit settings done by create_grant_host_mapping() -+ * also open-code relevant parts of adjust_guest_l1e(). Don't mirror -+ * available and cachability flags, though. -+ */ -+ if ( !is_pv_32bit_domain(curr->domain) ) -+ grant_pte_flags |= (grant_pte_flags & _PAGE_USER) -+ ? _PAGE_GLOBAL -+ : _PAGE_GUEST_KERNEL | _PAGE_USER; -+ - if ( flags & GNTMAP_contains_pte ) - { - if ( !new_addr ) -- return destroy_grant_pte_mapping(addr, frame, curr->domain); -+ return destroy_grant_pte_mapping(addr, frame, grant_pte_flags, -+ curr->domain); - - return GNTST_general_error; - } - - if ( !new_addr ) -- return destroy_grant_va_mapping(addr, frame, curr); -+ return destroy_grant_va_mapping(addr, frame, grant_pte_flags, curr); - - pl1e = guest_map_l1e(new_addr, &gl1mfn); - if ( !pl1e ) -@@ -4412,7 +4461,7 @@ int replace_grant_host_mapping( - put_page(l1pg); - guest_unmap_l1e(pl1e); - -- rc = replace_grant_va_mapping(addr, frame, ol1e, curr); -+ rc = replace_grant_va_mapping(addr, frame, grant_pte_flags, ol1e, curr); - if ( rc && !paging_mode_refcounts(curr->domain) ) - put_page_from_l1e(ol1e, curr->domain); - diff --git a/main/xen/xsa235-4.9.patch b/main/xen/xsa235-4.9.patch deleted file mode 100644 index 25dd650755..0000000000 --- a/main/xen/xsa235-4.9.patch +++ /dev/null @@ -1,49 +0,0 @@ -From: Jan Beulich <jbeulich@suse.com> -Subject: arm/mm: release grant lock on xenmem_add_to_physmap_one() error paths - -Commit 55021ff9ab ("xen/arm: add_to_physmap_one: Avoid to map mfn 0 if -an error occurs") introduced error paths not releasing the grant table -lock. Replace them by a suitable check after the lock was dropped. - -This is XSA-235. - -Reported-by: Wei Liu <wei.liu2@citrix.com> -Signed-off-by: Jan Beulich <jbeulich@suse.com> -Reviewed-by: Julien Grall <julien.grall@arm.com> - ---- a/xen/arch/arm/mm.c -+++ b/xen/arch/arm/mm.c -@@ -1164,7 +1164,7 @@ int xenmem_add_to_physmap_one( - if ( idx < nr_status_frames(d->grant_table) ) - mfn = virt_to_mfn(d->grant_table->status[idx]); - else -- return -EINVAL; -+ mfn = mfn_x(INVALID_MFN); - } - else - { -@@ -1175,14 +1175,21 @@ int xenmem_add_to_physmap_one( - if ( idx < nr_grant_frames(d->grant_table) ) - mfn = virt_to_mfn(d->grant_table->shared_raw[idx]); - else -- return -EINVAL; -+ mfn = mfn_x(INVALID_MFN); - } - -- d->arch.grant_table_gfn[idx] = gfn; -+ if ( mfn != mfn_x(INVALID_MFN) ) -+ { -+ d->arch.grant_table_gfn[idx] = gfn; - -- t = p2m_ram_rw; -+ t = p2m_ram_rw; -+ } - - grant_write_unlock(d->grant_table); -+ -+ if ( mfn == mfn_x(INVALID_MFN) ) -+ return -EINVAL; -+ - break; - case XENMAPSPACE_shared_info: - if ( idx != 0 ) diff --git a/main/xen/xsa236-4.9.patch b/main/xen/xsa236-4.9.patch deleted file mode 100644 index 203025dbae..0000000000 --- a/main/xen/xsa236-4.9.patch +++ /dev/null @@ -1,66 +0,0 @@ -From: Jan Beulich <jbeulich@suse.com> -Subject: gnttab: fix pin count / page reference race - -Dropping page references before decrementing pin counts is a bad idea -if assumptions are being made that a non-zero pin count implies a valid -page. Fix the order of operations in gnttab_copy_release_buf(), but at -the same time also remove the assertion that was found to trigger: -map_grant_ref() also has the potential of causing a race here, and -changing the order of operations there would likely be quite a bit more -involved. - -This is XSA-236. - -Reported-by: Pawel Wieczorkiewicz <wipawel@amazon.de> -Signed-off-by: Jan Beulich <jbeulich@suse.com> -Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com> - ---- a/xen/common/grant_table.c -+++ b/xen/common/grant_table.c -@@ -2330,9 +2330,20 @@ __acquire_grant_for_copy( - td = page_get_owner_and_reference(*page); - /* - * act->pin being non-zero should guarantee the page to have a -- * non-zero refcount and hence a valid owner. -+ * non-zero refcount and hence a valid owner (matching the one on -+ * record), with one exception: If the owning domain is dying we -+ * had better not make implications from pin count (map_grant_ref() -+ * updates pin counts before obtaining page references, for -+ * example). - */ -- ASSERT(td); -+ if ( td != rd || rd->is_dying ) -+ { -+ if ( td ) -+ put_page(*page); -+ *page = NULL; -+ rc = GNTST_bad_domain; -+ goto unlock_out_clear; -+ } - } - - act->pin += readonly ? GNTPIN_hstr_inc : GNTPIN_hstw_inc; -@@ -2451,6 +2462,11 @@ static void gnttab_copy_release_buf(stru - unmap_domain_page(buf->virt); - buf->virt = NULL; - } -+ if ( buf->have_grant ) -+ { -+ __release_grant_for_copy(buf->domain, buf->ptr.u.ref, buf->read_only); -+ buf->have_grant = 0; -+ } - if ( buf->have_type ) - { - put_page_type(buf->page); -@@ -2461,11 +2477,6 @@ static void gnttab_copy_release_buf(stru - put_page(buf->page); - buf->page = NULL; - } -- if ( buf->have_grant ) -- { -- __release_grant_for_copy(buf->domain, buf->ptr.u.ref, buf->read_only); -- buf->have_grant = 0; -- } - } - - static int gnttab_copy_claim_buf(const struct gnttab_copy *op, diff --git a/main/xen/xsa237-1.patch b/main/xen/xsa237-1.patch deleted file mode 100644 index 7c9dff9672..0000000000 --- a/main/xen/xsa237-1.patch +++ /dev/null @@ -1,27 +0,0 @@ -From: Jan Beulich <jbeulich@suse.com> -Subject: x86: don't allow MSI pIRQ mapping on unowned device - -MSI setup should be permitted only for existing devices owned by the -respective guest (the operation may still be carried out by the domain -controlling that guest). - -This is part of XSA-237. - -Reported-by: HW42 <hw42@ipsumj.de> -Signed-off-by: Jan Beulich <jbeulich@suse.com> -Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com> - ---- a/xen/arch/x86/irq.c -+++ b/xen/arch/x86/irq.c -@@ -1963,7 +1963,10 @@ int map_domain_pirq( - if ( !cpu_has_apic ) - goto done; - -- pdev = pci_get_pdev(msi->seg, msi->bus, msi->devfn); -+ pdev = pci_get_pdev_by_domain(d, msi->seg, msi->bus, msi->devfn); -+ if ( !pdev ) -+ goto done; -+ - ret = pci_enable_msi(msi, &msi_desc); - if ( ret ) - { diff --git a/main/xen/xsa237-2.patch b/main/xen/xsa237-2.patch deleted file mode 100644 index 0add704587..0000000000 --- a/main/xen/xsa237-2.patch +++ /dev/null @@ -1,66 +0,0 @@ -From: Jan Beulich <jbeulich@suse.com> -Subject: x86: enforce proper privilege when (un)mapping pIRQ-s - -(Un)mapping of IRQs, just like other RESOURCE__ADD* / RESOURCE__REMOVE* -actions (in FLASK terms) should be XSM_DM_PRIV rather than XSM_TARGET. -This in turn requires bypassing the XSM check in physdev_unmap_pirq() -for the HVM emuirq case just like is being done in physdev_map_pirq(). -The primary goal security wise, however, is to no longer allow HVM -guests, by specifying their own domain ID instead of DOMID_SELF, to -enter code paths intended for PV guest and the control domains of HVM -guests only. - -This is part of XSA-237. - -Reported-by: HW42 <hw42@ipsumj.de> -Signed-off-by: Jan Beulich <jbeulich@suse.com> -Reviewed-by: George Dunlap <george.dunlap@citrix.com> - ---- a/xen/arch/x86/physdev.c -+++ b/xen/arch/x86/physdev.c -@@ -111,7 +111,7 @@ int physdev_map_pirq(domid_t domid, int - if ( d == NULL ) - return -ESRCH; - -- ret = xsm_map_domain_pirq(XSM_TARGET, d); -+ ret = xsm_map_domain_pirq(XSM_DM_PRIV, d); - if ( ret ) - goto free_domain; - -@@ -256,13 +256,14 @@ int physdev_map_pirq(domid_t domid, int - int physdev_unmap_pirq(domid_t domid, int pirq) - { - struct domain *d; -- int ret; -+ int ret = 0; - - d = rcu_lock_domain_by_any_id(domid); - if ( d == NULL ) - return -ESRCH; - -- ret = xsm_unmap_domain_pirq(XSM_TARGET, d); -+ if ( domid != DOMID_SELF || !is_hvm_domain(d) || !has_pirq(d) ) -+ ret = xsm_unmap_domain_pirq(XSM_DM_PRIV, d); - if ( ret ) - goto free_domain; - ---- a/xen/include/xsm/dummy.h -+++ b/xen/include/xsm/dummy.h -@@ -453,7 +453,7 @@ static XSM_INLINE char *xsm_show_irq_sid - - static XSM_INLINE int xsm_map_domain_pirq(XSM_DEFAULT_ARG struct domain *d) - { -- XSM_ASSERT_ACTION(XSM_TARGET); -+ XSM_ASSERT_ACTION(XSM_DM_PRIV); - return xsm_default_action(action, current->domain, d); - } - -@@ -465,7 +465,7 @@ static XSM_INLINE int xsm_map_domain_irq - - static XSM_INLINE int xsm_unmap_domain_pirq(XSM_DEFAULT_ARG struct domain *d) - { -- XSM_ASSERT_ACTION(XSM_TARGET); -+ XSM_ASSERT_ACTION(XSM_DM_PRIV); - return xsm_default_action(action, current->domain, d); - } - diff --git a/main/xen/xsa237-3.patch b/main/xen/xsa237-3.patch deleted file mode 100644 index 5c69c48265..0000000000 --- a/main/xen/xsa237-3.patch +++ /dev/null @@ -1,55 +0,0 @@ -From: Jan Beulich <jbeulich@suse.com> -Subject: x86/MSI: disallow redundant enabling - -At the moment, Xen attempts to allow redundant enabling of MSI by -having pci_enable_msi() return 0, and point to the existing MSI -descriptor, when the msi already exists. - -Unfortunately, if subsequent errors are encountered, the cleanup -paths assume pci_enable_msi() had done full initialization, and -hence undo everything that was assumed to be done by that -function without also undoing other setup that would normally -occur only after that function was called (in map_domain_pirq() -itself). - -Rather than try to make the redundant enabling case work properly, just -forbid it entirely by having pci_enable_msi() return -EEXIST when MSI -is already set up. - -This is part of XSA-237. - -Reported-by: HW42 <hw42@ipsumj.de> -Signed-off-by: Jan Beulich <jbeulich@suse.com> -Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com> -Reviewed-by: George Dunlap <george.dunlap@citrix.com> - ---- a/xen/arch/x86/msi.c -+++ b/xen/arch/x86/msi.c -@@ -1050,11 +1050,10 @@ static int __pci_enable_msi(struct msi_i - old_desc = find_msi_entry(pdev, msi->irq, PCI_CAP_ID_MSI); - if ( old_desc ) - { -- printk(XENLOG_WARNING "irq %d already mapped to MSI on %04x:%02x:%02x.%u\n", -+ printk(XENLOG_ERR "irq %d already mapped to MSI on %04x:%02x:%02x.%u\n", - msi->irq, msi->seg, msi->bus, - PCI_SLOT(msi->devfn), PCI_FUNC(msi->devfn)); -- *desc = old_desc; -- return 0; -+ return -EEXIST; - } - - old_desc = find_msi_entry(pdev, -1, PCI_CAP_ID_MSIX); -@@ -1118,11 +1117,10 @@ static int __pci_enable_msix(struct msi_ - old_desc = find_msi_entry(pdev, msi->irq, PCI_CAP_ID_MSIX); - if ( old_desc ) - { -- printk(XENLOG_WARNING "irq %d already mapped to MSI-X on %04x:%02x:%02x.%u\n", -+ printk(XENLOG_ERR "irq %d already mapped to MSI-X on %04x:%02x:%02x.%u\n", - msi->irq, msi->seg, msi->bus, - PCI_SLOT(msi->devfn), PCI_FUNC(msi->devfn)); -- *desc = old_desc; -- return 0; -+ return -EEXIST; - } - - old_desc = find_msi_entry(pdev, -1, PCI_CAP_ID_MSI); diff --git a/main/xen/xsa237-4.patch b/main/xen/xsa237-4.patch deleted file mode 100644 index a16ec1bba1..0000000000 --- a/main/xen/xsa237-4.patch +++ /dev/null @@ -1,124 +0,0 @@ -From: Jan Beulich <jbeulich@suse.com> -Subject: x86/IRQ: conditionally preserve irq <-> pirq mapping on map error paths - -Mappings that had been set up before should not be torn down when -handling unrelated errors. - -This is part of XSA-237. - -Reported-by: HW42 <hw42@ipsumj.de> -Signed-off-by: Jan Beulich <jbeulich@suse.com> -Reviewed-by: George Dunlap <george.dunlap@citrix.com> - ---- a/xen/arch/x86/irq.c -+++ b/xen/arch/x86/irq.c -@@ -1251,7 +1251,8 @@ static int prepare_domain_irq_pirq(struc - return -ENOMEM; - } - *pinfo = info; -- return 0; -+ -+ return !!err; - } - - static void set_domain_irq_pirq(struct domain *d, int irq, struct pirq *pirq) -@@ -1294,7 +1295,10 @@ int init_domain_irq_mapping(struct domai - continue; - err = prepare_domain_irq_pirq(d, i, i, &info); - if ( err ) -+ { -+ ASSERT(err < 0); - break; -+ } - set_domain_irq_pirq(d, i, info); - } - -@@ -1902,6 +1906,7 @@ int map_domain_pirq( - struct pirq *info; - struct irq_desc *desc; - unsigned long flags; -+ DECLARE_BITMAP(prepared, MAX_MSI_IRQS) = {}; - - ASSERT(spin_is_locked(&d->event_lock)); - -@@ -1945,8 +1950,10 @@ int map_domain_pirq( - } - - ret = prepare_domain_irq_pirq(d, irq, pirq, &info); -- if ( ret ) -+ if ( ret < 0 ) - goto revoke; -+ if ( !ret ) -+ __set_bit(0, prepared); - - desc = irq_to_desc(irq); - -@@ -2018,8 +2025,10 @@ int map_domain_pirq( - irq = create_irq(NUMA_NO_NODE); - ret = irq >= 0 ? prepare_domain_irq_pirq(d, irq, pirq + nr, &info) - : irq; -- if ( ret ) -+ if ( ret < 0 ) - break; -+ if ( !ret ) -+ __set_bit(nr, prepared); - msi_desc[nr].irq = irq; - - if ( irq_permit_access(d, irq) != 0 ) -@@ -2052,15 +2061,15 @@ int map_domain_pirq( - desc->msi_desc = NULL; - spin_unlock_irqrestore(&desc->lock, flags); - } -- while ( nr-- ) -+ while ( nr ) - { - if ( irq >= 0 && irq_deny_access(d, irq) ) - printk(XENLOG_G_ERR - "dom%d: could not revoke access to IRQ%d (pirq %d)\n", - d->domain_id, irq, pirq); -- if ( info ) -+ if ( info && test_bit(nr, prepared) ) - cleanup_domain_irq_pirq(d, irq, info); -- info = pirq_info(d, pirq + nr); -+ info = pirq_info(d, pirq + --nr); - irq = info->arch.irq; - } - msi_desc->irq = -1; -@@ -2076,12 +2085,14 @@ int map_domain_pirq( - spin_lock_irqsave(&desc->lock, flags); - set_domain_irq_pirq(d, irq, info); - spin_unlock_irqrestore(&desc->lock, flags); -+ ret = 0; - } - - done: - if ( ret ) - { -- cleanup_domain_irq_pirq(d, irq, info); -+ if ( test_bit(0, prepared) ) -+ cleanup_domain_irq_pirq(d, irq, info); - revoke: - if ( irq_deny_access(d, irq) ) - printk(XENLOG_G_ERR ---- a/xen/arch/x86/physdev.c -+++ b/xen/arch/x86/physdev.c -@@ -186,7 +186,7 @@ int physdev_map_pirq(domid_t domid, int - } - else if ( type == MAP_PIRQ_TYPE_MULTI_MSI ) - { -- if ( msi->entry_nr <= 0 || msi->entry_nr > 32 ) -+ if ( msi->entry_nr <= 0 || msi->entry_nr > MAX_MSI_IRQS ) - ret = -EDOM; - else if ( msi->entry_nr != 1 && !iommu_intremap ) - ret = -EOPNOTSUPP; ---- a/xen/include/asm-x86/msi.h -+++ b/xen/include/asm-x86/msi.h -@@ -56,6 +56,8 @@ - /* MAX fixed pages reserved for mapping MSIX tables. */ - #define FIX_MSIX_MAX_PAGES 512 - -+#define MAX_MSI_IRQS 32 /* limited by MSI capability struct properties */ -+ - struct msi_info { - u16 seg; - u8 bus; diff --git a/main/xen/xsa237-5.patch b/main/xen/xsa237-5.patch deleted file mode 100644 index 155ba15d08..0000000000 --- a/main/xen/xsa237-5.patch +++ /dev/null @@ -1,37 +0,0 @@ -From: Jan Beulich <jbeulich@suse.com> -Subject: x86/FLASK: fix unmap-domain-IRQ XSM hook - -The caller and the FLASK implementation of xsm_unmap_domain_irq() -disagreed about what the "data" argument points to in the MSI case: -Change both sides to pass/take a PCI device. - -This is part of XSA-237. - -Signed-off-by: Jan Beulich <jbeulich@suse.com> -Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com> - ---- a/xen/arch/x86/irq.c -+++ b/xen/arch/x86/irq.c -@@ -2143,7 +2143,8 @@ int unmap_domain_pirq(struct domain *d, - nr = msi_desc->msi.nvec; - } - -- ret = xsm_unmap_domain_irq(XSM_HOOK, d, irq, msi_desc); -+ ret = xsm_unmap_domain_irq(XSM_HOOK, d, irq, -+ msi_desc ? msi_desc->dev : NULL); - if ( ret ) - goto done; - ---- a/xen/xsm/flask/hooks.c -+++ b/xen/xsm/flask/hooks.c -@@ -918,8 +918,8 @@ static int flask_unmap_domain_msi (struc - u32 *sid, struct avc_audit_data *ad) - { - #ifdef CONFIG_HAS_PCI -- struct msi_info *msi = data; -- u32 machine_bdf = (msi->seg << 16) | (msi->bus << 8) | msi->devfn; -+ const struct pci_dev *pdev = data; -+ u32 machine_bdf = (pdev->seg << 16) | (pdev->bus << 8) | pdev->devfn; - - AVC_AUDIT_DATA_INIT(ad, DEV); - ad->device = machine_bdf; diff --git a/main/xen/xsa238.patch b/main/xen/xsa238.patch deleted file mode 100644 index 0d7d48fef8..0000000000 --- a/main/xen/xsa238.patch +++ /dev/null @@ -1,45 +0,0 @@ -From cdc2887076b19b39fab9faec495082586f3113df Mon Sep 17 00:00:00 2001 -From: XenProject Security Team <security@xenproject.org> -Date: Tue, 5 Sep 2017 13:41:37 +0200 -Subject: x86/ioreq server: correctly handle bogus - XEN_DMOP_{,un}map_io_range_to_ioreq_server arguments - -Misbehaving device model can pass incorrect XEN_DMOP_map/ -unmap_io_range_to_ioreq_server arguments, namely end < start when -specifying address range. When this happens we hit ASSERT(s <= e) in -rangeset_contains_range()/rangeset_overlaps_range() with debug builds. -Production builds will not trap right away but may misbehave later -while handling such bogus ranges. - -This is XSA-238. - -Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> ---- - xen/arch/x86/hvm/ioreq.c | 6 ++++++ - 1 file changed, 6 insertions(+) - -diff --git a/xen/arch/x86/hvm/ioreq.c b/xen/arch/x86/hvm/ioreq.c -index b2a8b0e986..8c8bf1f0ec 100644 ---- a/xen/arch/x86/hvm/ioreq.c -+++ b/xen/arch/x86/hvm/ioreq.c -@@ -820,6 +820,9 @@ int hvm_map_io_range_to_ioreq_server(struct domain *d, ioservid_t id, - struct hvm_ioreq_server *s; - int rc; - -+ if ( start > end ) -+ return -EINVAL; -+ - spin_lock_recursive(&d->arch.hvm_domain.ioreq_server.lock); - - rc = -ENOENT; -@@ -872,6 +875,9 @@ int hvm_unmap_io_range_from_ioreq_server(struct domain *d, ioservid_t id, - struct hvm_ioreq_server *s; - int rc; - -+ if ( start > end ) -+ return -EINVAL; -+ - spin_lock_recursive(&d->arch.hvm_domain.ioreq_server.lock); - - rc = -ENOENT; diff --git a/main/xen/xsa239.patch b/main/xen/xsa239.patch deleted file mode 100644 index 5daecb5e47..0000000000 --- a/main/xen/xsa239.patch +++ /dev/null @@ -1,46 +0,0 @@ -From: Jan Beulich <jbeulich@suse.com> -Subject: x86/HVM: prefill partially used variable on emulation paths - -Certain handlers ignore the access size (vioapic_write() being the -example this was found with), perhaps leading to subsequent reads -seeing data that wasn't actually written by the guest. For -consistency and extra safety also do this on the read path of -hvm_process_io_intercept(), even if this doesn't directly affect what -guests get to see, as we've supposedly already dealt with read handlers -leaving data completely unitialized. - -This is XSA-239. - -Reported-by: Roger Pau Monné <roger.pau@citrix.com> -Reviewed-by: Roger Pau Monné <roger.pau@citrix.com> -Signed-off-by: Jan Beulich <jbeulich@suse.com> - ---- a/xen/arch/x86/hvm/emulate.c -+++ b/xen/arch/x86/hvm/emulate.c -@@ -129,7 +129,7 @@ static int hvmemul_do_io( - .count = *reps, - .dir = dir, - .df = df, -- .data = data, -+ .data = data_is_addr ? data : 0, - .data_is_ptr = data_is_addr, /* ioreq_t field name is misleading */ - .state = STATE_IOREQ_READY, - }; ---- a/xen/arch/x86/hvm/intercept.c -+++ b/xen/arch/x86/hvm/intercept.c -@@ -127,6 +127,7 @@ int hvm_process_io_intercept(const struc - addr = (p->type == IOREQ_TYPE_COPY) ? - p->addr + step * i : - p->addr; -+ data = 0; - rc = ops->read(handler, addr, p->size, &data); - if ( rc != X86EMUL_OKAY ) - break; -@@ -161,6 +162,7 @@ int hvm_process_io_intercept(const struc - { - if ( p->data_is_ptr ) - { -+ data = 0; - switch ( hvm_copy_from_guest_phys(&data, p->data + step * i, - p->size) ) - { diff --git a/main/xen/xsa240-1.patch b/main/xen/xsa240-1.patch deleted file mode 100644 index 515ad22b66..0000000000 --- a/main/xen/xsa240-1.patch +++ /dev/null @@ -1,494 +0,0 @@ -From 867988237d3e472fe2c99e81ae733e103422566c Mon Sep 17 00:00:00 2001 -From: Jan Beulich <jbeulich@suse.com> -Date: Thu, 28 Sep 2017 15:17:25 +0100 -Subject: [PATCH 1/2] x86: limit linear page table use to a single level - -That's the only way that they're meant to be used. Without such a -restriction arbitrarily long chains of same-level page tables can be -built, tearing down of which may then cause arbitrarily deep recursion, -causing a stack overflow. To facilitate this restriction, a counter is -being introduced to track both the number of same-level entries in a -page table as well as the number of uses of a page table in another -same-level one (counting into positive and negative direction -respectively, utilizing the fact that both counts can't be non-zero at -the same time). - -Note that the added accounting introduces a restriction on the number -of times a page can be used in other same-level page tables - more than -32k of such uses are no longer possible. - -Note also that some put_page_and_type[_preemptible]() calls are -replaced with open-coded equivalents. This seemed preferrable to -adding "parent_table" to the matrix of functions. - -Note further that cross-domain same-level page table references are no -longer permitted (they probably never should have been). - -This is XSA-240. - -Reported-by: Jann Horn <jannh@google.com> -Signed-off-by: Jan Beulich <jbeulich@suse.com> -Signed-off-by: George Dunlap <george.dunlap@citrix.com> ---- - xen/arch/x86/domain.c | 1 + - xen/arch/x86/mm.c | 171 ++++++++++++++++++++++++++++++++++++++----- - xen/include/asm-x86/domain.h | 2 + - xen/include/asm-x86/mm.h | 25 +++++-- - 4 files changed, 175 insertions(+), 24 deletions(-) - -diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c -index d7e699228c..d7ed72c246 100644 ---- a/xen/arch/x86/domain.c -+++ b/xen/arch/x86/domain.c -@@ -1226,6 +1226,7 @@ int arch_set_info_guest( - rc = -ERESTART; - /* Fallthrough */ - case -ERESTART: -+ v->arch.old_guest_ptpg = NULL; - v->arch.old_guest_table = - pagetable_get_page(v->arch.guest_table); - v->arch.guest_table = pagetable_null(); -diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c -index 86f5eda52d..1e469bd354 100644 ---- a/xen/arch/x86/mm.c -+++ b/xen/arch/x86/mm.c -@@ -747,6 +747,61 @@ static void put_data_page( - put_page(page); - } - -+static bool inc_linear_entries(struct page_info *pg) -+{ -+ typeof(pg->linear_pt_count) nc = read_atomic(&pg->linear_pt_count), oc; -+ -+ do { -+ /* -+ * The check below checks for the "linear use" count being non-zero -+ * as well as overflow. Signed integer overflow is undefined behavior -+ * according to the C spec. However, as long as linear_pt_count is -+ * smaller in size than 'int', the arithmetic operation of the -+ * increment below won't overflow; rather the result will be truncated -+ * when stored. Ensure that this is always true. -+ */ -+ BUILD_BUG_ON(sizeof(nc) >= sizeof(int)); -+ oc = nc++; -+ if ( nc <= 0 ) -+ return false; -+ nc = cmpxchg(&pg->linear_pt_count, oc, nc); -+ } while ( oc != nc ); -+ -+ return true; -+} -+ -+static void dec_linear_entries(struct page_info *pg) -+{ -+ typeof(pg->linear_pt_count) oc; -+ -+ oc = arch_fetch_and_add(&pg->linear_pt_count, -1); -+ ASSERT(oc > 0); -+} -+ -+static bool inc_linear_uses(struct page_info *pg) -+{ -+ typeof(pg->linear_pt_count) nc = read_atomic(&pg->linear_pt_count), oc; -+ -+ do { -+ /* See the respective comment in inc_linear_entries(). */ -+ BUILD_BUG_ON(sizeof(nc) >= sizeof(int)); -+ oc = nc--; -+ if ( nc >= 0 ) -+ return false; -+ nc = cmpxchg(&pg->linear_pt_count, oc, nc); -+ } while ( oc != nc ); -+ -+ return true; -+} -+ -+static void dec_linear_uses(struct page_info *pg) -+{ -+ typeof(pg->linear_pt_count) oc; -+ -+ oc = arch_fetch_and_add(&pg->linear_pt_count, 1); -+ ASSERT(oc < 0); -+} -+ - /* - * We allow root tables to map each other (a.k.a. linear page tables). It - * needs some special care with reference counts and access permissions: -@@ -777,15 +832,35 @@ get_##level##_linear_pagetable( \ - \ - if ( (pfn = level##e_get_pfn(pde)) != pde_pfn ) \ - { \ -+ struct page_info *ptpg = mfn_to_page(pde_pfn); \ -+ \ -+ /* Make sure the page table belongs to the correct domain. */ \ -+ if ( unlikely(page_get_owner(ptpg) != d) ) \ -+ return 0; \ -+ \ - /* Make sure the mapped frame belongs to the correct domain. */ \ - if ( unlikely(!get_page_from_pagenr(pfn, d)) ) \ - return 0; \ - \ - /* \ -- * Ensure that the mapped frame is an already-validated page table. \ -+ * Ensure that the mapped frame is an already-validated page table \ -+ * and is not itself having linear entries, as well as that the \ -+ * containing page table is not iself in use as a linear page table \ -+ * elsewhere. \ - * If so, atomically increment the count (checking for overflow). \ - */ \ - page = mfn_to_page(pfn); \ -+ if ( !inc_linear_entries(ptpg) ) \ -+ { \ -+ put_page(page); \ -+ return 0; \ -+ } \ -+ if ( !inc_linear_uses(page) ) \ -+ { \ -+ dec_linear_entries(ptpg); \ -+ put_page(page); \ -+ return 0; \ -+ } \ - y = page->u.inuse.type_info; \ - do { \ - x = y; \ -@@ -793,6 +868,8 @@ get_##level##_linear_pagetable( \ - unlikely((x & (PGT_type_mask|PGT_validated)) != \ - (PGT_##level##_page_table|PGT_validated)) ) \ - { \ -+ dec_linear_uses(page); \ -+ dec_linear_entries(ptpg); \ - put_page(page); \ - return 0; \ - } \ -@@ -1226,6 +1303,9 @@ get_page_from_l4e( - l3e_remove_flags((pl3e), _PAGE_USER|_PAGE_RW|_PAGE_ACCESSED); \ - } while ( 0 ) - -+static int _put_page_type(struct page_info *page, bool preemptible, -+ struct page_info *ptpg); -+ - void put_page_from_l1e(l1_pgentry_t l1e, struct domain *l1e_owner) - { - unsigned long pfn = l1e_get_pfn(l1e); -@@ -1296,17 +1376,22 @@ static int put_page_from_l2e(l2_pgentry_t l2e, unsigned long pfn) - if ( l2e_get_flags(l2e) & _PAGE_PSE ) - put_superpage(l2e_get_pfn(l2e)); - else -- put_page_and_type(l2e_get_page(l2e)); -+ { -+ struct page_info *pg = l2e_get_page(l2e); -+ int rc = _put_page_type(pg, false, mfn_to_page(pfn)); -+ -+ ASSERT(!rc); -+ put_page(pg); -+ } - - return 0; - } - --static int __put_page_type(struct page_info *, int preemptible); -- - static int put_page_from_l3e(l3_pgentry_t l3e, unsigned long pfn, - int partial, bool_t defer) - { - struct page_info *pg; -+ int rc; - - if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) || (l3e_get_pfn(l3e) == pfn) ) - return 1; -@@ -1329,21 +1414,28 @@ static int put_page_from_l3e(l3_pgentry_t l3e, unsigned long pfn, - if ( unlikely(partial > 0) ) - { - ASSERT(!defer); -- return __put_page_type(pg, 1); -+ return _put_page_type(pg, true, mfn_to_page(pfn)); - } - - if ( defer ) - { -+ current->arch.old_guest_ptpg = mfn_to_page(pfn); - current->arch.old_guest_table = pg; - return 0; - } - -- return put_page_and_type_preemptible(pg); -+ rc = _put_page_type(pg, true, mfn_to_page(pfn)); -+ if ( likely(!rc) ) -+ put_page(pg); -+ -+ return rc; - } - - static int put_page_from_l4e(l4_pgentry_t l4e, unsigned long pfn, - int partial, bool_t defer) - { -+ int rc = 1; -+ - if ( (l4e_get_flags(l4e) & _PAGE_PRESENT) && - (l4e_get_pfn(l4e) != pfn) ) - { -@@ -1352,18 +1444,22 @@ static int put_page_from_l4e(l4_pgentry_t l4e, unsigned long pfn, - if ( unlikely(partial > 0) ) - { - ASSERT(!defer); -- return __put_page_type(pg, 1); -+ return _put_page_type(pg, true, mfn_to_page(pfn)); - } - - if ( defer ) - { -+ current->arch.old_guest_ptpg = mfn_to_page(pfn); - current->arch.old_guest_table = pg; - return 0; - } - -- return put_page_and_type_preemptible(pg); -+ rc = _put_page_type(pg, true, mfn_to_page(pfn)); -+ if ( likely(!rc) ) -+ put_page(pg); - } -- return 1; -+ -+ return rc; - } - - static int alloc_l1_table(struct page_info *page) -@@ -1561,6 +1657,7 @@ static int alloc_l3_table(struct page_info *page) - { - page->nr_validated_ptes = i; - page->partial_pte = 0; -+ current->arch.old_guest_ptpg = NULL; - current->arch.old_guest_table = page; - } - while ( i-- > 0 ) -@@ -1654,6 +1751,7 @@ static int alloc_l4_table(struct page_info *page) - { - if ( current->arch.old_guest_table ) - page->nr_validated_ptes++; -+ current->arch.old_guest_ptpg = NULL; - current->arch.old_guest_table = page; - } - } -@@ -2403,14 +2501,20 @@ int free_page_type(struct page_info *pag - } - - --static int __put_final_page_type( -- struct page_info *page, unsigned long type, int preemptible) -+static int _put_final_page_type(struct page_info *page, unsigned long type, -+ bool preemptible, struct page_info *ptpg) - { - int rc = free_page_type(page, type, preemptible); - - /* No need for atomic update of type_info here: noone else updates it. */ - if ( rc == 0 ) - { -+ if ( ptpg && PGT_type_equal(type, ptpg->u.inuse.type_info) ) -+ { -+ dec_linear_uses(page); -+ dec_linear_entries(ptpg); -+ } -+ ASSERT(!page->linear_pt_count || page_get_owner(page)->is_dying); - /* - * Record TLB information for flush later. We do not stamp page tables - * when running in shadow mode: -@@ -2446,8 +2550,8 @@ static int __put_final_page_type( - } - - --static int __put_page_type(struct page_info *page, -- int preemptible) -+static int _put_page_type(struct page_info *page, bool preemptible, -+ struct page_info *ptpg) - { - unsigned long nx, x, y = page->u.inuse.type_info; - int rc = 0; -@@ -2474,12 +2578,28 @@ static int __put_page_type(struct page_info *page, - x, nx)) != x) ) - continue; - /* We cleared the 'valid bit' so we do the clean up. */ -- rc = __put_final_page_type(page, x, preemptible); -+ rc = _put_final_page_type(page, x, preemptible, ptpg); -+ ptpg = NULL; - if ( x & PGT_partial ) - put_page(page); - break; - } - -+ if ( ptpg && PGT_type_equal(x, ptpg->u.inuse.type_info) ) -+ { -+ /* -+ * page_set_tlbflush_timestamp() accesses the same union -+ * linear_pt_count lives in. Unvalidated page table pages, -+ * however, should occur during domain destruction only -+ * anyway. Updating of linear_pt_count luckily is not -+ * necessary anymore for a dying domain. -+ */ -+ ASSERT(page_get_owner(page)->is_dying); -+ ASSERT(page->linear_pt_count < 0); -+ ASSERT(ptpg->linear_pt_count > 0); -+ ptpg = NULL; -+ } -+ - /* - * Record TLB information for flush later. We do not stamp page - * tables when running in shadow mode: -@@ -2499,6 +2619,13 @@ static int __put_page_type(struct page_info *page, - return -EINTR; - } - -+ if ( ptpg && PGT_type_equal(x, ptpg->u.inuse.type_info) ) -+ { -+ ASSERT(!rc); -+ dec_linear_uses(page); -+ dec_linear_entries(ptpg); -+ } -+ - return rc; - } - -@@ -2638,6 +2765,7 @@ static int __get_page_type(struct page_info *page, unsigned long type, - page->nr_validated_ptes = 0; - page->partial_pte = 0; - } -+ page->linear_pt_count = 0; - rc = alloc_page_type(page, type, preemptible); - } - -@@ -2652,7 +2780,7 @@ static int __get_page_type(struct page_info *page, unsigned long type, - - void put_page_type(struct page_info *page) - { -- int rc = __put_page_type(page, 0); -+ int rc = _put_page_type(page, false, NULL); - ASSERT(rc == 0); - (void)rc; - } -@@ -2668,7 +2796,7 @@ int get_page_type(struct page_info *page, unsigned long type) - - int put_page_type_preemptible(struct page_info *page) - { -- return __put_page_type(page, 1); -+ return _put_page_type(page, true, NULL); - } - - int get_page_type_preemptible(struct page_info *page, unsigned long type) -@@ -2878,11 +3006,14 @@ int put_old_guest_table(struct vcpu *v) - if ( !v->arch.old_guest_table ) - return 0; - -- switch ( rc = put_page_and_type_preemptible(v->arch.old_guest_table) ) -+ switch ( rc = _put_page_type(v->arch.old_guest_table, true, -+ v->arch.old_guest_ptpg) ) - { - case -EINTR: - case -ERESTART: - return -ERESTART; -+ case 0: -+ put_page(v->arch.old_guest_table); - } - - v->arch.old_guest_table = NULL; -@@ -3042,6 +3173,7 @@ int new_guest_cr3(unsigned long mfn) - rc = -ERESTART; - /* fallthrough */ - case -ERESTART: -+ curr->arch.old_guest_ptpg = NULL; - curr->arch.old_guest_table = page; - break; - default: -@@ -3310,7 +3442,10 @@ long do_mmuext_op( - if ( type == PGT_l1_page_table ) - put_page_and_type(page); - else -+ { -+ curr->arch.old_guest_ptpg = NULL; - curr->arch.old_guest_table = page; -+ } - } - } - -@@ -3346,6 +3481,7 @@ long do_mmuext_op( - { - case -EINTR: - case -ERESTART: -+ curr->arch.old_guest_ptpg = NULL; - curr->arch.old_guest_table = page; - rc = 0; - break; -@@ -3425,6 +3561,7 @@ long do_mmuext_op( - rc = -ERESTART; - /* fallthrough */ - case -ERESTART: -+ curr->arch.old_guest_ptpg = NULL; - curr->arch.old_guest_table = page; - break; - default: -diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h -index 924caac834..5a512918cc 100644 ---- a/xen/include/asm-x86/domain.h -+++ b/xen/include/asm-x86/domain.h -@@ -527,6 +527,8 @@ struct arch_vcpu - pagetable_t guest_table_user; /* (MFN) x86/64 user-space pagetable */ - pagetable_t guest_table; /* (MFN) guest notion of cr3 */ - struct page_info *old_guest_table; /* partially destructed pagetable */ -+ struct page_info *old_guest_ptpg; /* containing page table of the */ -+ /* former, if any */ - /* guest_table holds a ref to the page, and also a type-count unless - * shadow refcounts are in use */ - pagetable_t shadow_table[4]; /* (MFN) shadow(s) of guest */ -diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h -index 119d7dec6b..445da50d47 100644 ---- a/xen/include/asm-x86/mm.h -+++ b/xen/include/asm-x86/mm.h -@@ -124,11 +124,11 @@ struct page_info - u32 tlbflush_timestamp; - - /* -- * When PGT_partial is true then this field is valid and indicates -- * that PTEs in the range [0, @nr_validated_ptes) have been validated. -- * An extra page reference must be acquired (or not dropped) whenever -- * PGT_partial gets set, and it must be dropped when the flag gets -- * cleared. This is so that a get() leaving a page in partially -+ * When PGT_partial is true then the first two fields are valid and -+ * indicate that PTEs in the range [0, @nr_validated_ptes) have been -+ * validated. An extra page reference must be acquired (or not dropped) -+ * whenever PGT_partial gets set, and it must be dropped when the flag -+ * gets cleared. This is so that a get() leaving a page in partially - * validated state (where the caller would drop the reference acquired - * due to the getting of the type [apparently] failing [-ERESTART]) - * would not accidentally result in a page left with zero general -@@ -152,10 +152,18 @@ struct page_info - * put_page_from_lNe() (due to the apparent failure), and hence it - * must be dropped when the put operation is resumed (and completes), - * but it must not be acquired if picking up the page for validation. -+ * -+ * The 3rd field, @linear_pt_count, indicates -+ * - by a positive value, how many same-level page table entries a page -+ * table has, -+ * - by a negative value, in how many same-level page tables a page is -+ * in use. - */ - struct { -- u16 nr_validated_ptes; -- s8 partial_pte; -+ u16 nr_validated_ptes:PAGETABLE_ORDER + 1; -+ u16 :16 - PAGETABLE_ORDER - 1 - 2; -+ s16 partial_pte:2; -+ s16 linear_pt_count; - }; - - /* -@@ -206,6 +214,9 @@ struct page_info - #define PGT_count_width PG_shift(9) - #define PGT_count_mask ((1UL<<PGT_count_width)-1) - -+/* Are the 'type mask' bits identical? */ -+#define PGT_type_equal(x, y) (!(((x) ^ (y)) & PGT_type_mask)) -+ - /* Cleared when the owning guest 'frees' this page. */ - #define _PGC_allocated PG_shift(1) - #define PGC_allocated PG_mask(1, 1) --- -2.14.1 - diff --git a/main/xen/xsa240-2.patch b/main/xen/xsa240-2.patch deleted file mode 100644 index 5e057c5652..0000000000 --- a/main/xen/xsa240-2.patch +++ /dev/null @@ -1,83 +0,0 @@ -From e614979ce054044d9e19023f1ef10dae6e38baf4 Mon Sep 17 00:00:00 2001 -From: George Dunlap <george.dunlap@citrix.com> -Date: Fri, 22 Sep 2017 11:46:55 +0100 -Subject: [PATCH 2/2] x86/mm: Disable PV linear pagetables by default - -Allowing pagetables to point to other pagetables of the same level -(often called 'linear pagetables') has been included in Xen since its -inception. But it is not used by the most common PV guests (Linux, -NetBSD, minios), and has been the source of a number of subtle -reference-counting bugs. - -Add a command-line option to control whether PV linear pagetables are -allowed (disabled by default). - -Reported-by: Jann Horn <jannh@google.com> -Signed-off-by: George Dunlap <george.dunlap@citrix.com> -Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com> ---- -Changes since v2: -- s/_/-/; in command-line option -- Added __read_mostly ---- - docs/misc/xen-command-line.markdown | 15 +++++++++++++++ - xen/arch/x86/mm.c | 10 ++++++++++ - 2 files changed, 25 insertions(+) - -diff --git a/docs/misc/xen-command-line.markdown b/docs/misc/xen-command-line.markdown -index 44d99852aa..45ef873abb 100644 ---- a/docs/misc/xen-command-line.markdown -+++ b/docs/misc/xen-command-line.markdown -@@ -1374,6 +1374,21 @@ The following resources are available: - CDP, one COS will corespond two CBMs other than one with CAT, due to the - sum of CBMs is fixed, that means actual `cos_max` in use will automatically - reduce to half when CDP is enabled. -+ -+### pv-linear-pt -+> `= <boolean>` -+ -+> Default: `false` -+ -+Allow PV guests to have pagetable entries pointing to other pagetables -+of the same level (i.e., allowing L2 PTEs to point to other L2 pages). -+This technique is often called "linear pagetables", and is sometimes -+used to allow operating systems a simple way to consistently map the -+current process's pagetables into its own virtual address space. -+ -+None of the most common PV operating systems (Linux, NetBSD, MiniOS) -+use this technique, but there may be custom operating systems which -+do. - - ### reboot - > `= t[riple] | k[bd] | a[cpi] | p[ci] | P[ower] | e[fi] | n[o] [, [w]arm | [c]old]` -diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c -index 1e469bd354..32952a46b9 100644 ---- a/xen/arch/x86/mm.c -+++ b/xen/arch/x86/mm.c -@@ -814,6 +814,9 @@ static void dec_linear_uses(struct page_info *pg) - * frame if it is mapped by a different root table. This is sufficient and - * also necessary to allow validation of a root table mapping itself. - */ -+static bool __read_mostly pv_linear_pt_enable = false; -+boolean_param("pv-linear-pt", pv_linear_pt_enable); -+ - #define define_get_linear_pagetable(level) \ - static int \ - get_##level##_linear_pagetable( \ -@@ -823,6 +826,13 @@ get_##level##_linear_pagetable( \ - struct page_info *page; \ - unsigned long pfn; \ - \ -+ if ( !pv_linear_pt_enable ) \ -+ { \ -+ gdprintk(XENLOG_WARNING, \ -+ "Attempt to create linear p.t. (feature disabled)\n"); \ -+ return 0; \ -+ } \ -+ \ - if ( (level##e_get_flags(pde) & _PAGE_RW) ) \ - { \ - gdprintk(XENLOG_WARNING, \ --- -2.14.1 - diff --git a/main/xen/xsa241-4.9.patch b/main/xen/xsa241-4.9.patch deleted file mode 100644 index 514e4c7a4b..0000000000 --- a/main/xen/xsa241-4.9.patch +++ /dev/null @@ -1,120 +0,0 @@ -From: Jan Beulich <jbeulich@suse.com> -Subject: x86: don't store possibly stale TLB flush time stamp - -While the timing window is extremely narrow, it is theoretically -possible for an update to the TLB flush clock and a subsequent flush -IPI to happen between the read and write parts of the update of the -per-page stamp. Exclude this possibility by disabling interrupts -across the update, preventing the IPI to be serviced in the middle. - -This is XSA-241. - -Reported-by: Jann Horn <jannh@google.com> -Suggested-by: George Dunlap <george.dunlap@citrix.com> -Signed-off-by: Jan Beulich <jbeulich@suse.com> -Reviewed-by: George Dunlap <george.dunlap@citrix.com> - ---- a/xen/arch/arm/smp.c -+++ b/xen/arch/arm/smp.c -@@ -1,3 +1,4 @@ -+#include <xen/mm.h> - #include <asm/system.h> - #include <asm/smp.h> - #include <asm/cpregs.h> ---- a/xen/arch/x86/mm.c -+++ b/xen/arch/x86/mm.c -@@ -2524,7 +2524,7 @@ static int _put_final_page_type(struct p - */ - if ( !(shadow_mode_enabled(page_get_owner(page)) && - (page->count_info & PGC_page_table)) ) -- page->tlbflush_timestamp = tlbflush_current_time(); -+ page_set_tlbflush_timestamp(page); - wmb(); - page->u.inuse.type_info--; - } -@@ -2534,7 +2534,7 @@ static int _put_final_page_type(struct p - (PGT_count_mask|PGT_validated|PGT_partial)) == 1); - if ( !(shadow_mode_enabled(page_get_owner(page)) && - (page->count_info & PGC_page_table)) ) -- page->tlbflush_timestamp = tlbflush_current_time(); -+ page_set_tlbflush_timestamp(page); - wmb(); - page->u.inuse.type_info |= PGT_validated; - } -@@ -2588,7 +2588,7 @@ static int _put_page_type(struct page_in - if ( ptpg && PGT_type_equal(x, ptpg->u.inuse.type_info) ) - { - /* -- * page_set_tlbflush_timestamp() accesses the same union -+ * set_tlbflush_timestamp() accesses the same union - * linear_pt_count lives in. Unvalidated page table pages, - * however, should occur during domain destruction only - * anyway. Updating of linear_pt_count luckily is not -@@ -2609,7 +2609,7 @@ static int _put_page_type(struct page_in - */ - if ( !(shadow_mode_enabled(page_get_owner(page)) && - (page->count_info & PGC_page_table)) ) -- page->tlbflush_timestamp = tlbflush_current_time(); -+ page_set_tlbflush_timestamp(page); - } - - if ( likely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) == x) ) ---- a/xen/arch/x86/mm/shadow/common.c -+++ b/xen/arch/x86/mm/shadow/common.c -@@ -1464,7 +1464,7 @@ void shadow_free(struct domain *d, mfn_t - * TLBs when we reuse the page. Because the destructors leave the - * contents of the pages in place, we can delay TLB flushes until - * just before the allocator hands the page out again. */ -- sp->tlbflush_timestamp = tlbflush_current_time(); -+ page_set_tlbflush_timestamp(sp); - perfc_decr(shadow_alloc_count); - page_list_add_tail(sp, &d->arch.paging.shadow.freelist); - sp = next; ---- a/xen/common/page_alloc.c -+++ b/xen/common/page_alloc.c -@@ -960,7 +960,7 @@ static void free_heap_pages( - /* If a page has no owner it will need no safety TLB flush. */ - pg[i].u.free.need_tlbflush = (page_get_owner(&pg[i]) != NULL); - if ( pg[i].u.free.need_tlbflush ) -- pg[i].tlbflush_timestamp = tlbflush_current_time(); -+ page_set_tlbflush_timestamp(&pg[i]); - - /* This page is not a guest frame any more. */ - page_set_owner(&pg[i], NULL); /* set_gpfn_from_mfn snoops pg owner */ ---- a/xen/include/asm-arm/flushtlb.h -+++ b/xen/include/asm-arm/flushtlb.h -@@ -12,6 +12,11 @@ static inline void tlbflush_filter(cpuma - - #define tlbflush_current_time() (0) - -+static inline void page_set_tlbflush_timestamp(struct page_info *page) -+{ -+ page->tlbflush_timestamp = tlbflush_current_time(); -+} -+ - #if defined(CONFIG_ARM_32) - # include <asm/arm32/flushtlb.h> - #elif defined(CONFIG_ARM_64) ---- a/xen/include/asm-x86/flushtlb.h -+++ b/xen/include/asm-x86/flushtlb.h -@@ -23,6 +23,20 @@ DECLARE_PER_CPU(u32, tlbflush_time); - - #define tlbflush_current_time() tlbflush_clock - -+static inline void page_set_tlbflush_timestamp(struct page_info *page) -+{ -+ /* -+ * Prevent storing a stale time stamp, which could happen if an update -+ * to tlbflush_clock plus a subsequent flush IPI happen between the -+ * reading of tlbflush_clock and the writing of the struct page_info -+ * field. -+ */ -+ ASSERT(local_irq_is_enabled()); -+ local_irq_disable(); -+ page->tlbflush_timestamp = tlbflush_current_time(); -+ local_irq_enable(); -+} -+ - /* - * @cpu_stamp is the timestamp at last TLB flush for the CPU we are testing. - * @lastuse_stamp is a timestamp taken when the PFN we are testing was last diff --git a/main/xen/xsa242-4.9.patch b/main/xen/xsa242-4.9.patch deleted file mode 100644 index 8adfa61fd7..0000000000 --- a/main/xen/xsa242-4.9.patch +++ /dev/null @@ -1,43 +0,0 @@ -From: Jan Beulich <jbeulich@suse.com> -Subject: x86: don't allow page_unlock() to drop the last type reference - -Only _put_page_type() does the necessary cleanup, and hence not all -domain pages can be released during guest cleanup (leaving around -zombie domains) if we get this wrong. - -This is XSA-242. - -Signed-off-by: Jan Beulich <jbeulich@suse.com> - ---- a/xen/arch/x86/mm.c -+++ b/xen/arch/x86/mm.c -@@ -1923,7 +1923,11 @@ void page_unlock(struct page_info *page) - - do { - x = y; -+ ASSERT((x & PGT_count_mask) && (x & PGT_locked)); -+ - nx = x - (1 | PGT_locked); -+ /* We must not drop the last reference here. */ -+ ASSERT(nx & PGT_count_mask); - } while ( (y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x ); - } - -@@ -2611,6 +2615,17 @@ static int _put_page_type(struct page_in - (page->count_info & PGC_page_table)) ) - page_set_tlbflush_timestamp(page); - } -+ else if ( unlikely((nx & (PGT_locked | PGT_count_mask)) == -+ (PGT_locked | 1)) ) -+ { -+ /* -+ * We must not drop the second to last reference when the page is -+ * locked, as page_unlock() doesn't do any cleanup of the type. -+ */ -+ cpu_relax(); -+ y = page->u.inuse.type_info; -+ continue; -+ } - - if ( likely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) == x) ) - break; diff --git a/main/xen/xsa243-1.patch b/main/xen/xsa243-1.patch deleted file mode 100644 index aaff277514..0000000000 --- a/main/xen/xsa243-1.patch +++ /dev/null @@ -1,93 +0,0 @@ -From: Andrew Cooper <andrew.cooper3@citrix.com> -Subject: x86/shadow: Don't create self-linear shadow mappings for 4-level translated guests - -When initially creating a monitor table for 4-level translated guests, don't -install a shadow-linear mapping. This mapping is actually self-linear, and -trips up the writeable heuristic logic into following Xen's mappings, not the -guests' shadows it was expecting to follow. - -A consequence of this is that sh_guess_wrmap() needs to cope with there being -no shadow-linear mapping present, which in practice occurs once each time a -vcpu switches to 4-level paging from a different paging mode. - -An appropriate shadow-linear slot will be inserted into the monitor table -either while constructing lower level monitor tables, or by sh_update_cr3(). - -While fixing this, clarify the safety of the other mappings. Despite -appearing unsafe, it is correct to create a guest-linear mapping for -translated domains; this is self-linear and doesn't point into the translated -domain. Drop a dead clause for translate != external guests. - -This is XSA-243. - -Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> -Acked-by: Tim Deegan <tim@xen.org> - -diff --git a/xen/arch/x86/mm/shadow/multi.c b/xen/arch/x86/mm/shadow/multi.c -index 8d4f244..a18d286 100644 ---- a/xen/arch/x86/mm/shadow/multi.c -+++ b/xen/arch/x86/mm/shadow/multi.c -@@ -1485,26 +1485,38 @@ void sh_install_xen_entries_in_l4(struct domain *d, mfn_t gl4mfn, mfn_t sl4mfn) - sl4e[shadow_l4_table_offset(RO_MPT_VIRT_START)] = shadow_l4e_empty(); - } - -- /* Shadow linear mapping for 4-level shadows. N.B. for 3-level -- * shadows on 64-bit xen, this linear mapping is later replaced by the -- * monitor pagetable structure, which is built in make_monitor_table -- * and maintained by sh_update_linear_entries. */ -- sl4e[shadow_l4_table_offset(SH_LINEAR_PT_VIRT_START)] = -- shadow_l4e_from_mfn(sl4mfn, __PAGE_HYPERVISOR_RW); -- -- /* Self linear mapping. */ -- if ( shadow_mode_translate(d) && !shadow_mode_external(d) ) -+ /* -+ * Linear mapping slots: -+ * -+ * Calling this function with gl4mfn == sl4mfn is used to construct a -+ * monitor table for translated domains. In this case, gl4mfn forms the -+ * self-linear mapping (i.e. not pointing into the translated domain), and -+ * the shadow-linear slot is skipped. The shadow-linear slot is either -+ * filled when constructing lower level monitor tables, or via -+ * sh_update_cr3() for 4-level guests. -+ * -+ * Calling this function with gl4mfn != sl4mfn is used for non-translated -+ * guests, where the shadow-linear slot is actually self-linear, and the -+ * guest-linear slot points into the guests view of its pagetables. -+ */ -+ if ( shadow_mode_translate(d) ) - { -- // linear tables may not be used with translated PV guests -- sl4e[shadow_l4_table_offset(LINEAR_PT_VIRT_START)] = -+ ASSERT(mfn_eq(gl4mfn, sl4mfn)); -+ -+ sl4e[shadow_l4_table_offset(SH_LINEAR_PT_VIRT_START)] = - shadow_l4e_empty(); - } - else - { -- sl4e[shadow_l4_table_offset(LINEAR_PT_VIRT_START)] = -- shadow_l4e_from_mfn(gl4mfn, __PAGE_HYPERVISOR_RW); -+ ASSERT(!mfn_eq(gl4mfn, sl4mfn)); -+ -+ sl4e[shadow_l4_table_offset(SH_LINEAR_PT_VIRT_START)] = -+ shadow_l4e_from_mfn(sl4mfn, __PAGE_HYPERVISOR_RW); - } - -+ sl4e[shadow_l4_table_offset(LINEAR_PT_VIRT_START)] = -+ shadow_l4e_from_mfn(gl4mfn, __PAGE_HYPERVISOR_RW); -+ - unmap_domain_page(sl4e); - } - #endif -@@ -4405,6 +4417,11 @@ static int sh_guess_wrmap(struct vcpu *v, unsigned long vaddr, mfn_t gmfn) - - /* Carefully look in the shadow linear map for the l1e we expect */ - #if SHADOW_PAGING_LEVELS >= 4 -+ /* Is a shadow linear map is installed in the first place? */ -+ sl4p = v->arch.paging.shadow.guest_vtable; -+ sl4p += shadow_l4_table_offset(SH_LINEAR_PT_VIRT_START); -+ if ( !(shadow_l4e_get_flags(*sl4p) & _PAGE_PRESENT) ) -+ return 0; - sl4p = sh_linear_l4_table(v) + shadow_l4_linear_offset(vaddr); - if ( !(shadow_l4e_get_flags(*sl4p) & _PAGE_PRESENT) ) - return 0; diff --git a/main/xen/xsa243-2.patch b/main/xen/xsa243-2.patch deleted file mode 100644 index 1aca5d3dbd..0000000000 --- a/main/xen/xsa243-2.patch +++ /dev/null @@ -1,54 +0,0 @@ -From: Andrew Cooper <andrew.cooper3@citrix.com> -Subject: x86/shadow: correct SH_LINEAR mapping detection in sh_guess_wrmap() - -The fix for XSA-243 / CVE-2017-15592 (c/s bf2b4eadcf379) introduced a change -in behaviour for sh_guest_wrmap(), where it had to cope with no shadow linear -mapping being present. - -As the name suggests, guest_vtable is a mapping of the guests pagetable, not -Xen's pagetable, meaning that it isn't the pagetable we need to check for the -shadow linear slot in. - -The practical upshot is that a shadow HVM vcpu which switches into 4-level -paging mode, with an L4 pagetable that contains a mapping which aliases Xen's -SH_LINEAR_PT_VIRT_START will fool the safety check for whether a SHADOW_LINEAR -mapping is present. As the check passes (when it should have failed), Xen -subsequently falls over the missing mapping with a pagefault such as: - - (XEN) Pagetable walk from ffff8140a0503880: - (XEN) L4[0x102] = 000000046c218063 ffffffffffffffff - (XEN) L3[0x102] = 000000046c218063 ffffffffffffffff - (XEN) L2[0x102] = 000000046c218063 ffffffffffffffff - (XEN) L1[0x103] = 0000000000000000 ffffffffffffffff - -This is part of XSA-243. - -Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> -Reviewed-by: Tim Deegan <tim@xen.org> - ---- a/xen/arch/x86/mm/shadow/multi.c -+++ b/xen/arch/x86/mm/shadow/multi.c -@@ -4350,11 +4350,18 @@ static int sh_guess_wrmap(struct vcpu *v - - /* Carefully look in the shadow linear map for the l1e we expect */ - #if SHADOW_PAGING_LEVELS >= 4 -- /* Is a shadow linear map is installed in the first place? */ -- sl4p = v->arch.paging.shadow.guest_vtable; -- sl4p += shadow_l4_table_offset(SH_LINEAR_PT_VIRT_START); -- if ( !(shadow_l4e_get_flags(*sl4p) & _PAGE_PRESENT) ) -- return 0; -+ /* -+ * Non-external guests (i.e. PV) have a SHADOW_LINEAR mapping from the -+ * moment their shadows are created. External guests (i.e. HVM) may not, -+ * but always have a regular linear mapping, which we can use to observe -+ * whether a SHADOW_LINEAR mapping is present. -+ */ -+ if ( paging_mode_external(d) ) -+ { -+ sl4p = __linear_l4_table + l4_linear_offset(SH_LINEAR_PT_VIRT_START); -+ if ( !(shadow_l4e_get_flags(*sl4p) & _PAGE_PRESENT) ) -+ return 0; -+ } - sl4p = sh_linear_l4_table(v) + shadow_l4_linear_offset(vaddr); - if ( !(shadow_l4e_get_flags(*sl4p) & _PAGE_PRESENT) ) - return 0; diff --git a/main/xen/xsa244.patch b/main/xen/xsa244.patch deleted file mode 100644 index c35a80be32..0000000000 --- a/main/xen/xsa244.patch +++ /dev/null @@ -1,59 +0,0 @@ -From: Andrew Cooper <andrew.cooper3@citrix.com> -Subject: [PATCH] x86/cpu: Fix IST handling during PCPU bringup - -Clear IST references in newly allocated IDTs. Nothing good will come of -having them set before the TSS is suitably constructed (although the chances -of the CPU surviving such an IST interrupt/exception is extremely slim). - -Uniformly set the IST references after the TSS is in place. This fixes an -issue on AMD hardware, where onlining a PCPU while PCPU0 is in HVM context -will cause IST_NONE to be copied into the new IDT, making that PCPU vulnerable -to privilege escalation from PV guests until it subsequently schedules an HVM -guest. - -This is XSA-244 - -Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> ---- - xen/arch/x86/cpu/common.c | 5 +++++ - xen/arch/x86/smpboot.c | 3 +++ - 2 files changed, 8 insertions(+) - -diff --git a/xen/arch/x86/cpu/common.c b/xen/arch/x86/cpu/common.c -index 78f5667..6cf3628 100644 ---- a/xen/arch/x86/cpu/common.c -+++ b/xen/arch/x86/cpu/common.c -@@ -640,6 +640,7 @@ void __init early_cpu_init(void) - * - Sets up TSS with stack pointers, including ISTs - * - Inserts TSS selector into regular and compat GDTs - * - Loads GDT, IDT, TR then null LDT -+ * - Sets up IST references in the IDT - */ - void load_system_tables(void) - { -@@ -702,6 +703,10 @@ void load_system_tables(void) - asm volatile ("ltr %w0" : : "rm" (TSS_ENTRY << 3) ); - asm volatile ("lldt %w0" : : "rm" (0) ); - -+ set_ist(&idt_tables[cpu][TRAP_double_fault], IST_DF); -+ set_ist(&idt_tables[cpu][TRAP_nmi], IST_NMI); -+ set_ist(&idt_tables[cpu][TRAP_machine_check], IST_MCE); -+ - /* - * Bottom-of-stack must be 16-byte aligned! - * -diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c -index 3ca716c..1609b62 100644 ---- a/xen/arch/x86/smpboot.c -+++ b/xen/arch/x86/smpboot.c -@@ -724,6 +724,9 @@ static int cpu_smpboot_alloc(unsigned int cpu) - if ( idt_tables[cpu] == NULL ) - goto oom; - memcpy(idt_tables[cpu], idt_table, IDT_ENTRIES * sizeof(idt_entry_t)); -+ set_ist(&idt_tables[cpu][TRAP_double_fault], IST_NONE); -+ set_ist(&idt_tables[cpu][TRAP_nmi], IST_NONE); -+ set_ist(&idt_tables[cpu][TRAP_machine_check], IST_NONE); - - for ( stub_page = 0, i = cpu & ~(STUBS_PER_PAGE - 1); - i < nr_cpu_ids && i <= (cpu | (STUBS_PER_PAGE - 1)); ++i ) diff --git a/main/xen/xsa245-1.patch b/main/xen/xsa245-1.patch deleted file mode 100644 index 2047686903..0000000000 --- a/main/xen/xsa245-1.patch +++ /dev/null @@ -1,48 +0,0 @@ -From a48d47febc1340f27d6c716545692641a09b414c Mon Sep 17 00:00:00 2001 -From: Julien Grall <julien.grall@arm.com> -Date: Thu, 21 Sep 2017 14:13:08 +0100 -Subject: [PATCH 1/2] xen/page_alloc: Cover memory unreserved after boot in - first_valid_mfn - -On Arm, some regions (e.g Initramfs, Dom0 Kernel...) are marked as -reserved until the hardware domain is built and they are copied into its -memory. Therefore, they will not be added in the boot allocator via -init_boot_pages. - -Instead, init_xenheap_pages will be called once the region are not used -anymore. - -Update first_valid_mfn in both init_heap_pages and init_boot_pages -(already exist) to cover all the cases. - -Signed-off-by: Julien Grall <julien.grall@arm.com> -[Adjust comment, added locking around first_valid_mfn update] -Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com> ---- - xen/common/page_alloc.c | 10 ++++++++++ - 1 file changed, 10 insertions(+) - -diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c -index 0b9f6cc6df..fbe5a8af39 100644 ---- a/xen/common/page_alloc.c -+++ b/xen/common/page_alloc.c -@@ -1700,6 +1700,16 @@ static void init_heap_pages( - { - unsigned long i; - -+ /* -+ * Some pages may not go through the boot allocator (e.g reserved -+ * memory at boot but released just after --- kernel, initramfs, -+ * etc.). -+ * Update first_valid_mfn to ensure those regions are covered. -+ */ -+ spin_lock(&heap_lock); -+ first_valid_mfn = min_t(unsigned long, page_to_mfn(pg), first_valid_mfn); -+ spin_unlock(&heap_lock); -+ - for ( i = 0; i < nr_pages; i++ ) - { - unsigned int nid = phys_to_nid(page_to_maddr(pg+i)); --- -2.11.0 - diff --git a/main/xen/xsa245-2.patch b/main/xen/xsa245-2.patch deleted file mode 100644 index cd4d2709be..0000000000 --- a/main/xen/xsa245-2.patch +++ /dev/null @@ -1,73 +0,0 @@ -From cbfcf039d0e0b6f4c4cb3de612f7bf788a0c47cd Mon Sep 17 00:00:00 2001 -From: Julien Grall <julien.grall@arm.com> -Date: Mon, 18 Sep 2017 14:24:08 +0100 -Subject: [PATCH 2/2] xen/arm: Correctly report the memory region in the dummy - NUMA helpers - -NUMA is currently not supported on Arm. Because common code is -NUMA-aware, dummy helpers are instead provided to expose a single node. - -Those helpers are for instance used to know the region to scrub. - -However the memory region is not reported correctly. Indeed, the -frametable may not be at the beginning of the memory and there might be -multiple memory banks. This will lead to not scrub some part of the -memory. - -The memory information can be found using: - * first_valid_mfn as the start of the memory - * max_page - first_valid_mfn as the spanned pages - -Note that first_valid_mfn is now been exported. The prototype has been -added in asm-arm/numa.h and not in a common header because I would -expect the variable to become static once NUMA is fully supported on -Arm. - -Signed-off-by: Julien Grall <julien.grall@arm.com> ---- - xen/common/page_alloc.c | 6 +++++- - xen/include/asm-arm/numa.h | 10 ++++++++-- - 2 files changed, 13 insertions(+), 3 deletions(-) - -diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c -index fbe5a8af39..472c6fe329 100644 ---- a/xen/common/page_alloc.c -+++ b/xen/common/page_alloc.c -@@ -192,7 +192,11 @@ PAGE_LIST_HEAD(page_broken_list); - * BOOT-TIME ALLOCATOR - */ - --static unsigned long __initdata first_valid_mfn = ~0UL; -+/* -+ * first_valid_mfn is exported because it is use in ARM specific NUMA -+ * helpers. See comment in asm-arm/numa.h. -+ */ -+unsigned long first_valid_mfn = ~0UL; - - static struct bootmem_region { - unsigned long s, e; /* MFNs @s through @e-1 inclusive are free */ -diff --git a/xen/include/asm-arm/numa.h b/xen/include/asm-arm/numa.h -index a2c1a3476d..3e7384da9e 100644 ---- a/xen/include/asm-arm/numa.h -+++ b/xen/include/asm-arm/numa.h -@@ -12,9 +12,15 @@ static inline __attribute__((pure)) nodeid_t phys_to_nid(paddr_t addr) - return 0; - } - -+/* -+ * TODO: make first_valid_mfn static when NUMA is supported on Arm, this -+ * is required because the dummy helpers is using it. -+ */ -+extern unsigned long first_valid_mfn; -+ - /* XXX: implement NUMA support */ --#define node_spanned_pages(nid) (total_pages) --#define node_start_pfn(nid) (pdx_to_pfn(frametable_base_pdx)) -+#define node_spanned_pages(nid) (max_page - first_valid_mfn) -+#define node_start_pfn(nid) (first_valid_mfn) - #define __node_distance(a, b) (20) - - static inline unsigned int arch_get_dma_bitsize(void) --- -2.11.0 - |