diff options
author | Natanael Copa <ncopa@alpinelinux.org> | 2014-08-26 06:22:11 +0000 |
---|---|---|
committer | Natanael Copa <ncopa@alpinelinux.org> | 2014-08-26 06:25:02 +0000 |
commit | 695a72617ae53a60aaefe8567f3e245882e5d6b8 (patch) | |
tree | b9bfed31b74298ec3a51f1e1ab5da4f201aa1f8c | |
parent | 0dabf557b83072a583dfb8c316048783039fe34e (diff) | |
download | aports-695a72617ae53a60aaefe8567f3e245882e5d6b8.tar.bz2 aports-695a72617ae53a60aaefe8567f3e245882e5d6b8.tar.xz |
main/xen: upgrade to 4.2.4 and fix XSA-97 (CVE-2014-5146,CVE-2014-5149)
fixes #3292
-rw-r--r-- | main/xen/APKBUILD | 62 | ||||
-rw-r--r-- | main/xen/xsa45-4.2.patch | 1133 | ||||
-rw-r--r-- | main/xen/xsa48-4.2.patch | 114 | ||||
-rw-r--r-- | main/xen/xsa52-4.2-unstable.patch | 46 | ||||
-rw-r--r-- | main/xen/xsa53-4.2.patch | 57 | ||||
-rw-r--r-- | main/xen/xsa54.patch | 24 | ||||
-rw-r--r-- | main/xen/xsa55.patch | 3431 | ||||
-rw-r--r-- | main/xen/xsa56.patch | 50 | ||||
-rw-r--r-- | main/xen/xsa57.patch | 333 | ||||
-rw-r--r-- | main/xen/xsa58-4.2.patch | 129 | ||||
-rw-r--r-- | main/xen/xsa61-4.2-unstable.patch | 44 | ||||
-rw-r--r-- | main/xen/xsa75-4.2.patch | 53 | ||||
-rw-r--r-- | main/xen/xsa97-hap-4_2-prereq.patch | 466 | ||||
-rw-r--r-- | main/xen/xsa97-hap-4_2.patch | 485 |
14 files changed, 964 insertions, 5463 deletions
diff --git a/main/xen/APKBUILD b/main/xen/APKBUILD index 892efc4528..62962ec0a4 100644 --- a/main/xen/APKBUILD +++ b/main/xen/APKBUILD @@ -2,8 +2,8 @@ # Contributor: Roger Pau Monne <roger.pau@entel.upc.edu> # Maintainer: William Pitcock <nenolod@dereferenced.org> pkgname=xen -pkgver=4.2.2 -pkgrel=8 +pkgver=4.2.4 +pkgrel=0 pkgdesc="Xen hypervisor" url="http://www.xen.org/" arch="x86 x86_64" @@ -23,17 +23,8 @@ source="http://bits.xensource.com/oss-xen/release/$pkgver/$pkgname-$pkgver.tar.g xsa41.patch xsa41b.patch xsa41c.patch - xsa45-4.2.patch - xsa48-4.2.patch - xsa52-4.2-unstable.patch - xsa53-4.2.patch - xsa54.patch - xsa55.patch - xsa56.patch - xsa57.patch - xsa58-4.2.patch - xsa61-4.2-unstable.patch - xsa75-4.2.patch + xsa97-hap-4_2-prereq.patch + xsa97-hap-4_2.patch fix-pod2man-choking.patch @@ -149,7 +140,7 @@ xend() { -exec mv '{}' "$subpkgdir"/"$sitepackages"/xen \; } -md5sums="f7362b19401a47826f2d8fd603a1782a xen-4.2.2.tar.gz +md5sums="b32be39471c93249828b117473adca9d xen-4.2.4.tar.gz 506e7ab6f9482dc95f230978d340bcd9 qemu_uclibc_configure.patch 2dc5ddf47c53ea168729975046c3c1f9 librt.patch 1ccde6b36a6f9542a16d998204dc9a22 qemu-xen_paths.patch @@ -157,17 +148,8 @@ md5sums="f7362b19401a47826f2d8fd603a1782a xen-4.2.2.tar.gz 8ad8942000b8a4be4917599cad9209cf xsa41.patch ed7d0399c6ca6aeee479da5d8f807fe0 xsa41b.patch 2f3dd7bdc59d104370066d6582725575 xsa41c.patch -9265540493f41f7d40c48d0886ec5823 xsa45-4.2.patch -b3e3a57d189a4f86c9766eaf3b5207f4 xsa48-4.2.patch -83a9cdd035bcd18bf035434a1ba08c38 xsa52-4.2-unstable.patch -03a1a4ebc470ee7e638e04db2701a4f7 xsa53-4.2.patch -a8393d1ec6b886ea72ffe624a04ee10a xsa54.patch -42cd104f2a33d67938a63a6372cff573 xsa55.patch -e70b9128ffc2175cea314a533a7d8457 xsa56.patch -7475158130474ee062a4eb878259af61 xsa57.patch -7de2cd11c10d6a554f3c81e0688c38b7 xsa58-4.2.patch -d2b6cd997b025e55d4a9f98c6bd6839b xsa61-4.2-unstable.patch -3a7da1a0f6c39e7e2f422c6455a97ccd xsa75-4.2.patch +4778066a3338ca9a2263048e6a22bb6b xsa97-hap-4_2-prereq.patch +052b4144e2eef36757a28e7011d0ac74 xsa97-hap-4_2.patch c1d1a415415b0192e5dae9032962bf61 fix-pod2man-choking.patch a90c36642f0701a8aaa4ebe4dde430f5 xenstored.initd b017ccdd5e1c27bbf1513e3569d4ff07 xenstored.confd @@ -180,7 +162,7 @@ fa8c72b42e0479d521a353386d8543ef xendomains.initd 9df68ac65dc3f372f5d61183abdc83ff xen-consoles.logrotate 6a2f777c16678d84039acf670d86fff6 xenqemu.confd f9afbf39e2b5a7d9dde60ebbd249ea7d xenqemu.initd" -sha256sums="c9bfe91a5e72f8545acebad9889d64368020359bfe18044c0e683133e55ae005 xen-4.2.2.tar.gz +sha256sums="e23e6292affd7a6d82da99527e8bf3964e57eb7322144c67c2025692b1a21550 xen-4.2.4.tar.gz 4fb92fa1ce67eb3f78a15c6c971415d4d53599904969596acc7a52edc83a5fee qemu_uclibc_configure.patch 12bf32f9937b09283f2df4955b50d6739768f66137a7d991f661f45cf77cb53b librt.patch 9440ca31a6911201f02694e93faafb5ca9b17de18b7f15b53ceac39a03411b4a qemu-xen_paths.patch @@ -188,17 +170,8 @@ a0c225d716d343fe041b63e3940900c5b3573ed3bcfc5b7c2d52ea2861c3fc28 docs-Fix-gener 93452beba88a8da8e89b8bfa743074a358ba1d9052151c608e21c4d62f8c4867 xsa41.patch 896a07f57310c9bea9bc2a305166cf796282c381cb7839be49105b1726a860b5 xsa41b.patch 683dd96a0a8899f794070c8c09643dfeeb39f92da531955cba961b45f6075914 xsa41c.patch -f3c8c75cc6f55409139b1928017d1e432e5e64b6fac2083395f4723353e1c775 xsa45-4.2.patch -dc23077028584e71a08dd0dc9e81552c76744a5ce9d39df5958a95ae9cf3107b xsa48-4.2.patch -5b8582185bf90386729e81db1f7780c69a891b074a87d9a619a90d6f639bea13 xsa52-4.2-unstable.patch -785f7612bd229f7501f4e98e4760f307d90c64305ee14707d262b77f05fa683d xsa53-4.2.patch -5d94946b3c9cba52aae2bffd4b0ebb11d09181650b5322a3c85170674a05f6b7 xsa54.patch -ac3ebaf3ec37e28ba08e23d63626d7aaccf0a3f282dd0af9c24cc4df3fd8fae0 xsa55.patch -a691c5f5332a42c0d38ddb4dc037eb902f01ba31033b64c47d02909a8de0257d xsa56.patch -b6a5106848541972519cc529859d9ff3083c79367276c7031560fa4ce6f9f770 xsa57.patch -194d6610fc38b767d643e5d58a1268f45921fb35e309b47aca6a388b861311c2 xsa58-4.2.patch -5898926de86dd6a27f8e34a2c103e3d0c6267b1d7d947434f294423ed3b0eefd xsa61-4.2-unstable.patch -0b2da4ede6507713c75e313ba468b1fd7110e5696974ab72e2135f41ee393a8b xsa75-4.2.patch +c525a99263eed6f93fad685ae9dad1ae10c8930345ec52659211541640797bb5 xsa97-hap-4_2-prereq.patch +c9e0e9f136db1b976ea371be10430598a7f21b4a33b4849f2081566657ff5da1 xsa97-hap-4_2.patch b4e7d43364a06b2cb04527db3e9567524bc489fef475709fd8493ebf1e62406d fix-pod2man-choking.patch 868c77d689ae54b7041da169bfaa01868503337d4105a071eb771f4ec5a0543d xenstored.initd ea9171e71ab3d33061979bcf3bb737156192aa4b0be4d1234438ced75b6fdef3 xenstored.confd @@ -211,7 +184,7 @@ a50a4485e84bcc098ad021556cd2aa7947c228f0a546ab942e880787ced57be3 xend.initd 0da87a4b9094f934e3de937e8ef8d3afc752e76793aa3d730182d0241e118b19 xen-consoles.logrotate 4cfcddcade5d055422ab4543e8caa6e5c5eee7625c41880a9000b7a87c7c424e xenqemu.confd bf17808a79c57a9efc38b9f14cc87f556b2bb7ecfdec5763d9cf686255a47fce xenqemu.initd" -sha512sums="4943b18016ed8c2b194a3b55e6655b3b734b39ffb8cb7ee0a0580f2f4460a1d0e92e1de8ac23f5186272914fad1650586af51fd7c3644d0310eb16f2e11c5e80 xen-4.2.2.tar.gz +sha512sums="3e5263511e7c40899f580f3384bd987f9c875b8e6816202fd1a5a64fe7e336803d09e58148af074938ef261f0ceeafad121ac541ddd2bf66b76c5aa4ad07c357 xen-4.2.4.tar.gz 81a5555c123daad6a9a1835186a82d604e68d833efe3a6576a88717268e5335f809a6621846645c2e1eb1d33a51951a6306e4c393a76c677959149bc28a886be qemu_uclibc_configure.patch 74e3cfc51e367fc445cb3d8149f0c8830e94719a266daf04d2cd0889864591860c4c8842de2bc78070e4c5be7d14dfbb8b236c511d5faeddc2ad97177c1d3764 librt.patch 425149aea57a6deae9f488cea867f125983998dc6e8c63893fb3b9caf0ea34214251dd98ad74db823f5168631c44c49b988b6fe9c11b76bd493ddf51bc0baaa2 qemu-xen_paths.patch @@ -219,17 +192,8 @@ sha512sums="4943b18016ed8c2b194a3b55e6655b3b734b39ffb8cb7ee0a0580f2f4460a1d0e92e 94672a4d37db4e370370157cac9507ee1a75832f4be779fba148c1faa0b18f26ed57126eee6256ccd5d218463325a730266b53139554f4865adedb7659154c16 xsa41.patch bda9105793f2327e1317991762120d0668af0e964076b18c9fdbfd509984b2e88d85df95702c46b2e00d5350e8113f6aa7b34b19064d19abbeb4d43f0c431d38 xsa41b.patch 36b60478660ff7748328f5ab9adff13286eee1a1bad06e42fdf7e6aafe105103988525725aacd660cf5b2a184a9e2d6b3818655203c1fa07e07dcebdf23f35d9 xsa41c.patch -a57b4c8be76a938d51e51ffb39f0781389ebef320f359b0ae9af4a93af970d37dde50a304d4864a75b7fb32861a4745b9da5fa6acce0f2a688b11b13ab43fb4e xsa45-4.2.patch -31dd8c62d41cc0a01a79d9b24a5b793f5e2058230808d9c5364c6ff3477ab02f3258f1bbd761d97dc1b97ee120b41524b999eaac77f33b606496fc324b5fa2e4 xsa48-4.2.patch -b64a965fab8534958e453c493211ed3a6555aafb90d18f6d56a45b41d3086a0029aee85b6b6eb93b0d861d5fdc0ef10fc32e9b4f83593b37c43922d838085dd8 xsa52-4.2-unstable.patch -9b08924e563e79d2b308c1521da520c0579b334b61ac99a5593eabdb96dbda2da898b542cc47bda6d663c68343216d9d29c04853b6d1b6ecdde964b0cbb3f7ab xsa53-4.2.patch -c9010be637d4f96ef03c880e1ef28228f762c5980108380a105bd190b631a882c8dff81e9421246d88d597e72f69ad1a8c672be6ddd06936acfcacd4575a2650 xsa54.patch -b4f43095163146a29ae258575bb03bd45f5a315d3cca7434a0b88c18eb1b6e1cf17ef13b4ac428a08797271a3dbc756d3f705a990991c8d2fc96f0f272c3665a xsa55.patch -26a1c2cc92ddd4c1ab6712b0e41a0135d0e76a7fe3a14b651fb0235e352e5a24077414371acccb93058b7ce4d882b667386811170ba74570c53165837bcd983d xsa56.patch -5ccc1654d9f0270485495f9fc913e41663ddbda602ffe049e0a9c3247c6246690b7ec4165482f96921c5253a2a5205ca384048339996e611c07ab60a6a75cf6a xsa57.patch -60813c01f6bb909da8748919df4d0ffa923baf4b7b55287e0bec3389fb83020158225182e112941c9e126b4df57e7b8724f2a69d0c1fa9ce3b37c0bdf1a49da4 xsa58-4.2.patch -e733a782740cc51841992cf1954f3ba2f6d5083a1a7810e8445bd1e827b12d9d7a366ca59edc190279cb4e36131fecd8968ce805b7b5d6cc6d0b8280e706eec5 xsa61-4.2-unstable.patch -97ca0e999003e39ab62618e99588c67121f841188d93c8d8cf845d548a52672ec6228f1f2dd9f4ee25de3fdd61bd28012a4714bb01307b6861d792dedca34de0 xsa75-4.2.patch +f90ad305678aeabb95d75a73684a4e18bdd20b6365fe55a001e27414af7d9995945077938df14431283e71c6bd0b871c2a6ab1c206b29366146b5fc647f2ed69 xsa97-hap-4_2-prereq.patch +d6730e2fecc4092a6eb49ffa354bcc790c06cd6892903d7c486a063bb08b005d232d33bf4367831bbd301cabd05c34e47a50373b1d4883895a9a470e1d5dfcc0 xsa97-hap-4_2.patch ffb1113fcec0853b690c177655c7d1136388efdebf0d7f625b80481b98eadd3e9ef461442ced53e11acf0e347800a2b0a41e18b05065b5d04bffdd8a4e127cec fix-pod2man-choking.patch 880584e0866b1efcf3b7a934f07072ec84c13c782e3e7a15848d38ba8af50259d46db037dca1e037b15274989f2c22acd1134954dd60c59f4ee693b417d03e0d xenstored.initd 100cf4112f401f45c1e4e885a5074698c484b40521262f6268fad286498e95f4c51e746f0e94eb43a590bb8e813a397bb53801ccacebec9541020799d8d70514 xenstored.confd diff --git a/main/xen/xsa45-4.2.patch b/main/xen/xsa45-4.2.patch deleted file mode 100644 index dfdfdea64b..0000000000 --- a/main/xen/xsa45-4.2.patch +++ /dev/null @@ -1,1133 +0,0 @@ -diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c -index 26a7f12..b97ac6d 100644 ---- a/xen/arch/x86/domain.c -+++ b/xen/arch/x86/domain.c -@@ -73,8 +73,6 @@ void (*dead_idle) (void) __read_mostly = default_dead_idle; - static void paravirt_ctxt_switch_from(struct vcpu *v); - static void paravirt_ctxt_switch_to(struct vcpu *v); - --static void vcpu_destroy_pagetables(struct vcpu *v); -- - static void default_idle(void) - { - local_irq_disable(); -@@ -860,6 +858,9 @@ int arch_set_info_guest( - - if ( !v->is_initialised ) - { -+ if ( !compat && !(flags & VGCF_in_kernel) && !c.nat->ctrlreg[1] ) -+ return -EINVAL; -+ - v->arch.pv_vcpu.ldt_base = c(ldt_base); - v->arch.pv_vcpu.ldt_ents = c(ldt_ents); - } -@@ -957,24 +958,44 @@ int arch_set_info_guest( - if ( rc != 0 ) - return rc; - -+ set_bit(_VPF_in_reset, &v->pause_flags); -+ - if ( !compat ) -- { - cr3_gfn = xen_cr3_to_pfn(c.nat->ctrlreg[3]); -- cr3_page = get_page_from_gfn(d, cr3_gfn, NULL, P2M_ALLOC); -- -- if ( !cr3_page ) -- { -- destroy_gdt(v); -- return -EINVAL; -- } -- if ( !paging_mode_refcounts(d) -- && !get_page_type(cr3_page, PGT_base_page_table) ) -- { -- put_page(cr3_page); -- destroy_gdt(v); -- return -EINVAL; -- } -+#ifdef CONFIG_COMPAT -+ else -+ cr3_gfn = compat_cr3_to_pfn(c.cmp->ctrlreg[3]); -+#endif -+ cr3_page = get_page_from_gfn(d, cr3_gfn, NULL, P2M_ALLOC); - -+ if ( !cr3_page ) -+ rc = -EINVAL; -+ else if ( paging_mode_refcounts(d) ) -+ /* nothing */; -+ else if ( cr3_page == v->arch.old_guest_table ) -+ { -+ v->arch.old_guest_table = NULL; -+ put_page(cr3_page); -+ } -+ else -+ { -+ /* -+ * Since v->arch.guest_table{,_user} are both NULL, this effectively -+ * is just a call to put_old_guest_table(). -+ */ -+ if ( !compat ) -+ rc = vcpu_destroy_pagetables(v); -+ if ( !rc ) -+ rc = get_page_type_preemptible(cr3_page, -+ !compat ? PGT_root_page_table -+ : PGT_l3_page_table); -+ if ( rc == -EINTR ) -+ rc = -EAGAIN; -+ } -+ if ( rc ) -+ /* handled below */; -+ else if ( !compat ) -+ { - v->arch.guest_table = pagetable_from_page(cr3_page); - #ifdef __x86_64__ - if ( c.nat->ctrlreg[1] ) -@@ -982,56 +1003,44 @@ int arch_set_info_guest( - cr3_gfn = xen_cr3_to_pfn(c.nat->ctrlreg[1]); - cr3_page = get_page_from_gfn(d, cr3_gfn, NULL, P2M_ALLOC); - -- if ( !cr3_page || -- (!paging_mode_refcounts(d) -- && !get_page_type(cr3_page, PGT_base_page_table)) ) -+ if ( !cr3_page ) -+ rc = -EINVAL; -+ else if ( !paging_mode_refcounts(d) ) - { -- if (cr3_page) -- put_page(cr3_page); -- cr3_page = pagetable_get_page(v->arch.guest_table); -- v->arch.guest_table = pagetable_null(); -- if ( paging_mode_refcounts(d) ) -- put_page(cr3_page); -- else -- put_page_and_type(cr3_page); -- destroy_gdt(v); -- return -EINVAL; -+ rc = get_page_type_preemptible(cr3_page, PGT_root_page_table); -+ switch ( rc ) -+ { -+ case -EINTR: -+ rc = -EAGAIN; -+ case -EAGAIN: -+ v->arch.old_guest_table = -+ pagetable_get_page(v->arch.guest_table); -+ v->arch.guest_table = pagetable_null(); -+ break; -+ } - } -- -- v->arch.guest_table_user = pagetable_from_page(cr3_page); -- } -- else if ( !(flags & VGCF_in_kernel) ) -- { -- destroy_gdt(v); -- return -EINVAL; -+ if ( !rc ) -+ v->arch.guest_table_user = pagetable_from_page(cr3_page); - } - } - else - { - l4_pgentry_t *l4tab; - -- cr3_gfn = compat_cr3_to_pfn(c.cmp->ctrlreg[3]); -- cr3_page = get_page_from_gfn(d, cr3_gfn, NULL, P2M_ALLOC); -- -- if ( !cr3_page) -- { -- destroy_gdt(v); -- return -EINVAL; -- } -- -- if (!paging_mode_refcounts(d) -- && !get_page_type(cr3_page, PGT_l3_page_table) ) -- { -- put_page(cr3_page); -- destroy_gdt(v); -- return -EINVAL; -- } -- - l4tab = __va(pagetable_get_paddr(v->arch.guest_table)); - *l4tab = l4e_from_pfn(page_to_mfn(cr3_page), - _PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_ACCESSED); - #endif - } -+ if ( rc ) -+ { -+ if ( cr3_page ) -+ put_page(cr3_page); -+ destroy_gdt(v); -+ return rc; -+ } -+ -+ clear_bit(_VPF_in_reset, &v->pause_flags); - - if ( v->vcpu_id == 0 ) - update_domain_wallclock_time(d); -@@ -1053,17 +1062,16 @@ int arch_set_info_guest( - #undef c - } - --void arch_vcpu_reset(struct vcpu *v) -+int arch_vcpu_reset(struct vcpu *v) - { - if ( !is_hvm_vcpu(v) ) - { - destroy_gdt(v); -- vcpu_destroy_pagetables(v); -- } -- else -- { -- vcpu_end_shutdown_deferral(v); -+ return vcpu_destroy_pagetables(v); - } -+ -+ vcpu_end_shutdown_deferral(v); -+ return 0; - } - - /* -@@ -2069,63 +2077,6 @@ static int relinquish_memory( - return ret; - } - --static void vcpu_destroy_pagetables(struct vcpu *v) --{ -- struct domain *d = v->domain; -- unsigned long pfn; -- --#ifdef __x86_64__ -- if ( is_pv_32on64_vcpu(v) ) -- { -- pfn = l4e_get_pfn(*(l4_pgentry_t *) -- __va(pagetable_get_paddr(v->arch.guest_table))); -- -- if ( pfn != 0 ) -- { -- if ( paging_mode_refcounts(d) ) -- put_page(mfn_to_page(pfn)); -- else -- put_page_and_type(mfn_to_page(pfn)); -- } -- -- l4e_write( -- (l4_pgentry_t *)__va(pagetable_get_paddr(v->arch.guest_table)), -- l4e_empty()); -- -- v->arch.cr3 = 0; -- return; -- } --#endif -- -- pfn = pagetable_get_pfn(v->arch.guest_table); -- if ( pfn != 0 ) -- { -- if ( paging_mode_refcounts(d) ) -- put_page(mfn_to_page(pfn)); -- else -- put_page_and_type(mfn_to_page(pfn)); -- v->arch.guest_table = pagetable_null(); -- } -- --#ifdef __x86_64__ -- /* Drop ref to guest_table_user (from MMUEXT_NEW_USER_BASEPTR) */ -- pfn = pagetable_get_pfn(v->arch.guest_table_user); -- if ( pfn != 0 ) -- { -- if ( !is_pv_32bit_vcpu(v) ) -- { -- if ( paging_mode_refcounts(d) ) -- put_page(mfn_to_page(pfn)); -- else -- put_page_and_type(mfn_to_page(pfn)); -- } -- v->arch.guest_table_user = pagetable_null(); -- } --#endif -- -- v->arch.cr3 = 0; --} -- - int domain_relinquish_resources(struct domain *d) - { - int ret; -@@ -2143,7 +2094,11 @@ int domain_relinquish_resources(struct domain *d) - - /* Drop the in-use references to page-table bases. */ - for_each_vcpu ( d, v ) -- vcpu_destroy_pagetables(v); -+ { -+ ret = vcpu_destroy_pagetables(v); -+ if ( ret ) -+ return ret; -+ } - - if ( !is_hvm_domain(d) ) - { -diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c -index 3d471a5..efacc98 100644 ---- a/xen/arch/x86/hvm/hvm.c -+++ b/xen/arch/x86/hvm/hvm.c -@@ -3509,8 +3509,11 @@ static void hvm_s3_suspend(struct domain *d) - - for_each_vcpu ( d, v ) - { -+ int rc; -+ - vlapic_reset(vcpu_vlapic(v)); -- vcpu_reset(v); -+ rc = vcpu_reset(v); -+ ASSERT(!rc); - } - - vpic_reset(d); -diff --git a/xen/arch/x86/hvm/vlapic.c b/xen/arch/x86/hvm/vlapic.c -index 52d111b..7778342 100644 ---- a/xen/arch/x86/hvm/vlapic.c -+++ b/xen/arch/x86/hvm/vlapic.c -@@ -252,10 +252,13 @@ static void vlapic_init_sipi_action(unsigned long _vcpu) - { - case APIC_DM_INIT: { - bool_t fpu_initialised; -+ int rc; -+ - domain_lock(target->domain); - /* Reset necessary VCPU state. This does not include FPU state. */ - fpu_initialised = target->fpu_initialised; -- vcpu_reset(target); -+ rc = vcpu_reset(target); -+ ASSERT(!rc); - target->fpu_initialised = fpu_initialised; - vlapic_reset(vcpu_vlapic(target)); - domain_unlock(target->domain); -diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c -index 8444610..055f307 100644 ---- a/xen/arch/x86/mm.c -+++ b/xen/arch/x86/mm.c -@@ -1241,7 +1241,16 @@ static int put_page_from_l3e(l3_pgentry_t l3e, unsigned long pfn, - #endif - - if ( unlikely(partial > 0) ) -+ { -+ ASSERT(preemptible >= 0); - return __put_page_type(l3e_get_page(l3e), preemptible); -+ } -+ -+ if ( preemptible < 0 ) -+ { -+ current->arch.old_guest_table = l3e_get_page(l3e); -+ return 0; -+ } - - return put_page_and_type_preemptible(l3e_get_page(l3e), preemptible); - } -@@ -1254,7 +1263,17 @@ static int put_page_from_l4e(l4_pgentry_t l4e, unsigned long pfn, - (l4e_get_pfn(l4e) != pfn) ) - { - if ( unlikely(partial > 0) ) -+ { -+ ASSERT(preemptible >= 0); - return __put_page_type(l4e_get_page(l4e), preemptible); -+ } -+ -+ if ( preemptible < 0 ) -+ { -+ current->arch.old_guest_table = l4e_get_page(l4e); -+ return 0; -+ } -+ - return put_page_and_type_preemptible(l4e_get_page(l4e), preemptible); - } - return 1; -@@ -1549,12 +1568,17 @@ static int alloc_l3_table(struct page_info *page, int preemptible) - if ( rc < 0 && rc != -EAGAIN && rc != -EINTR ) - { - MEM_LOG("Failure in alloc_l3_table: entry %d", i); -+ if ( i ) -+ { -+ page->nr_validated_ptes = i; -+ page->partial_pte = 0; -+ current->arch.old_guest_table = page; -+ } - while ( i-- > 0 ) - { - if ( !is_guest_l3_slot(i) ) - continue; - unadjust_guest_l3e(pl3e[i], d); -- put_page_from_l3e(pl3e[i], pfn, 0, 0); - } - } - -@@ -1584,22 +1608,24 @@ static int alloc_l4_table(struct page_info *page, int preemptible) - page->nr_validated_ptes = i; - page->partial_pte = partial ?: 1; - } -- else if ( rc == -EINTR ) -+ else if ( rc < 0 ) - { -+ if ( rc != -EINTR ) -+ MEM_LOG("Failure in alloc_l4_table: entry %d", i); - if ( i ) - { - page->nr_validated_ptes = i; - page->partial_pte = 0; -- rc = -EAGAIN; -+ if ( rc == -EINTR ) -+ rc = -EAGAIN; -+ else -+ { -+ if ( current->arch.old_guest_table ) -+ page->nr_validated_ptes++; -+ current->arch.old_guest_table = page; -+ } - } - } -- else if ( rc < 0 ) -- { -- MEM_LOG("Failure in alloc_l4_table: entry %d", i); -- while ( i-- > 0 ) -- if ( is_guest_l4_slot(d, i) ) -- put_page_from_l4e(pl4e[i], pfn, 0, 0); -- } - if ( rc < 0 ) - return rc; - -@@ -2047,7 +2073,7 @@ static int mod_l3_entry(l3_pgentry_t *pl3e, - pae_flush_pgd(pfn, pgentry_ptr_to_slot(pl3e), nl3e); - } - -- put_page_from_l3e(ol3e, pfn, 0, 0); -+ put_page_from_l3e(ol3e, pfn, 0, -preemptible); - return rc; - } - -@@ -2110,7 +2136,7 @@ static int mod_l4_entry(l4_pgentry_t *pl4e, - return -EFAULT; - } - -- put_page_from_l4e(ol4e, pfn, 0, 0); -+ put_page_from_l4e(ol4e, pfn, 0, -preemptible); - return rc; - } - -@@ -2268,7 +2294,15 @@ static int alloc_page_type(struct page_info *page, unsigned long type, - PRtype_info ": caf=%08lx taf=%" PRtype_info, - page_to_mfn(page), get_gpfn_from_mfn(page_to_mfn(page)), - type, page->count_info, page->u.inuse.type_info); -- page->u.inuse.type_info = 0; -+ if ( page != current->arch.old_guest_table ) -+ page->u.inuse.type_info = 0; -+ else -+ { -+ ASSERT((page->u.inuse.type_info & -+ (PGT_count_mask | PGT_validated)) == 1); -+ get_page_light(page); -+ page->u.inuse.type_info |= PGT_partial; -+ } - } - else - { -@@ -2808,49 +2842,150 @@ static void put_superpage(unsigned long mfn) - - #endif - -+static int put_old_guest_table(struct vcpu *v) -+{ -+ int rc; -+ -+ if ( !v->arch.old_guest_table ) -+ return 0; -+ -+ switch ( rc = put_page_and_type_preemptible(v->arch.old_guest_table, 1) ) -+ { -+ case -EINTR: -+ case -EAGAIN: -+ return -EAGAIN; -+ } -+ -+ v->arch.old_guest_table = NULL; -+ -+ return rc; -+} -+ -+int vcpu_destroy_pagetables(struct vcpu *v) -+{ -+ unsigned long mfn = pagetable_get_pfn(v->arch.guest_table); -+ struct page_info *page; -+ int rc = put_old_guest_table(v); -+ -+ if ( rc ) -+ return rc; -+ -+#ifdef __x86_64__ -+ if ( is_pv_32on64_vcpu(v) ) -+ mfn = l4e_get_pfn(*(l4_pgentry_t *)mfn_to_virt(mfn)); -+#endif -+ -+ if ( mfn ) -+ { -+ page = mfn_to_page(mfn); -+ if ( paging_mode_refcounts(v->domain) ) -+ put_page(page); -+ else -+ rc = put_page_and_type_preemptible(page, 1); -+ } -+ -+#ifdef __x86_64__ -+ if ( is_pv_32on64_vcpu(v) ) -+ { -+ if ( !rc ) -+ l4e_write( -+ (l4_pgentry_t *)__va(pagetable_get_paddr(v->arch.guest_table)), -+ l4e_empty()); -+ } -+ else -+#endif -+ if ( !rc ) -+ { -+ v->arch.guest_table = pagetable_null(); -+ -+#ifdef __x86_64__ -+ /* Drop ref to guest_table_user (from MMUEXT_NEW_USER_BASEPTR) */ -+ mfn = pagetable_get_pfn(v->arch.guest_table_user); -+ if ( mfn ) -+ { -+ page = mfn_to_page(mfn); -+ if ( paging_mode_refcounts(v->domain) ) -+ put_page(page); -+ else -+ rc = put_page_and_type_preemptible(page, 1); -+ } -+ if ( !rc ) -+ v->arch.guest_table_user = pagetable_null(); -+#endif -+ } -+ -+ v->arch.cr3 = 0; -+ -+ return rc; -+} - - int new_guest_cr3(unsigned long mfn) - { - struct vcpu *curr = current; - struct domain *d = curr->domain; -- int okay; -+ int rc; - unsigned long old_base_mfn; - - #ifdef __x86_64__ - if ( is_pv_32on64_domain(d) ) - { -- okay = paging_mode_refcounts(d) -- ? 0 /* Old code was broken, but what should it be? */ -- : mod_l4_entry( -+ rc = paging_mode_refcounts(d) -+ ? -EINVAL /* Old code was broken, but what should it be? */ -+ : mod_l4_entry( - __va(pagetable_get_paddr(curr->arch.guest_table)), - l4e_from_pfn( - mfn, - (_PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_ACCESSED)), -- pagetable_get_pfn(curr->arch.guest_table), 0, 0, curr) == 0; -- if ( unlikely(!okay) ) -+ pagetable_get_pfn(curr->arch.guest_table), 0, 1, curr); -+ switch ( rc ) - { -+ case 0: -+ break; -+ case -EINTR: -+ case -EAGAIN: -+ return -EAGAIN; -+ default: - MEM_LOG("Error while installing new compat baseptr %lx", mfn); -- return 0; -+ return rc; - } - - invalidate_shadow_ldt(curr, 0); - write_ptbase(curr); - -- return 1; -+ return 0; - } - #endif -- okay = paging_mode_refcounts(d) -- ? get_page_from_pagenr(mfn, d) -- : !get_page_and_type_from_pagenr(mfn, PGT_root_page_table, d, 0, 0); -- if ( unlikely(!okay) ) -+ rc = put_old_guest_table(curr); -+ if ( unlikely(rc) ) -+ return rc; -+ -+ old_base_mfn = pagetable_get_pfn(curr->arch.guest_table); -+ /* -+ * This is particularly important when getting restarted after the -+ * previous attempt got preempted in the put-old-MFN phase. -+ */ -+ if ( old_base_mfn == mfn ) - { -- MEM_LOG("Error while installing new baseptr %lx", mfn); -+ write_ptbase(curr); - return 0; - } - -- invalidate_shadow_ldt(curr, 0); -+ rc = paging_mode_refcounts(d) -+ ? (get_page_from_pagenr(mfn, d) ? 0 : -EINVAL) -+ : get_page_and_type_from_pagenr(mfn, PGT_root_page_table, d, 0, 1); -+ switch ( rc ) -+ { -+ case 0: -+ break; -+ case -EINTR: -+ case -EAGAIN: -+ return -EAGAIN; -+ default: -+ MEM_LOG("Error while installing new baseptr %lx", mfn); -+ return rc; -+ } - -- old_base_mfn = pagetable_get_pfn(curr->arch.guest_table); -+ invalidate_shadow_ldt(curr, 0); - - curr->arch.guest_table = pagetable_from_pfn(mfn); - update_cr3(curr); -@@ -2859,13 +2994,25 @@ int new_guest_cr3(unsigned long mfn) - - if ( likely(old_base_mfn != 0) ) - { -+ struct page_info *page = mfn_to_page(old_base_mfn); -+ - if ( paging_mode_refcounts(d) ) -- put_page(mfn_to_page(old_base_mfn)); -+ put_page(page); - else -- put_page_and_type(mfn_to_page(old_base_mfn)); -+ switch ( rc = put_page_and_type_preemptible(page, 1) ) -+ { -+ case -EINTR: -+ rc = -EAGAIN; -+ case -EAGAIN: -+ curr->arch.old_guest_table = page; -+ break; -+ default: -+ BUG_ON(rc); -+ break; -+ } - } - -- return 1; -+ return rc; - } - - static struct domain *get_pg_owner(domid_t domid) -@@ -2994,12 +3141,29 @@ long do_mmuext_op( - unsigned int foreigndom) - { - struct mmuext_op op; -- int rc = 0, i = 0, okay; - unsigned long type; -- unsigned int done = 0; -+ unsigned int i = 0, done = 0; - struct vcpu *curr = current; - struct domain *d = curr->domain; - struct domain *pg_owner; -+ int okay, rc = put_old_guest_table(curr); -+ -+ if ( unlikely(rc) ) -+ { -+ if ( likely(rc == -EAGAIN) ) -+ rc = hypercall_create_continuation( -+ __HYPERVISOR_mmuext_op, "hihi", uops, count, pdone, -+ foreigndom); -+ return rc; -+ } -+ -+ if ( unlikely(count == MMU_UPDATE_PREEMPTED) && -+ likely(guest_handle_is_null(uops)) ) -+ { -+ /* See the curr->arch.old_guest_table related -+ * hypercall_create_continuation() below. */ -+ return (int)foreigndom; -+ } - - if ( unlikely(count & MMU_UPDATE_PREEMPTED) ) - { -@@ -3024,7 +3188,7 @@ long do_mmuext_op( - - for ( i = 0; i < count; i++ ) - { -- if ( hypercall_preempt_check() ) -+ if ( curr->arch.old_guest_table || hypercall_preempt_check() ) - { - rc = -EAGAIN; - break; -@@ -3088,21 +3252,17 @@ long do_mmuext_op( - } - - if ( (rc = xsm_memory_pin_page(d, pg_owner, page)) != 0 ) -- { -- put_page_and_type(page); - okay = 0; -- break; -- } -- -- if ( unlikely(test_and_set_bit(_PGT_pinned, -- &page->u.inuse.type_info)) ) -+ else if ( unlikely(test_and_set_bit(_PGT_pinned, -+ &page->u.inuse.type_info)) ) - { - MEM_LOG("Mfn %lx already pinned", page_to_mfn(page)); -- put_page_and_type(page); - okay = 0; -- break; - } - -+ if ( unlikely(!okay) ) -+ goto pin_drop; -+ - /* A page is dirtied when its pin status is set. */ - paging_mark_dirty(pg_owner, page_to_mfn(page)); - -@@ -3116,7 +3276,13 @@ long do_mmuext_op( - &page->u.inuse.type_info)); - spin_unlock(&pg_owner->page_alloc_lock); - if ( drop_ref ) -- put_page_and_type(page); -+ { -+ pin_drop: -+ if ( type == PGT_l1_page_table ) -+ put_page_and_type(page); -+ else -+ curr->arch.old_guest_table = page; -+ } - } - - break; -@@ -3144,7 +3310,17 @@ long do_mmuext_op( - break; - } - -- put_page_and_type(page); -+ switch ( rc = put_page_and_type_preemptible(page, 1) ) -+ { -+ case -EINTR: -+ case -EAGAIN: -+ curr->arch.old_guest_table = page; -+ rc = 0; -+ break; -+ default: -+ BUG_ON(rc); -+ break; -+ } - put_page(page); - - /* A page is dirtied when its pin status is cleared. */ -@@ -3154,8 +3330,13 @@ long do_mmuext_op( - } - - case MMUEXT_NEW_BASEPTR: -- okay = (!paging_mode_translate(d) -- && new_guest_cr3(op.arg1.mfn)); -+ if ( paging_mode_translate(d) ) -+ okay = 0; -+ else -+ { -+ rc = new_guest_cr3(op.arg1.mfn); -+ okay = !rc; -+ } - break; - - -@@ -3169,29 +3350,56 @@ long do_mmuext_op( - break; - } - -+ old_mfn = pagetable_get_pfn(curr->arch.guest_table_user); -+ /* -+ * This is particularly important when getting restarted after the -+ * previous attempt got preempted in the put-old-MFN phase. -+ */ -+ if ( old_mfn == op.arg1.mfn ) -+ break; -+ - if ( op.arg1.mfn != 0 ) - { - if ( paging_mode_refcounts(d) ) - okay = get_page_from_pagenr(op.arg1.mfn, d); - else -- okay = !get_page_and_type_from_pagenr( -- op.arg1.mfn, PGT_root_page_table, d, 0, 0); -+ { -+ rc = get_page_and_type_from_pagenr( -+ op.arg1.mfn, PGT_root_page_table, d, 0, 1); -+ okay = !rc; -+ } - if ( unlikely(!okay) ) - { -- MEM_LOG("Error while installing new mfn %lx", op.arg1.mfn); -+ if ( rc == -EINTR ) -+ rc = -EAGAIN; -+ else if ( rc != -EAGAIN ) -+ MEM_LOG("Error while installing new mfn %lx", -+ op.arg1.mfn); - break; - } - } - -- old_mfn = pagetable_get_pfn(curr->arch.guest_table_user); - curr->arch.guest_table_user = pagetable_from_pfn(op.arg1.mfn); - - if ( old_mfn != 0 ) - { -+ struct page_info *page = mfn_to_page(old_mfn); -+ - if ( paging_mode_refcounts(d) ) -- put_page(mfn_to_page(old_mfn)); -+ put_page(page); - else -- put_page_and_type(mfn_to_page(old_mfn)); -+ switch ( rc = put_page_and_type_preemptible(page, 1) ) -+ { -+ case -EINTR: -+ rc = -EAGAIN; -+ case -EAGAIN: -+ curr->arch.old_guest_table = page; -+ okay = 0; -+ break; -+ default: -+ BUG_ON(rc); -+ break; -+ } - } - - break; -@@ -3433,9 +3641,27 @@ long do_mmuext_op( - } - - if ( rc == -EAGAIN ) -+ { -+ ASSERT(i < count); - rc = hypercall_create_continuation( - __HYPERVISOR_mmuext_op, "hihi", - uops, (count - i) | MMU_UPDATE_PREEMPTED, pdone, foreigndom); -+ } -+ else if ( curr->arch.old_guest_table ) -+ { -+ XEN_GUEST_HANDLE(void) null; -+ -+ ASSERT(rc || i == count); -+ set_xen_guest_handle(null, NULL); -+ /* -+ * In order to have a way to communicate the final return value to -+ * our continuation, we pass this in place of "foreigndom", building -+ * on the fact that this argument isn't needed anymore. -+ */ -+ rc = hypercall_create_continuation( -+ __HYPERVISOR_mmuext_op, "hihi", null, -+ MMU_UPDATE_PREEMPTED, null, rc); -+ } - - put_pg_owner(pg_owner); - -@@ -3462,11 +3688,28 @@ long do_mmu_update( - void *va; - unsigned long gpfn, gmfn, mfn; - struct page_info *page; -- int rc = 0, i = 0; -- unsigned int cmd, done = 0, pt_dom; -- struct vcpu *v = current; -+ unsigned int cmd, i = 0, done = 0, pt_dom; -+ struct vcpu *curr = current, *v = curr; - struct domain *d = v->domain, *pt_owner = d, *pg_owner; - struct domain_mmap_cache mapcache; -+ int rc = put_old_guest_table(curr); -+ -+ if ( unlikely(rc) ) -+ { -+ if ( likely(rc == -EAGAIN) ) -+ rc = hypercall_create_continuation( -+ __HYPERVISOR_mmu_update, "hihi", ureqs, count, pdone, -+ foreigndom); -+ return rc; -+ } -+ -+ if ( unlikely(count == MMU_UPDATE_PREEMPTED) && -+ likely(guest_handle_is_null(ureqs)) ) -+ { -+ /* See the curr->arch.old_guest_table related -+ * hypercall_create_continuation() below. */ -+ return (int)foreigndom; -+ } - - if ( unlikely(count & MMU_UPDATE_PREEMPTED) ) - { -@@ -3515,7 +3758,7 @@ long do_mmu_update( - - for ( i = 0; i < count; i++ ) - { -- if ( hypercall_preempt_check() ) -+ if ( curr->arch.old_guest_table || hypercall_preempt_check() ) - { - rc = -EAGAIN; - break; -@@ -3696,9 +3939,27 @@ long do_mmu_update( - } - - if ( rc == -EAGAIN ) -+ { -+ ASSERT(i < count); - rc = hypercall_create_continuation( - __HYPERVISOR_mmu_update, "hihi", - ureqs, (count - i) | MMU_UPDATE_PREEMPTED, pdone, foreigndom); -+ } -+ else if ( curr->arch.old_guest_table ) -+ { -+ XEN_GUEST_HANDLE(void) null; -+ -+ ASSERT(rc || i == count); -+ set_xen_guest_handle(null, NULL); -+ /* -+ * In order to have a way to communicate the final return value to -+ * our continuation, we pass this in place of "foreigndom", building -+ * on the fact that this argument isn't needed anymore. -+ */ -+ rc = hypercall_create_continuation( -+ __HYPERVISOR_mmu_update, "hihi", null, -+ MMU_UPDATE_PREEMPTED, null, rc); -+ } - - put_pg_owner(pg_owner); - -diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c -index 692281a..eada470 100644 ---- a/xen/arch/x86/traps.c -+++ b/xen/arch/x86/traps.c -@@ -2407,12 +2407,23 @@ static int emulate_privileged_op(struct cpu_user_regs *regs) - #endif - } - page = get_page_from_gfn(v->domain, gfn, NULL, P2M_ALLOC); -- rc = page ? new_guest_cr3(page_to_mfn(page)) : 0; - if ( page ) -+ { -+ rc = new_guest_cr3(page_to_mfn(page)); - put_page(page); -+ } -+ else -+ rc = -EINVAL; - domain_unlock(v->domain); -- if ( rc == 0 ) /* not okay */ -+ switch ( rc ) -+ { -+ case 0: -+ break; -+ case -EAGAIN: /* retry after preemption */ -+ goto skip; -+ default: /* not okay */ - goto fail; -+ } - break; - } - -diff --git a/xen/arch/x86/x86_64/compat/mm.c b/xen/arch/x86/x86_64/compat/mm.c -index fb7baca..ef7822b 100644 ---- a/xen/arch/x86/x86_64/compat/mm.c -+++ b/xen/arch/x86/x86_64/compat/mm.c -@@ -268,6 +268,13 @@ int compat_mmuext_op(XEN_GUEST_HANDLE(mmuext_op_compat_t) cmp_uops, - int rc = 0; - XEN_GUEST_HANDLE(mmuext_op_t) nat_ops; - -+ if ( unlikely(count == MMU_UPDATE_PREEMPTED) && -+ likely(guest_handle_is_null(cmp_uops)) ) -+ { -+ set_xen_guest_handle(nat_ops, NULL); -+ return do_mmuext_op(nat_ops, count, pdone, foreigndom); -+ } -+ - preempt_mask = count & MMU_UPDATE_PREEMPTED; - count ^= preempt_mask; - -@@ -365,17 +372,23 @@ int compat_mmuext_op(XEN_GUEST_HANDLE(mmuext_op_compat_t) cmp_uops, - : mcs->call.args[1]; - unsigned int left = arg1 & ~MMU_UPDATE_PREEMPTED; - -- BUG_ON(left == arg1); -+ BUG_ON(left == arg1 && left != i); - BUG_ON(left > count); - guest_handle_add_offset(nat_ops, i - left); - guest_handle_subtract_offset(cmp_uops, left); - left = 1; -- BUG_ON(!hypercall_xlat_continuation(&left, 0x01, nat_ops, cmp_uops)); -- BUG_ON(left != arg1); -- if (!test_bit(_MCSF_in_multicall, &mcs->flags)) -- regs->_ecx += count - i; -+ if ( arg1 != MMU_UPDATE_PREEMPTED ) -+ { -+ BUG_ON(!hypercall_xlat_continuation(&left, 0x01, nat_ops, -+ cmp_uops)); -+ if ( !test_bit(_MCSF_in_multicall, &mcs->flags) ) -+ regs->_ecx += count - i; -+ else -+ mcs->compat_call.args[1] += count - i; -+ } - else -- mcs->compat_call.args[1] += count - i; -+ BUG_ON(hypercall_xlat_continuation(&left, 0)); -+ BUG_ON(left != arg1); - } - else - BUG_ON(err > 0); -diff --git a/xen/common/compat/domain.c b/xen/common/compat/domain.c -index 40a0287..9ddaa38 100644 ---- a/xen/common/compat/domain.c -+++ b/xen/common/compat/domain.c -@@ -50,6 +50,10 @@ int compat_vcpu_op(int cmd, int vcpuid, XEN_GUEST_HANDLE(void) arg) - rc = v->is_initialised ? -EEXIST : arch_set_info_guest(v, cmp_ctxt); - domain_unlock(d); - -+ if ( rc == -EAGAIN ) -+ rc = hypercall_create_continuation(__HYPERVISOR_vcpu_op, "iih", -+ cmd, vcpuid, arg); -+ - xfree(cmp_ctxt); - break; - } -diff --git a/xen/common/domain.c b/xen/common/domain.c -index c09fb73..89ab922 100644 ---- a/xen/common/domain.c -+++ b/xen/common/domain.c -@@ -779,14 +779,18 @@ void domain_unpause_by_systemcontroller(struct domain *d) - domain_unpause(d); - } - --void vcpu_reset(struct vcpu *v) -+int vcpu_reset(struct vcpu *v) - { - struct domain *d = v->domain; -+ int rc; - - vcpu_pause(v); - domain_lock(d); - -- arch_vcpu_reset(v); -+ set_bit(_VPF_in_reset, &v->pause_flags); -+ rc = arch_vcpu_reset(v); -+ if ( rc ) -+ goto out_unlock; - - set_bit(_VPF_down, &v->pause_flags); - -@@ -802,9 +806,13 @@ void vcpu_reset(struct vcpu *v) - #endif - cpumask_clear(v->cpu_affinity_tmp); - clear_bit(_VPF_blocked, &v->pause_flags); -+ clear_bit(_VPF_in_reset, &v->pause_flags); - -+ out_unlock: - domain_unlock(v->domain); - vcpu_unpause(v); -+ -+ return rc; - } - - -@@ -841,6 +849,11 @@ long do_vcpu_op(int cmd, int vcpuid, XEN_GUEST_HANDLE(void) arg) - domain_unlock(d); - - free_vcpu_guest_context(ctxt); -+ -+ if ( rc == -EAGAIN ) -+ rc = hypercall_create_continuation(__HYPERVISOR_vcpu_op, "iih", -+ cmd, vcpuid, arg); -+ - break; - - case VCPUOP_up: { -diff --git a/xen/common/domctl.c b/xen/common/domctl.c -index cbc8146..b3bfb38 100644 ---- a/xen/common/domctl.c -+++ b/xen/common/domctl.c -@@ -307,8 +307,10 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domctl_t) u_domctl) - - if ( guest_handle_is_null(op->u.vcpucontext.ctxt) ) - { -- vcpu_reset(v); -- ret = 0; -+ ret = vcpu_reset(v); -+ if ( ret == -EAGAIN ) -+ ret = hypercall_create_continuation( -+ __HYPERVISOR_domctl, "h", u_domctl); - goto svc_out; - } - -@@ -337,6 +339,10 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domctl_t) u_domctl) - domain_pause(d); - ret = arch_set_info_guest(v, c); - domain_unpause(d); -+ -+ if ( ret == -EAGAIN ) -+ ret = hypercall_create_continuation( -+ __HYPERVISOR_domctl, "h", u_domctl); - } - - svc_out: -diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h -index aecee68..898f63a 100644 ---- a/xen/include/asm-x86/domain.h -+++ b/xen/include/asm-x86/domain.h -@@ -464,6 +464,7 @@ struct arch_vcpu - pagetable_t guest_table_user; /* (MFN) x86/64 user-space pagetable */ - #endif - pagetable_t guest_table; /* (MFN) guest notion of cr3 */ -+ struct page_info *old_guest_table; /* partially destructed pagetable */ - /* guest_table holds a ref to the page, and also a type-count unless - * shadow refcounts are in use */ - pagetable_t shadow_table[4]; /* (MFN) shadow(s) of guest */ -diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h -index ba92568..82cdde6 100644 ---- a/xen/include/asm-x86/mm.h -+++ b/xen/include/asm-x86/mm.h -@@ -605,6 +605,7 @@ void audit_domains(void); - int new_guest_cr3(unsigned long pfn); - void make_cr3(struct vcpu *v, unsigned long mfn); - void update_cr3(struct vcpu *v); -+int vcpu_destroy_pagetables(struct vcpu *); - void propagate_page_fault(unsigned long addr, u16 error_code); - void *do_page_walk(struct vcpu *v, unsigned long addr); - -diff --git a/xen/include/xen/domain.h b/xen/include/xen/domain.h -index d4ac50f..504a70f 100644 ---- a/xen/include/xen/domain.h -+++ b/xen/include/xen/domain.h -@@ -13,7 +13,7 @@ typedef union { - struct vcpu *alloc_vcpu( - struct domain *d, unsigned int vcpu_id, unsigned int cpu_id); - struct vcpu *alloc_dom0_vcpu0(void); --void vcpu_reset(struct vcpu *v); -+int vcpu_reset(struct vcpu *); - - struct xen_domctl_getdomaininfo; - void getdomaininfo(struct domain *d, struct xen_domctl_getdomaininfo *info); -@@ -67,7 +67,7 @@ void arch_dump_vcpu_info(struct vcpu *v); - - void arch_dump_domain_info(struct domain *d); - --void arch_vcpu_reset(struct vcpu *v); -+int arch_vcpu_reset(struct vcpu *); - - extern spinlock_t vcpu_alloc_lock; - bool_t domctl_lock_acquire(void); -diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h -index b619269..b0715cb 100644 ---- a/xen/include/xen/sched.h -+++ b/xen/include/xen/sched.h -@@ -644,6 +644,9 @@ static inline struct domain *next_domain_in_cpupool( - /* VCPU is blocked due to missing mem_sharing ring. */ - #define _VPF_mem_sharing 6 - #define VPF_mem_sharing (1UL<<_VPF_mem_sharing) -+ /* VCPU is being reset. */ -+#define _VPF_in_reset 7 -+#define VPF_in_reset (1UL<<_VPF_in_reset) - - static inline int vcpu_runnable(struct vcpu *v) - { diff --git a/main/xen/xsa48-4.2.patch b/main/xen/xsa48-4.2.patch deleted file mode 100644 index 998dbcb1d5..0000000000 --- a/main/xen/xsa48-4.2.patch +++ /dev/null @@ -1,114 +0,0 @@ -Add -f FMT / --format FMT arg to qemu-nbd - -From: "Daniel P. Berrange" <berrange@redhat.com> - -Currently the qemu-nbd program will auto-detect the format of -any disk it is given. This behaviour is known to be insecure. -For example, if qemu-nbd initially exposes a 'raw' file to an -unprivileged app, and that app runs - - 'qemu-img create -f qcow2 -o backing_file=/etc/shadow /dev/nbd0' - -then the next time the app is started, the qemu-nbd will now -detect it as a 'qcow2' file and expose /etc/shadow to the -unprivileged app. - -The only way to avoid this is to explicitly tell qemu-nbd what -disk format to use on the command line, completely disabling -auto-detection. This patch adds a '-f' / '--format' arg for -this purpose, mirroring what is already available via qemu-img -and qemu commands. - - qemu-nbd --format raw -p 9000 evil.img - -will now always use raw, regardless of what format 'evil.img' -looks like it contains - -Signed-off-by: Daniel P. Berrange <berrange@redhat.com> -[Use errx, not err. - Paolo] -Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> -Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com> - -[ This is a security issue, CVE-2013-1922 / XSA-48. ] - -diff --git a/qemu-nbd.c b/qemu-nbd.c -index 291cba2..8fbe2cf 100644 ---- a/tools/qemu-xen/qemu-nbd.c -+++ b/tools/qemu-xen/qemu-nbd.c -@@ -247,6 +247,7 @@ out: - int main(int argc, char **argv) - { - BlockDriverState *bs; -+ BlockDriver *drv; - off_t dev_offset = 0; - off_t offset = 0; - uint32_t nbdflags = 0; -@@ -256,7 +257,7 @@ int main(int argc, char **argv) - struct sockaddr_in addr; - socklen_t addr_len = sizeof(addr); - off_t fd_size; -- const char *sopt = "hVb:o:p:rsnP:c:dvk:e:t"; -+ const char *sopt = "hVb:o:p:rsnP:c:dvk:e:f:t"; - struct option lopt[] = { - { "help", 0, NULL, 'h' }, - { "version", 0, NULL, 'V' }, -@@ -271,6 +272,7 @@ int main(int argc, char **argv) - { "snapshot", 0, NULL, 's' }, - { "nocache", 0, NULL, 'n' }, - { "shared", 1, NULL, 'e' }, -+ { "format", 1, NULL, 'f' }, - { "persistent", 0, NULL, 't' }, - { "verbose", 0, NULL, 'v' }, - { NULL, 0, NULL, 0 } -@@ -292,6 +294,7 @@ int main(int argc, char **argv) - int max_fd; - int persistent = 0; - pthread_t client_thread; -+ const char *fmt = NULL; - - /* The client thread uses SIGTERM to interrupt the server. A signal - * handler ensures that "qemu-nbd -v -c" exits with a nice status code. -@@ -368,6 +371,9 @@ int main(int argc, char **argv) - errx(EXIT_FAILURE, "Shared device number must be greater than 0\n"); - } - break; -+ case 'f': -+ fmt = optarg; -+ break; - case 't': - persistent = 1; - break; -@@ -478,9 +484,19 @@ int main(int argc, char **argv) - bdrv_init(); - atexit(bdrv_close_all); - -+ if (fmt) { -+ drv = bdrv_find_format(fmt); -+ if (!drv) { -+ errx(EXIT_FAILURE, "Unknown file format '%s'", fmt); -+ } -+ } else { -+ drv = NULL; -+ } -+ - bs = bdrv_new("hda"); - srcpath = argv[optind]; -- if ((ret = bdrv_open(bs, srcpath, flags, NULL)) < 0) { -+ ret = bdrv_open(bs, srcpath, flags, drv); -+ if (ret < 0) { - errno = -ret; - err(EXIT_FAILURE, "Failed to bdrv_open '%s'", argv[optind]); - } -diff --git a/qemu-nbd.texi b/qemu-nbd.texi -index 44996cc..f56c68e 100644 ---- a/tools/qemu-xen/qemu-nbd.texi -+++ b/tools/qemu-xen/qemu-nbd.texi -@@ -36,6 +36,8 @@ Export Qemu disk image using NBD protocol. - disconnect the specified device - @item -e, --shared=@var{num} - device can be shared by @var{num} clients (default @samp{1}) -+@item -f, --format=@var{fmt} -+ force block driver for format @var{fmt} instead of auto-detecting - @item -t, --persistent - don't exit on the last connection - @item -v, --verbose diff --git a/main/xen/xsa52-4.2-unstable.patch b/main/xen/xsa52-4.2-unstable.patch deleted file mode 100644 index 14db8a8a7f..0000000000 --- a/main/xen/xsa52-4.2-unstable.patch +++ /dev/null @@ -1,46 +0,0 @@ -x86/xsave: fix information leak on AMD CPUs - -Just like for FXSAVE/FXRSTOR, XSAVE/XRSTOR also don't save/restore the -last instruction and operand pointers as well as the last opcode if -there's no pending unmasked exception (see CVE-2006-1056 and commit -9747:4d667a139318). - -While the FXSR solution sits in the save path, I prefer to have this in -the restore path because there the handling is simpler (namely in the -context of the pending changes to properly save the selector values for -32-bit guest code). - -Also this is using FFREE instead of EMMS, as it doesn't seem unlikely -that in the future we may see CPUs with x87 and SSE/AVX but no MMX -support. The goal here anyway is just to avoid an FPU stack overflow. -I would have preferred to use FFREEP instead of FFREE (freeing two -stack slots at once), but AMD doesn't document that instruction. - -This is CVE-2013-2076 / XSA-52. - -Signed-off-by: Jan Beulich <jbeulich@suse.com> - ---- a/xen/arch/x86/xstate.c -+++ b/xen/arch/x86/xstate.c -@@ -78,6 +78,21 @@ void xrstor(struct vcpu *v, uint64_t mas - - struct xsave_struct *ptr = v->arch.xsave_area; - -+ /* -+ * AMD CPUs don't save/restore FDP/FIP/FOP unless an exception -+ * is pending. Clear the x87 state here by setting it to fixed -+ * values. The hypervisor data segment can be sometimes 0 and -+ * sometimes new user value. Both should be ok. Use the FPU saved -+ * data block as a safe address because it should be in L1. -+ */ -+ if ( (mask & ptr->xsave_hdr.xstate_bv & XSTATE_FP) && -+ !(ptr->fpu_sse.fsw & 0x0080) && -+ boot_cpu_data.x86_vendor == X86_VENDOR_AMD ) -+ asm volatile ( "fnclex\n\t" /* clear exceptions */ -+ "ffree %%st(7)\n\t" /* clear stack tag */ -+ "fildl %0" /* load to clear state */ -+ : : "m" (ptr->fpu_sse) ); -+ - asm volatile ( - ".byte " REX_PREFIX "0x0f,0xae,0x2f" - : diff --git a/main/xen/xsa53-4.2.patch b/main/xen/xsa53-4.2.patch deleted file mode 100644 index eb8e79bed2..0000000000 --- a/main/xen/xsa53-4.2.patch +++ /dev/null @@ -1,57 +0,0 @@ -x86/xsave: recover from faults on XRSTOR - -Just like FXRSTOR, XRSTOR can raise #GP if bad content is being passed -to it in the memory block (i.e. aspects not under the control of the -hypervisor, other than e.g. proper alignment of the block). - -Also correct the comment explaining why FXRSTOR needs exception -recovery code to not wrongly state that this can only be a result of -the control tools passing a bad image. - -This is CVE-2013-2077 / XSA-53. - -Signed-off-by: Jan Beulich <jbeulich@suse.com> - ---- a/xen/arch/x86/i387.c -+++ b/xen/arch/x86/i387.c -@@ -53,7 +53,7 @@ static inline void fpu_fxrstor(struct vc - /* - * FXRSTOR can fault if passed a corrupted data block. We handle this - * possibility, which may occur if the block was passed to us by control -- * tools, by silently clearing the block. -+ * tools or through VCPUOP_initialise, by silently clearing the block. - */ - asm volatile ( - #ifdef __i386__ ---- a/xen/arch/x86/xstate.c -+++ b/xen/arch/x86/xstate.c -@@ -93,10 +93,25 @@ void xrstor(struct vcpu *v, uint64_t mas - "fildl %0" /* load to clear state */ - : : "m" (ptr->fpu_sse) ); - -- asm volatile ( -- ".byte " REX_PREFIX "0x0f,0xae,0x2f" -- : -- : "m" (*ptr), "a" (lmask), "d" (hmask), "D"(ptr) ); -+ /* -+ * XRSTOR can fault if passed a corrupted data block. We handle this -+ * possibility, which may occur if the block was passed to us by control -+ * tools or through VCPUOP_initialise, by silently clearing the block. -+ */ -+ asm volatile ( "1: .byte " REX_PREFIX "0x0f,0xae,0x2f\n" -+ ".section .fixup,\"ax\"\n" -+ "2: mov %5,%%ecx \n" -+ " xor %1,%1 \n" -+ " rep stosb \n" -+ " lea %2,%0 \n" -+ " mov %3,%1 \n" -+ " jmp 1b \n" -+ ".previous \n" -+ _ASM_EXTABLE(1b, 2b) -+ : "+&D" (ptr), "+&a" (lmask) -+ : "m" (*ptr), "g" (lmask), "d" (hmask), -+ "m" (xsave_cntxt_size) -+ : "ecx" ); - } - - bool_t xsave_enabled(const struct vcpu *v) diff --git a/main/xen/xsa54.patch b/main/xen/xsa54.patch deleted file mode 100644 index 83c8993d6a..0000000000 --- a/main/xen/xsa54.patch +++ /dev/null @@ -1,24 +0,0 @@ -x86/xsave: properly check guest input to XSETBV - -Other than the HVM emulation path, the PV case so far failed to check -that YMM state requires SSE state to be enabled, allowing for a #GP to -occur upon passing the inputs to XSETBV inside the hypervisor. - -This is CVE-2013-2078 / XSA-54. - -Signed-off-by: Jan Beulich <jbeulich@suse.com> - ---- a/xen/arch/x86/traps.c -+++ b/xen/arch/x86/traps.c -@@ -2205,6 +2205,11 @@ static int emulate_privileged_op(struct - if ( !(new_xfeature & XSTATE_FP) || (new_xfeature & ~xfeature_mask) ) - goto fail; - -+ /* YMM state takes SSE state as prerequisite. */ -+ if ( (xfeature_mask & new_xfeature & XSTATE_YMM) && -+ !(new_xfeature & XSTATE_SSE) ) -+ goto fail; -+ - v->arch.xcr0 = new_xfeature; - v->arch.xcr0_accum |= new_xfeature; - set_xcr0(new_xfeature); diff --git a/main/xen/xsa55.patch b/main/xen/xsa55.patch deleted file mode 100644 index 35fe7afd06..0000000000 --- a/main/xen/xsa55.patch +++ /dev/null @@ -1,3431 +0,0 @@ -diff --git a/tools/libxc/Makefile b/tools/libxc/Makefile -index 876c148..1a5249c 100644 ---- a/tools/libxc/Makefile -+++ b/tools/libxc/Makefile -@@ -52,8 +52,13 @@ endif - vpath %.c ../../xen/common/libelf - CFLAGS += -I../../xen/common/libelf - --GUEST_SRCS-y += libelf-tools.c libelf-loader.c --GUEST_SRCS-y += libelf-dominfo.c libelf-relocate.c -+ELF_SRCS-y += libelf-tools.c libelf-loader.c -+ELF_SRCS-y += libelf-dominfo.c -+ -+GUEST_SRCS-y += $(ELF_SRCS-y) -+ -+$(patsubst %.c,%.o,$(ELF_SRCS-y)): CFLAGS += -Wno-pointer-sign -+$(patsubst %.c,%.opic,$(ELF_SRCS-y)): CFLAGS += -Wno-pointer-sign - - # new domain builder - GUEST_SRCS-y += xc_dom_core.c xc_dom_boot.c -diff --git a/tools/libxc/ia64/xc_ia64_dom_fwloader.c b/tools/libxc/ia64/xc_ia64_dom_fwloader.c -index cdf3333..dbd3349 100644 ---- a/tools/libxc/ia64/xc_ia64_dom_fwloader.c -+++ b/tools/libxc/ia64/xc_ia64_dom_fwloader.c -@@ -60,6 +60,8 @@ static int xc_dom_load_fw_kernel(struct xc_dom_image *dom) - unsigned long i; - - dest = xc_dom_vaddr_to_ptr(dom, dom->kernel_seg.vstart); -+ if ( dest == NULL ) -+ return -1; - memcpy(dest, dom->kernel_blob, FW_SIZE); - - /* Synchronize cache. */ -diff --git a/tools/libxc/xc_cpuid_x86.c b/tools/libxc/xc_cpuid_x86.c -index 0882ce6..da435ce 100644 ---- a/tools/libxc/xc_cpuid_x86.c -+++ b/tools/libxc/xc_cpuid_x86.c -@@ -589,6 +589,8 @@ static int xc_cpuid_do_domctl( - static char *alloc_str(void) - { - char *s = malloc(33); -+ if ( s == NULL ) -+ return s; - memset(s, 0, 33); - return s; - } -@@ -600,6 +602,8 @@ void xc_cpuid_to_str(const unsigned int *regs, char **strs) - for ( i = 0; i < 4; i++ ) - { - strs[i] = alloc_str(); -+ if ( strs[i] == NULL ) -+ continue; - for ( j = 0; j < 32; j++ ) - strs[i][j] = !!((regs[i] & (1U << (31 - j)))) ? '1' : '0'; - } -@@ -680,7 +684,7 @@ int xc_cpuid_check( - const char **config, - char **config_transformed) - { -- int i, j; -+ int i, j, rc; - unsigned int regs[4]; - - memset(config_transformed, 0, 4 * sizeof(*config_transformed)); -@@ -692,6 +696,11 @@ int xc_cpuid_check( - if ( config[i] == NULL ) - continue; - config_transformed[i] = alloc_str(); -+ if ( config_transformed[i] == NULL ) -+ { -+ rc = -ENOMEM; -+ goto fail_rc; -+ } - for ( j = 0; j < 32; j++ ) - { - unsigned char val = !!((regs[i] & (1U << (31 - j)))); -@@ -708,12 +717,14 @@ int xc_cpuid_check( - return 0; - - fail: -+ rc = -EPERM; -+ fail_rc: - for ( i = 0; i < 4; i++ ) - { - free(config_transformed[i]); - config_transformed[i] = NULL; - } -- return -EPERM; -+ return rc; - } - - /* -@@ -758,6 +769,11 @@ int xc_cpuid_set( - } - - config_transformed[i] = alloc_str(); -+ if ( config_transformed[i] == NULL ) -+ { -+ rc = -ENOMEM; -+ goto fail; -+ } - - for ( j = 0; j < 32; j++ ) - { -diff --git a/tools/libxc/xc_dom.h b/tools/libxc/xc_dom.h -index 6a72aa9..d801f66 100644 ---- a/tools/libxc/xc_dom.h -+++ b/tools/libxc/xc_dom.h -@@ -140,9 +140,10 @@ struct xc_dom_image { - - struct xc_dom_loader { - char *name; -- int (*probe) (struct xc_dom_image * dom); -- int (*parser) (struct xc_dom_image * dom); -- int (*loader) (struct xc_dom_image * dom); -+ /* Sadly the error returns from these functions are not consistent: */ -+ elf_negerrnoval (*probe) (struct xc_dom_image * dom); -+ elf_negerrnoval (*parser) (struct xc_dom_image * dom); -+ elf_errorstatus (*loader) (struct xc_dom_image * dom); - - struct xc_dom_loader *next; - }; -@@ -275,27 +276,50 @@ int xc_dom_alloc_segment(struct xc_dom_image *dom, - - void *xc_dom_pfn_to_ptr(struct xc_dom_image *dom, xen_pfn_t first, - xen_pfn_t count); -+void *xc_dom_pfn_to_ptr_retcount(struct xc_dom_image *dom, xen_pfn_t first, -+ xen_pfn_t count, xen_pfn_t *count_out); - void xc_dom_unmap_one(struct xc_dom_image *dom, xen_pfn_t pfn); - void xc_dom_unmap_all(struct xc_dom_image *dom); - --static inline void *xc_dom_seg_to_ptr(struct xc_dom_image *dom, -- struct xc_dom_seg *seg) -+static inline void *xc_dom_seg_to_ptr_pages(struct xc_dom_image *dom, -+ struct xc_dom_seg *seg, -+ xen_pfn_t *pages_out) - { - xen_vaddr_t segsize = seg->vend - seg->vstart; - unsigned int page_size = XC_DOM_PAGE_SIZE(dom); - xen_pfn_t pages = (segsize + page_size - 1) / page_size; -+ void *retval; -+ -+ retval = xc_dom_pfn_to_ptr(dom, seg->pfn, pages); - -- return xc_dom_pfn_to_ptr(dom, seg->pfn, pages); -+ *pages_out = retval ? pages : 0; -+ return retval; -+} -+ -+static inline void *xc_dom_seg_to_ptr(struct xc_dom_image *dom, -+ struct xc_dom_seg *seg) -+{ -+ xen_pfn_t dummy; -+ -+ return xc_dom_seg_to_ptr_pages(dom, seg, &dummy); - } - - static inline void *xc_dom_vaddr_to_ptr(struct xc_dom_image *dom, -- xen_vaddr_t vaddr) -+ xen_vaddr_t vaddr, -+ size_t *safe_region_out) - { - unsigned int page_size = XC_DOM_PAGE_SIZE(dom); - xen_pfn_t page = (vaddr - dom->parms.virt_base) / page_size; - unsigned int offset = (vaddr - dom->parms.virt_base) % page_size; -- void *ptr = xc_dom_pfn_to_ptr(dom, page, 0); -- return (ptr ? (ptr + offset) : NULL); -+ xen_pfn_t safe_region_count; -+ void *ptr; -+ -+ *safe_region_out = 0; -+ ptr = xc_dom_pfn_to_ptr_retcount(dom, page, 0, &safe_region_count); -+ if ( ptr == NULL ) -+ return ptr; -+ *safe_region_out = (safe_region_count << XC_DOM_PAGE_SHIFT(dom)) - offset; -+ return ptr; - } - - static inline int xc_dom_feature_translated(struct xc_dom_image *dom) -@@ -307,6 +331,8 @@ static inline xen_pfn_t xc_dom_p2m_host(struct xc_dom_image *dom, xen_pfn_t pfn) - { - if (dom->shadow_enabled) - return pfn; -+ if (pfn >= dom->total_pages) -+ return INVALID_MFN; - return dom->p2m_host[pfn]; - } - -@@ -315,6 +341,8 @@ static inline xen_pfn_t xc_dom_p2m_guest(struct xc_dom_image *dom, - { - if (xc_dom_feature_translated(dom)) - return pfn; -+ if (pfn >= dom->total_pages) -+ return INVALID_MFN; - return dom->p2m_host[pfn]; - } - -diff --git a/tools/libxc/xc_dom_binloader.c b/tools/libxc/xc_dom_binloader.c -index 769e97d..553b366 100644 ---- a/tools/libxc/xc_dom_binloader.c -+++ b/tools/libxc/xc_dom_binloader.c -@@ -123,10 +123,13 @@ static struct xen_bin_image_table *find_table(struct xc_dom_image *dom) - uint32_t *probe_ptr; - uint32_t *probe_end; - -+ if ( dom->kernel_size < sizeof(*table) ) -+ return NULL; - probe_ptr = dom->kernel_blob; -- probe_end = dom->kernel_blob + dom->kernel_size - sizeof(*table); -- if ( (void*)probe_end > (dom->kernel_blob + 8192) ) -+ if ( dom->kernel_size > (8192 + sizeof(*table)) ) - probe_end = dom->kernel_blob + 8192; -+ else -+ probe_end = dom->kernel_blob + dom->kernel_size - sizeof(*table); - - for ( table = NULL; probe_ptr < probe_end; probe_ptr++ ) - { -@@ -249,6 +252,7 @@ static int xc_dom_load_bin_kernel(struct xc_dom_image *dom) - char *image = dom->kernel_blob; - char *dest; - size_t image_size = dom->kernel_size; -+ size_t dest_size; - uint32_t start_addr; - uint32_t load_end_addr; - uint32_t bss_end_addr; -@@ -272,7 +276,29 @@ static int xc_dom_load_bin_kernel(struct xc_dom_image *dom) - DOMPRINTF(" text_size: 0x%" PRIx32 "", text_size); - DOMPRINTF(" bss_size: 0x%" PRIx32 "", bss_size); - -- dest = xc_dom_vaddr_to_ptr(dom, dom->kernel_seg.vstart); -+ dest = xc_dom_vaddr_to_ptr(dom, dom->kernel_seg.vstart, &dest_size); -+ if ( dest == NULL ) -+ { -+ DOMPRINTF("%s: xc_dom_vaddr_to_ptr(dom, dom->kernel_seg.vstart)" -+ " => NULL", __FUNCTION__); -+ return -EINVAL; -+ } -+ -+ if ( dest_size < text_size || -+ dest_size - text_size < bss_size ) -+ { -+ DOMPRINTF("%s: mapped region is too small for image", __FUNCTION__); -+ return -EINVAL; -+ } -+ -+ if ( image_size < skip || -+ image_size - skip < text_size ) -+ { -+ DOMPRINTF("%s: image is too small for declared text size", -+ __FUNCTION__); -+ return -EINVAL; -+ } -+ - memcpy(dest, image + skip, text_size); - memset(dest + text_size, 0, bss_size); - -diff --git a/tools/libxc/xc_dom_core.c b/tools/libxc/xc_dom_core.c -index 2a01d7c..e79e38d 100644 ---- a/tools/libxc/xc_dom_core.c -+++ b/tools/libxc/xc_dom_core.c -@@ -120,9 +120,17 @@ void *xc_dom_malloc(struct xc_dom_image *dom, size_t size) - { - struct xc_dom_mem *block; - -+ if ( size > SIZE_MAX - sizeof(*block) ) -+ { -+ DOMPRINTF("%s: unreasonable allocation size", __FUNCTION__); -+ return NULL; -+ } - block = malloc(sizeof(*block) + size); - if ( block == NULL ) -+ { -+ DOMPRINTF("%s: allocation failed", __FUNCTION__); - return NULL; -+ } - memset(block, 0, sizeof(*block) + size); - block->next = dom->memblocks; - dom->memblocks = block; -@@ -138,7 +146,10 @@ void *xc_dom_malloc_page_aligned(struct xc_dom_image *dom, size_t size) - - block = malloc(sizeof(*block)); - if ( block == NULL ) -+ { -+ DOMPRINTF("%s: allocation failed", __FUNCTION__); - return NULL; -+ } - memset(block, 0, sizeof(*block)); - block->mmap_len = size; - block->mmap_ptr = mmap(NULL, block->mmap_len, -@@ -146,6 +157,7 @@ void *xc_dom_malloc_page_aligned(struct xc_dom_image *dom, size_t size) - -1, 0); - if ( block->mmap_ptr == MAP_FAILED ) - { -+ DOMPRINTF("%s: mmap failed", __FUNCTION__); - free(block); - return NULL; - } -@@ -202,6 +214,7 @@ void *xc_dom_malloc_filemap(struct xc_dom_image *dom, - close(fd); - if ( block != NULL ) - free(block); -+ DOMPRINTF("%s: failed (on file `%s')", __FUNCTION__, filename); - return NULL; - } - -@@ -271,6 +284,11 @@ size_t xc_dom_check_gzip(xc_interface *xch, void *blob, size_t ziplen) - unsigned char *gzlen; - size_t unziplen; - -+ if ( ziplen < 6 ) -+ /* Too small. We need (i.e. the subsequent code relies on) -+ * 2 bytes for the magic number plus 4 bytes length. */ -+ return 0; -+ - if ( strncmp(blob, "\037\213", 2) ) - /* not gzipped */ - return 0; -@@ -351,10 +369,19 @@ int xc_dom_try_gunzip(struct xc_dom_image *dom, void **blob, size_t * size) - void *xc_dom_pfn_to_ptr(struct xc_dom_image *dom, xen_pfn_t pfn, - xen_pfn_t count) - { -+ xen_pfn_t count_out_dummy; -+ return xc_dom_pfn_to_ptr_retcount(dom, pfn, count, &count_out_dummy); -+} -+ -+void *xc_dom_pfn_to_ptr_retcount(struct xc_dom_image *dom, xen_pfn_t pfn, -+ xen_pfn_t count, xen_pfn_t *count_out) -+{ - struct xc_dom_phys *phys; - unsigned int page_shift = XC_DOM_PAGE_SHIFT(dom); - char *mode = "unset"; - -+ *count_out = 0; -+ - if ( pfn > dom->total_pages || /* multiple checks to avoid overflows */ - count > dom->total_pages || - pfn > dom->total_pages - count ) -@@ -384,6 +411,7 @@ void *xc_dom_pfn_to_ptr(struct xc_dom_image *dom, xen_pfn_t pfn, - phys->count); - return NULL; - } -+ *count_out = count; - } - else - { -@@ -391,6 +419,9 @@ void *xc_dom_pfn_to_ptr(struct xc_dom_image *dom, xen_pfn_t pfn, - just hand out a pointer to it */ - if ( pfn < phys->first ) - continue; -+ if ( pfn >= phys->first + phys->count ) -+ continue; -+ *count_out = phys->count - (pfn - phys->first); - } - return phys->ptr + ((pfn - phys->first) << page_shift); - } -@@ -478,7 +509,8 @@ int xc_dom_alloc_segment(struct xc_dom_image *dom, - seg->vstart = start; - seg->pfn = (seg->vstart - dom->parms.virt_base) / page_size; - -- if ( pages > dom->total_pages || /* double test avoids overflow probs */ -+ if ( pages > dom->total_pages || /* multiple test avoids overflow probs */ -+ seg->pfn > dom->total_pages || - pages > dom->total_pages - seg->pfn) - { - xc_dom_panic(dom->xch, XC_OUT_OF_MEMORY, -@@ -855,6 +887,12 @@ int xc_dom_build_image(struct xc_dom_image *dom) - ramdisklen) != 0 ) - goto err; - ramdiskmap = xc_dom_seg_to_ptr(dom, &dom->ramdisk_seg); -+ if ( ramdiskmap == NULL ) -+ { -+ DOMPRINTF("%s: xc_dom_seg_to_ptr(dom, &dom->ramdisk_seg) => NULL", -+ __FUNCTION__); -+ goto err; -+ } - if ( unziplen ) - { - if ( xc_dom_do_gunzip(dom->xch, -diff --git a/tools/libxc/xc_dom_elfloader.c b/tools/libxc/xc_dom_elfloader.c -index 2e69559..be58276 100644 ---- a/tools/libxc/xc_dom_elfloader.c -+++ b/tools/libxc/xc_dom_elfloader.c -@@ -28,13 +28,14 @@ - - #include "xg_private.h" - #include "xc_dom.h" -+#include "xc_bitops.h" - - #define XEN_VER "xen-3.0" - - /* ------------------------------------------------------------------------ */ - - static void log_callback(struct elf_binary *elf, void *caller_data, -- int iserr, const char *fmt, va_list al) { -+ bool iserr, const char *fmt, va_list al) { - xc_interface *xch = caller_data; - - xc_reportv(xch, -@@ -46,7 +47,7 @@ static void log_callback(struct elf_binary *elf, void *caller_data, - - void xc_elf_set_logfile(xc_interface *xch, struct elf_binary *elf, - int verbose) { -- elf_set_log(elf, log_callback, xch, verbose); -+ elf_set_log(elf, log_callback, xch, verbose /* convert to bool */); - } - - /* ------------------------------------------------------------------------ */ -@@ -84,7 +85,7 @@ static char *xc_dom_guest_type(struct xc_dom_image *dom, - /* ------------------------------------------------------------------------ */ - /* parse elf binary */ - --static int check_elf_kernel(struct xc_dom_image *dom, int verbose) -+static elf_negerrnoval check_elf_kernel(struct xc_dom_image *dom, bool verbose) - { - if ( dom->kernel_blob == NULL ) - { -@@ -95,7 +96,7 @@ static int check_elf_kernel(struct xc_dom_image *dom, int verbose) - return -EINVAL; - } - -- if ( !elf_is_elfbinary(dom->kernel_blob) ) -+ if ( !elf_is_elfbinary(dom->kernel_blob, dom->kernel_size) ) - { - if ( verbose ) - xc_dom_panic(dom->xch, -@@ -106,20 +107,21 @@ static int check_elf_kernel(struct xc_dom_image *dom, int verbose) - return 0; - } - --static int xc_dom_probe_elf_kernel(struct xc_dom_image *dom) -+static elf_negerrnoval xc_dom_probe_elf_kernel(struct xc_dom_image *dom) - { - return check_elf_kernel(dom, 0); - } - --static int xc_dom_load_elf_symtab(struct xc_dom_image *dom, -- struct elf_binary *elf, int load) -+static elf_errorstatus xc_dom_load_elf_symtab(struct xc_dom_image *dom, -+ struct elf_binary *elf, bool load) - { - struct elf_binary syms; -- const elf_shdr *shdr, *shdr2; -+ ELF_HANDLE_DECL(elf_shdr) shdr; ELF_HANDLE_DECL(elf_shdr) shdr2; - xen_vaddr_t symtab, maxaddr; -- char *hdr; -+ elf_ptrval hdr; - size_t size; -- int h, count, type, i, tables = 0; -+ unsigned h, count, type, i, tables = 0; -+ unsigned long *strtab_referenced = NULL; - - if ( elf_swap(elf) ) - { -@@ -130,31 +132,48 @@ static int xc_dom_load_elf_symtab(struct xc_dom_image *dom, - - if ( load ) - { -+ char *hdr_ptr; -+ size_t allow_size; -+ - if ( !dom->bsd_symtab_start ) - return 0; - size = dom->kernel_seg.vend - dom->bsd_symtab_start; -- hdr = xc_dom_vaddr_to_ptr(dom, dom->bsd_symtab_start); -- *(int *)hdr = size - sizeof(int); -+ hdr_ptr = xc_dom_vaddr_to_ptr(dom, dom->bsd_symtab_start, &allow_size); -+ if ( hdr_ptr == NULL ) -+ { -+ DOMPRINTF("%s/load: xc_dom_vaddr_to_ptr(dom,dom->bsd_symtab_start" -+ " => NULL", __FUNCTION__); -+ return -1; -+ } -+ elf->caller_xdest_base = hdr_ptr; -+ elf->caller_xdest_size = allow_size; -+ hdr = ELF_REALPTR2PTRVAL(hdr_ptr); -+ elf_store_val(elf, unsigned, hdr, size - sizeof(unsigned)); - } - else - { -- size = sizeof(int) + elf_size(elf, elf->ehdr) + -+ char *hdr_ptr; -+ -+ size = sizeof(unsigned) + elf_size(elf, elf->ehdr) + - elf_shdr_count(elf) * elf_size(elf, shdr); -- hdr = xc_dom_malloc(dom, size); -- if ( hdr == NULL ) -+ hdr_ptr = xc_dom_malloc(dom, size); -+ if ( hdr_ptr == NULL ) - return 0; -- dom->bsd_symtab_start = elf_round_up(&syms, dom->kernel_seg.vend); -+ elf->caller_xdest_base = hdr_ptr; -+ elf->caller_xdest_size = size; -+ hdr = ELF_REALPTR2PTRVAL(hdr_ptr); -+ dom->bsd_symtab_start = elf_round_up(elf, dom->kernel_seg.vend); - } - -- memcpy(hdr + sizeof(int), -- elf->image, -+ elf_memcpy_safe(elf, hdr + sizeof(unsigned), -+ ELF_IMAGE_BASE(elf), - elf_size(elf, elf->ehdr)); -- memcpy(hdr + sizeof(int) + elf_size(elf, elf->ehdr), -- elf->image + elf_uval(elf, elf->ehdr, e_shoff), -+ elf_memcpy_safe(elf, hdr + sizeof(unsigned) + elf_size(elf, elf->ehdr), -+ ELF_IMAGE_BASE(elf) + elf_uval(elf, elf->ehdr, e_shoff), - elf_shdr_count(elf) * elf_size(elf, shdr)); - if ( elf_64bit(elf) ) - { -- Elf64_Ehdr *ehdr = (Elf64_Ehdr *)(hdr + sizeof(int)); -+ Elf64_Ehdr *ehdr = (Elf64_Ehdr *)(hdr + sizeof(unsigned)); - ehdr->e_phoff = 0; - ehdr->e_phentsize = 0; - ehdr->e_phnum = 0; -@@ -163,19 +182,42 @@ static int xc_dom_load_elf_symtab(struct xc_dom_image *dom, - } - else - { -- Elf32_Ehdr *ehdr = (Elf32_Ehdr *)(hdr + sizeof(int)); -+ Elf32_Ehdr *ehdr = (Elf32_Ehdr *)(hdr + sizeof(unsigned)); - ehdr->e_phoff = 0; - ehdr->e_phentsize = 0; - ehdr->e_phnum = 0; - ehdr->e_shoff = elf_size(elf, elf->ehdr); - ehdr->e_shstrndx = SHN_UNDEF; - } -- if ( elf_init(&syms, hdr + sizeof(int), size - sizeof(int)) ) -+ if ( elf->caller_xdest_size < sizeof(unsigned) ) -+ { -+ DOMPRINTF("%s/%s: header size %"PRIx64" too small", -+ __FUNCTION__, load ? "load" : "parse", -+ (uint64_t)elf->caller_xdest_size); -+ return -1; -+ } -+ if ( elf_init(&syms, elf->caller_xdest_base + sizeof(unsigned), -+ elf->caller_xdest_size - sizeof(unsigned)) ) - return -1; - -+ /* -+ * The caller_xdest_{base,size} and dest_{base,size} need to -+ * remain valid so long as each struct elf_image does. The -+ * principle we adopt is that these values are set when the -+ * memory is allocated or mapped, and cleared when (and if) -+ * they are unmapped. -+ * -+ * Mappings of the guest are normally undone by xc_dom_unmap_all -+ * (directly or via xc_dom_release). We do not explicitly clear -+ * these because in fact that happens only at the end of -+ * xc_dom_boot_image, at which time all of these ELF loading -+ * functions have returned. No relevant struct elf_binary* -+ * escapes this file. -+ */ -+ - xc_elf_set_logfile(dom->xch, &syms, 1); - -- symtab = dom->bsd_symtab_start + sizeof(int); -+ symtab = dom->bsd_symtab_start + sizeof(unsigned); - maxaddr = elf_round_up(&syms, symtab + elf_size(&syms, syms.ehdr) + - elf_shdr_count(&syms) * elf_size(&syms, shdr)); - -@@ -186,27 +228,40 @@ static int xc_dom_load_elf_symtab(struct xc_dom_image *dom, - symtab, maxaddr); - - count = elf_shdr_count(&syms); -+ /* elf_shdr_count guarantees that count is reasonable */ -+ -+ strtab_referenced = xc_dom_malloc(dom, bitmap_size(count)); -+ if ( strtab_referenced == NULL ) -+ return -1; -+ bitmap_clear(strtab_referenced, count); -+ /* Note the symtabs @h linked to by any strtab @i. */ -+ for ( i = 0; i < count; i++ ) -+ { -+ shdr2 = elf_shdr_by_index(&syms, i); -+ if ( elf_uval(&syms, shdr2, sh_type) == SHT_SYMTAB ) -+ { -+ h = elf_uval(&syms, shdr2, sh_link); -+ if (h < count) -+ set_bit(h, strtab_referenced); -+ } -+ } -+ - for ( h = 0; h < count; h++ ) - { - shdr = elf_shdr_by_index(&syms, h); -+ if ( !elf_access_ok(elf, ELF_HANDLE_PTRVAL(shdr), 1) ) -+ /* input has an insane section header count field */ -+ break; - type = elf_uval(&syms, shdr, sh_type); - if ( type == SHT_STRTAB ) - { -- /* Look for a strtab @i linked to symtab @h. */ -- for ( i = 0; i < count; i++ ) -- { -- shdr2 = elf_shdr_by_index(&syms, i); -- if ( (elf_uval(&syms, shdr2, sh_type) == SHT_SYMTAB) && -- (elf_uval(&syms, shdr2, sh_link) == h) ) -- break; -- } - /* Skip symtab @h if we found no corresponding strtab @i. */ -- if ( i == count ) -+ if ( !test_bit(h, strtab_referenced) ) - { - if ( elf_64bit(&syms) ) -- *(Elf64_Off*)(&shdr->e64.sh_offset) = 0; -+ elf_store_field(elf, shdr, e64.sh_offset, 0); - else -- *(Elf32_Off*)(&shdr->e32.sh_offset) = 0; -+ elf_store_field(elf, shdr, e32.sh_offset, 0); - continue; - } - } -@@ -215,13 +270,13 @@ static int xc_dom_load_elf_symtab(struct xc_dom_image *dom, - { - /* Mangled to be based on ELF header location. */ - if ( elf_64bit(&syms) ) -- *(Elf64_Off*)(&shdr->e64.sh_offset) = maxaddr - symtab; -+ elf_store_field(elf, shdr, e64.sh_offset, maxaddr - symtab); - else -- *(Elf32_Off*)(&shdr->e32.sh_offset) = maxaddr - symtab; -+ elf_store_field(elf, shdr, e32.sh_offset, maxaddr - symtab); - size = elf_uval(&syms, shdr, sh_size); - maxaddr = elf_round_up(&syms, maxaddr + size); - tables++; -- DOMPRINTF("%s: h=%d %s, size=0x%zx, maxaddr=0x%" PRIx64 "", -+ DOMPRINTF("%s: h=%u %s, size=0x%zx, maxaddr=0x%" PRIx64 "", - __FUNCTION__, h, - type == SHT_SYMTAB ? "symtab" : "strtab", - size, maxaddr); -@@ -229,7 +284,7 @@ static int xc_dom_load_elf_symtab(struct xc_dom_image *dom, - if ( load ) - { - shdr2 = elf_shdr_by_index(elf, h); -- memcpy((void*)elf_section_start(&syms, shdr), -+ elf_memcpy_safe(elf, elf_section_start(&syms, shdr), - elf_section_start(elf, shdr2), - size); - } -@@ -237,11 +292,18 @@ static int xc_dom_load_elf_symtab(struct xc_dom_image *dom, - - /* Name is NULL. */ - if ( elf_64bit(&syms) ) -- *(Elf64_Word*)(&shdr->e64.sh_name) = 0; -+ elf_store_field(elf, shdr, e64.sh_name, 0); - else -- *(Elf32_Word*)(&shdr->e32.sh_name) = 0; -+ elf_store_field(elf, shdr, e32.sh_name, 0); - } - -+ if ( elf_check_broken(&syms) ) -+ DOMPRINTF("%s: symbols ELF broken: %s", __FUNCTION__, -+ elf_check_broken(&syms)); -+ if ( elf_check_broken(elf) ) -+ DOMPRINTF("%s: ELF broken: %s", __FUNCTION__, -+ elf_check_broken(elf)); -+ - if ( tables == 0 ) - { - DOMPRINTF("%s: no symbol table present", __FUNCTION__); -@@ -253,16 +315,22 @@ static int xc_dom_load_elf_symtab(struct xc_dom_image *dom, - return 0; - } - --static int xc_dom_parse_elf_kernel(struct xc_dom_image *dom) -+static elf_errorstatus xc_dom_parse_elf_kernel(struct xc_dom_image *dom) -+ /* -+ * This function sometimes returns -1 for error and sometimes -+ * an errno value. ?!?! -+ */ - { - struct elf_binary *elf; -- int rc; -+ elf_errorstatus rc; - - rc = check_elf_kernel(dom, 1); - if ( rc != 0 ) - return rc; - - elf = xc_dom_malloc(dom, sizeof(*elf)); -+ if ( elf == NULL ) -+ return -1; - dom->private_loader = elf; - rc = elf_init(elf, dom->kernel_blob, dom->kernel_size); - xc_elf_set_logfile(dom->xch, elf, 1); -@@ -274,23 +342,27 @@ static int xc_dom_parse_elf_kernel(struct xc_dom_image *dom) - } - - /* Find the section-header strings table. */ -- if ( elf->sec_strtab == NULL ) -+ if ( ELF_PTRVAL_INVALID(elf->sec_strtab) ) - { - xc_dom_panic(dom->xch, XC_INVALID_KERNEL, "%s: ELF image" - " has no shstrtab", __FUNCTION__); -- return -EINVAL; -+ rc = -EINVAL; -+ goto out; - } - - /* parse binary and get xen meta info */ - elf_parse_binary(elf); - if ( (rc = elf_xen_parse(elf, &dom->parms)) != 0 ) -- return rc; -+ { -+ goto out; -+ } - - if ( elf_xen_feature_get(XENFEAT_dom0, dom->parms.f_required) ) - { - xc_dom_panic(dom->xch, XC_INVALID_KERNEL, "%s: Kernel does not" - " support unprivileged (DomU) operation", __FUNCTION__); -- return -EINVAL; -+ rc = -EINVAL; -+ goto out; - } - - /* find kernel segment */ -@@ -304,15 +376,30 @@ static int xc_dom_parse_elf_kernel(struct xc_dom_image *dom) - DOMPRINTF("%s: %s: 0x%" PRIx64 " -> 0x%" PRIx64 "", - __FUNCTION__, dom->guest_type, - dom->kernel_seg.vstart, dom->kernel_seg.vend); -- return 0; -+ rc = 0; -+out: -+ if ( elf_check_broken(elf) ) -+ DOMPRINTF("%s: ELF broken: %s", __FUNCTION__, -+ elf_check_broken(elf)); -+ -+ return rc; - } - --static int xc_dom_load_elf_kernel(struct xc_dom_image *dom) -+static elf_errorstatus xc_dom_load_elf_kernel(struct xc_dom_image *dom) - { - struct elf_binary *elf = dom->private_loader; -- int rc; -+ elf_errorstatus rc; -+ xen_pfn_t pages; -+ -+ elf->dest_base = xc_dom_seg_to_ptr_pages(dom, &dom->kernel_seg, &pages); -+ if ( elf->dest_base == NULL ) -+ { -+ DOMPRINTF("%s: xc_dom_vaddr_to_ptr(dom,dom->kernel_seg)" -+ " => NULL", __FUNCTION__); -+ return -1; -+ } -+ elf->dest_size = pages * XC_DOM_PAGE_SIZE(dom); - -- elf->dest = xc_dom_seg_to_ptr(dom, &dom->kernel_seg); - rc = elf_load_binary(elf); - if ( rc < 0 ) - { -diff --git a/tools/libxc/xc_dom_ia64.c b/tools/libxc/xc_dom_ia64.c -index dcd1523..076821c 100644 ---- a/tools/libxc/xc_dom_ia64.c -+++ b/tools/libxc/xc_dom_ia64.c -@@ -60,6 +60,12 @@ int start_info_ia64(struct xc_dom_image *dom) - - DOMPRINTF_CALLED(dom->xch); - -+ if ( start_info == NULL ) -+ { -+ DOMPRINTF("%s: xc_dom_pfn_to_ptr failed on start_info", __FUNCTION__); -+ return -1; /* our caller throws away our return value :-/ */ -+ } -+ - memset(start_info, 0, sizeof(*start_info)); - sprintf(start_info->magic, dom->guest_type); - start_info->flags = dom->flags; -@@ -182,6 +188,12 @@ int arch_setup_meminit(struct xc_dom_image *dom) - - /* setup initial p2m */ - dom->p2m_host = xc_dom_malloc(dom, sizeof(xen_pfn_t) * nbr); -+ if ( dom->p2m_host == NULL ) -+ { -+ DOMPRINTF("%s: xc_dom_malloc failed for p2m_host", -+ __FUNCTION__); -+ return -1; -+ } - for ( pfn = 0; pfn < nbr; pfn++ ) - dom->p2m_host[pfn] = start + pfn; - -diff --git a/tools/libxc/xc_dom_x86.c b/tools/libxc/xc_dom_x86.c -index 0cf1687..448d9a1 100644 ---- a/tools/libxc/xc_dom_x86.c -+++ b/tools/libxc/xc_dom_x86.c -@@ -144,6 +144,9 @@ static int setup_pgtables_x86_32(struct xc_dom_image *dom) - xen_vaddr_t addr; - xen_pfn_t pgpfn; - -+ if ( l2tab == NULL ) -+ goto pfn_error; -+ - for ( addr = dom->parms.virt_base; addr < dom->virt_pgtab_end; - addr += PAGE_SIZE_X86 ) - { -@@ -151,6 +154,8 @@ static int setup_pgtables_x86_32(struct xc_dom_image *dom) - { - /* get L1 tab, make L2 entry */ - l1tab = xc_dom_pfn_to_ptr(dom, l1pfn, 1); -+ if ( l1tab == NULL ) -+ goto pfn_error; - l2off = l2_table_offset_i386(addr); - l2tab[l2off] = - pfn_to_paddr(xc_dom_p2m_guest(dom, l1pfn)) | L2_PROT; -@@ -169,6 +174,11 @@ static int setup_pgtables_x86_32(struct xc_dom_image *dom) - l1tab = NULL; - } - return 0; -+ -+pfn_error: -+ xc_dom_panic(dom->xch, XC_INTERNAL_ERROR, -+ "%s: xc_dom_pfn_to_ptr failed", __FUNCTION__); -+ return -EINVAL; - } - - /* -@@ -219,6 +229,12 @@ static xen_pfn_t move_l3_below_4G(struct xc_dom_image *dom, - goto out; - - l3tab = xc_dom_pfn_to_ptr(dom, l3pfn, 1); -+ if ( l3tab == NULL ) -+ { -+ DOMPRINTF("%s: xc_dom_pfn_to_ptr(dom, l3pfn, 1) => NULL", -+ __FUNCTION__); -+ return l3mfn; /* our one call site will call xc_dom_panic and fail */ -+ } - memset(l3tab, 0, XC_DOM_PAGE_SIZE(dom)); - - DOMPRINTF("%s: successfully relocated L3 below 4G. " -@@ -262,6 +278,8 @@ static int setup_pgtables_x86_32_pae(struct xc_dom_image *dom) - } - - l3tab = xc_dom_pfn_to_ptr(dom, l3pfn, 1); -+ if ( l3tab == NULL ) -+ goto pfn_error; - - for ( addr = dom->parms.virt_base; addr < dom->virt_pgtab_end; - addr += PAGE_SIZE_X86 ) -@@ -270,6 +288,8 @@ static int setup_pgtables_x86_32_pae(struct xc_dom_image *dom) - { - /* get L2 tab, make L3 entry */ - l2tab = xc_dom_pfn_to_ptr(dom, l2pfn, 1); -+ if ( l2tab == NULL ) -+ goto pfn_error; - l3off = l3_table_offset_pae(addr); - l3tab[l3off] = - pfn_to_paddr(xc_dom_p2m_guest(dom, l2pfn)) | L3_PROT; -@@ -280,6 +300,8 @@ static int setup_pgtables_x86_32_pae(struct xc_dom_image *dom) - { - /* get L1 tab, make L2 entry */ - l1tab = xc_dom_pfn_to_ptr(dom, l1pfn, 1); -+ if ( l1tab == NULL ) -+ goto pfn_error; - l2off = l2_table_offset_pae(addr); - l2tab[l2off] = - pfn_to_paddr(xc_dom_p2m_guest(dom, l1pfn)) | L2_PROT; -@@ -306,6 +328,11 @@ static int setup_pgtables_x86_32_pae(struct xc_dom_image *dom) - l3tab[3] = pfn_to_paddr(xc_dom_p2m_guest(dom, l2pfn)) | L3_PROT; - } - return 0; -+ -+pfn_error: -+ xc_dom_panic(dom->xch, XC_INTERNAL_ERROR, -+ "%s: xc_dom_pfn_to_ptr failed", __FUNCTION__); -+ return -EINVAL; - } - - #undef L1_PROT -@@ -344,6 +371,9 @@ static int setup_pgtables_x86_64(struct xc_dom_image *dom) - uint64_t addr; - xen_pfn_t pgpfn; - -+ if ( l4tab == NULL ) -+ goto pfn_error; -+ - for ( addr = dom->parms.virt_base; addr < dom->virt_pgtab_end; - addr += PAGE_SIZE_X86 ) - { -@@ -351,6 +381,8 @@ static int setup_pgtables_x86_64(struct xc_dom_image *dom) - { - /* get L3 tab, make L4 entry */ - l3tab = xc_dom_pfn_to_ptr(dom, l3pfn, 1); -+ if ( l3tab == NULL ) -+ goto pfn_error; - l4off = l4_table_offset_x86_64(addr); - l4tab[l4off] = - pfn_to_paddr(xc_dom_p2m_guest(dom, l3pfn)) | L4_PROT; -@@ -361,6 +393,8 @@ static int setup_pgtables_x86_64(struct xc_dom_image *dom) - { - /* get L2 tab, make L3 entry */ - l2tab = xc_dom_pfn_to_ptr(dom, l2pfn, 1); -+ if ( l2tab == NULL ) -+ goto pfn_error; - l3off = l3_table_offset_x86_64(addr); - l3tab[l3off] = - pfn_to_paddr(xc_dom_p2m_guest(dom, l2pfn)) | L3_PROT; -@@ -373,6 +407,8 @@ static int setup_pgtables_x86_64(struct xc_dom_image *dom) - { - /* get L1 tab, make L2 entry */ - l1tab = xc_dom_pfn_to_ptr(dom, l1pfn, 1); -+ if ( l1tab == NULL ) -+ goto pfn_error; - l2off = l2_table_offset_x86_64(addr); - l2tab[l2off] = - pfn_to_paddr(xc_dom_p2m_guest(dom, l1pfn)) | L2_PROT; -@@ -393,6 +429,11 @@ static int setup_pgtables_x86_64(struct xc_dom_image *dom) - l1tab = NULL; - } - return 0; -+ -+pfn_error: -+ xc_dom_panic(dom->xch, XC_INTERNAL_ERROR, -+ "%s: xc_dom_pfn_to_ptr failed", __FUNCTION__); -+ return -EINVAL; - } - - #undef L1_PROT -@@ -410,6 +451,8 @@ static int alloc_magic_pages(struct xc_dom_image *dom) - if ( xc_dom_alloc_segment(dom, &dom->p2m_seg, "phys2mach", 0, p2m_size) ) - return -1; - dom->p2m_guest = xc_dom_seg_to_ptr(dom, &dom->p2m_seg); -+ if ( dom->p2m_guest == NULL ) -+ return -1; - - /* allocate special pages */ - dom->start_info_pfn = xc_dom_alloc_page(dom, "start info"); -@@ -434,6 +477,12 @@ static int start_info_x86_32(struct xc_dom_image *dom) - - DOMPRINTF_CALLED(dom->xch); - -+ if ( start_info == NULL ) -+ { -+ DOMPRINTF("%s: xc_dom_pfn_to_ptr failed on start_info", __FUNCTION__); -+ return -1; /* our caller throws away our return value :-/ */ -+ } -+ - memset(start_info, 0, sizeof(*start_info)); - strncpy(start_info->magic, dom->guest_type, sizeof(start_info->magic)); - start_info->magic[sizeof(start_info->magic) - 1] = '\0'; -@@ -474,6 +523,12 @@ static int start_info_x86_64(struct xc_dom_image *dom) - - DOMPRINTF_CALLED(dom->xch); - -+ if ( start_info == NULL ) -+ { -+ DOMPRINTF("%s: xc_dom_pfn_to_ptr failed on start_info", __FUNCTION__); -+ return -1; /* our caller throws away our return value :-/ */ -+ } -+ - memset(start_info, 0, sizeof(*start_info)); - strncpy(start_info->magic, dom->guest_type, sizeof(start_info->magic)); - start_info->magic[sizeof(start_info->magic) - 1] = '\0'; -@@ -725,6 +780,9 @@ int arch_setup_meminit(struct xc_dom_image *dom) - } - - dom->p2m_host = xc_dom_malloc(dom, sizeof(xen_pfn_t) * dom->total_pages); -+ if ( dom->p2m_host == NULL ) -+ return -EINVAL; -+ - if ( dom->superpages ) - { - int count = dom->total_pages >> SUPERPAGE_PFN_SHIFT; -diff --git a/tools/libxc/xc_domain_restore.c b/tools/libxc/xc_domain_restore.c -index b4c0b10..f9ed6b2 100644 ---- a/tools/libxc/xc_domain_restore.c -+++ b/tools/libxc/xc_domain_restore.c -@@ -1180,6 +1180,11 @@ static int apply_batch(xc_interface *xch, uint32_t dom, struct restore_ctx *ctx, - - /* Map relevant mfns */ - pfn_err = calloc(j, sizeof(*pfn_err)); -+ if ( pfn_err == NULL ) -+ { -+ PERROR("allocation for pfn_err failed"); -+ return -1; -+ } - region_base = xc_map_foreign_bulk( - xch, dom, PROT_WRITE, region_mfn, pfn_err, j); - -@@ -1556,6 +1561,12 @@ int xc_domain_restore(xc_interface *xch, int io_fd, uint32_t dom, - mfn = ctx->p2m[pfn]; - buf = xc_map_foreign_range(xch, dom, PAGE_SIZE, - PROT_READ | PROT_WRITE, mfn); -+ if ( buf == NULL ) -+ { -+ ERROR("xc_map_foreign_range for generation id" -+ " buffer failed"); -+ goto out; -+ } - - generationid = *(unsigned long long *)(buf + offset); - *(unsigned long long *)(buf + offset) = generationid + 1; -@@ -1713,6 +1724,11 @@ int xc_domain_restore(xc_interface *xch, int io_fd, uint32_t dom, - l3tab = (uint64_t *) - xc_map_foreign_range(xch, dom, PAGE_SIZE, - PROT_READ, ctx->p2m[i]); -+ if ( l3tab == NULL ) -+ { -+ PERROR("xc_map_foreign_range failed (for l3tab)"); -+ goto out; -+ } - - for ( j = 0; j < 4; j++ ) - l3ptes[j] = l3tab[j]; -@@ -1739,6 +1755,11 @@ int xc_domain_restore(xc_interface *xch, int io_fd, uint32_t dom, - l3tab = (uint64_t *) - xc_map_foreign_range(xch, dom, PAGE_SIZE, - PROT_READ | PROT_WRITE, ctx->p2m[i]); -+ if ( l3tab == NULL ) -+ { -+ PERROR("xc_map_foreign_range failed (for l3tab, 2nd)"); -+ goto out; -+ } - - for ( j = 0; j < 4; j++ ) - l3tab[j] = l3ptes[j]; -@@ -1909,6 +1930,12 @@ int xc_domain_restore(xc_interface *xch, int io_fd, uint32_t dom, - SET_FIELD(ctxt, user_regs.edx, mfn); - start_info = xc_map_foreign_range( - xch, dom, PAGE_SIZE, PROT_READ | PROT_WRITE, mfn); -+ if ( start_info == NULL ) -+ { -+ PERROR("xc_map_foreign_range failed (for start_info)"); -+ goto out; -+ } -+ - SET_FIELD(start_info, nr_pages, dinfo->p2m_size); - SET_FIELD(start_info, shared_info, shared_info_frame<<PAGE_SHIFT); - SET_FIELD(start_info, flags, 0); -@@ -2056,6 +2083,11 @@ int xc_domain_restore(xc_interface *xch, int io_fd, uint32_t dom, - /* Restore contents of shared-info page. No checking needed. */ - new_shared_info = xc_map_foreign_range( - xch, dom, PAGE_SIZE, PROT_WRITE, shared_info_frame); -+ if ( new_shared_info == NULL ) -+ { -+ PERROR("xc_map_foreign_range failed (for new_shared_info)"); -+ goto out; -+ } - - /* restore saved vcpu_info and arch specific info */ - MEMCPY_FIELD(new_shared_info, old_shared_info, vcpu_info); -diff --git a/tools/libxc/xc_hvm_build_x86.c b/tools/libxc/xc_hvm_build_x86.c -index cf5d7fb..8165287 100644 ---- a/tools/libxc/xc_hvm_build_x86.c -+++ b/tools/libxc/xc_hvm_build_x86.c -@@ -104,21 +104,23 @@ static int loadelfimage( - for ( i = 0; i < pages; i++ ) - entries[i].mfn = parray[(elf->pstart >> PAGE_SHIFT) + i]; - -- elf->dest = xc_map_foreign_ranges( -+ elf->dest_base = xc_map_foreign_ranges( - xch, dom, pages << PAGE_SHIFT, PROT_READ | PROT_WRITE, 1 << PAGE_SHIFT, - entries, pages); -- if ( elf->dest == NULL ) -+ if ( elf->dest_base == NULL ) - goto err; -+ elf->dest_size = pages * PAGE_SIZE; - -- elf->dest += elf->pstart & (PAGE_SIZE - 1); -+ ELF_ADVANCE_DEST(elf, elf->pstart & (PAGE_SIZE - 1)); - - /* Load the initial elf image. */ - rc = elf_load_binary(elf); - if ( rc < 0 ) - PERROR("Failed to load elf binary\n"); - -- munmap(elf->dest, pages << PAGE_SHIFT); -- elf->dest = NULL; -+ munmap(elf->dest_base, pages << PAGE_SHIFT); -+ elf->dest_base = NULL; -+ elf->dest_size = 0; - - err: - free(entries); -@@ -401,11 +403,16 @@ static int setup_guest(xc_interface *xch, - munmap(page0, PAGE_SIZE); - } - -+ if ( elf_check_broken(&elf) ) -+ ERROR("HVM ELF broken: %s", elf_check_broken(&elf)); -+ - free(page_array); - return 0; - - error_out: - free(page_array); -+ if ( elf_check_broken(&elf) ) -+ ERROR("HVM ELF broken, failing: %s", elf_check_broken(&elf)); - return -1; - } - -diff --git a/tools/libxc/xc_linux_osdep.c b/tools/libxc/xc_linux_osdep.c -index 787e742..98e041c 100644 ---- a/tools/libxc/xc_linux_osdep.c -+++ b/tools/libxc/xc_linux_osdep.c -@@ -378,6 +378,8 @@ static void *linux_privcmd_map_foreign_range(xc_interface *xch, xc_osdep_handle - - num = (size + XC_PAGE_SIZE - 1) >> XC_PAGE_SHIFT; - arr = calloc(num, sizeof(xen_pfn_t)); -+ if ( arr == NULL ) -+ return NULL; - - for ( i = 0; i < num; i++ ) - arr[i] = mfn + i; -@@ -402,6 +404,8 @@ static void *linux_privcmd_map_foreign_ranges(xc_interface *xch, xc_osdep_handle - num_per_entry = chunksize >> XC_PAGE_SHIFT; - num = num_per_entry * nentries; - arr = calloc(num, sizeof(xen_pfn_t)); -+ if ( arr == NULL ) -+ return NULL; - - for ( i = 0; i < nentries; i++ ) - for ( j = 0; j < num_per_entry; j++ ) -diff --git a/tools/libxc/xc_offline_page.c b/tools/libxc/xc_offline_page.c -index 089a361..36b9812 100644 ---- a/tools/libxc/xc_offline_page.c -+++ b/tools/libxc/xc_offline_page.c -@@ -714,6 +714,11 @@ int xc_exchange_page(xc_interface *xch, int domid, xen_pfn_t mfn) - - new_p = xc_map_foreign_range(xch, domid, PAGE_SIZE, - PROT_READ|PROT_WRITE, new_mfn); -+ if ( new_p == NULL ) -+ { -+ ERROR("failed to map new_p for copy, guest may be broken?"); -+ goto failed; -+ } - memcpy(new_p, backup, PAGE_SIZE); - munmap(new_p, PAGE_SIZE); - mops.arg1.mfn = new_mfn; -diff --git a/tools/libxc/xc_private.c b/tools/libxc/xc_private.c -index 3e03a91..848ceed 100644 ---- a/tools/libxc/xc_private.c -+++ b/tools/libxc/xc_private.c -@@ -771,6 +771,8 @@ const char *xc_strerror(xc_interface *xch, int errcode) - errbuf = pthread_getspecific(errbuf_pkey); - if (errbuf == NULL) { - errbuf = malloc(XS_BUFSIZE); -+ if ( errbuf == NULL ) -+ return "(failed to allocate errbuf)"; - pthread_setspecific(errbuf_pkey, errbuf); - } - -diff --git a/tools/libxc/xenctrl.h b/tools/libxc/xenctrl.h -index b7741ca..8952048 100644 ---- a/tools/libxc/xenctrl.h -+++ b/tools/libxc/xenctrl.h -@@ -1778,7 +1778,7 @@ int xc_cpuid_set(xc_interface *xch, - int xc_cpuid_apply_policy(xc_interface *xch, - domid_t domid); - void xc_cpuid_to_str(const unsigned int *regs, -- char **strs); -+ char **strs); /* some strs[] may be NULL if ENOMEM */ - int xc_mca_op(xc_interface *xch, struct xen_mc *mc); - #endif - -diff --git a/tools/xcutils/readnotes.c b/tools/xcutils/readnotes.c -index c926186..5fa445e 100644 ---- a/tools/xcutils/readnotes.c -+++ b/tools/xcutils/readnotes.c -@@ -61,51 +61,56 @@ struct setup_header { - } __attribute__((packed)); - - static void print_string_note(const char *prefix, struct elf_binary *elf, -- const elf_note *note) -+ ELF_HANDLE_DECL(elf_note) note) - { -- printf("%s: %s\n", prefix, (char*)elf_note_desc(elf, note)); -+ printf("%s: %s\n", prefix, elf_strfmt(elf, elf_note_desc(elf, note))); - } - - static void print_numeric_note(const char *prefix, struct elf_binary *elf, -- const elf_note *note) -+ ELF_HANDLE_DECL(elf_note) note) - { - uint64_t value = elf_note_numeric(elf, note); -- int descsz = elf_uval(elf, note, descsz); -+ unsigned descsz = elf_uval(elf, note, descsz); - - printf("%s: %#*" PRIx64 " (%d bytes)\n", - prefix, 2+2*descsz, value, descsz); - } - - static void print_l1_mfn_valid_note(const char *prefix, struct elf_binary *elf, -- const elf_note *note) -+ ELF_HANDLE_DECL(elf_note) note) - { -- int descsz = elf_uval(elf, note, descsz); -- const uint32_t *desc32 = elf_note_desc(elf, note); -- const uint64_t *desc64 = elf_note_desc(elf, note); -+ unsigned descsz = elf_uval(elf, note, descsz); -+ elf_ptrval desc = elf_note_desc(elf, note); - - /* XXX should be able to cope with a list of values. */ - switch ( descsz / 2 ) - { - case 8: - printf("%s: mask=%#"PRIx64" value=%#"PRIx64"\n", prefix, -- desc64[0], desc64[1]); -+ elf_access_unsigned(elf, desc, 0, 8), -+ elf_access_unsigned(elf, desc, 8, 8)); - break; - case 4: - printf("%s: mask=%#"PRIx32" value=%#"PRIx32"\n", prefix, -- desc32[0],desc32[1]); -+ (uint32_t)elf_access_unsigned(elf, desc, 0, 4), -+ (uint32_t)elf_access_unsigned(elf, desc, 4, 4)); - break; - } - - } - --static int print_notes(struct elf_binary *elf, const elf_note *start, const elf_note *end) -+static unsigned print_notes(struct elf_binary *elf, ELF_HANDLE_DECL(elf_note) start, ELF_HANDLE_DECL(elf_note) end) - { -- const elf_note *note; -- int notes_found = 0; -+ ELF_HANDLE_DECL(elf_note) note; -+ unsigned notes_found = 0; -+ const char *this_note_name; - -- for ( note = start; note < end; note = elf_note_next(elf, note) ) -+ for ( note = start; ELF_HANDLE_PTRVAL(note) < ELF_HANDLE_PTRVAL(end); note = elf_note_next(elf, note) ) - { -- if (0 != strcmp(elf_note_name(elf, note), "Xen")) -+ this_note_name = elf_note_name(elf, note); -+ if (NULL == this_note_name) -+ continue; -+ if (0 != strcmp(this_note_name, "Xen")) - continue; - - notes_found++; -@@ -156,7 +161,7 @@ static int print_notes(struct elf_binary *elf, const elf_note *start, const elf_ - break; - default: - printf("unknown note type %#x\n", -- (int)elf_uval(elf, note, type)); -+ (unsigned)elf_uval(elf, note, type)); - break; - } - } -@@ -166,12 +171,13 @@ static int print_notes(struct elf_binary *elf, const elf_note *start, const elf_ - int main(int argc, char **argv) - { - const char *f; -- int fd,h,size,usize,count; -+ int fd; -+ unsigned h,size,usize,count; - void *image,*tmp; - struct stat st; - struct elf_binary elf; -- const elf_shdr *shdr; -- int notes_found = 0; -+ ELF_HANDLE_DECL(elf_shdr) shdr; -+ unsigned notes_found = 0; - - struct setup_header *hdr; - uint64_t payload_offset, payload_length; -@@ -257,7 +263,7 @@ int main(int argc, char **argv) - count = elf_phdr_count(&elf); - for ( h=0; h < count; h++) - { -- const elf_phdr *phdr; -+ ELF_HANDLE_DECL(elf_phdr) phdr; - phdr = elf_phdr_by_index(&elf, h); - if (elf_uval(&elf, phdr, p_type) != PT_NOTE) - continue; -@@ -269,8 +275,8 @@ int main(int argc, char **argv) - continue; - - notes_found = print_notes(&elf, -- elf_segment_start(&elf, phdr), -- elf_segment_end(&elf, phdr)); -+ ELF_MAKE_HANDLE(elf_note, elf_segment_start(&elf, phdr)), -+ ELF_MAKE_HANDLE(elf_note, elf_segment_end(&elf, phdr))); - } - - if ( notes_found == 0 ) -@@ -278,13 +284,13 @@ int main(int argc, char **argv) - count = elf_shdr_count(&elf); - for ( h=0; h < count; h++) - { -- const elf_shdr *shdr; -+ ELF_HANDLE_DECL(elf_shdr) shdr; - shdr = elf_shdr_by_index(&elf, h); - if (elf_uval(&elf, shdr, sh_type) != SHT_NOTE) - continue; - notes_found = print_notes(&elf, -- elf_section_start(&elf, shdr), -- elf_section_end(&elf, shdr)); -+ ELF_MAKE_HANDLE(elf_note, elf_section_start(&elf, shdr)), -+ ELF_MAKE_HANDLE(elf_note, elf_section_end(&elf, shdr))); - if ( notes_found ) - fprintf(stderr, "using notes from SHT_NOTE section\n"); - -@@ -292,8 +298,12 @@ int main(int argc, char **argv) - } - - shdr = elf_shdr_by_name(&elf, "__xen_guest"); -- if (shdr) -- printf("__xen_guest: %s\n", (char*)elf_section_start(&elf, shdr)); -+ if (ELF_HANDLE_VALID(shdr)) -+ printf("__xen_guest: %s\n", -+ elf_strfmt(&elf, elf_section_start(&elf, shdr))); -+ -+ if (elf_check_broken(&elf)) -+ printf("warning: broken ELF: %s\n", elf_check_broken(&elf)); - - return 0; - } -diff --git a/xen/arch/arm/kernel.c b/xen/arch/arm/kernel.c -index 2d56130..dec0519 100644 ---- a/xen/arch/arm/kernel.c -+++ b/xen/arch/arm/kernel.c -@@ -146,6 +146,8 @@ static int kernel_try_elf_prepare(struct kernel_info *info) - { - int rc; - -+ memset(&info->elf.elf, 0, sizeof(info->elf.elf)); -+ - info->kernel_order = get_order_from_bytes(KERNEL_FLASH_SIZE); - info->kernel_img = alloc_xenheap_pages(info->kernel_order, 0); - if ( info->kernel_img == NULL ) -@@ -160,7 +162,7 @@ static int kernel_try_elf_prepare(struct kernel_info *info) - #endif - elf_parse_binary(&info->elf.elf); - if ( (rc = elf_xen_parse(&info->elf.elf, &info->elf.parms)) != 0 ) -- return rc; -+ goto err; - - /* - * TODO: can the ELF header be used to find the physical address -@@ -169,7 +171,18 @@ static int kernel_try_elf_prepare(struct kernel_info *info) - info->entry = info->elf.parms.virt_entry; - info->load = kernel_elf_load; - -+ if ( elf_check_broken(&info->elf.elf) ) -+ printk("Xen: warning: ELF kernel broken: %s\n", -+ elf_check_broken(&info->elf.elf)); -+ - return 0; -+ -+err: -+ if ( elf_check_broken(&info->elf.elf) ) -+ printk("Xen: ELF kernel broken: %s\n", -+ elf_check_broken(&info->elf.elf)); -+ -+ return rc; - } - - int kernel_prepare(struct kernel_info *info) -diff --git a/xen/arch/x86/bzimage.c b/xen/arch/x86/bzimage.c -index 5adc223..3600dca 100644 ---- a/xen/arch/x86/bzimage.c -+++ b/xen/arch/x86/bzimage.c -@@ -220,7 +220,7 @@ unsigned long __init bzimage_headroom(char *image_start, - image_length = hdr->payload_length; - } - -- if ( elf_is_elfbinary(image_start) ) -+ if ( elf_is_elfbinary(image_start, image_length) ) - return 0; - - orig_image_len = image_length; -@@ -251,7 +251,7 @@ int __init bzimage_parse(char *image_base, char **image_start, unsigned long *im - *image_len = hdr->payload_length; - } - -- if ( elf_is_elfbinary(*image_start) ) -+ if ( elf_is_elfbinary(*image_start, *image_len) ) - return 0; - - BUG_ON(!(image_base < *image_start)); -diff --git a/xen/arch/x86/domain_build.c b/xen/arch/x86/domain_build.c -index 469d363..0dbec96 100644 ---- a/xen/arch/x86/domain_build.c -+++ b/xen/arch/x86/domain_build.c -@@ -374,7 +374,7 @@ int __init construct_dom0( - #endif - elf_parse_binary(&elf); - if ( (rc = elf_xen_parse(&elf, &parms)) != 0 ) -- return rc; -+ goto out; - - /* compatibility check */ - compatible = 0; -@@ -413,14 +413,16 @@ int __init construct_dom0( - if ( !compatible ) - { - printk("Mismatch between Xen and DOM0 kernel\n"); -- return -EINVAL; -+ rc = -EINVAL; -+ goto out; - } - - if ( parms.elf_notes[XEN_ELFNOTE_SUPPORTED_FEATURES].type != XEN_ENT_NONE && - !test_bit(XENFEAT_dom0, parms.f_supported) ) - { - printk("Kernel does not support Dom0 operation\n"); -- return -EINVAL; -+ rc = -EINVAL; -+ goto out; - } - - #if defined(__x86_64__) -@@ -734,7 +736,8 @@ int __init construct_dom0( - (v_end > HYPERVISOR_COMPAT_VIRT_START(d)) ) - { - printk("DOM0 image overlaps with Xen private area.\n"); -- return -EINVAL; -+ rc = -EINVAL; -+ goto out; - } - - if ( is_pv_32on64_domain(d) ) -@@ -908,12 +911,13 @@ int __init construct_dom0( - write_ptbase(v); - - /* Copy the OS image and free temporary buffer. */ -- elf.dest = (void*)vkern_start; -+ elf.dest_base = (void*)vkern_start; -+ elf.dest_size = vkern_end - vkern_start; - rc = elf_load_binary(&elf); - if ( rc < 0 ) - { - printk("Failed to load the kernel binary\n"); -- return rc; -+ goto out; - } - bootstrap_map(NULL); - -@@ -924,7 +928,8 @@ int __init construct_dom0( - { - write_ptbase(current); - printk("Invalid HYPERCALL_PAGE field in ELF notes.\n"); -- return -1; -+ rc = -1; -+ goto out; - } - hypercall_page_initialise( - d, (void *)(unsigned long)parms.virt_hypercall); -@@ -1271,9 +1276,19 @@ int __init construct_dom0( - - BUG_ON(rc != 0); - -- iommu_dom0_init(dom0); -+ if ( elf_check_broken(&elf) ) -+ printk(" Xen warning: dom0 kernel broken ELF: %s\n", -+ elf_check_broken(&elf)); - -+ iommu_dom0_init(dom0); - return 0; -+ -+out: -+ if ( elf_check_broken(&elf) ) -+ printk(" Xen dom0 kernel broken ELF: %s\n", -+ elf_check_broken(&elf)); -+ -+ return rc; - } - - /* -diff --git a/xen/common/libelf/Makefile b/xen/common/libelf/Makefile -index 18dc8e2..5bf8f76 100644 ---- a/xen/common/libelf/Makefile -+++ b/xen/common/libelf/Makefile -@@ -2,6 +2,8 @@ obj-bin-y := libelf.o - - SECTIONS := text data $(SPECIAL_DATA_SECTIONS) - -+CFLAGS += -Wno-pointer-sign -+ - libelf.o: libelf-temp.o Makefile - $(OBJCOPY) $(foreach s,$(SECTIONS),--rename-section .$(s)=.init.$(s)) $< $@ - -diff --git a/xen/common/libelf/libelf-dominfo.c b/xen/common/libelf/libelf-dominfo.c -index 523837f..412ea70 100644 ---- a/xen/common/libelf/libelf-dominfo.c -+++ b/xen/common/libelf/libelf-dominfo.c -@@ -29,22 +29,22 @@ static const char *const elf_xen_feature_names[] = { - [XENFEAT_pae_pgdir_above_4gb] = "pae_pgdir_above_4gb", - [XENFEAT_dom0] = "dom0" - }; --static const int elf_xen_features = -+static const unsigned elf_xen_features = - sizeof(elf_xen_feature_names) / sizeof(elf_xen_feature_names[0]); - --int elf_xen_parse_features(const char *features, -+elf_errorstatus elf_xen_parse_features(const char *features, - uint32_t *supported, - uint32_t *required) - { -- char feature[64]; -- int pos, len, i; -+ unsigned char feature[64]; -+ unsigned pos, len, i; - - if ( features == NULL ) - return 0; - - for ( pos = 0; features[pos] != '\0'; pos += len ) - { -- memset(feature, 0, sizeof(feature)); -+ elf_memset_unchecked(feature, 0, sizeof(feature)); - for ( len = 0;; len++ ) - { - if ( len >= sizeof(feature)-1 ) -@@ -94,14 +94,14 @@ int elf_xen_parse_features(const char *features, - /* ------------------------------------------------------------------------ */ - /* xen elf notes */ - --int elf_xen_parse_note(struct elf_binary *elf, -+elf_errorstatus elf_xen_parse_note(struct elf_binary *elf, - struct elf_dom_parms *parms, -- const elf_note *note) -+ ELF_HANDLE_DECL(elf_note) note) - { - /* *INDENT-OFF* */ - static const struct { - char *name; -- int str; -+ bool str; - } note_desc[] = { - [XEN_ELFNOTE_ENTRY] = { "ENTRY", 0}, - [XEN_ELFNOTE_HYPERCALL_PAGE] = { "HYPERCALL_PAGE", 0}, -@@ -125,7 +125,7 @@ int elf_xen_parse_note(struct elf_binary *elf, - const char *str = NULL; - uint64_t val = 0; - unsigned int i; -- int type = elf_uval(elf, note, type); -+ unsigned type = elf_uval(elf, note, type); - - if ( (type >= sizeof(note_desc) / sizeof(note_desc[0])) || - (note_desc[type].name == NULL) ) -@@ -137,7 +137,10 @@ int elf_xen_parse_note(struct elf_binary *elf, - - if ( note_desc[type].str ) - { -- str = elf_note_desc(elf, note); -+ str = elf_strval(elf, elf_note_desc(elf, note)); -+ if (str == NULL) -+ /* elf_strval will mark elf broken if it fails so no need to log */ -+ return 0; - elf_msg(elf, "%s: %s = \"%s\"\n", __FUNCTION__, - note_desc[type].name, str); - parms->elf_notes[type].type = XEN_ENT_STR; -@@ -213,23 +216,37 @@ int elf_xen_parse_note(struct elf_binary *elf, - return 0; - } - --static int elf_xen_parse_notes(struct elf_binary *elf, -+#define ELF_NOTE_INVALID (~0U) -+ -+static unsigned elf_xen_parse_notes(struct elf_binary *elf, - struct elf_dom_parms *parms, -- const void *start, const void *end) -+ elf_ptrval start, -+ elf_ptrval end, -+ unsigned *total_note_count) - { -- int xen_elfnotes = 0; -- const elf_note *note; -+ unsigned xen_elfnotes = 0; -+ ELF_HANDLE_DECL(elf_note) note; -+ const char *note_name; - - parms->elf_note_start = start; - parms->elf_note_end = end; -- for ( note = parms->elf_note_start; -- (void *)note < parms->elf_note_end; -+ for ( note = ELF_MAKE_HANDLE(elf_note, parms->elf_note_start); -+ ELF_HANDLE_PTRVAL(note) < parms->elf_note_end; - note = elf_note_next(elf, note) ) - { -- if ( strcmp(elf_note_name(elf, note), "Xen") ) -+ if ( *total_note_count >= ELF_MAX_TOTAL_NOTE_COUNT ) -+ { -+ elf_mark_broken(elf, "too many ELF notes"); -+ break; -+ } -+ (*total_note_count)++; -+ note_name = elf_note_name(elf, note); -+ if ( note_name == NULL ) -+ continue; -+ if ( strcmp(note_name, "Xen") ) - continue; - if ( elf_xen_parse_note(elf, parms, note) ) -- return -1; -+ return ELF_NOTE_INVALID; - xen_elfnotes++; - } - return xen_elfnotes; -@@ -238,48 +255,49 @@ static int elf_xen_parse_notes(struct elf_binary *elf, - /* ------------------------------------------------------------------------ */ - /* __xen_guest section */ - --int elf_xen_parse_guest_info(struct elf_binary *elf, -+elf_errorstatus elf_xen_parse_guest_info(struct elf_binary *elf, - struct elf_dom_parms *parms) - { -- const char *h; -- char name[32], value[128]; -- int len; -+ elf_ptrval h; -+ unsigned char name[32], value[128]; -+ unsigned len; - - h = parms->guest_info; -- while ( *h ) -+#define STAR(h) (elf_access_unsigned(elf, (h), 0, 1)) -+ while ( STAR(h) ) - { -- memset(name, 0, sizeof(name)); -- memset(value, 0, sizeof(value)); -+ elf_memset_unchecked(name, 0, sizeof(name)); -+ elf_memset_unchecked(value, 0, sizeof(value)); - for ( len = 0;; len++, h++ ) - { - if ( len >= sizeof(name)-1 ) - break; -- if ( *h == '\0' ) -+ if ( STAR(h) == '\0' ) - break; -- if ( *h == ',' ) -+ if ( STAR(h) == ',' ) - { - h++; - break; - } -- if ( *h == '=' ) -+ if ( STAR(h) == '=' ) - { - h++; - for ( len = 0;; len++, h++ ) - { - if ( len >= sizeof(value)-1 ) - break; -- if ( *h == '\0' ) -+ if ( STAR(h) == '\0' ) - break; -- if ( *h == ',' ) -+ if ( STAR(h) == ',' ) - { - h++; - break; - } -- value[len] = *h; -+ value[len] = STAR(h); - } - break; - } -- name[len] = *h; -+ name[len] = STAR(h); - } - elf_msg(elf, "%s: %s=\"%s\"\n", __FUNCTION__, name, value); - -@@ -325,12 +343,13 @@ int elf_xen_parse_guest_info(struct elf_binary *elf, - /* ------------------------------------------------------------------------ */ - /* sanity checks */ - --static int elf_xen_note_check(struct elf_binary *elf, -+static elf_errorstatus elf_xen_note_check(struct elf_binary *elf, - struct elf_dom_parms *parms) - { -- if ( (parms->elf_note_start == NULL) && (parms->guest_info == NULL) ) -+ if ( (ELF_PTRVAL_INVALID(parms->elf_note_start)) && -+ (ELF_PTRVAL_INVALID(parms->guest_info)) ) - { -- int machine = elf_uval(elf, elf->ehdr, e_machine); -+ unsigned machine = elf_uval(elf, elf->ehdr, e_machine); - if ( (machine == EM_386) || (machine == EM_X86_64) ) - { - elf_err(elf, "%s: ERROR: Not a Xen-ELF image: " -@@ -368,7 +387,7 @@ static int elf_xen_note_check(struct elf_binary *elf, - return 0; - } - --static int elf_xen_addr_calc_check(struct elf_binary *elf, -+static elf_errorstatus elf_xen_addr_calc_check(struct elf_binary *elf, - struct elf_dom_parms *parms) - { - if ( (parms->elf_paddr_offset != UNSET_ADDR) && -@@ -454,15 +473,16 @@ static int elf_xen_addr_calc_check(struct elf_binary *elf, - /* ------------------------------------------------------------------------ */ - /* glue it all together ... */ - --int elf_xen_parse(struct elf_binary *elf, -+elf_errorstatus elf_xen_parse(struct elf_binary *elf, - struct elf_dom_parms *parms) - { -- const elf_shdr *shdr; -- const elf_phdr *phdr; -- int xen_elfnotes = 0; -- int i, count, rc; -+ ELF_HANDLE_DECL(elf_shdr) shdr; -+ ELF_HANDLE_DECL(elf_phdr) phdr; -+ unsigned xen_elfnotes = 0; -+ unsigned i, count, more_notes; -+ unsigned total_note_count = 0; - -- memset(parms, 0, sizeof(*parms)); -+ elf_memset_unchecked(parms, 0, sizeof(*parms)); - parms->virt_base = UNSET_ADDR; - parms->virt_entry = UNSET_ADDR; - parms->virt_hypercall = UNSET_ADDR; -@@ -475,6 +495,9 @@ int elf_xen_parse(struct elf_binary *elf, - for ( i = 0; i < count; i++ ) - { - phdr = elf_phdr_by_index(elf, i); -+ if ( !elf_access_ok(elf, ELF_HANDLE_PTRVAL(phdr), 1) ) -+ /* input has an insane program header count field */ -+ break; - if ( elf_uval(elf, phdr, p_type) != PT_NOTE ) - continue; - -@@ -485,13 +508,14 @@ int elf_xen_parse(struct elf_binary *elf, - if (elf_uval(elf, phdr, p_offset) == 0) - continue; - -- rc = elf_xen_parse_notes(elf, parms, -+ more_notes = elf_xen_parse_notes(elf, parms, - elf_segment_start(elf, phdr), -- elf_segment_end(elf, phdr)); -- if ( rc == -1 ) -+ elf_segment_end(elf, phdr), -+ &total_note_count); -+ if ( more_notes == ELF_NOTE_INVALID ) - return -1; - -- xen_elfnotes += rc; -+ xen_elfnotes += more_notes; - } - - /* -@@ -504,21 +528,25 @@ int elf_xen_parse(struct elf_binary *elf, - for ( i = 0; i < count; i++ ) - { - shdr = elf_shdr_by_index(elf, i); -+ if ( !elf_access_ok(elf, ELF_HANDLE_PTRVAL(shdr), 1) ) -+ /* input has an insane section header count field */ -+ break; - - if ( elf_uval(elf, shdr, sh_type) != SHT_NOTE ) - continue; - -- rc = elf_xen_parse_notes(elf, parms, -+ more_notes = elf_xen_parse_notes(elf, parms, - elf_section_start(elf, shdr), -- elf_section_end(elf, shdr)); -+ elf_section_end(elf, shdr), -+ &total_note_count); - -- if ( rc == -1 ) -+ if ( more_notes == ELF_NOTE_INVALID ) - return -1; - -- if ( xen_elfnotes == 0 && rc > 0 ) -+ if ( xen_elfnotes == 0 && more_notes > 0 ) - elf_msg(elf, "%s: using notes from SHT_NOTE section\n", __FUNCTION__); - -- xen_elfnotes += rc; -+ xen_elfnotes += more_notes; - } - - } -@@ -528,20 +556,15 @@ int elf_xen_parse(struct elf_binary *elf, - */ - if ( xen_elfnotes == 0 ) - { -- count = elf_shdr_count(elf); -- for ( i = 0; i < count; i++ ) -+ shdr = elf_shdr_by_name(elf, "__xen_guest"); -+ if ( ELF_HANDLE_VALID(shdr) ) - { -- shdr = elf_shdr_by_name(elf, "__xen_guest"); -- if ( shdr ) -- { -- parms->guest_info = elf_section_start(elf, shdr); -- parms->elf_note_start = NULL; -- parms->elf_note_end = NULL; -- elf_msg(elf, "%s: __xen_guest: \"%s\"\n", __FUNCTION__, -- parms->guest_info); -- elf_xen_parse_guest_info(elf, parms); -- break; -- } -+ parms->guest_info = elf_section_start(elf, shdr); -+ parms->elf_note_start = ELF_INVALID_PTRVAL; -+ parms->elf_note_end = ELF_INVALID_PTRVAL; -+ elf_msg(elf, "%s: __xen_guest: \"%s\"\n", __FUNCTION__, -+ elf_strfmt(elf, parms->guest_info)); -+ elf_xen_parse_guest_info(elf, parms); - } - } - -diff --git a/xen/common/libelf/libelf-loader.c b/xen/common/libelf/libelf-loader.c -index ab58b8b..e2e75af 100644 ---- a/xen/common/libelf/libelf-loader.c -+++ b/xen/common/libelf/libelf-loader.c -@@ -16,27 +16,33 @@ - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -+#ifdef __XEN__ -+#include <asm/guest_access.h> -+#endif -+ - #include "libelf-private.h" - - /* ------------------------------------------------------------------------ */ - --int elf_init(struct elf_binary *elf, const char *image, size_t size) -+elf_errorstatus elf_init(struct elf_binary *elf, const char *image_input, size_t size) - { -- const elf_shdr *shdr; -+ ELF_HANDLE_DECL(elf_shdr) shdr; - uint64_t i, count, section, offset; - -- if ( !elf_is_elfbinary(image) ) -+ if ( !elf_is_elfbinary(image_input, size) ) - { - elf_err(elf, "%s: not an ELF binary\n", __FUNCTION__); - return -1; - } - -- memset(elf, 0, sizeof(*elf)); -- elf->image = image; -+ elf_memset_unchecked(elf, 0, sizeof(*elf)); -+ elf->image_base = image_input; - elf->size = size; -- elf->ehdr = (elf_ehdr *)image; -- elf->class = elf->ehdr->e32.e_ident[EI_CLASS]; -- elf->data = elf->ehdr->e32.e_ident[EI_DATA]; -+ elf->ehdr = ELF_MAKE_HANDLE(elf_ehdr, (elf_ptrval)image_input); -+ elf->class = elf_uval_3264(elf, elf->ehdr, e32.e_ident[EI_CLASS]); -+ elf->data = elf_uval_3264(elf, elf->ehdr, e32.e_ident[EI_DATA]); -+ elf->caller_xdest_base = NULL; -+ elf->caller_xdest_size = 0; - - /* Sanity check phdr. */ - offset = elf_uval(elf, elf->ehdr, e_phoff) + -@@ -61,7 +67,7 @@ int elf_init(struct elf_binary *elf, const char *image, size_t size) - /* Find section string table. */ - section = elf_uval(elf, elf->ehdr, e_shstrndx); - shdr = elf_shdr_by_index(elf, section); -- if ( shdr != NULL ) -+ if ( ELF_HANDLE_VALID(shdr) ) - elf->sec_strtab = elf_section_start(elf, shdr); - - /* Find symbol table and symbol string table. */ -@@ -69,13 +75,16 @@ int elf_init(struct elf_binary *elf, const char *image, size_t size) - for ( i = 0; i < count; i++ ) - { - shdr = elf_shdr_by_index(elf, i); -+ if ( !elf_access_ok(elf, ELF_HANDLE_PTRVAL(shdr), 1) ) -+ /* input has an insane section header count field */ -+ break; - if ( elf_uval(elf, shdr, sh_type) != SHT_SYMTAB ) - continue; - elf->sym_tab = shdr; - shdr = elf_shdr_by_index(elf, elf_uval(elf, shdr, sh_link)); -- if ( shdr == NULL ) -+ if ( !ELF_HANDLE_VALID(shdr) ) - { -- elf->sym_tab = NULL; -+ elf->sym_tab = ELF_INVALID_HANDLE(elf_shdr); - continue; - } - elf->sym_strtab = elf_section_start(elf, shdr); -@@ -86,7 +95,7 @@ int elf_init(struct elf_binary *elf, const char *image, size_t size) - } - - #ifndef __XEN__ --void elf_call_log_callback(struct elf_binary *elf, int iserr, -+void elf_call_log_callback(struct elf_binary *elf, bool iserr, - const char *fmt,...) { - va_list al; - -@@ -101,36 +110,39 @@ void elf_call_log_callback(struct elf_binary *elf, int iserr, - } - - void elf_set_log(struct elf_binary *elf, elf_log_callback *log_callback, -- void *log_caller_data, int verbose) -+ void *log_caller_data, bool verbose) - { - elf->log_callback = log_callback; - elf->log_caller_data = log_caller_data; - elf->verbose = verbose; - } - --static int elf_load_image(void *dst, const void *src, uint64_t filesz, uint64_t memsz) -+static elf_errorstatus elf_load_image(struct elf_binary *elf, -+ elf_ptrval dst, elf_ptrval src, -+ uint64_t filesz, uint64_t memsz) - { -- memcpy(dst, src, filesz); -- memset(dst + filesz, 0, memsz - filesz); -+ elf_memcpy_safe(elf, dst, src, filesz); -+ elf_memset_safe(elf, dst + filesz, 0, memsz - filesz); - return 0; - } - #else --#include <asm/guest_access.h> - - void elf_set_verbose(struct elf_binary *elf) - { - elf->verbose = 1; - } - --static int elf_load_image(void *dst, const void *src, uint64_t filesz, uint64_t memsz) -+static elf_errorstatus elf_load_image(struct elf_binary *elf, elf_ptrval dst, elf_ptrval src, uint64_t filesz, uint64_t memsz) - { -- int rc; -+ elf_errorstatus rc; - if ( filesz > ULONG_MAX || memsz > ULONG_MAX ) - return -1; -- rc = raw_copy_to_guest(dst, src, filesz); -+ /* We trust the dom0 kernel image completely, so we don't care -+ * about overruns etc. here. */ -+ rc = raw_copy_to_guest(ELF_UNSAFE_PTR(dst), ELF_UNSAFE_PTR(src), filesz); - if ( rc != 0 ) - return -1; -- rc = raw_clear_guest(dst + filesz, memsz - filesz); -+ rc = raw_clear_guest(ELF_UNSAFE_PTR(dst + filesz), memsz - filesz); - if ( rc != 0 ) - return -1; - return 0; -@@ -141,10 +153,10 @@ static int elf_load_image(void *dst, const void *src, uint64_t filesz, uint64_t - void elf_parse_bsdsyms(struct elf_binary *elf, uint64_t pstart) - { - uint64_t sz; -- const elf_shdr *shdr; -- int i, type; -+ ELF_HANDLE_DECL(elf_shdr) shdr; -+ unsigned i, type; - -- if ( !elf->sym_tab ) -+ if ( !ELF_HANDLE_VALID(elf->sym_tab) ) - return; - - pstart = elf_round_up(elf, pstart); -@@ -161,7 +173,10 @@ void elf_parse_bsdsyms(struct elf_binary *elf, uint64_t pstart) - for ( i = 0; i < elf_shdr_count(elf); i++ ) - { - shdr = elf_shdr_by_index(elf, i); -- type = elf_uval(elf, (elf_shdr *)shdr, sh_type); -+ if ( !elf_access_ok(elf, ELF_HANDLE_PTRVAL(shdr), 1) ) -+ /* input has an insane section header count field */ -+ break; -+ type = elf_uval(elf, shdr, sh_type); - if ( (type == SHT_STRTAB) || (type == SHT_SYMTAB) ) - sz = elf_round_up(elf, sz + elf_uval(elf, shdr, sh_size)); - } -@@ -172,11 +187,13 @@ void elf_parse_bsdsyms(struct elf_binary *elf, uint64_t pstart) - - static void elf_load_bsdsyms(struct elf_binary *elf) - { -- elf_ehdr *sym_ehdr; -+ ELF_HANDLE_DECL(elf_ehdr) sym_ehdr; - unsigned long sz; -- char *maxva, *symbase, *symtab_addr; -- elf_shdr *shdr; -- int i, type; -+ elf_ptrval maxva; -+ elf_ptrval symbase; -+ elf_ptrval symtab_addr; -+ ELF_HANDLE_DECL(elf_shdr) shdr; -+ unsigned i, type; - - if ( !elf->bsd_symtab_pstart ) - return; -@@ -184,18 +201,18 @@ static void elf_load_bsdsyms(struct elf_binary *elf) - #define elf_hdr_elm(_elf, _hdr, _elm, _val) \ - do { \ - if ( elf_64bit(_elf) ) \ -- (_hdr)->e64._elm = _val; \ -+ elf_store_field(_elf, _hdr, e64._elm, _val); \ - else \ -- (_hdr)->e32._elm = _val; \ -+ elf_store_field(_elf, _hdr, e32._elm, _val); \ - } while ( 0 ) - - symbase = elf_get_ptr(elf, elf->bsd_symtab_pstart); - symtab_addr = maxva = symbase + sizeof(uint32_t); - - /* Set up Elf header. */ -- sym_ehdr = (elf_ehdr *)symtab_addr; -+ sym_ehdr = ELF_MAKE_HANDLE(elf_ehdr, symtab_addr); - sz = elf_uval(elf, elf->ehdr, e_ehsize); -- memcpy(sym_ehdr, elf->ehdr, sz); -+ elf_memcpy_safe(elf, ELF_HANDLE_PTRVAL(sym_ehdr), ELF_HANDLE_PTRVAL(elf->ehdr), sz); - maxva += sz; /* no round up */ - - elf_hdr_elm(elf, sym_ehdr, e_phoff, 0); -@@ -204,37 +221,50 @@ do { \ - elf_hdr_elm(elf, sym_ehdr, e_phnum, 0); - - /* Copy Elf section headers. */ -- shdr = (elf_shdr *)maxva; -+ shdr = ELF_MAKE_HANDLE(elf_shdr, maxva); - sz = elf_shdr_count(elf) * elf_uval(elf, elf->ehdr, e_shentsize); -- memcpy(shdr, elf->image + elf_uval(elf, elf->ehdr, e_shoff), sz); -- maxva = (char *)(long)elf_round_up(elf, (long)maxva + sz); -+ elf_memcpy_safe(elf, ELF_HANDLE_PTRVAL(shdr), -+ ELF_IMAGE_BASE(elf) + elf_uval(elf, elf->ehdr, e_shoff), -+ sz); -+ maxva = elf_round_up(elf, (unsigned long)maxva + sz); - - for ( i = 0; i < elf_shdr_count(elf); i++ ) - { -+ elf_ptrval old_shdr_p; -+ elf_ptrval new_shdr_p; -+ - type = elf_uval(elf, shdr, sh_type); - if ( (type == SHT_STRTAB) || (type == SHT_SYMTAB) ) - { -- elf_msg(elf, "%s: shdr %i at 0x%p -> 0x%p\n", __func__, i, -+ elf_msg(elf, "%s: shdr %i at 0x%"ELF_PRPTRVAL" -> 0x%"ELF_PRPTRVAL"\n", __func__, i, - elf_section_start(elf, shdr), maxva); - sz = elf_uval(elf, shdr, sh_size); -- memcpy(maxva, elf_section_start(elf, shdr), sz); -+ elf_memcpy_safe(elf, maxva, elf_section_start(elf, shdr), sz); - /* Mangled to be based on ELF header location. */ - elf_hdr_elm(elf, shdr, sh_offset, maxva - symtab_addr); -- maxva = (char *)(long)elf_round_up(elf, (long)maxva + sz); -+ maxva = elf_round_up(elf, (unsigned long)maxva + sz); - } -- shdr = (elf_shdr *)((long)shdr + -- (long)elf_uval(elf, elf->ehdr, e_shentsize)); -+ old_shdr_p = ELF_HANDLE_PTRVAL(shdr); -+ new_shdr_p = old_shdr_p + elf_uval(elf, elf->ehdr, e_shentsize); -+ if ( new_shdr_p <= old_shdr_p ) /* wrapped or stuck */ -+ { -+ elf_mark_broken(elf, "bad section header length"); -+ break; -+ } -+ if ( !elf_access_ok(elf, new_shdr_p, 1) ) /* outside image */ -+ break; -+ shdr = ELF_MAKE_HANDLE(elf_shdr, new_shdr_p); - } - - /* Write down the actual sym size. */ -- *(uint32_t *)symbase = maxva - symtab_addr; -+ elf_store_val(elf, uint32_t, symbase, maxva - symtab_addr); - - #undef elf_ehdr_elm - } - - void elf_parse_binary(struct elf_binary *elf) - { -- const elf_phdr *phdr; -+ ELF_HANDLE_DECL(elf_phdr) phdr; - uint64_t low = -1; - uint64_t high = 0; - uint64_t i, count, paddr, memsz; -@@ -243,6 +273,9 @@ void elf_parse_binary(struct elf_binary *elf) - for ( i = 0; i < count; i++ ) - { - phdr = elf_phdr_by_index(elf, i); -+ if ( !elf_access_ok(elf, ELF_HANDLE_PTRVAL(phdr), 1) ) -+ /* input has an insane program header count field */ -+ break; - if ( !elf_phdr_is_loadable(elf, phdr) ) - continue; - paddr = elf_uval(elf, phdr, p_paddr); -@@ -260,16 +293,25 @@ void elf_parse_binary(struct elf_binary *elf) - __FUNCTION__, elf->pstart, elf->pend); - } - --int elf_load_binary(struct elf_binary *elf) -+elf_errorstatus elf_load_binary(struct elf_binary *elf) - { -- const elf_phdr *phdr; -+ ELF_HANDLE_DECL(elf_phdr) phdr; - uint64_t i, count, paddr, offset, filesz, memsz; -- char *dest; -+ elf_ptrval dest; -+ /* -+ * Let bizarre ELFs write the output image up to twice; this -+ * calculation is just to ensure our copying loop is no worse than -+ * O(domain_size). -+ */ -+ uint64_t remain_allow_copy = (uint64_t)elf->dest_size * 2; - - count = elf_uval(elf, elf->ehdr, e_phnum); - for ( i = 0; i < count; i++ ) - { - phdr = elf_phdr_by_index(elf, i); -+ if ( !elf_access_ok(elf, ELF_HANDLE_PTRVAL(phdr), 1) ) -+ /* input has an insane program header count field */ -+ break; - if ( !elf_phdr_is_loadable(elf, phdr) ) - continue; - paddr = elf_uval(elf, phdr, p_paddr); -@@ -277,9 +319,23 @@ int elf_load_binary(struct elf_binary *elf) - filesz = elf_uval(elf, phdr, p_filesz); - memsz = elf_uval(elf, phdr, p_memsz); - dest = elf_get_ptr(elf, paddr); -- elf_msg(elf, "%s: phdr %" PRIu64 " at 0x%p -> 0x%p\n", -- __func__, i, dest, dest + filesz); -- if ( elf_load_image(dest, elf->image + offset, filesz, memsz) != 0 ) -+ -+ /* -+ * We need to check that the input image doesn't have us copy -+ * the whole image zillions of times, as that could lead to -+ * O(n^2) time behaviour and possible DoS by a malicous ELF. -+ */ -+ if ( remain_allow_copy < memsz ) -+ { -+ elf_mark_broken(elf, "program segments total to more" -+ " than the input image size"); -+ break; -+ } -+ remain_allow_copy -= memsz; -+ -+ elf_msg(elf, "%s: phdr %" PRIu64 " at 0x%"ELF_PRPTRVAL" -> 0x%"ELF_PRPTRVAL"\n", -+ __func__, i, dest, (elf_ptrval)(dest + filesz)); -+ if ( elf_load_image(elf, dest, ELF_IMAGE_BASE(elf) + offset, filesz, memsz) != 0 ) - return -1; - } - -@@ -287,18 +343,18 @@ int elf_load_binary(struct elf_binary *elf) - return 0; - } - --void *elf_get_ptr(struct elf_binary *elf, unsigned long addr) -+elf_ptrval elf_get_ptr(struct elf_binary *elf, unsigned long addr) - { -- return elf->dest + addr - elf->pstart; -+ return ELF_REALPTR2PTRVAL(elf->dest_base) + addr - elf->pstart; - } - - uint64_t elf_lookup_addr(struct elf_binary * elf, const char *symbol) - { -- const elf_sym *sym; -+ ELF_HANDLE_DECL(elf_sym) sym; - uint64_t value; - - sym = elf_sym_by_name(elf, symbol); -- if ( sym == NULL ) -+ if ( !ELF_HANDLE_VALID(sym) ) - { - elf_err(elf, "%s: not found: %s\n", __FUNCTION__, symbol); - return -1; -diff --git a/xen/common/libelf/libelf-private.h b/xen/common/libelf/libelf-private.h -index 3ef753c..277be04 100644 ---- a/xen/common/libelf/libelf-private.h -+++ b/xen/common/libelf/libelf-private.h -@@ -77,7 +77,7 @@ - #define elf_err(elf, fmt, args ... ) \ - elf_call_log_callback(elf, 1, fmt , ## args ); - --void elf_call_log_callback(struct elf_binary*, int iserr, const char *fmt,...); -+void elf_call_log_callback(struct elf_binary*, bool iserr, const char *fmt,...); - - #define safe_strcpy(d,s) \ - do { strncpy((d),(s),sizeof((d))-1); \ -@@ -86,6 +86,19 @@ do { strncpy((d),(s),sizeof((d))-1); \ - - #endif - -+#undef memcpy -+#undef memset -+#undef memmove -+#undef strcpy -+ -+#define memcpy MISTAKE_unspecified_memcpy -+#define memset MISTAKE_unspecified_memset -+#define memmove MISTAKE_unspecified_memmove -+#define strcpy MISTAKE_unspecified_strcpy -+ /* This prevents libelf from using these undecorated versions -+ * of memcpy, memset, memmove and strcpy. Every call site -+ * must either use elf_mem*_unchecked, or elf_mem*_safe. */ -+ - #endif /* __LIBELF_PRIVATE_H_ */ - - /* -diff --git a/xen/common/libelf/libelf-relocate.c b/xen/common/libelf/libelf-relocate.c -deleted file mode 100644 -index 7ef4b01..0000000 ---- a/xen/common/libelf/libelf-relocate.c -+++ /dev/null -@@ -1,372 +0,0 @@ --/* -- * ELF relocation code (not used by xen kernel right now). -- * -- * This library is free software; you can redistribute it and/or -- * modify it under the terms of the GNU Lesser General Public -- * License as published by the Free Software Foundation; -- * version 2.1 of the License. -- * -- * This library is distributed in the hope that it will be useful, -- * but WITHOUT ANY WARRANTY; without even the implied warranty of -- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -- * Lesser General Public License for more details. -- * -- * You should have received a copy of the GNU Lesser General Public -- * License along with this library; if not, write to the Free Software -- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -- */ -- --#include "libelf-private.h" -- --/* ------------------------------------------------------------------------ */ -- --static const char *rel_names_i386[] = { -- "R_386_NONE", -- "R_386_32", -- "R_386_PC32", -- "R_386_GOT32", -- "R_386_PLT32", -- "R_386_COPY", -- "R_386_GLOB_DAT", -- "R_386_JMP_SLOT", -- "R_386_RELATIVE", -- "R_386_GOTOFF", -- "R_386_GOTPC", -- "R_386_32PLT", -- "R_386_TLS_TPOFF", -- "R_386_TLS_IE", -- "R_386_TLS_GOTIE", -- "R_386_TLS_LE", -- "R_386_TLS_GD", -- "R_386_TLS_LDM", -- "R_386_16", -- "R_386_PC16", -- "R_386_8", -- "R_386_PC8", -- "R_386_TLS_GD_32", -- "R_386_TLS_GD_PUSH", -- "R_386_TLS_GD_CALL", -- "R_386_TLS_GD_POP", -- "R_386_TLS_LDM_32", -- "R_386_TLS_LDM_PUSH", -- "R_386_TLS_LDM_CALL", -- "R_386_TLS_LDM_POP", -- "R_386_TLS_LDO_32", -- "R_386_TLS_IE_32", -- "R_386_TLS_LE_32", -- "R_386_TLS_DTPMOD32", -- "R_386_TLS_DTPOFF32", -- "R_386_TLS_TPOFF32", --}; -- --static int elf_reloc_i386(struct elf_binary *elf, int type, -- uint64_t addr, uint64_t value) --{ -- void *ptr = elf_get_ptr(elf, addr); -- uint32_t *u32; -- -- switch ( type ) -- { -- case 1 /* R_386_32 */ : -- u32 = ptr; -- *u32 += elf->reloc_offset; -- break; -- case 2 /* R_386_PC32 */ : -- /* nothing */ -- break; -- default: -- return -1; -- } -- return 0; --} -- --/* ------------------------------------------------------------------------ */ -- --static const char *rel_names_x86_64[] = { -- "R_X86_64_NONE", -- "R_X86_64_64", -- "R_X86_64_PC32", -- "R_X86_64_GOT32", -- "R_X86_64_PLT32", -- "R_X86_64_COPY", -- "R_X86_64_GLOB_DAT", -- "R_X86_64_JUMP_SLOT", -- "R_X86_64_RELATIVE", -- "R_X86_64_GOTPCREL", -- "R_X86_64_32", -- "R_X86_64_32S", -- "R_X86_64_16", -- "R_X86_64_PC16", -- "R_X86_64_8", -- "R_X86_64_PC8", -- "R_X86_64_DTPMOD64", -- "R_X86_64_DTPOFF64", -- "R_X86_64_TPOFF64", -- "R_X86_64_TLSGD", -- "R_X86_64_TLSLD", -- "R_X86_64_DTPOFF32", -- "R_X86_64_GOTTPOFF", -- "R_X86_64_TPOFF32", --}; -- --static int elf_reloc_x86_64(struct elf_binary *elf, int type, -- uint64_t addr, uint64_t value) --{ -- void *ptr = elf_get_ptr(elf, addr); -- uint64_t *u64; -- uint32_t *u32; -- int32_t *s32; -- -- switch ( type ) -- { -- case 1 /* R_X86_64_64 */ : -- u64 = ptr; -- value += elf->reloc_offset; -- *u64 = value; -- break; -- case 2 /* R_X86_64_PC32 */ : -- u32 = ptr; -- *u32 = value - addr; -- if ( *u32 != (uint32_t)(value - addr) ) -- { -- elf_err(elf, "R_X86_64_PC32 overflow: 0x%" PRIx32 -- " != 0x%" PRIx32 "\n", -- *u32, (uint32_t) (value - addr)); -- return -1; -- } -- break; -- case 10 /* R_X86_64_32 */ : -- u32 = ptr; -- value += elf->reloc_offset; -- *u32 = value; -- if ( *u32 != value ) -- { -- elf_err(elf, "R_X86_64_32 overflow: 0x%" PRIx32 -- " != 0x%" PRIx64 "\n", -- *u32, value); -- return -1; -- } -- break; -- case 11 /* R_X86_64_32S */ : -- s32 = ptr; -- value += elf->reloc_offset; -- *s32 = value; -- if ( *s32 != (int64_t) value ) -- { -- elf_err(elf, "R_X86_64_32S overflow: 0x%" PRIx32 -- " != 0x%" PRIx64 "\n", -- *s32, (int64_t) value); -- return -1; -- } -- break; -- default: -- return -1; -- } -- return 0; --} -- --/* ------------------------------------------------------------------------ */ -- --static struct relocs { -- const char **names; -- int count; -- int (*func) (struct elf_binary * elf, int type, uint64_t addr, -- uint64_t value); --} relocs[] = --/* *INDENT-OFF* */ --{ -- [EM_386] = { -- .names = rel_names_i386, -- .count = sizeof(rel_names_i386) / sizeof(rel_names_i386[0]), -- .func = elf_reloc_i386, -- }, -- [EM_X86_64] = { -- .names = rel_names_x86_64, -- .count = sizeof(rel_names_x86_64) / sizeof(rel_names_x86_64[0]), -- .func = elf_reloc_x86_64, -- } --}; --/* *INDENT-ON* */ -- --/* ------------------------------------------------------------------------ */ -- --static const char *rela_name(int machine, int type) --{ -- if ( machine > sizeof(relocs) / sizeof(relocs[0]) ) -- return "unknown mach"; -- if ( !relocs[machine].names ) -- return "unknown mach"; -- if ( type > relocs[machine].count ) -- return "unknown rela"; -- return relocs[machine].names[type]; --} -- --static int elf_reloc_section(struct elf_binary *elf, -- const elf_shdr * rels, -- const elf_shdr * sect, const elf_shdr * syms) --{ -- const void *ptr, *end; -- const elf_shdr *shdr; -- const elf_rela *rela; -- const elf_rel *rel; -- const elf_sym *sym; -- uint64_t s_type; -- uint64_t r_offset; -- uint64_t r_info; -- uint64_t r_addend; -- int r_type, r_sym; -- size_t rsize; -- uint64_t shndx, sbase, addr, value; -- const char *sname; -- int machine; -- -- machine = elf_uval(elf, elf->ehdr, e_machine); -- if ( (machine >= (sizeof(relocs) / sizeof(relocs[0]))) || -- (relocs[machine].func == NULL) ) -- { -- elf_err(elf, "%s: can't handle machine %d\n", -- __FUNCTION__, machine); -- return -1; -- } -- if ( elf_swap(elf) ) -- { -- elf_err(elf, "%s: non-native byte order, relocation not supported\n", -- __FUNCTION__); -- return -1; -- } -- -- s_type = elf_uval(elf, rels, sh_type); -- rsize = (SHT_REL == s_type) ? elf_size(elf, rel) : elf_size(elf, rela); -- ptr = elf_section_start(elf, rels); -- end = elf_section_end(elf, rels); -- -- for ( ; ptr < end; ptr += rsize ) -- { -- switch ( s_type ) -- { -- case SHT_REL: -- rel = ptr; -- r_offset = elf_uval(elf, rel, r_offset); -- r_info = elf_uval(elf, rel, r_info); -- r_addend = 0; -- break; -- case SHT_RELA: -- rela = ptr; -- r_offset = elf_uval(elf, rela, r_offset); -- r_info = elf_uval(elf, rela, r_info); -- r_addend = elf_uval(elf, rela, r_addend); -- break; -- default: -- /* can't happen */ -- return -1; -- } -- if ( elf_64bit(elf) ) -- { -- r_type = ELF64_R_TYPE(r_info); -- r_sym = ELF64_R_SYM(r_info); -- } -- else -- { -- r_type = ELF32_R_TYPE(r_info); -- r_sym = ELF32_R_SYM(r_info); -- } -- -- sym = elf_sym_by_index(elf, r_sym); -- shndx = elf_uval(elf, sym, st_shndx); -- switch ( shndx ) -- { -- case SHN_UNDEF: -- sname = "*UNDEF*"; -- sbase = 0; -- break; -- case SHN_COMMON: -- elf_err(elf, "%s: invalid section: %" PRId64 "\n", -- __FUNCTION__, shndx); -- return -1; -- case SHN_ABS: -- sname = "*ABS*"; -- sbase = 0; -- break; -- default: -- shdr = elf_shdr_by_index(elf, shndx); -- if ( shdr == NULL ) -- { -- elf_err(elf, "%s: invalid section: %" PRId64 "\n", -- __FUNCTION__, shndx); -- return -1; -- } -- sname = elf_section_name(elf, shdr); -- sbase = elf_uval(elf, shdr, sh_addr); -- } -- -- addr = r_offset; -- value = elf_uval(elf, sym, st_value); -- value += r_addend; -- -- if ( elf->log_callback && (elf->verbose > 1) ) -- { -- uint64_t st_name = elf_uval(elf, sym, st_name); -- const char *name = st_name ? elf->sym_strtab + st_name : "*NONE*"; -- -- elf_msg(elf, -- "%s: type %s [%d], off 0x%" PRIx64 ", add 0x%" PRIx64 "," -- " sym %s [0x%" PRIx64 "], sec %s [0x%" PRIx64 "]" -- " -> addr 0x%" PRIx64 " value 0x%" PRIx64 "\n", -- __FUNCTION__, rela_name(machine, r_type), r_type, r_offset, -- r_addend, name, elf_uval(elf, sym, st_value), sname, sbase, -- addr, value); -- } -- -- if ( relocs[machine].func(elf, r_type, addr, value) == -1 ) -- { -- elf_err(elf, "%s: unknown/unsupported reloc type %s [%d]\n", -- __FUNCTION__, rela_name(machine, r_type), r_type); -- return -1; -- } -- } -- return 0; --} -- --int elf_reloc(struct elf_binary *elf) --{ -- const elf_shdr *rels, *sect, *syms; -- uint64_t i, count, type; -- -- count = elf_shdr_count(elf); -- for ( i = 0; i < count; i++ ) -- { -- rels = elf_shdr_by_index(elf, i); -- type = elf_uval(elf, rels, sh_type); -- if ( (type != SHT_REL) && (type != SHT_RELA) ) -- continue; -- -- sect = elf_shdr_by_index(elf, elf_uval(elf, rels, sh_info)); -- syms = elf_shdr_by_index(elf, elf_uval(elf, rels, sh_link)); -- if ( NULL == sect || NULL == syms ) -- continue; -- -- if ( !(elf_uval(elf, sect, sh_flags) & SHF_ALLOC) ) -- { -- elf_msg(elf, "%s: relocations for %s, skipping\n", -- __FUNCTION__, elf_section_name(elf, sect)); -- continue; -- } -- -- elf_msg(elf, "%s: relocations for %s @ 0x%" PRIx64 "\n", -- __FUNCTION__, elf_section_name(elf, sect), -- elf_uval(elf, sect, sh_addr)); -- if ( elf_reloc_section(elf, rels, sect, syms) != 0 ) -- return -1; -- } -- return 0; --} -- --/* -- * Local variables: -- * mode: C -- * c-set-style: "BSD" -- * c-basic-offset: 4 -- * tab-width: 4 -- * indent-tabs-mode: nil -- * End: -- */ -diff --git a/xen/common/libelf/libelf-tools.c b/xen/common/libelf/libelf-tools.c -index cb97908..e202249 100644 ---- a/xen/common/libelf/libelf-tools.c -+++ b/xen/common/libelf/libelf-tools.c -@@ -20,201 +20,292 @@ - - /* ------------------------------------------------------------------------ */ - --uint64_t elf_access_unsigned(struct elf_binary * elf, const void *ptr, -- uint64_t offset, size_t size) -+void elf_mark_broken(struct elf_binary *elf, const char *msg) - { -- int need_swap = elf_swap(elf); -+ if ( elf->broken == NULL ) -+ elf->broken = msg; -+} -+ -+const char *elf_check_broken(const struct elf_binary *elf) -+{ -+ return elf->broken; -+} -+ -+static bool elf_ptrval_in_range(elf_ptrval ptrval, uint64_t size, -+ const void *region, uint64_t regionsize) -+ /* -+ * Returns true if the putative memory area [ptrval,ptrval+size> -+ * is completely inside the region [region,region+regionsize>. -+ * -+ * ptrval and size are the untrusted inputs to be checked. -+ * region and regionsize are trusted and must be correct and valid, -+ * although it is OK for region to perhaps be maliciously NULL -+ * (but not some other malicious value). -+ */ -+{ -+ elf_ptrval regionp = (elf_ptrval)region; -+ -+ if ( (region == NULL) || -+ (ptrval < regionp) || /* start is before region */ -+ (ptrval > regionp + regionsize) || /* start is after region */ -+ (size > regionsize - (ptrval - regionp)) ) /* too big */ -+ return 0; -+ return 1; -+} -+ -+bool elf_access_ok(struct elf_binary * elf, -+ uint64_t ptrval, size_t size) -+{ -+ if ( elf_ptrval_in_range(ptrval, size, elf->image_base, elf->size) ) -+ return 1; -+ if ( elf_ptrval_in_range(ptrval, size, elf->dest_base, elf->dest_size) ) -+ return 1; -+ if ( elf_ptrval_in_range(ptrval, size, -+ elf->caller_xdest_base, elf->caller_xdest_size) ) -+ return 1; -+ elf_mark_broken(elf, "out of range access"); -+ return 0; -+} -+ -+void elf_memcpy_safe(struct elf_binary *elf, elf_ptrval dst, -+ elf_ptrval src, size_t size) -+{ -+ if ( elf_access_ok(elf, dst, size) && -+ elf_access_ok(elf, src, size) ) -+ { -+ /* use memmove because these checks do not prove that the -+ * regions don't overlap and overlapping regions grant -+ * permission for compiler malice */ -+ elf_memmove_unchecked(ELF_UNSAFE_PTR(dst), ELF_UNSAFE_PTR(src), size); -+ } -+} -+ -+void elf_memset_safe(struct elf_binary *elf, elf_ptrval dst, int c, size_t size) -+{ -+ if ( elf_access_ok(elf, dst, size) ) -+ { -+ elf_memset_unchecked(ELF_UNSAFE_PTR(dst), c, size); -+ } -+} -+ -+uint64_t elf_access_unsigned(struct elf_binary * elf, elf_ptrval base, -+ uint64_t moreoffset, size_t size) -+{ -+ elf_ptrval ptrval = base + moreoffset; -+ bool need_swap = elf_swap(elf); - const uint8_t *u8; - const uint16_t *u16; - const uint32_t *u32; - const uint64_t *u64; - -+ if ( !elf_access_ok(elf, ptrval, size) ) -+ return 0; -+ - switch ( size ) - { - case 1: -- u8 = ptr + offset; -+ u8 = (const void*)ptrval; - return *u8; - case 2: -- u16 = ptr + offset; -+ u16 = (const void*)ptrval; - return need_swap ? bswap_16(*u16) : *u16; - case 4: -- u32 = ptr + offset; -+ u32 = (const void*)ptrval; - return need_swap ? bswap_32(*u32) : *u32; - case 8: -- u64 = ptr + offset; -+ u64 = (const void*)ptrval; - return need_swap ? bswap_64(*u64) : *u64; - default: - return 0; - } - } - --int64_t elf_access_signed(struct elf_binary *elf, const void *ptr, -- uint64_t offset, size_t size) --{ -- int need_swap = elf_swap(elf); -- const int8_t *s8; -- const int16_t *s16; -- const int32_t *s32; -- const int64_t *s64; -- -- switch ( size ) -- { -- case 1: -- s8 = ptr + offset; -- return *s8; -- case 2: -- s16 = ptr + offset; -- return need_swap ? bswap_16(*s16) : *s16; -- case 4: -- s32 = ptr + offset; -- return need_swap ? bswap_32(*s32) : *s32; -- case 8: -- s64 = ptr + offset; -- return need_swap ? bswap_64(*s64) : *s64; -- default: -- return 0; -- } --} -- - uint64_t elf_round_up(struct elf_binary *elf, uint64_t addr) - { -- int elf_round = (elf_64bit(elf) ? 8 : 4) - 1; -+ uint64_t elf_round = (elf_64bit(elf) ? 8 : 4) - 1; - - return (addr + elf_round) & ~elf_round; - } - - /* ------------------------------------------------------------------------ */ - --int elf_shdr_count(struct elf_binary *elf) -+unsigned elf_shdr_count(struct elf_binary *elf) - { -- return elf_uval(elf, elf->ehdr, e_shnum); -+ unsigned count = elf_uval(elf, elf->ehdr, e_shnum); -+ uint64_t max = elf->size / sizeof(Elf32_Shdr); -+ if (max > ~(unsigned)0) -+ max = ~(unsigned)0; /* Xen doesn't have limits.h :-/ */ -+ if (count > max) -+ { -+ elf_mark_broken(elf, "far too many section headers"); -+ count = max; -+ } -+ return count; - } - --int elf_phdr_count(struct elf_binary *elf) -+unsigned elf_phdr_count(struct elf_binary *elf) - { - return elf_uval(elf, elf->ehdr, e_phnum); - } - --const elf_shdr *elf_shdr_by_name(struct elf_binary *elf, const char *name) -+ELF_HANDLE_DECL(elf_shdr) elf_shdr_by_name(struct elf_binary *elf, const char *name) - { - uint64_t count = elf_shdr_count(elf); -- const elf_shdr *shdr; -+ ELF_HANDLE_DECL(elf_shdr) shdr; - const char *sname; -- int i; -+ unsigned i; - - for ( i = 0; i < count; i++ ) - { - shdr = elf_shdr_by_index(elf, i); -+ if ( !elf_access_ok(elf, ELF_HANDLE_PTRVAL(shdr), 1) ) -+ /* input has an insane section header count field */ -+ break; - sname = elf_section_name(elf, shdr); - if ( sname && !strcmp(sname, name) ) - return shdr; - } -- return NULL; -+ return ELF_INVALID_HANDLE(elf_shdr); - } - --const elf_shdr *elf_shdr_by_index(struct elf_binary *elf, int index) -+ELF_HANDLE_DECL(elf_shdr) elf_shdr_by_index(struct elf_binary *elf, unsigned index) - { - uint64_t count = elf_shdr_count(elf); -- const void *ptr; -+ elf_ptrval ptr; - - if ( index >= count ) -- return NULL; -+ return ELF_INVALID_HANDLE(elf_shdr); - -- ptr = (elf->image -+ ptr = (ELF_IMAGE_BASE(elf) - + elf_uval(elf, elf->ehdr, e_shoff) - + elf_uval(elf, elf->ehdr, e_shentsize) * index); -- return ptr; -+ return ELF_MAKE_HANDLE(elf_shdr, ptr); - } - --const elf_phdr *elf_phdr_by_index(struct elf_binary *elf, int index) -+ELF_HANDLE_DECL(elf_phdr) elf_phdr_by_index(struct elf_binary *elf, unsigned index) - { - uint64_t count = elf_uval(elf, elf->ehdr, e_phnum); -- const void *ptr; -+ elf_ptrval ptr; - - if ( index >= count ) -- return NULL; -+ return ELF_INVALID_HANDLE(elf_phdr); - -- ptr = (elf->image -+ ptr = (ELF_IMAGE_BASE(elf) - + elf_uval(elf, elf->ehdr, e_phoff) - + elf_uval(elf, elf->ehdr, e_phentsize) * index); -- return ptr; -+ return ELF_MAKE_HANDLE(elf_phdr, ptr); - } - --const char *elf_section_name(struct elf_binary *elf, const elf_shdr * shdr) -+ -+const char *elf_section_name(struct elf_binary *elf, -+ ELF_HANDLE_DECL(elf_shdr) shdr) - { -- if ( elf->sec_strtab == NULL ) -+ if ( ELF_PTRVAL_INVALID(elf->sec_strtab) ) - return "unknown"; -- return elf->sec_strtab + elf_uval(elf, shdr, sh_name); -+ -+ return elf_strval(elf, elf->sec_strtab + elf_uval(elf, shdr, sh_name)); - } - --const void *elf_section_start(struct elf_binary *elf, const elf_shdr * shdr) -+const char *elf_strval(struct elf_binary *elf, elf_ptrval start) - { -- return elf->image + elf_uval(elf, shdr, sh_offset); -+ uint64_t length; -+ -+ for ( length = 0; ; length++ ) { -+ if ( !elf_access_ok(elf, start + length, 1) ) -+ return NULL; -+ if ( !elf_access_unsigned(elf, start, length, 1) ) -+ /* ok */ -+ return ELF_UNSAFE_PTR(start); -+ if ( length >= ELF_MAX_STRING_LENGTH ) -+ { -+ elf_mark_broken(elf, "excessively long string"); -+ return NULL; -+ } -+ } - } - --const void *elf_section_end(struct elf_binary *elf, const elf_shdr * shdr) -+const char *elf_strfmt(struct elf_binary *elf, elf_ptrval start) - { -- return elf->image -+ const char *str = elf_strval(elf, start); -+ -+ if ( str == NULL ) -+ return "(invalid)"; -+ return str; -+} -+ -+elf_ptrval elf_section_start(struct elf_binary *elf, ELF_HANDLE_DECL(elf_shdr) shdr) -+{ -+ return ELF_IMAGE_BASE(elf) + elf_uval(elf, shdr, sh_offset); -+} -+ -+elf_ptrval elf_section_end(struct elf_binary *elf, ELF_HANDLE_DECL(elf_shdr) shdr) -+{ -+ return ELF_IMAGE_BASE(elf) - + elf_uval(elf, shdr, sh_offset) + elf_uval(elf, shdr, sh_size); - } - --const void *elf_segment_start(struct elf_binary *elf, const elf_phdr * phdr) -+elf_ptrval elf_segment_start(struct elf_binary *elf, ELF_HANDLE_DECL(elf_phdr) phdr) - { -- return elf->image + elf_uval(elf, phdr, p_offset); -+ return ELF_IMAGE_BASE(elf) -+ + elf_uval(elf, phdr, p_offset); - } - --const void *elf_segment_end(struct elf_binary *elf, const elf_phdr * phdr) -+elf_ptrval elf_segment_end(struct elf_binary *elf, ELF_HANDLE_DECL(elf_phdr) phdr) - { -- return elf->image -+ return ELF_IMAGE_BASE(elf) - + elf_uval(elf, phdr, p_offset) + elf_uval(elf, phdr, p_filesz); - } - --const elf_sym *elf_sym_by_name(struct elf_binary *elf, const char *symbol) -+ELF_HANDLE_DECL(elf_sym) elf_sym_by_name(struct elf_binary *elf, const char *symbol) - { -- const void *ptr = elf_section_start(elf, elf->sym_tab); -- const void *end = elf_section_end(elf, elf->sym_tab); -- const elf_sym *sym; -+ elf_ptrval ptr = elf_section_start(elf, elf->sym_tab); -+ elf_ptrval end = elf_section_end(elf, elf->sym_tab); -+ ELF_HANDLE_DECL(elf_sym) sym; - uint64_t info, name; -+ const char *sym_name; - - for ( ; ptr < end; ptr += elf_size(elf, sym) ) - { -- sym = ptr; -+ sym = ELF_MAKE_HANDLE(elf_sym, ptr); - info = elf_uval(elf, sym, st_info); - name = elf_uval(elf, sym, st_name); - if ( ELF32_ST_BIND(info) != STB_GLOBAL ) - continue; -- if ( strcmp(elf->sym_strtab + name, symbol) ) -+ sym_name = elf_strval(elf, elf->sym_strtab + name); -+ if ( sym_name == NULL ) /* out of range, oops */ -+ return ELF_INVALID_HANDLE(elf_sym); -+ if ( strcmp(sym_name, symbol) ) - continue; - return sym; - } -- return NULL; -+ return ELF_INVALID_HANDLE(elf_sym); - } - --const elf_sym *elf_sym_by_index(struct elf_binary *elf, int index) -+ELF_HANDLE_DECL(elf_sym) elf_sym_by_index(struct elf_binary *elf, unsigned index) - { -- const void *ptr = elf_section_start(elf, elf->sym_tab); -- const elf_sym *sym; -+ elf_ptrval ptr = elf_section_start(elf, elf->sym_tab); -+ ELF_HANDLE_DECL(elf_sym) sym; - -- sym = ptr + index * elf_size(elf, sym); -+ sym = ELF_MAKE_HANDLE(elf_sym, ptr + index * elf_size(elf, sym)); - return sym; - } - --const char *elf_note_name(struct elf_binary *elf, const elf_note * note) -+const char *elf_note_name(struct elf_binary *elf, ELF_HANDLE_DECL(elf_note) note) - { -- return (void *)note + elf_size(elf, note); -+ return elf_strval(elf, ELF_HANDLE_PTRVAL(note) + elf_size(elf, note)); - } - --const void *elf_note_desc(struct elf_binary *elf, const elf_note * note) -+elf_ptrval elf_note_desc(struct elf_binary *elf, ELF_HANDLE_DECL(elf_note) note) - { -- int namesz = (elf_uval(elf, note, namesz) + 3) & ~3; -+ unsigned namesz = (elf_uval(elf, note, namesz) + 3) & ~3; - -- return (void *)note + elf_size(elf, note) + namesz; -+ return ELF_HANDLE_PTRVAL(note) + elf_size(elf, note) + namesz; - } - --uint64_t elf_note_numeric(struct elf_binary *elf, const elf_note * note) -+uint64_t elf_note_numeric(struct elf_binary *elf, ELF_HANDLE_DECL(elf_note) note) - { -- const void *desc = elf_note_desc(elf, note); -- int descsz = elf_uval(elf, note, descsz); -+ elf_ptrval desc = elf_note_desc(elf, note); -+ unsigned descsz = elf_uval(elf, note, descsz); - - switch (descsz) - { -@@ -228,11 +319,11 @@ uint64_t elf_note_numeric(struct elf_binary *elf, const elf_note * note) - } - } - --uint64_t elf_note_numeric_array(struct elf_binary *elf, const elf_note *note, -+uint64_t elf_note_numeric_array(struct elf_binary *elf, ELF_HANDLE_DECL(elf_note) note, - unsigned int unitsz, unsigned int idx) - { -- const void *desc = elf_note_desc(elf, note); -- int descsz = elf_uval(elf, note, descsz); -+ elf_ptrval desc = elf_note_desc(elf, note); -+ unsigned descsz = elf_uval(elf, note, descsz); - - if ( descsz % unitsz || idx >= descsz / unitsz ) - return 0; -@@ -248,24 +339,34 @@ uint64_t elf_note_numeric_array(struct elf_binary *elf, const elf_note *note, - } - } - --const elf_note *elf_note_next(struct elf_binary *elf, const elf_note * note) -+ELF_HANDLE_DECL(elf_note) elf_note_next(struct elf_binary *elf, ELF_HANDLE_DECL(elf_note) note) - { -- int namesz = (elf_uval(elf, note, namesz) + 3) & ~3; -- int descsz = (elf_uval(elf, note, descsz) + 3) & ~3; -+ unsigned namesz = (elf_uval(elf, note, namesz) + 3) & ~3; -+ unsigned descsz = (elf_uval(elf, note, descsz) + 3) & ~3; -+ -+ elf_ptrval ptrval = ELF_HANDLE_PTRVAL(note) -+ + elf_size(elf, note) + namesz + descsz; - -- return (void *)note + elf_size(elf, note) + namesz + descsz; -+ if ( ( ptrval <= ELF_HANDLE_PTRVAL(note) || /* wrapped or stuck */ -+ !elf_access_ok(elf, ELF_HANDLE_PTRVAL(note), 1) ) ) -+ ptrval = ELF_MAX_PTRVAL; /* terminate caller's loop */ -+ -+ return ELF_MAKE_HANDLE(elf_note, ptrval); - } - - /* ------------------------------------------------------------------------ */ - --int elf_is_elfbinary(const void *image) -+bool elf_is_elfbinary(const void *image_start, size_t image_size) - { -- const Elf32_Ehdr *ehdr = image; -+ const Elf32_Ehdr *ehdr = image_start; -+ -+ if ( image_size < sizeof(*ehdr) ) -+ return 0; - - return IS_ELF(*ehdr); - } - --int elf_phdr_is_loadable(struct elf_binary *elf, const elf_phdr * phdr) -+bool elf_phdr_is_loadable(struct elf_binary *elf, ELF_HANDLE_DECL(elf_phdr) phdr) - { - uint64_t p_type = elf_uval(elf, phdr, p_type); - uint64_t p_flags = elf_uval(elf, phdr, p_flags); -diff --git a/xen/include/xen/libelf.h b/xen/include/xen/libelf.h -index e8f6508..174f8da 100644 ---- a/xen/include/xen/libelf.h -+++ b/xen/include/xen/libelf.h -@@ -29,6 +29,11 @@ - #error define architectural endianness - #endif - -+#include <stdbool.h> -+ -+typedef int elf_errorstatus; /* 0: ok; -ve (normally -1): error */ -+typedef int elf_negerrnoval; /* 0: ok; -EFOO: error */ -+ - #undef ELFSIZE - #include "elfstructs.h" - #ifdef __XEN__ -@@ -42,12 +47,98 @@ - - struct elf_binary; - typedef void elf_log_callback(struct elf_binary*, void *caller_data, -- int iserr, const char *fmt, va_list al); -+ bool iserr, const char *fmt, va_list al); -+ -+#endif -+ -+#define ELF_MAX_STRING_LENGTH 4096 -+#define ELF_MAX_TOTAL_NOTE_COUNT 65536 -+ -+/* ------------------------------------------------------------------------ */ -+ -+/* Macros for accessing the input image and output area. */ -+ -+/* -+ * We abstract away the pointerness of these pointers, replacing -+ * various void*, char* and struct* with the following: -+ * elf_ptrval A pointer to a byte; one can do pointer arithmetic -+ * on this. -+ * HANDLE A pointer to a struct. There is one of these types -+ * for each pointer type - that is, for each "structname". -+ * In the arguments to the various HANDLE macros, structname -+ * must be a single identifier which is a typedef. -+ * It is not permitted to do arithmetic on these -+ * pointers. In the current code attempts to do so will -+ * compile, but in the next patch this will become a -+ * compile error. -+ */ -+ -+typedef uintptr_t elf_ptrval; -+ -+#define ELF_REALPTR2PTRVAL(realpointer) ((elf_ptrval)(realpointer)) -+ /* Converts an actual C pointer into a PTRVAL */ -+ -+#define ELF_HANDLE_DECL(structname) structname##_handle -+ /* Provides a type declaration for a HANDLE. */ - -+#ifdef __XEN__ -+# define ELF_PRPTRVAL "lu" -+ /* -+ * PRIuPTR is misdefined in xen/include/xen/inttypes.h, on 32-bit, -+ * to "u", when in fact uintptr_t is an unsigned long. -+ */ -+#else -+# define ELF_PRPTRVAL PRIuPTR - #endif -+ /* printf format a la PRId... for a PTRVAL */ -+ -+#define ELF_DEFINE_HANDLE(structname) \ -+ typedef union { \ -+ elf_ptrval ptrval; \ -+ const structname *typeonly; /* for sizeof, offsetof, &c only */ \ -+ } structname##_handle; -+ /* -+ * This must be invoked for each HANDLE type to define -+ * the actual C type used for that kind of HANDLE. -+ */ -+ -+#define ELF_MAKE_HANDLE(structname, ptrval) ((structname##_handle){ ptrval }) -+ /* Converts a PTRVAL to a HANDLE */ -+ -+#define ELF_IMAGE_BASE(elf) ((elf_ptrval)(elf)->image_base) -+ /* Returns the base of the image as a PTRVAL. */ -+ -+#define ELF_HANDLE_PTRVAL(handleval) ((handleval).ptrval) -+ /* Converts a HANDLE to a PTRVAL. */ -+ -+#define ELF_UNSAFE_PTR(ptrval) ((void*)(elf_ptrval)(ptrval)) -+ /* -+ * Turns a PTRVAL into an actual C pointer. Before this is done -+ * the caller must have ensured that the PTRVAL does in fact point -+ * to a permissible location. -+ */ -+ -+/* PTRVALs can be INVALID (ie, NULL). */ -+#define ELF_INVALID_PTRVAL ((elf_ptrval)0) /* returns NULL PTRVAL */ -+#define ELF_INVALID_HANDLE(structname) /* returns NULL handle */ \ -+ ELF_MAKE_HANDLE(structname, ELF_INVALID_PTRVAL) -+#define ELF_PTRVAL_VALID(ptrval) (!!(ptrval)) /* } */ -+#define ELF_HANDLE_VALID(handleval) (!!(handleval).ptrval) /* } predicates */ -+#define ELF_PTRVAL_INVALID(ptrval) (!ELF_PTRVAL_VALID((ptrval))) /* } */ -+ -+#define ELF_MAX_PTRVAL (~(elf_ptrval)0) -+ /* PTRVAL value guaranteed to compare > to any valid PTRVAL */ -+ -+/* For internal use by other macros here */ -+#define ELF__HANDLE_FIELD_TYPE(handleval, elm) \ -+ typeof((handleval).typeonly->elm) -+#define ELF__HANDLE_FIELD_OFFSET(handleval, elm) \ -+ offsetof(typeof(*(handleval).typeonly),elm) -+ - - /* ------------------------------------------------------------------------ */ - -+ - typedef union { - Elf32_Ehdr e32; - Elf64_Ehdr e64; -@@ -83,20 +174,32 @@ typedef union { - Elf64_Note e64; - } elf_note; - -+ELF_DEFINE_HANDLE(elf_ehdr) -+ELF_DEFINE_HANDLE(elf_shdr) -+ELF_DEFINE_HANDLE(elf_phdr) -+ELF_DEFINE_HANDLE(elf_sym) -+ELF_DEFINE_HANDLE(elf_note) -+ - struct elf_binary { - /* elf binary */ -- const char *image; -+ const void *image_base; - size_t size; - char class; - char data; - -- const elf_ehdr *ehdr; -- const char *sec_strtab; -- const elf_shdr *sym_tab; -- const char *sym_strtab; -+ ELF_HANDLE_DECL(elf_ehdr) ehdr; -+ elf_ptrval sec_strtab; -+ ELF_HANDLE_DECL(elf_shdr) sym_tab; -+ uint64_t sym_strtab; - - /* loaded to */ -- char *dest; -+ /* -+ * dest_base and dest_size are trusted and must be correct; -+ * whenever dest_size is not 0, both of these must be valid -+ * so long as the struct elf_binary is in use. -+ */ -+ char *dest_base; -+ size_t dest_size; - uint64_t pstart; - uint64_t pend; - uint64_t reloc_offset; -@@ -104,12 +207,22 @@ struct elf_binary { - uint64_t bsd_symtab_pstart; - uint64_t bsd_symtab_pend; - -+ /* -+ * caller's other acceptable destination -+ * -+ * Again, these are trusted and must be valid (or 0) so long -+ * as the struct elf_binary is in use. -+ */ -+ void *caller_xdest_base; -+ uint64_t caller_xdest_size; -+ - #ifndef __XEN__ - /* misc */ - elf_log_callback *log_callback; - void *log_caller_data; - #endif -- int verbose; -+ bool verbose; -+ const char *broken; - }; - - /* ------------------------------------------------------------------------ */ -@@ -127,88 +240,145 @@ struct elf_binary { - #define elf_lsb(elf) (ELFDATA2LSB == (elf)->data) - #define elf_swap(elf) (NATIVE_ELFDATA != (elf)->data) - --#define elf_uval(elf, str, elem) \ -- ((ELFCLASS64 == (elf)->class) \ -- ? elf_access_unsigned((elf), (str), \ -- offsetof(typeof(*(str)),e64.elem), \ -- sizeof((str)->e64.elem)) \ -- : elf_access_unsigned((elf), (str), \ -- offsetof(typeof(*(str)),e32.elem), \ -- sizeof((str)->e32.elem))) -- --#define elf_sval(elf, str, elem) \ -- ((ELFCLASS64 == (elf)->class) \ -- ? elf_access_signed((elf), (str), \ -- offsetof(typeof(*(str)),e64.elem), \ -- sizeof((str)->e64.elem)) \ -- : elf_access_signed((elf), (str), \ -- offsetof(typeof(*(str)),e32.elem), \ -- sizeof((str)->e32.elem))) -- --#define elf_size(elf, str) \ -- ((ELFCLASS64 == (elf)->class) \ -- ? sizeof((str)->e64) : sizeof((str)->e32)) -+#define elf_uval_3264(elf, handle, elem) \ -+ elf_access_unsigned((elf), (handle).ptrval, \ -+ offsetof(typeof(*(handle).typeonly),elem), \ -+ sizeof((handle).typeonly->elem)) -+ -+#define elf_uval(elf, handle, elem) \ -+ ((ELFCLASS64 == (elf)->class) \ -+ ? elf_uval_3264(elf, handle, e64.elem) \ -+ : elf_uval_3264(elf, handle, e32.elem)) -+ /* -+ * Reads an unsigned field in a header structure in the ELF. -+ * str is a HANDLE, and elem is the field name in it. -+ */ - --uint64_t elf_access_unsigned(struct elf_binary *elf, const void *ptr, -+ -+#define elf_size(elf, handle_or_handletype) ({ \ -+ typeof(handle_or_handletype) elf_size__dummy; \ -+ ((ELFCLASS64 == (elf)->class) \ -+ ? sizeof(elf_size__dummy.typeonly->e64) \ -+ : sizeof(elf_size__dummy.typeonly->e32)); \ -+}) -+ /* -+ * Returns the size of the substructure for the appropriate 32/64-bitness. -+ * str should be a HANDLE. -+ */ -+ -+uint64_t elf_access_unsigned(struct elf_binary *elf, elf_ptrval ptr, - uint64_t offset, size_t size); --int64_t elf_access_signed(struct elf_binary *elf, const void *ptr, -- uint64_t offset, size_t size); -+ /* Reads a field at arbitrary offset and alignemnt */ - - uint64_t elf_round_up(struct elf_binary *elf, uint64_t addr); - -+const char *elf_strval(struct elf_binary *elf, elf_ptrval start); -+ /* may return NULL if the string is out of range etc. */ -+ -+const char *elf_strfmt(struct elf_binary *elf, elf_ptrval start); -+ /* like elf_strval but returns "(invalid)" instead of NULL */ -+ -+void elf_memcpy_safe(struct elf_binary*, elf_ptrval dst, elf_ptrval src, size_t); -+void elf_memset_safe(struct elf_binary*, elf_ptrval dst, int c, size_t); -+ /* -+ * Versions of memcpy and memset which arrange never to write -+ * outside permitted areas. -+ */ -+ -+bool elf_access_ok(struct elf_binary * elf, -+ uint64_t ptrval, size_t size); -+ -+#define elf_store_val(elf, type, ptr, val) \ -+ ({ \ -+ typeof(type) elf_store__val = (val); \ -+ elf_ptrval elf_store__targ = ptr; \ -+ if (elf_access_ok((elf), elf_store__targ, \ -+ sizeof(elf_store__val))) { \ -+ elf_memcpy_unchecked((void*)elf_store__targ, &elf_store__val, \ -+ sizeof(elf_store__val)); \ -+ } \ -+ }) \ -+ /* Stores a value at a particular PTRVAL. */ -+ -+#define elf_store_field(elf, hdr, elm, val) \ -+ (elf_store_val((elf), ELF__HANDLE_FIELD_TYPE(hdr, elm), \ -+ ELF_HANDLE_PTRVAL(hdr) + ELF__HANDLE_FIELD_OFFSET(hdr, elm), \ -+ (val))) -+ /* Stores a 32/64-bit field. hdr is a HANDLE and elm is the field name. */ -+ -+ - /* ------------------------------------------------------------------------ */ - /* xc_libelf_tools.c */ - --int elf_shdr_count(struct elf_binary *elf); --int elf_phdr_count(struct elf_binary *elf); -+unsigned elf_shdr_count(struct elf_binary *elf); -+unsigned elf_phdr_count(struct elf_binary *elf); - --const elf_shdr *elf_shdr_by_name(struct elf_binary *elf, const char *name); --const elf_shdr *elf_shdr_by_index(struct elf_binary *elf, int index); --const elf_phdr *elf_phdr_by_index(struct elf_binary *elf, int index); -+ELF_HANDLE_DECL(elf_shdr) elf_shdr_by_name(struct elf_binary *elf, const char *name); -+ELF_HANDLE_DECL(elf_shdr) elf_shdr_by_index(struct elf_binary *elf, unsigned index); -+ELF_HANDLE_DECL(elf_phdr) elf_phdr_by_index(struct elf_binary *elf, unsigned index); - --const char *elf_section_name(struct elf_binary *elf, const elf_shdr * shdr); --const void *elf_section_start(struct elf_binary *elf, const elf_shdr * shdr); --const void *elf_section_end(struct elf_binary *elf, const elf_shdr * shdr); -+const char *elf_section_name(struct elf_binary *elf, ELF_HANDLE_DECL(elf_shdr) shdr); /* might return NULL if inputs are invalid */ -+elf_ptrval elf_section_start(struct elf_binary *elf, ELF_HANDLE_DECL(elf_shdr) shdr); -+elf_ptrval elf_section_end(struct elf_binary *elf, ELF_HANDLE_DECL(elf_shdr) shdr); - --const void *elf_segment_start(struct elf_binary *elf, const elf_phdr * phdr); --const void *elf_segment_end(struct elf_binary *elf, const elf_phdr * phdr); -+elf_ptrval elf_segment_start(struct elf_binary *elf, ELF_HANDLE_DECL(elf_phdr) phdr); -+elf_ptrval elf_segment_end(struct elf_binary *elf, ELF_HANDLE_DECL(elf_phdr) phdr); - --const elf_sym *elf_sym_by_name(struct elf_binary *elf, const char *symbol); --const elf_sym *elf_sym_by_index(struct elf_binary *elf, int index); -+ELF_HANDLE_DECL(elf_sym) elf_sym_by_name(struct elf_binary *elf, const char *symbol); -+ELF_HANDLE_DECL(elf_sym) elf_sym_by_index(struct elf_binary *elf, unsigned index); - --const char *elf_note_name(struct elf_binary *elf, const elf_note * note); --const void *elf_note_desc(struct elf_binary *elf, const elf_note * note); --uint64_t elf_note_numeric(struct elf_binary *elf, const elf_note * note); --uint64_t elf_note_numeric_array(struct elf_binary *, const elf_note *, -+const char *elf_note_name(struct elf_binary *elf, ELF_HANDLE_DECL(elf_note) note); /* may return NULL */ -+elf_ptrval elf_note_desc(struct elf_binary *elf, ELF_HANDLE_DECL(elf_note) note); -+uint64_t elf_note_numeric(struct elf_binary *elf, ELF_HANDLE_DECL(elf_note) note); -+uint64_t elf_note_numeric_array(struct elf_binary *, ELF_HANDLE_DECL(elf_note), - unsigned int unitsz, unsigned int idx); --const elf_note *elf_note_next(struct elf_binary *elf, const elf_note * note); - --int elf_is_elfbinary(const void *image); --int elf_phdr_is_loadable(struct elf_binary *elf, const elf_phdr * phdr); -+/* -+ * If you use elf_note_next in a loop, you must put a nontrivial upper -+ * bound on the returned value as part of your loop condition. In -+ * some cases elf_note_next will substitute ELF_PTRVAL_MAX as return -+ * value to indicate that the iteration isn't going well (for example, -+ * the putative "next" value would be earlier in memory). In this -+ * case the caller's loop must terminate. Checking against the -+ * end of the notes segment with a strict inequality is sufficient. -+ */ -+ELF_HANDLE_DECL(elf_note) elf_note_next(struct elf_binary *elf, ELF_HANDLE_DECL(elf_note) note); -+ -+/* (Only) checks that the image has the right magic number. */ -+bool elf_is_elfbinary(const void *image_start, size_t image_size); -+ -+bool elf_phdr_is_loadable(struct elf_binary *elf, ELF_HANDLE_DECL(elf_phdr) phdr); - - /* ------------------------------------------------------------------------ */ - /* xc_libelf_loader.c */ - --int elf_init(struct elf_binary *elf, const char *image, size_t size); -+elf_errorstatus elf_init(struct elf_binary *elf, const char *image, size_t size); -+ /* -+ * image and size must be correct. They will be recorded in -+ * *elf, and must remain valid while the elf is in use. -+ */ - #ifdef __XEN__ - void elf_set_verbose(struct elf_binary *elf); - #else - void elf_set_log(struct elf_binary *elf, elf_log_callback*, -- void *log_caller_pointer, int verbose); -+ void *log_caller_pointer, bool verbose); - #endif - - void elf_parse_binary(struct elf_binary *elf); --int elf_load_binary(struct elf_binary *elf); -+elf_errorstatus elf_load_binary(struct elf_binary *elf); - --void *elf_get_ptr(struct elf_binary *elf, unsigned long addr); -+elf_ptrval elf_get_ptr(struct elf_binary *elf, unsigned long addr); - uint64_t elf_lookup_addr(struct elf_binary *elf, const char *symbol); - - void elf_parse_bsdsyms(struct elf_binary *elf, uint64_t pstart); /* private */ - -+void elf_mark_broken(struct elf_binary *elf, const char *msg); -+const char *elf_check_broken(const struct elf_binary *elf); /* NULL means OK */ -+ - /* ------------------------------------------------------------------------ */ - /* xc_libelf_relocate.c */ - --int elf_reloc(struct elf_binary *elf); -+elf_errorstatus elf_reloc(struct elf_binary *elf); - - /* ------------------------------------------------------------------------ */ - /* xc_libelf_dominfo.c */ -@@ -232,9 +402,9 @@ struct xen_elfnote { - - struct elf_dom_parms { - /* raw */ -- const char *guest_info; -- const void *elf_note_start; -- const void *elf_note_end; -+ elf_ptrval guest_info; -+ elf_ptrval elf_note_start; -+ elf_ptrval elf_note_end; - struct xen_elfnote elf_notes[XEN_ELFNOTE_MAX + 1]; - - /* parsed */ -@@ -242,8 +412,8 @@ struct elf_dom_parms { - char guest_ver[16]; - char xen_ver[16]; - char loader[16]; -- int pae; -- int bsd_symtab; -+ int pae; /* some kind of enum apparently */ -+ bool bsd_symtab; - uint64_t virt_base; - uint64_t virt_entry; - uint64_t virt_hypercall; -@@ -273,10 +443,44 @@ int elf_xen_parse_features(const char *features, - uint32_t *required); - int elf_xen_parse_note(struct elf_binary *elf, - struct elf_dom_parms *parms, -- const elf_note *note); -+ ELF_HANDLE_DECL(elf_note) note); - int elf_xen_parse_guest_info(struct elf_binary *elf, - struct elf_dom_parms *parms); - int elf_xen_parse(struct elf_binary *elf, - struct elf_dom_parms *parms); - -+static inline void *elf_memcpy_unchecked(void *dest, const void *src, size_t n) -+ { return memcpy(dest, src, n); } -+static inline void *elf_memmove_unchecked(void *dest, const void *src, size_t n) -+ { return memmove(dest, src, n); } -+static inline void *elf_memset_unchecked(void *s, int c, size_t n) -+ { return memset(s, c, n); } -+ /* -+ * Unsafe versions of memcpy, memmove memset which take actual C -+ * pointers. These are just like the real functions. -+ * We provide these so that in libelf-private.h we can #define -+ * memcpy, memset and memmove to undefined MISTAKE things. -+ */ -+ -+ -+/* Advances past amount bytes of the current destination area. */ -+static inline void ELF_ADVANCE_DEST(struct elf_binary *elf, uint64_t amount) -+{ -+ if ( elf->dest_base == NULL ) -+ { -+ elf_mark_broken(elf, "advancing in null image"); -+ } -+ else if ( elf->dest_size >= amount ) -+ { -+ elf->dest_base += amount; -+ elf->dest_size -= amount; -+ } -+ else -+ { -+ elf->dest_size = 0; -+ elf_mark_broken(elf, "advancing past end (image very short?)"); -+ } -+} -+ -+ - #endif /* __XEN_LIBELF_H__ */ diff --git a/main/xen/xsa56.patch b/main/xen/xsa56.patch deleted file mode 100644 index 1368ac3514..0000000000 --- a/main/xen/xsa56.patch +++ /dev/null @@ -1,50 +0,0 @@ -libxc: limit cpu values when setting vcpu affinity - -When support for pinning more than 64 cpus was added, check for cpu -out-of-range values was removed. This can lead to subsequent -out-of-bounds cpumap array accesses in case the cpu number is higher -than the actual count. - -This patch returns the check. - -This is CVE-2013-2072 / XSA-56 - -Signed-off-by: Petr Matousek <pmatouse@redhat.com> - -diff --git a/tools/python/xen/lowlevel/xc/xc.c b/tools/python/xen/lowlevel/xc/xc.c -index e220f68..e611b24 100644 ---- a/tools/python/xen/lowlevel/xc/xc.c -+++ b/tools/python/xen/lowlevel/xc/xc.c -@@ -228,6 +228,7 @@ static PyObject *pyxc_vcpu_setaffinity(XcObject *self, - int vcpu = 0, i; - xc_cpumap_t cpumap; - PyObject *cpulist = NULL; -+ int nr_cpus; - - static char *kwd_list[] = { "domid", "vcpu", "cpumap", NULL }; - -@@ -235,6 +236,10 @@ static PyObject *pyxc_vcpu_setaffinity(XcObject *self, - &dom, &vcpu, &cpulist) ) - return NULL; - -+ nr_cpus = xc_get_max_cpus(self->xc_handle); -+ if ( nr_cpus == 0 ) -+ return pyxc_error_to_exception(self->xc_handle); -+ - cpumap = xc_cpumap_alloc(self->xc_handle); - if(cpumap == NULL) - return pyxc_error_to_exception(self->xc_handle); -@@ -244,6 +249,13 @@ static PyObject *pyxc_vcpu_setaffinity(XcObject *self, - for ( i = 0; i < PyList_Size(cpulist); i++ ) - { - long cpu = PyInt_AsLong(PyList_GetItem(cpulist, i)); -+ if ( cpu < 0 || cpu >= nr_cpus ) -+ { -+ free(cpumap); -+ errno = EINVAL; -+ PyErr_SetFromErrno(xc_error_obj); -+ return NULL; -+ } - cpumap[cpu / 8] |= 1 << (cpu % 8); - } - } diff --git a/main/xen/xsa57.patch b/main/xen/xsa57.patch deleted file mode 100644 index 178b818890..0000000000 --- a/main/xen/xsa57.patch +++ /dev/null @@ -1,333 +0,0 @@ -libxl: Restrict permissions on PV console device xenstore nodes - -Matthew Daley has observed that the PV console protocol places sensitive host -state into a guest writeable xenstore locations, this includes: - - - The pty used to communicate between the console backend daemon and its - client, allowing the guest administrator to read and write arbitrary host - files. - - The output file, allowing the guest administrator to write arbitrary host - files or to target arbitrary qemu chardevs which include sockets, udp, ptr, - pipes etc (see -chardev in qemu(1) for a more complete list). - - The maximum buffer size, allowing the guest administrator to consume more - resources than the host administrator has configured. - - The backend to use (qemu vs xenconsoled), potentially allowing the guest - administrator to confuse host software. - -So we arrange to make the sensitive keys in the xenstore frontend directory -read only for the guest. This is safe since the xenstore permissions model, -unlike POSIX directory permissions, does not allow the guest to remove and -recreate a node if it has write access to the containing directory. - -There are a few associated wrinkles: - - - The primary PV console is "special". It's xenstore node is not under the - usual /devices/ subtree and it does not use the customary xenstore state - machine protocol. Unfortunately its directory is used for other things, - including the vnc-port node, which we do not want the guest to be able to - write to. Rather than trying to track down all the possible secondary uses - of this directory just make it r/o to the guest. All newly created - subdirectories inherit these permissions and so are now safe by default. - - - The other serial consoles do use the customary xenstore state machine and - therefore need write access to at least the "protocol" and "state" nodes, - however they may also want to use arbitrary "feature-foo" nodes (although - I'm not aware of any) and therefore we cannot simply lock down the entire - frontend directory. Instead we add support to libxl__device_generic_add for - frontend keys which are explicitly read only and use that to lock down the - sensitive keys. - - - Minios' console frontend wants to write the "type" node, which it has no - business doing since this is a host/toolstack level decision. This fails - now that the node has become read only to the PV guest. Since the toolstack - already writes this node just remove the attempt to set it. - -This is CVE-XXXX-XXX / XSA-57 - -Signed-off-by: Ian Campbell <ian.campbell@citrix.com> - -Conflicts: - tools/libxl/libxl.c (no vtpm, free front_ro on error in - libxl__device_console_add) - -diff --git a/extras/mini-os/console/xenbus.c b/extras/mini-os/console/xenbus.c -index 77de82a..e65baf7 100644 ---- a/extras/mini-os/console/xenbus.c -+++ b/extras/mini-os/console/xenbus.c -@@ -122,12 +122,6 @@ again: - goto abort_transaction; - } - -- err = xenbus_printf(xbt, nodename, "type", "%s", "ioemu"); -- if (err) { -- message = "writing type"; -- goto abort_transaction; -- } -- - snprintf(path, sizeof(path), "%s/state", nodename); - err = xenbus_switch_state(xbt, path, XenbusStateConnected); - if (err) { -diff --git a/tools/libxl/libxl.c b/tools/libxl/libxl.c -index a6e9601..32d788a 100644 ---- a/tools/libxl/libxl.c -+++ b/tools/libxl/libxl.c -@@ -1920,8 +1920,9 @@ static void device_disk_add(libxl__egc *egc, uint32_t domid, - flexarray_append(front, disk->is_cdrom ? "cdrom" : "disk"); - - libxl__device_generic_add(gc, t, device, -- libxl__xs_kvs_of_flexarray(gc, back, back->count), -- libxl__xs_kvs_of_flexarray(gc, front, front->count)); -+ libxl__xs_kvs_of_flexarray(gc, back, back->count), -+ libxl__xs_kvs_of_flexarray(gc, front, front->count), -+ NULL); - - rc = libxl__xs_transaction_commit(gc, &t); - if (!rc) break; -@@ -2633,8 +2634,9 @@ void libxl__device_nic_add(libxl__egc *egc, uint32_t domid, - flexarray_append(front, libxl__sprintf(gc, - LIBXL_MAC_FMT, LIBXL_MAC_BYTES(nic->mac))); - libxl__device_generic_add(gc, XBT_NULL, device, -- libxl__xs_kvs_of_flexarray(gc, back, back->count), -- libxl__xs_kvs_of_flexarray(gc, front, front->count)); -+ libxl__xs_kvs_of_flexarray(gc, back, back->count), -+ libxl__xs_kvs_of_flexarray(gc, front, front->count), -+ NULL); - - aodev->dev = device; - aodev->action = DEVICE_CONNECT; -@@ -2830,7 +2832,7 @@ int libxl__device_console_add(libxl__gc *gc, uint32_t domid, - libxl__device_console *console, - libxl__domain_build_state *state) - { -- flexarray_t *front; -+ flexarray_t *front, *ro_front; - flexarray_t *back; - libxl__device device; - int rc; -@@ -2845,6 +2847,11 @@ int libxl__device_console_add(libxl__gc *gc, uint32_t domid, - rc = ERROR_NOMEM; - goto out; - } -+ ro_front = flexarray_make(16, 1); -+ if (!ro_front) { -+ rc = ERROR_NOMEM; -+ goto out; -+ } - back = flexarray_make(16, 1); - if (!back) { - rc = ERROR_NOMEM; -@@ -2871,21 +2878,24 @@ int libxl__device_console_add(libxl__gc *gc, uint32_t domid, - - flexarray_append(front, "backend-id"); - flexarray_append(front, libxl__sprintf(gc, "%d", console->backend_domid)); -- flexarray_append(front, "limit"); -- flexarray_append(front, libxl__sprintf(gc, "%d", LIBXL_XENCONSOLE_LIMIT)); -- flexarray_append(front, "type"); -+ -+ flexarray_append(ro_front, "limit"); -+ flexarray_append(ro_front, libxl__sprintf(gc, "%d", LIBXL_XENCONSOLE_LIMIT)); -+ flexarray_append(ro_front, "type"); - if (console->consback == LIBXL__CONSOLE_BACKEND_XENCONSOLED) -- flexarray_append(front, "xenconsoled"); -+ flexarray_append(ro_front, "xenconsoled"); - else -- flexarray_append(front, "ioemu"); -- flexarray_append(front, "output"); -- flexarray_append(front, console->output); -+ flexarray_append(ro_front, "ioemu"); -+ flexarray_append(ro_front, "output"); -+ flexarray_append(ro_front, console->output); -+ flexarray_append(ro_front, "tty"); -+ flexarray_append(ro_front, ""); - - if (state) { -- flexarray_append(front, "port"); -- flexarray_append(front, libxl__sprintf(gc, "%"PRIu32, state->console_port)); -- flexarray_append(front, "ring-ref"); -- flexarray_append(front, libxl__sprintf(gc, "%lu", state->console_mfn)); -+ flexarray_append(ro_front, "port"); -+ flexarray_append(ro_front, libxl__sprintf(gc, "%"PRIu32, state->console_port)); -+ flexarray_append(ro_front, "ring-ref"); -+ flexarray_append(ro_front, libxl__sprintf(gc, "%lu", state->console_mfn)); - } else { - flexarray_append(front, "state"); - flexarray_append(front, libxl__sprintf(gc, "%d", 1)); -@@ -2894,11 +2904,13 @@ int libxl__device_console_add(libxl__gc *gc, uint32_t domid, - } - - libxl__device_generic_add(gc, XBT_NULL, &device, -- libxl__xs_kvs_of_flexarray(gc, back, back->count), -- libxl__xs_kvs_of_flexarray(gc, front, front->count)); -+ libxl__xs_kvs_of_flexarray(gc, back, back->count), -+ libxl__xs_kvs_of_flexarray(gc, front, front->count), -+ libxl__xs_kvs_of_flexarray(gc, ro_front, ro_front->count)); - rc = 0; - out_free: - flexarray_free(back); -+ flexarray_free(ro_front); - flexarray_free(front); - out: - return rc; -@@ -2982,8 +2994,9 @@ int libxl__device_vkb_add(libxl__gc *gc, uint32_t domid, - flexarray_append(front, libxl__sprintf(gc, "%d", 1)); - - libxl__device_generic_add(gc, XBT_NULL, &device, -- libxl__xs_kvs_of_flexarray(gc, back, back->count), -- libxl__xs_kvs_of_flexarray(gc, front, front->count)); -+ libxl__xs_kvs_of_flexarray(gc, back, back->count), -+ libxl__xs_kvs_of_flexarray(gc, front, front->count), -+ NULL); - rc = 0; - out_free: - flexarray_free(back); -@@ -3096,8 +3109,9 @@ int libxl__device_vfb_add(libxl__gc *gc, uint32_t domid, libxl_device_vfb *vfb) - flexarray_append_pair(front, "state", libxl__sprintf(gc, "%d", 1)); - - libxl__device_generic_add(gc, XBT_NULL, &device, -- libxl__xs_kvs_of_flexarray(gc, back, back->count), -- libxl__xs_kvs_of_flexarray(gc, front, front->count)); -+ libxl__xs_kvs_of_flexarray(gc, back, back->count), -+ libxl__xs_kvs_of_flexarray(gc, front, front->count), -+ NULL); - rc = 0; - out_free: - flexarray_free(front); -diff --git a/tools/libxl/libxl_device.c b/tools/libxl/libxl_device.c -index c3283f1..1c04a21 100644 ---- a/tools/libxl/libxl_device.c -+++ b/tools/libxl/libxl_device.c -@@ -84,11 +84,12 @@ out: - } - - int libxl__device_generic_add(libxl__gc *gc, xs_transaction_t t, -- libxl__device *device, char **bents, char **fents) -+ libxl__device *device, char **bents, char **fents, char **ro_fents) - { - libxl_ctx *ctx = libxl__gc_owner(gc); - char *frontend_path, *backend_path; - struct xs_permissions frontend_perms[2]; -+ struct xs_permissions ro_frontend_perms[2]; - struct xs_permissions backend_perms[2]; - int create_transaction = t == XBT_NULL; - -@@ -100,22 +101,37 @@ int libxl__device_generic_add(libxl__gc *gc, xs_transaction_t t, - frontend_perms[1].id = device->backend_domid; - frontend_perms[1].perms = XS_PERM_READ; - -- backend_perms[0].id = device->backend_domid; -- backend_perms[0].perms = XS_PERM_NONE; -- backend_perms[1].id = device->domid; -- backend_perms[1].perms = XS_PERM_READ; -+ ro_frontend_perms[0].id = backend_perms[0].id = device->backend_domid; -+ ro_frontend_perms[0].perms = backend_perms[0].perms = XS_PERM_NONE; -+ ro_frontend_perms[1].id = backend_perms[1].id = device->domid; -+ ro_frontend_perms[1].perms = backend_perms[1].perms = XS_PERM_READ; - - retry_transaction: - if (create_transaction) - t = xs_transaction_start(ctx->xsh); - /* FIXME: read frontend_path and check state before removing stuff */ - -- if (fents) { -+ if (fents || ro_fents) { - xs_rm(ctx->xsh, t, frontend_path); - xs_mkdir(ctx->xsh, t, frontend_path); -- xs_set_permissions(ctx->xsh, t, frontend_path, frontend_perms, ARRAY_SIZE(frontend_perms)); -+ /* Console 0 is a special case. It doesn't use the regular PV -+ * state machine but also the frontend directory has -+ * historically contained other information, such as the -+ * vnc-port, which we don't want the guest fiddling with. -+ */ -+ if (device->kind == LIBXL__DEVICE_KIND_CONSOLE && device->devid == 0) -+ xs_set_permissions(ctx->xsh, t, frontend_path, -+ ro_frontend_perms, ARRAY_SIZE(ro_frontend_perms)); -+ else -+ xs_set_permissions(ctx->xsh, t, frontend_path, -+ frontend_perms, ARRAY_SIZE(frontend_perms)); - xs_write(ctx->xsh, t, libxl__sprintf(gc, "%s/backend", frontend_path), backend_path, strlen(backend_path)); -- libxl__xs_writev(gc, t, frontend_path, fents); -+ if (fents) -+ libxl__xs_writev_perms(gc, t, frontend_path, fents, -+ frontend_perms, ARRAY_SIZE(frontend_perms)); -+ if (ro_fents) -+ libxl__xs_writev_perms(gc, t, frontend_path, ro_fents, -+ ro_frontend_perms, ARRAY_SIZE(ro_frontend_perms)); - } - - if (bents) { -diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h -index 13fa509..ae96a74 100644 ---- a/tools/libxl/libxl_internal.h -+++ b/tools/libxl/libxl_internal.h -@@ -516,6 +516,11 @@ _hidden char **libxl__xs_kvs_of_flexarray(libxl__gc *gc, flexarray_t *array, int - /* treats kvs as pairs of keys and values and writes each to dir. */ - _hidden int libxl__xs_writev(libxl__gc *gc, xs_transaction_t t, - const char *dir, char **kvs); -+/* as writev but also sets the permissions on each path */ -+_hidden int libxl__xs_writev_perms(libxl__gc *gc, xs_transaction_t t, -+ const char *dir, char *kvs[], -+ struct xs_permissions *perms, -+ unsigned int num_perms); - /* _atonce creates a transaction and writes all keys at once */ - _hidden int libxl__xs_writev_atonce(libxl__gc *gc, - const char *dir, char **kvs); -@@ -930,7 +935,7 @@ _hidden int libxl__device_console_add(libxl__gc *gc, uint32_t domid, - libxl__domain_build_state *state); - - _hidden int libxl__device_generic_add(libxl__gc *gc, xs_transaction_t t, -- libxl__device *device, char **bents, char **fents); -+ libxl__device *device, char **bents, char **fents, char **ro_fents); - _hidden char *libxl__device_backend_path(libxl__gc *gc, libxl__device *device); - _hidden char *libxl__device_frontend_path(libxl__gc *gc, libxl__device *device); - _hidden int libxl__parse_backend_path(libxl__gc *gc, const char *path, -diff --git a/tools/libxl/libxl_pci.c b/tools/libxl/libxl_pci.c -index 48986f3..d373b4d 100644 ---- a/tools/libxl/libxl_pci.c -+++ b/tools/libxl/libxl_pci.c -@@ -106,7 +106,8 @@ int libxl__create_pci_backend(libxl__gc *gc, uint32_t domid, - - libxl__device_generic_add(gc, XBT_NULL, &device, - libxl__xs_kvs_of_flexarray(gc, back, back->count), -- libxl__xs_kvs_of_flexarray(gc, front, front->count)); -+ libxl__xs_kvs_of_flexarray(gc, front, front->count), -+ NULL); - - out: - if (back) -diff --git a/tools/libxl/libxl_xshelp.c b/tools/libxl/libxl_xshelp.c -index 52af484..d7eaa66 100644 ---- a/tools/libxl/libxl_xshelp.c -+++ b/tools/libxl/libxl_xshelp.c -@@ -41,8 +41,10 @@ char **libxl__xs_kvs_of_flexarray(libxl__gc *gc, flexarray_t *array, int length) - return kvs; - } - --int libxl__xs_writev(libxl__gc *gc, xs_transaction_t t, -- const char *dir, char *kvs[]) -+int libxl__xs_writev_perms(libxl__gc *gc, xs_transaction_t t, -+ const char *dir, char *kvs[], -+ struct xs_permissions *perms, -+ unsigned int num_perms) - { - libxl_ctx *ctx = libxl__gc_owner(gc); - char *path; -@@ -56,11 +58,19 @@ int libxl__xs_writev(libxl__gc *gc, xs_transaction_t t, - if (path && kvs[i + 1]) { - int length = strlen(kvs[i + 1]); - xs_write(ctx->xsh, t, path, kvs[i + 1], length); -+ if (perms) -+ xs_set_permissions(ctx->xsh, t, path, perms, num_perms); - } - } - return 0; - } - -+int libxl__xs_writev(libxl__gc *gc, xs_transaction_t t, -+ const char *dir, char *kvs[]) -+{ -+ return libxl__xs_writev_perms(gc, t, dir, kvs, NULL, 0); -+} -+ - int libxl__xs_writev_atonce(libxl__gc *gc, - const char *dir, char *kvs[]) - { diff --git a/main/xen/xsa58-4.2.patch b/main/xen/xsa58-4.2.patch deleted file mode 100644 index 1ea3aaa97d..0000000000 --- a/main/xen/xsa58-4.2.patch +++ /dev/null @@ -1,129 +0,0 @@ -x86: fix page refcount handling in page table pin error path - -In the original patch 7 of the series addressing XSA-45 I mistakenly -took the addition of the call to get_page_light() in alloc_page_type() -to cover two decrements that would happen: One for the PGT_partial bit -that is getting set along with the call, and the other for the page -reference the caller hold (and would be dropping on its error path). -But of course the additional page reference is tied to the PGT_partial -bit, and hence any caller of a function that may leave -->arch.old_guest_table non-NULL for error cleanup purposes has to make -sure a respective page reference gets retained. - -Similar issues were then also spotted elsewhere: In effect all callers -of get_page_type_preemptible() need to deal with errors in similar -ways. To make sure error handling can work this way without leaking -page references, a respective assertion gets added to that function. - -This is CVE-2013-1432 / XSA-58. - -Reported-by: Andrew Cooper <andrew.cooper3@citrix.com> -Signed-off-by: Jan Beulich <jbeulich@suse.com> -Tested-by: Andrew Cooper <andrew.cooper3@citrix.com> -Reviewed-by: Tim Deegan <tim@xen.org> - ---- a/xen/arch/x86/domain.c -+++ b/xen/arch/x86/domain.c -@@ -941,6 +941,10 @@ int arch_set_info_guest( - if ( v->vcpu_id == 0 ) - d->vm_assist = c(vm_assist); - -+ rc = put_old_guest_table(current); -+ if ( rc ) -+ return rc; -+ - if ( !compat ) - rc = (int)set_gdt(v, c.nat->gdt_frames, c.nat->gdt_ents); - #ifdef CONFIG_COMPAT -@@ -980,18 +984,24 @@ int arch_set_info_guest( - } - else - { -- /* -- * Since v->arch.guest_table{,_user} are both NULL, this effectively -- * is just a call to put_old_guest_table(). -- */ - if ( !compat ) -- rc = vcpu_destroy_pagetables(v); -+ rc = put_old_guest_table(v); - if ( !rc ) - rc = get_page_type_preemptible(cr3_page, - !compat ? PGT_root_page_table - : PGT_l3_page_table); -- if ( rc == -EINTR ) -+ switch ( rc ) -+ { -+ case -EINTR: - rc = -EAGAIN; -+ case -EAGAIN: -+ case 0: -+ break; -+ default: -+ if ( cr3_page == current->arch.old_guest_table ) -+ cr3_page = NULL; -+ break; -+ } - } - if ( rc ) - /* handled below */; -@@ -1018,6 +1028,11 @@ int arch_set_info_guest( - pagetable_get_page(v->arch.guest_table); - v->arch.guest_table = pagetable_null(); - break; -+ default: -+ if ( cr3_page == current->arch.old_guest_table ) -+ cr3_page = NULL; -+ case 0: -+ break; - } - } - if ( !rc ) ---- a/xen/arch/x86/mm.c -+++ b/xen/arch/x86/mm.c -@@ -718,7 +718,8 @@ static int get_page_and_type_from_pagenr - get_page_type_preemptible(page, type) : - (get_page_type(page, type) ? 0 : -EINVAL)); - -- if ( unlikely(rc) && partial >= 0 ) -+ if ( unlikely(rc) && partial >= 0 && -+ (!preemptible || page != current->arch.old_guest_table) ) - put_page(page); - - return rc; -@@ -2638,6 +2639,7 @@ int put_page_type_preemptible(struct pag - - int get_page_type_preemptible(struct page_info *page, unsigned long type) - { -+ ASSERT(!current->arch.old_guest_table); - return __get_page_type(page, type, 1); - } - -@@ -2848,7 +2850,7 @@ static void put_superpage(unsigned long - - #endif - --static int put_old_guest_table(struct vcpu *v) -+int put_old_guest_table(struct vcpu *v) - { - int rc; - -@@ -3253,7 +3255,8 @@ long do_mmuext_op( - rc = -EAGAIN; - else if ( rc != -EAGAIN ) - MEM_LOG("Error while pinning mfn %lx", page_to_mfn(page)); -- put_page(page); -+ if ( page != curr->arch.old_guest_table ) -+ put_page(page); - break; - } - ---- a/xen/include/asm-x86/mm.h -+++ b/xen/include/asm-x86/mm.h -@@ -374,6 +374,7 @@ void put_page_type(struct page_info *pag - int get_page_type(struct page_info *page, unsigned long type); - int put_page_type_preemptible(struct page_info *page); - int get_page_type_preemptible(struct page_info *page, unsigned long type); -+int put_old_guest_table(struct vcpu *); - int get_page_from_l1e( - l1_pgentry_t l1e, struct domain *l1e_owner, struct domain *pg_owner); - void put_page_from_l1e(l1_pgentry_t l1e, struct domain *l1e_owner); diff --git a/main/xen/xsa61-4.2-unstable.patch b/main/xen/xsa61-4.2-unstable.patch deleted file mode 100644 index 87fbf3239f..0000000000 --- a/main/xen/xsa61-4.2-unstable.patch +++ /dev/null @@ -1,44 +0,0 @@ -libxl: suppress device assignment to HVM guest when there is no IOMMU - -This in effect copies similar logic from xend: While there's no way to -check whether a device is assigned to a particular guest, -XEN_DOMCTL_test_assign_device at least allows checking whether an -IOMMU is there and whether a device has been assign to _some_ -guest. - -For the time being, this should be enough to cover for the missing -error checking/recovery in other parts of libxl's device assignment -paths. - -There remains a (functionality-, but not security-related) race in -that the iommu should be set up earlier, but this is too risky a -change for this stage of the 4.3 release. - -This is a security issue, XSA-61. - -Signed-off-by: Jan Beulich <jbeulich@suse.com> -Tested-by: George Dunlap <george.dunlap@eu.citrix.com> -Acked-by: George Dunlap <george.dunlap@eu.citrix.com> -Acked-by: Ian Jackson <ian.jackson@eu.citrix.com> - ---- a/tools/libxl/libxl_pci.c -+++ b/tools/libxl/libxl_pci.c -@@ -1036,6 +1036,18 @@ int libxl__device_pci_add(libxl__gc *gc, uint32_t domid, libxl_device_pci *pcide - int num_assigned, i, rc; - int stubdomid = 0; - -+ if (libxl__domain_type(gc, domid) == LIBXL_DOMAIN_TYPE_HVM) { -+ rc = xc_test_assign_device(ctx->xch, domid, pcidev_encode_bdf(pcidev)); -+ if (rc) { -+ LIBXL__LOG(ctx, LIBXL__LOG_ERROR, -+ "PCI device %04x:%02x:%02x.%u %s?", -+ pcidev->domain, pcidev->bus, pcidev->dev, pcidev->func, -+ errno == ENOSYS ? "cannot be assigned - no IOMMU" -+ : "already assigned to a different guest"); -+ goto out; -+ } -+ } -+ - rc = libxl__device_pci_setdefault(gc, pcidev); - if (rc) goto out; - diff --git a/main/xen/xsa75-4.2.patch b/main/xen/xsa75-4.2.patch deleted file mode 100644 index c171562e4d..0000000000 --- a/main/xen/xsa75-4.2.patch +++ /dev/null @@ -1,53 +0,0 @@ -nested VMX: VMLANUCH/VMRESUME emulation must check permission first thing - -Otherwise uninitialized data may be used, leading to crashes. - -This is XSA-75. - -Reported-and-tested-by: Jeff Zimmerman <Jeff_Zimmerman@McAfee.com> -Signed-off-by: Jan Beulich <jbeulich@suse.com> -Reviewed-and-tested-by: Andrew Cooper <andrew.cooper3@citrix.com> - ---- a/xen/arch/x86/hvm/vmx/vvmx.c -+++ b/xen/arch/x86/hvm/vmx/vvmx.c -@@ -1075,15 +1075,10 @@ int nvmx_handle_vmxoff(struct cpu_user_r - return X86EMUL_OKAY; - } - --int nvmx_vmresume(struct vcpu *v, struct cpu_user_regs *regs) -+static int nvmx_vmresume(struct vcpu *v, struct cpu_user_regs *regs) - { - struct nestedvmx *nvmx = &vcpu_2_nvmx(v); - struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v); -- int rc; -- -- rc = vmx_inst_check_privilege(regs, 0); -- if ( rc != X86EMUL_OKAY ) -- return rc; - - /* check VMCS is valid and IO BITMAP is set */ - if ( (nvcpu->nv_vvmcxaddr != VMCX_EADDR) && -@@ -1100,6 +1095,10 @@ int nvmx_handle_vmresume(struct cpu_user - { - int launched; - struct vcpu *v = current; -+ int rc = vmx_inst_check_privilege(regs, 0); -+ -+ if ( rc != X86EMUL_OKAY ) -+ return rc; - - if ( vcpu_nestedhvm(v).nv_vvmcxaddr == VMCX_EADDR ) - { -@@ -1119,8 +1118,11 @@ int nvmx_handle_vmresume(struct cpu_user - int nvmx_handle_vmlaunch(struct cpu_user_regs *regs) - { - int launched; -- int rc; - struct vcpu *v = current; -+ int rc = vmx_inst_check_privilege(regs, 0); -+ -+ if ( rc != X86EMUL_OKAY ) -+ return rc; - - if ( vcpu_nestedhvm(v).nv_vvmcxaddr == VMCX_EADDR ) - { diff --git a/main/xen/xsa97-hap-4_2-prereq.patch b/main/xen/xsa97-hap-4_2-prereq.patch new file mode 100644 index 0000000000..ce2240aec8 --- /dev/null +++ b/main/xen/xsa97-hap-4_2-prereq.patch @@ -0,0 +1,466 @@ +x86/mm/hap: Adjust vram tracking to play nicely with log-dirty. + +The previous code assumed the guest would be in one of three mutually exclusive +modes for bookkeeping dirty pages: (1) shadow, (2) hap utilizing the log dirty +bitmap to support functionality such as live migrate, (3) hap utilizing the +log dirty bitmap to track dirty vram pages. +Races arose when a guest attempted to track dirty vram while performing live +migrate. (The dispatch table managed by paging_log_dirty_init() might change +in the middle of a log dirty or a vram tracking function.) + +This change allows hap log dirty and hap vram tracking to be concurrent. +Vram tracking no longer uses the log dirty bitmap. Instead it detects +dirty vram pages by examining their p2m type. The log dirty bitmap is only +used by the log dirty code. Because the two operations use different +mechanisms, they are no longer mutually exclusive. + +Signed-Off-By: Robert Phillips <robert.phillips@citrix.com> +Acked-by: Tim Deegan <tim@xen.org> + +Minor whitespace changes to conform with coding style +Signed-off-by: Tim Deegan <tim@xen.org> + +Committed-by: Tim Deegan <tim@xen.org> +master commit: fd91a2a662bc59677e0f217423a7a155d5465886 +master date: 2012-12-13 12:10:14 +0000 + +--- a/xen/arch/x86/mm/hap/hap.c ++++ b/xen/arch/x86/mm/hap/hap.c +@@ -56,132 +56,110 @@ + /* HAP VRAM TRACKING SUPPORT */ + /************************************************/ + +-static int hap_enable_vram_tracking(struct domain *d) +-{ +- struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram; +- +- if ( !dirty_vram ) +- return -EINVAL; +- +- /* turn on PG_log_dirty bit in paging mode */ +- paging_lock(d); +- d->arch.paging.mode |= PG_log_dirty; +- paging_unlock(d); +- +- /* set l1e entries of P2M table to be read-only. */ +- p2m_change_type_range(d, dirty_vram->begin_pfn, dirty_vram->end_pfn, +- p2m_ram_rw, p2m_ram_logdirty); +- +- flush_tlb_mask(d->domain_dirty_cpumask); +- return 0; +-} +- +-static int hap_disable_vram_tracking(struct domain *d) +-{ +- struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram; +- +- if ( !dirty_vram ) +- return -EINVAL; +- +- paging_lock(d); +- d->arch.paging.mode &= ~PG_log_dirty; +- paging_unlock(d); +- +- /* set l1e entries of P2M table with normal mode */ +- p2m_change_type_range(d, dirty_vram->begin_pfn, dirty_vram->end_pfn, +- p2m_ram_logdirty, p2m_ram_rw); +- +- flush_tlb_mask(d->domain_dirty_cpumask); +- return 0; +-} +- +-static void hap_clean_vram_tracking(struct domain *d) +-{ +- struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram; +- +- if ( !dirty_vram ) +- return; +- +- /* set l1e entries of P2M table to be read-only. */ +- p2m_change_type_range(d, dirty_vram->begin_pfn, dirty_vram->end_pfn, +- p2m_ram_rw, p2m_ram_logdirty); +- +- flush_tlb_mask(d->domain_dirty_cpumask); +-} +- +-static void hap_vram_tracking_init(struct domain *d) +-{ +- paging_log_dirty_init(d, hap_enable_vram_tracking, +- hap_disable_vram_tracking, +- hap_clean_vram_tracking); +-} ++/* ++ * hap_track_dirty_vram() ++ * Create the domain's dv_dirty_vram struct on demand. ++ * Create a dirty vram range on demand when some [begin_pfn:begin_pfn+nr] is ++ * first encountered. ++ * Collect the guest_dirty bitmask, a bit mask of the dirty vram pages, by ++ * calling paging_log_dirty_range(), which interrogates each vram ++ * page's p2m type looking for pages that have been made writable. ++ */ + + int hap_track_dirty_vram(struct domain *d, + unsigned long begin_pfn, + unsigned long nr, +- XEN_GUEST_HANDLE_64(uint8) dirty_bitmap) ++ XEN_GUEST_HANDLE_64(uint8) guest_dirty_bitmap) + { + long rc = 0; +- struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram; ++ struct sh_dirty_vram *dirty_vram; ++ uint8_t *dirty_bitmap = NULL; + + if ( nr ) + { +- if ( paging_mode_log_dirty(d) && dirty_vram ) ++ int size = (nr + BITS_PER_BYTE - 1) / BITS_PER_BYTE; ++ ++ if ( !paging_mode_log_dirty(d) ) + { +- if ( begin_pfn != dirty_vram->begin_pfn || +- begin_pfn + nr != dirty_vram->end_pfn ) +- { +- paging_log_dirty_disable(d); +- dirty_vram->begin_pfn = begin_pfn; +- dirty_vram->end_pfn = begin_pfn + nr; +- rc = paging_log_dirty_enable(d); +- if (rc != 0) +- goto param_fail; +- } ++ hap_logdirty_init(d); ++ rc = paging_log_dirty_enable(d); ++ if ( rc ) ++ goto out; + } +- else if ( !paging_mode_log_dirty(d) && !dirty_vram ) ++ ++ rc = -ENOMEM; ++ dirty_bitmap = xzalloc_bytes(size); ++ if ( !dirty_bitmap ) ++ goto out; ++ ++ paging_lock(d); ++ ++ dirty_vram = d->arch.hvm_domain.dirty_vram; ++ if ( !dirty_vram ) + { + rc = -ENOMEM; +- if ( (dirty_vram = xmalloc(struct sh_dirty_vram)) == NULL ) +- goto param_fail; ++ if ( (dirty_vram = xzalloc(struct sh_dirty_vram)) == NULL ) ++ { ++ paging_unlock(d); ++ goto out; ++ } + ++ d->arch.hvm_domain.dirty_vram = dirty_vram; ++ } ++ ++ if ( begin_pfn != dirty_vram->begin_pfn || ++ begin_pfn + nr != dirty_vram->end_pfn ) ++ { + dirty_vram->begin_pfn = begin_pfn; + dirty_vram->end_pfn = begin_pfn + nr; +- d->arch.hvm_domain.dirty_vram = dirty_vram; +- hap_vram_tracking_init(d); +- rc = paging_log_dirty_enable(d); +- if (rc != 0) +- goto param_fail; ++ ++ paging_unlock(d); ++ ++ /* set l1e entries of range within P2M table to be read-only. */ ++ p2m_change_type_range(d, begin_pfn, begin_pfn + nr, ++ p2m_ram_rw, p2m_ram_logdirty); ++ ++ flush_tlb_mask(d->domain_dirty_cpumask); ++ ++ memset(dirty_bitmap, 0xff, size); /* consider all pages dirty */ + } + else + { +- if ( !paging_mode_log_dirty(d) && dirty_vram ) +- rc = -EINVAL; +- else +- rc = -ENODATA; +- goto param_fail; ++ paging_unlock(d); ++ ++ domain_pause(d); ++ ++ /* get the bitmap */ ++ paging_log_dirty_range(d, begin_pfn, nr, dirty_bitmap); ++ ++ domain_unpause(d); + } +- /* get the bitmap */ +- rc = paging_log_dirty_range(d, begin_pfn, nr, dirty_bitmap); ++ ++ rc = -EFAULT; ++ if ( copy_to_guest(guest_dirty_bitmap, dirty_bitmap, size) == 0 ) ++ rc = 0; + } + else + { +- if ( paging_mode_log_dirty(d) && dirty_vram ) { +- rc = paging_log_dirty_disable(d); +- xfree(dirty_vram); +- dirty_vram = d->arch.hvm_domain.dirty_vram = NULL; +- } else +- rc = 0; +- } ++ paging_lock(d); + +- return rc; ++ dirty_vram = d->arch.hvm_domain.dirty_vram; ++ if ( dirty_vram ) ++ { ++ /* ++ * If zero pages specified while tracking dirty vram ++ * then stop tracking ++ */ ++ xfree(dirty_vram); ++ d->arch.hvm_domain.dirty_vram = NULL; ++ } + +-param_fail: +- if ( dirty_vram ) +- { +- xfree(dirty_vram); +- dirty_vram = d->arch.hvm_domain.dirty_vram = NULL; ++ paging_unlock(d); + } ++out: ++ if ( dirty_bitmap ) ++ xfree(dirty_bitmap); ++ + return rc; + } + +@@ -223,13 +201,6 @@ static void hap_clean_dirty_bitmap(struc + + void hap_logdirty_init(struct domain *d) + { +- struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram; +- if ( paging_mode_log_dirty(d) && dirty_vram ) +- { +- paging_log_dirty_disable(d); +- xfree(dirty_vram); +- dirty_vram = d->arch.hvm_domain.dirty_vram = NULL; +- } + + /* Reinitialize logdirty mechanism */ + paging_log_dirty_init(d, hap_enable_log_dirty, +--- a/xen/arch/x86/mm/paging.c ++++ b/xen/arch/x86/mm/paging.c +@@ -447,157 +447,38 @@ int paging_log_dirty_op(struct domain *d + return rv; + } + +-int paging_log_dirty_range(struct domain *d, +- unsigned long begin_pfn, +- unsigned long nr, +- XEN_GUEST_HANDLE_64(uint8) dirty_bitmap) +-{ +- int rv = 0; +- unsigned long pages = 0; +- mfn_t *l4, *l3, *l2; +- unsigned long *l1; +- int b1, b2, b3, b4; +- int i2, i3, i4; +- +- d->arch.paging.log_dirty.clean_dirty_bitmap(d); +- paging_lock(d); +- +- PAGING_DEBUG(LOGDIRTY, "log-dirty-range: dom %u faults=%u dirty=%u\n", +- d->domain_id, +- d->arch.paging.log_dirty.fault_count, +- d->arch.paging.log_dirty.dirty_count); +- +- if ( unlikely(d->arch.paging.log_dirty.failed_allocs) ) { +- printk("%s: %d failed page allocs while logging dirty pages\n", +- __FUNCTION__, d->arch.paging.log_dirty.failed_allocs); +- rv = -ENOMEM; +- goto out; +- } ++void paging_log_dirty_range(struct domain *d, ++ unsigned long begin_pfn, ++ unsigned long nr, ++ uint8_t *dirty_bitmap) ++{ ++ struct p2m_domain *p2m = p2m_get_hostp2m(d); ++ int i; ++ unsigned long pfn; ++ ++ /* ++ * Set l1e entries of P2M table to be read-only. ++ * ++ * On first write, it page faults, its entry is changed to read-write, ++ * and on retry the write succeeds. ++ * ++ * We populate dirty_bitmap by looking for entries that have been ++ * switched to read-write. ++ */ + +- if ( !d->arch.paging.log_dirty.fault_count && +- !d->arch.paging.log_dirty.dirty_count ) { +- unsigned int size = BITS_TO_LONGS(nr); +- +- if ( clear_guest(dirty_bitmap, size * BYTES_PER_LONG) != 0 ) +- rv = -EFAULT; +- goto out; +- } +- d->arch.paging.log_dirty.fault_count = 0; +- d->arch.paging.log_dirty.dirty_count = 0; ++ p2m_lock(p2m); + +- b1 = L1_LOGDIRTY_IDX(begin_pfn); +- b2 = L2_LOGDIRTY_IDX(begin_pfn); +- b3 = L3_LOGDIRTY_IDX(begin_pfn); +- b4 = L4_LOGDIRTY_IDX(begin_pfn); +- l4 = paging_map_log_dirty_bitmap(d); +- +- for ( i4 = b4; +- (pages < nr) && (i4 < LOGDIRTY_NODE_ENTRIES); +- i4++ ) ++ for ( i = 0, pfn = begin_pfn; pfn < begin_pfn + nr; i++, pfn++ ) + { +- l3 = (l4 && mfn_valid(l4[i4])) ? map_domain_page(mfn_x(l4[i4])) : NULL; +- for ( i3 = b3; +- (pages < nr) && (i3 < LOGDIRTY_NODE_ENTRIES); +- i3++ ) +- { +- l2 = ((l3 && mfn_valid(l3[i3])) ? +- map_domain_page(mfn_x(l3[i3])) : NULL); +- for ( i2 = b2; +- (pages < nr) && (i2 < LOGDIRTY_NODE_ENTRIES); +- i2++ ) +- { +- unsigned int bytes = PAGE_SIZE; +- uint8_t *s; +- l1 = ((l2 && mfn_valid(l2[i2])) ? +- map_domain_page(mfn_x(l2[i2])) : NULL); +- +- s = ((uint8_t*)l1) + (b1 >> 3); +- bytes -= b1 >> 3; +- +- if ( likely(((nr - pages + 7) >> 3) < bytes) ) +- bytes = (unsigned int)((nr - pages + 7) >> 3); +- +- if ( !l1 ) +- { +- if ( clear_guest_offset(dirty_bitmap, pages >> 3, +- bytes) != 0 ) +- { +- rv = -EFAULT; +- goto out; +- } +- } +- /* begin_pfn is not 32K aligned, hence we have to bit +- * shift the bitmap */ +- else if ( b1 & 0x7 ) +- { +- int i, j; +- uint32_t *l = (uint32_t*) s; +- int bits = b1 & 0x7; +- int bitmask = (1 << bits) - 1; +- int size = (bytes + BYTES_PER_LONG - 1) / BYTES_PER_LONG; +- unsigned long bitmap[size]; +- static unsigned long printed = 0; +- +- if ( printed != begin_pfn ) +- { +- dprintk(XENLOG_DEBUG, "%s: begin_pfn %lx is not 32K aligned!\n", +- __FUNCTION__, begin_pfn); +- printed = begin_pfn; +- } +- +- for ( i = 0; i < size - 1; i++, l++ ) { +- bitmap[i] = ((*l) >> bits) | +- (((*((uint8_t*)(l + 1))) & bitmask) << (sizeof(*l) * 8 - bits)); +- } +- s = (uint8_t*) l; +- size = BYTES_PER_LONG - ((b1 >> 3) & 0x3); +- bitmap[i] = 0; +- for ( j = 0; j < size; j++, s++ ) +- bitmap[i] |= (*s) << (j * 8); +- bitmap[i] = (bitmap[i] >> bits) | (bitmask << (size * 8 - bits)); +- if ( copy_to_guest_offset(dirty_bitmap, (pages >> 3), +- (uint8_t*) bitmap, bytes) != 0 ) +- { +- rv = -EFAULT; +- goto out; +- } +- } +- else +- { +- if ( copy_to_guest_offset(dirty_bitmap, pages >> 3, +- s, bytes) != 0 ) +- { +- rv = -EFAULT; +- goto out; +- } +- } +- +- pages += bytes << 3; +- if ( l1 ) +- { +- clear_page(l1); +- unmap_domain_page(l1); +- } +- b1 = b1 & 0x7; +- } +- b2 = 0; +- if ( l2 ) +- unmap_domain_page(l2); +- } +- b3 = 0; +- if ( l3 ) +- unmap_domain_page(l3); ++ p2m_type_t pt; ++ pt = p2m_change_type(d, pfn, p2m_ram_rw, p2m_ram_logdirty); ++ if ( pt == p2m_ram_rw ) ++ dirty_bitmap[i >> 3] |= (1 << (i & 7)); + } +- if ( l4 ) +- unmap_domain_page(l4); +- +- paging_unlock(d); + +- return rv; ++ p2m_unlock(p2m); + +- out: +- paging_unlock(d); +- return rv; ++ flush_tlb_mask(d->domain_dirty_cpumask); + } + + /* Note that this function takes three function pointers. Callers must supply +--- a/xen/include/asm-x86/config.h ++++ b/xen/include/asm-x86/config.h +@@ -17,6 +17,7 @@ + + #define BYTES_PER_LONG (1 << LONG_BYTEORDER) + #define BITS_PER_LONG (BYTES_PER_LONG << 3) ++#define BITS_PER_BYTE 8 + + #define CONFIG_X86 1 + #define CONFIG_X86_HT 1 +--- a/xen/include/asm-x86/paging.h ++++ b/xen/include/asm-x86/paging.h +@@ -145,10 +145,10 @@ struct paging_mode { + void paging_free_log_dirty_bitmap(struct domain *d); + + /* get the dirty bitmap for a specific range of pfns */ +-int paging_log_dirty_range(struct domain *d, +- unsigned long begin_pfn, +- unsigned long nr, +- XEN_GUEST_HANDLE_64(uint8) dirty_bitmap); ++void paging_log_dirty_range(struct domain *d, ++ unsigned long begin_pfn, ++ unsigned long nr, ++ uint8_t *dirty_bitmap); + + /* enable log dirty */ + int paging_log_dirty_enable(struct domain *d); diff --git a/main/xen/xsa97-hap-4_2.patch b/main/xen/xsa97-hap-4_2.patch new file mode 100644 index 0000000000..5f89b58b7b --- /dev/null +++ b/main/xen/xsa97-hap-4_2.patch @@ -0,0 +1,485 @@ +x86/paging: make log-dirty operations preemptible + +Both the freeing and the inspection of the bitmap get done in (nested) +loops which - besides having a rather high iteration count in general, +albeit that would be covered by XSA-77 - have the number of non-trivial +iterations they need to perform (indirectly) controllable by both the +guest they are for and any domain controlling the guest (including the +one running qemu for it). + +This is XSA-97. + +Signed-off-by: Jan Beulich <jbeulich@suse.com> +Reviewed-by: Tim Deegan <tim@xen.org> + +--- a/xen/arch/x86/domain.c ++++ b/xen/arch/x86/domain.c +@@ -2136,7 +2136,9 @@ int domain_relinquish_resources(struct d + pci_release_devices(d); + + /* Tear down paging-assistance stuff. */ +- paging_teardown(d); ++ ret = paging_teardown(d); ++ if ( ret ) ++ return ret; + + /* Drop the in-use references to page-table bases. */ + for_each_vcpu ( d, v ) +--- a/xen/arch/x86/domctl.c ++++ b/xen/arch/x86/domctl.c +@@ -66,6 +66,9 @@ long arch_do_domctl( + &domctl->u.shadow_op, + guest_handle_cast(u_domctl, void)); + rcu_unlock_domain(d); ++ if ( ret == -EAGAIN ) ++ return hypercall_create_continuation(__HYPERVISOR_domctl, ++ "h", u_domctl); + copy_to_guest(u_domctl, domctl, 1); + } + } +--- a/xen/arch/x86/mm/hap/hap.c ++++ b/xen/arch/x86/mm/hap/hap.c +@@ -678,8 +678,7 @@ int hap_domctl(struct domain *d, xen_dom + paging_unlock(d); + if ( preempted ) + /* Not finished. Set up to re-run the call. */ +- rc = hypercall_create_continuation(__HYPERVISOR_domctl, "h", +- u_domctl); ++ rc = -EAGAIN; + else + /* Finished. Return the new allocation */ + sc->mb = hap_get_allocation(d); +--- a/xen/arch/x86/mm/paging.c ++++ b/xen/arch/x86/mm/paging.c +@@ -26,6 +26,7 @@ + #include <asm/shadow.h> + #include <asm/p2m.h> + #include <asm/hap.h> ++#include <asm/event.h> + #include <asm/hvm/nestedhvm.h> + #include <xen/numa.h> + #include <xsm/xsm.h> +@@ -116,26 +117,46 @@ static void paging_free_log_dirty_page(s + d->arch.paging.free_page(d, mfn_to_page(mfn)); + } + +-void paging_free_log_dirty_bitmap(struct domain *d) ++static int paging_free_log_dirty_bitmap(struct domain *d, int rc) + { + mfn_t *l4, *l3, *l2; + int i4, i3, i2; + ++ paging_lock(d); ++ + if ( !mfn_valid(d->arch.paging.log_dirty.top) ) +- return; ++ { ++ paging_unlock(d); ++ return 0; ++ } + +- paging_lock(d); ++ if ( !d->arch.paging.preempt.vcpu ) ++ { ++ memset(&d->arch.paging.preempt.log_dirty, 0, ++ sizeof(d->arch.paging.preempt.log_dirty)); ++ ASSERT(rc <= 0); ++ d->arch.paging.preempt.log_dirty.done = -rc; ++ } ++ else if ( d->arch.paging.preempt.vcpu != current || ++ d->arch.paging.preempt.op != XEN_DOMCTL_SHADOW_OP_OFF ) ++ { ++ paging_unlock(d); ++ return -EBUSY; ++ } + + l4 = map_domain_page(mfn_x(d->arch.paging.log_dirty.top)); ++ i4 = d->arch.paging.preempt.log_dirty.i4; ++ i3 = d->arch.paging.preempt.log_dirty.i3; ++ rc = 0; + +- for ( i4 = 0; i4 < LOGDIRTY_NODE_ENTRIES; i4++ ) ++ for ( ; i4 < LOGDIRTY_NODE_ENTRIES; i4++, i3 = 0 ) + { + if ( !mfn_valid(l4[i4]) ) + continue; + + l3 = map_domain_page(mfn_x(l4[i4])); + +- for ( i3 = 0; i3 < LOGDIRTY_NODE_ENTRIES; i3++ ) ++ for ( ; i3 < LOGDIRTY_NODE_ENTRIES; i3++ ) + { + if ( !mfn_valid(l3[i3]) ) + continue; +@@ -148,20 +169,54 @@ void paging_free_log_dirty_bitmap(struct + + unmap_domain_page(l2); + paging_free_log_dirty_page(d, l3[i3]); ++ l3[i3] = _mfn(INVALID_MFN); ++ ++ if ( i3 < LOGDIRTY_NODE_ENTRIES - 1 && hypercall_preempt_check() ) ++ { ++ d->arch.paging.preempt.log_dirty.i3 = i3 + 1; ++ d->arch.paging.preempt.log_dirty.i4 = i4; ++ rc = -EAGAIN; ++ break; ++ } + } + + unmap_domain_page(l3); ++ if ( rc ) ++ break; + paging_free_log_dirty_page(d, l4[i4]); ++ l4[i4] = _mfn(INVALID_MFN); ++ ++ if ( i4 < LOGDIRTY_NODE_ENTRIES - 1 && hypercall_preempt_check() ) ++ { ++ d->arch.paging.preempt.log_dirty.i3 = 0; ++ d->arch.paging.preempt.log_dirty.i4 = i4 + 1; ++ rc = -EAGAIN; ++ break; ++ } + } + + unmap_domain_page(l4); +- paging_free_log_dirty_page(d, d->arch.paging.log_dirty.top); +- d->arch.paging.log_dirty.top = _mfn(INVALID_MFN); + +- ASSERT(d->arch.paging.log_dirty.allocs == 0); +- d->arch.paging.log_dirty.failed_allocs = 0; ++ if ( !rc ) ++ { ++ paging_free_log_dirty_page(d, d->arch.paging.log_dirty.top); ++ d->arch.paging.log_dirty.top = _mfn(INVALID_MFN); ++ ++ ASSERT(d->arch.paging.log_dirty.allocs == 0); ++ d->arch.paging.log_dirty.failed_allocs = 0; ++ ++ rc = -d->arch.paging.preempt.log_dirty.done; ++ d->arch.paging.preempt.vcpu = NULL; ++ } ++ else ++ { ++ d->arch.paging.preempt.vcpu = current; ++ d->arch.paging.preempt.op = XEN_DOMCTL_SHADOW_OP_OFF; ++ } + + paging_unlock(d); ++ ++ return rc; + } + + int paging_log_dirty_enable(struct domain *d) +@@ -178,15 +233,25 @@ int paging_log_dirty_enable(struct domai + return ret; + } + +-int paging_log_dirty_disable(struct domain *d) ++static int paging_log_dirty_disable(struct domain *d, bool_t resuming) + { +- int ret; ++ int ret = 1; ++ ++ if ( !resuming ) ++ { ++ domain_pause(d); ++ /* Safe because the domain is paused. */ ++ ret = d->arch.paging.log_dirty.disable_log_dirty(d); ++ ASSERT(ret <= 0); ++ } + +- domain_pause(d); +- /* Safe because the domain is paused. */ +- ret = d->arch.paging.log_dirty.disable_log_dirty(d); + if ( !paging_mode_log_dirty(d) ) +- paging_free_log_dirty_bitmap(d); ++ { ++ ret = paging_free_log_dirty_bitmap(d, ret); ++ if ( ret == -EAGAIN ) ++ return ret; ++ } ++ + domain_unpause(d); + + return ret; +@@ -326,7 +391,9 @@ int paging_mfn_is_dirty(struct domain *d + + /* Read a domain's log-dirty bitmap and stats. If the operation is a CLEAN, + * clear the bitmap and stats as well. */ +-int paging_log_dirty_op(struct domain *d, struct xen_domctl_shadow_op *sc) ++static int paging_log_dirty_op(struct domain *d, ++ struct xen_domctl_shadow_op *sc, ++ bool_t resuming) + { + int rv = 0, clean = 0, peek = 1; + unsigned long pages = 0; +@@ -334,9 +401,22 @@ int paging_log_dirty_op(struct domain *d + unsigned long *l1 = NULL; + int i4, i3, i2; + +- domain_pause(d); ++ if ( !resuming ) ++ domain_pause(d); + paging_lock(d); + ++ if ( !d->arch.paging.preempt.vcpu ) ++ memset(&d->arch.paging.preempt.log_dirty, 0, ++ sizeof(d->arch.paging.preempt.log_dirty)); ++ else if ( d->arch.paging.preempt.vcpu != current || ++ d->arch.paging.preempt.op != sc->op ) ++ { ++ paging_unlock(d); ++ ASSERT(!resuming); ++ domain_unpause(d); ++ return -EBUSY; ++ } ++ + clean = (sc->op == XEN_DOMCTL_SHADOW_OP_CLEAN); + + PAGING_DEBUG(LOGDIRTY, "log-dirty %s: dom %u faults=%u dirty=%u\n", +@@ -365,17 +445,15 @@ int paging_log_dirty_op(struct domain *d + goto out; + } + +- pages = 0; + l4 = paging_map_log_dirty_bitmap(d); ++ i4 = d->arch.paging.preempt.log_dirty.i4; ++ i3 = d->arch.paging.preempt.log_dirty.i3; ++ pages = d->arch.paging.preempt.log_dirty.done; + +- for ( i4 = 0; +- (pages < sc->pages) && (i4 < LOGDIRTY_NODE_ENTRIES); +- i4++ ) ++ for ( ; (pages < sc->pages) && (i4 < LOGDIRTY_NODE_ENTRIES); i4++, i3 = 0 ) + { + l3 = (l4 && mfn_valid(l4[i4])) ? map_domain_page(mfn_x(l4[i4])) : NULL; +- for ( i3 = 0; +- (pages < sc->pages) && (i3 < LOGDIRTY_NODE_ENTRIES); +- i3++ ) ++ for ( ; (pages < sc->pages) && (i3 < LOGDIRTY_NODE_ENTRIES); i3++ ) + { + l2 = ((l3 && mfn_valid(l3[i3])) ? + map_domain_page(mfn_x(l3[i3])) : NULL); +@@ -410,18 +488,51 @@ int paging_log_dirty_op(struct domain *d + } + if ( l2 ) + unmap_domain_page(l2); ++ ++ if ( i3 < LOGDIRTY_NODE_ENTRIES - 1 && hypercall_preempt_check() ) ++ { ++ d->arch.paging.preempt.log_dirty.i4 = i4; ++ d->arch.paging.preempt.log_dirty.i3 = i3 + 1; ++ rv = -EAGAIN; ++ break; ++ } + } + if ( l3 ) + unmap_domain_page(l3); ++ ++ if ( !rv && i4 < LOGDIRTY_NODE_ENTRIES - 1 && ++ hypercall_preempt_check() ) ++ { ++ d->arch.paging.preempt.log_dirty.i4 = i4 + 1; ++ d->arch.paging.preempt.log_dirty.i3 = 0; ++ rv = -EAGAIN; ++ } ++ if ( rv ) ++ break; + } + if ( l4 ) + unmap_domain_page(l4); + +- if ( pages < sc->pages ) +- sc->pages = pages; ++ if ( !rv ) ++ d->arch.paging.preempt.vcpu = NULL; ++ else ++ { ++ d->arch.paging.preempt.vcpu = current; ++ d->arch.paging.preempt.op = sc->op; ++ d->arch.paging.preempt.log_dirty.done = pages; ++ } + + paging_unlock(d); + ++ if ( rv ) ++ { ++ /* Never leave the domain paused for other errors. */ ++ ASSERT(rv == -EAGAIN); ++ return rv; ++ } ++ ++ if ( pages < sc->pages ) ++ sc->pages = pages; + if ( clean ) + { + /* We need to further call clean_dirty_bitmap() functions of specific +@@ -432,6 +543,7 @@ int paging_log_dirty_op(struct domain *d + return rv; + + out: ++ d->arch.paging.preempt.vcpu = NULL; + paging_unlock(d); + domain_unpause(d); + +@@ -498,12 +610,6 @@ void paging_log_dirty_init(struct domain + d->arch.paging.log_dirty.clean_dirty_bitmap = clean_dirty_bitmap; + } + +-/* This function fress log dirty bitmap resources. */ +-static void paging_log_dirty_teardown(struct domain*d) +-{ +- paging_free_log_dirty_bitmap(d); +-} +- + /************************************************/ + /* CODE FOR PAGING SUPPORT */ + /************************************************/ +@@ -547,6 +653,7 @@ void paging_vcpu_init(struct vcpu *v) + int paging_domctl(struct domain *d, xen_domctl_shadow_op_t *sc, + XEN_GUEST_HANDLE(void) u_domctl) + { ++ bool_t resuming = 0; + int rc; + + if ( unlikely(d == current->domain) ) +@@ -569,6 +676,20 @@ int paging_domctl(struct domain *d, xen_ + return -EINVAL; + } + ++ if ( d->arch.paging.preempt.vcpu ) ++ { ++ if ( d->arch.paging.preempt.vcpu != current || ++ d->arch.paging.preempt.op != sc->op ) ++ { ++ printk(XENLOG_G_DEBUG ++ "d%d:v%d: Paging op %#x on Dom%u with unfinished prior op %#x\n", ++ current->domain->domain_id, current->vcpu_id, ++ sc->op, d->domain_id, d->arch.paging.preempt.op); ++ return -EBUSY; ++ } ++ resuming = 1; ++ } ++ + rc = xsm_shadow_control(d, sc->op); + if ( rc ) + return rc; +@@ -594,13 +714,13 @@ int paging_domctl(struct domain *d, xen_ + + case XEN_DOMCTL_SHADOW_OP_OFF: + if ( paging_mode_log_dirty(d) ) +- if ( (rc = paging_log_dirty_disable(d)) != 0 ) ++ if ( (rc = paging_log_dirty_disable(d, resuming)) != 0 ) + return rc; + break; + + case XEN_DOMCTL_SHADOW_OP_CLEAN: + case XEN_DOMCTL_SHADOW_OP_PEEK: +- return paging_log_dirty_op(d, sc); ++ return paging_log_dirty_op(d, sc, resuming); + } + + /* Here, dispatch domctl to the appropriate paging code */ +@@ -611,18 +731,24 @@ int paging_domctl(struct domain *d, xen_ + } + + /* Call when destroying a domain */ +-void paging_teardown(struct domain *d) ++int paging_teardown(struct domain *d) + { ++ int rc; ++ + if ( hap_enabled(d) ) + hap_teardown(d); + else + shadow_teardown(d); + + /* clean up log dirty resources. */ +- paging_log_dirty_teardown(d); ++ rc = paging_free_log_dirty_bitmap(d, 0); ++ if ( rc == -EAGAIN ) ++ return rc; + + /* Move populate-on-demand cache back to domain_list for destruction */ + p2m_pod_empty_cache(d); ++ ++ return rc; + } + + /* Call once all of the references to the domain have gone away */ +--- a/xen/arch/x86/mm/shadow/common.c ++++ b/xen/arch/x86/mm/shadow/common.c +@@ -3829,8 +3829,7 @@ int shadow_domctl(struct domain *d, + paging_unlock(d); + if ( preempted ) + /* Not finished. Set up to re-run the call. */ +- rc = hypercall_create_continuation( +- __HYPERVISOR_domctl, "h", u_domctl); ++ rc = -EAGAIN; + else + /* Finished. Return the new allocation */ + sc->mb = shadow_get_allocation(d); +--- a/xen/common/domain.c ++++ b/xen/common/domain.c +@@ -479,7 +479,6 @@ int domain_kill(struct domain *d) + rc = domain_relinquish_resources(d); + if ( rc != 0 ) + { +- BUG_ON(rc != -EAGAIN); + break; + } + if ( sched_move_domain(d, cpupool0) ) +--- a/xen/include/asm-x86/domain.h ++++ b/xen/include/asm-x86/domain.h +@@ -193,6 +193,20 @@ struct paging_domain { + struct hap_domain hap; + /* log dirty support */ + struct log_dirty_domain log_dirty; ++ ++ /* preemption handling */ ++ struct { ++ struct vcpu *vcpu; ++ unsigned int op; ++ union { ++ struct { ++ unsigned long done:PADDR_BITS - PAGE_SHIFT; ++ unsigned long i4:PAGETABLE_ORDER; ++ unsigned long i3:PAGETABLE_ORDER; ++ } log_dirty; ++ }; ++ } preempt; ++ + /* alloc/free pages from the pool for paging-assistance structures + * (used by p2m and log-dirty code for their tries) */ + struct page_info * (*alloc_page)(struct domain *d); +--- a/xen/include/asm-x86/paging.h ++++ b/xen/include/asm-x86/paging.h +@@ -141,9 +141,6 @@ struct paging_mode { + /***************************************************************************** + * Log dirty code */ + +-/* free log dirty bitmap resource */ +-void paging_free_log_dirty_bitmap(struct domain *d); +- + /* get the dirty bitmap for a specific range of pfns */ + void paging_log_dirty_range(struct domain *d, + unsigned long begin_pfn, +@@ -153,9 +150,6 @@ void paging_log_dirty_range(struct domai + /* enable log dirty */ + int paging_log_dirty_enable(struct domain *d); + +-/* disable log dirty */ +-int paging_log_dirty_disable(struct domain *d); +- + /* log dirty initialization */ + void paging_log_dirty_init(struct domain *d, + int (*enable_log_dirty)(struct domain *d), +@@ -218,7 +212,7 @@ int paging_domctl(struct domain *d, xen_ + XEN_GUEST_HANDLE(void) u_domctl); + + /* Call when destroying a domain */ +-void paging_teardown(struct domain *d); ++int paging_teardown(struct domain *d); + + /* Call once all of the references to the domain have gone away */ + void paging_final_teardown(struct domain *d); |