--- a/src/arch/runtime/context_switching.cc +++ b/src/arch/runtime/context_switching.cc @@ -98,9 +98,11 @@ the stack by swapcontext; they're callee-saved, so whatever happens to be in them will be ignored. */ sp -= 6; -#elif defined(__arm__) || defined(__aarch64__) +#elif defined(__arm__) /* We must preserve r4, r5, r6, r7, r8, r9, r10, and r11. Because we have to store the LR (r14) in swapcontext as well, we also store r12 in swapcontext to keep the stack double-word-aligned. However, we already accounted for both of those by decrementing sp twice above (once for r14 and once for r12, say). */ sp -= 8; +#elif defined(__aarch64__) + sp -=94; #else #error "Unsupported architecture." #endif @@ -303,16 +305,38 @@ "push {r14}\n" "push {r4-r11}\n" #elif defined(__aarch64__) - "str x12, [sp, #-8]!\n" - "str x14, [sp, #-8]!\n" - "str x4, [sp, #-8]!\n" - "str x5, [sp, #-8]!\n" - "str x6, [sp, #-8]!\n" - "str x7, [sp, #-8]!\n" - "str x8, [sp, #-8]!\n" - "str x9, [sp, #-8]!\n" - "str x10, [sp, #-8]!\n" - "str x11, [sp, #-8]!\n" + "STP X30, X16, [SP, #-0x10]!\n" + "STP X29, X17, [SP, #-0x10]!\n" + "STP X0, X1, [SP, #-0x10]!\n" + "STP X2, X3, [SP, #-0x10]!\n" + "STP X4, X5, [SP, #-0x10]!\n" + "STP X6, X7, [SP, #-0x10]!\n" + "STP X9, X10, [SP, #-0x10]!\n" + "STP X11, X12, [SP, #-0x10]!\n" + "STP X13, X14, [SP, #-0x10]!\n" + "STP X15, X19, [SP, #-0x10]!\n" + "STP X20, X21, [SP, #-0x10]!\n" + "STP X22, X23, [SP, #-0x10]!\n" + "STP X24, X25, [SP, #-0x10]!\n" + "STP X26, X27, [SP, #-0x10]!\n" + "STP X28, X29, [SP, #-0x10]!\n" + "STP XZR, X18, [SP, #-0x10]!\n" + "STP Q0, Q1, [SP,#-0x20]!\n" + "STP Q2, Q3, [SP,#-0x20]!\n" + "STP Q4, Q5, [SP,#-0x20]!\n" + "STP Q6, Q7, [SP,#-0x20]!\n" + "STP Q8, Q9, [SP,#-0x20]!\n" + "STP Q10, Q11, [SP,#-0x20]!\n" + "STP Q12, Q13, [SP,#-0x20]!\n" + "STP Q14, Q15, [SP,#-0x20]!\n" + "STP Q16, Q17, [SP,#-0x20]!\n" + "STP Q18, Q19, [SP,#-0x20]!\n" + "STP Q20, Q21, [SP,#-0x20]!\n" + "STP Q22, Q23, [SP,#-0x20]!\n" + "STP Q24, Q25, [SP,#-0x20]!\n" + "STP Q26, Q27, [SP,#-0x20]!\n" + "STP Q28, Q29, [SP,#-0x20]!\n" + "STP Q30, Q31, [SP,#-0x20]!\n" #endif /* Save old stack pointer. */ @@ -329,7 +353,8 @@ "str r13, [r0]\n" #elif defined(__aarch64__) /* On aarch64, the first argument is in `x0`. `sp` is the stack pointer. */ - "str x0, [sp, #-8]\n" + "mov X2, SP\n" + "str X2, [x0]\n" #endif /* Load the new stack pointer and the preserved registers. */ @@ -346,7 +371,7 @@ "mov r13, r1\n" #elif defined(__aarch64__) /* On aarch64, the second argument is in `x1` */ - "mov x13, x1\n" + "mov SP, x1\n" #endif #if defined(__i386__) @@ -366,16 +391,38 @@ "pop {r14}\n" "pop {r12}\n" #elif defined(__aarch64__) - "ldr x4, [sp], #8\n" - "ldr x5, [sp], #8\n" - "ldr x6, [sp], #8\n" - "ldr x7, [sp], #8\n" - "ldr x8, [sp], #8\n" - "ldr x9, [sp], #8\n" - "ldr x10, [sp], #8\n" - "ldr x11, [sp], #8\n" - "ldr x14, [sp], #8\n" - "ldr x12, [sp], #8\n" + "LDP Q30, Q31, [SP], #0x20\n" + "LDP Q28, Q29, [SP], #0x20\n" + "LDP Q26, Q27, [SP], #0x20\n" + "LDP Q24, Q25, [SP], #0x20\n" + "LDP Q22, Q23, [SP], #0x20\n" + "LDP Q20, Q21, [SP], #0x20\n" + "LDP Q18, Q19, [SP], #0x20\n" + "LDP Q16, Q17, [SP], #0x20\n" + "LDP Q14, Q15, [SP], #0x20\n" + "LDP Q12, Q13, [SP], #0x20\n" + "LDP Q10, Q11, [SP], #0x20\n" + "LDP Q8, Q9, [SP], #0x20\n" + "LDP Q6, Q7, [SP], #0x20\n" + "LDP Q4, Q5, [SP], #0x20\n" + "LDP Q2, Q3, [SP], #0x20\n" + "LDP Q0, Q1, [SP], #0x20\n" + "LDP XZR, X18, [SP], #0x10\n" + "LDP X28, X29, [SP], #0x10\n" + "LDP X26, X27, [SP], #0x10\n" + "LDP X24, X25, [SP], #0x10\n" + "LDP X22, X23, [SP], #0x10\n" + "LDP X20, X21, [SP], #0x10\n" + "LDP X15, X19, [SP], #0x10\n" + "LDP X13, X14, [SP], #0x10\n" + "LDP X11, X12, [SP], #0x10\n" + "LDP X9, X10, [SP], #0x10\n" + "LDP X6, X7, [SP], #0x10\n" + "LDP X4, X5, [SP], #0x10\n" + "LDP X2, X3, [SP], #0x10\n" + "LDP X0, X1, [SP], #0x10\n" + "LDP X29, X17, [SP], #0x10\n" + "LDP X30, X16, [SP], #0x10\n" #endif #if defined(__i386__) || defined(__x86_64__)