diff options
author | Mike Sullivan <mksully22@gmail.com> | 2019-11-20 00:25:43 +0000 |
---|---|---|
committer | Natanael Copa <ncopa@alpinelinux.org> | 2019-11-21 11:31:01 +0000 |
commit | 8e73e28545b2466e7dae9bf4574224398e140592 (patch) | |
tree | eddea446c57a5e8768e7d928a3b2d55cd976639f /main/luajit | |
parent | c63718215c0617266f0e70f24fe6a12aeea09af2 (diff) | |
download | aports-8e73e28545b2466e7dae9bf4574224398e140592.tar.bz2 aports-8e73e28545b2466e7dae9bf4574224398e140592.tar.xz |

main/luajit: update ppc64le support patch to latest

This should fix lua-penlight testsuite on ppc64le


Diffstat (limited to 'main/luajit')

mode | file | lines changed |
---|---|---|
-rw-r--r-- | main/luajit/0004-Add-ppc64-support-based-on-koriakin-GitHub-patchset.patch | 3522 |
-rw-r--r-- | main/luajit/APKBUILD | 6 |
-rw-r--r-- | main/luajit/enable-support-for-ppc64le.patch | 4822 |

3 files changed, 3525 insertions, 4825 deletions

diff --git a/main/luajit/0004-Add-ppc64-support-based-on-koriakin-GitHub-patchset.patch b/main/luajit/0004-Add-ppc64-support-based-on-koriakin-GitHub-patchset.patch new file mode 100644 index 0000000000..a879f3fc9d --- /dev/null +++ b/main/luajit/0004-Add-ppc64-support-based-on-koriakin-GitHub-patchset.patch @@ -0,0 +1,3522 @@ +From: "Rodrigo R. Galvao" <rosattig@br.ibm.com> +Date: Wed, 11 Oct 2017 08:41:47 +0000 +Subject: New patch proposal for PPC64 support + + Create a patch for PPC64 support based on +https://github.com/LuaJIT/LuaJIT/pull/140. + It replaces the old patch since this new one is more likely to be merged +with luajit upstream. + + +Author: Rodrigo R. Galvao <rosattig@br.ibm.com> +--- + dynasm/dasm_ppc.lua | 5 + + src/Makefile | 11 +- + src/host/buildvm_asm.c | 16 +- + src/lj_arch.h | 18 +- + src/lj_ccall.c | 166 ++++++- + src/lj_ccall.h | 13 + + src/lj_ccallback.c | 68 ++- + src/lj_ctype.h | 2 +- + src/lj_def.h | 4 + + src/lj_frame.h | 9 + + src/lj_target_ppc.h | 14 + + src/vm_ppc.dasc | 1290 ++++++++++++++++++++++++++++++++---------------- + 12 files changed, 1162 insertions(+), 454 deletions(-) + +diff --git a/dynasm/dasm_ppc.lua b/dynasm/dasm_ppc.lua +index f73974d..a4ad70b 100644 +--- a/dynasm/dasm_ppc.lua ++++ b/dynasm/dasm_ppc.lua +@@ -257,9 +257,11 @@ map_op = { + addic_3 = "30000000RRI", + ["addic._3"] = "34000000RRI", + addi_3 = "38000000RR0I", ++ addil_3 = "38000000RR0J", + li_2 = "38000000RI", + la_2 = "38000000RD", + addis_3 = "3c000000RR0I", ++ addisl_3 = "3c000000RR0J", + lis_2 = "3c000000RI", + lus_2 = "3c000000RU", + bc_3 = "40000000AAK", +@@ -842,6 +844,9 @@ map_op = { + srdi_3 = op_alias("rldicl_4", function(p) + p[4] = p[3]; p[3] = "64-("..p[3]..")" + end), ++ ["srdi._3"] = op_alias("rldicl._4", function(p) ++ p[4] = p[3]; p[3] = "64-("..p[3]..")" ++ end), + clrldi_3 = op_alias("rldicl_4", function(p) + p[4] = p[3]; p[3] = "0" + end), +diff --git a/src/Makefile b/src/Makefile +index 6b73a89..cc50bae 100644 +--- a/src/Makefile 
++++ b/src/Makefile +@@ -453,7 +453,16 @@ ifeq (ppc,$(TARGET_LJARCH)) + DASM_AFLAGS+= -D GPR64 + endif + ifeq (PS3,$(TARGET_SYS)) +- DASM_AFLAGS+= -D PPE -D TOC ++ DASM_AFLAGS+= -D PPE ++ endif ++ ifneq (,$(findstring LJ_ARCH_PPC_OPD 1,$(TARGET_TESTARCH))) ++ DASM_AFLAGS+= -D OPD ++ endif ++ ifneq (,$(findstring LJ_ARCH_PPC_OPDENV 1,$(TARGET_TESTARCH))) ++ DASM_AFLAGS+= -D OPDENV ++ endif ++ ifneq (,$(findstring LJ_ARCH_PPC_ELFV2 1,$(TARGET_TESTARCH))) ++ DASM_AFLAGS+= -D ELFV2 + endif + ifneq (,$(findstring LJ_ARCH_PPC64 ,$(TARGET_TESTARCH))) + DASM_ARCH= ppc64 +diff --git a/src/host/buildvm_asm.c b/src/host/buildvm_asm.c +index ffd1490..6bb995e 100644 +--- a/src/host/buildvm_asm.c ++++ b/src/host/buildvm_asm.c +@@ -140,18 +140,14 @@ static void emit_asm_wordreloc(BuildCtx *ctx, uint8_t *p, int n, + #else + #define TOCPREFIX "" + #endif +- if ((ins >> 26) == 16) { ++ if ((ins >> 26) == 14) { ++ fprintf(ctx->fp, "\taddi %d,%d,%s\n", (ins >> 21) & 31, (ins >> 16) & 31, sym); ++ } else if ((ins >> 26) == 15) { ++ fprintf(ctx->fp, "\taddis %d,%d,%s\n", (ins >> 21) & 31, (ins >> 16) & 31, sym); ++ } else if ((ins >> 26) == 16) { + fprintf(ctx->fp, "\t%s %d, %d, " TOCPREFIX "%s\n", + (ins & 1) ? "bcl" : "bc", (ins >> 21) & 31, (ins >> 16) & 31, sym); + } else if ((ins >> 26) == 18) { +-#if LJ_ARCH_PPC64 +- const char *suffix = strchr(sym, '@'); +- if (suffix && suffix[1] == 'h') { +- fprintf(ctx->fp, "\taddis 11, 2, %s\n", sym); +- } else if (suffix && suffix[1] == 'l') { +- fprintf(ctx->fp, "\tld 12, %s\n", sym); +- } else +-#endif + fprintf(ctx->fp, "\t%s " TOCPREFIX "%s\n", (ins & 1) ? 
"bl" : "b", sym); + } else { + fprintf(stderr, +@@ -250,7 +246,7 @@ void emit_asm(BuildCtx *ctx) + int i, rel; + + fprintf(ctx->fp, "\t.file \"buildvm_%s.dasc\"\n", ctx->dasm_arch); +-#if LJ_ARCH_PPC64 ++#if LJ_ARCH_PPC_ELFV2 + fprintf(ctx->fp, "\t.abiversion 2\n"); + #endif + fprintf(ctx->fp, "\t.text\n"); +diff --git a/src/lj_arch.h b/src/lj_arch.h +index d609b37..53bc651 100644 +--- a/src/lj_arch.h ++++ b/src/lj_arch.h +@@ -269,10 +269,18 @@ + #if LJ_TARGET_CONSOLE + #define LJ_ARCH_PPC32ON64 1 + #define LJ_ARCH_NOFFI 1 ++#if LJ_TARGET_PS3 ++#define LJ_ARCH_PPC_OPD 1 ++#endif + #elif LJ_ARCH_BITS == 64 +-#define LJ_ARCH_PPC64 1 +-#define LJ_TARGET_GC64 1 ++#define LJ_ARCH_PPC32ON64 1 + #define LJ_ARCH_NOJIT 1 /* NYI */ ++#if _CALL_ELF == 2 ++#define LJ_ARCH_PPC_ELFV2 1 ++#else ++#define LJ_ARCH_PPC_OPD 1 ++#define LJ_ARCH_PPC_OPDENV 1 ++#endif + #endif + + #if _ARCH_PWR7 +@@ -423,12 +431,6 @@ + #if defined(_SOFT_FLOAT) || defined(_SOFT_DOUBLE) + #error "No support for PowerPC CPUs without double-precision FPU" + #endif +-#if !LJ_ARCH_PPC64 && LJ_ARCH_ENDIAN == LUAJIT_LE +-#error "No support for little-endian PPC32" +-#endif +-#if LJ_ARCH_PPC64 +-#error "No support for PowerPC 64 bit mode (yet)" +-#endif + #ifdef __NO_FPRS__ + #error "No support for PPC/e500 anymore (use LuaJIT 2.0)" + #endif +diff --git a/src/lj_ccall.c b/src/lj_ccall.c +index 5c252e5..b891591 100644 +--- a/src/lj_ccall.c ++++ b/src/lj_ccall.c +@@ -369,21 +369,97 @@ + #elif LJ_TARGET_PPC + /* -- PPC calling conventions --------------------------------------------- */ + ++#if LJ_ARCH_BITS == 64 ++ ++#if LJ_ARCH_PPC_ELFV2 ++ ++#define CCALL_HANDLE_STRUCTRET \ ++ if (sz > 16 && ccall_classify_fp(cts, ctr) <= 0) { \ ++ cc->retref = 1; /* Return by reference. 
*/ \ ++ cc->gpr[ngpr++] = (GPRArg)dp; \ ++ } ++ ++#define CCALL_HANDLE_STRUCTRET2 \ ++ int isfp = ccall_classify_fp(cts, ctr); \ ++ int i; \ ++ if (isfp == FTYPE_FLOAT) { \ ++ for (i = 0; i < ctr->size / 4; i++) \ ++ ((float *)dp)[i] = cc->fpr[i]; \ ++ } else if (isfp == FTYPE_DOUBLE) { \ ++ for (i = 0; i < ctr->size / 8; i++) \ ++ ((double *)dp)[i] = cc->fpr[i]; \ ++ } else { \ ++ if (ctr->size < 8 && LJ_BE) { \ ++ sp += 8 - ctr->size; \ ++ } \ ++ memcpy(dp, sp, ctr->size); \ ++ } ++ ++#else ++ + #define CCALL_HANDLE_STRUCTRET \ + cc->retref = 1; /* Return all structs by reference. */ \ + cc->gpr[ngpr++] = (GPRArg)dp; + ++#endif ++ + #define CCALL_HANDLE_COMPLEXRET \ + /* Complex values are returned in 2 or 4 GPRs. */ \ + cc->retref = 0; + ++#define CCALL_HANDLE_STRUCTARG ++ + #define CCALL_HANDLE_COMPLEXRET2 \ +- memcpy(dp, sp, ctr->size); /* Copy complex from GPRs. */ ++ if (ctr->size == 2*sizeof(float)) { /* Copy complex float from FPRs. */ \ ++ ((float *)dp)[0] = cc->fpr[0]; \ ++ ((float *)dp)[1] = cc->fpr[1]; \ ++ } else { /* Copy complex double from FPRs. */ \ ++ ((double *)dp)[0] = cc->fpr[0]; \ ++ ((double *)dp)[1] = cc->fpr[1]; \ ++ } ++ ++#define CCALL_HANDLE_COMPLEXARG \ ++ isfp = 1; \ ++ if (d->size == sizeof(float) * 2) { \ ++ d = ctype_get(cts, CTID_COMPLEX_DOUBLE); \ ++ isf32 = 1; \ ++ } ++ ++#define CCALL_HANDLE_REGARG \ ++ if (isfp && d->size == sizeof(float)) { \ ++ d = ctype_get(cts, CTID_DOUBLE); \ ++ isf32 = 1; \ ++ } \ ++ if (ngpr < maxgpr) { \ ++ dp = &cc->gpr[ngpr]; \ ++ ngpr += n; \ ++ if (ngpr > maxgpr) { \ ++ nsp += ngpr - 8; \ ++ ngpr = 8; \ ++ if (nsp > CCALL_MAXSTACK) { \ ++ goto err_nyi; \ ++ } \ ++ } \ ++ goto done; \ ++ } ++ ++#else ++ ++#define CCALL_HANDLE_STRUCTRET \ ++ cc->retref = 1; /* Return all structs by reference. */ \ ++ cc->gpr[ngpr++] = (GPRArg)dp; ++ ++#define CCALL_HANDLE_COMPLEXRET \ ++ /* Complex values are returned in 2 or 4 GPRs. 
*/ \ ++ cc->retref = 0; + + #define CCALL_HANDLE_STRUCTARG \ + rp = cdataptr(lj_cdata_new(cts, did, sz)); \ + sz = CTSIZE_PTR; /* Pass all structs by reference. */ + ++#define CCALL_HANDLE_COMPLEXRET2 \ ++ memcpy(dp, sp, ctr->size); /* Copy complex from GPRs. */ ++ + #define CCALL_HANDLE_COMPLEXARG \ + /* Pass complex by value in 2 or 4 GPRs. */ + +@@ -410,6 +486,8 @@ + } \ + } + ++#endif ++ + #define CCALL_HANDLE_RET \ + if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ + ctr = ctype_get(cts, CTID_DOUBLE); /* FPRs always hold doubles. */ +@@ -801,6 +879,50 @@ noth: /* Not a homogeneous float/double aggregate. */ + + #endif + ++/* -- PowerPC64 ELFv2 ABI struct classification ------------------- */ ++ ++#if LJ_ARCH_PPC_ELFV2 ++ ++#define FTYPE_FLOAT 1 ++#define FTYPE_DOUBLE 2 ++ ++static unsigned int ccall_classify_fp(CTState *cts, CType *ct) { ++ if (ctype_isfp(ct->info)) { ++ if (ct->size == sizeof(float)) ++ return FTYPE_FLOAT; ++ else ++ return FTYPE_DOUBLE; ++ } else if (ctype_iscomplex(ct->info)) { ++ if (ct->size == sizeof(float) * 2) ++ return FTYPE_FLOAT; ++ else ++ return FTYPE_DOUBLE; ++ } else if (ctype_isstruct(ct->info)) { ++ int res = -1; ++ int sz = ct->size; ++ while (ct->sib) { ++ ct = ctype_get(cts, ct->sib); ++ if (ctype_isfield(ct->info)) { ++ int sub = ccall_classify_fp(cts, ctype_rawchild(cts, ct)); ++ if (res == -1) ++ res = sub; ++ if (sub != -1 && sub != res) ++ return 0; ++ } else if (ctype_isbitfield(ct->info) || ++ ctype_isxattrib(ct->info, CTA_SUBTYPE)) { ++ return 0; ++ } ++ } ++ if (res > 0 && sz > res * 4 * 8) ++ return 0; ++ return res; ++ } else { ++ return 0; ++ } ++} ++ ++#endif ++ + /* -- MIPS64 ABI struct classification ---------------------------- */ + + #if LJ_TARGET_MIPS64 +@@ -974,6 +1096,9 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, + CTSize sz; + MSize n, isfp = 0, isva = 0; + void *dp, *rp = NULL; ++#if LJ_TARGET_PPC && LJ_ARCH_BITS == 64 ++ int isf32 = 0; ++#endif + + if (fid) { /* Get 
argument type from field. */ + CType *ctf = ctype_get(cts, fid); +@@ -1030,7 +1155,37 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, + *(void **)dp = rp; + dp = rp; + } ++#if LJ_TARGET_PPC && LJ_ARCH_BITS == 64 && LJ_BE ++ if (ctype_isstruct(d->info) && sz < CTSIZE_PTR) { ++ dp = (char *)dp + (CTSIZE_PTR - sz); ++ } ++#endif + lj_cconv_ct_tv(cts, d, (uint8_t *)dp, o, CCF_ARG(narg)); ++#if LJ_TARGET_PPC && LJ_ARCH_BITS == 64 ++ if (isfp) { ++ int i; ++ for (i = 0; i < d->size / 8 && nfpr < CCALL_NARG_FPR; i++) ++ cc->fpr[nfpr++] = ((double *)dp)[i]; ++ } ++ if (isf32) { ++ int i; ++ for (i = 0; i < d->size / 8; i++) ++ ((float *)dp)[i*2] = ((double *)dp)[i]; ++ } ++#endif ++#if LJ_ARCH_PPC_ELFV2 ++ if (ctype_isstruct(d->info)) { ++ isfp = ccall_classify_fp(cts, d); ++ int i; ++ if (isfp == FTYPE_FLOAT) { ++ for (i = 0; i < d->size / 4 && nfpr < CCALL_NARG_FPR; i++) ++ cc->fpr[nfpr++] = ((float *)dp)[i]; ++ } else if (isfp == FTYPE_DOUBLE) { ++ for (i = 0; i < d->size / 8 && nfpr < CCALL_NARG_FPR; i++) ++ cc->fpr[nfpr++] = ((double *)dp)[i]; ++ } ++ } ++#endif + /* Extend passed integers to 32 bits at least. */ + if (ctype_isinteger_or_bool(d->info) && d->size < 4) { + if (d->info & CTF_UNSIGNED) +@@ -1044,6 +1199,15 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, + if (isfp && d->size == sizeof(float)) + ((float *)dp)[1] = ((float *)dp)[0]; /* Floats occupy high slot. 
*/ + #endif ++#if LJ_TARGET_PPC && LJ_ARCH_BITS == 64 ++ if ((ctype_isinteger_or_bool(d->info) || ctype_isenum(d->info)) ++ && d->size <= 4) { ++ if (d->info & CTF_UNSIGNED) ++ *(uint64_t *)dp = (uint64_t)*(uint32_t *)dp; ++ else ++ *(int64_t *)dp = (int64_t)*(int32_t *)dp; ++ } ++#endif + #if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE) + if ((ctype_isinteger_or_bool(d->info) || ctype_isenum(d->info) + #if LJ_TARGET_MIPS64 +diff --git a/src/lj_ccall.h b/src/lj_ccall.h +index 59f6648..bbf309f 100644 +--- a/src/lj_ccall.h ++++ b/src/lj_ccall.h +@@ -86,10 +86,23 @@ typedef union FPRArg { + #elif LJ_TARGET_PPC + + #define CCALL_NARG_GPR 8 ++#if LJ_ARCH_BITS == 64 ++#define CCALL_NARG_FPR 13 ++#if LJ_ARCH_PPC_ELFV2 ++#define CCALL_NRET_GPR 2 ++#define CCALL_NRET_FPR 8 ++#define CCALL_SPS_EXTRA 14 ++#else ++#define CCALL_NRET_GPR 1 ++#define CCALL_NRET_FPR 2 ++#define CCALL_SPS_EXTRA 16 ++#endif ++#else + #define CCALL_NARG_FPR 8 + #define CCALL_NRET_GPR 4 /* For complex double. */ + #define CCALL_NRET_FPR 1 + #define CCALL_SPS_EXTRA 4 ++#endif + #define CCALL_SPS_FREE 0 + + typedef intptr_t GPRArg; +diff --git a/src/lj_ccallback.c b/src/lj_ccallback.c +index 846827b..eb7f445 100644 +--- a/src/lj_ccallback.c ++++ b/src/lj_ccallback.c +@@ -61,8 +61,24 @@ static MSize CALLBACK_OFS2SLOT(MSize ofs) + + #elif LJ_TARGET_PPC + ++#if LJ_ARCH_PPC_OPD ++ ++#define CALLBACK_SLOT2OFS(slot) (24*(slot)) ++#define CALLBACK_OFS2SLOT(ofs) ((ofs)/24) ++#define CALLBACK_MAX_SLOT (CALLBACK_OFS2SLOT(CALLBACK_MCODE_SIZE)) ++ ++#elif LJ_ARCH_PPC_ELFV2 ++ ++#define CALLBACK_SLOT2OFS(slot) (4*(slot)) ++#define CALLBACK_OFS2SLOT(ofs) ((ofs)/4) ++#define CALLBACK_MAX_SLOT (CALLBACK_MCODE_SIZE/4 - 10) ++ ++#else ++ + #define CALLBACK_MCODE_HEAD 24 + ++#endif ++ + #elif LJ_TARGET_MIPS32 + + #define CALLBACK_MCODE_HEAD 20 +@@ -188,24 +204,59 @@ static void callback_mcode_init(global_State *g, uint32_t *page) + lua_assert(p - page <= CALLBACK_MCODE_SIZE); + } + #elif LJ_TARGET_PPC ++#if 
LJ_ARCH_PPC_OPD ++register void *vm_toc __asm__("r2"); ++static void callback_mcode_init(global_State *g, uint64_t *page) ++{ ++ uint64_t *p = page; ++ void *target = (void *)lj_vm_ffi_callback; ++ MSize slot; ++ for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) { ++ *p++ = (uint64_t)target; ++ *p++ = (uint64_t)vm_toc; ++ *p++ = (uint64_t)g | ((uint64_t)slot << 47); ++ } ++ lua_assert(p - page <= CALLBACK_MCODE_SIZE / 8); ++} ++#else + static void callback_mcode_init(global_State *g, uint32_t *page) + { + uint32_t *p = page; + void *target = (void *)lj_vm_ffi_callback; + MSize slot; ++#if LJ_ARCH_PPC_ELFV2 ++ // Needs to be in sync with lj_vm_ffi_callback. ++ lua_assert(CALLBACK_MCODE_SIZE == 4096); ++ for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) { ++ *p = PPCI_B | (((page+CALLBACK_MAX_SLOT-p) & 0x00ffffffu) << 2); ++ p++; ++ } ++ *p++ = PPCI_LI | PPCF_T(RID_SYS1) | ((((intptr_t)target) >> 32) & 0xffff); ++ *p++ = PPCI_LI | PPCF_T(RID_R11) | ((((intptr_t)g) >> 32) & 0xffff); ++ *p++ = PPCI_RLDICR | PPCF_T(RID_SYS1) | PPCF_A(RID_SYS1) | PPCF_SH(32) | PPCF_M6(63-32); /* sldi */ ++ *p++ = PPCI_RLDICR | PPCF_T(RID_R11) | PPCF_A(RID_R11) | PPCF_SH(32) | PPCF_M6(63-32); /* sldi */ ++ *p++ = PPCI_ORIS | PPCF_A(RID_SYS1) | PPCF_T(RID_SYS1) | ((((intptr_t)target) >> 16) & 0xffff); ++ *p++ = PPCI_ORIS | PPCF_A(RID_R11) | PPCF_T(RID_R11) | ((((intptr_t)g) >> 16) & 0xffff); ++ *p++ = PPCI_ORI | PPCF_A(RID_SYS1) | PPCF_T(RID_SYS1) | (((intptr_t)target) & 0xffff); ++ *p++ = PPCI_ORI | PPCF_A(RID_R11) | PPCF_T(RID_R11) | (((intptr_t)g) & 0xffff); ++ *p++ = PPCI_MTCTR | PPCF_T(RID_SYS1); ++ *p++ = PPCI_BCTR; ++#else + *p++ = PPCI_LIS | PPCF_T(RID_TMP) | (u32ptr(target) >> 16); +- *p++ = PPCI_LIS | PPCF_T(RID_R12) | (u32ptr(g) >> 16); ++ *p++ = PPCI_LIS | PPCF_T(RID_R11) | (u32ptr(g) >> 16); + *p++ = PPCI_ORI | PPCF_A(RID_TMP)|PPCF_T(RID_TMP) | (u32ptr(target) & 0xffff); +- *p++ = PPCI_ORI | PPCF_A(RID_R12)|PPCF_T(RID_R12) | (u32ptr(g) & 0xffff); ++ *p++ = PPCI_ORI | 
PPCF_A(RID_R11)|PPCF_T(RID_R11) | (u32ptr(g) & 0xffff); + *p++ = PPCI_MTCTR | PPCF_T(RID_TMP); + *p++ = PPCI_BCTR; + for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) { +- *p++ = PPCI_LI | PPCF_T(RID_R11) | slot; ++ *p++ = PPCI_LI | PPCF_T(RID_R12) | slot; + *p = PPCI_B | (((page-p) & 0x00ffffffu) << 2); + p++; + } +- lua_assert(p - page <= CALLBACK_MCODE_SIZE); ++#endif ++ lua_assert(p - page <= CALLBACK_MCODE_SIZE / 4); + } ++#endif + #elif LJ_TARGET_MIPS + static void callback_mcode_init(global_State *g, uint32_t *page) + { +@@ -641,6 +692,15 @@ static void callback_conv_result(CTState *cts, lua_State *L, TValue *o) + *(int32_t *)dp = ctr->size == 1 ? (int32_t)*(int8_t *)dp : + (int32_t)*(int16_t *)dp; + } ++#if LJ_TARGET_PPC && LJ_ARCH_BITS == 64 ++ if (ctr->size <= 4 && ++ (ctype_isinteger_or_bool(ctr->info) || ctype_isenum(ctr->info))) { ++ if (ctr->info & CTF_UNSIGNED) ++ *(uint64_t *)dp = (uint64_t)*(uint32_t *)dp; ++ else ++ *(int64_t *)dp = (int64_t)*(int32_t *)dp; ++ } ++#endif + #if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE) + /* Always sign-extend results to 64 bits. Even a soft-fp 'float'. */ + if (ctr->size <= 4 && +diff --git a/src/lj_ctype.h b/src/lj_ctype.h +index 0c220a8..105865b 100644 +--- a/src/lj_ctype.h ++++ b/src/lj_ctype.h +@@ -153,7 +153,7 @@ typedef struct CType { + + /* Simplify target-specific configuration. Checked in lj_ccall.h. */ + #define CCALL_MAX_GPR 8 +-#define CCALL_MAX_FPR 8 ++#define CCALL_MAX_FPR 14 + + typedef LJ_ALIGN(8) union FPRCBArg { double d; float f[2]; } FPRCBArg; + +diff --git a/src/lj_def.h b/src/lj_def.h +index 2d8fff6..381d6f5 100644 +--- a/src/lj_def.h ++++ b/src/lj_def.h +@@ -71,7 +71,11 @@ typedef unsigned int uintptr_t; + #define LJ_MAX_IDXCHAIN 100 /* __index/__newindex chain limit. */ + #define LJ_STACK_EXTRA (5+2*LJ_FR2) /* Extra stack space (metamethods). */ + ++#if defined(__powerpc64__) && _CALL_ELF != 2 ++#define LJ_NUM_CBPAGE 4 /* Number of FFI callback pages. 
*/ ++#else + #define LJ_NUM_CBPAGE 1 /* Number of FFI callback pages. */ ++#endif + + /* Minimum table/buffer sizes. */ + #define LJ_MIN_GLOBAL 6 /* Min. global table size (hbits). */ +diff --git a/src/lj_frame.h b/src/lj_frame.h +index 19c49a4..c666418 100644 +--- a/src/lj_frame.h ++++ b/src/lj_frame.h +@@ -210,6 +210,15 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. */ + #define CFRAME_OFS_MULTRES 408 + #define CFRAME_SIZE 384 + #define CFRAME_SHIFT_MULTRES 3 ++#elif LJ_ARCH_PPC_ELFV2 ++#define CFRAME_OFS_ERRF 360 ++#define CFRAME_OFS_NRES 356 ++#define CFRAME_OFS_PREV 336 ++#define CFRAME_OFS_L 352 ++#define CFRAME_OFS_PC 348 ++#define CFRAME_OFS_MULTRES 344 ++#define CFRAME_SIZE 368 ++#define CFRAME_SHIFT_MULTRES 3 + #elif LJ_ARCH_PPC32ON64 + #define CFRAME_OFS_ERRF 472 + #define CFRAME_OFS_NRES 468 +diff --git a/src/lj_target_ppc.h b/src/lj_target_ppc.h +index c5c991a..f0c8c94 100644 +--- a/src/lj_target_ppc.h ++++ b/src/lj_target_ppc.h +@@ -30,8 +30,13 @@ enum { + + /* Calling conventions. 
*/ + RID_RET = RID_R3, ++#if LJ_LE ++ RID_RETHI = RID_R4, ++ RID_RETLO = RID_R3, ++#else + RID_RETHI = RID_R3, + RID_RETLO = RID_R4, ++#endif + RID_FPRET = RID_F1, + + /* These definitions must match with the *.dasc file(s): */ +@@ -131,6 +136,8 @@ static LJ_AINLINE uint32_t *exitstub_trace_addr_(uint32_t *p, uint32_t exitno) + #define PPCF_C(r) ((r) << 6) + #define PPCF_MB(n) ((n) << 6) + #define PPCF_ME(n) ((n) << 1) ++#define PPCF_SH(n) ((((n) & 31) << (11+1)) | (((n) & 32) >> (5-1))) ++#define PPCF_M6(n) ((((n) & 31) << (5+1)) | (((n) & 32) << (11-5))) + #define PPCF_Y 0x00200000 + #define PPCF_DOT 0x00000001 + +@@ -200,6 +207,13 @@ typedef enum PPCIns { + PPCI_RLWINM = 0x54000000, + PPCI_RLWIMI = 0x50000000, + ++ PPCI_RLDICL = 0x78000000, ++ PPCI_RLDICR = 0x78000004, ++ PPCI_RLDIC = 0x78000008, ++ PPCI_RLDIMI = 0x7800000c, ++ PPCI_RLDCL = 0x78000010, ++ PPCI_RLDCR = 0x78000012, ++ + PPCI_B = 0x48000000, + PPCI_BL = 0x48000001, + PPCI_BC = 0x40800000, +diff --git a/src/vm_ppc.dasc b/src/vm_ppc.dasc +index b4260eb..abb381e 100644 +--- a/src/vm_ppc.dasc ++++ b/src/vm_ppc.dasc +@@ -22,35 +22,40 @@ + |// GPR64 64 bit registers (but possibly 32 bit pointers, e.g. PS3). + |// Affects reg saves, stack layout, carry/overflow/dot flags etc. + |// FRAME32 Use 32 bit frame layout, even with GPR64 (Xbox 360). +-|// TOC Need table of contents (64 bit or 32 bit variant, e.g. PS3). ++|// OPD Need function descriptors (64 bit or 32 bit variant, e.g. PS3). + |// Function pointers are really a struct: code, TOC, env (optional). +-|// TOCENV Function pointers have an environment pointer, too (not on PS3). ++|// OPDENV Function pointers have an environment pointer, too (not on PS3). ++|// ELFV2 The 64-bit ELF V2 ABI is in use. + |// PPE Power Processor Element of Cell (PS3) or Xenon (Xbox 360). + |// Must avoid (slow) micro-coded instructions. 
+ | + |.if P64 +-|.define TOC, 1 +-|.define TOCENV, 1 + |.macro lpx, a, b, c; ldx a, b, c; .endmacro + |.macro lp, a, b; ld a, b; .endmacro + |.macro stp, a, b; std a, b; .endmacro ++|.macro stpx, a, b, c; stdx a, b, c; .endmacro + |.define decode_OPP, decode_OP8 +-|.if FFI +-|// Missing: Calling conventions, 64 bit regs, TOC. +-|.error lib_ffi not yet implemented for PPC64 +-|.endif ++|.define PSIZE, 8 + |.else + |.macro lpx, a, b, c; lwzx a, b, c; .endmacro + |.macro lp, a, b; lwz a, b; .endmacro + |.macro stp, a, b; stw a, b; .endmacro ++|.macro stpx, a, b, c; stwx a, b, c; .endmacro + |.define decode_OPP, decode_OP4 ++|.define PSIZE, 4 + |.endif + | + |// Convenience macros for TOC handling. +-|.if TOC ++|.if OPD or ELFV2 + |// Linker needs a TOC patch area for every external call relocation. +-|.macro blex, target; bl extern target@plt; nop; .endmacro ++|.macro blex, target; bl extern target; nop; .endmacro + |.macro .toc, a, b; a, b; .endmacro ++|.else ++|.macro blex, target; bl extern target@plt; .endmacro ++|.macro .toc, a, b; .endmacro ++|.endif ++|.if OPD ++|.macro .opd, a, b; a, b; .endmacro + |.if P64 + |.define TOC_OFS, 8 + |.define ENV_OFS, 16 +@@ -58,13 +63,13 @@ + |.define TOC_OFS, 4 + |.define ENV_OFS, 8 + |.endif +-|.else // No TOC. +-|.macro blex, target; bl extern target@plt; .endmacro +-|.macro .toc, a, b; .endmacro ++|.else // No OPD. 
++|.macro .opd, a, b; .endmacro + |.endif +-|.macro .tocenv, a, b; .if TOCENV; a, b; .endif; .endmacro ++|.macro .opdenv, a, b; .if OPDENV; a, b; .endif; .endmacro + | + |.macro .gpr64, a, b; .if GPR64; a, b; .endif; .endmacro ++|.macro .elfv2, a, b; .if ELFV2; a, b; .endif; .endmacro + | + |.macro andix., y, a, i + |.if PPE +@@ -75,29 +80,6 @@ + |.endif + |.endmacro + | +-|.macro clrso, reg +-|.if PPE +-| li reg, 0 +-| mtxer reg +-|.else +-| mcrxr cr0 +-|.endif +-|.endmacro +-| +-|.macro checkov, reg, noov +-|.if PPE +-| mfxer reg +-| add reg, reg, reg +-| cmpwi reg, 0 +-| li reg, 0 +-| mtxer reg +-| bgey noov +-|.else +-| mcrxr cr0 +-| bley noov +-|.endif +-|.endmacro +-| + |//----------------------------------------------------------------------- + | + |// Fixed register assignments for the interpreter. +@@ -111,6 +93,7 @@ + |.define LREG, r18 // Register holding lua_State (also in SAVE_L). + |.define MULTRES, r19 // Size of multi-result: (nresults+1)*8. + |.define JGL, r31 // On-trace: global_State + 32768. ++|.define BASEP4, r25 // Equal to BASE + 4 + | + |// Constants for type-comparisons, stores and conversions. C callee-save. + |.define TISNUM, r22 +@@ -143,12 +126,19 @@ + | + |.define FARG1, f1 + |.define FARG2, f2 ++|.define FARG3, f3 ++|.define FARG4, f4 ++|.define FARG5, f5 ++|.define FARG6, f6 ++|.define FARG7, f7 ++|.define FARG8, f8 + | + |.define CRET1, r3 + |.define CRET2, r4 + | + |.define TOCREG, r2 // TOC register (only used by C code). + |.define ENVREG, r11 // Environment pointer (nested C functions). ++|.define FUNCREG, r12 // ELFv2 function pointer (overlaps RD) + | + |// Stack layout while in interpreter. Must match with lj_frame.h. + |.if GPR64 +@@ -182,6 +172,49 @@ + |.define TMPD, TMPD_HI + |.define TONUM_D, TONUM_HI + | ++|.elif ELFV2 ++| ++|// 392(sp) // \ 32 bit C frame info. ++|.define SAVE_LR, 384(sp) ++|.define SAVE_CR, 376(sp) // 64 bit CR save. ++|.define CFRAME_SPACE, 368 // Delta for sp. 
++|// Back chain for sp: 368(sp) <-- sp entering interpreter ++|.define SAVE_ERRF, 360(sp) // | ++|.define SAVE_NRES, 356(sp) // | ++|.define SAVE_L, 352(sp) // > Parameter save area. ++|.define SAVE_PC, 348(sp) // | ++|.define SAVE_MULTRES, 344(sp) // | ++|.define SAVE_CFRAME, 336(sp) // / 64 bit C frame chain. ++|.define SAVE_FPR_, 192 // .. 192+18*8: 64 bit FPR saves. ++|.define SAVE_GPR_, 48 // .. 48+18*8: 64 bit GPR saves. ++|.if ENDIAN_LE ++|.define TMPD_HI, 44(sp) ++|.define TMPD_LO, 40(sp) ++|.define TONUM_HI, 36(sp) ++|.define TONUM_LO, 32(sp) ++|.else ++|.define TMPD_LO, 44(sp) ++|.define TMPD_HI, 40(sp) ++|.define TONUM_LO, 36(sp) ++|.define TONUM_HI, 32(sp) ++|.endif ++|.define SAVE_TOC, 24(sp) // TOC save area. ++|// Next frame lr: 16(sp) ++|// Next frame cr: 8(sp) ++|// Back chain for sp: 0(sp) <-- sp while in interpreter ++| ++|.if ENDIAN_LE ++|.define TMPD_BLO, 32(sp) ++|.define TMPD, TMPD_LO ++|.define TONUM_D, TONUM_LO ++|.else ++|.define TMPD_BLO, 39(sp) ++|.define TMPD, TMPD_HI ++|.define TONUM_D, TONUM_HI ++|.endif ++| ++|.define EXIT_OFFSET, 32 ++| + |.else + | + |// 508(sp) // \ 32 bit C frame info. +@@ -192,23 +225,39 @@ + |.define SAVE_MULTRES, 456(sp) // | + |.define SAVE_CFRAME, 448(sp) // / 64 bit C frame chain. + |.define SAVE_LR, 416(sp) ++|.define SAVE_CR, 408(sp) // 64 bit CR save. + |.define CFRAME_SPACE, 400 // Delta for sp. + |// Back chain for sp: 400(sp) <-- sp entering interpreter + |.define SAVE_FPR_, 256 // .. 256+18*8: 64 bit FPR saves. + |.define SAVE_GPR_, 112 // .. 112+18*8: 64 bit GPR saves. + |// 48(sp) // Callee parameter save area (ABI mandated). + |.define SAVE_TOC, 40(sp) // TOC save area. ++|.if ENDIAN_LE ++|.define TMPD_HI, 36(sp) // \ Link editor temp (ABI mandated). ++|.define TMPD_LO, 32(sp) // / ++|.define TONUM_HI, 28(sp) // \ Compiler temp (ABI mandated). ++|.define TONUM_LO, 24(sp) // / ++|.else + |.define TMPD_LO, 36(sp) // \ Link editor temp (ABI mandated). 
+ |.define TMPD_HI, 32(sp) // / + |.define TONUM_LO, 28(sp) // \ Compiler temp (ABI mandated). + |.define TONUM_HI, 24(sp) // / ++|.endif + |// Next frame lr: 16(sp) +-|.define SAVE_CR, 8(sp) // 64 bit CR save. ++|// Next frame cr: 8(sp) + |// Back chain for sp: 0(sp) <-- sp while in interpreter + | ++|.if ENDIAN_LE ++|.define TMPD_BLO, 32(sp) ++|.define TMPD, TMPD_LO ++|.define TONUM_D, TONUM_LO ++|.else + |.define TMPD_BLO, 39(sp) + |.define TMPD, TMPD_HI + |.define TONUM_D, TONUM_HI ++|.endif ++| ++|.define EXIT_OFFSET, 112 + | + |.endif + |.else +@@ -226,16 +275,31 @@ + |.define SAVE_PC, 32(sp) + |.define SAVE_MULTRES, 28(sp) + |.define UNUSED1, 24(sp) ++|.if ENDIAN_LE ++|.define TMPD_HI, 20(sp) ++|.define TMPD_LO, 16(sp) ++|.define TONUM_HI, 12(sp) ++|.define TONUM_LO, 8(sp) ++|.else + |.define TMPD_LO, 20(sp) + |.define TMPD_HI, 16(sp) + |.define TONUM_LO, 12(sp) + |.define TONUM_HI, 8(sp) ++|.endif + |// Next frame lr: 4(sp) + |// Back chain for sp: 0(sp) <-- sp while in interpreter + | ++|.if ENDIAN_LE ++|.define TMPD_BLO, 16(sp) ++|.define TMPD, TMPD_LO ++|.define TONUM_D, TONUM_LO ++|.else + |.define TMPD_BLO, 23(sp) + |.define TMPD, TMPD_HI + |.define TONUM_D, TONUM_HI ++|.endif ++| ++|.define EXIT_OFFSET, 16 + | + |.endif + | +@@ -350,8 +414,35 @@ + |//----------------------------------------------------------------------- + | + |// Access to frame relative to BASE. 
++|.if ENDIAN_LE ++|.define FRAME_PC, -4 ++|.define FRAME_FUNC, -8 ++|.define FRAME_CONTPC, -12 ++|.define FRAME_CONTRET, -16 ++|.define WORD_LO, 0 ++|.define WORD_HI, 4 ++|.define WORD_BLO, 0 ++|.define BASE_LO, BASE ++|.define BASE_HI, BASEP4 ++|.macro lwzux2, hi, lo, base, idx ++| lwzux lo, base, idx ++| lwz hi, 4(base) ++|.endmacro ++|.else + |.define FRAME_PC, -8 + |.define FRAME_FUNC, -4 ++|.define FRAME_CONTPC, -16 ++|.define FRAME_CONTRET, -12 ++|.define WORD_LO, 4 ++|.define WORD_HI, 0 ++|.define WORD_BLO, 7 ++|.define BASE_LO, BASEP4 ++|.define BASE_HI, BASE ++|.macro lwzux2, hi, lo, base, idx ++| lwzux hi, base, idx ++| lwz lo, 4(base) ++|.endmacro ++|.endif + | + |// Instruction decode. + |.macro decode_OP4, dst, ins; rlwinm dst, ins, 2, 22, 29; .endmacro +@@ -412,6 +503,7 @@ + |// Call decode and dispatch. + |.macro ins_callt + | // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC ++| addi BASEP4, BASE, 4 + | lwz PC, LFUNC:RB->pc + | lwz INS, 0(PC) + | addi PC, PC, 4 +@@ -504,7 +596,12 @@ static void build_subroutines(BuildCtx *ctx) + | lwz PC, FRAME_PC(TMP2) // Fetch PC of previous frame. + | mr BASE, TMP2 // Restore caller base. + | // Prepending may overwrite the pcall frame, so do it at the end. +- | stwu TMP1, FRAME_PC(RA) // Prepend true to results. ++ | .if ENDIAN_LE ++ | addi RA, RA, -8 ++ | stw TMP1, WORD_HI(RA) // Prepend true to results. ++ | .else ++ | stwu TMP1, -8(RA) // Prepend true to results. ++ | .endif + | + |->vm_returnc: + | addi RD, RD, 8 // RD = (nresults+1)*8. +@@ -560,7 +657,7 @@ static void build_subroutines(BuildCtx *ctx) + | lwz TMP1, L->maxstack + | cmplw BASE, TMP1 + | bge >8 +- | stw TISNIL, 0(BASE) ++ | stw TISNIL, WORD_HI(BASE) + | addi RD, RD, 8 + | addi BASE, BASE, 8 + | b <2 +@@ -611,7 +708,12 @@ static void build_subroutines(BuildCtx *ctx) + |->vm_unwind_ff_eh: // Landing pad for external unwinder. 
+ | lwz L, SAVE_L + | .toc ld TOCREG, SAVE_TOC ++ |.if P64 ++ | lus TISNUM, LJ_TISNUM >> 16 // Setup type comparison constants. ++ | ori TISNUM, TISNUM, LJ_TISNUM & 0xffff ++ |.else + | li TISNUM, LJ_TISNUM // Setup type comparison constants. ++ |.endif + | lp BASE, L->base + | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). + | lwz DISPATCH, L->glref // Setup pointer to dispatch table. +@@ -626,7 +728,7 @@ static void build_subroutines(BuildCtx *ctx) + | la RA, -8(BASE) // Results start at BASE-8. + | stw TMP3, TMPD + | addi DISPATCH, DISPATCH, GG_G2DISP +- | stw TMP1, 0(RA) // Prepend false to error message. ++ | stw TMP1, WORD_HI(RA) // Prepend false to error message. + | li RD, 16 // 2 results: false + error message. + | st_vmstate + | lfs TONUM, TMPD +@@ -687,7 +789,12 @@ static void build_subroutines(BuildCtx *ctx) + | stw L, DISPATCH_GL(cur_L)(DISPATCH) + | mr RA, BASE + | lp BASE, L->base ++ |.if P64 ++ | lus TISNUM, LJ_TISNUM >> 16 // Setup type comparison constants. ++ | ori TISNUM, TISNUM, LJ_TISNUM & 0xffff ++ |.else + | li TISNUM, LJ_TISNUM // Setup type comparison constants. ++ |.endif + | lp TMP1, L->top + | lwz PC, FRAME_PC(BASE) + | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). +@@ -737,7 +844,12 @@ static void build_subroutines(BuildCtx *ctx) + |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype). + | stw L, DISPATCH_GL(cur_L)(DISPATCH) + | lp TMP2, L->base // TMP2 = old base (used in vmeta_call). ++ |.if P64 ++ | lus TISNUM, LJ_TISNUM >> 16 // Setup type comparison constants. ++ | ori TISNUM, TISNUM, LJ_TISNUM & 0xffff ++ |.else + | li TISNUM, LJ_TISNUM // Setup type comparison constants. ++ |.endif + | lp TMP1, L->top + | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). 
+ | add PC, PC, BASE +@@ -757,8 +869,8 @@ static void build_subroutines(BuildCtx *ctx) + | + |->vm_call_dispatch: + | // TMP2 = old base, BASE = new base, RC = nargs*8, PC = caller PC +- | lwz TMP0, FRAME_PC(BASE) +- | lwz LFUNC:RB, FRAME_FUNC(BASE) ++ | lwz TMP0, WORD_HI-8(BASE) ++ | lwz LFUNC:RB, WORD_LO-8(BASE) + | checkfunc TMP0; bne ->vmeta_call + | + |->vm_call_dispatch_f: +@@ -777,7 +889,9 @@ static void build_subroutines(BuildCtx *ctx) + | sub TMP0, TMP0, TMP1 // Compute -savestack(L, L->top). + | lp TMP1, L->cframe + | addi DISPATCH, DISPATCH, GG_G2DISP +- | .toc lp CARG4, 0(CARG4) ++ | .opd lp TOCREG, TOC_OFS(CARG4) ++ | .opdenv lp ENVREG, ENV_OFS(CARG4) ++ | .opd lp CARG4, 0(CARG4) + | li TMP2, 0 + | stw TMP0, SAVE_NRES // Neg. delta means cframe w/o frame. + | stw TMP2, SAVE_ERRF // No error function. +@@ -785,7 +899,9 @@ static void build_subroutines(BuildCtx *ctx) + | stp sp, L->cframe // Add our C frame to cframe chain. + | stw L, DISPATCH_GL(cur_L)(DISPATCH) + | mtctr CARG4 ++ | .elfv2 mr FUNCREG, CARG4 + | bctrl // (lua_State *L, lua_CFunction func, void *ud) ++ | .toc lp TOCREG, SAVE_TOC + |.if PPE + | mr BASE, CRET1 + | cmpwi CRET1, 0 +@@ -807,20 +923,27 @@ static void build_subroutines(BuildCtx *ctx) + | + |->cont_dispatch: + | // BASE = meta base, RA = resultptr, RD = (nresults+1)*8 +- | lwz TMP0, -12(BASE) // Continuation. ++ | lwz TMP0, FRAME_CONTRET(BASE) // Continuation. + | mr RB, BASE + | mr BASE, TMP2 // Restore caller BASE. + | lwz LFUNC:TMP1, FRAME_FUNC(TMP2) + |.if FFI + | cmplwi TMP0, 1 + |.endif +- | lwz PC, -16(RB) // Restore PC from [cont|PC]. +- | subi TMP2, RD, 8 ++ | lwz PC, FRAME_CONTPC(RB) // Restore PC from [cont|PC]. ++ | addi BASEP4, BASE, 4 ++ | addi TMP2, RD, WORD_HI-8 + | lwz TMP1, LFUNC:TMP1->pc + | stwx TISNIL, RA, TMP2 // Ensure one valid arg. 
++ |.if P64 ++ | ld TMP3, 0(DISPATCH) ++ |.endif + |.if FFI + | ble >1 + |.endif ++ |.if P64 ++ | add TMP0, TMP0, TMP3 ++ |.endif + | lwz KBASE, PC2PROTO(k)(TMP1) + | // BASE = base, RA = resultptr, RB = meta base + | mtctr TMP0 +@@ -856,20 +979,20 @@ static void build_subroutines(BuildCtx *ctx) + | la CARG3, DISPATCH_GL(tmptv)(DISPATCH) + | li TMP0, LJ_TSTR + | decode_RB8 RB, INS +- | stw STR:RC, 4(CARG3) ++ | stw STR:RC, WORD_LO(CARG3) + | add CARG2, BASE, RB +- | stw TMP0, 0(CARG3) ++ | stw TMP0, WORD_HI(CARG3) + | b >1 + | + |->vmeta_tgets: + | la CARG2, DISPATCH_GL(tmptv)(DISPATCH) + | li TMP0, LJ_TTAB +- | stw TAB:RB, 4(CARG2) ++ | stw TAB:RB, WORD_LO(CARG2) + | la CARG3, DISPATCH_GL(tmptv2)(DISPATCH) +- | stw TMP0, 0(CARG2) ++ | stw TMP0, WORD_HI(CARG2) + | li TMP1, LJ_TSTR +- | stw STR:RC, 4(CARG3) +- | stw TMP1, 0(CARG3) ++ | stw STR:RC, WORD_LO(CARG3) ++ | stw TMP1, WORD_HI(CARG3) + | b >1 + | + |->vmeta_tgetb: // TMP0 = index +@@ -880,8 +1003,8 @@ static void build_subroutines(BuildCtx *ctx) + | la CARG3, DISPATCH_GL(tmptv)(DISPATCH) + | add CARG2, BASE, RB + |.if DUALNUM +- | stw TISNUM, 0(CARG3) +- | stw TMP0, 4(CARG3) ++ | stw TISNUM, WORD_HI(CARG3) ++ | stw TMP0, WORD_LO(CARG3) + |.else + | stfd f0, 0(CARG3) + |.endif +@@ -909,7 +1032,7 @@ static void build_subroutines(BuildCtx *ctx) + | // BASE = base, L->top = new base, stack = cont/func/t/k + | subfic TMP1, BASE, FRAME_CONT + | lp BASE, L->top +- | stw PC, -16(BASE) // [cont|PC] ++ | stw PC, FRAME_CONTPC(BASE) // [cont|PC] + | add PC, TMP1, BASE + | lwz LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. + | li NARGS8:RC, 16 // 2 args for func(t, k). 
+@@ -923,7 +1046,7 @@ static void build_subroutines(BuildCtx *ctx) + | lfd f14, 0(CRET1) + | b ->BC_TGETR_Z + |1: +- | stwx TISNIL, BASE, RA ++ | stwx TISNIL, BASE_HI, RA + | b ->cont_nop + | + |//----------------------------------------------------------------------- +@@ -932,20 +1055,20 @@ static void build_subroutines(BuildCtx *ctx) + | la CARG3, DISPATCH_GL(tmptv)(DISPATCH) + | li TMP0, LJ_TSTR + | decode_RB8 RB, INS +- | stw STR:RC, 4(CARG3) ++ | stw STR:RC, WORD_LO(CARG3) + | add CARG2, BASE, RB +- | stw TMP0, 0(CARG3) ++ | stw TMP0, WORD_HI(CARG3) + | b >1 + | + |->vmeta_tsets: + | la CARG2, DISPATCH_GL(tmptv)(DISPATCH) + | li TMP0, LJ_TTAB +- | stw TAB:RB, 4(CARG2) ++ | stw TAB:RB, WORD_LO(CARG2) + | la CARG3, DISPATCH_GL(tmptv2)(DISPATCH) +- | stw TMP0, 0(CARG2) ++ | stw TMP0, WORD_HI(CARG2) + | li TMP1, LJ_TSTR +- | stw STR:RC, 4(CARG3) +- | stw TMP1, 0(CARG3) ++ | stw STR:RC, WORD_LO(CARG3) ++ | stw TMP1, WORD_HI(CARG3) + | b >1 + | + |->vmeta_tsetb: // TMP0 = index +@@ -956,8 +1079,8 @@ static void build_subroutines(BuildCtx *ctx) + | la CARG3, DISPATCH_GL(tmptv)(DISPATCH) + | add CARG2, BASE, RB + |.if DUALNUM +- | stw TISNUM, 0(CARG3) +- | stw TMP0, 4(CARG3) ++ | stw TISNUM, WORD_HI(CARG3) ++ | stw TMP0, WORD_LO(CARG3) + |.else + | stfd f0, 0(CARG3) + |.endif +@@ -986,7 +1109,7 @@ static void build_subroutines(BuildCtx *ctx) + | // BASE = base, L->top = new base, stack = cont/func/t/k/(v) + | subfic TMP1, BASE, FRAME_CONT + | lp BASE, L->top +- | stw PC, -16(BASE) // [cont|PC] ++ | stw PC, FRAME_CONTPC(BASE) // [cont|PC] + | add PC, TMP1, BASE + | lwz LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. 
+ | li NARGS8:RC, 24 // 3 args for func(t, k, v) +@@ -1006,17 +1129,9 @@ static void build_subroutines(BuildCtx *ctx) + |->vmeta_comp: + | mr CARG1, L + | subi PC, PC, 4 +- |.if DUALNUM +- | mr CARG2, RA +- |.else + | add CARG2, BASE, RA +- |.endif + | stw PC, SAVE_PC +- |.if DUALNUM +- | mr CARG3, RD +- |.else + | add CARG3, BASE, RD +- |.endif + | stp BASE, L->base + | decode_OP1 CARG4, INS + | bl extern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op) +@@ -1043,7 +1158,7 @@ static void build_subroutines(BuildCtx *ctx) + | b ->cont_nop + | + |->cont_condt: // RA = resultptr +- | lwz TMP0, 0(RA) ++ | lwz TMP0, WORD_HI(RA) + | .gpr64 extsw TMP0, TMP0 + | subfic TMP0, TMP0, LJ_TTRUE // Branch if result is true. + | subfe CRET1, CRET1, CRET1 +@@ -1051,7 +1166,7 @@ static void build_subroutines(BuildCtx *ctx) + | b <4 + | + |->cont_condf: // RA = resultptr +- | lwz TMP0, 0(RA) ++ | lwz TMP0, WORD_HI(RA) + | .gpr64 extsw TMP0, TMP0 + | subfic TMP0, TMP0, LJ_TTRUE // Branch if result is false. + | subfe CRET1, CRET1, CRET1 +@@ -1103,8 +1218,8 @@ static void build_subroutines(BuildCtx *ctx) + |.endif + | + |->vmeta_unm: +- | mr CARG3, RD +- | mr CARG4, RD ++ | add CARG3, BASE, RD ++ | add CARG4, BASE, RD + | b >1 + | + |->vmeta_arith_vn: +@@ -1139,7 +1254,7 @@ static void build_subroutines(BuildCtx *ctx) + |->vmeta_binop: + | // BASE = old base, CRET1 = new base, stack = cont/func/o1/o2 + | sub TMP1, CRET1, BASE +- | stw PC, -16(CRET1) // [cont|PC] ++ | stw PC, FRAME_CONTPC(CRET1) // [cont|PC] + | mr TMP2, BASE + | addi PC, TMP1, FRAME_CONT + | mr BASE, CRET1 +@@ -1150,7 +1265,7 @@ static void build_subroutines(BuildCtx *ctx) + #if LJ_52 + | mr SAVE0, CARG1 + #endif +- | mr CARG2, RD ++ | add CARG2, BASE, RD + | stp BASE, L->base + | mr CARG1, L + | stw PC, SAVE_PC +@@ -1227,25 +1342,25 @@ static void build_subroutines(BuildCtx *ctx) + |.macro .ffunc_1, name + |->ff_ .. 
name: + | cmplwi NARGS8:RC, 8 +- | lwz CARG3, 0(BASE) +- | lwz CARG1, 4(BASE) ++ | lwz CARG3, WORD_HI(BASE) ++ | lwz CARG1, WORD_LO(BASE) + | blt ->fff_fallback + |.endmacro + | + |.macro .ffunc_2, name + |->ff_ .. name: + | cmplwi NARGS8:RC, 16 +- | lwz CARG3, 0(BASE) +- | lwz CARG4, 8(BASE) +- | lwz CARG1, 4(BASE) +- | lwz CARG2, 12(BASE) ++ | lwz CARG3, WORD_HI(BASE) ++ | lwz CARG4, WORD_HI+8(BASE) ++ | lwz CARG1, WORD_LO(BASE) ++ | lwz CARG2, WORD_LO+8(BASE) + | blt ->fff_fallback + |.endmacro + | + |.macro .ffunc_n, name + |->ff_ .. name: + | cmplwi NARGS8:RC, 8 +- | lwz CARG3, 0(BASE) ++ | lwz CARG3, WORD_HI(BASE) + | lfd FARG1, 0(BASE) + | blt ->fff_fallback + | checknum CARG3; bge ->fff_fallback +@@ -1254,9 +1369,9 @@ static void build_subroutines(BuildCtx *ctx) + |.macro .ffunc_nn, name + |->ff_ .. name: + | cmplwi NARGS8:RC, 16 +- | lwz CARG3, 0(BASE) ++ | lwz CARG3, WORD_HI(BASE) + | lfd FARG1, 0(BASE) +- | lwz CARG4, 8(BASE) ++ | lwz CARG4, WORD_HI+8(BASE) + | lfd FARG2, 8(BASE) + | blt ->fff_fallback + | checknum CARG3; bge ->fff_fallback +@@ -1279,9 +1394,9 @@ static void build_subroutines(BuildCtx *ctx) + | cmplw cr1, CARG3, TMP1 + | lwz PC, FRAME_PC(BASE) + | bge cr1, ->fff_fallback +- | stw CARG3, 0(RA) ++ | stw CARG3, WORD_HI(RA) + | addi RD, NARGS8:RC, 8 // Compute (nresults+1)*8. +- | stw CARG1, 4(RA) ++ | stw CARG1, WORD_LO(RA) + | beq ->fff_res // Done if exactly 1 argument. 
+ | li TMP1, 8 + | subi RC, RC, 8 +@@ -1295,17 +1410,36 @@ static void build_subroutines(BuildCtx *ctx) + | + |.ffunc type + | cmplwi NARGS8:RC, 8 +- | lwz CARG1, 0(BASE) ++ | lwz CARG1, WORD_HI(BASE) + | blt ->fff_fallback + | .gpr64 extsw CARG1, CARG1 ++ |.if P64 ++ | li TMP0, LJ_TNUMX ++ | srawi TMP3, CARG1, 15 ++ | subfc TMP1, TMP0, CARG1 ++ |.else + | subfc TMP0, TISNUM, CARG1 +- | subfe TMP2, CARG1, CARG1 ++ |.endif ++ | subfe TMP2, CARG1, CARG1 ++ |.if P64 ++ | cmpwi TMP3, -2 ++ | orc TMP1, TMP2, TMP1 ++ | subf TMP1, TMP0, TMP1 ++ | beq >1 ++ |.else + | orc TMP1, TMP2, TMP0 +- | addi TMP1, TMP1, ~LJ_TISNUM+1 ++ | subf TMP1, TISNUM, TMP1 ++ |.endif + | slwi TMP1, TMP1, 3 ++ |2: + | la TMP2, CFUNC:RB->upvalue + | lfdx FARG1, TMP2, TMP1 + | b ->fff_resn ++ |.if P64 ++ |1: ++ | li TMP1, ~LJ_TLIGHTUD<<3 ++ | b <2 ++ |.endif + | + |//-- Base library: getters and setters --------------------------------- + | +@@ -1328,10 +1462,10 @@ static void build_subroutines(BuildCtx *ctx) + | sub TMP1, TMP0, TMP1 + | add NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) + |3: // Rearranged logic, because we expect _not_ to find the key. 
+- | lwz CARG4, NODE:TMP2->key +- | lwz TMP0, 4+offsetof(Node, key)(NODE:TMP2) +- | lwz CARG2, NODE:TMP2->val +- | lwz TMP1, 4+offsetof(Node, val)(NODE:TMP2) ++ | lwz CARG4, WORD_HI+offsetof(Node, key)(NODE:TMP2) ++ | lwz TMP0, WORD_LO+offsetof(Node, key)(NODE:TMP2) ++ | lwz CARG2, WORD_HI+offsetof(Node, val)(NODE:TMP2) ++ | lwz TMP1, WORD_LO+offsetof(Node, val)(NODE:TMP2) + | checkstr CARG4; bne >4 + | cmpw TMP0, STR:RC; beq >5 + |4: +@@ -1349,14 +1483,33 @@ static void build_subroutines(BuildCtx *ctx) + |6: + | cmpwi CARG3, LJ_TUDATA; beq <1 + | .gpr64 extsw CARG3, CARG3 ++ |.if P64 ++ | li TMP0, LJ_TNUMX ++ | srawi TMP3, CARG3, 15 ++ | subfc TMP1, TMP0, CARG3 ++ |.else + | subfc TMP0, TISNUM, CARG3 ++ |.endif + | subfe TMP2, CARG3, CARG3 ++ |.if P64 ++ | cmpwi TMP3, -2 ++ | orc TMP1, TMP2, TMP1 ++ | subf TMP1, TMP0, TMP1 ++ | beq >7 ++ |.else + | orc TMP1, TMP2, TMP0 +- | addi TMP1, TMP1, ~LJ_TISNUM+1 ++ | subf TMP1, TISNUM, TMP1 ++ |.endif + | slwi TMP1, TMP1, 2 ++ |8: + | la TMP2, DISPATCH_GL(gcroot[GCROOT_BASEMT])(DISPATCH) + | lwzx TAB:CARG1, TMP2, TMP1 + | b <2 ++ |.if P64 ++ |7: ++ | li TMP1, ~LJ_TLIGHTUD<<2 ++ | b <8 ++ |.endif + | + |.ffunc_2 setmetatable + | // Fast path: no mt for table yet and not clearing the mt. +@@ -1374,8 +1527,8 @@ static void build_subroutines(BuildCtx *ctx) + | + |.ffunc rawget + | cmplwi NARGS8:RC, 16 +- | lwz CARG4, 0(BASE) +- | lwz TAB:CARG2, 4(BASE) ++ | lwz CARG4, WORD_HI(BASE) ++ | lwz TAB:CARG2, WORD_LO(BASE) + | blt ->fff_fallback + | checktab CARG4; bne ->fff_fallback + | la CARG3, 8(BASE) +@@ -1390,7 +1543,7 @@ static void build_subroutines(BuildCtx *ctx) + |.ffunc tonumber + | // Only handles the number case inline (without a base argument). + | cmplwi NARGS8:RC, 8 +- | lwz CARG1, 0(BASE) ++ | lwz CARG1, WORD_HI(BASE) + | lfd FARG1, 0(BASE) + | bne ->fff_fallback // Exactly one argument. 
+ | checknum CARG1; bgt ->fff_fallback +@@ -1425,10 +1578,15 @@ static void build_subroutines(BuildCtx *ctx) + | + |.ffunc next + | cmplwi NARGS8:RC, 8 +- | lwz CARG1, 0(BASE) +- | lwz TAB:CARG2, 4(BASE) ++ | lwz CARG1, WORD_HI(BASE) ++ | lwz TAB:CARG2, WORD_LO(BASE) + | blt ->fff_fallback ++ |.if ENDIAN_LE ++ | add TMP1, BASE, NARGS8:RC ++ | stw TISNIL, WORD_HI(TMP1) // Set missing 2nd arg to nil. ++ |.else + | stwx TISNIL, BASE, NARGS8:RC // Set missing 2nd arg to nil. ++ |.endif + | checktab CARG1 + | lwz PC, FRAME_PC(BASE) + | bne ->fff_fallback +@@ -1464,18 +1622,18 @@ static void build_subroutines(BuildCtx *ctx) + | lfd f0, CFUNC:RB->upvalue[0] + | la RA, -8(BASE) + #endif +- | stw TISNIL, 8(BASE) ++ | stw TISNIL, 8+WORD_HI(BASE) + | li RD, (3+1)*8 + | stfd f0, 0(RA) + | b ->fff_res + | + |.ffunc ipairs_aux + | cmplwi NARGS8:RC, 16 +- | lwz CARG3, 0(BASE) +- | lwz TAB:CARG1, 4(BASE) +- | lwz CARG4, 8(BASE) ++ | lwz CARG3, WORD_HI(BASE) ++ | lwz TAB:CARG1, WORD_LO(BASE) ++ | lwz CARG4, 8+WORD_HI(BASE) + |.if DUALNUM +- | lwz TMP2, 12(BASE) ++ | lwz TMP2, 8+WORD_LO(BASE) + |.else + | lfd FARG2, 8(BASE) + |.endif +@@ -1504,16 +1662,16 @@ static void build_subroutines(BuildCtx *ctx) + | la RA, -8(BASE) + | cmplw TMP0, TMP2 + |.if DUALNUM +- | stw TISNUM, 0(RA) ++ | stw TISNUM, WORD_HI(RA) + | slwi TMP3, TMP2, 3 +- | stw TMP2, 4(RA) ++ | stw TMP2, WORD_LO(RA) + |.else + | slwi TMP3, TMP2, 3 + | stfd FARG2, 0(RA) + |.endif + | ble >2 // Not in array part? 
+- | lwzx TMP2, TMP1, TMP3 +- | lfdx f0, TMP1, TMP3 ++ | lfdux f0, TMP1, TMP3 ++ | lwz TMP2, WORD_HI(TMP1) + |1: + | checknil TMP2 + | li RD, (0+1)*8 +@@ -1532,7 +1690,7 @@ static void build_subroutines(BuildCtx *ctx) + | cmplwi CRET1, 0 + | li RD, (0+1)*8 + | beq ->fff_res +- | lwz TMP2, 0(CRET1) ++ | lwz TMP2, WORD_HI(CRET1) + | lfd f0, 0(CRET1) + | b <1 + | +@@ -1551,11 +1709,11 @@ static void build_subroutines(BuildCtx *ctx) + | la RA, -8(BASE) + #endif + |.if DUALNUM +- | stw TISNUM, 8(BASE) ++ | stw TISNUM, 8+WORD_HI(BASE) + |.else +- | stw ZERO, 8(BASE) ++ | stw ZERO, 8+WORD_HI(BASE) + |.endif +- | stw ZERO, 12(BASE) ++ | stw ZERO, 8+WORD_LO(BASE) + | li RD, (3+1)*8 + | stfd f0, 0(RA) + | b ->fff_res +@@ -1576,7 +1734,7 @@ static void build_subroutines(BuildCtx *ctx) + | + |.ffunc xpcall + | cmplwi NARGS8:RC, 16 +- | lwz CARG4, 8(BASE) ++ | lwz CARG4, 8+WORD_HI(BASE) + | lfd FARG2, 8(BASE) + | lfd FARG1, 0(BASE) + | blt ->fff_fallback +@@ -1673,7 +1831,7 @@ static void build_subroutines(BuildCtx *ctx) + |.if resume + | li TMP1, LJ_TTRUE + | la RA, -8(BASE) +- | stw TMP1, -8(BASE) // Prepend true to results. ++ | stw TMP1, WORD_HI-8(BASE) // Prepend true to results. + | addi RD, RD, 16 + |.else + | mr RA, BASE +@@ -1693,7 +1851,7 @@ static void build_subroutines(BuildCtx *ctx) + | lfd f0, 0(TMP3) + | stp TMP3, L:SAVE0->top // Remove error from coroutine stack. + | li RD, (2+1)*8 +- | stw TMP1, -8(BASE) // Prepend false to results. ++ | stw TMP1, WORD_HI-8(BASE) // Prepend false to results. + | la RA, -8(BASE) + | stfd f0, 0(BASE) // Copy error message. + | b <7 +@@ -1746,8 +1904,8 @@ static void build_subroutines(BuildCtx *ctx) + |->fff_resi: + | lwz PC, FRAME_PC(BASE) + | la RA, -8(BASE) +- | stw TISNUM, -8(BASE) +- | stw CRET1, -4(BASE) ++ | stw TISNUM, WORD_HI-8(BASE) ++ | stw CRET1, WORD_LO-8(BASE) + | b ->fff_res1 + |1: + | lus CARG3, 0x41e0 // 2^31. 
+@@ -1762,9 +1920,9 @@ static void build_subroutines(BuildCtx *ctx) + |->fff_restv: + | // CARG3/CARG1 = TValue result. + | lwz PC, FRAME_PC(BASE) +- | stw CARG3, -8(BASE) ++ | stw CARG3, WORD_HI-8(BASE) + | la RA, -8(BASE) +- | stw CARG1, -4(BASE) ++ | stw CARG1, WORD_LO-8(BASE) + |->fff_res1: + | // RA = results, PC = return. + | li RD, (1+1)*8 +@@ -1782,10 +1940,11 @@ static void build_subroutines(BuildCtx *ctx) + | ins_next1 + | // Adjust BASE. KBASE is assumed to be set for the calling frame. + | sub BASE, RA, TMP0 ++ | addi BASEP4, BASE, 4 + | ins_next2 + | + |6: // Fill up results with nil. +- | subi TMP1, RD, 8 ++ | addi TMP1, RD, WORD_HI-8 + | addi RD, RD, 8 + | stwx TISNIL, RA, TMP1 + | b <5 +@@ -1898,7 +2057,7 @@ static void build_subroutines(BuildCtx *ctx) + | + |.ffunc math_log + | cmplwi NARGS8:RC, 8 +- | lwz CARG3, 0(BASE) ++ | lwz CARG3, WORD_HI(BASE) + | lfd FARG1, 0(BASE) + | bne ->fff_fallback // Need exactly 1 argument. + | checknum CARG3; bge ->fff_fallback +@@ -1923,13 +2082,13 @@ static void build_subroutines(BuildCtx *ctx) + |.if DUALNUM + |.ffunc math_ldexp + | cmplwi NARGS8:RC, 16 +- | lwz CARG3, 0(BASE) ++ | lwz CARG3, WORD_HI(BASE) + | lfd FARG1, 0(BASE) +- | lwz CARG4, 8(BASE) ++ | lwz CARG4, WORD_HI+8(BASE) + |.if GPR64 +- | lwz CARG2, 12(BASE) ++ | lwz CARG2, WORD_LO+8(BASE) + |.else +- | lwz CARG1, 12(BASE) ++ | lwz CARG1, WORD_LO+8(BASE) + |.endif + | blt ->fff_fallback + | checknum CARG3; bge ->fff_fallback +@@ -1961,8 +2120,8 @@ static void build_subroutines(BuildCtx *ctx) + | stfd FARG1, 0(RA) + | li RD, (2+1)*8 + |.if DUALNUM +- | stw TISNUM, 8(RA) +- | stw TMP1, 12(RA) ++ | stw TISNUM, WORD_HI+8(RA) ++ | stw TMP1, WORD_LO+8(RA) + |.else + | stfd FARG2, 8(RA) + |.endif +@@ -1989,9 +2148,9 @@ static void build_subroutines(BuildCtx *ctx) + | add TMP2, BASE, NARGS8:RC + | bne >4 + |1: // Handle integers. 
+- | lwz CARG4, 0(TMP1) ++ | lwz CARG4, WORD_HI(TMP1) + | cmplw cr1, TMP1, TMP2 +- | lwz CARG2, 4(TMP1) ++ | lwz CARG2, WORD_LO(TMP1) + | bge cr1, ->fff_resi + | checknum CARG4 + | xoris TMP0, CARG1, 0x8000 +@@ -2020,7 +2179,7 @@ static void build_subroutines(BuildCtx *ctx) + | lfd FARG1, 0(BASE) + | bge ->fff_fallback + |5: // Handle numbers. +- | lwz CARG4, 0(TMP1) ++ | lwz CARG4, WORD_HI(TMP1) + | cmplw cr1, TMP1, TMP2 + | lfd FARG2, 0(TMP1) + | bge cr1, ->fff_resn +@@ -2035,7 +2194,7 @@ static void build_subroutines(BuildCtx *ctx) + |.endif + | b <5 + |7: // Convert integer to number and continue above. +- | lwz CARG2, 4(TMP1) ++ | lwz CARG2, WORD_LO(TMP1) + | bne ->fff_fallback + | tonum_i FARG2, CARG2 + | b <6 +@@ -2043,7 +2202,12 @@ static void build_subroutines(BuildCtx *ctx) + | .ffunc_n name + | li TMP1, 8 + |1: ++ |.if ENDIAN_LE ++ | add CARG2, BASE, TMP1 ++ | lwz CARG2, WORD_HI(CARG2) ++ |.else + | lwzx CARG2, BASE, TMP1 ++ |.endif + | lfdx FARG2, BASE, TMP1 + | cmplw cr1, TMP1, NARGS8:RC + | checknum CARG2 +@@ -2067,8 +2231,8 @@ static void build_subroutines(BuildCtx *ctx) + | + |.ffunc string_byte // Only handle the 1-arg case here. + | cmplwi NARGS8:RC, 8 +- | lwz CARG3, 0(BASE) +- | lwz STR:CARG1, 4(BASE) ++ | lwz CARG3, WORD_HI(BASE) ++ | lwz STR:CARG1, WORD_LO(BASE) + | bne ->fff_fallback // Need exactly 1 argument. + | checkstr CARG3 + | bne ->fff_fallback +@@ -2099,12 +2263,12 @@ static void build_subroutines(BuildCtx *ctx) + |.ffunc string_char // Only handle the 1-arg case here. + | ffgccheck + | cmplwi NARGS8:RC, 8 +- | lwz CARG3, 0(BASE) ++ | lwz CARG3, WORD_HI(BASE) + |.if DUALNUM +- | lwz TMP0, 4(BASE) ++ | lwz TMP0, WORD_LO(BASE) + | bne ->fff_fallback // Exactly 1 argument. + | checknum CARG3; bne ->fff_fallback +- | la CARG2, 7(BASE) ++ | la CARG2, WORD_BLO(BASE) + |.else + | lfd FARG1, 0(BASE) + | bne ->fff_fallback // Exactly 1 argument. 
+@@ -2128,16 +2292,16 @@ static void build_subroutines(BuildCtx *ctx) + |.ffunc string_sub + | ffgccheck + | cmplwi NARGS8:RC, 16 +- | lwz CARG3, 16(BASE) ++ | lwz CARG3, WORD_HI+16(BASE) + |.if not DUALNUM + | lfd f0, 16(BASE) + |.endif +- | lwz TMP0, 0(BASE) +- | lwz STR:CARG1, 4(BASE) ++ | lwz TMP0, WORD_HI(BASE) ++ | lwz STR:CARG1, WORD_LO(BASE) + | blt ->fff_fallback +- | lwz CARG2, 8(BASE) ++ | lwz CARG2, WORD_HI+8(BASE) + |.if DUALNUM +- | lwz TMP1, 12(BASE) ++ | lwz TMP1, WORD_LO+8(BASE) + |.else + | lfd f1, 8(BASE) + |.endif +@@ -2145,7 +2309,7 @@ static void build_subroutines(BuildCtx *ctx) + | beq >1 + |.if DUALNUM + | checknum CARG3 +- | lwz TMP2, 20(BASE) ++ | lwz TMP2, WORD_LO+16(BASE) + | bne ->fff_fallback + |1: + | checknum CARG2; bne ->fff_fallback +@@ -2201,8 +2365,8 @@ static void build_subroutines(BuildCtx *ctx) + | .ffunc string_ .. name + | ffgccheck + | cmplwi NARGS8:RC, 8 +- | lwz CARG3, 0(BASE) +- | lwz STR:CARG2, 4(BASE) ++ | lwz CARG3, WORD_HI(BASE) ++ | lwz STR:CARG2, WORD_LO(BASE) + | blt ->fff_fallback + | checkstr CARG3 + | la SBUF:CARG1, DISPATCH_GL(tmpbuf)(DISPATCH) +@@ -2240,10 +2404,10 @@ static void build_subroutines(BuildCtx *ctx) + | addi TMP1, BASE, 8 + | add TMP2, BASE, NARGS8:RC + |1: +- | lwz CARG4, 0(TMP1) ++ | lwz CARG4, WORD_HI(TMP1) + | cmplw cr1, TMP1, TMP2 + |.if DUALNUM +- | lwz CARG2, 4(TMP1) ++ | lwz CARG2, WORD_LO(TMP1) + |.else + | lfd FARG1, 0(TMP1) + |.endif +@@ -2344,20 +2508,23 @@ static void build_subroutines(BuildCtx *ctx) + | + |->fff_fallback: // Call fast function fallback handler. + | // BASE = new base, RB = CFUNC, RC = nargs*8 +- | lp TMP3, CFUNC:RB->f ++ | lp FUNCREG, CFUNC:RB->f + | add TMP1, BASE, NARGS8:RC + | lwz PC, FRAME_PC(BASE) // Fallback may overwrite PC. + | addi TMP0, TMP1, 8*LUA_MINSTACK + | lwz TMP2, L->maxstack + | stw PC, SAVE_PC // Redundant (but a defined value). 
+- | .toc lp TMP3, 0(TMP3) ++ | .opd lp TOCREG, TOC_OFS(FUNCREG) ++ | .opdenv lp ENVREG, ENV_OFS(FUNCREG) ++ | .opd lp FUNCREG, 0(FUNCREG) + | cmplw TMP0, TMP2 + | stp BASE, L->base + | stp TMP1, L->top + | mr CARG1, L + | bgt >5 // Need to grow stack. +- | mtctr TMP3 ++ | mtctr FUNCREG + | bctrl // (lua_State *L) ++ | .toc lp TOCREG, SAVE_TOC + | // Either throws an error, or recovers and returns -1, 0 or nresults+1. + | lp BASE, L->base + | cmpwi CRET1, 0 +@@ -2459,6 +2626,7 @@ static void build_subroutines(BuildCtx *ctx) + |3: + | lp BASE, L->base + |4: // Re-dispatch to static ins. ++ | addi BASEP4, BASE, 4 + | lwz INS, -4(PC) + | decode_OPP TMP1, INS + | decode_RB8 RB, INS +@@ -2472,7 +2640,7 @@ static void build_subroutines(BuildCtx *ctx) + | + |->cont_hook: // Continue from hook yield. + | addi PC, PC, 4 +- | lwz MULTRES, -20(RB) // Restore MULTRES for *M ins. ++ | lwz MULTRES, WORD_LO-24(RB) // Restore MULTRES for *M ins. + | b <4 + | + |->vm_hotloop: // Hot loop counter underflow. +@@ -2514,6 +2682,7 @@ static void build_subroutines(BuildCtx *ctx) + | lp BASE, L->base + | lp TMP0, L->top + | stw ZERO, SAVE_PC // Invalidate for subsequent line hook. ++ | addi BASEP4, BASE, 4 + | sub NARGS8:RC, TMP0, BASE + | add RA, BASE, RA + | lwz LFUNC:RB, FRAME_FUNC(BASE) +@@ -2525,7 +2694,7 @@ static void build_subroutines(BuildCtx *ctx) + |.if JIT + | // RA = resultptr, RB = meta base + | lwz INS, -4(PC) +- | lwz TRACE:TMP2, -20(RB) // Save previous trace. ++ | lwz TRACE:TMP2, WORD_LO-24(RB) // Save previous trace. + | addic. TMP1, MULTRES, -8 + | decode_RA8 RC, INS // Call base. 
+ | beq >2 +@@ -2560,10 +2729,16 @@ static void build_subroutines(BuildCtx *ctx) + | mr CARG2, PC + | bl extern lj_dispatch_stitch // (jit_State *J, const BCIns *pc) + | lp BASE, L->base ++ | addi BASEP4, BASE, 4 + | b ->cont_nop + | + |9: ++ |.if ENDIAN_LE ++ | addi BASEP4, BASE, 4 ++ | stwx TISNIL, BASEP4, RC ++ |.else + | stwx TISNIL, BASE, RC ++ |.endif + | addi RC, RC, 8 + | b <3 + |.endif +@@ -2578,6 +2753,7 @@ static void build_subroutines(BuildCtx *ctx) + | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction. + | lp BASE, L->base + | subi PC, PC, 4 ++ | addi BASEP4, BASE, 4 + | b ->cont_nop + #endif + | +@@ -2586,39 +2762,72 @@ static void build_subroutines(BuildCtx *ctx) + |//----------------------------------------------------------------------- + | + |.macro savex_, a, b, c, d +- | stfd f..a, 16+a*8(sp) +- | stfd f..b, 16+b*8(sp) +- | stfd f..c, 16+c*8(sp) +- | stfd f..d, 16+d*8(sp) ++ | stfd f..a, EXIT_OFFSET+a*8(sp) ++ | stfd f..b, EXIT_OFFSET+b*8(sp) ++ | stfd f..c, EXIT_OFFSET+c*8(sp) ++ | stfd f..d, EXIT_OFFSET+d*8(sp) ++ |.endmacro ++ | ++ |.macro saver, a ++ | stp r..a, EXIT_OFFSET+32*8+a*PSIZE(sp) + |.endmacro + | + |->vm_exit_handler: + |.if JIT +- | addi sp, sp, -(16+32*8+32*4) +- | stmw r2, 16+32*8+2*4(sp) ++ | addi sp, TMP0, sp, -(EXIT_OFFSET+32*8+32*PSIZE) ++ | saver 3 // CARG1 ++ | saver 4 // CARG2 ++ | saver 5 // CARG3 ++ | saver 17 // DISPATCH + | addi DISPATCH, JGL, -GG_DISP2G-32768 + | li CARG2, ~LJ_VMST_EXIT +- | lwz CARG1, 16+32*8+32*4(sp) // Get stack chain. ++ | lp CARG1, EXIT_OFFSET+32*8+32*PSIZE(sp) // Get stack chain. + | stw CARG2, DISPATCH_GL(vmstate)(DISPATCH) ++ | saver 2 ++ | saver 6 ++ | saver 7 ++ | saver 8 ++ | saver 9 ++ | saver 10 ++ | saver 11 ++ | saver 12 ++ | saver 13 + | savex_ 0,1,2,3 +- | stw CARG1, 0(sp) // Store extended stack chain. +- | clrso TMP1 ++ | stp CARG1, 0(sp) // Store extended stack chain. ++ + | savex_ 4,5,6,7 +- | addi CARG2, sp, 16+32*8+32*4 // Recompute original value of sp. 
++ | saver 14 ++ | saver 15 ++ | saver 16 ++ | saver 18 ++ | addi CARG2, sp, EXIT_OFFSET+32*8+32*PSIZE // Recompute original value of sp. + | savex_ 8,9,10,11 +- | stw CARG2, 16+32*8+1*4(sp) // Store sp in RID_SP. ++ | stp CARG2, EXIT_OFFSET+32*8+1*PSIZE(sp) // Store sp in RID_SP. + | savex_ 12,13,14,15 + | mflr CARG3 + | li TMP1, 0 + | savex_ 16,17,18,19 +- | stw TMP1, 16+32*8+0*4(sp) // Clear RID_TMP. ++ | stw TMP1, EXIT_OFFSET+32*8+0*PSIZE(sp) // Clear RID_TMP. + | savex_ 20,21,22,23 + | lhz CARG4, 2(CARG3) // Load trace number. + | savex_ 24,25,26,27 + | lwz L, DISPATCH_GL(cur_L)(DISPATCH) + | savex_ 28,29,30,31 ++ | saver 19 ++ | saver 20 ++ | saver 21 ++ | saver 22 ++ | saver 23 ++ | saver 24 ++ | saver 25 ++ | saver 26 ++ | saver 27 ++ | saver 28 ++ | saver 29 ++ | saver 30 ++ | saver 31 + | sub CARG3, TMP0, CARG3 // Compute exit number. +- | lp BASE, DISPATCH_GL(jit_base)(DISPATCH) ++ | lwz BASE, DISPATCH_GL(jit_base)(DISPATCH) + | srwi CARG3, CARG3, 2 + | stp L, DISPATCH_J(L)(DISPATCH) + | subi CARG3, CARG3, 2 +@@ -2627,11 +2836,11 @@ static void build_subroutines(BuildCtx *ctx) + | stw TMP1, DISPATCH_GL(jit_base)(DISPATCH) + | addi CARG1, DISPATCH, GG_DISP2J + | stw CARG3, DISPATCH_J(exitno)(DISPATCH) +- | addi CARG2, sp, 16 ++ | addi CARG2, sp, EXIT_OFFSET + | bl extern lj_trace_exit // (jit_State *J, ExitState *ex) + | // Returns MULTRES (unscaled) or negated error code. + | lp TMP1, L->cframe +- | lwz TMP2, 0(sp) ++ | lp TMP2, 0(sp) + | lp BASE, L->base + |.if GPR64 + | rldicr sp, TMP1, 0, 61 +@@ -2639,7 +2848,7 @@ static void build_subroutines(BuildCtx *ctx) + | rlwinm sp, TMP1, 0, 0, 29 + |.endif + | lwz PC, SAVE_PC // Get SAVE_PC. +- | stw TMP2, 0(sp) ++ | stp TMP2, 0(sp) + | stw L, SAVE_L // Set SAVE_L (on-trace resume/yield). + | b >1 + |.endif +@@ -2660,7 +2869,12 @@ static void build_subroutines(BuildCtx *ctx) + | stw TMP2, DISPATCH_GL(jit_base)(DISPATCH) + | lwz KBASE, PC2PROTO(k)(TMP1) + | // Setup type comparison constants. 
++ |.if P64 ++ | lus TISNUM, LJ_TISNUM >> 16 ++ | ori TISNUM, TISNUM, LJ_TISNUM & 0xffff ++ |.else + | li TISNUM, LJ_TISNUM ++ |.endif + | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). + | stw TMP3, TMPD + | li ZERO, 0 +@@ -2680,14 +2894,14 @@ static void build_subroutines(BuildCtx *ctx) + | decode_RA8 RA, INS + | lpx TMP0, DISPATCH, TMP1 + | mtctr TMP0 +- | cmplwi TMP1, BC_FUNCF*4 // Function header? ++ | cmplwi TMP1, BC_FUNCF*PSIZE // Function header? + | bge >2 + | decode_RB8 RB, INS + | decode_RD8 RD, INS + | decode_RC8 RC, INS + | bctr + |2: +- | cmplwi TMP1, (BC_FUNCC+2)*4 // Fast function? ++ | cmplwi TMP1, (BC_FUNCC+2)*PSIZE // Fast function? + | blt >3 + | // Check frame below fast function. + | lwz TMP1, FRAME_PC(BASE) +@@ -2697,7 +2911,7 @@ static void build_subroutines(BuildCtx *ctx) + | lwz TMP2, -4(TMP1) + | decode_RA8 TMP0, TMP2 + | sub TMP1, BASE, TMP0 +- | lwz LFUNC:TMP2, -12(TMP1) ++ | lwz LFUNC:TMP2, WORD_LO-16(TMP1) + | lwz TMP1, LFUNC:TMP2->pc + | lwz KBASE, PC2PROTO(k)(TMP1) + |3: +@@ -2718,6 +2932,8 @@ static void build_subroutines(BuildCtx *ctx) + |// NYI: Use internal implementations of floor, ceil, trunc. + | + |->vm_modi: ++ | li TMP1, 0 ++ | mtxer TMP1 + | divwo. TMP0, CARG1, CARG2 + | bso >1 + |.if GPR64 +@@ -2736,7 +2952,8 @@ static void build_subroutines(BuildCtx *ctx) + | cmpwi CARG2, 0 + | li CARG1, 0 + | beqlr +- | clrso TMP0 // Clear SO for -2147483648 % -1 and return 0. ++ | // Clear SO for -2147483648 % -1 and return 0. ++ | crxor 4*cr0+so, 4*cr0+so, 4*cr0+so + | blr + | + |//----------------------------------------------------------------------- +@@ -2749,10 +2966,18 @@ static void build_subroutines(BuildCtx *ctx) + |->vm_cachesync: + |.if JIT or FFI + | // Compute start of first cache line and number of cache lines. ++ | .if GPR64 ++ | rldicr CARG1, CARG1, 0, 58 ++ | .else + | rlwinm CARG1, CARG1, 0, 0, 26 ++ | .endif + | sub CARG2, CARG2, CARG1 + | addi CARG2, CARG2, 31 ++ | .if GPR64 ++ | srdi. 
CARG2, CARG2, 5 ++ | .else + | rlwinm. CARG2, CARG2, 27, 5, 31 ++ | .endif + | beqlr + | mtctr CARG2 + | mr CARG3, CARG1 +@@ -2774,39 +2999,70 @@ static void build_subroutines(BuildCtx *ctx) + |//-- FFI helper functions ----------------------------------------------- + |//----------------------------------------------------------------------- + | +- |// Handler for callback functions. Callback slot number in r11, g in r12. ++ |// Handler for callback functions. ++ |// 32-bit: Callback slot number in r12, g in r11. ++ |// 64-bit v1: Callback slot number in bits 47+ of r11, g in 0-46, TOC in r2. ++ |// 64-bit v2: Callback slot number in bits 2-11 of r12, g in r11, ++ |// vm_ffi_callback in r2. + |->vm_ffi_callback: + |.if FFI + |.type CTSTATE, CTState, PC ++ | .if OPD ++ | rldicl r12, r11, 17, 47 ++ | rldicl r11, r11, 0, 17 ++ | .endif ++ | .if ELFV2 ++ | rlwinm r12, r12, 30, 22, 31 ++ | addisl TOCREG, TOCREG, extern .TOC.-lj_vm_ffi_callback@ha ++ | addil TOCREG, TOCREG, extern .TOC.-lj_vm_ffi_callback@l ++ | .endif + | saveregs +- | lwz CTSTATE, GL:r12->ctype_state +- | addi DISPATCH, r12, GG_G2DISP +- | stw r11, CTSTATE->cb.slot +- | stw r3, CTSTATE->cb.gpr[0] ++ | lwz CTSTATE, GL:r11->ctype_state ++ | addi DISPATCH, r11, GG_G2DISP ++ | stw r12, CTSTATE->cb.slot ++ | stp r3, CTSTATE->cb.gpr[0] + | stfd f1, CTSTATE->cb.fpr[0] +- | stw r4, CTSTATE->cb.gpr[1] ++ | stp r4, CTSTATE->cb.gpr[1] + | stfd f2, CTSTATE->cb.fpr[1] +- | stw r5, CTSTATE->cb.gpr[2] ++ | stp r5, CTSTATE->cb.gpr[2] + | stfd f3, CTSTATE->cb.fpr[2] +- | stw r6, CTSTATE->cb.gpr[3] ++ | stp r6, CTSTATE->cb.gpr[3] + | stfd f4, CTSTATE->cb.fpr[3] +- | stw r7, CTSTATE->cb.gpr[4] ++ | stp r7, CTSTATE->cb.gpr[4] + | stfd f5, CTSTATE->cb.fpr[4] +- | stw r8, CTSTATE->cb.gpr[5] ++ | stp r8, CTSTATE->cb.gpr[5] + | stfd f6, CTSTATE->cb.fpr[5] +- | stw r9, CTSTATE->cb.gpr[6] ++ | stp r9, CTSTATE->cb.gpr[6] + | stfd f7, CTSTATE->cb.fpr[6] +- | stw r10, CTSTATE->cb.gpr[7] ++ | stp r10, CTSTATE->cb.gpr[7] + | stfd 
f8, CTSTATE->cb.fpr[7] ++ | .if GPR64 ++ | stfd f9, CTSTATE->cb.fpr[8] ++ | stfd f10, CTSTATE->cb.fpr[9] ++ | stfd f11, CTSTATE->cb.fpr[10] ++ | stfd f12, CTSTATE->cb.fpr[11] ++ | stfd f13, CTSTATE->cb.fpr[12] ++ | .endif ++ | .if ELFV2 ++ | addi TMP0, sp, CFRAME_SPACE+96 ++ | .elif GPR64 ++ | addi TMP0, sp, CFRAME_SPACE+112 ++ | .else + | addi TMP0, sp, CFRAME_SPACE+8 +- | stw TMP0, CTSTATE->cb.stack ++ | .endif ++ | stp TMP0, CTSTATE->cb.stack + | mr CARG1, CTSTATE + | stw CTSTATE, SAVE_PC // Any value outside of bytecode is ok. + | mr CARG2, sp + | bl extern lj_ccallback_enter // (CTState *cts, void *cf) + | // Returns lua_State *. + | lp BASE, L:CRET1->base ++ |.if P64 ++ | lus TISNUM, LJ_TISNUM >> 16 // Setup type comparison constants. ++ | ori TISNUM, TISNUM, LJ_TISNUM & 0xffff ++ |.else + | li TISNUM, LJ_TISNUM // Setup type comparison constants. ++ |.endif + | lp RC, L:CRET1->top + | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). + | li ZERO, 0 +@@ -2835,9 +3091,21 @@ static void build_subroutines(BuildCtx *ctx) + | mr CARG1, CTSTATE + | mr CARG2, RA + | bl extern lj_ccallback_leave // (CTState *cts, TValue *o) +- | lwz CRET1, CTSTATE->cb.gpr[0] ++ | lp CRET1, CTSTATE->cb.gpr[0] + | lfd FARG1, CTSTATE->cb.fpr[0] +- | lwz CRET2, CTSTATE->cb.gpr[1] ++ | lp CRET2, CTSTATE->cb.gpr[1] ++ | .if GPR64 ++ | lfd FARG2, CTSTATE->cb.fpr[1] ++ | .else ++ | lp CARG3, CTSTATE->cb.gpr[2] ++ | lp CARG4, CTSTATE->cb.gpr[3] ++ | .endif ++ | .elfv2 lfd f3, CTSTATE->cb.fpr[2] ++ | .elfv2 lfd f4, CTSTATE->cb.fpr[3] ++ | .elfv2 lfd f5, CTSTATE->cb.fpr[4] ++ | .elfv2 lfd f6, CTSTATE->cb.fpr[5] ++ | .elfv2 lfd f7, CTSTATE->cb.fpr[6] ++ | .elfv2 lfd f8, CTSTATE->cb.fpr[7] + | b ->vm_leave_unw + |.endif + | +@@ -2850,23 +3118,46 @@ static void build_subroutines(BuildCtx *ctx) + | lbz CARG2, CCSTATE->nsp + | lbz CARG3, CCSTATE->nfpr + | neg TMP1, TMP1 ++ | .if GPR64 ++ | std TMP0, 16(sp) ++ | .else + | stw TMP0, 4(sp) ++ | .endif + | cmpwi cr1, CARG3, 0 + | mr TMP2, sp + | addic. 
CARG2, CARG2, -1 ++ | .if GPR64 ++ | stdux sp, sp, TMP1 ++ | .else + | stwux sp, sp, TMP1 ++ | .endif + | crnot 4*cr1+eq, 4*cr1+eq // For vararg calls. +- | stw r14, -4(TMP2) +- | stw CCSTATE, -8(TMP2) ++ | .if GPR64 ++ | std r14, -8(TMP2) ++ | std CCSTATE, -16(TMP2) ++ | .else ++ | stw r14, -4(TMP2) ++ | stw CCSTATE, -8(TMP2) ++ | .endif + | mr r14, TMP2 + | la TMP1, CCSTATE->stack ++ | .if GPR64 ++ | sldi CARG2, CARG2, 3 ++ | .else + | slwi CARG2, CARG2, 2 ++ | .endif + | blty >2 +- | la TMP2, 8(sp) ++ | .if ELFV2 ++ | la TMP2, 96(sp) ++ | .elif GPR64 ++ | la TMP2, 112(sp) ++ | .else ++ | la TMP2, 8(sp) ++ | .endif + |1: +- | lwzx TMP0, TMP1, CARG2 +- | stwx TMP0, TMP2, CARG2 +- | addic. CARG2, CARG2, -4 ++ | lpx TMP0, TMP1, CARG2 ++ | stpx TMP0, TMP2, CARG2 ++ | addic. CARG2, CARG2, -PSIZE + | bge <1 + |2: + | bney cr1, >3 +@@ -2878,28 +3169,55 @@ static void build_subroutines(BuildCtx *ctx) + | lfd f6, CCSTATE->fpr[5] + | lfd f7, CCSTATE->fpr[6] + | lfd f8, CCSTATE->fpr[7] ++ | .if GPR64 ++ | lfd f9, CCSTATE->fpr[8] ++ | lfd f10, CCSTATE->fpr[9] ++ | lfd f11, CCSTATE->fpr[10] ++ | lfd f12, CCSTATE->fpr[11] ++ | lfd f13, CCSTATE->fpr[12] ++ | .endif + |3: +- | lp TMP0, CCSTATE->func +- | lwz CARG2, CCSTATE->gpr[1] +- | lwz CARG3, CCSTATE->gpr[2] +- | lwz CARG4, CCSTATE->gpr[3] +- | lwz CARG5, CCSTATE->gpr[4] +- | mtctr TMP0 +- | lwz r8, CCSTATE->gpr[5] +- | lwz r9, CCSTATE->gpr[6] +- | lwz r10, CCSTATE->gpr[7] +- | lwz CARG1, CCSTATE->gpr[0] // Do this last, since CCSTATE is CARG1. ++ | .toc std TOCREG, SAVE_TOC ++ | lp FUNCREG, CCSTATE->func ++ | lp CARG2, CCSTATE->gpr[1] ++ | lp CARG3, CCSTATE->gpr[2] ++ | .opd lp TOCREG, TOC_OFS(FUNCREG) ++ | .opdenv lp ENVREG, ENV_OFS(FUNCREG) ++ | .opd lp FUNCREG, 0(FUNCREG) ++ | lp CARG4, CCSTATE->gpr[3] ++ | lp CARG5, CCSTATE->gpr[4] ++ | mtctr FUNCREG ++ | lp r8, CCSTATE->gpr[5] ++ | lp r9, CCSTATE->gpr[6] ++ | lp r10, CCSTATE->gpr[7] ++ | lp CARG1, CCSTATE->gpr[0] // Do this last, since CCSTATE is CARG1. 
+ | bctrl +- | lwz CCSTATE:TMP1, -8(r14) +- | lwz TMP2, -4(r14) ++ | .toc lp TOCREG, SAVE_TOC ++ | .if GPR64 ++ | ld CCSTATE:TMP1, -16(r14) ++ | ld TMP2, -8(r14) ++ | ld TMP0, 16(r14) ++ | .else ++ | lwz CCSTATE:TMP1, -8(r14) ++ | lwz TMP2, -4(r14) + | lwz TMP0, 4(r14) +- | stw CARG1, CCSTATE:TMP1->gpr[0] ++ | .endif ++ | stp CARG1, CCSTATE:TMP1->gpr[0] + | stfd FARG1, CCSTATE:TMP1->fpr[0] +- | stw CARG2, CCSTATE:TMP1->gpr[1] ++ | stp CARG2, CCSTATE:TMP1->gpr[1] ++ | .if GPR64 ++ | stfd FARG2, CCSTATE:TMP1->fpr[1] ++ | .endif ++ | .elfv2 stfd FARG3, CCSTATE:TMP1->fpr[2] ++ | .elfv2 stfd FARG4, CCSTATE:TMP1->fpr[3] ++ | .elfv2 stfd FARG5, CCSTATE:TMP1->fpr[4] ++ | .elfv2 stfd FARG6, CCSTATE:TMP1->fpr[5] ++ | .elfv2 stfd FARG7, CCSTATE:TMP1->fpr[6] ++ | .elfv2 stfd FARG8, CCSTATE:TMP1->fpr[7] + | mtlr TMP0 +- | stw CARG3, CCSTATE:TMP1->gpr[2] ++ | stp CARG3, CCSTATE:TMP1->gpr[2] + | mr sp, r14 +- | stw CARG4, CCSTATE:TMP1->gpr[3] ++ | stp CARG4, CCSTATE:TMP1->gpr[3] + | mr r14, TMP2 + | blr + |.endif +@@ -2923,13 +3241,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: + | // RA = src1*8, RD = src2*8, JMP with RD = target + |.if DUALNUM +- | lwzux TMP0, RA, BASE ++ | lwzx TMP0, BASE_HI, RA + | addi PC, PC, 4 +- | lwz CARG2, 4(RA) +- | lwzux TMP1, RD, BASE ++ | lwzx CARG2, BASE_LO, RA ++ | lwzx TMP1, BASE_HI, RD + | lwz TMP2, -4(PC) + | checknum cr0, TMP0 +- | lwz CARG3, 4(RD) ++ | lwzx CARG3, BASE_LO, RD + | decode_RD4 TMP2, TMP2 + | checknum cr1, TMP1 + | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) +@@ -2953,7 +3271,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + |7: // RA is not an integer. + | bgt cr0, ->vmeta_comp + | // RA is a number. +- | lfd f0, 0(RA) ++ | lfdx f0, BASE, RA + | bgt cr1, ->vmeta_comp + | blt cr1, >4 + | // RA is a number, RD is an integer. +@@ -2965,7 +3283,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | // RA is an integer, RD is a number. 
+ | tonum_i f0, CARG2 + |4: +- | lfd f1, 0(RD) ++ | lfdx f1, BASE, RD + |5: + | fcmpu cr0, f0, f1 + if (op == BC_ISLT) { +@@ -2981,10 +3299,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + } + | b <1 + |.else +- | lwzx TMP0, BASE, RA ++ | lwzx TMP0, BASE_HI, RA + | addi PC, PC, 4 + | lfdx f0, BASE, RA +- | lwzx TMP1, BASE, RD ++ | lwzx TMP1, BASE_HI, RD + | checknum cr0, TMP0 + | lwz TMP2, -4(PC) + | lfdx f1, BASE, RD +@@ -3015,15 +3333,23 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + vk = op == BC_ISEQV; + | // RA = src1*8, RD = src2*8, JMP with RD = target + |.if DUALNUM +- | lwzux TMP0, RA, BASE ++ | lwzx TMP0, BASE_HI, RA + | addi PC, PC, 4 +- | lwz CARG2, 4(RA) +- | lwzux TMP1, RD, BASE ++ | lwzx CARG2, BASE_LO, RA ++ | .if ENDIAN_LE ++ | lwzx TMP1, BASE_HI, RD ++ | .else ++ | lwzux TMP1, RD, BASE_HI ++ | .endif + | checknum cr0, TMP0 + | lwz TMP2, -4(PC) + | checknum cr1, TMP1 + | decode_RD4 TMP2, TMP2 +- | lwz CARG3, 4(RD) ++ | .if ENDIAN_LE ++ | lwzux CARG3, RD, BASE_LO ++ | .else ++ | lwz CARG3, WORD_LO(RD) ++ | .endif + | cror 4*cr7+gt, 4*cr0+gt, 4*cr1+gt + | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) + if (vk) { +@@ -3032,14 +3358,14 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | ble cr7, ->BC_ISNEN_Z + } + |.else +- | lwzux TMP0, RA, BASE ++ | lwzx TMP0, BASE_HI, RA + | lwz TMP2, 0(PC) +- | lfd f0, 0(RA) ++ | lfdx f0, BASE, RA + | addi PC, PC, 4 +- | lwzux TMP1, RD, BASE ++ | lwzx TMP1, BASE_HI, RD + | checknum cr0, TMP0 + | decode_RD4 TMP2, TMP2 +- | lfd f1, 0(RD) ++ | lfdx f1, BASE, RD + | checknum cr1, TMP1 + | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) + | bge cr0, >5 +@@ -3057,8 +3383,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + |.endif + |5: // Either or both types are not numbers. 
+ |.if not DUALNUM +- | lwz CARG2, 4(RA) +- | lwz CARG3, 4(RD) ++ | lwzx CARG2, BASE_LO, RA ++ | lwzx CARG3, BASE_LO, RD + |.endif + |.if FFI + | cmpwi cr7, TMP0, LJ_TCDATA +@@ -3074,10 +3400,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + |.if FFI + | beq cr7, ->vmeta_equal_cd + |.endif ++ |.if P64 ++ | cmplwi cr7, TMP3, ~LJ_TUDATA // Avoid 64 bit lightuserdata. ++ |.endif + | cmplw cr5, CARG2, CARG3 + | crandc 4*cr0+gt, 4*cr0+eq, 4*cr1+gt // 2: Same type and primitive. + | crorc 4*cr0+lt, 4*cr5+eq, 4*cr0+eq // 1: Same tv or different type. + | crand 4*cr0+eq, 4*cr0+eq, 4*cr5+eq // 0: Same type and same tv. ++ |.if P64 ++ | cror 4*cr6+lt, 4*cr6+lt, 4*cr7+gt ++ |.endif + | mr SAVE0, PC + | cror 4*cr0+eq, 4*cr0+eq, 4*cr0+gt // 0 or 2. + | cror 4*cr0+lt, 4*cr0+lt, 4*cr0+gt // 1 or 2. +@@ -3116,9 +3448,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + case BC_ISEQS: case BC_ISNES: + vk = op == BC_ISEQS; + | // RA = src*8, RD = str_const*8 (~), JMP with RD = target +- | lwzux TMP0, RA, BASE ++ | lwzx TMP0, BASE_HI, RA + | srwi RD, RD, 1 +- | lwz STR:TMP3, 4(RA) ++ | lwzx STR:TMP3, BASE_LO, RA + | lwz TMP2, 0(PC) + | subfic RD, RD, -4 + | addi PC, PC, 4 +@@ -3150,15 +3482,14 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + vk = op == BC_ISEQN; + | // RA = src*8, RD = num_const*8, JMP with RD = target + |.if DUALNUM +- | lwzux TMP0, RA, BASE ++ | lwzx TMP0, BASE_HI, RA + | addi PC, PC, 4 +- | lwz CARG2, 4(RA) +- | lwzux TMP1, RD, KBASE ++ | lwzx CARG2, BASE_LO, RA ++ | lwzux2 TMP1, CARG3, RD, KBASE + | checknum cr0, TMP0 + | lwz TMP2, -4(PC) + | checknum cr1, TMP1 + | decode_RD4 TMP2, TMP2 +- | lwz CARG3, 4(RD) + | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) + if (vk) { + |->BC_ISEQN_Z: +@@ -3175,7 +3506,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + } else { + |->BC_ISNEN_Z: // Dummy label. 
+ } +- | lwzx TMP0, BASE, RA ++ | lwzx TMP0, BASE_HI, RA + | addi PC, PC, 4 + | lfdx f0, BASE, RA + | lwz TMP2, -4(PC) +@@ -3213,7 +3544,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + |7: // RA is not an integer. + | bge cr0, <3 + | // RA is a number. +- | lfd f0, 0(RA) ++ | lfdx f0, BASE, RA + | blt cr1, >1 + | // RA is a number, RD is an integer. + | tonum_i f1, CARG3 +@@ -3232,7 +3563,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + case BC_ISEQP: case BC_ISNEP: + vk = op == BC_ISEQP; + | // RA = src*8, RD = primitive_type*8 (~), JMP with RD = target +- | lwzx TMP0, BASE, RA ++ | lwzx TMP0, BASE_HI, RA + | srwi TMP1, RD, 3 + | lwz TMP2, 0(PC) + | not TMP1, TMP1 +@@ -3262,7 +3593,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + + case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF: + | // RA = dst*8 or unused, RD = src*8, JMP with RD = target +- | lwzx TMP0, BASE, RD ++ | lwzx TMP0, BASE_HI, RD + | lwz INS, 0(PC) + | addi PC, PC, 4 + if (op == BC_IST || op == BC_ISF) { +@@ -3297,7 +3628,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + + case BC_ISTYPE: + | // RA = src*8, RD = -type*8 +- | lwzx TMP0, BASE, RA ++ | lwzx TMP0, BASE_HI, RA + | srwi TMP1, RD, 3 + | ins_next1 + |.if not PPE and not GPR64 +@@ -3311,7 +3642,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + break; + case BC_ISNUM: + | // RA = src*8, RD = -(TISNUM-1)*8 +- | lwzx TMP0, BASE, RA ++ | lwzx TMP0, BASE_HI, RA + | ins_next1 + | checknum TMP0 + | bge ->vmeta_istype +@@ -3330,17 +3661,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + case BC_NOT: + | // RA = dst*8, RD = src*8 + | ins_next1 +- | lwzx TMP0, BASE, RD ++ | lwzx TMP0, BASE_HI, RD + | .gpr64 extsw TMP0, TMP0 + | subfic TMP1, TMP0, LJ_TTRUE + | adde TMP0, TMP0, TMP1 +- | stwx TMP0, BASE, RA ++ | stwx TMP0, BASE_HI, RA + | ins_next2 + break; + case BC_UNM: + | // RA = dst*8, RD = src*8 +- | lwzux TMP1, RD, BASE +- | lwz TMP0, 4(RD) ++ | lwzx TMP1, 
BASE_HI, RD ++ | lwzx TMP0, BASE_LO, RD ++ |.if DUALNUM and not GPR64 ++ | mtxer ZERO ++ |.endif + | checknum TMP1 + |.if DUALNUM + | bne >5 +@@ -3352,18 +3686,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + |.else + | nego. TMP0, TMP0 + | bso >4 +- |1: + |.endif + | ins_next1 +- | stwux TISNUM, RA, BASE +- | stw TMP0, 4(RA) ++ | stwx TISNUM, BASE_HI, RA ++ | stwx TMP0, BASE_LO, RA + |3: + | ins_next2 + |4: +- |.if not GPR64 +- | // Potential overflow. +- | checkov TMP1, <1 // Ignore unrelated overflow. +- |.endif + | lus TMP1, 0x41e0 // 2^31. + | li TMP0, 0 + | b >7 +@@ -3373,8 +3702,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | xoris TMP1, TMP1, 0x8000 + |7: + | ins_next1 +- | stwux TMP1, RA, BASE +- | stw TMP0, 4(RA) ++ | stwx TMP1, BASE_HI, RA ++ | stwx TMP0, BASE_LO, RA + |.if DUALNUM + | b <3 + |.else +@@ -3383,15 +3712,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + break; + case BC_LEN: + | // RA = dst*8, RD = src*8 +- | lwzux TMP0, RD, BASE +- | lwz CARG1, 4(RD) ++ | lwzx TMP0, BASE_HI, RD ++ | lwzx CARG1, BASE_LO, RD + | checkstr TMP0; bne >2 + | lwz CRET1, STR:CARG1->len + |1: + |.if DUALNUM + | ins_next1 +- | stwux TISNUM, RA, BASE +- | stw CRET1, 4(RA) ++ | stwx TISNUM, BASE_HI, RA ++ | stwx CRET1, BASE_LO, RA + |.else + | tonum_u f0, CRET1 // Result is a non-negative integer. 
+ | ins_next1 +@@ -3426,9 +3755,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); + ||switch (vk) { + ||case 0: +- | lwzx TMP1, BASE, RB ++ | .if ENDIAN_LE and DUALNUM ++ | addi TMP2, RC, 4 ++ | .endif ++ | lwzx TMP1, BASE_HI, RB + | .if DUALNUM +- | lwzx TMP2, KBASE, RC ++ | .if ENDIAN_LE ++ | lwzx TMP2, KBASE, TMP2 ++ | .else ++ | lwzx TMP2, KBASE, RC ++ | .endif + | .endif + | lfdx f14, BASE, RB + | lfdx f15, KBASE, RC +@@ -3442,9 +3778,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | .endif + || break; + ||case 1: +- | lwzx TMP1, BASE, RB ++ | .if ENDIAN_LE and DUALNUM ++ | addi TMP2, RC, 4 ++ | .endif ++ | lwzx TMP1, BASE_HI, RB + | .if DUALNUM +- | lwzx TMP2, KBASE, RC ++ | .if ENDIAN_LE ++ | lwzx TMP2, KBASE, TMP2 ++ | .else ++ | lwzx TMP2, KBASE, RC ++ | .endif + | .endif + | lfdx f15, BASE, RB + | lfdx f14, KBASE, RC +@@ -3458,8 +3801,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | .endif + || break; + ||default: +- | lwzx TMP1, BASE, RB +- | lwzx TMP2, BASE, RC ++ | lwzx TMP1, BASE_HI, RB ++ | lwzx TMP2, BASE_HI, RC + | lfdx f14, BASE, RB + | lfdx f15, BASE, RC + | checknum cr0, TMP1 +@@ -3514,41 +3857,62 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); + ||switch (vk) { + ||case 0: +- | lwzux TMP1, RB, BASE +- | lwzux TMP2, RC, KBASE +- | lwz CARG1, 4(RB) +- | checknum cr0, TMP1 +- | lwz CARG2, 4(RC) ++ | .if ENDIAN_LE ++ | lwzux CARG2, RC, KBASE ++ | lwzx TMP1, RB, BASE_HI ++ | lwz TMP2, 4(RC) ++ | checknum cr0, TMP1 ++ | lwzux CARG1, RB, BASE ++ | .else ++ | lwzux TMP1, RB, BASE ++ | lwzux TMP2, RC, KBASE ++ | lwz CARG1, 4(RB) ++ | checknum cr0, TMP1 ++ | lwz CARG2, 4(RC) ++ | .endif + || break; + ||case 1: +- | lwzux TMP1, RB, BASE +- | lwzux TMP2, RC, KBASE +- | lwz CARG2, 4(RB) +- | checknum cr0, TMP1 +- | lwz CARG1, 4(RC) ++ | .if ENDIAN_LE ++ | lwzux CARG1, RC, KBASE ++ | lwzx TMP1, 
RB, BASE_HI ++ | lwz TMP2, 4(RC) ++ | checknum cr0, TMP1 ++ | lwzux CARG2, RB, BASE ++ | .else ++ | lwzux TMP1, RB, BASE ++ | lwzux TMP2, RC, KBASE ++ | lwz CARG2, 4(RB) ++ | checknum cr0, TMP1 ++ | lwz CARG1, 4(RC) ++ | .endif + || break; + ||default: +- | lwzux TMP1, RB, BASE +- | lwzux TMP2, RC, BASE +- | lwz CARG1, 4(RB) +- | checknum cr0, TMP1 +- | lwz CARG2, 4(RC) ++ | .if ENDIAN_LE ++ | lwzx TMP1, RB, BASE_HI ++ | lwzx TMP2, RC, BASE_HI ++ | lwzux CARG1, RB, BASE ++ | checknum cr0, TMP1 ++ | lwzux CARG2, RC, BASE ++ | .else ++ | lwzux TMP1, RB, BASE ++ | lwzux TMP2, RC, BASE ++ | lwz CARG1, 4(RB) ++ | checknum cr0, TMP1 ++ | lwz CARG2, 4(RC) ++ | .endif + || break; + ||} ++ | mtxer ZERO + | checknum cr1, TMP2 + | bne >5 + | bne cr1, >5 + | intins CARG1, CARG1, CARG2 +- | bso >4 +- |1: ++ | ins_arithfallback bso + | ins_next1 +- | stwux TISNUM, RA, BASE +- | stw CARG1, 4(RA) ++ | stwx TISNUM, BASE_HI, RA ++ | stwx CARG1, BASE_LO, RA + |2: + | ins_next2 +- |4: // Overflow. +- | checkov TMP0, <1 // Ignore unrelated overflow. +- | ins_arithfallback b + |5: // FP variant. + ||if (vk == 1) { + | lfd f15, 0(RB) +@@ -3620,9 +3984,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + break; + case BC_POW: + | // NYI: (partial) integer arithmetic. +- | lwzx TMP1, BASE, RB ++ | lwzx TMP1, BASE_HI, RB + | lfdx FARG1, BASE, RB +- | lwzx TMP2, BASE, RC ++ | lwzx TMP2, BASE_HI, RC + | lfdx FARG2, BASE, RC + | checknum cr0, TMP1 + | checknum cr1, TMP2 +@@ -3648,6 +4012,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | // Returns NULL (finished) or TValue * (metamethod). + | cmplwi CRET1, 0 + | lp BASE, L->base ++ | addi BASEP4, BASE, 4 + | bne ->vmeta_binop + | ins_next1 + | lfdx f0, BASE, SAVE0 // Copy result from RB to RA. 
+@@ -3664,8 +4029,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | ins_next1 + | lwzx TMP0, KBASE, TMP1 // KBASE-4-str_const*4 + | li TMP2, LJ_TSTR +- | stwux TMP2, RA, BASE +- | stw TMP0, 4(RA) ++ | stwx TMP2, BASE_HI, RA ++ | stwx TMP0, BASE_LO, RA + | ins_next2 + break; + case BC_KCDATA: +@@ -3676,8 +4041,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | ins_next1 + | lwzx TMP0, KBASE, TMP1 // KBASE-4-cdata_const*4 + | li TMP2, LJ_TCDATA +- | stwux TMP2, RA, BASE +- | stw TMP0, 4(RA) ++ | stwx TMP2, BASE_HI, RA ++ | stwx TMP0, BASE_LO, RA + | ins_next2 + |.endif + break; +@@ -3687,14 +4052,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | slwi RD, RD, 13 + | srawi RD, RD, 16 + | ins_next1 +- | stwux TISNUM, RA, BASE +- | stw RD, 4(RA) ++ | stwx TISNUM, BASE_HI, RA ++ | stwx RD, BASE_LO, RA + | ins_next2 + |.else + | // The soft-float approach is faster. + | slwi RD, RD, 13 + | srawi TMP1, RD, 31 + | xor TMP2, TMP1, RD ++ | .gpr64 extsw RD, RD + | sub TMP2, TMP2, TMP1 // TMP2 = abs(x) + | cntlzw TMP3, TMP2 + | subfic TMP1, TMP3, 0x40d // TMP1 = exponent-1 +@@ -3706,8 +4072,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | add RD, RD, TMP1 // hi = hi + exponent-1 + | and RD, RD, TMP0 // hi = x == 0 ? 
0 : hi + | ins_next1 +- | stwux RD, RA, BASE +- | stw ZERO, 4(RA) ++ | stwx RD, BASE_HI, RA ++ | stwx ZERO, BASE_LO, RA + | ins_next2 + |.endif + break; +@@ -3723,15 +4089,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | srwi TMP1, RD, 3 + | not TMP0, TMP1 + | ins_next1 +- | stwx TMP0, BASE, RA ++ | stwx TMP0, BASE_HI, RA + | ins_next2 + break; + case BC_KNIL: + | // RA = base*8, RD = end*8 +- | stwx TISNIL, BASE, RA ++ | stwx TISNIL, BASE_HI, RA + | addi RA, RA, 8 + |1: +- | stwx TISNIL, BASE, RA ++ | stwx TISNIL, BASE_HI, RA + | cmpw RA, RD + | addi RA, RA, 8 + | blt <1 +@@ -3763,10 +4129,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | lwz CARG2, UPVAL:RB->v + | andix. TMP3, TMP3, LJ_GC_BLACK // isblack(uv) + | lbz TMP0, UPVAL:RB->closed +- | lwz TMP2, 0(RD) ++ | lwz TMP2, WORD_HI(RD) + | stfd f0, 0(CARG2) + | cmplwi cr1, TMP0, 0 +- | lwz TMP1, 4(RD) ++ | lwz TMP1, WORD_LO(RD) + | cror 4*cr0+eq, 4*cr0+eq, 4*cr1+eq + | subi TMP2, TMP2, (LJ_TNUMX+1) + | bne >2 // Upvalue is closed and black? +@@ -3799,8 +4165,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | lbz TMP3, STR:TMP1->marked + | lbz TMP2, UPVAL:RB->closed + | li TMP0, LJ_TSTR +- | stw STR:TMP1, 4(CARG2) +- | stw TMP0, 0(CARG2) ++ | stw STR:TMP1, WORD_LO(CARG2) ++ | stw TMP0, WORD_HI(CARG2) + | bne >2 + |1: + | ins_next +@@ -3837,7 +4203,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | lwzx UPVAL:RB, LFUNC:RB, RA + | ins_next1 + | lwz TMP1, UPVAL:RB->v +- | stw TMP0, 0(TMP1) ++ | stw TMP0, WORD_HI(TMP1) + | ins_next2 + break; + +@@ -3852,6 +4218,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | add CARG2, BASE, RA + | bl extern lj_func_closeuv // (lua_State *L, TValue *level) + | lp BASE, L->base ++ | addi BASEP4, BASE, 4 + |1: + | ins_next + break; +@@ -3870,8 +4237,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | // Returns GCfuncL *. 
+ | lp BASE, L->base + | li TMP0, LJ_TFUNC +- | stwux TMP0, RA, BASE +- | stw LFUNC:CRET1, 4(RA) ++ | addi BASEP4, BASE, 4 ++ | stwx TMP0, BASE_HI, RA ++ | stwx LFUNC:CRET1, BASE_LO, RA + | ins_next + break; + +@@ -3904,8 +4272,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + } + | lp BASE, L->base + | li TMP0, LJ_TTAB +- | stwux TMP0, RA, BASE +- | stw TAB:CRET1, 4(RA) ++ | addi BASEP4, BASE, 4 ++ | stwx TMP0, BASE_HI, RA ++ | stwx TAB:CRET1, BASE_LO, RA + | ins_next + if (op == BC_TNEW) { + |3: +@@ -3938,13 +4307,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + + case BC_TGETV: + | // RA = dst*8, RB = table*8, RC = key*8 +- | lwzux CARG1, RB, BASE +- | lwzux CARG2, RC, BASE +- | lwz TAB:RB, 4(RB) ++ | lwzx CARG1, BASE_HI, RB ++ | lwzx CARG2, BASE_HI, RC ++ | lwzx TAB:RB, BASE_LO, RB + |.if DUALNUM +- | lwz RC, 4(RC) ++ | lwzx RC, BASE_LO, RC + |.else +- | lfd f0, 0(RC) ++ | lfdx f0, BASE, RC + |.endif + | checktab CARG1 + | checknum cr1, CARG2 +@@ -3971,8 +4340,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | slwi TMP2, TMP2, 3 + |.endif + | ble ->vmeta_tgetv // Integer key and in array part? +- | lwzx TMP0, TMP1, TMP2 +- | lfdx f14, TMP1, TMP2 ++ | .if ENDIAN_LE ++ | lfdux f14, TMP1, TMP2 ++ | lwz TMP0, WORD_HI(TMP1) ++ | .else ++ | lwzx TMP0, TMP1, TMP2 ++ | lfdx f14, TMP1, TMP2 ++ | .endif + | checknil TMP0; beq >2 + |1: + | ins_next1 +@@ -3991,15 +4365,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + |5: + | checkstr CARG2; bne ->vmeta_tgetv + |.if not DUALNUM +- | lwz STR:RC, 4(RC) ++ | lwzx STR:RC, BASE_LO, RC + |.endif + | b ->BC_TGETS_Z // String key? 
+ break; + case BC_TGETS: + | // RA = dst*8, RB = table*8, RC = str_const*8 (~) +- | lwzux CARG1, RB, BASE ++ | lwzx CARG1, BASE_HI, RB + | srwi TMP1, RC, 1 +- | lwz TAB:RB, 4(RB) ++ | lwzx TAB:RB, BASE_LO, RB + | subfic TMP1, TMP1, -4 + | checktab CARG1 + | lwzx STR:RC, KBASE, TMP1 // KBASE-4-str_const*4 +@@ -4015,16 +4389,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | sub TMP1, TMP0, TMP1 + | add NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) + |1: +- | lwz CARG1, NODE:TMP2->key +- | lwz TMP0, 4+offsetof(Node, key)(NODE:TMP2) +- | lwz CARG2, NODE:TMP2->val +- | lwz TMP1, 4+offsetof(Node, val)(NODE:TMP2) ++ | lwz CARG1, WORD_HI+offsetof(Node, key)(NODE:TMP2) ++ | lwz TMP0, WORD_LO+offsetof(Node, key)(NODE:TMP2) ++ | lwz CARG2, WORD_HI+offsetof(Node, val)(NODE:TMP2) ++ | lwz TMP1, WORD_LO+offsetof(Node, val)(NODE:TMP2) + | checkstr CARG1; bne >4 + | cmpw TMP0, STR:RC; bne >4 + | checknil CARG2; beq >5 // Key found, but nil value? + |3: +- | stwux CARG2, RA, BASE +- | stw TMP1, 4(RA) ++ | stwx CARG2, BASE_HI, RA ++ | stwx TMP1, BASE_LO, RA + | ins_next + | + |4: // Follow hash chain. 
+@@ -4045,15 +4419,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + break; + case BC_TGETB: + | // RA = dst*8, RB = table*8, RC = index*8 +- | lwzux CARG1, RB, BASE ++ | lwzx CARG1, BASE_HI, RB + | srwi TMP0, RC, 3 +- | lwz TAB:RB, 4(RB) ++ | lwzx TAB:RB, BASE_LO, RB + | checktab CARG1; bne ->vmeta_tgetb + | lwz TMP1, TAB:RB->asize + | lwz TMP2, TAB:RB->array + | cmplw TMP0, TMP1; bge ->vmeta_tgetb +- | lwzx TMP1, TMP2, RC +- | lfdx f0, TMP2, RC ++ | .if ENDIAN_LE ++ | lfdux f0, TMP2, RC ++ | lwz TMP1, WORD_HI(TMP2) ++ | .else ++ | lwzx TMP1, TMP2, RC ++ | lfdx f0, TMP2, RC ++ | .endif + | checknil TMP1; beq >5 + |1: + | ins_next1 +@@ -4071,12 +4450,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + break; + case BC_TGETR: + | // RA = dst*8, RB = table*8, RC = key*8 +- | add RB, BASE, RB +- | lwz TAB:CARG1, 4(RB) ++ | lwzx TAB:CARG1, BASE_LO, RB + |.if DUALNUM +- | add RC, BASE, RC + | lwz TMP0, TAB:CARG1->asize +- | lwz CARG2, 4(RC) ++ | lwzx CARG2, BASE_LO, RC + | lwz TMP1, TAB:CARG1->array + |.else + | lfdx f0, BASE, RC +@@ -4096,13 +4473,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + + case BC_TSETV: + | // RA = src*8, RB = table*8, RC = key*8 +- | lwzux CARG1, RB, BASE +- | lwzux CARG2, RC, BASE +- | lwz TAB:RB, 4(RB) ++ | lwzx CARG1, BASE_HI, RB ++ | lwzx CARG2, BASE_HI, RC ++ | lwzx TAB:RB, BASE_LO, RB + |.if DUALNUM +- | lwz RC, 4(RC) ++ | lwzx RC, BASE_LO, RC + |.else +- | lfd f0, 0(RC) ++ | lfdx f0, BASE, RC + |.endif + | checktab CARG1 + | checknum cr1, CARG2 +@@ -4129,7 +4506,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | slwi TMP0, TMP2, 3 + |.endif + | ble ->vmeta_tsetv // Integer key and in array part? 
++ | .if ENDIAN_LE ++ | addi TMP2, TMP1, 4 ++ | lwzx TMP2, TMP2, TMP0 ++ | .else + | lwzx TMP2, TMP1, TMP0 ++ | .endif + | lbz TMP3, TAB:RB->marked + | lfdx f14, BASE, RA + | checknil TMP2; beq >3 +@@ -4152,7 +4534,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + |5: + | checkstr CARG2; bne ->vmeta_tsetv + |.if not DUALNUM +- | lwz STR:RC, 4(RC) ++ | lwzx STR:RC, BASE_LO, RC + |.endif + | b ->BC_TSETS_Z // String key? + | +@@ -4162,9 +4544,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + break; + case BC_TSETS: + | // RA = src*8, RB = table*8, RC = str_const*8 (~) +- | lwzux CARG1, RB, BASE ++ | lwzx CARG1, BASE_HI, RB + | srwi TMP1, RC, 1 +- | lwz TAB:RB, 4(RB) ++ | lwzx TAB:RB, BASE_LO, RB + | subfic TMP1, TMP1, -4 + | checktab CARG1 + | lwzx STR:RC, KBASE, TMP1 // KBASE-4-str_const*4 +@@ -4183,9 +4565,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | lbz TMP3, TAB:RB->marked + | add NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) + |1: +- | lwz CARG1, NODE:TMP2->key +- | lwz TMP0, 4+offsetof(Node, key)(NODE:TMP2) +- | lwz CARG2, NODE:TMP2->val ++ | lwz CARG1, WORD_HI+offsetof(Node, key)(NODE:TMP2) ++ | lwz TMP0, WORD_LO+offsetof(Node, key)(NODE:TMP2) ++ | lwz CARG2, WORD_HI+offsetof(Node, val)(NODE:TMP2) + | lwz NODE:TMP1, NODE:TMP2->next + | checkstr CARG1; bne >5 + | cmpw TMP0, STR:RC; bne >5 +@@ -4225,13 +4607,14 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | beq ->vmeta_tsets // 'no __newindex' flag NOT set: check. + |6: + | li TMP0, LJ_TSTR +- | stw STR:RC, 4(CARG3) ++ | stw STR:RC, WORD_LO(CARG3) + | mr CARG2, TAB:RB +- | stw TMP0, 0(CARG3) ++ | stw TMP0, WORD_HI(CARG3) + | bl extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k) + | // Returns TValue *. + | lp BASE, L->base + | stfd f14, 0(CRET1) ++ | addi BASEP4, BASE, 4 + | b <3 // No 2nd write barrier needed. + | + |7: // Possible table write barrier for the value. Skip valiswhite check. 
+@@ -4240,9 +4623,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + break; + case BC_TSETB: + | // RA = src*8, RB = table*8, RC = index*8 +- | lwzux CARG1, RB, BASE ++ | lwzx CARG1, BASE_HI, RB + | srwi TMP0, RC, 3 +- | lwz TAB:RB, 4(RB) ++ | lwzx TAB:RB, BASE_LO, RB + | checktab CARG1; bne ->vmeta_tsetb + | lwz TMP1, TAB:RB->asize + | lwz TMP2, TAB:RB->array +@@ -4250,7 +4633,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | cmplw TMP0, TMP1 + | lfdx f14, BASE, RA + | bge ->vmeta_tsetb +- | lwzx TMP1, TMP2, RC ++ | .if ENDIAN_LE ++ | addi TMP1, TMP2, 4 ++ | lwzx TMP1, TMP1, RC ++ | .else ++ | lwzx TMP1, TMP2, RC ++ | .endif + | checknil TMP1; beq >5 + |1: + | andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table) +@@ -4274,13 +4662,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + break; + case BC_TSETR: + | // RA = dst*8, RB = table*8, RC = key*8 +- | add RB, BASE, RB +- | lwz TAB:CARG2, 4(RB) ++ | lwzx TAB:CARG2, BASE_LO, RB + |.if DUALNUM +- | add RC, BASE, RC + | lbz TMP3, TAB:CARG2->marked + | lwz TMP0, TAB:CARG2->asize +- | lwz CARG3, 4(RC) ++ | lwzx CARG3, BASE_LO, RC + | lwz TMP1, TAB:CARG2->array + |.else + | lfdx f0, BASE, RC +@@ -4311,9 +4697,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | add RA, BASE, RA + |1: + | add TMP3, KBASE, RD +- | lwz TAB:CARG2, -4(RA) // Guaranteed to be a table. ++ | lwz TAB:CARG2, WORD_LO-8(RA) // Guaranteed to be a table. + | addic. TMP0, MULTRES, -8 +- | lwz TMP3, 4(TMP3) // Integer constant is in lo-word. ++ | lwz TMP3, WORD_LO(TMP3) // Integer constant is in lo-word. + | srwi CARG3, TMP0, 3 + | beq >4 // Nothing to copy? 
+ | add CARG3, CARG3, TMP3 +@@ -4362,8 +4748,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + case BC_CALL: + | // RA = base*8, (RB = (nresults+1)*8,) RC = (nargs+1)*8 + | mr TMP2, BASE +- | lwzux TMP0, BASE, RA +- | lwz LFUNC:RB, 4(BASE) ++ | lwzux2 TMP0, LFUNC:RB, BASE, RA + | subi NARGS8:RC, NARGS8:RC, 8 + | addi BASE, BASE, 8 + | checkfunc TMP0; bne ->vmeta_call +@@ -4377,8 +4762,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + break; + case BC_CALLT: + | // RA = base*8, (RB = 0,) RC = (nargs+1)*8 +- | lwzux TMP0, RA, BASE +- | lwz LFUNC:RB, 4(RA) ++ | lwzux2 TMP0, LFUNC:RB, RA, BASE + | subi NARGS8:RC, NARGS8:RC, 8 + | lwz TMP1, FRAME_PC(BASE) + | checkfunc TMP0 +@@ -4430,12 +4814,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 ((2+1)*8)) + | mr TMP2, BASE + | add BASE, BASE, RA +- | lwz TMP1, -24(BASE) +- | lwz LFUNC:RB, -20(BASE) ++ | lwz TMP1, WORD_HI-24(BASE) ++ | lwz LFUNC:RB, WORD_LO-24(BASE) + | lfd f1, -8(BASE) + | lfd f0, -16(BASE) +- | stw TMP1, 0(BASE) // Copy callable. +- | stw LFUNC:RB, 4(BASE) ++ | stw TMP1, WORD_HI(BASE) // Copy callable. ++ | stw LFUNC:RB, WORD_LO(BASE) + | checkfunc TMP1 + | stfd f1, 16(BASE) // Copy control var. + | li NARGS8:RC, 16 // Iterators get 2 arguments. +@@ -4450,8 +4834,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | // NYI: add hotloop, record BC_ITERN. + |.endif + | add RA, BASE, RA +- | lwz TAB:RB, -12(RA) +- | lwz RC, -4(RA) // Get index from control var. ++ | lwz TAB:RB, WORD_LO-16(RA) ++ | lwz RC, WORD_LO-8(RA) // Get index from control var. + | lwz TMP0, TAB:RB->asize + | lwz TMP1, TAB:RB->array + | addi PC, PC, 4 +@@ -4459,14 +4843,14 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | cmplw RC, TMP0 + | slwi TMP3, RC, 3 + | bge >5 // Index points after array part? 
+- | lwzx TMP2, TMP1, TMP3 +- | lfdx f0, TMP1, TMP3 ++ | lfdux f0, TMP3, TMP1 ++ | lwz TMP2, WORD_HI(TMP3) + | checknil TMP2 + | lwz INS, -4(PC) + | beq >4 + |.if DUALNUM +- | stw RC, 4(RA) +- | stw TISNUM, 0(RA) ++ | stw RC, WORD_LO(RA) ++ | stw TISNUM, WORD_HI(RA) + |.else + | tonum_u f1, RC + |.endif +@@ -4474,7 +4858,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | addis TMP3, PC, -(BCBIAS_J*4 >> 16) + | stfd f0, 8(RA) + | decode_RD4 TMP1, INS +- | stw RC, -4(RA) // Update control var. ++ | stw RC, WORD_LO-8(RA) // Update control var. + | add PC, TMP1, TMP3 + |.if not DUALNUM + | stfd f1, 0(RA) +@@ -4496,9 +4880,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | bgty <3 + | slwi RB, RC, 3 + | sub TMP3, TMP3, RB +- | lwzx RB, TMP2, TMP3 +- | lfdx f0, TMP2, TMP3 +- | add NODE:TMP3, TMP2, TMP3 ++ | lfdux f0, TMP3, TMP2 ++ | lwz RB, WORD_HI(TMP3) + | checknil RB + | lwz INS, -4(PC) + | beq >7 +@@ -4510,7 +4893,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | stfd f1, 0(RA) + | addi RC, RC, 1 + | add PC, TMP1, TMP2 +- | stw RC, -4(RA) // Update control var. ++ | stw RC, WORD_LO-8(RA) // Update control var. + | b <3 + | + |7: // Skip holes in hash part. +@@ -4521,10 +4904,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + case BC_ISNEXT: + | // RA = base*8, RD = target (points to ITERN) + | add RA, BASE, RA +- | lwz TMP0, -24(RA) +- | lwz CFUNC:TMP1, -20(RA) +- | lwz TMP2, -16(RA) +- | lwz TMP3, -8(RA) ++ | lwz TMP0, WORD_HI-24(RA) ++ | lwz CFUNC:TMP1, WORD_LO-24(RA) ++ | lwz TMP2, WORD_HI-16(RA) ++ | lwz TMP3, WORD_HI-8(RA) + | cmpwi cr0, TMP2, LJ_TTAB + | cmpwi cr1, TMP0, LJ_TFUNC + | cmpwi cr6, TMP3, LJ_TNIL +@@ -4538,17 +4921,25 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | bne cr0, >5 + | lus TMP1, 0xfffe + | ori TMP1, TMP1, 0x7fff +- | stw ZERO, -4(RA) // Initialize control var. +- | stw TMP1, -8(RA) ++ | stw ZERO, WORD_LO-8(RA) // Initialize control var. 
++ | stw TMP1, WORD_HI-8(RA) + | addis PC, TMP3, -(BCBIAS_J*4 >> 16) + |1: + | ins_next + |5: // Despecialize bytecode if any of the checks fail. + | li TMP0, BC_JMP + | li TMP1, BC_ITERC ++ | .if ENDIAN_LE ++ | stb TMP0, -4(PC) ++ | .else + | stb TMP0, -1(PC) ++ | .endif + | addis PC, TMP3, -(BCBIAS_J*4 >> 16) ++ | .if ENDIAN_LE ++ | stb TMP1, 0(PC) ++ | .else + | stb TMP1, 3(PC) ++ | .endif + | b <1 + break; + +@@ -4582,7 +4973,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | addi RA, RA, 8 + | blt cr1, <1 // More vararg slots? + |2: // Fill up remainder with nil. +- | stw TISNIL, 0(RA) ++ | stw TISNIL, WORD_HI(RA) + | cmplw RA, TMP2 + | addi RA, RA, 8 + | blt <2 +@@ -4619,6 +5010,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | add RA, BASE, RA + | add RC, BASE, SAVE0 + | subi TMP3, BASE, 8 ++ | addi BASEP4, BASE, 4 + | b <6 + break; + +@@ -4667,13 +5059,14 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | bgt >6 + | sub BASE, TMP2, RA + | lwz LFUNC:TMP1, FRAME_FUNC(BASE) ++ | addi BASEP4, BASE, 4 + | ins_next1 + | lwz TMP1, LFUNC:TMP1->pc + | lwz KBASE, PC2PROTO(k)(TMP1) + | ins_next2 + | + |6: // Fill up results with nil. +- | subi TMP1, RD, 8 ++ | addi TMP1, RD, WORD_HI-8 + | addi RD, RD, 8 + | stwx TISNIL, TMP2, TMP1 + | b <5 +@@ -4709,13 +5102,14 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | bgt >6 + | sub BASE, TMP2, RA + | lwz LFUNC:TMP1, FRAME_FUNC(BASE) ++ | addi BASEP4, BASE, 4 + | ins_next1 + | lwz TMP1, LFUNC:TMP1->pc + | lwz KBASE, PC2PROTO(k)(TMP1) + | ins_next2 + | + |6: // Fill up results with nil. +- | subi TMP1, RD, 8 ++ | addi TMP1, RD, WORD_HI-8 + | addi RD, RD, 8 + | stwx TISNIL, TMP2, TMP1 + | b <5 +@@ -4741,11 +5135,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + vk = (op == BC_IFORL || op == BC_JFORL); + |.if DUALNUM + | // Integer loop. 
+- | lwzux TMP1, RA, BASE +- | lwz CARG1, FORL_IDX*8+4(RA) ++ | lwzux2 TMP1, CARG1, RA, BASE ++ if (vk) { ++ | mtxer ZERO ++ } + | cmplw cr0, TMP1, TISNUM + if (vk) { +- | lwz CARG3, FORL_STEP*8+4(RA) ++ | lwz CARG3, FORL_STEP*8+WORD_LO(RA) + | bne >9 + |.if GPR64 + | // Need to check overflow for (a<<32) + (b<<32). +@@ -4757,15 +5153,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | addo. CARG1, CARG1, CARG3 + |.endif + | cmpwi cr6, CARG3, 0 +- | lwz CARG2, FORL_STOP*8+4(RA) +- | bso >6 ++ | lwz CARG2, FORL_STOP*8+WORD_LO(RA) ++ | bso >2 + |4: +- | stw CARG1, FORL_IDX*8+4(RA) ++ | stw CARG1, FORL_IDX*8+WORD_LO(RA) + } else { +- | lwz TMP3, FORL_STEP*8(RA) +- | lwz CARG3, FORL_STEP*8+4(RA) +- | lwz TMP2, FORL_STOP*8(RA) +- | lwz CARG2, FORL_STOP*8+4(RA) ++ | lwz TMP3, FORL_STEP*8+WORD_HI(RA) ++ | lwz CARG3, FORL_STEP*8+WORD_LO(RA) ++ | lwz TMP2, FORL_STOP*8+WORD_HI(RA) ++ | lwz CARG2, FORL_STOP*8+WORD_LO(RA) + | cmplw cr7, TMP3, TISNUM + | cmplw cr1, TMP2, TISNUM + | crand 4*cr0+eq, 4*cr0+eq, 4*cr7+eq +@@ -4776,11 +5172,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | blt cr6, >5 + | cmpw CARG1, CARG2 + |1: +- | stw TISNUM, FORL_EXT*8(RA) ++ | stw TISNUM, FORL_EXT*8+WORD_HI(RA) + if (op != BC_JFORL) { + | srwi RD, RD, 1 + } +- | stw CARG1, FORL_EXT*8+4(RA) ++ | stw CARG1, FORL_EXT*8+WORD_LO(RA) + if (op != BC_JFORL) { + | add RD, PC, RD + } +@@ -4800,11 +5196,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + |5: // Invert check for negative step. + | cmpw CARG2, CARG1 + | b <1 +- if (vk) { +- |6: // Potential overflow. +- | checkov TMP0, <4 // Ignore unrelated overflow. 
+- | b <2 +- } + |.endif + if (vk) { + |.if DUALNUM +@@ -4815,14 +5206,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + |.endif + | lfd f3, FORL_STEP*8(RA) + | lfd f2, FORL_STOP*8(RA) +- | lwz TMP3, FORL_STEP*8(RA) ++ | lwz TMP3, FORL_STEP*8+WORD_HI(RA) + | fadd f1, f1, f3 + | stfd f1, FORL_IDX*8(RA) + } else { + |.if DUALNUM + |9: // FP loop. + |.else ++ |.if ENDIAN_LE ++ | lwzx TMP1, RA, BASE_LO ++ | add RA, RA, BASE ++ |.else + | lwzux TMP1, RA, BASE ++ |.endif + | lwz TMP3, FORL_STEP*8(RA) + | lwz TMP2, FORL_STOP*8(RA) + | cmplw cr0, TMP1, TISNUM +@@ -4903,17 +5299,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + #endif + case BC_IITERL: + | // RA = base*8, RD = target +- | lwzux TMP1, RA, BASE +- | lwz TMP2, 4(RA) ++ | lwzux2 TMP1, TMP2, RA, BASE + | checknil TMP1; beq >1 // Stop if iterator returned nil. + if (op == BC_JITERL) { +- | stw TMP1, -8(RA) +- | stw TMP2, -4(RA) ++ | stw TMP1, WORD_HI-8(RA) ++ | stw TMP2, WORD_LO-8(RA) + | b =>BC_JLOOP + } else { + | branch_RD // Otherwise save control var + branch. +- | stw TMP1, -8(RA) +- | stw TMP2, -4(RA) ++ | stw TMP1, WORD_HI-8(RA) ++ | stw TMP2, WORD_LO-8(RA) + } + |1: + | ins_next +@@ -4942,7 +5337,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | // Traces on PPC don't store the trace number, so use 0. + | stw ZERO, DISPATCH_GL(vmstate)(DISPATCH) + | lwzx TRACE:TMP2, TMP1, RD +- | clrso TMP1 ++ | mtxer ZERO + | lp TMP2, TRACE:TMP2->mcode + | stw BASE, DISPATCH_GL(jit_base)(DISPATCH) + | mtctr TMP2 +@@ -4994,7 +5389,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + } + | + |3: // Clear missing parameters. +- | stwx TISNIL, BASE, NARGS8:RC ++ | stwx TISNIL, BASE_HI, NARGS8:RC + | addi NARGS8:RC, NARGS8:RC, 8 + | b <2 + break; +@@ -5011,11 +5406,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | lwz TMP2, L->maxstack + | add TMP1, BASE, RC + | add TMP0, RA, RC +- | stw LFUNC:RB, 4(TMP1) // Store copy of LFUNC. 
++ | stw LFUNC:RB, WORD_LO(TMP1) // Store copy of LFUNC. + | addi TMP3, RC, 8+FRAME_VARG + | lwz KBASE, -4+PC2PROTO(k)(PC) + | cmplw TMP0, TMP2 +- | stw TMP3, 0(TMP1) // Store delta + FRAME_VARG. ++ | stw TMP3, WORD_HI(TMP1) // Store delta + FRAME_VARG. + | bge ->vm_growstack_l + | lbz TMP2, -4+PC2PROTO(numparams)(PC) + | mr RA, BASE +@@ -5026,18 +5421,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | beq >3 + |1: + | cmplw RA, RC // Less args than parameters? +- | lwz TMP0, 0(RA) +- | lwz TMP3, 4(RA) ++ | lwz TMP0, WORD_HI(RA) ++ | lwz TMP3, WORD_LO(RA) + | bge >4 +- | stw TISNIL, 0(RA) // Clear old fixarg slot (help the GC). ++ | stw TISNIL, WORD_HI(RA) // Clear old fixarg slot (help the GC). + | addi RA, RA, 8 + |2: + | addic. TMP2, TMP2, -1 +- | stw TMP0, 8(TMP1) +- | stw TMP3, 12(TMP1) ++ | stw TMP0, WORD_HI+8(TMP1) ++ | stw TMP3, WORD_LO+8(TMP1) + | addi TMP1, TMP1, 8 + | bne <1 + |3: ++ | addi BASEP4, BASE, 4 + | ins_next2 + | + |4: // Clear missing parameters. +@@ -5049,35 +5445,35 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + case BC_FUNCCW: + | // BASE = new base, RA = BASE+framesize*8, RB = CFUNC, RC = nargs*8 + if (op == BC_FUNCC) { +- | lp RD, CFUNC:RB->f ++ | lp FUNCREG, CFUNC:RB->f + } else { +- | lp RD, DISPATCH_GL(wrapf)(DISPATCH) ++ | lp FUNCREG, DISPATCH_GL(wrapf)(DISPATCH) + } + | add TMP1, RA, NARGS8:RC + | lwz TMP2, L->maxstack +- | .toc lp TMP3, 0(RD) ++ | .opd lp TMP3, 0(FUNCREG) + | add RC, BASE, NARGS8:RC + | stp BASE, L->base + | cmplw TMP1, TMP2 + | stp RC, L->top + | li_vmstate C +- |.if TOC ++ |.if OPD + | mtctr TMP3 + |.else +- | mtctr RD ++ | mtctr FUNCREG + |.endif + if (op == BC_FUNCCW) { + | lp CARG2, CFUNC:RB->f + } + | mr CARG1, L + | bgt ->vm_growstack_c // Need to grow stack. 
+- | .toc lp TOCREG, TOC_OFS(RD) +- | .tocenv lp ENVREG, ENV_OFS(RD) ++ | .opd lp TOCREG, TOC_OFS(FUNCREG) ++ | .opdenv lp ENVREG, ENV_OFS(FUNCREG) + | st_vmstate + | bctrl // (lua_State *L [, lua_CFunction f]) ++ | .toc lp TOCREG, SAVE_TOC + | // Returns nresults. + | lp BASE, L->base +- | .toc ld TOCREG, SAVE_TOC + | slwi RD, CRET1, 3 + | lp TMP1, L->top + | li_vmstate INTERP +@@ -5128,7 +5524,11 @@ static void emit_asm_debug(BuildCtx *ctx) + "\t.byte 0x1\n" + "\t.string \"\"\n" + "\t.uleb128 0x1\n" ++#if LJ_ARCH_PPC32ON64 ++ "\t.sleb128 -8\n" ++#else + "\t.sleb128 -4\n" ++#endif + "\t.byte 65\n" + "\t.byte 0xc\n\t.uleb128 1\n\t.uleb128 0\n" + "\t.align 2\n" +@@ -5141,14 +5541,24 @@ static void emit_asm_debug(BuildCtx *ctx) + "\t.long .Lbegin\n" + "\t.long %d\n" + "\t.byte 0xe\n\t.uleb128 %d\n" ++#if LJ_ARCH_PPC32ON64 ++ "\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -2\n" ++ "\t.byte 0x11\n\t.uleb128 70\n\t.sleb128 -1\n", ++#else + "\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -1\n" + "\t.byte 0x5\n\t.uleb128 70\n\t.uleb128 55\n", ++#endif + fcofs, CFRAME_SIZE); + for (i = 14; i <= 31; i++) + fprintf(ctx->fp, + "\t.byte %d\n\t.uleb128 %d\n" + "\t.byte %d\n\t.uleb128 %d\n", +- 0x80+i, 37+(31-i), 0x80+32+i, 2+2*(31-i)); ++#if LJ_ARCH_PPC32ON64 ++ 0x80+i, 19+(31-i), 0x80+32+i, 1+(31-i) ++#else ++ 0x80+i, 37+(31-i), 0x80+32+i, 2+2*(31-i) ++#endif ++ ); + fprintf(ctx->fp, + "\t.align 2\n" + ".LEFDE0:\n\n"); +@@ -5164,8 +5574,12 @@ static void emit_asm_debug(BuildCtx *ctx) + "\t.long lj_vm_ffi_call\n" + #endif + "\t.long %d\n" ++#if LJ_ARCH_PPC32ON64 ++ "\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -2\n" ++#else + "\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -1\n" +- "\t.byte 0x8e\n\t.uleb128 2\n" ++#endif ++ "\t.byte 0x8e\n\t.uleb128 1\n" + "\t.byte 0xd\n\t.uleb128 0xe\n" + "\t.align 2\n" + ".LEFDE1:\n\n", (int)ctx->codesz - fcofs); +@@ -5180,7 +5594,11 @@ static void emit_asm_debug(BuildCtx *ctx) + "\t.byte 0x1\n" + "\t.string \"zPR\"\n" + "\t.uleb128 0x1\n" ++#if LJ_ARCH_PPC32ON64 ++ 
"\t.sleb128 -8\n" ++#else + "\t.sleb128 -4\n" ++#endif + "\t.byte 65\n" + "\t.uleb128 6\n" /* augmentation length */ + "\t.byte 0x1b\n" /* pcrel|sdata4 */ +@@ -5198,14 +5616,24 @@ static void emit_asm_debug(BuildCtx *ctx) + "\t.long %d\n" + "\t.uleb128 0\n" /* augmentation length */ + "\t.byte 0xe\n\t.uleb128 %d\n" ++#if LJ_ARCH_PPC32ON64 ++ "\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -2\n" ++ "\t.byte 0x11\n\t.uleb128 70\n\t.sleb128 -1\n", ++#else + "\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -1\n" + "\t.byte 0x5\n\t.uleb128 70\n\t.uleb128 55\n", ++#endif + fcofs, CFRAME_SIZE); + for (i = 14; i <= 31; i++) + fprintf(ctx->fp, + "\t.byte %d\n\t.uleb128 %d\n" + "\t.byte %d\n\t.uleb128 %d\n", +- 0x80+i, 37+(31-i), 0x80+32+i, 2+2*(31-i)); ++#if LJ_ARCH_PPC32ON64 ++ 0x80+i, 19+(31-i), 0x80+32+i, 1+(31-i) ++#else ++ 0x80+i, 37+(31-i), 0x80+32+i, 2+2*(31-i) ++#endif ++ ); + fprintf(ctx->fp, + "\t.align 2\n" + ".LEFDE2:\n\n"); +@@ -5233,8 +5661,12 @@ static void emit_asm_debug(BuildCtx *ctx) + "\t.long lj_vm_ffi_call-.\n" + "\t.long %d\n" + "\t.uleb128 0\n" /* augmentation length */ ++#if LJ_ARCH_PPC32ON64 ++ "\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -2\n" ++#else + "\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -1\n" +- "\t.byte 0x8e\n\t.uleb128 2\n" ++#endif ++ "\t.byte 0x8e\n\t.uleb128 1\n" + "\t.byte 0xd\n\t.uleb128 0xe\n" + "\t.align 2\n" + ".LEFDE3:\n\n", (int)ctx->codesz - fcofs); + diff --git a/main/luajit/APKBUILD b/main/luajit/APKBUILD index 1dee244d20..a13ccc6e5f 100644 --- a/main/luajit/APKBUILD +++ b/main/luajit/APKBUILD @@ -4,7 +4,7 @@ pkgname=luajit _pkgname=LuaJIT pkgver=2.1.0_beta3 _pkgver=${pkgver/_/-} -pkgrel=4 +pkgrel=5 pkgdesc='Just-in-time compiler and replacement for Lua 5.1' url='http://luajit.org' arch="all !s390x" @@ -13,7 +13,7 @@ makedepends="$depends_dev paxmark" provides="lua" subpackages="$pkgname-dev $pkgname-doc" source="http://luajit.org/download/$_pkgname-$_pkgver.tar.gz - enable-support-for-ppc64le.patch + 
0004-Add-ppc64-support-based-on-koriakin-GitHub-patchset.patch module-paths.patch" builddir="$srcdir/$_pkgname-$_pkgver" @@ -46,5 +46,5 @@ package() { } sha512sums="c44e967a0f671ed32b55aee810bc8b3b63737a2d7363b8984ae1949b24f98dbb3f9be7c1e10239fdeb96a3e3c836f606342cbd61838cf9bcadb077443eb5bc12 LuaJIT-2.1.0-beta3.tar.gz -c2b1a39fe19dfe1817811015ef1fea24b0966ff791fcbde1090527331c2b9ceab9b923960c01cfb8aa81bab40aa437eb0f285385378e7eaa83016abf36beae27 enable-support-for-ppc64le.patch +2e4a4416ad60f6463b2f4ce2c5b8f0519dfb7bfd85e377fed8423477088c5ac9143d0c30a772e65f492317da63bcd06ab01dabe85933c2bd32d6f061167e5515 0004-Add-ppc64-support-based-on-koriakin-GitHub-patchset.patch 9c6c410e142838c5128c70570e66beb53cc5422df5ecfb8ab0b5f362637b21ab5978c00f19bc1759165df162fd6438a2ed43e25020400011b7bb14014d87c62e module-paths.patch" diff --git a/main/luajit/enable-support-for-ppc64le.patch b/main/luajit/enable-support-for-ppc64le.patch deleted file mode 100644 index f6a46c37c9..0000000000 --- a/main/luajit/enable-support-for-ppc64le.patch +++ /dev/null @@ -1,4822 +0,0 @@ -From d2400df1c753683571c5df9ea29afb37b6583d36 Mon Sep 17 00:00:00 2001 -From: Gustavo Serra Scalet <gsscalet@gmail.com> -Date: Tue, 2 Jun 2015 14:36:20 -0300 -Subject: [PATCH] PPC64: Enable support for ppc64 little endian - ---- - dynasm/dasm_ppc.lua | 11 +- - src/host/buildvm_asm.c | 19 +- - src/lj_arch.h | 4 +- - src/lj_ccall.c | 37 + - src/lj_ccall.h | 9 + - src/lj_ccallback.c | 15 + - src/lj_ctype.h | 2 +- - src/lj_frame.h | 9 + - src/lj_target_ppc.h | 11 +- - src/vm_ppc64.dasc | 4515 ++++++++++++++++++++++++++++++++++++++++++++++++ - 10 files changed, 4621 insertions(+), 11 deletions(-) - create mode 100644 src/vm_ppc64.dasc - -diff --git a/dynasm/dasm_ppc.lua b/dynasm/dasm_ppc.lua -index 278f095..7a36454 100644 ---- a/dynasm/dasm_ppc.lua -+++ b/dynasm/dasm_ppc.lua -@@ -257,9 +257,11 @@ map_op = { - addic_3 = "30000000RRI", - ["addic._3"] = "34000000RRI", - addi_3 = "38000000RR0I", -+ addil_3 = "38000000RR0J", - 
li_2 = "38000000RI", - la_2 = "38000000RD", - addis_3 = "3c000000RR0I", -+ addisl_3 = "3c000000RR0J", - lis_2 = "3c000000RI", - lus_2 = "3c000000RU", - bc_3 = "40000000AAK", -@@ -764,7 +766,7 @@ map_op = { - lfddx_3 = "7c000646FRR", - stvepx_3 = "7c00064eVRR", - srawi_3 = "7c000670RR~A.", -- sradi_3 = "7c000674RR~H.", -+ sradi_3 = "7c000674RR~f.", - eieio_0 = "7c0006ac", - lfiwax_3 = "7c0006aeFR0R", - divdeuo_3 = "7c000712RRR.", -@@ -1718,7 +1720,12 @@ op_template = function(params, template, nparams) - elseif p == "G" then - op = op + parse_imm(params[n], 8, 12, 0, false); n = n + 1 - elseif p == "H" then -- op = op + parse_shiftmask(params[n], true); n = n + 1 -+ v = parse_imm(params[n], 6, 0, 0, false); -+ op = op + shl(band(v,31), 11)+shl(shr(v,5), 1); -+ n = n + 1; -+ elseif p == "f" then -+ v = tonumber(params[n]); -+ op = op + shl(band(v,31), 11)+shl(shr(v,5), 1); - elseif p == "M" then - op = op + parse_shiftmask(params[n], false); n = n + 1 - elseif p == "J" or p == "K" then -diff --git a/src/host/buildvm_asm.c b/src/host/buildvm_asm.c -index 9b7ae53..14ae701 100644 ---- a/src/host/buildvm_asm.c -+++ b/src/host/buildvm_asm.c -@@ -139,13 +139,21 @@ static void emit_asm_wordreloc(BuildCtx *ctx, uint8_t *p, int n, - if ((ins >> 26) == 16) { - fprintf(ctx->fp, "\t%s %d, %d, " TOCPREFIX "%s\n", - (ins & 1) ? 
"bcl" : "bc", (ins >> 21) & 31, (ins >> 16) & 31, sym); -+#if LJ_ARCH_PPC64 -+ } else if ((ins >> 26) == 14) { -+ if (strcmp(sym, "TOC") < 0) { -+ fprintf(ctx->fp, "\taddi 2,2,%s\n", sym); -+ } -+ } else if ((ins >> 26) == 15) { -+ if (strcmp(sym, "TOC") < 0) { -+ fprintf(ctx->fp, "\taddis 2,12,%s\n", sym); -+ } -+#endif - } else if ((ins >> 26) == 18) { - #if LJ_ARCH_PPC64 -- const char *suffix = strchr(sym, '@'); -- if (suffix && suffix[1] == 'h') { -- fprintf(ctx->fp, "\taddis 11, 2, %s\n", sym); -- } else if (suffix && suffix[1] == 'l') { -- fprintf(ctx->fp, "\tld 12, %s\n", sym); -+ char *suffix = strchr(sym, '@'); -+ if (suffix) { -+ fprintf(ctx->fp, "\tld 12, %s(2)\n", sym); - } else - #endif - fprintf(ctx->fp, "\t%s " TOCPREFIX "%s\n", (ins & 1) ? "bl" : "b", sym); -@@ -247,6 +255,7 @@ void emit_asm(BuildCtx *ctx) - fprintf(ctx->fp, "\t.file \"buildvm_%s.dasc\"\n", ctx->dasm_arch); - #if LJ_ARCH_PPC64 - fprintf(ctx->fp, "\t.abiversion 2\n"); -+ fprintf(ctx->fp, "\t.section\t\t\".toc\",\"aw\"\n"); - #endif - fprintf(ctx->fp, "\t.text\n"); - emit_asm_align(ctx, 4); -diff --git a/src/lj_arch.h b/src/lj_arch.h -index f1e7d7f..e2cdda7 100644 ---- a/src/lj_arch.h -+++ b/src/lj_arch.h -@@ -375,8 +375,8 @@ - #if !LJ_ARCH_PPC64 && LJ_ARCH_ENDIAN == LUAJIT_LE - #error "No support for little-endian PPC32" - #endif --#if LJ_ARCH_PPC64 --#error "No support for PowerPC 64 bit mode (yet)" -+#if LJ_ARCH_PPC64 && LJ_ARCH_ENDIAN == LUAJIT_BE -+#error "No support for big-endian PPC64" - #endif - #ifdef __NO_FPRS__ - #error "No support for PPC/e500 anymore (use LuaJIT 2.0)" -diff --git a/src/lj_ccall.c b/src/lj_ccall.c -index 5ab5b60..9ecad9d 100644 ---- a/src/lj_ccall.c -+++ b/src/lj_ccall.c -@@ -380,6 +380,42 @@ - #define CCALL_HANDLE_COMPLEXARG \ - /* Pass complex by value in 2 or 4 GPRs. 
*/ - -+#if LJ_ARCH_PPC64 -+#define CCALL_HANDLE_REGARG \ -+ if (isva) { /* only GPRs will be used on C ellipsis operator */ \ -+ goto gpr; \ -+ } \ -+ else { \ -+ if (isfp) { /* Try to pass argument in FPRs. */ \ -+ if (nfpr + 1 <= CCALL_NARG_FPR) { \ -+ dp = &cc->fpr[nfpr]; \ -+ nfpr += 1; \ -+ d = ctype_get(cts, CTID_DOUBLE); /* FPRs always hold doubles. */ \ -+ if (ngpr + 1 <= maxgpr) \ -+ ngpr += 1; /* align GPRs */ \ -+ else if (nsp + 1 <= CCALL_MAXSTACK) \ -+ nsp += 1; /* align save area slots */ \ -+ else \ -+ goto err_nyi; /* Too many args */ \ -+ goto done; \ -+ } \ -+ } else { /* Try to pass argument in GPRs. */ \ -+ gpr: \ -+ if (n > 1) { \ -+ lua_assert(n == 2 || n == 4); /* int64_t or complex (float). */ \ -+ if (ctype_isinteger(d->info)) \ -+ ngpr = (ngpr + 1u) & ~1u; /* Align int64_t to regpair. */ \ -+ else if (ngpr + n > maxgpr) \ -+ ngpr = maxgpr; /* Prevent reordering. */ \ -+ } \ -+ if (ngpr + n <= maxgpr) { \ -+ dp = &cc->gpr[ngpr]; \ -+ ngpr += n; \ -+ goto done; \ -+ } \ -+ } \ -+ } -+#else /* 32 bits */ - #define CCALL_HANDLE_REGARG \ - if (isfp) { /* Try to pass argument in FPRs. */ \ - if (nfpr + 1 <= CCALL_NARG_FPR) { \ -@@ -402,6 +438,7 @@ - goto done; \ - } \ - } -+#endif - - #define CCALL_HANDLE_RET \ - if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ -diff --git a/src/lj_ccall.h b/src/lj_ccall.h -index 91983fe..0994d84 100644 ---- a/src/lj_ccall.h -+++ b/src/lj_ccall.h -@@ -85,12 +85,21 @@ typedef union FPRArg { - - #elif LJ_TARGET_PPC - -+#if LJ_ARCH_PPC64 -+#define CCALL_NARG_GPR 8 -+#define CCALL_NARG_FPR 13 -+#define CCALL_NRET_GPR 4 /* For complex double. */ -+#define CCALL_NRET_FPR 1 -+#define CCALL_SPS_EXTRA 14 -+#define CCALL_SPS_FREE 0 -+#else - #define CCALL_NARG_GPR 8 - #define CCALL_NARG_FPR 8 - #define CCALL_NRET_GPR 4 /* For complex double. 
*/ - #define CCALL_NRET_FPR 1 - #define CCALL_SPS_EXTRA 4 - #define CCALL_SPS_FREE 0 -+#endif - - typedef intptr_t GPRArg; - typedef double FPRArg; -diff --git a/src/lj_ccallback.c b/src/lj_ccallback.c -index 065c329..0ef527b 100644 ---- a/src/lj_ccallback.c -+++ b/src/lj_ccallback.c -@@ -61,7 +61,11 @@ static MSize CALLBACK_OFS2SLOT(MSize ofs) - - #elif LJ_TARGET_PPC - -+#if LJ_ARCH_PPC64 -+#define CALLBACK_MCODE_HEAD 40 -+#else /* PPC 32bits */ - #define CALLBACK_MCODE_HEAD 24 -+#endif - - #elif LJ_TARGET_MIPS - -@@ -189,10 +193,21 @@ static void callback_mcode_init(global_State *g, uint32_t *page) - uint32_t *p = page; - void *target = (void *)lj_vm_ffi_callback; - MSize slot; -+#if LJ_ARCH_PPC64 -+ *p++ = PPCI_LI | PPCF_T(RID_TMP) | ((((intptr_t)target) >> 32) & 0xffff); -+ *p++ = PPCI_LI | PPCF_T(RID_R12) | ((((intptr_t)g) >> 32) & 0xffff); -+ *p++ = PPCI_RLDICR | PPCF_T(RID_TMP) | PPCF_A(RID_TMP) | PPCF_SH(32) | PPCF_M6(63-32); /* sldi */ -+ *p++ = PPCI_RLDICR | PPCF_T(RID_R12) | PPCF_A(RID_R12) | PPCF_SH(32) | PPCF_M6(63-32); /* sldi */ -+ *p++ = PPCI_ORIS | PPCF_A(RID_TMP) | PPCF_T(RID_TMP) | ((((intptr_t)target) >> 16) & 0xffff); -+ *p++ = PPCI_ORIS | PPCF_A(RID_R12) | PPCF_T(RID_R12) | ((((intptr_t)g) >> 16) & 0xffff); -+ *p++ = PPCI_ORI | PPCF_A(RID_TMP) | PPCF_T(RID_TMP) | (((intptr_t)target) & 0xffff); -+ *p++ = PPCI_ORI | PPCF_A(RID_R12) | PPCF_T(RID_R12) | (((intptr_t)g) & 0xffff); -+#else /* PPC 32bits */ - *p++ = PPCI_LIS | PPCF_T(RID_TMP) | (u32ptr(target) >> 16); - *p++ = PPCI_LIS | PPCF_T(RID_R12) | (u32ptr(g) >> 16); - *p++ = PPCI_ORI | PPCF_A(RID_TMP)|PPCF_T(RID_TMP) | (u32ptr(target) & 0xffff); - *p++ = PPCI_ORI | PPCF_A(RID_R12)|PPCF_T(RID_R12) | (u32ptr(g) & 0xffff); -+#endif - *p++ = PPCI_MTCTR | PPCF_T(RID_TMP); - *p++ = PPCI_BCTR; - for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) { -diff --git a/src/lj_ctype.h b/src/lj_ctype.h -index 6639547..2f09d7d 100644 ---- a/src/lj_ctype.h -+++ b/src/lj_ctype.h -@@ -153,7 +153,7 @@ typedef struct 
CType { - - /* Simplify target-specific configuration. Checked in lj_ccall.h. */ - #define CCALL_MAX_GPR 8 --#define CCALL_MAX_FPR 8 -+#define CCALL_MAX_FPR 13 - - typedef LJ_ALIGN(8) union FPRCBArg { double d; float f[2]; } FPRCBArg; - -diff --git a/src/lj_frame.h b/src/lj_frame.h -index a86c36b..7ec6478 100644 ---- a/src/lj_frame.h -+++ b/src/lj_frame.h -@@ -207,6 +207,15 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. */ - #define CFRAME_OFS_MULTRES 456 - #define CFRAME_SIZE 400 - #define CFRAME_SHIFT_MULTRES 3 -+#elif LJ_ARCH_PPC64 -+#define CFRAME_OFS_ERRF 88 -+#define CFRAME_OFS_NRES 80 -+#define CFRAME_OFS_L 72 -+#define CFRAME_OFS_PC 64 -+#define CFRAME_OFS_MULTRES 56 -+#define CFRAME_OFS_PREV 48 -+#define CFRAME_SIZE 400 -+#define CFRAME_SHIFT_MULTRES 3 - #else - #define CFRAME_OFS_ERRF 48 - #define CFRAME_OFS_NRES 44 -diff --git a/src/lj_target_ppc.h b/src/lj_target_ppc.h -index 9986768..7a68e2a 100644 ---- a/src/lj_target_ppc.h -+++ b/src/lj_target_ppc.h -@@ -1,5 +1,5 @@ - /* --** Definitions for PPC CPUs. -+** Definitions for PPC/PPC64 CPUs. - ** Copyright (C) 2005-2017 Mike Pall. 
See Copyright Notice in luajit.h - */ - -@@ -131,6 +131,8 @@ static LJ_AINLINE uint32_t *exitstub_trace_addr_(uint32_t *p, uint32_t exitno) - #define PPCF_C(r) ((r) << 6) - #define PPCF_MB(n) ((n) << 6) - #define PPCF_ME(n) ((n) << 1) -+#define PPCF_SH(n) ((((n) & 31) << (11+1)) | (((n) & 32) >> (5-1))) -+#define PPCF_M6(n) ((((n) & 31) << (5+1)) | (((n) & 32) << (11-5))) - #define PPCF_Y 0x00200000 - #define PPCF_DOT 0x00000001 - -@@ -200,6 +202,13 @@ typedef enum PPCIns { - PPCI_RLWINM = 0x54000000, - PPCI_RLWIMI = 0x50000000, - -+ PPCI_RLDICL = 0x78000000, -+ PPCI_RLDICR = 0x78000004, -+ PPCI_RLDIC = 0x78000008, -+ PPCI_RLDIMI = 0x7800000c, -+ PPCI_RLDCL = 0x78000010, -+ PPCI_RLDCR = 0x78000012, -+ - PPCI_B = 0x48000000, - PPCI_BL = 0x48000001, - PPCI_BC = 0x40800000, -diff --git a/src/vm_ppc64.dasc b/src/vm_ppc64.dasc -new file mode 100644 -index 0000000..d7e6bb6 ---- /dev/null -+++ b/src/vm_ppc64.dasc -@@ -0,0 +1,4515 @@ -+|// Low-level VM code for PPC64 CPUs. -+|// Bytecode interpreter, fast functions and helper functions. -+|// Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h -+| -+|.arch ppc -+|.section code_op, code_sub -+| -+|.actionlist build_actionlist -+|.globals GLOB_ -+|.globalnames globnames -+|.externnames extnames -+| -+|// Note: The ragged indentation of the instructions is intentional. -+|// The starting columns indicate data dependencies. -+| -+|// Convenience macros for TOC handling. 
-+|.macro blex, target -+| bl extern target@got // ld 12, target@got(2) -+| mtctr r12 -+| bctrl -+|//It is require to restore the TOC register after a function call -+|//once the linker will not imply a function from mtctr; bctrl; pair -+|//it is required to perform such step manually in this macro -+| ld TOCREG, SAVE_TOC -+|.endmacro -+| -+|.macro pic_code_setup, target_name -+| addisl r2, r12, extern .TOC.-lj_..target_name@ha -+| addil r2, r2, extern .TOC.-lj_..target_name@l -+|.endmacro -+| -+|.macro checkov, noov -+| mcrxr cr0 -+| bley noov -+|.endmacro -+| -+|//----------------------------------------------------------------------- -+| -+|// Fixed register assignments for the interpreter. -+|// Don't use: r1 = sp, r2 and r13 = reserved (TOC, TLS or SDATA) -+| -+|// The following must be C callee-save (but BASE is often refetched). -+|.define BASE, r14 // Base of current Lua stack frame. -+|.define KBASE, r15 // Constants of current Lua function. -+|.define PC, r16 // Next PC. -+|.define DISPATCH, r17 // Opcode dispatch table. -+|.define LREG, r18 // Register holding lua_State (also in SAVE_L). -+|.define MULTRES, r19 // Size of multi-result: (nresults+1)*8. -+|.define JGL, r31 // On-trace: global_State + 32768. -+| -+|// Constants for type-comparisons, stores and conversions. C callee-save. -+|.define TISNUM, r22 // Constant LJ_TISNUM << 47. -+|.define TISNIL, r23 -+|.define ZERO, r24 -+|.define TOBIT, f30 // 2^52 + 2^51. -+|.define TONUM, f31 // 2^52 + 2^51 + 2^31. -+| -+|// The following temporaries are not saved across C calls, except for RA. -+|.define RA, r20 // Callee-save. -+|.define RB, r10 -+|.define RC, r11 -+|.define RD, r12 // Also used as function linkage register -+|.define INS, r7 // Overlaps CARG5. -+| -+|.define TMP0, r0 -+|.define TMP1, r8 -+|.define TMP2, r9 -+|.define TMP3, r6 // Overlaps CARG4. -+|.define TMP4, r25 // reserved for check.* macros -+|.define TMP5, r26 // reserved for check.* macros -+| -+|// Saved temporaries. 
-+|.define SAVE0, r21 -+| -+|// Calling conventions. -+|.define CARG1, r3 -+|.define CARG2, r4 -+|.define CARG3, r5 -+|.define CARG4, r6 // Overlaps TMP3. -+|.define CARG5, r7 // Overlaps INS. -+| -+|.define FARG1, f1 -+|.define FARG2, f2 -+| -+|.define CRET1, r3 -+|.define CRET2, r4 -+| -+|.define TOCREG, r2 // TOC register (only used by C code). -+|.define ENVREG, r11 // Environment pointer (nested C functions). -+| -+|// Stack layout while in interpreter. Must match with lj_frame.h. -+| -+|.define SAVE_LR, 416(sp) -+|.define CFRAME_SPACE, 400 // Delta for sp. -+|// Back chain for sp: 400(sp) <-- sp entering interpreter -+|.define SAVE_FPR_, 256 // .. 256+18*8: 64 bit FPR saves. -+|.define SAVE_GPR_, 112 // .. 112+18*8: 64 bit GPR saves. -+|// 92(sp) // \ 32 bit C frame info. -+|.define SAVE_ERRF, 88(sp) // | -+|.define SAVE_NRES, 80(sp) // | -+|.define SAVE_L, 72(sp) // > Parameter save area. -+|.define SAVE_PC, 64(sp) // | -+|.define SAVE_MULTRES, 56(sp) // | -+|.define SAVE_CFRAME, 48(sp) // / 64 bit C frame chain. -+|.define TMPD_HI, 44(sp) // \ Link editor temp (ABI mandated). -+|.define TMPD_LO, 40(sp) // / -+|.define TONUM_HI, 36(sp) // \ Compiler temp (ABI mandated). -+|.define TONUM_LO, 32(sp) // / -+|// 32(sp) // Callee parameter save area (ABI mandated). -+|.define SAVE_TOC, 24(sp) // TOC save area. -+|// Next frame lr: 16(sp) -+|.define SAVE_CR, 8(sp) // 64 bit CR save. 
-+|// Back chain for sp: 0(sp) <-- sp while in interpreter -+| -+|.define TMPD_BLO, 40(sp) // LSB -+|.define TMPD, TMPD_LO // base address of TMPD doubleword -+|.define TONUM_D, TONUM_LO// base address of TONUM doubleword -+| -+|.macro save_, reg -+| std r..reg, SAVE_GPR_+(reg-14)*8(sp) -+| stfd f..reg, SAVE_FPR_+(reg-14)*8(sp) -+|.endmacro -+|.macro rest_, reg -+| ld r..reg, SAVE_GPR_+(reg-14)*8(sp) -+| lfd f..reg, SAVE_FPR_+(reg-14)*8(sp) -+|.endmacro -+| -+|.macro saveregs -+| stdu sp, -CFRAME_SPACE(sp) -+| save_ 14; save_ 15; save_ 16 -+| mflr r0 -+| save_ 17; save_ 18; save_ 19; save_ 20; save_ 21; save_ 22 -+| std r0, SAVE_LR -+| save_ 23; save_ 24; save_ 25 -+| mfcr r0 -+| save_ 26; save_ 27; save_ 28; save_ 29; save_ 30; save_ 31 -+| std r0, SAVE_CR -+| std TOCREG, SAVE_TOC -+|.endmacro -+| -+|.macro restoreregs -+| ld r0, SAVE_LR -+| ld r12, SAVE_CR -+| rest_ 14; rest_ 15; rest_ 16; rest_ 17; rest_ 18; rest_ 19 -+| mtlr r0 -+| mtcrf 0xff, r12 -+| rest_ 20; rest_ 21; rest_ 22; rest_ 23; rest_ 24; rest_ 25 -+| rest_ 26; rest_ 27; rest_ 28; rest_ 29; rest_ 30; rest_ 31 -+| addi sp, sp, CFRAME_SPACE -+|.endmacro -+| -+|// Type definitions. Some of these are only used for documentation. -+|.type L, lua_State, LREG -+|.type GL, global_State -+|.type TVALUE, TValue -+|.type GCOBJ, GCobj -+|.type STR, GCstr -+|.type TAB, GCtab -+|.type LFUNC, GCfuncL -+|.type CFUNC, GCfuncC -+|.type PROTO, GCproto -+|.type UPVAL, GCupval -+|.type NODE, Node -+|.type NARGS8, int -+|.type TRACE, GCtrace -+|.type SBUF, SBuf -+| -+|//----------------------------------------------------------------------- -+| -+|// Trap for not-yet-implemented parts. -+|.macro NYI; tw 4, sp, sp; .endmacro -+| -+|// int/FP conversions. 
-+|.macro tonum_i, freg, reg -+| xoris reg, reg, 0x8000 -+| stw reg, TONUM_LO -+| lfd freg, TONUM_D -+| fsub freg, freg, TONUM -+|.endmacro -+| -+|.macro toint, reg, freg, tmpfreg -+| fctiwz tmpfreg, freg -+| stfd tmpfreg, TMPD -+| lwz reg, TMPD_LO -+|.endmacro -+| -+|.macro toint, reg, freg -+| toint reg, freg, freg -+|.endmacro -+| -+|//----------------------------------------------------------------------- -+| -+|// Access to frame relative to BASE. -+|.define FRAME_PC, -8 -+|.define FRAME_FUNC, -16 -+| -+|// Instruction decode. -+|.macro decode_OP8, dst, ins; rlwinm dst, ins, 3, 21, 28; .endmacro -+|.macro decode_RA8, dst, ins; rlwinm dst, ins, 27, 21, 28; .endmacro -+|.macro decode_RB8, dst, ins; rlwinm dst, ins, 11, 21, 28; .endmacro -+|.macro decode_RC8, dst, ins; rlwinm dst, ins, 19, 21, 28; .endmacro -+|.macro decode_RD8, dst, ins; rlwinm dst, ins, 19, 13, 28; .endmacro -+| -+|.macro decode_OP1, dst, ins; rlwinm dst, ins, 0, 24, 31; .endmacro -+|.macro decode_RD4, dst, ins; rlwinm dst, ins, 18, 14, 29; .endmacro -+| -+|// Instruction fetch. -+|.macro ins_NEXT1 -+| lwz INS, 0(PC) -+| addi PC, PC, 4 -+|.endmacro -+|// Instruction decode+dispatch. Note: optimized for e300! -+|.macro ins_NEXT2 -+| decode_OP8 TMP1, INS -+| ldx TMP0, DISPATCH, TMP1 -+| mtctr TMP0 -+| decode_RB8 RB, INS -+| decode_RD8 RD, INS -+| decode_RA8 RA, INS -+| decode_RC8 RC, INS -+| bctr -+|.endmacro -+|.macro ins_NEXT -+| ins_NEXT1 -+| ins_NEXT2 -+|.endmacro -+| -+|// Instruction footer. -+|.if 1 -+| // Replicated dispatch. Less unpredictable branches, but higher I-Cache use. -+| .define ins_next, ins_NEXT -+| .define ins_next_, ins_NEXT -+| .define ins_next1, ins_NEXT1 -+| .define ins_next2, ins_NEXT2 -+|.else -+| // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch. -+| // Affects only certain kinds of benchmarks (and only with -j off). 
-+| .macro ins_next -+| b ->ins_next -+| .endmacro -+| .macro ins_next1 -+| .endmacro -+| .macro ins_next2 -+| b ->ins_next -+| .endmacro -+| .macro ins_next_ -+| ->ins_next: -+| ins_NEXT -+| .endmacro -+|.endif -+| -+|// Call decode and dispatch. -+|.macro ins_callt -+| // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC -+| ld PC, LFUNC:RB->pc -+| lwz INS, 0(PC) -+| addi PC, PC, 4 -+| decode_OP8 TMP1, INS -+| decode_RA8 RA, INS -+| ldx TMP0, DISPATCH, TMP1 -+| add RA, RA, BASE -+| mtctr TMP0 -+| bctr -+|.endmacro -+| -+|.macro ins_call -+| // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, PC = caller PC -+| std PC, FRAME_PC(BASE) -+| ins_callt -+|.endmacro -+| -+|//----------------------------------------------------------------------- -+| -+|// Macros to test operand types. -+|.macro get_oper_type, dst, reg -+| sradi dst, reg, 47 -+|.endmacro -+| -+|.macro set_oper_type, dst, reg -+| sldi dst, reg, 47 -+|.endmacro -+| -+|.macro set_bool, reg, type; li TMP4, type; rotldi reg, TMP4, 47; .endmacro -+| -+|.macro add_oper_type, value, type -+| clear_field value -+| add value, value, type -+|.endmacro -+| -+|.macro clear_field, reg -+| clrldi reg, reg, 17 -+|.endmacro -+| -+|// clear bits 32-63 of 32-bit numbers and extend sign -+|.macro get_value, reg -+| srawi reg, reg, 0 -+|.endmacro -+| -+|.macro checktp, cr, reg, clear, cmp_type, cmp_arg -+| get_oper_type TMP5, reg -+| cmp_type cr, TMP5, cmp_arg -+| clear reg -+|.endmacro -+| -+|.macro nop_arg, p; .endmacro -+| -+|.macro checknum_sig, reg -+| li TMP4, LJ_TISNUM -+| checktp cr0, reg, get_value, cmpd, TMP4 -+|.endmacro -+|.macro checknum_sig, cr, reg -+| li TMP4, LJ_TISNUM -+| checktp cr, reg, get_value, cmpd, TMP4 -+|.endmacro -+|.macro checknum, reg -+| li TMP4, LJ_TISNUM -+| checktp cr0, reg, get_value, cmpld, TMP4 -+|.endmacro -+|.macro checknum, cr, reg -+| li TMP4, LJ_TISNUM -+| checktp cr, reg, get_value, cmpld, TMP4 -+|.endmacro -+|.macro checkstr, reg -+| li TMP4, LJ_TSTR -+| checktp 
cr0, reg, clear_field, cmpd, TMP4 -+|.endmacro -+|.macro checktab, reg -+| li TMP4, LJ_TTAB -+| checktp cr0, reg, clear_field, cmpd, TMP4 -+|.endmacro -+|.macro checkfunc, reg -+| li TMP4, LJ_TFUNC -+| checktp cr0, reg, clear_field, cmpd, TMP4 -+|.endmacro -+|.macro checkthread, reg -+| li TMP4, LJ_TTHREAD -+| checktp cr0, reg, clear_field, cmpd, TMP4 -+|.endmacro -+| -+|.macro checknil_noclear, reg -+| li TMP4, LJ_TNIL -+| checktp cr0, reg, nop_arg, cmpd, TMP4 -+|.endmacro -+|.macro checknum_noclear, reg -+| li TMP4, LJ_TISNUM -+| checktp cr0, reg, nop_arg, cmpld, TMP4 -+|.endmacro -+|.macro checknum_noclear, cr, reg -+| li TMP4, LJ_TISNUM -+| checktp cr, reg, nop_arg, cmpld, TMP4 -+|.endmacro -+|.macro checkfunc_noclear, reg -+| li TMP4, LJ_TFUNC -+| checktp cr0, reg, nop_arg, cmpd, TMP4 -+|.endmacro -+| -+|.macro branch_RD -+| srdi TMP0, RD, 1 -+| addis PC, PC, -(BCBIAS_J*4 >> 16) -+| add PC, PC, TMP0 -+|.endmacro -+| -+|// Assumes DISPATCH is relative to GL. -+#define DISPATCH_GL(field) (GG_DISP2G + (int)offsetof(global_State, field)) -+#define DISPATCH_J(field) (GG_DISP2J + (int)offsetof(jit_State, field)) -+| -+#define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto)) -+| -+|.macro hotcheck, delta, target -+| NYI -+|.endmacro -+| -+|.macro hotloop -+| hotcheck HOTCOUNT_LOOP, ->vm_hotloop -+|.endmacro -+| -+|.macro hotcall -+| hotcheck HOTCOUNT_CALL, ->vm_hotcall -+|.endmacro -+| -+|// Set current VM state. Uses TMP0. -+|.macro li_vmstate, st; li TMP0, ~LJ_VMST_..st; .endmacro -+|.macro st_vmstate; std TMP0, DISPATCH_GL(vmstate)(DISPATCH); .endmacro -+| -+|// Move table write barrier back. Overwrites mark and tmp. -+|.macro barrierback, tab, mark, tmp -+| ld tmp, DISPATCH_GL(gc.grayagain)(DISPATCH) -+| // Assumes LJ_GC_BLACK is 0x04. 
-+| rlwinm mark, mark, 0, 30, 28 // black2gray(tab) -+| std tab, DISPATCH_GL(gc.grayagain)(DISPATCH) -+| stb mark, tab->marked -+| std tmp, tab->gclist -+|.endmacro -+| -+|//----------------------------------------------------------------------- -+ -+/* Generate subroutines used by opcodes and other parts of the VM. */ -+/* The .code_sub section should be last to help static branch prediction. */ -+static void build_subroutines(BuildCtx *ctx) -+{ -+ |.code_sub -+ | -+ |//----------------------------------------------------------------------- -+ |//-- Return handling ---------------------------------------------------- -+ |//----------------------------------------------------------------------- -+ | -+ |->vm_returnp: -+ | // See vm_return. Also: TMP2 = previous base. -+ | andi. TMP0, PC, FRAME_P -+ | set_bool TMP1, LJ_TTRUE -+ | beq ->cont_dispatch -+ | -+ | // Return from pcall or xpcall fast func. -+ | ld PC, FRAME_PC(TMP2) // Fetch PC of previous frame. -+ | mr BASE, TMP2 // Restore caller base. -+ | // Prepending may overwrite the pcall frame, so do it at the end. -+ | stdu TMP1, FRAME_PC(RA) // Prepend true to results. -+ | -+ |->vm_returnc: -+ | addi RD, RD, 8 // RD = (nresults+1)*8. -+ | andi. TMP0, PC, FRAME_TYPE -+ | cmpdi cr1, RD, 0 -+ | li CRET1, LUA_YIELD -+ | beq cr1, ->vm_unwind_c_eh -+ | mr MULTRES, RD -+ | beq ->BC_RET_Z // Handle regular return to Lua. -+ | -+ |->vm_return: -+ | // BASE = base, RA = resultptr, RD/MULTRES = (nresults+1)*8, PC = return -+ | // TMP0 = PC & FRAME_TYPE -+ | cmpdi TMP0, FRAME_C -+ | li TMP4, FRAME_TYPEP -+ | not TMP4, TMP4 -+ | li_vmstate C -+ | and TMP2, PC, TMP4 -+ | sub TMP2, BASE, TMP2 // TMP2 = previous base. -+ | bney ->vm_returnp -+ | -+ | addic. TMP1, RD, -8 -+ | std TMP2, L->base -+ | lwz TMP2, SAVE_NRES -+ | extsw TMP2, TMP2 -+ | subi BASE, BASE, 16 -+ | st_vmstate -+ | sldi TMP2, TMP2, 3 -+ | beq >2 -+ |1: -+ | addic. 
TMP1, TMP1, -8 -+ | lfd f0, 0(RA) -+ | addi RA, RA, 8 -+ | stfd f0, 0(BASE) -+ | addi BASE, BASE, 8 -+ | bney <1 -+ | -+ |2: -+ | cmpd TMP2, RD // More/less results wanted? -+ | bne >6 -+ |3: -+ | std BASE, L->top // Store new top. -+ | -+ |->vm_leave_cp: -+ | ld TMP0, SAVE_CFRAME // Restore previous C frame. -+ | li CRET1, 0 // Ok return status for vm_pcall. -+ | std TMP0, L->cframe -+ | -+ |->vm_leave_unw: -+ | restoreregs -+ | blr -+ | -+ |6: -+ | ble >7 // Less results wanted? -+ | // More results wanted. Check stack size and fill up results with nil. -+ | ld TMP1, L->maxstack -+ | cmpld BASE, TMP1 -+ | bge >8 -+ | std TISNIL, 0(BASE) -+ | addi RD, RD, 8 -+ | addi BASE, BASE, 8 -+ | b <2 -+ | -+ |7: // Less results wanted. -+ | subfic TMP3, TMP2, 0 // LUA_MULTRET+1 case? -+ | sub TMP0, RD, TMP2 -+ | subfe TMP1, TMP1, TMP1 // TMP1 = TMP2 == 0 ? 0 : -1 -+ | and TMP0, TMP0, TMP1 -+ | sub BASE, BASE, TMP0 // Either keep top or shrink it. -+ | b <3 -+ | -+ |8: // Corner case: need to grow stack for filling up results. -+ | // This can happen if: -+ | // - A C function grows the stack (a lot). -+ | // - The GC shrinks the stack in between. -+ | // - A return back from a lua_call() with (high) nresults adjustment. -+ | std BASE, L->top // Save current top held in BASE (yes). -+ | mr SAVE0, RD -+ | srdi CARG2, TMP2, 3 -+ | mr CARG1, L -+ | bl extern lj_state_growstack // (lua_State *L, int n) -+ | ld TMP2, SAVE_NRES -+ | mr RD, SAVE0 -+ | sldi TMP2, TMP2, 3 -+ | ld BASE, L->top // Need the (realloced) L->top in BASE. -+ | b <2 -+ | -+ |->vm_unwind_c: // Unwind C stack, return from vm_pcall. -+ | // (void *cframe, int errcode) -+ | mr sp, CARG1 -+ | mr CRET1, CARG2 -+ |->vm_unwind_c_eh: // Landing pad for external unwinder. -+ | ld L, SAVE_L -+ | ld TOCREG, SAVE_TOC -+ | li TMP0, ~LJ_VMST_C -+ | ld GL:TMP1, L->glref -+ | std TMP0, GL:TMP1->vmstate -+ | b ->vm_leave_unw -+ | -+ |->vm_unwind_ff: // Unwind C stack, return from ff pcall. 
-+ | // (void *cframe) -+ | rldicr sp, CARG1, 0, 61 -+ |->vm_unwind_ff_eh: // Landing pad for external unwinder. -+ | ld L, SAVE_L -+ | ld TOCREG, SAVE_TOC -+ | li TISNUM, LJ_TISNUM // Setup type comparison constants. -+ | set_oper_type TISNUM, TISNUM -+ | ld BASE, L->base -+ | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). -+ | ld DISPATCH, L->glref // Setup pointer to dispatch table. -+ | li ZERO, 0 -+ | std TMP3, TMPD -+ | set_bool TMP1, LJ_TFALSE -+ | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). -+ | li TISNIL, LJ_TNIL -+ | li_vmstate INTERP -+ | lfs TOBIT, TMPD -+ | ld PC, FRAME_PC(BASE) // Fetch PC of previous frame. -+ | la RA, -8(BASE) // Results start at BASE-8. -+ | std TMP3, TMPD -+ | addi DISPATCH, DISPATCH, GG_G2DISP -+ | std TMP1, 0(RA) // Prepend false to error message. -+ | li RD, 16 // 2 results: false + error message. -+ | st_vmstate -+ | lfs TONUM, TMPD -+ | b ->vm_returnc -+ | -+ |//----------------------------------------------------------------------- -+ |//-- Grow stack for calls ----------------------------------------------- -+ |//----------------------------------------------------------------------- -+ | -+ |->vm_growstack_c: // Grow stack for C function. -+ | li CARG2, LUA_MINSTACK -+ | b >2 -+ | -+ |->vm_growstack_l: // Grow stack for Lua function. -+ | // BASE = new base, RA = BASE+framesize*8, RC = nargs*8, PC = first PC -+ | add RC, BASE, RC -+ | sub RA, RA, BASE -+ | std BASE, L->base -+ | addi PC, PC, 4 // Must point after first instruction. -+ | std RC, L->top -+ | srdi CARG2, RA, 3 -+ |2: -+ | // L->base = new base, L->top = top -+ | std PC, SAVE_PC -+ | mr CARG1, L -+ | bl extern lj_state_growstack // (lua_State *L, int n) -+ | ld BASE, L->base -+ | ld RC, L->top -+ | ld LFUNC:RB, FRAME_FUNC(BASE) -+ | sub RC, RC, BASE -+ | clear_field RB -+ | // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC -+ | ins_callt // Just retry the call. 
-+ | -+ |//----------------------------------------------------------------------- -+ |//-- Entry points into the assembler VM --------------------------------- -+ |//----------------------------------------------------------------------- -+ | -+ |->vm_resume: // Setup C frame and resume thread. -+ | // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0) -+ | saveregs -+ | mr L, CARG1 -+ | ld DISPATCH, L->glref // Setup pointer to dispatch table. -+ | mr BASE, CARG2 -+ | lbz TMP1, L->status -+ | std L, SAVE_L -+ | li PC, FRAME_CP -+ | addi TMP0, sp, CFRAME_RESUME -+ | addi DISPATCH, DISPATCH, GG_G2DISP -+ | std CARG3, SAVE_NRES -+ | cmpldi TMP1, 0 -+ | std CARG3, SAVE_ERRF -+ | std CARG3, SAVE_CFRAME -+ | std CARG1, SAVE_PC // Any value outside of bytecode is ok. -+ | std TMP0, L->cframe -+ | beq >3 -+ | -+ | // Resume after yield (like a return). -+ | std L, DISPATCH_GL(cur_L)(DISPATCH) -+ | mr RA, BASE -+ | ld BASE, L->base -+ | li TISNUM, LJ_TISNUM // Setup type comparison constants. -+ | set_oper_type TISNUM, TISNUM -+ | ld TMP1, L->top -+ | ld PC, FRAME_PC(BASE) -+ | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). -+ | stb CARG3, L->status -+ | std TMP3, TMPD -+ | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). -+ | lfs TOBIT, TMPD -+ | sub RD, TMP1, BASE -+ | std TMP3, TMPD -+ | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double) -+ | addi RD, RD, 8 -+ | stw TMP0, TONUM_HI -+ | li_vmstate INTERP -+ | li ZERO, 0 -+ | st_vmstate -+ | andi. TMP0, PC, FRAME_TYPE -+ | mr MULTRES, RD -+ | lfs TONUM, TMPD -+ | li TISNIL, LJ_TNIL -+ | beq ->BC_RET_Z -+ | b ->vm_return -+ | -+ |->vm_pcall: // Setup protected C frame and enter VM. -+ | // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef) -+ | saveregs -+ | li PC, FRAME_CP -+ | std CARG4, SAVE_ERRF -+ | b >1 -+ | -+ |->vm_call: // Setup C frame and enter VM. 
-+ | // (lua_State *L, TValue *base, int nres1) -+ | saveregs -+ | li PC, FRAME_C -+ | -+ |1: // Entry point for vm_pcall above (PC = ftype). -+ | ld TMP1, L:CARG1->cframe -+ | mr L, CARG1 -+ | std CARG3, SAVE_NRES -+ | ld DISPATCH, L->glref // Setup pointer to dispatch table. -+ | std CARG1, SAVE_L -+ | mr BASE, CARG2 -+ | addi DISPATCH, DISPATCH, GG_G2DISP -+ | std CARG1, SAVE_PC // Any value outside of bytecode is ok. -+ | std TMP1, SAVE_CFRAME -+ | std sp, L->cframe // Add our C frame to cframe chain. -+ | -+ |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype). -+ | std L, DISPATCH_GL(cur_L)(DISPATCH) -+ | ld TMP2, L->base // TMP2 = old base (used in vmeta_call). -+ | li TISNUM, LJ_TISNUM // Setup type comparison constants. -+ | set_oper_type TISNUM, TISNUM -+ | ld TMP1, L->top -+ | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). -+ | add PC, PC, BASE -+ | stw TMP3, TMPD -+ | li ZERO, 0 -+ | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). -+ | lfs TOBIT, TMPD -+ | sub PC, PC, TMP2 // PC = frame delta + frame type -+ | stw TMP3, TMPD -+ | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double) -+ | sub NARGS8:RC, TMP1, BASE -+ | stw TMP0, TONUM_HI -+ | li_vmstate INTERP -+ | lfs TONUM, TMPD -+ | li TISNIL, LJ_TNIL -+ | st_vmstate -+ | -+ |->vm_call_dispatch: -+ | // TMP2 = old base, BASE = new base, RC = nargs*8, PC = caller PC -+ | ld LFUNC:RB, FRAME_FUNC(BASE) -+ | checkfunc RB -+ | bne ->vmeta_call -+ | -+ |->vm_call_dispatch_f: -+ | ins_call -+ | // BASE = new base, RB = func, RC = nargs*8, PC = caller PC -+ | -+ |->vm_cpcall: // Setup protected C frame, call C. -+ | // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp) -+ | saveregs -+ | mr L, CARG1 -+ | ld TMP0, L:CARG1->stack -+ | std CARG1, SAVE_L -+ | ld TMP1, L->top -+ | ld DISPATCH, L->glref // Setup pointer to dispatch table. -+ | std CARG1, SAVE_PC // Any value outside of bytecode is ok. -+ | sub TMP0, TMP0, TMP1 // Compute -savestack(L, L->top). 
-+ | ld TMP1, L->cframe -+ | addi DISPATCH, DISPATCH, GG_G2DISP -+ | li TMP2, 0 -+ | std TMP0, SAVE_NRES // Neg. delta means cframe w/o frame. -+ | std TMP2, SAVE_ERRF // No error function. -+ | std TMP1, SAVE_CFRAME -+ | std sp, L->cframe // Add our C frame to cframe chain. -+ | std L, DISPATCH_GL(cur_L)(DISPATCH) -+ | mr r12, CARG4 // keep r12 for function linkage. -+ | mtctr r12 -+ | bctrl // (lua_State *L, lua_CFunction func, void *ud) -+ | mr. BASE, CRET1 -+ | li PC, FRAME_CP -+ | bne <3 // Else continue with the call. -+ | b ->vm_leave_cp // No base? Just remove C frame. -+ | -+ |//----------------------------------------------------------------------- -+ |//-- Metamethod handling ------------------------------------------------ -+ |//----------------------------------------------------------------------- -+ | -+ |// The lj_meta_* functions (except for lj_meta_cat) don't reallocate the -+ |// stack, so BASE doesn't need to be reloaded across these calls. -+ | -+ |//-- Continuation dispatch ---------------------------------------------- -+ | -+ |->cont_dispatch: -+ | // BASE = meta base, RA = resultptr, RD = (nresults+1)*8 -+ | ld TMP0, -32(BASE) // Continuation. -+ | clear_field TMP0 -+ | mr RB, BASE -+ | mr BASE, TMP2 // Restore caller BASE. -+ | ld LFUNC:TMP1, FRAME_FUNC(TMP2) -+ | clear_field LFUNC:TMP1 -+ |.if FFI -+ | cmpldi TMP0, 1 -+ |.endif -+ | ld PC, -24(RB) // Restore PC from [cont|PC]. -+ | subi TMP2, RD, 8 -+ | ld TMP1, LFUNC:TMP1->pc -+ | stdx TISNIL, RA, TMP2 // Ensure one valid arg. -+ |.if FFI -+ | ble >1 -+ |.endif -+ | ld KBASE, PC2PROTO(k)(TMP1) -+ | // BASE = base, RA = resultptr, RB = meta base -+ | mtctr TMP0 -+ | bctr // Jump to continuation. -+ | -+ |.if FFI -+ |1: -+ | beq ->cont_ffi_callback // cont = 1: return from FFI callback. -+ | // cont = 0: tailcall from C function. 
-+ | subi TMP1, RB, 32 -+ | sub RC, TMP1, BASE -+ | b ->vm_call_tail -+ |.endif -+ | -+ |->cont_cat: // RA = resultptr, RB = meta base -+ | lwz INS, -4(PC) -+ | subi CARG2, RB, 32 -+ | decode_RB8 SAVE0, INS -+ | lfd f0, 0(RA) -+ | add TMP1, BASE, SAVE0 -+ | std BASE, L->base -+ | cmpld TMP1, CARG2 -+ | sub CARG3, CARG2, TMP1 -+ | decode_RA8 RA, INS -+ | stfd f0, 0(CARG2) -+ | bney ->BC_CAT_Z -+ | stfdx f0, BASE, RA -+ | b ->cont_nop -+ | -+ |//-- Table indexing metamethods ----------------------------------------- -+ | -+ |->vmeta_tgets1: -+ | la CARG3, DISPATCH_GL(tmptv)(DISPATCH) -+ | li TMP0, LJ_TSTR -+ | set_oper_type TMP0, TMP0 -+ | decode_RB8 RB, INS -+ | add STR:RC, RC, TMP0 -+ | add CARG2, BASE, RB -+ | std STR:RC, 0(CARG3) -+ | b >1 -+ | -+ |->vmeta_tgets: -+ | la CARG2, DISPATCH_GL(tmptv)(DISPATCH) -+ | li TMP0, LJ_TTAB -+ | set_oper_type TMP0, TMP0 -+ | add TMP0, TMP0, TAB:RB -+ | la CARG3, DISPATCH_GL(tmptv2)(DISPATCH) -+ | std TMP0, 0(CARG2) -+ | li TMP1, LJ_TSTR -+ | set_oper_type TMP1, TMP1 -+ | add TMP1, TMP1, TAB:RC -+ | std TMP1, 0(CARG3) -+ | b >1 -+ | -+ |->vmeta_tgetb: // TMP0 = index -+ | decode_RB8 RB, INS -+ | la CARG3, DISPATCH_GL(tmptv)(DISPATCH) -+ | add CARG2, BASE, RB -+ | li TISNUM, LJ_TISNUM -+ | set_oper_type TISNUM, TISNUM -+ | add TMP0, TMP0, TISNUM -+ | std TMP0, 0(CARG3) -+ | b >1 -+ | -+ |->vmeta_tgetv: -+ | decode_RB8 RB, INS -+ | decode_RC8 RC, INS -+ | add CARG2, BASE, RB -+ | add CARG3, BASE, RC -+ |1: -+ | std BASE, L->base -+ | mr CARG1, L -+ | std PC, SAVE_PC -+ | bl extern lj_meta_tget // (lua_State *L, TValue *o, TValue *k) -+ | // Returns TValue * (finished) or NULL (metamethod). -+ | cmpldi CRET1, 0 -+ | beq >3 -+ | lfd f0, 0(CRET1) -+ | ins_next1 -+ | stfdx f0, BASE, RA -+ | ins_next2 -+ | -+ |3: // Call __index metamethod. 
-+ | // BASE = base, L->top = new base, stack = cont/func/t/k -+ | subfic TMP1, BASE, FRAME_CONT -+ | ld BASE, L->top -+ | std PC, -24(BASE) // [cont|PC] -+ | add PC, TMP1, BASE -+ | ld LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. -+ | li NARGS8:RC, 16 // 2 args for func(t, k). -+ | clear_field RB -+ | b ->vm_call_dispatch_f -+ | -+ |->vmeta_tgetr: -+ | bl extern lj_tab_getinth // (GCtab *t, int32_t key) -+ | // Returns cTValue * or NULL. -+ | cmpldi CRET1, 0 -+ | beq >1 -+ | lfd f14, 0(CRET1) -+ | b ->BC_TGETR_Z -+ |1: -+ | stdx TISNIL, BASE, RA -+ | b ->cont_nop -+ | -+ |//----------------------------------------------------------------------- -+ | -+ |->vmeta_tsets1: -+ | la CARG3, DISPATCH_GL(tmptv)(DISPATCH) -+ | li TMP0, LJ_TSTR -+ | set_oper_type TMP0, TMP0 -+ | decode_RB8 RB, INS -+ | add TMP0, TMP0, STR:RC -+ | add CARG2, BASE, RB -+ | std TMP0, 0(CARG3) -+ | b >1 -+ | -+ |->vmeta_tsets: -+ | la CARG2, DISPATCH_GL(tmptv)(DISPATCH) -+ | li TMP0, LJ_TTAB -+ | set_oper_type TMP0, TMP0 -+ | add TAB:RB, TMP0, TAB:RB -+ | la CARG3, DISPATCH_GL(tmptv2)(DISPATCH) -+ | std TAB:RB, 0(CARG2) -+ | li TMP1, LJ_TSTR -+ | set_oper_type TMP1, TMP1 -+ | add STR:RC, TMP1, STR:RC -+ | la CARG3, DISPATCH_GL(tmptv2)(DISPATCH) -+ | std STR:RC, 0(CARG3) -+ | b >1 -+ | -+ |->vmeta_tsetb: // TMP0 = index -+ | decode_RB8 RB, INS -+ | la CARG3, DISPATCH_GL(tmptv)(DISPATCH) -+ | add CARG2, BASE, RB -+ | li TISNUM, LJ_TISNUM -+ | set_oper_type TISNUM, TISNUM -+ | add TMP0, TMP0, TISNUM // assume TMP0 has no type -+ | std TMP0, 0(CARG3) -+ | b >1 -+ | -+ |->vmeta_tsetv: -+ | decode_RB8 RB, INS -+ | decode_RC8 RC, INS -+ | add CARG2, BASE, RB -+ | add CARG3, BASE, RC -+ |1: -+ | std BASE, L->base -+ | mr CARG1, L -+ | std PC, SAVE_PC -+ | bl extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k) -+ | // Returns TValue * (finished) or NULL (metamethod). 
-+ | cmpldi CRET1, 0 -+ | lfdx f0, BASE, RA -+ | beq >3 -+ | // NOBARRIER: lj_meta_tset ensures the table is not black. -+ | ins_next1 -+ | stfd f0, 0(CRET1) -+ | ins_next2 -+ | -+ |3: // Call __newindex metamethod. -+ | // BASE = base, L->top = new base, stack = cont/func/t/k/(v) -+ | subfic TMP1, BASE, FRAME_CONT -+ | ld BASE, L->top -+ | std PC, -24(BASE) // [cont|PC] -+ | add PC, TMP1, BASE -+ | ld LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. -+ | li NARGS8:RC, 24 // 3 args for func(t, k, v) -+ | stfd f0, 16(BASE) // Copy value to third argument. -+ | clear_field RB -+ | b ->vm_call_dispatch_f -+ | -+ |->vmeta_tsetr: -+ | std BASE, L->base -+ | std PC, SAVE_PC -+ | bl extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key) -+ | // Returns TValue *. -+ | stfd f14, 0(CRET1) -+ | b ->cont_nop -+ | -+ |//-- Comparison metamethods --------------------------------------------- -+ | -+ |->vmeta_comp: -+ | mr CARG1, L -+ | subi PC, PC, 4 -+ | mr CARG2, RA -+ | std PC, SAVE_PC -+ | mr CARG3, RD -+ | std BASE, L->base -+ | decode_OP1 CARG4, INS -+ | bl extern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op) -+ | // Returns 0/1 or TValue * (metamethod). -+ |3: -+ | cmpldi CRET1, 1 -+ | bgt ->vmeta_binop -+ | subfic CRET1, CRET1, 0 -+ |4: -+ | lwz INS, 0(PC) -+ | addi PC, PC, 4 -+ | decode_RD4 TMP2, INS -+ | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) -+ | and TMP2, TMP2, CRET1 -+ | add PC, PC, TMP2 -+ |->cont_nop: -+ | ins_next -+ | -+ |->cont_ra: // RA = resultptr -+ | lwz INS, -4(PC) -+ | lfd f0, 0(RA) -+ | decode_RA8 TMP1, INS -+ | stfdx f0, BASE, TMP1 -+ | b ->cont_nop -+ | -+ |->cont_condt: // RA = resultptr -+ | ld TMP0, 0(RA) -+ | get_oper_type TMP0, TMP0 -+ | subfic TMP0, TMP0, LJ_TTRUE // Branch if result is true. 
-+ | set_oper_type TMP0, TMP0 -+ | subfe CRET1, CRET1, CRET1 -+ | not CRET1, CRET1 -+ | b <4 -+ | -+ |->cont_condf: // RA = resultptr -+ | ld TMP0, 0(RA) -+ | get_oper_type TMP0, TMP0 -+ | subfic TMP0, TMP0, LJ_TTRUE // Branch if result is false. -+ | set_oper_type TMP0, TMP0 -+ | subfe CRET1, CRET1, CRET1 -+ | b <4 -+ | -+ |->vmeta_equal: -+ | // CARG2, CARG3, CARG4 are already set by BC_ISEQV/BC_ISNEV. -+ | clear_field CARG3 -+ | subi PC, PC, 4 -+ | std BASE, L->base -+ | mr CARG1, L -+ | std PC, SAVE_PC -+ | bl extern lj_meta_equal // (lua_State *L, GCobj *o1, *o2, int ne) -+ | // Returns 0/1 or TValue * (metamethod). -+ | b <3 -+ | -+ |->vmeta_equal_cd: -+ |.if FFI -+ | mr CARG2, INS -+ | subi PC, PC, 4 -+ | std BASE, L->base -+ | mr CARG1, L -+ | std PC, SAVE_PC -+ | bl extern lj_meta_equal_cd // (lua_State *L, BCIns op) -+ | // Returns 0/1 or TValue * (metamethod). -+ | b <3 -+ |.endif -+ | -+ |->vmeta_istype: -+ | subi PC, PC, 4 -+ | std BASE, L->base -+ | srdi CARG2, RA, 3 -+ | mr CARG1, L -+ | srdi CARG3, RD, 3 -+ | std PC, SAVE_PC -+ | bl extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp) -+ | b ->cont_nop -+ | -+ |//-- Arithmetic metamethods --------------------------------------------- -+ | -+ |->vmeta_arith_nv: -+ | add CARG3, KBASE, RC -+ | add CARG4, BASE, RB -+ | b >1 -+ |->vmeta_arith_nv2: -+ | mr CARG3, RC -+ | mr CARG4, RB -+ | b >1 -+ | -+ |->vmeta_unm: -+ | mr CARG3, RD -+ | mr CARG4, RD -+ | b >1 -+ | -+ |->vmeta_arith_vn: -+ | add CARG3, BASE, RB -+ | add CARG4, KBASE, RC -+ | b >1 -+ | -+ |->vmeta_arith_vv: -+ | add CARG3, BASE, RB -+ | add CARG4, BASE, RC -+ | b >1 -+ |->vmeta_arith_vn2: -+ |->vmeta_arith_vv2: -+ | mr CARG3, RB -+ | mr CARG4, RC -+ |1: -+ | add CARG2, BASE, RA -+ | std BASE, L->base -+ | mr CARG1, L -+ | std PC, SAVE_PC -+ | decode_OP1 CARG5, INS // Caveat: CARG5 overlaps INS. -+ | bl extern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op) -+ | // Returns NULL (finished) or TValue * (metamethod). 
-+ | cmpldi CRET1, 0 -+ | beq ->cont_nop -+ | -+ | // Call metamethod for binary op. -+ |->vmeta_binop: -+ | // BASE = old base, CRET1 = new base, stack = cont/func/o1/o2 -+ | sub TMP1, CRET1, BASE -+ | std PC, -24(CRET1) // [cont|PC] -+ | mr TMP2, BASE -+ | addi PC, TMP1, FRAME_CONT -+ | mr BASE, CRET1 -+ | li NARGS8:RC, 16 // 2 args for func(o1, o2). -+ | b ->vm_call_dispatch -+ | -+ |->vmeta_len: -+#if LJ_52 -+ | mr SAVE0, CARG1 -+#endif -+ | mr CARG2, RD -+ | std BASE, L->base -+ | mr CARG1, L -+ | std PC, SAVE_PC -+ | bl extern lj_meta_len // (lua_State *L, TValue *o) -+ | // Returns NULL (retry) or TValue * (metamethod base). -+#if LJ_52 -+ | cmpldi CRET1, 0 -+ | bne ->vmeta_binop // Binop call for compatibility. -+ | mr CARG1, SAVE0 -+ | b ->BC_LEN_Z -+#else -+ | b ->vmeta_binop // Binop call for compatibility. -+#endif -+ | -+ |//-- Call metamethod ---------------------------------------------------- -+ | -+ |->vmeta_call: // Resolve and call __call metamethod. -+ | // TMP2 = old base, BASE = new base, RC = nargs*8 -+ | mr CARG1, L -+ | std TMP2, L->base // This is the callers base! -+ | subi CARG2, BASE, 16 -+ | std PC, SAVE_PC -+ | add CARG3, BASE, RC -+ | mr SAVE0, NARGS8:RC -+ | bl extern lj_meta_call // (lua_State *L, TValue *func, TValue *top) -+ | ld LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. -+ | addi NARGS8:RC, SAVE0, 8 // Got one more argument now. -+ | clear_field RB -+ | ins_call -+ | -+ |->vmeta_callt: // Resolve __call for BC_CALLT. -+ | // BASE = old base, RA = new base, RC = nargs*8 -+ | mr CARG1, L -+ | std BASE, L->base -+ | subi CARG2, RA, 16 -+ | std PC, SAVE_PC -+ | add CARG3, RA, RC -+ | mr SAVE0, NARGS8:RC -+ | bl extern lj_meta_call // (lua_State *L, TValue *func, TValue *top) -+ | ld TMP1, FRAME_PC(BASE) -+ | addi NARGS8:RC, SAVE0, 8 // Got one more argument now. -+ | ld LFUNC:RB, FRAME_FUNC(RA) // Guaranteed to be a function here. 
-+ | clear_field RB -+ | b ->BC_CALLT_Z -+ | -+ |//-- Argument coercion for 'for' statement ------------------------------ -+ | -+ |->vmeta_for: -+ | mr CARG1, L -+ | std BASE, L->base -+ | mr CARG2, RA -+ | std PC, SAVE_PC -+ | mr SAVE0, INS -+ | bl extern lj_meta_for // (lua_State *L, TValue *base) -+ |.if JIT -+ | decode_OP1 TMP0, SAVE0 -+ |.endif -+ | decode_RA8 RA, SAVE0 -+ |.if JIT -+ | cmpdi TMP0, BC_JFORI -+ |.endif -+ | decode_RD8 RD, SAVE0 -+ |.if JIT -+ | beqy =>BC_JFORI -+ |.endif -+ | b =>BC_FORI -+ | -+ |//----------------------------------------------------------------------- -+ |//-- Fast functions ----------------------------------------------------- -+ |//----------------------------------------------------------------------- -+ | -+ |.macro .ffunc, name -+ |->ff_ .. name: -+ |.endmacro -+ | -+ |.macro .ffunc_1, name -+ |->ff_ .. name: -+ | cmpldi NARGS8:RC, 8 -+ | ld CARG1, 0(BASE) -+ | blt ->fff_fallback -+ |.endmacro -+ | -+ |.macro .ffunc_2, name -+ |->ff_ .. name: -+ | cmpldi NARGS8:RC, 16 -+ | ld CARG1, 0(BASE) -+ | ld CARG2, 8(BASE) -+ | blt ->fff_fallback -+ |.endmacro -+ | -+ |.macro .ffunc_n, name -+ |->ff_ .. name: -+ | cmpldi NARGS8:RC, 8 -+ | ld CARG3, 0(BASE) -+ | lfd FARG1, 0(BASE) -+ | blt ->fff_fallback -+ | checknum CARG3; bge ->fff_fallback -+ |.endmacro -+ | -+ |.macro .ffunc_nn, name -+ |->ff_ .. name: -+ | cmpldi NARGS8:RC, 16 -+ | ld CARG3, 0(BASE) -+ | lfd FARG1, 0(BASE) -+ | ld CARG4, 8(BASE) -+ | lfd FARG2, 8(BASE) -+ | blt ->fff_fallback -+ | checknum CARG3; bge ->fff_fallback -+ | checknum CARG4; bge ->fff_fallback -+ |.endmacro -+ | -+ |// Inlined GC threshold check. Caveat: uses TMP0 and TMP1. 
-+ |.macro ffgccheck -+ | ld TMP0, DISPATCH_GL(gc.total)(DISPATCH) -+ | ld TMP1, DISPATCH_GL(gc.threshold)(DISPATCH) -+ | cmpld TMP0, TMP1 -+ | bgel ->fff_gcstep -+ |.endmacro -+ | -+ |//-- Base library: checks ----------------------------------------------- -+ | -+ |.ffunc_1 assert -+ | set_bool TMP1, LJ_TFALSE -+ | la RA, -16(BASE) -+ | cmpld cr1, CARG1, TMP1 -+ | ld PC, FRAME_PC(BASE) -+ | bge cr1, ->fff_fallback -+ | std CARG1, 0(RA) -+ | addi RD, NARGS8:RC, 8 // Compute (nresults+1)*8. -+ | beq ->fff_res // Done if exactly 1 argument. -+ | li TMP1, 8 -+ | subi RC, RC, 8 -+ |1: -+ | cmpld TMP1, RC -+ | ldx TMP0, BASE, TMP1 -+ | stdx TMP0, RA, TMP1 -+ | addi TMP1, TMP1, 8 -+ | bney <1 -+ | b ->fff_res -+ | -+ |.ffunc type -+ | cmpldi NARGS8:RC, 8 -+ | ld CARG1, 0(BASE) -+ | get_oper_type CARG1, CARG1 // only type is needed -+ | blt ->fff_fallback -+ | get_oper_type TMP0, TISNUM // comparing with type shifted -+ | subfc TMP0, TMP0, CARG1 -+ | subfe TMP2, CARG1, CARG1 -+ | orc TMP1, TMP2, TMP0 -+ | addi TMP1, TMP1, ~LJ_TISNUM+1 -+ | sldi TMP1, TMP1, 3 -+ | la TMP2, CFUNC:RB->upvalue -+ | lfdx FARG1, TMP2, TMP1 -+ | ldx CARG1, TMP2, TMP1 -+ | b ->fff_restv -+ | -+ |//-- Base library: getters and setters --------------------------------- -+ | -+ |.ffunc_1 getmetatable -+ | get_oper_type CARG3, CARG1 // saving for :6 -+ | checktab CARG1; bne >6 -+ |1: // Field metatable must be at same offset for GCtab and GCudata! -+ | ld TAB:RB, TAB:CARG1->metatable -+ |2: -+ | mr CARG1, TISNIL -+ | cmpldi TAB:RB, 0 -+ | ld STR:RC, DISPATCH_GL(gcroot[GCROOT_MMNAME+MM_metatable])(DISPATCH) -+ | beq ->fff_restv -+ | lwz TMP0, TAB:RB->hmask -+ | li CARG1, LJ_TTAB // Use metatable as default result. 
-+ | ld TMP1, STR:RC->hash -+ | set_oper_type CARG1, CARG1 -+ | ld NODE:TMP2, TAB:RB->node -+ | add CARG1, CARG1, TAB:RB -+ | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask -+ | sldi TMP0, TMP1, 5 -+ | sldi TMP1, TMP1, 3 -+ | sub TMP1, TMP0, TMP1 -+ | add NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) -+ |3: // Rearranged logic, because we expect _not_ to find the key. -+ | ld CARG4, NODE:TMP2->key -+ | ld CARG2, NODE:TMP2->val -+ | checkstr CARG4; bne >4 -+ | cmpd CARG4, STR:RC; beq >5 -+ |4: -+ | ld NODE:TMP2, NODE:TMP2->next -+ | cmpldi NODE:TMP2, 0 -+ | beq ->fff_restv // Not found, keep default result. -+ | b <3 -+ |5: -+ | checknil_noclear CARG2 -+ | beq ->fff_restv // Ditto for nil value. -+ | mr CARG1, CARG2 -+ | b ->fff_restv -+ | -+ |6: -+ | get_oper_type TMP3, TISNUM -+ | cmpdi CARG3, LJ_TUDATA; beq <1 -+ | subfc TMP0, TMP3, CARG3 -+ | subfe TMP2, CARG3, CARG3 -+ | orc TMP1, TMP2, TMP0 -+ | addi TMP1, TMP1, ~LJ_TISNUM+1 -+ | sldi TMP1, TMP1, 3 -+ | la TMP2, DISPATCH_GL(gcroot[GCROOT_BASEMT])(DISPATCH) -+ | ldx TAB:RB, TMP2, TMP1 -+ | b <2 -+ | -+ |.ffunc_2 setmetatable -+ | // Fast path: no mt for table yet and not clearing the mt. -+ | mr TMP2, CARG1 -+ | checktab TMP2; bne ->fff_fallback -+ | ld TAB:TMP1, TAB:TMP2->metatable -+ | checktab CARG2; bne ->fff_fallback -+ | cmpldi TAB:TMP1, 0 -+ | lbz TMP3, TAB:TMP2->marked -+ | bne ->fff_fallback -+ | andi. TMP0, TMP3, LJ_GC_BLACK // isblack(table) -+ | std TAB:CARG2, TAB:TMP2->metatable -+ | beq ->fff_restv -+ | barrierback TAB:TMP2, TMP3, TMP0 -+ | b ->fff_restv -+ | -+ |.ffunc rawget -+ | cmpldi NARGS8:RC, 16 -+ | ld CARG2, 0(BASE) -+ | blt ->fff_fallback -+ | checktab CARG2; bne ->fff_fallback -+ | la CARG3, 8(BASE) -+ | mr CARG1, L -+ | bl extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key) -+ | // Returns cTValue *. 
-+ | lfd FARG1, 0(CRET1) -+ | ld CARG1, 0(CRET1) -+ | b ->fff_restv -+ | -+ |//-- Base library: conversions ------------------------------------------ -+ | -+ |.ffunc tonumber -+ | // Only handles the number case inline (without a base argument). -+ | cmpldi NARGS8:RC, 8 -+ | ld CARG1, 0(BASE) -+ | lfd FARG1, 0(BASE) -+ | bne ->fff_fallback // Exactly one argument. -+ | checknum_noclear CARG1; ble ->fff_restv -+ | b ->fff_fallback -+ | -+ |.ffunc_1 tostring -+ | // Only handles the string or number case inline. -+ | // save CARG1 type for checknum_sig and fff_restv -+ | mr CARG3, CARG1 -+ | checkstr CARG3 -+ | // A __tostring method in the string base metatable is ignored. -+ | beq ->fff_restv // String key? -+ | // Handle numbers inline, unless a number base metatable is present. -+ | ld TMP0, DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])(DISPATCH) -+ | checknum_sig CARG1 -+ | cmpldi cr1, TMP0, 0 -+ | std BASE, L->base // Add frame since C call can throw. -+ | crorc 4*cr0+eq, 4*cr0+gt, 4*cr1+eq -+ | std PC, SAVE_PC // Redundant (but a defined value). -+ | beq ->fff_fallback -+ | ffgccheck -+ | mr CARG1, L -+ | mr CARG2, BASE -+ | bl extern lj_strfmt_number // (lua_State *L, cTValue *o) -+ | // Returns GCstr *. -+ | li CARG3, LJ_TSTR -+ | set_oper_type CARG3, CARG3 -+ | add CARG1, CARG1, CARG3 -+ | b ->fff_restv -+ | -+ |//-- Base library: iterators ------------------------------------------- -+ | -+ |.ffunc next -+ | cmpldi NARGS8:RC, 8 -+ | ld CARG1, 0(BASE) -+ | blt ->fff_fallback -+ | stdx TISNIL, BASE, NARGS8:RC // Set missing 2nd arg to nil. -+ | checktab CARG1 -+ | ld PC, FRAME_PC(BASE) -+ | bne ->fff_fallback -+ | mr CARG2, CARG1 -+ | std BASE, L->base // Add frame since C call can throw. -+ | mr CARG1, L -+ | std BASE, L->top // Dummy frame length is ok. -+ | la CARG3, 8(BASE) -+ | std PC, SAVE_PC -+ | bl extern lj_tab_next // (lua_State *L, GCtab *t, TValue *key) -+ | // Returns 0 at end of traversal. 
-+ | cmpldi CRET1, 0 -+ | li CARG3, LJ_TNIL -+ | std CARG3, -16(BASE) -+ | beq ->fff_res1 // End of traversal: return nil. -+ | ld CARG1, 8(BASE) // Copy key and value to results. -+ | la RA, -16(BASE) -+ | ld CARG2, 16(BASE) -+ | std CARG1, 0(RA) -+ | li RD, (2+1)*8 -+ | std CARG2, 8(RA) -+ | b ->fff_res -+ | -+ |.ffunc_1 pairs -+ | mr TMP1, CARG1 -+ | checktab CARG1 -+ | ld PC, FRAME_PC(BASE) -+ | bne ->fff_fallback -+#if LJ_52 -+ | ld TAB:TMP2, TAB:CARG1->metatable -+ | ld TMP0, CFUNC:RB->upvalue[0] -+ | cmpldi TAB:TMP2, 0 -+ | la RA, -16(BASE) -+ | bne ->fff_fallback -+#else -+ | ld TMP0, CFUNC:RB->upvalue[0] -+ | la RA, -16(BASE) -+#endif -+ | std TMP1, 8(RA) -+ | std TISNIL, 16(RA) -+ | li RD, (3+1)*8 -+ | std TMP0, 0(RA) -+ | b ->fff_res -+ | -+ |.ffunc ipairs_aux -+ | cmpldi NARGS8:RC, 16 -+ | ld CARG4, 8(BASE) -+ | ld CARG1, 0(BASE) -+ | blt ->fff_fallback -+ | checktab CARG1 -+ | checknum cr1, CARG4 -+ | ld PC, FRAME_PC(BASE) -+ | bne ->fff_fallback -+ | bne cr1, ->fff_fallback -+ | lwz TMP0, TAB:CARG1->asize -+ | ld TMP1, TAB:CARG1->array -+ | addi TMP2, CARG4, 1 -+ | la RA, -16(BASE) -+ | cmplw TMP0, TMP2 -+ | sldi TMP3, TMP2, 3 // TMP2 is array index -+ | add TMP2, TMP2, TISNUM // TMP2 is now lua number -+ | std TMP2, 0(RA) -+ | ble >2 // Not in array part? -+ | ldx TMP2, TMP1, TMP3 -+ |1: -+ | checknil_noclear TMP2 -+ | li RD, (0+1)*8 -+ | beq ->fff_res // End of iteration, return 0 results. -+ | li RD, (2+1)*8 -+ | std TMP2, 8(RA) -+ | b ->fff_res -+ |2: // Check for empty hash part first. Otherwise call C function. -+ | lwz TMP0, TAB:CARG1->hmask -+ | cmpldi TMP0, 0 -+ | li RD, (0+1)*8 -+ | beq ->fff_res -+ | mr CARG2, TMP2 -+ | bl extern lj_tab_getinth // (GCtab *t, int32_t key) -+ | // Returns cTValue * or NULL. 
-+ | cmpldi CRET1, 0 -+ | li RD, (0+1)*8 -+ | beq ->fff_res -+ | ld TMP2, 0(CRET1) -+ | b <1 -+ | -+ |.ffunc_1 ipairs -+ | mr TMP1, CARG1 -+ | checktab CARG1 -+ | ld PC, FRAME_PC(BASE) -+ | bne ->fff_fallback -+#if LJ_52 -+ | ld TAB:TMP2, TAB:CARG1->metatable -+ | ld TMP0, CFUNC:RB->upvalue[0] -+ | cmpldi TAB:TMP2, 0 -+ | la RA, -16(BASE) -+ | bne ->fff_fallback -+#else -+ | ld TMP0, CFUNC:RB->upvalue[0] -+ | la RA, -16(BASE) -+#endif -+ | std TMP1, -8(BASE) -+ | std TISNUM, 0(BASE) -+ | li RD, (3+1)*8 -+ | std TMP0, -16(BASE) -+ | b ->fff_res -+ | -+ |//-- Base library: catch errors ---------------------------------------- -+ | -+ |.ffunc pcall -+ | cmpldi NARGS8:RC, 8 -+ | lbz TMP3, DISPATCH_GL(hookmask)(DISPATCH) -+ | blt ->fff_fallback -+ | mr TMP2, BASE -+ | la BASE, 16(BASE) -+ | // Remember active hook before pcall. -+ | rlwinm TMP3, TMP3, 32-HOOK_ACTIVE_SHIFT, 31, 31 -+ | subi NARGS8:RC, NARGS8:RC, 8 -+ | addi PC, TMP3, 16+FRAME_PCALL -+ | cmpdi NARGS8:RC, 0 -+ | beq ->vm_call_dispatch -+ |1: -+ | add TMP1, BASE, NARGS8:RC -+ |2: -+ | ld TMP0, -16(TMP1) -+ | stdu TMP0, -8(TMP1) -+ | cmpld TMP1, BASE -+ | bne <2 -+ | b ->vm_call_dispatch -+ | -+ |.ffunc xpcall -+ | subic. NARGS8:RC, NARGS8:RC, 16 -+ | ld CARG2, 8(BASE) -+ | ld CARG1, 0(BASE) -+ | blt ->fff_fallback -+ | lbz TMP1, DISPATCH_GL(hookmask)(DISPATCH) -+ | mr TMP2, BASE -+ | // Traceback must be a function. -+ | checkfunc_noclear CARG2; bne ->fff_fallback -+ | la BASE, 24(BASE) -+ | // Remember active hook before pcall. -+ | rlwinm TMP1, TMP1, 32-HOOK_ACTIVE_SHIFT, 31, 31 -+ | std CARG2, 0(TMP2) // Swap function and traceback. 
-+ | cmpdi RC, 0 -+ | std CARG1, 8(TMP2) -+ | addi PC, TMP1, 24+FRAME_PCALL -+ | beq ->vm_call_dispatch -+ | b <1 -+ | -+ |//-- Coroutine library -------------------------------------------------- -+ | -+ |.macro coroutine_resume_wrap, resume -+ |.if resume -+ |.ffunc_1 coroutine_resume -+ | checkthread CARG1; bne ->fff_fallback -+ |.else -+ |.ffunc coroutine_wrap_aux -+ | ld L:CARG1, CFUNC:RB->upvalue[0].gcr -+ | clear_field L:CARG1 -+ |.endif -+ | lbz TMP0, L:CARG1->status -+ | ld TMP1, L:CARG1->cframe -+ | la TMP3, L:CARG1->base -+ | ld TMP2, 0(TMP3) -+ | ld CARG2, 8(TMP3) -+ | cmpldi cr0, TMP0, LUA_YIELD -+ | add TMP3, CARG2, TMP0 -+ | cmpldi cr1, TMP1, 0 -+ | cmpd cr7, TMP3, TMP2 -+ | ld TMP0, L:CARG1->maxstack -+ | beq cr7, ->fff_fallback -+ | cmpld cr7, CARG2, TMP2 -+ | ld PC, FRAME_PC(BASE) -+ | bge cr0, >9 -+ | addi CARG2, CARG2, 8 -+ |9: -+ | crorc 4*cr6+lt, 4*cr0+gt, 4*cr1+eq // st>LUA_YIELD || cframe!=0 -+ | add TMP2, CARG2, NARGS8:RC -+ | crandc 4*cr6+gt, 4*cr7+eq, 4*cr0+eq // base==top && st!=LUA_YIELD -+ | cmpld cr1, TMP2, TMP0 -+ | cror 4*cr6+lt, 4*cr6+lt, 4*cr6+gt -+ | std PC, SAVE_PC -+ | cror 4*cr6+lt, 4*cr6+lt, 4*cr1+gt // cond1 || cond2 || stackov -+ | std BASE, L->base -+ | blt cr6, ->fff_fallback -+ |1: -+ |.if resume -+ | addi BASE, BASE, 8 // Keep resumed thread in stack for GC. -+ | subi NARGS8:RC, NARGS8:RC, 8 -+ | subi TMP2, TMP2, 8 -+ |.endif -+ | std TMP2, L:CARG1->top -+ | li TMP1, 0 -+ | std BASE, L->top -+ |2: // Move args to coroutine. -+ | cmpd TMP1, NARGS8:RC -+ | lfdx f0, BASE, TMP1 -+ | beq >3 -+ | stfdx f0, CARG2, TMP1 -+ | addi TMP1, TMP1, 8 -+ | b <2 -+ |3: -+ | li CARG3, 0 -+ | mr L:SAVE0, L:CARG1 -+ | li CARG4, 0 -+ | bl ->vm_resume // (lua_State *L, TValue *base, 0, 0) -+ | // Returns thread status. 
-+ |4: -+ | ld TMP2, L:SAVE0->base -+ | cmpldi CRET1, LUA_YIELD -+ | ld TMP3, L:SAVE0->top -+ | li_vmstate INTERP -+ | ld BASE, L->base -+ | std L, DISPATCH_GL(cur_L)(DISPATCH) -+ | st_vmstate -+ | bgt >8 -+ | sub RD, TMP3, TMP2 -+ | ld TMP0, L->maxstack -+ | cmpldi RD, 0 -+ | add TMP1, BASE, RD -+ | beq >6 // No results? -+ | cmpld TMP1, TMP0 -+ | li TMP1, 0 -+ | bgt >9 // Need to grow stack? -+ | -+ | subi TMP3, RD, 8 -+ | std TMP2, L:SAVE0->top // Clear coroutine stack. -+ |5: // Move results from coroutine. -+ | cmpld TMP1, TMP3 -+ | lfdx f0, TMP2, TMP1 -+ | stfdx f0, BASE, TMP1 -+ | addi TMP1, TMP1, 8 -+ | bne <5 -+ |6: -+ | andi. TMP0, PC, FRAME_TYPE -+ |.if resume -+ | set_bool TMP1, LJ_TTRUE -+ | la RA, -8(BASE) -+ | std TMP1, -8(BASE) // Prepend true to results. -+ | addi RD, RD, 16 -+ |.else -+ | mr RA, BASE -+ | addi RD, RD, 8 -+ |.endif -+ |7: -+ | std PC, SAVE_PC -+ | mr MULTRES, RD -+ | beq ->BC_RET_Z -+ | b ->vm_return -+ | -+ |8: // Coroutine returned with error (at co->top-1). -+ |.if resume -+ | andi. TMP0, PC, FRAME_TYPE -+ | la TMP3, -8(TMP3) -+ | set_bool TMP1, LJ_TFALSE -+ | lfd f0, 0(TMP3) -+ | std TMP3, L:SAVE0->top // Remove error from coroutine stack. -+ | li RD, (2+1)*8 -+ | std TMP1, -16(BASE) // Prepend false to results. -+ | la RA, -16(BASE) -+ | stfd f0, -8(BASE) // Copy error message. -+ | b <7 -+ |.else -+ | mr CARG1, L -+ | mr CARG2, L:SAVE0 -+ | bl extern lj_ffh_coroutine_wrap_err // (lua_State *L, lua_State *co) -+ |.endif -+ | -+ |9: // Handle stack expansion on return from yield. -+ | mr CARG1, L -+ | srdi CARG2, RD, 3 -+ | bl extern lj_state_growstack // (lua_State *L, int n) -+ | li CRET1, 0 -+ | b <4 -+ |.endmacro -+ | -+ | coroutine_resume_wrap 1 // coroutine.resume -+ | coroutine_resume_wrap 0 // coroutine.wrap -+ | -+ |.ffunc coroutine_yield -+ | ld TMP0, L->cframe -+ | add TMP1, BASE, NARGS8:RC -+ | std BASE, L->base -+ | andi. 
TMP0, TMP0, CFRAME_RESUME -+ | std TMP1, L->top -+ | li CRET1, LUA_YIELD -+ | beq ->fff_fallback -+ | std ZERO, L->cframe -+ | stb CRET1, L->status -+ | b ->vm_leave_unw -+ | -+ |//-- Math library ------------------------------------------------------- -+ | -+ |.ffunc_1 math_abs -+ | checknum_noclear CARG1 -+ | bne >2 -+ | get_value CARG1 -+ | sradi TMP1, CARG1, 31 -+ | xor TMP2, TMP1, CARG1 -+ | lus TMP0, 0x8000 -+ | sub CARG1, TMP2, TMP1 -+ | get_value CARG1 -+ | cmpld CARG1, TMP0 -+ | beq >1 -+ |->fff_resi: -+ | ld PC, FRAME_PC(BASE) -+ | la RA, -16(BASE) -+ | add_oper_type CRET1, TISNUM -+ | std CRET1, -16(BASE) -+ | b ->fff_res1 -+ |1: -+ | lus CARG1, 0x41e0 // 2^31. -+ | sldi CARG1, CARG1, 32 -+ | b ->fff_restv -+ |2: -+ | bge ->fff_fallback -+ | clrldi CARG1,CARG1,1 -+ | // Fallthrough. -+ | -+ |->fff_restv: -+ | // CARG1 = TValue result. -+ | ld PC, FRAME_PC(BASE) -+ | std CARG1, -16(BASE) -+ | la RA, -16(BASE) -+ |->fff_res1: -+ | // RA = results, PC = return. -+ | li RD, (1+1)*8 -+ |->fff_res: -+ | // RA = results, RD = (nresults+1)*8, PC = return. -+ | andi. TMP0, PC, FRAME_TYPE -+ | mr MULTRES, RD -+ | subi RA, BASE, 16 -+ | bney ->vm_return -+ | lwz INS, -4(PC) -+ | decode_RB8 RB, INS -+ |5: -+ | cmpld RB, RD // More results expected? -+ | decode_RA8 TMP0, INS -+ | bgt >6 -+ | ins_next1 -+ | // Adjust BASE. KBASE is assumed to be set for the calling frame. -+ | sub BASE, RA, TMP0 -+ | ins_next2 -+ | -+ |6: // Fill up results with nil. -+ | subi TMP1, RD, 8 -+ | addi RD, RD, 8 -+ | stdx TISNIL, RA, TMP1 -+ | b <5 -+ | -+ |.macro math_extern, func -+ | .ffunc_n math_ .. func -+ | blex func -+ | b ->fff_resn -+ |.endmacro -+ | -+ |.macro math_extern2, func -+ | .ffunc_nn math_ .. func -+ | blex func -+ | b ->fff_resn -+ |.endmacro -+ | -+ |.macro math_round, func, round -+ | .ffunc math_ .. 
func -+ | ld CARG1, 0(BASE) -+ | cmplwi NARGS8:RC, 8 -+ | lfd FARG1, 0(BASE) -+ | blt ->fff_fallback -+ | checknum CARG1; bgt ->fff_fallback -+ | srdi TMP0, CARG1, 32 -+ | sldi TMP1, TISNUM, 15 -+ | cmpld TMP0, TMP1 -+ | beq ->fff_restv -+ | round FARG1, FARG1 -+ | b ->fff_resn -+ |.endmacro -+ | -+ | math_round floor, frim -+ | math_round ceil, frip -+ | -+ |.if SQRT -+ |.ffunc_n math_sqrt -+ | fsqrt FARG1, FARG1 -+ | b ->fff_resn -+ |.else -+ | math_extern sqrt -+ |.endif -+ | -+ |.ffunc math_log -+ | cmpldi NARGS8:RC, 8 -+ | ld CARG3, 0(BASE) -+ | lfd FARG1, 0(BASE) -+ | bne ->fff_fallback // Need exactly 1 argument. -+ | checknum CARG3; bge ->fff_fallback -+ | blex log -+ | b ->fff_resn -+ | -+ | math_extern log10 -+ | math_extern exp -+ | math_extern sin -+ | math_extern cos -+ | math_extern tan -+ | math_extern asin -+ | math_extern acos -+ | math_extern atan -+ | math_extern sinh -+ | math_extern cosh -+ | math_extern tanh -+ | math_extern2 pow -+ | math_extern2 atan2 -+ | math_extern2 fmod -+ | -+ |.ffunc math_ldexp -+ | cmpldi NARGS8:RC, 16 -+ | ld CARG1, 0(BASE) -+ | ld CARG2, 8(BASE) -+ | blt ->fff_fallback -+ | checknum_noclear CARG1; bge ->fff_fallback -+ | checknum_noclear CARG2; bne ->fff_fallback -+ | std CARG1, 0(BASE) -+ | lfd FARG1, 0(BASE) -+ | std TOCREG, SAVE_TOC -+ | blex ldexp -+ | b ->fff_resn -+ | -+ |.ffunc_n math_frexp -+ | la CARG2, DISPATCH_GL(tmptv)(DISPATCH) -+ | ld PC, FRAME_PC(BASE) -+ | blex frexp -+ | lwz TMP1, DISPATCH_GL(tmptv)(DISPATCH) -+ | la RA, -16(BASE) -+ | stfd FARG1, 0(RA) -+ | li RD, (2+1)*8 -+ | add TMP1, TMP1, TISNUM -+ | std TMP1, 8(RA) -+ | b ->fff_res -+ | -+ |.ffunc_n math_modf -+ | la CARG2, -16(BASE) -+ | ld PC, FRAME_PC(BASE) -+ | blex modf -+ | la RA, -16(BASE) -+ | stfd FARG1, -8(BASE) -+ | li RD, (2+1)*8 -+ | b ->fff_res -+ | -+ |.macro math_minmax, name, ismax -+ | .ffunc_1 name -+ | checknum_noclear CARG1 -+ | addi TMP1, BASE, 8 -+ | add TMP2, BASE, NARGS8:RC -+ | bne >4 -+ |1: // Handle integers. 
-+ | ld CARG2, 0(TMP1) -+ | cmpld cr1, TMP1, TMP2 -+ | bge cr1, ->fff_restv -+ | checknum_noclear CARG2 -+ | xoris TMP0, CARG1, 0x8000 -+ | clrldi TMP0, TMP0, 32 -+ | xoris TMP3, CARG2, 0x8000 -+ | clrldi TMP3, TMP3, 32 -+ | bne >3 -+ | subfc TMP3, TMP3, TMP0 -+ | subfe TMP0, TMP0, TMP0 -+ |.if ismax -+ | andc TMP3, TMP3, TMP0 -+ |.else -+ | and TMP3, TMP3, TMP0 -+ |.endif -+ | add CARG1, TMP3, CARG2 -+ | add_oper_type CARG1, TISNUM -+ | addi TMP1, TMP1, 8 -+ | b <1 -+ |3: -+ | bge ->fff_fallback -+ | // Convert intermediate result to number and continue below. -+ | tonum_i FARG1, CARG1 -+ | lfd FARG2, 0(TMP1) -+ | b >6 -+ |4: -+ | lfd FARG1, 0(BASE) -+ | bge ->fff_fallback -+ |5: // Handle numbers. -+ | ld CARG2, 0(TMP1) -+ | cmpld cr1, TMP1, TMP2 -+ | lfd FARG2, 0(TMP1) -+ | bge cr1, ->fff_resn -+ | checknum_noclear CARG2; bge >7 -+ |6: -+ | fsub f0, FARG1, FARG2 -+ | addi TMP1, TMP1, 8 -+ |.if ismax -+ | fsel FARG1, f0, FARG1, FARG2 -+ |.else -+ | fsel FARG1, f0, FARG2, FARG1 -+ |.endif -+ | b <5 -+ |7: // Convert integer to number and continue above. -+ | ld CARG2, 0(TMP1) -+ | bne ->fff_fallback -+ | tonum_i FARG2, CARG2 -+ | b <6 -+ |.endmacro -+ | -+ | math_minmax math_min, 0 -+ | math_minmax math_max, 1 -+ | -+ |//-- String library ----------------------------------------------------- -+ | -+ |.ffunc string_byte // Only handle the 1-arg case here. -+ | cmpldi NARGS8:RC, 8 -+ | ld STR:CARG1, 0(BASE) -+ | bne ->fff_fallback // Need exactly 1 argument. -+ | checkstr CARG1 -+ | bne ->fff_fallback -+ | ld TMP0, STR:CARG1->len -+ | lbz CARG1, STR:CARG1[1] // Access is always ok (NUL at end). -+ | li RD, (0+1)*8 -+ | ld PC, FRAME_PC(BASE) -+ | cmpldi TMP0, 0 -+ | la RA, -16(BASE) -+ | beqy ->fff_res -+ | b ->fff_resi -+ | -+ |.ffunc string_char // Only handle the 1-arg case here. -+ | ffgccheck -+ | cmpldi NARGS8:RC, 8 -+ | ld CARG3, 0(BASE) -+ | bne ->fff_fallback // Exactly 1 argument. 
-+ | checknum CARG3; bne ->fff_fallback -+ | la CARG2, 0(BASE) // Points to stack. Little-endian. -+ -+ | cmpldi CARG3, 255 -+ | li CARG3, 1 -+ | bgt ->fff_fallback -+ |->fff_newstr: -+ | mr CARG1, L -+ | std BASE, L->base -+ | std PC, SAVE_PC -+ | bl extern lj_str_new // (lua_State *L, char *str, size_t l) -+ |->fff_resstr: -+ | // Returns GCstr *. -+ | ld BASE, L->base -+ | li TMP1, LJ_TSTR -+ | set_oper_type, TMP1, TMP1 -+ | add CARG1, CARG1, TMP1 -+ | b ->fff_restv -+ | -+ |.ffunc string_sub -+ | ffgccheck -+ | cmpldi NARGS8:RC, 16 -+ | ld STR:CARG1, 0(BASE) -+ | blt ->fff_fallback -+ | ld TMP1, 8(BASE) -+ | get_oper_type TMP0,CARG1 -+ | get_oper_type CARG2,TMP1 -+ | li TMP2,-1 -+ | beq >1 -+ | ld TMP2, 16(BASE) -+ | get_oper_type CARG3,TMP2 -+ | checknum TMP2 -+ | bne ->fff_fallback -+ |1: -+ | checknum TMP1; bne ->fff_fallback -+ | checkstr CARG1; bne ->fff_fallback -+ | lwz TMP0, STR:CARG1->len -+ | cmpld TMP0, TMP2 // len < end? (unsigned compare) -+ | addi TMP3, TMP2, 1 -+ | blt >5 -+ |2: -+ | cmpdi TMP1, 0 // start <= 0? -+ | add TMP3, TMP1, TMP0 -+ | ble >7 -+ |3: -+ | sub CARG3, TMP2, TMP1 -+ | addi CARG2, STR:CARG1, #STR-1 -+ | sradi TMP0, CARG3, 31 -+ | addi CARG3, CARG3, 1 -+ | add CARG2, CARG2, TMP1 -+ | andc CARG3, CARG3, TMP0 -+ | b ->fff_newstr -+ | -+ |5: // Negative end or overflow. -+ | cmpd TMP0, TMP2 // len >= end? (signed compare) -+ | add TMP2, TMP0, TMP3 // Negative end: end = end+len+1. -+ | bge <2 -+ | mr TMP2, TMP0 // Overflow: end = len. -+ | b <2 -+ | -+ |7: // Negative start or underflow. -+ | extsw TMP1, TMP1 -+ | addic CARG3, TMP1, -1 -+ | subfe CARG3, CARG3, CARG3 -+ | sradi CARG2, TMP3, 31 // Note: modifies carry. -+ | andc TMP3, TMP3, CARG3 -+ | andc TMP1, TMP3, CARG2 -+ | addi TMP1, TMP1, 1 // start = 1 + (start ? start+len : 0) -+ | b <3 -+ | -+ |.macro ffstring_op, name -+ | .ffunc string_ .. 
name -+ | ffgccheck -+ | cmpldi NARGS8:RC, 8 -+ | ld STR:CARG2, 0(BASE) -+ | blt ->fff_fallback -+ | checkstr CARG2 -+ | la SBUF:CARG1, DISPATCH_GL(tmpbuf)(DISPATCH) -+ | bne ->fff_fallback -+ | ld TMP0, SBUF:CARG1->b -+ | std L, SBUF:CARG1->L -+ | std BASE, L->base -+ | std PC, SAVE_PC -+ | std TMP0, SBUF:CARG1->p -+ | bl extern lj_buf_putstr_ .. name -+ | bl extern lj_buf_tostr -+ | b ->fff_resstr -+ |.endmacro -+ | -+ |ffstring_op reverse -+ |ffstring_op lower -+ |ffstring_op upper -+ | -+ |//-- Bit library -------------------------------------------------------- -+ | -+ |.macro .ffunc_bit, name -+ | .ffunc_1 bit_..name -+ | checknum CARG1; bnel ->fff_tobit_fb -+ |.endmacro -+ | -+ |.macro .ffunc_bit_op, name, ins -+ | .ffunc_bit name -+ | mr TMP0, CARG1 -+ | rldicl TMP0, TMP0, 0, 32 -+ | addi TMP1, BASE, 8 -+ | add TMP2, BASE, NARGS8:RC -+ |1: -+ | ld CARG2, 0(TMP1) -+ | cmpld cr1, TMP1, TMP2 -+ | bgey cr1, >9 -+ | checknum CARG2 -+ | bnel ->fff_bitop_fb -+ | rldicl CARG2, CARG2, 0, 32 -+ | ins TMP0, TMP0, CARG2 -+ | addi TMP1, TMP1, 8 -+ | b <1 -+ |9: -+ | add CARG1, TMP0, TISNUM -+ | b ->fff_restv -+ | -+ |.endmacro -+ | -+ |.ffunc_bit_op band, and -+ |.ffunc_bit_op bor, or -+ |.ffunc_bit_op bxor, xor -+ | -+ |.ffunc_bit bswap -+ | rlwinm TMP0, CARG1, 8, 0, 31 -+ | rlwimi TMP0, CARG1, 24, 0, 7 -+ | rlwimi TMP0, CARG1, 24, 16, 23 -+ | mr CRET1, TMP0 -+ | b ->fff_resi -+ | -+ |.ffunc_bit bnot -+ | not CRET1, CARG1 -+ | rldicl CRET1, CRET1, 0, 32 -+ | add CRET1, CRET1, TISNUM -+ | b ->fff_resi -+ | -+ |.macro .ffunc_bit_sh, name, ins, shmod -+ | .ffunc_2 bit_..name -+ | checknum CARG1; bnel ->fff_tobit_fb -+ | // Note: no inline conversion from number for 2nd argument! 
-+ | checknum CARG2; bne ->fff_fallback -+ | rldicl TMP0, CARG1, 0, 32 -+ | rldicl TMP1, CARG2, 0, 32 -+ |.if shmod == 1 -+ | rlwinm TMP1, TMP1, 0, 27, 31 -+ |.elif shmod == 2 -+ | neg TMP1, TMP1 -+ |.endif -+ | ins CARG1, TMP0, TMP1 -+ | rldicl CARG1, CARG1, 0, 32 -+ | add CARG1, CARG1, TISNUM -+ | b ->fff_restv -+ |.endmacro -+ | -+ |.ffunc_bit_sh lshift, slw, 1 -+ |.ffunc_bit_sh rshift, srw, 1 -+ |.ffunc_bit_sh arshift, sraw, 1 -+ |.ffunc_bit_sh rol, rotlw, 0 -+ |.ffunc_bit_sh ror, rotlw, 2 -+ | -+ |.ffunc_bit tobit -+ | rldicl TMP0, CARG1, 0, 32 -+ | add CARG1, TMP0, TISNUM -+ | b ->fff_restv -+ | -+ |->fff_resn: -+ | ld PC, FRAME_PC(BASE) -+ | la RA, -16(BASE) -+ | stfd FARG1, -16(BASE) -+ | b ->fff_res1 -+ | -+ |// Fallback FP number to bit conversion. -+ |->fff_tobit_fb: -+ | lfd FARG1, 0(BASE) -+ | bgt ->fff_fallback -+ | fadd FARG1, FARG1, TOBIT -+ | stfd FARG1, TMPD -+ | ld CARG1, TMPD -+ | blr -+ |->fff_bitop_fb: -+ | lfd FARG1, 0(TMP1) -+ | bgt ->fff_fallback -+ | fadd FARG1, FARG1, TOBIT -+ | stfd FARG1, TMPD -+ | ld CARG2, TMPD -+ | blr -+ | -+ |//----------------------------------------------------------------------- -+ | -+ |->fff_fallback: // Call fast function fallback handler. -+ | // BASE = new base, RB = CFUNC, RC = nargs*8 -+ | ld TMP3, CFUNC:RB->f -+ | add TMP1, BASE, NARGS8:RC -+ | ld PC, FRAME_PC(BASE) // Fallback may overwrite PC. -+ | addi TMP0, TMP1, 8*LUA_MINSTACK -+ | ld TMP2, L->maxstack -+ | std PC, SAVE_PC // Redundant (but a defined value). -+ | cmpld TMP0, TMP2 -+ | std BASE, L->base -+ | std TMP1, L->top -+ | mr CARG1, L -+ | bgt >5 // Need to grow stack. -+ | mr r12, TMP3 // keep r12 for function linkage. -+ | mtctr r12 -+ | bctrl // (lua_State *L) -+ | // Either throws an error, or recovers and returns -1, 0 or nresults+1. -+ | ld BASE, L->base -+ | cmpdi CRET1, 0 -+ | sldi RD, CRET1, 3 -+ | la RA, -16(BASE) -+ | bgt ->fff_res // Returned nresults+1? -+ |1: // Returned 0 or -1: retry fast path. 
-+ | ld TMP0, L->top -+ | ld LFUNC:RB, FRAME_FUNC(BASE) -+ | sub NARGS8:RC, TMP0, BASE -+ | bne ->vm_call_tail // Returned -1? -+ | clear_field RB -+ | ins_callt // Returned 0: retry fast path. -+ | -+ |// Reconstruct previous base for vmeta_call during tailcall. -+ |->vm_call_tail: -+ | andi. TMP0, PC, FRAME_TYPE -+ | rlwinm TMP1, PC, 0, 0, 28 -+ | bne >3 -+ | lwz INS, -4(PC) -+ | decode_RA8 TMP1, INS -+ | addi TMP1, TMP1, 8 -+ |3: -+ | sub TMP2, BASE, TMP1 -+ | b ->vm_call_dispatch // Resolve again for tailcall. -+ | -+ |5: // Grow stack for fallback handler. -+ | li CARG2, LUA_MINSTACK -+ | bl extern lj_state_growstack // (lua_State *L, int n) -+ | ld BASE, L->base -+ | cmpd TMP0, TMP0 // Set 4*cr0+eq to force retry. -+ | b <1 -+ | -+ |->fff_gcstep: // Call GC step function. -+ | // BASE = new base, RC = nargs*8 -+ | mflr SAVE0 -+ | std BASE, L->base -+ | add TMP0, BASE, NARGS8:RC -+ | std PC, SAVE_PC // Redundant (but a defined value). -+ | std TMP0, L->top -+ | mr CARG1, L -+ | bl extern lj_gc_step // (lua_State *L) -+ | ld BASE, L->base -+ | mtlr SAVE0 -+ | ld TMP0, L->top -+ | sub NARGS8:RC, TMP0, BASE -+ | ld CFUNC:RB, FRAME_FUNC(BASE) -+ | clear_field CFUNC:RB -+ | blr -+ | -+ |//----------------------------------------------------------------------- -+ |//-- Special dispatch targets ------------------------------------------- -+ |//----------------------------------------------------------------------- -+ | -+ |->vm_record: // Dispatch target for recording phase. -+ | NYI -+ | -+ |->vm_rethook: // Dispatch target for return hooks. -+ | lbz TMP3, DISPATCH_GL(hookmask)(DISPATCH) -+ | andi. TMP0, TMP3, HOOK_ACTIVE // Hook already active? -+ | beq >1 -+ |5: // Re-dispatch to static ins. -+ | addi TMP1, TMP1, GG_DISP2STATIC // Assumes decode_OP8 TMP1, INS. -+ | ldx TMP0, DISPATCH, TMP1 -+ | mtctr TMP0 -+ | bctr -+ | -+ |->vm_inshook: // Dispatch target for instr/line hooks. 
-+ | lbz TMP3, DISPATCH_GL(hookmask)(DISPATCH) -+ | lwz TMP2, DISPATCH_GL(hookcount)(DISPATCH) -+ | andi. TMP0, TMP3, HOOK_ACTIVE // Hook already active? -+ | rlwinm TMP0, TMP3, 31-LUA_HOOKLINE, 31, 0 -+ | bne <5 -+ | -+ | cmpdi cr1, TMP0, 0 -+ | addic. TMP2, TMP2, -1 -+ | beq cr1, <5 -+ | stw TMP2, DISPATCH_GL(hookcount)(DISPATCH) -+ | beq >1 -+ | bge cr1, <5 -+ |1: -+ | mr CARG1, L -+ | std MULTRES, SAVE_MULTRES -+ | mr CARG2, PC -+ | std BASE, L->base -+ | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC. -+ | bl extern lj_dispatch_ins // (lua_State *L, const BCIns *pc) -+ |3: -+ | ld BASE, L->base -+ |4: // Re-dispatch to static ins. -+ | lwz INS, -4(PC) -+ | decode_OP8 TMP1, INS -+ | decode_RB8 RB, INS -+ | addi TMP1, TMP1, GG_DISP2STATIC -+ | decode_RD8 RD, INS -+ | ldx TMP0, DISPATCH, TMP1 -+ | decode_RA8 RA, INS -+ | decode_RC8 RC, INS -+ | mtctr TMP0 -+ | bctr -+ | -+ |->cont_hook: // Continue from hook yield. -+ | addi PC, PC, 4 -+ | ld MULTRES, -40(RB) // Restore MULTRES for *M ins. -+ | b <4 -+ | -+ |->vm_hotloop: // Hot loop counter underflow. -+ | NYI -+ | -+ |->vm_callhook: // Dispatch target for call hooks. -+ | mr CARG2, PC -+ |.if JIT -+ | b >1 -+ |.endif -+ | -+ |->vm_hotcall: // Hot call counter underflow. -+ |.if JIT -+ | ori CARG2, PC, 1 -+ |1: -+ |.endif -+ | add TMP0, BASE, RC -+ | std PC, SAVE_PC -+ | mr CARG1, L -+ | std BASE, L->base -+ | sub RA, RA, BASE -+ | std TMP0, L->top -+ | bl extern lj_dispatch_call // (lua_State *L, const BCIns *pc) -+ | // Returns ASMFunction. -+ | ld BASE, L->base -+ | ld TMP0, L->top -+ | std ZERO, SAVE_PC // Invalidate for subsequent line hook. -+ | sub NARGS8:RC, TMP0, BASE -+ | add RA, BASE, RA -+ | ld LFUNC:RB, FRAME_FUNC(BASE) -+ | clear_field RB -+ | lwz INS, -4(PC) -+ | mtctr CRET1 -+ | bctr -+ | -+ |->cont_stitch: // Trace stitching. -+ | NYI -+ | -+ |->vm_profhook: // Dispatch target for profiler hook. 
-+#if LJ_HASPROFILE -+ | mr CARG1, L -+ | std MULTRES, SAVE_MULTRES -+ | mr CARG2, PC -+ | std BASE, L->base -+ | bl extern lj_dispatch_profile // (lua_State *L, const BCIns *pc) -+ | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction. -+ | ld BASE, L->base -+ | subi PC, PC, 4 -+ | b ->cont_nop -+#endif -+ | -+ |//----------------------------------------------------------------------- -+ |//-- Trace exit handler ------------------------------------------------- -+ |//----------------------------------------------------------------------- -+ | -+ |->vm_exit_handler: -+ | NYI -+ |->vm_exit_interp: -+ | NYI -+ | -+ |//----------------------------------------------------------------------- -+ |//-- Math helper functions ---------------------------------------------- -+ |//----------------------------------------------------------------------- -+ | -+ |// NYI: Use internal implementations of floor, ceil, trunc. -+ | -+ |->vm_modi: -+ | divdo. TMP0, CARG1, CARG2 -+ | bso >1 -+ | xor CARG3, CARG1, CARG2 -+ | cmpdi CARG3, 0 -+ | mulld TMP0, TMP0, CARG2 -+ | sub CARG1, CARG1, TMP0 -+ | bgelr -+ | cmpdi CARG1, 0; beqlr -+ | add CARG1, CARG1, CARG2 -+ | blr -+ |1: -+ | cmpdi CARG2, 0 -+ | li CARG1, 0 -+ | beqlr -+ | mcrxr cr0 // Clear SO for -2147483648 % -1 and return 0. -+ | blr -+ | -+ |//----------------------------------------------------------------------- -+ |//-- Miscellaneous functions -------------------------------------------- -+ |//----------------------------------------------------------------------- -+ | -+ |// void lj_vm_cachesync(void *start, void *end) -+ |// Flush D-Cache and invalidate I-Cache. Assumes 32 byte cache line size. -+ |// This is a good lower bound, except for very ancient PPC models. -+ |->vm_cachesync: -+ |.if JIT or FFI -+ | // Compute start of first cache line and number of cache lines. -+ | clrldi CARG1, CARG1, 17 -+ | sub CARG2, CARG2, CARG1 -+ | addi CARG2, CARG2, 31 -+ | rlwinm. 
CARG2, CARG2, 27, 5, 31 -+ | beqlr -+ | mtctr CARG2 -+ | mr CARG3, CARG1 -+ |1: // Flush D-Cache. -+ | dcbst r0, CARG1 -+ | addi CARG1, CARG1, 32 -+ | bdnz <1 -+ | sync -+ | mtctr CARG2 -+ |1: // Invalidate I-Cache. -+ | icbi r0, CARG3 -+ | addi CARG3, CARG3, 32 -+ | bdnz <1 -+ | isync -+ | blr -+ |.endif -+ | -+ |//----------------------------------------------------------------------- -+ |//-- FFI helper functions ----------------------------------------------- -+ |//----------------------------------------------------------------------- -+ | -+ |// Handler for callback functions. Callback slot number in r11, g in r12. -+ |->vm_ffi_callback: -+ |.if FFI -+ |.type CTSTATE, CTState, PC -+ | pic_code_setup vm_ffi_callback -+ | saveregs -+ | ld CTSTATE, GL:r12->ctype_state -+ | addi DISPATCH, r12, GG_G2DISP -+ | std r11, CTSTATE->cb.slot -+ | std r3, CTSTATE->cb.gpr[0] -+ | stfd f1, CTSTATE->cb.fpr[0] -+ | std r4, CTSTATE->cb.gpr[1] -+ | stfd f2, CTSTATE->cb.fpr[1] -+ | std r5, CTSTATE->cb.gpr[2] -+ | stfd f3, CTSTATE->cb.fpr[2] -+ | std r6, CTSTATE->cb.gpr[3] -+ | stfd f4, CTSTATE->cb.fpr[3] -+ | std r7, CTSTATE->cb.gpr[4] -+ | stfd f5, CTSTATE->cb.fpr[4] -+ | std r8, CTSTATE->cb.gpr[5] -+ | stfd f6, CTSTATE->cb.fpr[5] -+ | std r9, CTSTATE->cb.gpr[6] -+ | stfd f7, CTSTATE->cb.fpr[6] -+ | std r10, CTSTATE->cb.gpr[7] -+ | stfd f8, CTSTATE->cb.fpr[7] -+ | addi TMP0, sp, CFRAME_SPACE+8 -+ | std TMP0, CTSTATE->cb.stack -+ | mr CARG1, CTSTATE -+ | std CTSTATE, SAVE_PC // Any value outside of bytecode is ok. -+ | mr CARG2, sp -+ | bl extern lj_ccallback_enter // (CTState *cts, void *cf) -+ | // Returns lua_State *. -+ | ld BASE, L:CRET1->base -+ | li TISNUM, LJ_TISNUM // Setup type comparison constants. -+ | set_oper_type TISNUM, TISNUM -+ | ld RC, L:CRET1->top -+ | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). 
-+ | li ZERO, 0 -+ | mr L, CRET1 -+ | std TMP3, TMPD -+ | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double) -+ | ld LFUNC:RB, FRAME_FUNC(BASE) -+ | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). -+ | stw TMP0, TONUM_HI -+ | li TISNIL, LJ_TNIL -+ | li_vmstate INTERP -+ | lfs TOBIT, TMPD -+ | std TMP3, TMPD -+ | sub RC, RC, BASE -+ | st_vmstate -+ | lfs TONUM, TMPD -+ | clear_field RB -+ | ins_callt -+ |.endif -+ | -+ |->cont_ffi_callback: // Return from FFI callback. -+ |.if FFI -+ | ld CTSTATE, DISPATCH_GL(ctype_state)(DISPATCH) -+ | std BASE, L->base -+ | std RB, L->top -+ | std L, CTSTATE->L -+ | mr CARG1, CTSTATE -+ | mr CARG2, RA -+ | bl extern lj_ccallback_leave // (CTState *cts, TValue *o) -+ | ld CRET1, CTSTATE->cb.gpr[0] -+ | lfd FARG1, CTSTATE->cb.fpr[0] -+ | ld CRET2, CTSTATE->cb.gpr[1] -+ | b ->vm_leave_unw -+ |.endif -+ | -+ |->vm_ffi_call: // Call C function via FFI. -+ | // Caveat: needs special frame unwinding, see below. -+ |.if FFI -+ | .type CCSTATE, CCallState, CARG1 -+ | lwz TMP1, CCSTATE->spadj -+ | mflr TMP0 -+ | lbz CARG2, CCSTATE->nsp -+ | lbz CARG3, CCSTATE->nfpr -+ | neg TMP1, TMP1 -+ | std TMP0, 16(sp) -+ | cmpdi cr1, CARG3, 0 -+ | std TOCREG, 24(sp) -+ | mr TMP2, sp -+ | addic. CARG2, CARG2, -1 -+ | stdux sp, sp, TMP1 -+ | crnot 4*cr1+eq, 4*cr1+eq // For vararg calls. -+ | std r14, -8(TMP2) -+ | std CCSTATE, -16(TMP2) -+ | mr r14, TMP2 -+ | la TMP1, CCSTATE->stack -+ | sldi CARG2, CARG2, 3 -+ | blty >2 -+ | la TMP2, 96(sp) // stack params after regs -+ |1: // Copy stack slots -+ | ldx TMP0, TMP1, CARG2 -+ | stdx TMP0, TMP2, CARG2 -+ | addic. 
CARG2, CARG2, -8 -+ | bge <1 -+ |2: -+ | bney cr1, >3 -+ | lfd f1, CCSTATE->fpr[0] -+ | lfd f2, CCSTATE->fpr[1] -+ | lfd f3, CCSTATE->fpr[2] -+ | lfd f4, CCSTATE->fpr[3] -+ | lfd f5, CCSTATE->fpr[4] -+ | lfd f6, CCSTATE->fpr[5] -+ | lfd f7, CCSTATE->fpr[6] -+ | lfd f8, CCSTATE->fpr[7] -+ | lfd f9, CCSTATE->fpr[8] -+ | lfd f10, CCSTATE->fpr[9] -+ | lfd f11, CCSTATE->fpr[10] -+ | lfd f12, CCSTATE->fpr[11] -+ | lfd f13, CCSTATE->fpr[12] -+ |3: -+ | ld r12, CCSTATE->func -+ | ld CARG2, CCSTATE->gpr[1] -+ | ld CARG3, CCSTATE->gpr[2] -+ | ld CARG4, CCSTATE->gpr[3] -+ | ld CARG5, CCSTATE->gpr[4] -+ | mtctr r12 -+ | ld r8, CCSTATE->gpr[5] -+ | ld r9, CCSTATE->gpr[6] -+ | ld r10, CCSTATE->gpr[7] -+ | ld CARG1, CCSTATE->gpr[0] // Do this last, since CCSTATE is CARG1. -+ | bctrl -+ | ld CCSTATE:TMP1, -16(r14) -+ | ld TMP2, -8(r14) -+ | ld TMP0, 16(r14) -+ | ld TOCREG, 24(r14) -+ | std CARG1, CCSTATE:TMP1->gpr[0] -+ | stfd FARG1, CCSTATE:TMP1->fpr[0] -+ | std CARG2, CCSTATE:TMP1->gpr[1] -+ | mtlr TMP0 -+ | std CARG3, CCSTATE:TMP1->gpr[2] -+ | mr sp, r14 -+ | std CARG4, CCSTATE:TMP1->gpr[3] -+ | mr r14, TMP2 -+ | blr -+ |.endif -+ |// Note: vm_ffi_call must be the last function in this object file! -+ | -+ |//----------------------------------------------------------------------- -+} -+ -+/* Generate the code for a single instruction. */ -+static void build_ins(BuildCtx *ctx, BCOp op, int defop) -+{ -+ int vk = 0; -+ |=>defop: -+ -+ switch (op) { -+ -+ /* -- Comparison ops ---------------------------------------------------- */ -+ -+ /* Remember: all ops branch for a true comparison, fall through otherwise. 
*/ -+ -+ case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: -+ | // RA = src1*8, RD = src2*8, JMP with RD = target -+ | ldux CARG2, RA, BASE -+ | addi PC, PC, 4 -+ | get_oper_type TMP0, CARG2 -+ | ldux CARG3, RD, BASE -+ | lwz TMP2, -4(PC) -+ | checknum cr0, CARG2 -+ | decode_RD4 TMP2, TMP2 -+ | get_oper_type TMP1, CARG3 -+ | checknum cr1, CARG3 -+ | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) -+ | bne cr0, >7 -+ | bne cr1, >8 -+ | cmpd CARG2, CARG3 -+ if (op == BC_ISLT) { -+ | bge >2 -+ } else if (op == BC_ISGE) { -+ | blt >2 -+ } else if (op == BC_ISLE) { -+ | bgt >2 -+ } else { -+ | ble >2 -+ } -+ |1: -+ | add PC, PC, TMP2 -+ |2: -+ | ins_next -+ | -+ |7: // RA is not an integer. -+ | bgt cr0, ->vmeta_comp -+ | // RA is a number. -+ | lfd f0, 0(RA) -+ | bgt cr1, ->vmeta_comp -+ | blt cr1, >4 -+ | // RA is a number, RD is an integer. -+ | tonum_i f1, CARG3 -+ | b >5 -+ | -+ |8: // RA is an integer, RD is not an integer. -+ | bgt cr1, ->vmeta_comp -+ | // RA is an integer, RD is a number. -+ | tonum_i f0, CARG2 -+ |4: -+ | lfd f1, 0(RD) -+ |5: -+ | fcmpu cr0, f0, f1 -+ if (op == BC_ISLT) { -+ | bge <2 -+ } else if (op == BC_ISGE) { -+ | blt <2 -+ } else if (op == BC_ISLE) { -+ | cror 4*cr0+lt, 4*cr0+lt, 4*cr0+eq -+ | bge <2 -+ } else { -+ | cror 4*cr0+lt, 4*cr0+lt, 4*cr0+eq -+ | blt <2 -+ } -+ | b <1 -+ break; -+ -+ case BC_ISEQV: case BC_ISNEV: -+ vk = op == BC_ISEQV; -+ | // RA = src1*8, RD = src2*8, JMP with RD = target -+ | ldux CARG2, RA, BASE -+ | addi PC, PC, 4 -+ | get_oper_type TMP0, CARG2 -+ | ldux CARG3, RD, BASE -+ | li TMP3, LJ_TISNUM -+ | cmpld cr0, TMP0, TMP3 -+ | ld TMP2, -4(PC) -+ | get_oper_type TMP1, CARG3 -+ | cmpld cr1, TMP1, TMP3 -+ | decode_RD4 TMP2, TMP2 -+ | cror 4*cr7+gt, 4*cr0+gt, 4*cr1+gt -+ | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) -+ if (vk) { -+ | ble cr7, ->BC_ISEQN_Z -+ } else { -+ | ble cr7, ->BC_ISNEN_Z -+ } -+ |5: // Either or both types are not numbers. 
-+ |.if FFI -+ | cmpdi cr7, TMP0, LJ_TCDATA -+ | cmpdi cr5, TMP1, LJ_TCDATA -+ |.endif -+ | not TMP3, TMP0 -+ | cmpld TMP0, TMP1 -+ | cmpldi cr1, TMP3, ~LJ_TISPRI // Primitive? -+ |.if FFI -+ | cror 4*cr7+eq, 4*cr7+eq, 4*cr5+eq -+ |.endif -+ | cmpldi cr6, TMP3, ~LJ_TISTABUD // Table or userdata? -+ |.if FFI -+ | beq cr7, ->vmeta_equal_cd -+ |.endif -+ | cmpld cr5, CARG2, CARG3 -+ | crandc 4*cr0+gt, 4*cr0+eq, 4*cr1+gt // 2: Same type and primitive. -+ | crorc 4*cr0+lt, 4*cr5+eq, 4*cr0+eq // 1: Same tv or different type. -+ | crand 4*cr0+eq, 4*cr0+eq, 4*cr5+eq // 0: Same type and same tv. -+ | mr SAVE0, PC -+ | cror 4*cr0+eq, 4*cr0+eq, 4*cr0+gt // 0 or 2. -+ | cror 4*cr0+lt, 4*cr0+lt, 4*cr0+gt // 1 or 2. -+ if (vk) { -+ | bne cr0, >6 -+ | add PC, PC, TMP2 -+ |6: -+ } else { -+ | beq cr0, >6 -+ | add PC, PC, TMP2 -+ |6: -+ } -+ | bge cr0, >2 // Done if 1 or 2. -+ |1: -+ | ins_next -+ |2: -+ | blt cr6, <1 // Done if not tab/ud. -+ | -+ | // Different tables or userdatas. Need to check __eq metamethod. -+ | // Field metatable must be at same offset for GCtab and GCudata! -+ | clear_field TAB:CARG2 -+ | ld TAB:TMP2, TAB:CARG2->metatable -+ | li CARG4, 1-vk // ne = 0 or 1. -+ | cmpldi TAB:TMP2, 0 -+ | beq <1 // No metatable? -+ | lbz TMP2, TAB:TMP2->nomm -+ | andi. TMP2, TMP2, 1<<MM_eq -+ | bne <1 // Or 'no __eq' flag set? -+ | mr PC, SAVE0 // Restore old PC. -+ | b ->vmeta_equal // Handle __eq metamethod. 
-+ break; -+ -+ case BC_ISEQS: case BC_ISNES: -+ vk = op == BC_ISEQS; -+ | // RA = src*8, RD = str_const*8 (~), JMP with RD = target -+ | ldux TMP3, RA, BASE -+ | get_oper_type TMP0,TMP3 -+ | clear_field TMP3 -+ | lwz TMP2, 0(PC) -+ | subfic RD, RD, -8 -+ | addi PC, PC, 4 -+ |.if FFI -+ | cmpdi TMP0, LJ_TCDATA -+ |.endif -+ | ldx STR:TMP1, KBASE, RD // KBASE-8-str_const*8 -+ | subfic TMP0, TMP0, LJ_TSTR -+ |.if FFI -+ | beq ->vmeta_equal_cd -+ |.endif -+ | sub TMP1, STR:TMP1, STR:TMP3 -+ | or TMP0, TMP0, TMP1 -+ | decode_RD4 TMP2, TMP2 -+ | subfic TMP0, TMP0, 0 -+ | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) -+ | subfe TMP1, TMP1, TMP1 -+ if (vk) { -+ | andc TMP2, TMP2, TMP1 -+ } else { -+ | and TMP2, TMP2, TMP1 -+ } -+ | add PC, PC, TMP2 -+ | ins_next -+ break; -+ -+ case BC_ISEQN: case BC_ISNEN: -+ vk = op == BC_ISEQN; -+ | // RA = src*8, RD = num_const*8, JMP with RD = target -+ | ldux CARG2, RA, BASE -+ | addi PC, PC, 4 -+ | get_oper_type TMP0, CARG2 -+ | ldux CARG3, RD, KBASE -+ | li TMP4, LJ_TISNUM -+ | get_value CARG2 -+ | cmpld cr0, TMP0, TMP4 -+ | ld TMP2, -4(PC) -+ | checknum cr1, CARG3 -+ | decode_RD4 TMP2, TMP2 -+ | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) -+ if (vk) { -+ |->BC_ISEQN_Z: -+ } else { -+ |->BC_ISNEN_Z: -+ } -+ | bne cr0, >7 -+ | bne cr1, >8 -+ | cmpw CARG2, CARG3 -+ |4: -+ if (vk) { -+ | bne >1 -+ | add PC, PC, TMP2 -+ |1: -+ |.if not FFI -+ |3: -+ |.endif -+ } else { -+ | beq >2 -+ |1: -+ |.if not FFI -+ |3: -+ |.endif -+ | add PC, PC, TMP2 -+ |2: -+ } -+ | ins_next -+ |.if FFI -+ |3: -+ | cmpdi TMP0, LJ_TCDATA -+ | beq ->vmeta_equal_cd -+ | b <1 -+ |.endif -+ |7: // RA is not an integer. -+ | bge cr0, <3 -+ | // RA is a number. -+ | lfd f0, 0(RA) -+ | blt cr1, >1 -+ | // RA is a number, RD is an integer. -+ | tonum_i f1, CARG3 -+ | b >2 -+ | -+ |8: // RA is an integer, RD is a number. 
-+ | tonum_i f0, CARG2 -+ |1: -+ | lfd f1, 0(RD) -+ |2: -+ | fcmpu cr0, f0, f1 -+ | b <4 -+ break; -+ -+ case BC_ISEQP: case BC_ISNEP: -+ vk = op == BC_ISEQP; -+ | // RA = src*8, RD = primitive_type*8 (~), JMP with RD = target -+ | ldx TMP0, BASE, RA -+ | srdi TMP1, RD, 3 -+ | lwz TMP2, 0(PC) -+ | get_oper_type TMP0, TMP0 -+ | not TMP1, TMP1 -+ | addi PC, PC, 4 -+ |.if FFI -+ | cmpdi TMP0, LJ_TCDATA -+ |.endif -+ | sub TMP0, TMP0, TMP1 -+ |.if FFI -+ | beq ->vmeta_equal_cd -+ |.endif -+ | decode_RD4 TMP2, TMP2 -+ | extsw TMP0, TMP0 -+ | addic TMP0, TMP0, -1 -+ | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) -+ | subfe TMP1, TMP1, TMP1 -+ if (vk) { -+ | and TMP2, TMP2, TMP1 -+ } else { -+ | andc TMP2, TMP2, TMP1 -+ } -+ | add PC, PC, TMP2 -+ | ins_next -+ break; -+ -+ /* -- Unary test and copy ops ------------------------------------------- */ -+ -+ case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF: -+ | // RA = dst*8 or unused, RD = src*8, JMP with RD = target -+ | ldx TMP0, BASE, RD -+ | lwz INS, 0(PC) -+ | addi PC, PC, 4 -+ if (op == BC_IST || op == BC_ISF) { -+ | get_oper_type TMP0, TMP0 -+ | subfic TMP0, TMP0, LJ_TTRUE -+ | decode_RD4 TMP2, INS -+ | subfe TMP1, TMP1, TMP1 -+ | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) -+ if (op == BC_IST) { -+ | andc TMP2, TMP2, TMP1 -+ } else { -+ | and TMP2, TMP2, TMP1 -+ } -+ | add PC, PC, TMP2 -+ } else { -+ | set_bool TMP1, LJ_TFALSE -+ | lfdx f0, BASE, RD -+ | cmpld TMP0, TMP1 // compare directly due to get_oper_type -+ if (op == BC_ISTC) { -+ | bge >1 -+ } else { -+ | blt >1 -+ } -+ | addis PC, PC, -(BCBIAS_J*4 >> 16) -+ | decode_RD4 TMP2, INS -+ | stfdx f0, BASE, RA -+ | add PC, PC, TMP2 -+ |1: -+ } -+ | ins_next -+ break; -+ -+ case BC_ISTYPE: -+ | // RA = src*8, RD = -type*8 -+ | ldx TMP0, BASE, RA -+ | srdi TMP1, RD, 3 -+ | ins_next1 -+ | neg TMP1, TMP1 -+ | get_oper_type TMP0, TMP0 // TMP1 is not shifted -+ | cmpd TMP0, TMP1 -+ | bne ->vmeta_istype -+ | ins_next2 -+ break; -+ case BC_ISNUM: -+ | // RA = src*8, RD = 
-(TISNUM-1)*8 -+ | ldx TMP0, BASE, RA -+ | ins_next1 -+ | checknum TMP0 -+ | bge ->vmeta_istype -+ | ins_next2 -+ break; -+ -+ /* -- Unary ops --------------------------------------------------------- */ -+ -+ case BC_MOV: -+ | // RA = dst*8, RD = src*8 -+ | ins_next1 -+ | lfdx f0, BASE, RD -+ | stfdx f0, BASE, RA -+ | ins_next2 -+ break; -+ case BC_NOT: -+ | // RA = dst*8, RD = src*8 -+ | ins_next1 -+ | ldx TMP0, BASE, RD -+ | rotldi TMP0, TMP0, 17 -+ | subfic TMP1, TMP0, LJ_TTRUE -+ | adde TMP0, TMP0, TMP1 -+ | rotldi TMP0, TMP0, 47 -+ | stdx TMP0, BASE, RA -+ | ins_next2 -+ break; -+ case BC_UNM: -+ | // RA = dst*8, RD = src*8 -+ | ldux TMP0, RD, BASE // RD is Used in vmeta_unm -+ | mr TMP1, TMP0 -+ | lus TMP2, 0x8000 -+ | checknum TMP1 -+ | bne >5 -+ | neg TMP1, TMP1 -+ | cmplw TMP1, TMP2 -+ | beq >4 -+ |1: -+ | ins_next1 -+ | rldicl TMP0, TMP1, 0, 32 // clear the high order 32 bits -+ | add TMP0, TMP0, TISNUM -+ | stdx TMP0, RA, BASE -+ |3: -+ | ins_next2 -+ |4: -+ | li TMP0, 0x41e0 // 2^31. -+ | sldi TMP0, TMP0, 48 -+ | b >7 -+ |5: -+ | sldi TMP2, TMP2, 32 -+ | bge ->vmeta_unm -+ | xor TMP0, TMP0, TMP2 -+ |7: -+ | ins_next1 -+ | stdx TMP0, RA, BASE -+ | b <3 -+ break; -+ case BC_LEN: -+ | // RA = dst*8, RD = src*8 -+ | ldux CARG1, RD, BASE -+ | mr TMP0, CARG1 -+ | checkstr CARG1; bne >2 -+ | lwz CRET1, STR:CARG1->len -+ |1: -+ | ins_next1 -+ | add CRET1, CRET1, TISNUM -+ | stdux CRET1, RA, BASE -+ | ins_next2 -+ |2: -+ | checktab TMP0; bne ->vmeta_len -+#if LJ_52 -+ | ld TAB:TMP2, TAB:CARG1->metatable -+ | cmpldi TAB:TMP2, 0 -+ | bne >9 -+ |3: -+#endif -+ |->BC_LEN_Z: -+ | bl extern lj_tab_len // (GCtab *t) -+ | // Returns uint32_t (but less than 2^31). -+ | b <1 -+#if LJ_52 -+ |9: -+ | lbz TMP0, TAB:TMP2->nomm -+ | andi. TMP0, TMP0, 1<<MM_len -+ | bne <3 // 'no __len' flag set: done. 
-+ | b ->vmeta_len -+#endif -+ break; -+ -+ /* -- Binary ops -------------------------------------------------------- */ -+ -+ |.macro ins_arithpre -+ | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8 -+ ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); -+ ||switch (vk) { -+ ||case 0: -+ | ldx TMP1, BASE, RB -+ | ldx TMP2, KBASE, RC -+ | lfdx f14, BASE, RB -+ | lfdx f15, KBASE, RC -+ | checknum cr0, TMP1 -+ | checknum cr1, TMP2 -+ | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt -+ | bge ->vmeta_arith_vn -+ || break; -+ ||case 1: -+ | ldx TMP1, BASE, RB -+ | ldx TMP2, KBASE, RC -+ | lfdx f15, BASE, RB -+ | lfdx f14, KBASE, RC -+ | checknum cr0, TMP1 -+ | checknum cr1, TMP2 -+ | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt -+ | bge ->vmeta_arith_nv -+ || break; -+ ||default: -+ | ldx TMP1, BASE, RB -+ | ldx TMP2, BASE, RC -+ | lfdx f14, BASE, RB -+ | lfdx f15, BASE, RC -+ | checknum cr0, TMP1 -+ | checknum cr1, TMP2 -+ | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt -+ | bge ->vmeta_arith_vv -+ || break; -+ ||} -+ |.endmacro -+ | -+ |.macro ins_arithfallback, ins -+ ||switch (vk) { -+ ||case 0: -+ | ins ->vmeta_arith_vn2 -+ || break; -+ ||case 1: -+ | ins ->vmeta_arith_nv2 -+ || break; -+ ||default: -+ | ins ->vmeta_arith_vv2 -+ || break; -+ ||} -+ |.endmacro -+ | -+ |.macro intmod, a, b, c -+ | bl ->vm_modi -+ |.endmacro -+ | -+ |.macro fpmod, a, b, c -+ |->BC_MODVN_Z: -+ | fdiv FARG1, b, c -+ | // NYI: Use internal implementation of floor. -+ | blex floor // floor(b/c) -+ | fmul a, FARG1, c -+ | fsub a, b, a // b - floor(b/c)*c -+ |.endmacro -+ | -+ |.macro ins_arithfp, fpins -+ | ins_arithpre -+ |.if "fpins" == "fpmod_" -+ | b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway. 
-+ |.else -+ | fpins f0, f14, f15 -+ | ins_next1 -+ | stfdx f0, BASE, RA -+ | ins_next2 -+ |.endif -+ |.endmacro -+ | -+ |.macro ins_arithdn, intins, fpins -+ | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8 -+ ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); -+ ||switch (vk) { -+ ||case 0: -+ | ldux CARG1, RB, BASE -+ | ldux CARG2, RC, KBASE -+ | checknum cr0, CARG1 -+ || break; -+ ||case 1: -+ | ldux CARG1, RC, KBASE -+ | ldux CARG2, RB, BASE -+ | checknum cr0, CARG1 -+ || break; -+ ||default: -+ | ldux CARG1, RB, BASE -+ | ldux CARG2, RC, BASE -+ | checknum cr0, CARG1 -+ || break; -+ ||} -+ | checknum cr1, CARG2 -+ | bne >5 -+ | bne cr1, >5 -+ | intins CARG1, CARG1, CARG2 -+ | bso >4 -+ |1: -+ | ins_next1 -+ | li TISNUM, LJ_TISNUM -+ | set_oper_type TISNUM, TISNUM -+ | clear_field CARG1 -+ | add CARG1, CARG1, TISNUM -+ | stdux CARG1, RA, BASE -+ |2: -+ | ins_next2 -+ |4: // Overflow. -+ | checkov <1 // Ignore unrelated overflow. -+ | ins_arithfallback b -+ |5: // FP variant. -+ ||if (vk == 1) { -+ | lfd f15, 0(RB) -+ | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt -+ | lfd f14, 0(RC) -+ ||} else { -+ | lfd f14, 0(RB) -+ | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt -+ | lfd f15, 0(RC) -+ ||} -+ | ins_arithfallback bge -+ |.if "fpins" == "fpmod_" -+ | b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway. -+ |.else -+ | fpins f0, f14, f15 -+ | ins_next1 -+ | stfdx f0, BASE, RA -+ | b <2 -+ |.endif -+ |.endmacro -+ | -+ |.macro ins_arith, intins, fpins -+ | ins_arithdn intins, fpins -+ |.endmacro -+ -+ case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: -+ |.macro addo32., y, a, b -+ | // Need to check overflow for (a<<32) + (b<<32). -+ | rldicr TMP0, a, 32, 31 -+ | rldicr TMP3, b, 32, 31 -+ | addo. TMP0, TMP0, TMP3 -+ | add y, a, b -+ |.endmacro -+ | ins_arith addo32., fadd -+ break; -+ case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: -+ |.macro subo32., y, a, b -+ | // Need to check overflow for (a<<32) - (b<<32). -+ | rldicr TMP0, a, 32, 31 -+ | rldicr TMP3, b, 32, 31 -+ | subo. 
TMP0, TMP0, TMP3 -+ | sub y, a, b -+ |.endmacro -+ | ins_arith subo32., fsub -+ break; -+ case BC_MULVN: case BC_MULNV: case BC_MULVV: -+ | ins_arith mullwo., fmul -+ break; -+ case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: -+ | ins_arithfp fdiv -+ break; -+ case BC_MODVN: -+ | ins_arith intmod, fpmod -+ break; -+ case BC_MODNV: case BC_MODVV: -+ | ins_arith intmod, fpmod_ -+ break; -+ case BC_POW: -+ | // NYI: (partial) integer arithmetic. -+ | ldx TMP1, BASE, RB -+ | lfdx FARG1, BASE, RB -+ | ldx TMP2, BASE, RC -+ | lfdx FARG2, BASE, RC -+ | checknum cr0, TMP1 -+ | checknum cr1, TMP2 -+ | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt -+ | bge ->vmeta_arith_vv -+ | blex pow -+ | ins_next1 -+ | stfdx FARG1, BASE, RA -+ | ins_next2 -+ break; -+ -+ case BC_CAT: -+ | // RA = dst*8, RB = src_start*8, RC = src_end*8 -+ | sub CARG3, RC, RB -+ | std BASE, L->base -+ | add CARG2, BASE, RC -+ | mr SAVE0, RB -+ |->BC_CAT_Z: -+ | std PC, SAVE_PC -+ | mr CARG1, L -+ | srdi CARG3, CARG3, 3 -+ | bl extern lj_meta_cat // (lua_State *L, TValue *top, int left) -+ | // Returns NULL (finished) or TValue * (metamethod). -+ | cmpldi CRET1, 0 -+ | ld BASE, L->base -+ | bne ->vmeta_binop -+ | ins_next1 -+ | lfdx f0, BASE, SAVE0 // Copy result from RB to RA. 
-+ | stfdx f0, BASE, RA -+ | ins_next2 -+ break; -+ /* -- Constant ops ------------------------------------------------------ */ -+ -+ case BC_KSTR: -+ | // RA = dst*8, RD = str_const*8 (~) -+ | subfic TMP1, RD, -8 -+ | ins_next1 -+ | ldx TMP0, KBASE, TMP1 // KBASE-8-str_const*8 -+ | li TMP2, LJ_TSTR -+ | set_oper_type TMP2, TMP2 -+ | add TMP0, TMP2, TMP0 -+ | stdx TMP0, RA, BASE -+ | ins_next2 -+ break; -+ case BC_KCDATA: -+ |.if FFI -+ | // RA = dst*8, RD = cdata_const*8 (~) -+ | subfic TMP1, RD, -8 -+ | ins_next1 -+ | ldx TMP0, KBASE, TMP1 // KBASE-8-cdata_const*8 -+ | li TMP2, LJ_TCDATA -+ | set_oper_type TMP2, TMP2 -+ | add TMP2, TMP2, TMP0 -+ | stdx TMP2, RA, BASE -+ | ins_next2 -+ |.endif -+ break; -+ case BC_KSHORT: -+ | // RA = dst*8, RD = int16_literal*8 -+ | srdi RD, RD, 3 -+ | extsh RD, RD // extend sign for negative numbers -+ | clear_field RD -+ | ins_next1 -+ | add TMP0, RD, TISNUM -+ | stdx TMP0, RA, BASE -+ | ins_next2 -+ break; -+ case BC_KNUM: -+ | // RA = dst*8, RD = num_const*8 -+ | ins_next1 -+ | lfdx f0, KBASE, RD -+ | stfdx f0, BASE, RA -+ | ins_next2 -+ break; -+ case BC_KPRI: -+ | // RA = dst*8, RD = primitive_type*8 (~) -+ | srdi TMP1, RD, 3 -+ | set_oper_type TMP1,TMP1 -+ | not TMP0, TMP1 -+ | ins_next1 -+ | stdx TMP0, BASE, RA -+ | ins_next2 -+ break; -+ case BC_KNIL: -+ | // RA = base*8, RD = end*8 -+ | stdx TISNIL, BASE, RA -+ | addi RA, RA, 8 -+ |1: -+ | stdx TISNIL, BASE, RA -+ | cmpd RA, RD -+ | addi RA, RA, 8 -+ | blt <1 -+ | ins_next_ -+ break; -+ -+ /* -- Upvalue and function ops ------------------------------------------ */ -+ -+ case BC_UGET: -+ | // RA = dst*8, RD = uvnum*8 -+ | ld LFUNC:RB, FRAME_FUNC(BASE) -+ | addi RD, RD, offsetof(GCfuncL, uvptr) -+ | clear_field RB -+ | ldx UPVAL:RB, LFUNC:RB, RD -+ | ins_next1 -+ | ld TMP1, UPVAL:RB->v -+ | ld TMP1, 0(TMP1) -+ | stdx TMP1, BASE, RA -+ | ins_next2 -+ break; -+ case BC_USETV: -+ | // RA = uvnum*8, RD = src*8 -+ | ld LFUNC:RB, FRAME_FUNC(BASE) -+ | addi RA, RA, 
offsetof(GCfuncL, uvptr) -+ | lfdux f0, RD, BASE -+ | clear_field RB -+ | ldx UPVAL:RB, LFUNC:RB, RA -+ | lbz TMP3, UPVAL:RB->marked -+ | ld CARG2, UPVAL:RB->v -+ | andi. TMP3, TMP3, LJ_GC_BLACK // isblack(uv) -+ | lbz TMP0, UPVAL:RB->closed -+ | ld TMP1, 0(RD) -+ | get_oper_type TMP2, TMP1 -+ | stfd f0, 0(CARG2) -+ | cmpldi cr1, TMP0, 0 -+ | cror 4*cr0+eq, 4*cr0+eq, 4*cr1+eq -+ | subi TMP2, TMP2, (LJ_TNUMX+1) -+ | bne >2 // Upvalue is closed and black? -+ |1: -+ | ins_next -+ | -+ |2: // Check if new value is collectable. -+ | cmpldi TMP2, LJ_TISGCV - (LJ_TNUMX+1) -+ | bge <1 // tvisgcv(v) -+ | clear_field GCOBJ:TMP1 -+ | lbz TMP3, GCOBJ:TMP1->gch.marked -+ | andi. TMP3, TMP3, LJ_GC_WHITES // iswhite(v) -+ | la CARG1, GG_DISP2G(DISPATCH) -+ | // Crossed a write barrier. Move the barrier forward. -+ | beq <1 -+ | bl extern lj_gc_barrieruv // (global_State *g, TValue *tv) -+ | b <1 -+ break; -+ case BC_USETS: -+ | // RA = uvnum*8, RD = str_const*8 (~) -+ | ld LFUNC:RB, FRAME_FUNC(BASE) -+ | subfic TMP1, RD, -8 -+ | addi RA, RA, offsetof(GCfuncL, uvptr) -+ | clear_field RB -+ | ldx STR:TMP1, KBASE, TMP1 // KBASE-8-str_const*8 -+ | ldx UPVAL:RB, LFUNC:RB, RA -+ | lbz TMP3, UPVAL:RB->marked -+ | ld CARG2, UPVAL:RB->v -+ | andi. TMP3, TMP3, LJ_GC_BLACK // isblack(uv) -+ | lbz TMP3, STR:TMP1->marked -+ | lbz TMP2, UPVAL:RB->closed -+ | li TMP0, LJ_TSTR -+ | set_oper_type TMP0, TMP0 -+ | add TMP0, TMP0, STR:TMP1 -+ | std TMP0, 0(CARG2) -+ | bne >2 -+ |1: -+ | ins_next -+ | -+ |2: // Check if string is white and ensure upvalue is closed. -+ | andi. TMP3, TMP3, LJ_GC_WHITES // iswhite(str) -+ | cmpldi cr1, TMP2, 0 -+ | cror 4*cr0+eq, 4*cr0+eq, 4*cr1+eq -+ | la CARG1, GG_DISP2G(DISPATCH) -+ | // Crossed a write barrier. Move the barrier forward. 
-+ | beq <1 -+ | bl extern lj_gc_barrieruv // (global_State *g, TValue *tv) -+ | b <1 -+ break; -+ case BC_USETN: -+ | // RA = uvnum*8, RD = num_const*8 -+ | ld LFUNC:RB, FRAME_FUNC(BASE) -+ | addi RA, RA, offsetof(GCfuncL, uvptr) -+ | clear_field RB -+ | lfdx f0, KBASE, RD -+ | ldx UPVAL:RB, LFUNC:RB, RA -+ | ins_next1 -+ | ld TMP1, UPVAL:RB->v -+ | stfd f0, 0(TMP1) -+ | ins_next2 -+ break; -+ case BC_USETP: -+ | // RA = uvnum*8, RD = primitive_type*8 (~) -+ | ld LFUNC:RB, FRAME_FUNC(BASE) -+ | srdi TMP0, RD, 3 -+ | addi RA, RA, offsetof(GCfuncL, uvptr) -+ | set_oper_type TMP0, TMP0 -+ | clear_field RB -+ | not TMP0, TMP0 -+ | ldx UPVAL:RB, LFUNC:RB, RA -+ | ins_next1 -+ | ld TMP1, UPVAL:RB->v -+ | std TMP0, 0(TMP1) -+ | ins_next2 -+ break; -+ -+ case BC_UCLO: -+ | // RA = level*8, RD = target -+ | ld TMP1, L->openupval -+ | branch_RD // Do this first since RD is not saved. -+ | std BASE, L->base -+ | cmpldi TMP1, 0 -+ | mr CARG1, L -+ | beq >1 -+ | add CARG2, BASE, RA -+ | bl extern lj_func_closeuv // (lua_State *L, TValue *level) -+ | ld BASE, L->base -+ |1: -+ | ins_next -+ break; -+ -+ case BC_FNEW: -+ | // RA = dst*8, RD = proto_const*8 (~) (holding function prototype) -+ | std BASE, L->base -+ | subfic TMP1, RD, -8 -+ | std PC, SAVE_PC -+ | ldx CARG2, KBASE, TMP1 // KBASE-8-tab_const*8 -+ | mr CARG1, L -+ | ld CARG3, FRAME_FUNC(BASE) -+ | clear_field CARG3 -+ | // (lua_State *L, GCproto *pt, GCfuncL *parent) -+ | bl extern lj_func_newL_gc -+ | // Returns GCfuncL *. 
-+ | ld BASE, L->base -+ | li TMP0, LJ_TFUNC -+ | set_oper_type TMP0, TMP0 -+ | add TMP0, TMP0, LFUNC:CRET1 -+ | stdx TMP0, RA, BASE -+ | ins_next -+ break; -+ -+ /* -- Table ops --------------------------------------------------------- */ -+ -+ case BC_TNEW: -+ case BC_TDUP: -+ | // RA = dst*8, RD = (hbits|asize)*8 | tab_const*8 (~) -+ | ld TMP0, DISPATCH_GL(gc.total)(DISPATCH) -+ | mr CARG1, L -+ | ld TMP1, DISPATCH_GL(gc.threshold)(DISPATCH) -+ | std BASE, L->base -+ | cmpld TMP0, TMP1 -+ | std PC, SAVE_PC -+ | bge >5 -+ |1: -+ if (op == BC_TNEW) { -+ | rlwinm CARG2, RD, 29, 21, 31 -+ | rlwinm CARG3, RD, 18, 27, 31 -+ | cmpdi CARG2, 0x7ff; beq >3 -+ |2: -+ | bl extern lj_tab_new // (lua_State *L, int32_t asize, uint32_t hbits) -+ | // Returns Table *. -+ } else { -+ | subfic TMP1, RD, -8 -+ | ldx CARG2, KBASE, TMP1 // KBASE-8-tab_const*8 -+ | bl extern lj_tab_dup // (lua_State *L, Table *kt) -+ | // Returns Table *. -+ } -+ | ld BASE, L->base -+ | li TMP0, LJ_TTAB -+ | set_oper_type TMP0, TMP0 -+ | add TAB:CRET1, TMP0, TAB:CRET1 -+ | stdx TAB:CRET1, RA, BASE -+ | ins_next -+ if (op == BC_TNEW) { -+ |3: -+ | li CARG2, 0x801 -+ | b <2 -+ } -+ |5: -+ | mr SAVE0, RD -+ | bl extern lj_gc_step_fixtop // (lua_State *L) -+ | mr RD, SAVE0 -+ | mr CARG1, L -+ | b <1 -+ break; -+ -+ case BC_GGET: -+ | // RA = dst*8, RD = str_const*8 (~) -+ case BC_GSET: -+ | // RA = src*8, RD = str_const*8 (~) -+ | ld LFUNC:TMP2, FRAME_FUNC(BASE) -+ | clear_field LFUNC:TMP2 -+ | ld TAB:RB, LFUNC:TMP2->env -+ | subfic TMP1, RD, -8 -+ | ldx STR:RC, KBASE, TMP1 // KBASE-8-str_const*8 -+ if (op == BC_GGET) { -+ | b ->BC_TGETS_Z -+ } else { -+ | b ->BC_TSETS_Z -+ } -+ break; -+ -+ case BC_TGETV: -+ | // RA = dst*8, RB = table*8, RC = key*8 -+ | ldux CARG1, RB, BASE -+ | ldux CARG2, RC, BASE -+ | checktab CARG1 -+ | checknum_noclear cr1, CARG2 -+ | bne ->vmeta_tgetv -+ | lwz TMP0, TAB:CARG1->asize -+ | bne cr1, >5 -+ | get_value CARG2 // Number is cleared through get_value -+ | ld TMP1, 
TAB:CARG1->array -+ | cmpld TMP0, CARG2 -+ | sldi TMP2, CARG2, 3 -+ | ble ->vmeta_tgetv // Integer key and in array part? -+ | ldx TMP0, TMP1, TMP2 -+ | lfdx f14, TMP1, TMP2 -+ | checknil_noclear TMP0; beq >2 -+ |1: -+ | ins_next1 -+ | stfdx f14, BASE, RA -+ | ins_next2 -+ | -+ |2: // Check for __index if table value is nil. -+ | ld TAB:TMP2, TAB:CARG1->metatable -+ | cmpldi TAB:TMP2, 0 -+ | beq <1 // No metatable: done. -+ | lbz TMP0, TAB:TMP2->nomm -+ | andi. TMP0, TMP0, 1<<MM_index -+ | bne <1 // 'no __index' flag set: done. -+ | b ->vmeta_tgetv -+ | -+ |5: -+ | checkstr CARG2; bne ->vmeta_tgetv -+ | mr RC, CARG2 // BC_TGETS_Z needs plain RB and RC -+ | mr RB, CARG1 // pointers without its type -+ | b ->BC_TGETS_Z // String key? -+ break; -+ case BC_TGETS: -+ | // RA = dst*8, RB = table*8, RC = str_const*8 (~) -+ | ldux CARG1, RB, BASE -+ | ld TAB:RB, 0(RB) -+ | clear_field TAB:RB -+ | subfic TMP1, RC, -8 -+ | checktab CARG1 -+ | ldx STR:RC, KBASE, TMP1 // KBASE-8-str_const*8 -+ | bne ->vmeta_tgets1 -+ |->BC_TGETS_Z: -+ | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = dst*8 -+ | lwz TMP0, TAB:RB->hmask -+ | lwz TMP1, STR:RC->hash -+ | ld NODE:TMP2, TAB:RB->node -+ | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask -+ | sldi TMP0, TMP1, 5 -+ | sldi TMP1, TMP1, 3 -+ | sub TMP1, TMP0, TMP1 -+ | add NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) -+ |1: -+ | ld CARG1, NODE:TMP2->key -+ | ld CARG2, NODE:TMP2->val -+ | checkstr CARG1; bne >4 -+ | cmpd CARG1, STR:RC; bne >4 -+ | checknil_noclear CARG2; beq >5 // Key found, but nil value? -+ |3: -+ | stdx CARG2, RA, BASE -+ | ins_next -+ | -+ |4: // Follow hash chain. -+ | ld NODE:TMP2, NODE:TMP2->next -+ | cmpldi NODE:TMP2, 0 -+ | bne <1 -+ | // End of hash chain: key not found, nil result. -+ | li CARG2, LJ_TNIL -+ | -+ |5: // Check for __index if table value is nil. -+ | ld TAB:TMP2, TAB:RB->metatable -+ | cmpldi TAB:TMP2, 0 -+ | beq <3 // No metatable: done. -+ | lbz TMP0, TAB:TMP2->nomm -+ | andi. 
TMP0, TMP0, 1<<MM_index -+ | bne <3 // 'no __index' flag set: done. -+ | b ->vmeta_tgets -+ break; -+ case BC_TGETB: -+ | // RA = dst*8, RB = table*8, RC = index*8 -+ | ldux CARG1, RB, BASE -+ | srdi TMP0, RC, 3 -+ | ld TAB:RB, 8(RB) -+ | checktab CARG1; bne ->vmeta_tgetb -+ | lwz TMP1, TAB:CARG1->asize -+ | ld TMP2, TAB:CARG1->array -+ | cmpld TMP0, TMP1; bge ->vmeta_tgetb -+ | ldx TMP1, TMP2, RC -+ | lfdx f0, TMP2, RC -+ | checknil_noclear TMP1; beq >5 -+ |1: -+ | ins_next1 -+ | stfdx f0, BASE, RA -+ | ins_next2 -+ | -+ |5: // Check for __index if table value is nil. -+ | ld TAB:TMP2, TAB:CARG1->metatable -+ | cmpldi TAB:TMP2, 0 -+ | beq <1 // No metatable: done. -+ | lbz TMP2, TAB:TMP2->nomm -+ | andi. TMP2, TMP2, 1<<MM_index -+ | bne <1 // 'no __index' flag set: done. -+ | b ->vmeta_tgetb // Caveat: preserve TMP0! -+ break; -+ case BC_TGETR: -+ | // RA = dst*8, RB = table*8, RC = key*8 -+ | ldx TAB:CARG1, BASE, RB -+ | clear_field TAB:CARG1 -+ | lwz TMP0, TAB:CARG1->asize -+ | ldx CARG2, BASE, RC -+ | clear_field TAB:CARG2 -+ | ld TMP1, TAB:CARG1->array -+ | cmpld TMP0, CARG2 -+ | sldi TMP2, CARG2, 3 -+ | ble ->vmeta_tgetr // In array part? -+ | lfdx f14, TMP1, TMP2 -+ |->BC_TGETR_Z: -+ | ins_next1 -+ | stfdx f14, BASE, RA -+ | ins_next2 -+ break; -+ -+ case BC_TSETV: -+ | // RA = src*8, RB = table*8, RC = key*8 -+ | ldux CARG1, RB, BASE -+ | ldux CARG2, RC, BASE -+ | checktab CARG1 -+ | bne ->vmeta_tsetv -+ | checknum_noclear CARG2 -+ | bne >5 -+ | lwz TMP0, TAB:CARG1->asize -+ | clear_field CARG2 -+ | ld TMP1, TAB:CARG1->array -+ | cmpld TMP0, CARG2 -+ | sldi TMP0, CARG2, 3 -+ | ble ->vmeta_tsetv // Integer key and in array part? -+ | ldx TMP2, TMP1, TMP0 -+ | lbz TMP3, TAB:CARG1->marked -+ | lfdx f14, BASE, RA -+ | checknil_noclear TMP2; beq >3 -+ |1: -+ | andi. TMP2, TMP3, LJ_GC_BLACK // isblack(table) -+ | stfdx f14, TMP1, TMP0 -+ | bne >7 -+ |2: -+ | ins_next -+ | -+ |3: // Check for __newindex if previous value is nil. 
-+ | ld TAB:TMP2, TAB:CARG1->metatable -+ | cmpldi TAB:TMP2, 0 -+ | beq <1 // No metatable: done. -+ | lbz TMP2, TAB:TMP2->nomm -+ | andi. TMP2, TMP2, 1<<MM_newindex -+ | bne <1 // 'no __newindex' flag set: done. -+ | b ->vmeta_tsetv -+ | -+ |5: -+ | checkstr CARG2; bne ->vmeta_tsetv -+ | mr RC, CARG2 -+ | ld TAB:RB, 0(RB) -+ | clear_field TAB:RB -+ | b ->BC_TSETS_Z // String key? -+ | -+ |7: // Possible table write barrier for the value. Skip valiswhite check. -+ | barrierback TAB:CARG1, TMP3, TMP0 -+ | b <2 -+ break; -+ case BC_TSETS: -+ | // RA = src*8, RB = table*8, RC = str_const*8 (~) -+ | ldux CARG1, RB, BASE -+ | ld TAB:RB, 0(RB) -+ | clear_field TAB:RB -+ | subfic TMP1, RC, -8 -+ | checktab CARG1 -+ | ldx STR:RC, KBASE, TMP1 // KBASE-8-str_const*8 -+ | bne ->vmeta_tsets1 -+ |->BC_TSETS_Z: -+ | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = src*8 -+ | lwz TMP0, TAB:RB->hmask -+ | lwz TMP1, STR:RC->hash -+ | ld NODE:TMP2, TAB:RB->node -+ | stb ZERO, TAB:RB->nomm // Clear metamethod cache. -+ | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask -+ | lfdx f14, BASE, RA -+ | sldi TMP0, TMP1, 5 -+ | sldi TMP1, TMP1, 3 -+ | sub TMP1, TMP0, TMP1 -+ | lbz TMP3, TAB:RB->marked -+ | add NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) -+ |1: -+ | ld CARG1, NODE:TMP2->key -+ | ld CARG2, NODE:TMP2->val -+ | ld NODE:TMP1, NODE:TMP2->next -+ | checkstr CARG1; bne >5 -+ | cmpd CARG1, STR:RC; bne >5 -+ | checknil_noclear CARG2; beq >4 // Key found, but nil value? -+ |2: -+ | andi. TMP0, TMP3, LJ_GC_BLACK // isblack(table) -+ | stfd f14, NODE:TMP2->val -+ | bne >7 -+ |3: -+ | ins_next -+ | -+ |4: // Check for __newindex if previous value is nil. -+ | ld TAB:TMP1, TAB:RB->metatable -+ | cmpldi TAB:TMP1, 0 -+ | beq <2 // No metatable: done. -+ | lbz TMP0, TAB:TMP1->nomm -+ | andi. TMP0, TMP0, 1<<MM_newindex -+ | bne <2 // 'no __newindex' flag set: done. -+ | b ->vmeta_tsets -+ | -+ |5: // Follow hash chain. 
-+ | cmpldi NODE:TMP1, 0 -+ | mr NODE:TMP2, NODE:TMP1 -+ | bne <1 -+ | // End of hash chain: key not found, add a new one. -+ | -+ | // But check for __newindex first. -+ | ld TAB:TMP1, TAB:RB->metatable -+ | la CARG3, DISPATCH_GL(tmptv)(DISPATCH) -+ | std PC, SAVE_PC -+ | mr CARG1, L -+ | cmpldi TAB:TMP1, 0 -+ | std BASE, L->base -+ | beq >6 // No metatable: continue. -+ | lbz TMP0, TAB:TMP1->nomm -+ | andi. TMP0, TMP0, 1<<MM_newindex -+ | beq ->vmeta_tsets // 'no __newindex' flag NOT set: check. -+ |6: -+ | li TMP0, LJ_TSTR -+ | set_oper_type TMP0, TMP0 -+ | add TMP0, TMP0, STR:RC -+ | mr CARG2, TAB:RB -+ | std TMP0, 0(CARG3) -+ | bl extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k) -+ | // Returns TValue *. -+ | ld BASE, L->base -+ | stfd f14, 0(CRET1) -+ | b <3 // No 2nd write barrier needed. -+ | -+ |7: // Possible table write barrier for the value. Skip valiswhite check. -+ | barrierback TAB:RB, TMP3, TMP0 -+ | b <3 -+ break; -+ case BC_TSETB: -+ | // RA = src*8, RB = table*8, RC = index*8 -+ | ldux CARG1, RB, BASE -+ | srdi TMP0, RC, 3 -+ | ld TAB:RB, 8(RB) -+ | checktab CARG1; bne ->vmeta_tsetb -+ | lwz TMP1, TAB:CARG1->asize -+ | ld TMP2, TAB:CARG1->array -+ | lbz TMP3, TAB:CARG1->marked -+ | cmpld TMP0, TMP1 -+ | lfdx f14, BASE, RA -+ | bge ->vmeta_tsetb -+ | ldx TMP1, TMP2, RC -+ | checknil_noclear TMP1; beq >5 -+ |1: -+ | andi. TMP0, TMP3, LJ_GC_BLACK // isblack(table) -+ | stfdx f14, TMP2, RC -+ | bne >7 -+ |2: -+ | ins_next -+ | -+ |5: // Check for __newindex if previous value is nil. -+ | ld TAB:TMP1, TAB:CARG1->metatable -+ | cmpldi TAB:TMP1, 0 -+ | beq <1 // No metatable: done. -+ | lbz TMP1, TAB:TMP1->nomm -+ | andi. TMP1, TMP1, 1<<MM_newindex -+ | bne <1 // 'no __newindex' flag set: done. -+ | b ->vmeta_tsetb // Caveat: preserve TMP0! -+ | -+ |7: // Possible table write barrier for the value. Skip valiswhite check. 
-+ | barrierback TAB:CARG1, TMP3, TMP0 -+ | b <2 -+ break; -+ case BC_TSETR: -+ | // RA = dst*8, RB = table*8, RC = key*8 -+ | add RB, BASE, RB -+ | ld TAB:CARG2, 0(RB) -+ | clear_field TAB:CARG2 -+ | add RC, BASE, RC -+ | lbz TMP3, TAB:CARG2->marked -+ | lwz TMP0, TAB:CARG2->asize -+ | ld CARG3, 0(RC) -+ | clear_field CARG3 -+ | ld TMP1, TAB:CARG2->array -+ | andi. TMP2, TMP3, LJ_GC_BLACK // isblack(table) -+ | bne >7 -+ |2: -+ | cmpld TMP0, CARG3 -+ | sldi TMP2, CARG3, 3 -+ | lfdx f14, BASE, RA -+ | ble ->vmeta_tsetr // In array part? -+ | ins_next1 -+ | stfdx f14, TMP1, TMP2 -+ | ins_next2 -+ | -+ |7: // Possible table write barrier for the value. Skip valiswhite check. -+ | barrierback TAB:CARG2, TMP3, TMP2 -+ | b <2 -+ break; -+ -+ -+ case BC_TSETM: -+ | // RA = base*8 (table at base-1), RD = num_const*8 (start index) -+ | add RA, BASE, RA -+ |1: -+ | add TMP3, KBASE, RD -+ | ld TAB:CARG2, -8(RA) // Guaranteed to be a table. -+ | clear_field TAB:CARG2 -+ | addic. TMP0, MULTRES, -8 -+ | ld TMP3, 0(TMP3) // Integer constant needs to be cleaned -+ | srdi CARG3, TMP0, 3 -+ | clear_field TMP3 -+ | beq >4 // Nothing to copy? -+ | add CARG3, CARG3, TMP3 -+ | lwz TMP2, TAB:CARG2->asize -+ | sldi TMP1, TMP3, 3 -+ | lbz TMP3, TAB:CARG2->marked -+ | cmpld CARG3, TMP2 -+ | add TMP2, RA, TMP0 -+ | ld TMP0, TAB:CARG2->array -+ | bgt >5 -+ | add TMP1, TMP1, TMP0 -+ | andi. TMP0, TMP3, LJ_GC_BLACK // isblack(table) -+ |3: // Copy result slots to table. -+ | lfd f0, 0(RA) -+ | addi RA, RA, 8 -+ | cmpd cr1, RA, TMP2 -+ | stfd f0, 0(TMP1) -+ | addi TMP1, TMP1, 8 -+ | blt cr1, <3 -+ | bne >7 -+ |4: -+ | ins_next -+ | -+ |5: // Need to resize array part. -+ | std BASE, L->base -+ | mr CARG1, L -+ | std PC, SAVE_PC -+ | mr SAVE0, RD -+ | bl extern lj_tab_reasize // (lua_State *L, GCtab *t, int nasize) -+ | // Must not reallocate the stack. -+ | mr RD, SAVE0 -+ | b <1 -+ | -+ |7: // Possible table write barrier for any value. Skip valiswhite check. 
-+ | barrierback TAB:CARG2, TMP3, TMP0 -+ | b <4 -+ break; -+ -+ /* -- Calls and vararg handling ----------------------------------------- */ -+ -+ case BC_CALLM: -+ | // RA = base*8, (RB = (nresults+1)*8,) RC = extra_nargs*8 -+ | add NARGS8:RC, NARGS8:RC, MULTRES -+ | // Fall through. Assumes BC_CALL follows. -+ break; -+ case BC_CALL: -+ | // RA = base*8, (RB = (nresults+1)*8,) RC = (nargs+1)*8 -+ | mr TMP2, BASE // needed on vmeta_call! -+ | ldux LFUNC:RB, BASE, RA -+ | subi NARGS8:RC, NARGS8:RC, 8 -+ | addi BASE, BASE, 16 -+ | checkfunc RB; bne ->vmeta_call -+ | ins_call -+ break; -+ -+ case BC_CALLMT: -+ | // RA = base*8, (RB = 0,) RC = extra_nargs*8 -+ | add NARGS8:RC, NARGS8:RC, MULTRES -+ | // Fall through. Assumes BC_CALLT follows. -+ break; -+ case BC_CALLT: -+ | // RA = base*8, (RB = 0,) RC = (nargs+1)*8 -+ | ldux TMP0, RA, BASE -+ | ld LFUNC:RB, 0(RA) -+ | clear_field RB -+ | subi NARGS8:RC, NARGS8:RC, 8 -+ | ld TMP1, FRAME_PC(BASE) -+ | checkfunc TMP0 -+ | addi RA, RA, 16 -+ | bne ->vmeta_callt -+ |->BC_CALLT_Z: -+ | andi. TMP0, TMP1, FRAME_TYPE // Caveat: preserve cr0 until the crand. -+ | lbz TMP3, LFUNC:RB->ffid -+ | xori TMP2, TMP1, FRAME_VARG -+ | cmpldi cr1, NARGS8:RC, 0 -+ | bne >7 -+ |1: -+ | std LFUNC:RB, FRAME_FUNC(BASE) // Copy function down, but keep PC. -+ | li TMP2, 0 -+ | cmpldi cr7, TMP3, 1 // (> FF_C) Calling a fast function? -+ | beq cr1, >3 -+ |2: -+ | addi TMP3, TMP2, 8 -+ | lfdx f0, RA, TMP2 -+ | cmpld cr1, TMP3, NARGS8:RC -+ | stfdx f0, BASE, TMP2 -+ | mr TMP2, TMP3 -+ | bne cr1, <2 -+ |3: -+ | crand 4*cr0+eq, 4*cr0+eq, 4*cr7+gt -+ | beq >5 -+ |4: -+ | ins_callt -+ | -+ |5: // Tailcall to a fast function with a Lua frame below. -+ | lwz INS, -4(TMP1) -+ | decode_RA8 RA, INS -+ | sub TMP1, BASE, RA -+ | ld LFUNC:TMP1, FRAME_FUNC-16(TMP1) -+ | clear_field TMP1 -+ | ld TMP1, LFUNC:TMP1->pc -+ | ld KBASE, PC2PROTO(k)(TMP1) // Need to prepare KBASE. -+ | b <4 -+ | -+ |7: // Tailcall from a vararg function. -+ | andi. 
TMP0, TMP2, FRAME_TYPEP -+ | bne <1 // Vararg frame below? -+ | sub BASE, BASE, TMP2 // Relocate BASE down. -+ | ld TMP1, FRAME_PC(BASE) -+ | andi. TMP0, TMP1, FRAME_TYPE -+ | b <1 -+ break; -+ -+ case BC_ITERC: -+ | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 ((2+1)*8)) -+ | mr TMP2, BASE -+ | add BASE, BASE, RA -+ | ld LFUNC:RB, -24(BASE) -+ | ld CARG1, -16(BASE) -+ | ld CARG2, -8(BASE) -+ | std LFUNC:RB, 0(BASE) // Copy callable. -+ | checkfunc LFUNC:RB -+ | li NARGS8:RC, 16 // Iterators get 2 arguments. -+ | std CARG2, 24(BASE) // Copy state. -+ | stdu CARG1, 16(BASE) // Copy control var. -+ | bne ->vmeta_call -+ | ins_call -+ break; -+ -+ case BC_ITERN: -+ | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 (2+1)*8) -+ |.if JIT -+ | // NYI: add hotloop, record BC_ITERN. -+ |.endif -+ | add RA, BASE, RA -+ | ld TAB:RB, -16(RA) -+ | clear_field TAB:RB -+ | lwz RC, -8(RA) // Get index from control var. -+ | lwz TMP0, TAB:RB->asize -+ | ld TMP1, TAB:RB->array -+ | addi PC, PC, 4 -+ |1: // Traverse array part. -+ | cmpld RC, TMP0 -+ | sldi TMP3, RC, 3 -+ | bge >5 // Index points after array part? -+ | ldx TMP2, TMP1, TMP3 -+ | lfdx f0, TMP1, TMP3 -+ | checknil_noclear TMP2 -+ | lwz INS, -4(PC) -+ | beq >4 -+ | add RC, RC, TISNUM -+ | std RC, 0(RA) -+ | addi RC, RC, 1 -+ | addis TMP3, PC, -(BCBIAS_J*4 >> 16) -+ | stfd f0, 8(RA) -+ | decode_RD4 TMP1, INS -+ | std RC, -8(RA) // Update control var. -+ | add PC, TMP1, TMP3 -+ |3: -+ | ins_next -+ | -+ |4: // Skip holes in array part. -+ | addi RC, RC, 1 -+ | b <1 -+ | -+ |5: // Traverse hash part. -+ | lwz TMP1, TAB:RB->hmask -+ | sub RC, RC, TMP0 -+ | ld TMP2, TAB:RB->node -+ |6: -+ | cmpld RC, TMP1 // End of iteration? Branch to ITERL+1. 
-+ | sldi TMP3, RC, 5 -+ | bgty <3 -+ | sldi RB, RC, 3 -+ | sub TMP3, TMP3, RB -+ | ldx RB, TMP2, TMP3 -+ | lfdx f0, TMP2, TMP3 -+ | add NODE:TMP3, TMP2, TMP3 -+ | checknil_noclear RB -+ | lwz INS, -4(PC) -+ | beq >7 -+ | lfd f1, NODE:TMP3->key -+ | addis TMP2, PC, -(BCBIAS_J*4 >> 16) -+ | stfd f0, 8(RA) -+ | add RC, RC, TMP0 -+ | decode_RD4 TMP1, INS -+ | stfd f1, 0(RA) -+ | addi RC, RC, 1 -+ | add PC, TMP1, TMP2 -+ | std RC, -8(RA) // Update control var. -+ | b <3 -+ | -+ |7: // Skip holes in hash part. -+ | addi RC, RC, 1 -+ | b <6 -+ break; -+ -+ case BC_ISNEXT: -+ | // RA = base*8, RD = target (points to ITERN) -+ | add RA, BASE, RA -+ | ld TMP1, -24(RA) -+ | get_oper_type TMP0, TMP1 -+ | cmpdi cr1, TMP0, LJ_TFUNC -+ | ld TMP2, -16(RA) -+ | get_oper_type TMP2, TMP2 -+ | cmpdi cr0, TMP2, LJ_TTAB -+ | ld TMP3, -8(RA) -+ | get_oper_type TMP3, TMP3 -+ | cmpdi cr6, TMP3, LJ_TNIL -+ | -+ | bne cr1, >5 -+ | -+ | clear_field TMP1 -+ | lbz TMP1, CFUNC:TMP1->ffid -+ | crand 4*cr0+eq, 4*cr0+eq, 4*cr6+eq -+ | cmpwi cr7, TMP1, FF_next_N -+ | srdi TMP0, RD, 1 -+ | crand 4*cr0+eq, 4*cr0+eq, 4*cr7+eq -+ | add TMP3, PC, TMP0 -+ | bne cr0, >5 -+ | lus TMP1, 0xfffe -+ | ori TMP1, TMP1, 0x7fff -+ | sldi TMP1, TMP1, 32 // Initialize control var. -+ | std TMP1, -8(RA) -+ | addis PC, TMP3, -(BCBIAS_J*4 >> 16) -+ |1: -+ | ins_next -+ |5: // Despecialize bytecode if any of the checks fail. -+ | li TMP0, BC_JMP -+ | li TMP1, BC_ITERC -+ | stb TMP0, -4(PC) -+ | addis PC, TMP3, -(BCBIAS_J*4 >> 16) -+ | stb TMP1, 0(PC) -+ | b <1 -+ break; -+ -+ case BC_VARG: -+ | // RA = base*8, RB = (nresults+1)*8, RC = numparams*8 -+ | ld TMP0, FRAME_PC(BASE) -+ | add RC, BASE, RC -+ | add RA, BASE, RA -+ | addi RC, RC, FRAME_VARG -+ | add TMP2, RA, RB -+ | subi TMP3, BASE, 16 // TMP3 = vtop -+ | sub RC, RC, TMP0 // RC = vbase -+ | // Note: RC may now be even _above_ BASE if nargs was < numparams. -+ | cmpldi cr1, RB, 0 -+ | sub. TMP1, TMP3, RC -+ | beq cr1, >5 // Copy all varargs? 
-+ | subi TMP2, TMP2, 16 -+ | ble >2 // No vararg slots? -+ |1: // Copy vararg slots to destination slots. -+ | lfd f0, 0(RC) -+ | addi RC, RC, 8 -+ | stfd f0, 0(RA) -+ | cmpld RA, TMP2 -+ | cmpld cr1, RC, TMP3 -+ | bge >3 // All destination slots filled? -+ | addi RA, RA, 8 -+ | blt cr1, <1 // More vararg slots? -+ |2: // Fill up remainder with nil. -+ | std TISNIL, 0(RA) -+ | cmpld RA, TMP2 -+ | addi RA, RA, 8 -+ | blt <2 -+ |3: -+ | ins_next -+ | -+ |5: // Copy all varargs. -+ | ld TMP0, L->maxstack -+ | li MULTRES, 8 // MULTRES = (0+1)*8 -+ | bley <3 // No vararg slots? -+ | add TMP2, RA, TMP1 -+ | cmpld TMP2, TMP0 -+ | addi MULTRES, TMP1, 8 -+ | bgt >7 -+ |6: -+ | lfd f0, 0(RC) -+ | addi RC, RC, 8 -+ | stfd f0, 0(RA) -+ | cmpld RC, TMP3 -+ | addi RA, RA, 8 -+ | blt <6 // More vararg slots? -+ | b <3 -+ | -+ |7: // Grow stack for varargs. -+ | mr CARG1, L -+ | std RA, L->top -+ | sub SAVE0, RC, BASE // Need delta, because BASE may change. -+ | std BASE, L->base -+ | sub RA, RA, BASE -+ | std PC, SAVE_PC -+ | srdi CARG2, TMP1, 3 -+ | bl extern lj_state_growstack // (lua_State *L, int n) -+ | ld BASE, L->base -+ | add RA, BASE, RA -+ | add RC, BASE, SAVE0 -+ | subi TMP3, BASE, 16 -+ | b <6 -+ break; -+ -+ /* -- Returns ----------------------------------------------------------- */ -+ -+ case BC_RETM: -+ | // RA = results*8, RD = extra_nresults*8 -+ | add RD, RD, MULTRES // MULTRES >= 8, so RD >= 8. -+ | // Fall through. Assumes BC_RET follows. -+ break; -+ -+ case BC_RET: -+ | // RA = results*8, RD = (nresults+1)*8 -+ | ld PC, FRAME_PC(BASE) -+ | add RA, BASE, RA -+ | mr MULTRES, RD -+ |1: -+ | andi. 
TMP0, PC, FRAME_TYPE -+ | xori TMP1, PC, FRAME_VARG -+ | bne ->BC_RETV_Z -+ | -+ |->BC_RET_Z: -+ | // BASE = base, RA = resultptr, RD = (nresults+1)*8, PC = return -+ | lwz INS, -4(PC) -+ | cmpdi RD, 8 -+ | subi TMP2, BASE, 16 -+ | subi RC, RD, 8 -+ | decode_RB8 RB, INS -+ | beq >3 -+ | li TMP1, 0 -+ |2: -+ | addi TMP3, TMP1, 8 -+ | lfdx f0, RA, TMP1 -+ | cmpd TMP3, RC -+ | stfdx f0, TMP2, TMP1 -+ | beq >3 -+ | addi TMP1, TMP3, 8 -+ | lfdx f1, RA, TMP3 -+ | cmpd TMP1, RC -+ | stfdx f1, TMP2, TMP3 -+ | bne <2 -+ |3: -+ |5: -+ | cmpld RB, RD -+ | decode_RA8 RA, INS -+ | bgt >6 -+ | sub BASE, TMP2, RA -+ | ld LFUNC:TMP1, FRAME_FUNC(BASE) -+ | clear_field TMP1 -+ | ins_next1 -+ | ld TMP1, LFUNC:TMP1->pc -+ | ld KBASE, PC2PROTO(k)(TMP1) -+ | ins_next2 -+ | -+ |6: // Fill up results with nil. -+ | subi TMP1, RD, 8 -+ | addi RD, RD, 8 -+ | stdx TISNIL, TMP2, TMP1 -+ | b <5 -+ | -+ |->BC_RETV_Z: // Non-standard return case. -+ | andi. TMP2, TMP1, FRAME_TYPEP -+ | bne ->vm_return -+ | // Return from vararg function: relocate BASE down. -+ | sub BASE, BASE, TMP1 -+ | ld PC, FRAME_PC(BASE) -+ | b <1 -+ break; -+ -+ case BC_RET0: case BC_RET1: -+ | // RA = results*8, RD = (nresults+1)*8 -+ | ld PC, FRAME_PC(BASE) -+ | add RA, BASE, RA -+ | mr MULTRES, RD -+ | andi. TMP0, PC, FRAME_TYPE -+ | xori TMP1, PC, FRAME_VARG -+ | bney ->BC_RETV_Z -+ | -+ | lwz INS, -4(PC) -+ | subi TMP2, BASE, 16 -+ | decode_RB8 RB, INS -+ if (op == BC_RET1) { -+ | lfd f0, 0(RA) -+ | stfd f0, 0(TMP2) -+ } -+ |5: -+ | cmpld RB, RD -+ | decode_RA8 RA, INS -+ | bgt >6 -+ | sub BASE, TMP2, RA -+ | ld LFUNC:TMP1, FRAME_FUNC(BASE) -+ | clear_field TMP1 -+ | ins_next1 -+ | ld TMP1, LFUNC:TMP1->pc -+ | ld KBASE, PC2PROTO(k)(TMP1) -+ | ins_next2 -+ | -+ |6: // Fill up results with nil. 
-+ | subi TMP1, RD, 8 -+ | addi RD, RD, 8 -+ | stdx TISNIL, TMP2, TMP1 -+ | b <5 -+ break; -+ -+ /* -- Loops and branches ------------------------------------------------ */ -+ -+ case BC_FORL: -+ |.if JIT -+ | hotloop -+ |.endif -+ | // Fall through. Assumes BC_IFORL follows. -+ break; -+ -+ case BC_JFORI: -+ case BC_JFORL: -+#if !LJ_HASJIT -+ break; -+#endif -+ case BC_FORI: -+ case BC_IFORL: -+ | // RA = base*8, RD = target (after end of loop or start of loop) -+ vk = (op == BC_IFORL || op == BC_JFORL); -+ | // Integer loop. -+ | ldux CARG1, RA, BASE -+ | get_oper_type TMP1, CARG1 -+ | checknum cr0, CARG1 -+ if (vk) { -+ | ld CARG3, FORL_STEP*8(RA) -+ | checknum CARG3 -+ | bne >9 -+ | // Need to check overflow for (a<<32) + (b<<32). -+ | rldicr TMP0, CARG1, 32, 31 -+ | rldicr TMP2, CARG3, 32, 31 -+ | add CARG1, CARG1, CARG3 -+ | addo. TMP0, TMP0, TMP2 -+ | cmpdi cr6, CARG3, 0 -+ | ld CARG2, FORL_STOP*8(RA) -+ | checknum CARG2 -+ | bso >6 -+ |4: -+ | std CARG1, FORL_IDX*8(RA) -+ | checknum CARG1 -+ } else { -+ | ld CARG3, FORL_STEP*8(RA) -+ | get_oper_type TMP3, CARG3 -+ | checknum cr7, CARG3 -+ | ld CARG2, FORL_STOP*8(RA) -+ | get_oper_type TMP2, CARG2 -+ | checknum cr1, CARG2 -+ | crand 4*cr0+eq, 4*cr0+eq, 4*cr7+eq -+ | crand 4*cr0+eq, 4*cr0+eq, 4*cr1+eq -+ | cmpdi cr6, CARG3, 0 -+ | bne >9 -+ } -+ | blt cr6, >5 -+ | cmpd CARG1, CARG2 -+ |1: -+ if (op != BC_JFORL) { -+ | srdi RD, RD, 1 -+ } -+ | add_oper_type CARG1, TISNUM -+ | std CARG1, FORL_EXT*8(RA) -+ if (op != BC_JFORL) { -+ | add RD, PC, RD -+ } -+ if (op == BC_FORI) { -+ | bgt >3 // See FP loop below. -+ } else if (op == BC_JFORI) { -+ | addis PC, RD, -(BCBIAS_J*4 >> 16) -+ | bley >7 -+ } else if (op == BC_IFORL) { -+ | bgt >2 -+ | addis PC, RD, -(BCBIAS_J*4 >> 16) -+ } else { -+ | bley =>BC_JLOOP -+ } -+ |2: -+ | ins_next -+ |5: // Invert check for negative step. -+ | cmpd CARG2, CARG1 -+ | b <1 -+ if (vk) { -+ |6: // Potential overflow. -+ | checkov <4 // Ignore unrelated overflow. 
-+ | b <2 -+ } -+ if (vk) { -+ |9: // FP loop. -+ | lfd f1, FORL_IDX*8(RA) -+ | lfd f3, FORL_STEP*8(RA) -+ | lfd f2, FORL_STOP*8(RA) -+ | ld TMP3, FORL_STEP*8(RA) -+ | fadd f1, f1, f3 -+ | stfd f1, FORL_IDX*8(RA) -+ } else { -+ |9: // FP loop. -+ | lfd f1, FORL_IDX*8(RA) -+ | crand 4*cr0+lt, 4*cr0+lt, 4*cr7+lt -+ | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt -+ | lfd f2, FORL_STOP*8(RA) -+ | bge ->vmeta_for -+ } -+ | cmpdi cr6, TMP3, 0 -+ if (op != BC_JFORL) { -+ | srdi RD, RD, 1 -+ } -+ | stfd f1, FORL_EXT*8(RA) -+ if (op != BC_JFORL) { -+ | add RD, PC, RD -+ } -+ | fcmpu cr0, f1, f2 -+ if (op == BC_JFORI) { -+ | addis PC, RD, -(BCBIAS_J*4 >> 16) -+ } -+ | blt cr6, >5 -+ if (op == BC_FORI) { -+ | bgt >3 -+ } else if (op == BC_IFORL) { -+ | bgty <2 -+ |1: -+ | addis PC, RD, -(BCBIAS_J*4 >> 16) -+ } else if (op == BC_JFORI) { -+ | bley >7 -+ } else { -+ | bley =>BC_JLOOP -+ } -+ | b <2 -+ |5: // Negative step. -+ if (op == BC_FORI) { -+ | bge <2 -+ |3: // Used by integer loop, too. -+ | addis PC, RD, -(BCBIAS_J*4 >> 16) -+ } else if (op == BC_IFORL) { -+ | bgey <1 -+ } else if (op == BC_JFORI) { -+ | bgey >7 -+ } else { -+ | bgey =>BC_JLOOP -+ } -+ | b <2 -+ if (op == BC_JFORI) { -+ |7: -+ | lwz INS, -4(PC) -+ | decode_RD8 RD, INS -+ | b =>BC_JLOOP -+ } -+ break; -+ -+ case BC_ITERL: -+ |.if JIT -+ | hotloop -+ |.endif -+ | // Fall through. Assumes BC_IITERL follows. -+ break; -+ -+ case BC_JITERL: -+#if !LJ_HASJIT -+ break; -+#endif -+ case BC_IITERL: -+ | // RA = base*8, RD = target -+ | ldux TMP1, RA, BASE -+ | checknil_noclear TMP1; beq >1 // Stop if iterator returned nil. -+ if (op == BC_JITERL) { -+ | std TMP1, -8(RA) -+ | b =>BC_JLOOP -+ } else { -+ | branch_RD // Otherwise save control var + branch. 
-+ | std TMP1, -8(RA) -+ } -+ |1: -+ | ins_next -+ break; -+ -+ case BC_LOOP: -+ | // RA = base*8, RD = target (loop extent) -+ | // Note: RA/RD is only used by trace recorder to determine scope/extent -+ | // This opcode does NOT jump, its only purpose is to detect a hot loop. -+ |.if JIT -+ | hotloop -+ |.endif -+ | // Fall through. Assumes BC_ILOOP follows. -+ break; -+ -+ case BC_ILOOP: -+ | // RA = base*8, RD = target (loop extent) -+ | ins_next -+ break; -+ -+ case BC_JLOOP: -+ |.if JIT -+ | NYI -+ |.endif -+ break; -+ -+ case BC_JMP: -+ | // RA = base*8 (only used by trace recorder), RD = target -+ | branch_RD -+ | ins_next -+ break; -+ -+ /* -- Function headers -------------------------------------------------- */ -+ -+ case BC_FUNCF: -+ |.if JIT -+ | hotcall -+ |.endif -+ case BC_FUNCV: /* NYI: compiled vararg functions. */ -+ | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow. -+ break; -+ -+ case BC_JFUNCF: -+#if !LJ_HASJIT -+ break; -+#endif -+ case BC_IFUNCF: -+ | // BASE = new base, RA = BASE+framesize*8, RB = LFUNC, RC = nargs*8 -+ | ld TMP2, L->maxstack -+ | lbz TMP1, -4+PC2PROTO(numparams)(PC) -+ | ld KBASE, -4+PC2PROTO(k)(PC) -+ | cmpld RA, TMP2 -+ | sldi TMP1, TMP1, 3 -+ | bgt ->vm_growstack_l -+ if (op != BC_JFUNCF) { -+ | ins_next1 -+ } -+ |2: -+ | cmpld NARGS8:RC, TMP1 // Check for missing parameters. -+ | blt >3 -+ if (op == BC_JFUNCF) { -+ | decode_RD8 RD, INS -+ | b =>BC_JLOOP -+ } else { -+ | ins_next2 -+ } -+ | -+ |3: // Clear missing parameters. -+ | stdx TISNIL, BASE, NARGS8:RC -+ | addi NARGS8:RC, NARGS8:RC, 8 -+ | b <2 -+ break; -+ -+ case BC_JFUNCV: -+#if !LJ_HASJIT -+ break; -+#endif -+ | NYI // NYI: compiled vararg functions -+ break; /* NYI: compiled vararg functions. */ -+ -+ case BC_IFUNCV: -+ | // BASE = new base, RA = BASE+framesize*8, RB = LFUNC, RC = nargs*8 -+ | ld TMP2, L->maxstack -+ | add TMP1, BASE, RC -+ | add RA, RA, RC -+ | std LFUNC:RB, 0(TMP1) // Store (untagged) copy of LFUNC.
-+ | addi TMP3, RC, 16+FRAME_VARG -+ | ld KBASE, -4+PC2PROTO(k)(PC) -+ | cmpld RA, TMP2 -+ | std TMP3, 8(TMP1) // Store delta + FRAME_VARG. -+ | bge ->vm_growstack_l -+ | lbz TMP2, -4+PC2PROTO(numparams)(PC) -+ | mr RA, BASE -+ | mr RC, TMP1 -+ | addi TMP1, TMP1, 16 -+ | ins_next1 -+ | cmpdi TMP2, 0 -+ | mr BASE, TMP1 -+ | beq >3 -+ |1: -+ | cmpld RA, RC // Less args than parameters? -+ | ld TMP0, 0(RA) -+ | bge >4 -+ | std TISNIL, 0(RA) // Clear old fixarg slot (help the GC). -+ | addi RA, RA, 8 -+ |2: -+ | addic. TMP2, TMP2, -1 -+ | std TMP0, 0(TMP1) -+ | addi TMP1, TMP1, 8 -+ | bne <1 -+ |3: -+ | ins_next2 -+ | -+ |4: // Clear missing parameters. -+ | mr TMP0, TISNIL -+ | b <2 -+ break; -+ -+ case BC_FUNCC: -+ case BC_FUNCCW: -+ | // BASE = new base, RA = BASE+framesize*8, RB = CFUNC, RC = nargs*8 -+ if (op == BC_FUNCC) { -+ | ld RD, CFUNC:RB->f -+ } else { -+ | ld RD, DISPATCH_GL(wrapf)(DISPATCH) -+ } -+ | add TMP1, RA, NARGS8:RC -+ | ld TMP2, L->maxstack -+ | add RC, BASE, NARGS8:RC -+ | std BASE, L->base -+ | cmpld TMP1, TMP2 -+ | std RC, L->top -+ | li_vmstate C -+ | mtctr RD // RD is r12, the function linkage register -+ if (op == BC_FUNCCW) { -+ | ld CARG2, CFUNC:RB->f -+ } -+ | mr CARG1, L -+ | bgt ->vm_growstack_c // Need to grow stack. -+ | st_vmstate -+ | bctrl // (lua_State *L [, lua_CFunction f]) -+ | // Returns nresults. -+ | ld BASE, L->base -+ | ld TOCREG, SAVE_TOC -+ | sldi RD, CRET1, 3 -+ | ld TMP1, L->top -+ | li_vmstate INTERP -+ | ld PC, FRAME_PC(BASE) // Fetch PC of caller. 
-+ | std L, DISPATCH_GL(cur_L)(DISPATCH) -+ | sub RA, TMP1, RD // RA = L->top - nresults*8 -+ | st_vmstate -+ | b ->vm_returnc -+ break; -+ -+ /* ---------------------------------------------------------------------- */ -+ -+ default: -+ fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]); -+ exit(2); -+ break; -+ } -+} -+ -+static int build_backend(BuildCtx *ctx) -+{ -+ int op; -+ -+ dasm_growpc(Dst, BC__MAX); -+ -+ build_subroutines(ctx); -+ -+ |.code_op -+ for (op = 0; op < BC__MAX; op++) -+ build_ins(ctx, (BCOp)op, op); -+ -+ return BC__MAX; -+} -+ -+/* Emit pseudo frame-info for all assembler functions. */ -+static void emit_asm_debug(BuildCtx *ctx) -+{ -+ int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code); -+ int i, lr_offset = -16 >> 2; -+ switch (ctx->mode) { -+ case BUILD_elfasm: -+ fprintf(ctx->fp, "\t.section .debug_frame,\"\",@progbits\n"); -+ fprintf(ctx->fp, -+ ".Lframe0:\n" /* Common Information Entry (CIE) */ -+ "\t.long .LECIE0-.LSCIE0\n" /* length */ -+ ".LSCIE0:\n" -+ "\t.long 0xffffffff\n" /* CIE_Id */ -+ "\t.byte 0x1\n" /* Version */ -+ "\t.string \"\"\n" /* augmentation */ -+ "\t.uleb128 0x1\n" /* code_alignment_factor */ -+ "\t.sleb128 -4\n" /* data_alignment_factor */ -+ "\t.byte 65\n" /* return_address_register (LR) */ -+ "\t.byte 0xc\n\t.uleb128 1\n\t.uleb128 0\n" /* DW_CFA_def_cfa */ -+ "\t.align 2\n" -+ ".LECIE0:\n\n"); -+ fprintf(ctx->fp, -+ ".LSFDE0:\n" /* Frame Description Entry (FDE) */ -+ "\t.long .LEFDE0-.LASFDE0\n" /* length */ -+ ".LASFDE0:\n" -+ "\t.long .Lframe0\n" /* CIE_ptr */ -+ "\t.long .Lbegin\n" /* initial_location */ -+ "\t.long %d\n" /* address_range */ -+ "\t.byte 0xe\n\t.uleb128 %d\n" /* DW_CFA_def_cfa_offset */ -+ /* DW_CFA_offset_extended_sf */ -+ "\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 %d\n" -+ /* DW_CFA_offset_extended */ -+ "\t.byte 0x5\n\t.uleb128 70\n\t.uleb128 55\n", -+ fcofs, CFRAME_SIZE, lr_offset); -+ for (i = 14; i <= 31; i++) -+ fprintf(ctx->fp, -+ "\t.byte %d\n\t.uleb128 
%d\n" /* DW_CFA_offset from r14 to r31 */ -+ "\t.byte %d\n\t.uleb128 %d\n", /* DW_CFA_offset from f14 to f31 */ -+ 0x80+i, 38+2*(31-i), 0x80+32+i, 2+2*(31-i)); -+ fprintf(ctx->fp, -+ "\t.align 2\n" -+ ".LEFDE0:\n\n"); -+#if LJ_HASFFI -+ fprintf(ctx->fp, -+ ".LSFDE1:\n" /* Frame Description Entry (FDE) */ -+ "\t.long .LEFDE1-.LASFDE1\n" /* length */ -+ ".LASFDE1:\n" -+ "\t.long .Lframe0\n" /* CIE_ptr */ -+ "\t.long lj_vm_ffi_call\n" /* initial_location */ -+ "\t.long %d\n" /* address_range */ -+ /* DW_CFA_offset_extended_sf */ -+ "\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 %d\n" -+ "\t.byte 0x8e\n\t.uleb128 2\n" /* DW_CFA_offset */ -+ "\t.byte 0xd\n\t.uleb128 0xe\n" /* DW_CFA_def_cfa_register */ -+ "\t.align 2\n" -+ ".LEFDE1:\n\n", (int)ctx->codesz - fcofs, lr_offset); -+#endif -+#if !LJ_NO_UNWIND -+ fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@progbits\n"); -+ fprintf(ctx->fp, -+ ".Lframe1:\n" -+ "\t.long .LECIE1-.LSCIE1\n" /* length */ -+ ".LSCIE1:\n" /* Common Information Entry (CIE) */ -+ "\t.long 0\n" /* CIE_Id */ -+ "\t.byte 0x1\n" /* Version */ -+ "\t.string \"zPR\"\n" /* augmentation string */ -+ "\t.uleb128 0x1\n" /* code_alignment_factor */ -+ "\t.sleb128 -4\n" /* data_alignment_factor */ -+ "\t.byte 65\n" /* return_address_register (LR) */ -+ "\t.uleb128 6\n" /* augmentation length */ -+ "\t.byte 0x1b\n" /* pcrel|sdata4 */ -+ "\t.long lj_err_unwind_dwarf-.\n" -+ "\t.byte 0x1b\n" /* pcrel|sdata4 */ -+ "\t.byte 0xc\n\t.uleb128 1\n\t.uleb128 0\n" /* DW_CFA_def_cfa */ -+ "\t.align 2\n" -+ ".LECIE1:\n\n"); -+ fprintf(ctx->fp, -+ ".LSFDE2:\n" -+ "\t.long .LEFDE2-.LASFDE2\n" -+ ".LASFDE2:\n" -+ "\t.long .LASFDE2-.Lframe1\n" -+ "\t.long .Lbegin-.\n" -+ "\t.long %d\n" -+ "\t.uleb128 0\n" /* augmentation length */ -+ "\t.byte 0xe\n\t.uleb128 %d\n" /* DW_CFA_def_cfa_offset */ -+ /* DW_CFA_offset_extended_sf */ -+ "\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 %d\n" -+ /* DW_CFA_offset_extended */ -+ "\t.byte 0x5\n\t.uleb128 70\n\t.uleb128 55\n", -+ fcofs, CFRAME_SIZE, 
lr_offset); -+ for (i = 14; i <= 31; i++) -+ fprintf(ctx->fp, -+ "\t.byte %d\n\t.uleb128 %d\n" /* DW_CFA_offset from r14 to r31 */ -+ "\t.byte %d\n\t.uleb128 %d\n", /* DW_CFA_offset from f14 to f31 */ -+ 0x80+i, 38+2*(31-i), 0x80+32+i, 2+2*(31-i)); -+ fprintf(ctx->fp, -+ "\t.align 2\n" -+ ".LEFDE2:\n\n"); -+#if LJ_HASFFI -+ fprintf(ctx->fp, -+ ".Lframe2:\n" -+ "\t.long .LECIE2-.LSCIE2\n" -+ ".LSCIE2:\n" /* Common Information Entry (CIE) */ -+ "\t.long 0\n" /* CIE_Id */ -+ "\t.byte 0x1\n" /* Version */ -+ "\t.string \"zR\"\n" /* augmentation string */ -+ "\t.uleb128 0x1\n" /* code_alignment_factor */ -+ "\t.sleb128 -4\n" /* data_alignment_factor */ -+ "\t.byte 65\n" /* return_address_register (LR) */ -+ "\t.uleb128 1\n" /* augmentation length */ -+ "\t.byte 0x1b\n" /* pcrel|sdata4 */ -+ "\t.byte 0xc\n\t.uleb128 1\n\t.uleb128 0\n" /* DW_CFA_def_cfa */ -+ "\t.align 2\n" -+ ".LECIE2:\n\n"); -+ fprintf(ctx->fp, -+ ".LSFDE3:\n" -+ "\t.long .LEFDE3-.LASFDE3\n" -+ ".LASFDE3:\n" -+ "\t.long .LASFDE3-.Lframe2\n" -+ "\t.long lj_vm_ffi_call-.\n" -+ "\t.long %d\n" -+ "\t.uleb128 0\n" /* augmentation length */ -+ /* DW_CFA_offset_extended_sf */ -+ "\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 %d\n" -+ "\t.byte 0x8e\n\t.uleb128 2\n" /* DW_CFA_offset */ -+ "\t.byte 0xd\n\t.uleb128 0xe\n" /* DW_CFA_def_cfa_register */ -+ "\t.align 2\n" -+ ".LEFDE3:\n\n", (int)ctx->codesz - fcofs, lr_offset); -+#endif -+#endif -+ break; -+ default: -+ break; -+ } -+} --- -2.12.2 - |