Xtensa cleanups and XEA3 support (#677) * xtensa: clean up stack usage in ffi_trampoline call The space for outgoing call arguments reserved in the stack frame of the function ffi_trampoline overlaps the register spill overflow area at the top of the frame. In the xtensa XEA2 exception architecture the layout of the overlapping areas is identical, so that even if the ffi_trampoline register frame gets spilled the memory contents do not change. This is not so with the xtensa XEA3 exception architecture, where registers a0 - a7 of a different function are spilled in that location. Reserve a spill area for 8 registers to avoid overlapping of the spill area with the outgoing call arguments area in the ffi_trampoline. Signed-off-by: Max Filippov <jcmvbkbc@gmail.com> * xtensa: support xtensa XEA3 exception architecture XEA3 requires that 32 bytes of register spill area are reserved in all functions. Fix the ffi_cacheflush entry instruction to satisfy this requirement. Signed-off-by: Max Filippov <jcmvbkbc@gmail.com> * xtensa: maintain stack alignment The xtensa ABI requires stack alignment on a 16 byte boundary and passing up to 6 arguments in registers. To simplify stack alignment maintenance, a fixed amount of stack space is reserved for arguments passed in registers and a variable but correctly aligned amount is reserved for the remaining arguments. After copying arguments to the stack and loading registers, the fixed part of the stack reservation is freed. Signed-off-by: Max Filippov <jcmvbkbc@gmail.com> * xtensa: fix err_bad_abi tests Check the ffi_cif::abi value in ffi_prep_closure_loc and return the FFI_BAD_ABI error if it's not one of the supported values. Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152
diff --git a/src/xtensa/ffi.c b/src/xtensa/ffi.c
index 9a0575f..82b42a1 100644
--- a/src/xtensa/ffi.c
+++ b/src/xtensa/ffi.c
@@ -86,11 +86,16 @@ ffi_status ffi_prep_cif_machdep(ffi_cif *cif)
break;
}
- /* Round the stack up to a full 4 register frame, just in case
- (we use this size in movsp). This way, it's also a multiple of
- 8 bytes for 64-bit arguments. */
- cif->bytes = FFI_ALIGN(cif->bytes, 16);
-
+ /* Round up stack size needed for arguments.
+ Allocate FFI_REGISTER_ARGS_SPACE bytes when there are only arguments
+ passed in registers, round space reserved for arguments passed on stack
+ up to ABI-specified alignment. */
+ if (cif->bytes < FFI_REGISTER_NARGS * 4)
+ cif->bytes = FFI_REGISTER_ARGS_SPACE;
+ else
+ cif->bytes = FFI_REGISTER_ARGS_SPACE +
+ FFI_ALIGN(cif->bytes - FFI_REGISTER_NARGS * 4,
+ XTENSA_STACK_ALIGNMENT);
return FFI_OK;
}
@@ -232,6 +237,9 @@ ffi_prep_closure_loc (ffi_closure* closure,
void *user_data,
void *codeloc)
{
+ if (cif->abi != FFI_SYSV)
+ return FFI_BAD_ABI;
+
/* copye trampoline to stack and patch 'ffi_closure_SYSV' pointer */
memcpy(closure->tramp, ffi_trampoline, FFI_TRAMPOLINE_SIZE);
*(unsigned int*)(&closure->tramp[8]) = (unsigned int)ffi_closure_SYSV;
@@ -277,15 +285,15 @@ ffi_closure_SYSV_inner(ffi_closure *closure, void **values, void *rvalue)
if (arg_types[i]->alignment == 8 && (areg & 1) != 0)
areg++;
- // skip the entry 16,a1 framework, add 16 bytes (4 registers)
+ // skip the entry a1, * framework, see ffi_trampoline
if (areg == FFI_REGISTER_NARGS)
- areg += 4;
+ areg = (FFI_REGISTER_ARGS_SPACE + 32) / 4;
if (arg_types[i]->type == FFI_TYPE_STRUCT)
{
int numregs = ((arg_types[i]->size + 3) & ~3) / 4;
if (areg < FFI_REGISTER_NARGS && areg + numregs > FFI_REGISTER_NARGS)
- areg = FFI_REGISTER_NARGS + 4;
+ areg = (FFI_REGISTER_ARGS_SPACE + 32) / 4;
}
avalue[i] = &values[areg];
diff --git a/src/xtensa/ffitarget.h b/src/xtensa/ffitarget.h
index 0ba728b..4231ed3 100644
--- a/src/xtensa/ffitarget.h
+++ b/src/xtensa/ffitarget.h
@@ -43,6 +43,10 @@ typedef enum ffi_abi {
#endif
#define FFI_REGISTER_NARGS 6
+#define XTENSA_STACK_ALIGNMENT 16
+#define FFI_REGISTER_ARGS_SPACE ((FFI_REGISTER_NARGS * 4 + \
+ XTENSA_STACK_ALIGNMENT - 1) & \
+ -XTENSA_STACK_ALIGNMENT)
/* ---- Definitions for closures ----------------------------------------- */
diff --git a/src/xtensa/sysv.S b/src/xtensa/sysv.S
index e942179..70e83ac 100644
--- a/src/xtensa/sysv.S
+++ b/src/xtensa/sysv.S
@@ -43,6 +43,9 @@
#error "xtensa/sysv.S out of sync with ffi.h"
#endif
+#define FFI_REGISTER_ARGS_OFFSET ((XTENSA_STACK_ALIGNMENT - \
+ FFI_REGISTER_NARGS * 4) & \
+ (XTENSA_STACK_ALIGNMENT - 1))
/* ffi_call_SYSV (rvalue, rbytes, flags, (*fnaddr)(), bytes, ecif)
void *rvalue; a2
@@ -62,28 +65,28 @@ ENTRY(ffi_call_SYSV)
mov a7, a1 # fp
movsp a1, a11 # set new sp = old_sp - bytes
+ # align ffi_prep_args stack argument so that arguments
+ # passed on stack if any start on 16-byte aligned boundary
+
+ addi a11, a11, FFI_REGISTER_ARGS_OFFSET
+
movi a8, ffi_prep_args
callx8 a8 # ffi_prep_args(ecif, stack)
- # prepare to move stack pointer back up to 6 arguments
- # note that 'bytes' is already aligned
-
- movi a10, 6*4
- sub a11, a6, a10
- movgez a6, a10, a11
- add a6, a1, a6
+ # prepare to move stack pointer back
+ # to point to arguments passed on stack
+ addi a6, a1, FFI_REGISTER_ARGS_SPACE
# we can pass up to 6 arguments in registers
# for simplicity, just load 6 arguments
- # (the stack size is at least 32 bytes, so no risk to cross boundaries)
- l32i a10, a1, 0
- l32i a11, a1, 4
- l32i a12, a1, 8
- l32i a13, a1, 12
- l32i a14, a1, 16
- l32i a15, a1, 20
+ l32i a10, a1, FFI_REGISTER_ARGS_OFFSET + 0
+ l32i a11, a1, FFI_REGISTER_ARGS_OFFSET + 4
+ l32i a12, a1, FFI_REGISTER_ARGS_OFFSET + 8
+ l32i a13, a1, FFI_REGISTER_ARGS_OFFSET + 12
+ l32i a14, a1, FFI_REGISTER_ARGS_OFFSET + 16
+ l32i a15, a1, FFI_REGISTER_ARGS_OFFSET + 20
# move stack pointer
@@ -167,7 +170,7 @@ END(ffi_call_SYSV)
ENTRY(ffi_cacheflush)
- entry a1, 16
+ entry a1, 32
1:
#if XCHAL_DCACHE_SIZE
@@ -187,7 +190,14 @@ END(ffi_cacheflush)
ENTRY(ffi_trampoline)
- entry a1, 16 + (FFI_REGISTER_NARGS * 4) + (4 * 4) # [ 0]
+ /* 32 bytes for spill + spill overflow area of a frame that uses
+ call8,
+ FFI_REGISTER_NARGS * 4 bytes for arguments passed in registers,
+ aligned up to 4 to maintain 16 byte stack alignment,
+ 4 * 4 bytes for the result.
+ This size must be in sync with ffi_closure_SYSV_inner logic.
+ */
+ entry a1, 32 + FFI_REGISTER_ARGS_SPACE + (4 * 4) # [ 0]
j 2f # [ 3]
.align 4 # [ 6]
1: .long 0 # [ 8]