Commit ab1677106605aba1c27665964ff90bea59612ce3

Max Filippov 2022-02-20T16:01:38

Xtensa cleanups and XEA3 support (#677) * xtensa: clean up stack usage in ffi_trampoline call Space for outgoing call arguments reserved in the stack frame of the function ffi_trampoline overlaps the register spill overflow area at the top of the frame. In the xtensa XEA2 exception architecture the layout of the overlapping areas is identical, so that even if the ffi_trampoline register frame gets spilled the memory contents don't change. This is not so with the xtensa XEA3 exception architecture, where registers a0 - a7 of a different function are spilled in that location. Reserve a spill area for 8 registers to avoid overlapping of the spill area with the outgoing call arguments area in the ffi_trampoline. Signed-off-by: Max Filippov <jcmvbkbc@gmail.com> * xtensa: support xtensa XEA3 exception architecture XEA3 requires that 32 bytes of register spill area are reserved in all functions. Fix the ffi_cacheflush entry instruction to satisfy this requirement. Signed-off-by: Max Filippov <jcmvbkbc@gmail.com> * xtensa: maintain stack alignment The xtensa ABI requires stack alignment on a 16 byte boundary and passing up to 6 arguments in registers. To simplify stack alignment maintenance, a fixed amount of stack space is reserved for arguments passed in registers and a variable but correctly aligned amount is reserved for the remaining arguments. After copying arguments to the stack and loading registers, the fixed part of the stack reservation is freed. Signed-off-by: Max Filippov <jcmvbkbc@gmail.com> * xtensa: fix err_bad_abi tests Check the ffi_cif::abi value in ffi_prep_closure_loc and return the FFI_BAD_ABI error if it's not one of the supported values. Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>

diff --git a/src/xtensa/ffi.c b/src/xtensa/ffi.c
index 9a0575f..82b42a1 100644
--- a/src/xtensa/ffi.c
+++ b/src/xtensa/ffi.c
@@ -86,11 +86,16 @@ ffi_status ffi_prep_cif_machdep(ffi_cif *cif)
       break;
   }
 
-  /* Round the stack up to a full 4 register frame, just in case
-     (we use this size in movsp). This way, it's also a  multiple of
-     8 bytes for 64-bit arguments.  */
-  cif->bytes = FFI_ALIGN(cif->bytes, 16);
-
+  /* Round up stack size needed for arguments.
+     Allocate FFI_REGISTER_ARGS_SPACE bytes when there are only arguments
+     passed in registers, round space reserved for arguments passed on stack
+     up to ABI-specified alignment.  */
+  if (cif->bytes < FFI_REGISTER_NARGS * 4)
+    cif->bytes = FFI_REGISTER_ARGS_SPACE;
+  else
+    cif->bytes = FFI_REGISTER_ARGS_SPACE +
+	    FFI_ALIGN(cif->bytes - FFI_REGISTER_NARGS * 4,
+		      XTENSA_STACK_ALIGNMENT);
   return FFI_OK;
 }
 
@@ -232,6 +237,9 @@ ffi_prep_closure_loc (ffi_closure* closure,
                       void *user_data,
                       void *codeloc)
 {
+  if (cif->abi != FFI_SYSV)
+    return FFI_BAD_ABI;
+
   /* copye trampoline to stack and patch 'ffi_closure_SYSV' pointer */
   memcpy(closure->tramp, ffi_trampoline, FFI_TRAMPOLINE_SIZE);
   *(unsigned int*)(&closure->tramp[8]) = (unsigned int)ffi_closure_SYSV;
@@ -277,15 +285,15 @@ ffi_closure_SYSV_inner(ffi_closure *closure, void **values, void *rvalue)
     if (arg_types[i]->alignment == 8 && (areg & 1) != 0)
       areg++;
 
-    // skip the entry 16,a1 framework, add 16 bytes (4 registers)
+    // skip the entry a1, * framework, see ffi_trampoline
     if (areg == FFI_REGISTER_NARGS)
-      areg += 4;
+      areg = (FFI_REGISTER_ARGS_SPACE + 32) / 4;
 
     if (arg_types[i]->type == FFI_TYPE_STRUCT)
     {
       int numregs = ((arg_types[i]->size + 3) & ~3) / 4;
       if (areg < FFI_REGISTER_NARGS && areg + numregs > FFI_REGISTER_NARGS)
-        areg = FFI_REGISTER_NARGS + 4;
+        areg = (FFI_REGISTER_ARGS_SPACE + 32) / 4;
     }
 
     avalue[i] = &values[areg];
diff --git a/src/xtensa/ffitarget.h b/src/xtensa/ffitarget.h
index 0ba728b..4231ed3 100644
--- a/src/xtensa/ffitarget.h
+++ b/src/xtensa/ffitarget.h
@@ -43,6 +43,10 @@ typedef enum ffi_abi {
 #endif
 
 #define FFI_REGISTER_NARGS	6
+#define XTENSA_STACK_ALIGNMENT	16
+#define FFI_REGISTER_ARGS_SPACE ((FFI_REGISTER_NARGS * 4 + \
+				  XTENSA_STACK_ALIGNMENT - 1) & \
+				  -XTENSA_STACK_ALIGNMENT)
 
 /* ---- Definitions for closures ----------------------------------------- */
 
diff --git a/src/xtensa/sysv.S b/src/xtensa/sysv.S
index e942179..70e83ac 100644
--- a/src/xtensa/sysv.S
+++ b/src/xtensa/sysv.S
@@ -43,6 +43,9 @@
 #error "xtensa/sysv.S out of sync with ffi.h"
 #endif
 
+#define FFI_REGISTER_ARGS_OFFSET ((XTENSA_STACK_ALIGNMENT - \
+				   FFI_REGISTER_NARGS * 4) & \
+				   (XTENSA_STACK_ALIGNMENT - 1))
 
 /* ffi_call_SYSV (rvalue, rbytes, flags, (*fnaddr)(), bytes, ecif)
       void *rvalue;            a2
@@ -62,28 +65,28 @@ ENTRY(ffi_call_SYSV)
 	mov	a7, a1              # fp
 	movsp	a1, a11             # set new sp = old_sp - bytes
 
+	# align ffi_prep_args stack argument so that arguments
+	# passed on stack if any start on 16-byte aligned boundary
+
+	addi	a11, a11, FFI_REGISTER_ARGS_OFFSET
+
 	movi	a8, ffi_prep_args
 	callx8	a8                  # ffi_prep_args(ecif, stack)
 
-	# prepare to move stack pointer back up to 6 arguments
-	# note that 'bytes' is already aligned
-
-	movi	a10, 6*4 
-	sub	a11, a6, a10
-	movgez	a6, a10, a11
-	add	a6, a1, a6
+	# prepare to move stack pointer back
+	# to point to arguments passed on stack
 
+	addi	a6, a1, FFI_REGISTER_ARGS_SPACE
 	
 	# we can pass up to 6 arguments in registers
 	# for simplicity, just load 6 arguments
-	# (the stack size is at least 32 bytes, so no risk to cross boundaries)
 
-	l32i	a10, a1, 0
-	l32i	a11, a1, 4
-	l32i	a12, a1, 8
-	l32i	a13, a1, 12
-	l32i	a14, a1, 16
-	l32i	a15, a1, 20
+	l32i	a10, a1, FFI_REGISTER_ARGS_OFFSET + 0
+	l32i	a11, a1, FFI_REGISTER_ARGS_OFFSET + 4
+	l32i	a12, a1, FFI_REGISTER_ARGS_OFFSET + 8
+	l32i	a13, a1, FFI_REGISTER_ARGS_OFFSET + 12
+	l32i	a14, a1, FFI_REGISTER_ARGS_OFFSET + 16
+	l32i	a15, a1, FFI_REGISTER_ARGS_OFFSET + 20
 
 	# move stack pointer
 
@@ -167,7 +170,7 @@ END(ffi_call_SYSV)
 
 ENTRY(ffi_cacheflush)
 
-	entry	a1, 16
+	entry	a1, 32
 
 1:	
 #if XCHAL_DCACHE_SIZE
@@ -187,7 +190,14 @@ END(ffi_cacheflush)
 
 ENTRY(ffi_trampoline)
 
-	entry	a1, 16 + (FFI_REGISTER_NARGS * 4) + (4 * 4)   # [ 0]
+	/* 32 bytes for spill + spill overflow area of a frame that uses
+	   call8,
+	   FFI_REGISTER_NARGS * 4 bytes for arguments passed in registers,
+	   rounded up to 16 bytes to maintain 16 byte stack alignment,
+	   4 * 4 bytes for the result.
+	   This size must be in sync with ffi_closure_SYSV_inner logic.
+	 */
+	entry	a1, 32 + FFI_REGISTER_ARGS_SPACE + (4 * 4)   # [ 0]
 	j	2f                                # [ 3]
 	.align	4                                 # [ 6]
 1:	.long	0                                 # [ 8]