Commit 5a4774cd4d90f9ea7e7f9e34b15de29463aba4c4

Xi Ruoyao 2022-07-22T05:56:30

static trampoline for LoongArch (#723). For the benefits and technical details of static trampolines, see https://github.com/libffi/libffi/pull/624. Since LoongArch is a new architecture, let's be "safer" from the start. The change survived the libffi testsuite on loongarch64-linux-gnu.

diff --git a/configure.ac b/configure.ac
index 4629d2d..e427584 100644
--- a/configure.ac
+++ b/configure.ac
@@ -375,7 +375,7 @@ if test "$enable_exec_static_tramp" != no; then
 case "$target" in
      *-cygwin*)
      ;;
-     *arm*-*-linux-* | aarch64*-*-linux-* | i*86-*-linux-* | x86_64-*-linux-*)
+     *arm*-*-linux-* | aarch64*-*-linux-* | i*86-*-linux-* | x86_64-*-linux-* | loongarch*-*-linux-*)
        AC_DEFINE(FFI_EXEC_STATIC_TRAMP, 1,
                  [Define this if you want statically defined trampolines])
      ;;
diff --git a/src/loongarch64/ffi.c b/src/loongarch64/ffi.c
index 7a28892..ed9c15f 100644
--- a/src/loongarch64/ffi.c
+++ b/src/loongarch64/ffi.c
@@ -519,8 +519,16 @@ ffi_prep_closure_loc (ffi_closure *closure, ffi_cif *cif,
   if (cif->abi <= FFI_FIRST_ABI || cif->abi >= FFI_LAST_ABI)
     return FFI_BAD_ABI;
 
-  /* We will call ffi_closure_inner with codeloc, not closure, but as long
-     as the memory is readable it should work.  */
+#if defined(FFI_EXEC_STATIC_TRAMP)
+  if (ffi_tramp_is_present(closure))
+    {
+      ffi_tramp_set_parms (closure->ftramp, ffi_closure_asm, closure);
+      goto out;
+    }
+#endif
+
+  /* Fill the dynamic trampoline.  We will call ffi_closure_inner with codeloc,
+     not closure, but as long as the memory is readable it should work.  */
   tramp[0] = 0x1800000c; /* pcaddi $t0, 0 (i.e. $t0 <- tramp) */
   tramp[1] = 0x28c0418d; /* ld.d   $t1, $t0, 16 */
   tramp[2] = 0x4c0001a0; /* jirl   $zero, $t1, 0 */
@@ -528,11 +536,13 @@ ffi_prep_closure_loc (ffi_closure *closure, ffi_cif *cif,
   tramp[4] = fn;
   tramp[5] = fn >> 32;
 
+  __builtin___clear_cache (codeloc, codeloc + FFI_TRAMPOLINE_SIZE);
+
+out:
   closure->cif = cif;
   closure->fun = fun;
   closure->user_data = user_data;
 
-  __builtin___clear_cache (codeloc, codeloc + FFI_TRAMPOLINE_SIZE);
   return FFI_OK;
 }
 
@@ -593,3 +603,17 @@ ffi_closure_inner (ffi_cif *cif,
       marshal (&cb, cif->rtype, 0, rvalue);
     }
 }
+
+#if defined(FFI_EXEC_STATIC_TRAMP)
+void *
+ffi_tramp_arch (size_t *tramp_size, size_t *map_size)
+{
+  extern void *trampoline_code_table; /* Code table defined in sysv.S.  */
+  /* Report trampoline and mapping sizes; return the code table address.  */
+  *tramp_size = 16; /* Each table entry is four 4-byte instructions.  */
+  /* 64K is a multiple of every page size we cover (4K, 16K, and 64K), and
+     it matches the table size emitted by the .rept in sysv.S.  */
+  *map_size = 1 << 16;
+  return &trampoline_code_table;
+}
+#endif
diff --git a/src/loongarch64/sysv.S b/src/loongarch64/sysv.S
index 9e0da11..aa7bde2 100644
--- a/src/loongarch64/sysv.S
+++ b/src/loongarch64/sysv.S
@@ -147,7 +147,7 @@ ffi_call_asm:
 	.size	ffi_call_asm, .-ffi_call_asm
 
 
-/* ffi_closure_asm. Expects address of the passed-in ffi_closure in t1.
+/* ffi_closure_asm. Expects address of the passed-in ffi_closure in t0.
    void ffi_closure_inner (ffi_cif *cif,
 			   void (*fun)(ffi_cif *, void *, void **, void *),
 			   void *user_data,
@@ -219,6 +219,37 @@ ffi_closure_asm:
 	.cfi_endproc
 	.size	ffi_closure_asm, .-ffi_closure_asm
 
+/* Static trampoline code table, in which each element is a trampoline.
+
+   The trampoline clobbers t0 and t1, but we don't save them on the stack
+   because our psABI explicitly says they are scratch registers, at least for
+   ELF.  Our dynamic trampoline is already clobbering them anyway.
+
+   The trampoline has two parameters - target code to jump to and data for
+   the target code. The trampoline extracts the parameters from its parameter
+   block (see tramp_table_map()).  The trampoline loads the data address into
+   t0 and jumps to the target code.  As ffi_closure_asm() already expects the
+   data address to be in t0, we don't need an "ffi_closure_asm_alt".  */
+
+#if defined(FFI_EXEC_STATIC_TRAMP)
+	.align	16	/* NOTE(review): presumably a power-of-two exponent (64K alignment), confirm */
+	.globl	trampoline_code_table
+	.hidden	trampoline_code_table
+	.type	trampoline_code_table, @function
+	/* 65536 / 16 = 4096 entries of 16 bytes each fill exactly 64K.  */
+trampoline_code_table:
+	/* Each entry sets $t1 = PC + 64K (presumably its parameter block, see tramp_table_map()).  */
+	.rept	65536 / 16
+	pcaddu12i	$t1, 16 # 65536 >> 12
+	ld.d	$t0, $t1, 0	/* $t0 = data address handed to the target code */
+	ld.d	$t1, $t1, 8	/* $t1 = address of the target code */
+	jirl	$zero, $t1, 0	/* jump to the target; $zero as link register keeps no return address */
+	.endr
+	.size	trampoline_code_table, .-trampoline_code_table
+
+	.align	2	/* NOTE(review): presumably 4-byte alignment for the code that follows, confirm */
+#endif
+
 /* ffi_go_closure_asm.  Expects address of the passed-in ffi_go_closure in t2.
    void ffi_closure_inner (ffi_cif *cif,
 			   void (*fun)(ffi_cif *, void *, void **, void *),