Commit 31a618530737cc8f1666845f8e65a3c097f6d408

Anthony Green 2015-01-21T05:55:47

Merge pull request #170 from fealebenpae/aarch64-trampoline-table Support closures on ARM64 iOS

diff --git a/configure.ac b/configure.ac
index 4754c03..467e311 100644
--- a/configure.ac
+++ b/configure.ac
@@ -215,7 +215,7 @@ fi
 
 FFI_EXEC_TRAMPOLINE_TABLE=0
 case "$target" in
-     *arm*-apple-darwin*)
+     *arm*-apple-darwin* | aarch64-apple-darwin*)
        FFI_EXEC_TRAMPOLINE_TABLE=1
        AC_DEFINE(FFI_EXEC_TRAMPOLINE_TABLE, 1,
                  [Cannot use PROT_EXEC on this target, so, we revert to
diff --git a/src/aarch64/ffi.c b/src/aarch64/ffi.c
index 0cace9d..f79602b 100644
--- a/src/aarch64/ffi.c
+++ b/src/aarch64/ffi.c
@@ -725,6 +725,240 @@ ffi_call_go (ffi_cif *cif, void (*fn) (void), void *rvalue,
 extern void ffi_closure_SYSV (void) FFI_HIDDEN;
 extern void ffi_closure_SYSV_V (void) FFI_HIDDEN;
 
+#if FFI_EXEC_TRAMPOLINE_TABLE
+
+#include <mach/mach.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+extern void *ffi_closure_trampoline_table_page;
+
+typedef struct ffi_trampoline_table ffi_trampoline_table;
+typedef struct ffi_trampoline_table_entry ffi_trampoline_table_entry;
+
+struct ffi_trampoline_table
+{
+  /* contiguous writable and executable pages */
+  vm_address_t config_page;
+  vm_address_t trampoline_page;
+
+  /* free list tracking */
+  uint16_t free_count;
+  ffi_trampoline_table_entry *free_list;
+  ffi_trampoline_table_entry *free_list_pool;
+
+  ffi_trampoline_table *prev;
+  ffi_trampoline_table *next;
+};
+
+struct ffi_trampoline_table_entry
+{
+  void *(*trampoline) ();
+  ffi_trampoline_table_entry *next;
+};
+
+/* The trampoline configuration is placed a page prior to the trampoline's entry point */
+#define FFI_TRAMPOLINE_CODELOC_CONFIG(codeloc) ((void **) (((uint8_t *) codeloc) - PAGE_SIZE));
+
+/* Total number of trampolines that fit in one trampoline table */
+#define FFI_TRAMPOLINE_COUNT (PAGE_SIZE / FFI_TRAMPOLINE_SIZE)
+
+static pthread_mutex_t ffi_trampoline_lock = PTHREAD_MUTEX_INITIALIZER;
+static ffi_trampoline_table *ffi_trampoline_tables = NULL;
+
+static ffi_trampoline_table *
+ffi_trampoline_table_alloc ()
+{
+  ffi_trampoline_table *table = NULL;
+
+  /* Loop until we can allocate two contiguous pages */
+  while (table == NULL)
+    {
+      vm_address_t config_page = 0x0;
+      kern_return_t kt;
+
+      /* Try to allocate two pages */
+      kt =
+	vm_allocate (mach_task_self (), &config_page, PAGE_SIZE * 2,
+		     VM_FLAGS_ANYWHERE);
+      if (kt != KERN_SUCCESS)
+	{
+	  fprintf (stderr, "vm_allocate() failure: %d at %s:%d\n", kt,
+		   __FILE__, __LINE__);
+	  break;
+	}
+
+      /* Now drop the second half of the allocation to make room for the trampoline table */
+      vm_address_t trampoline_page = config_page + PAGE_SIZE;
+      kt = vm_deallocate (mach_task_self (), trampoline_page, PAGE_SIZE);
+      if (kt != KERN_SUCCESS)
+	{
+	  fprintf (stderr, "vm_deallocate() failure: %d at %s:%d\n", kt,
+		   __FILE__, __LINE__);
+	  break;
+	}
+
+      /* Remap the trampoline table to directly follow the config page */
+      vm_prot_t cur_prot;
+      vm_prot_t max_prot;
+
+      kt =
+	vm_remap (mach_task_self (), &trampoline_page, PAGE_SIZE, 0x0, FALSE,
+		  mach_task_self (),
+		  (vm_address_t) & ffi_closure_trampoline_table_page, FALSE,
+		  &cur_prot, &max_prot, VM_INHERIT_SHARE);
+
+      /* If we lost access to the destination trampoline page, drop our config allocation mapping and retry */
+      if (kt != KERN_SUCCESS)
+	{
+	  /* Log unexpected failures */
+	  if (kt != KERN_NO_SPACE)
+	    {
+	      fprintf (stderr, "vm_remap() failure: %d at %s:%d\n", kt,
+		       __FILE__, __LINE__);
+	    }
+
+	  vm_deallocate (mach_task_self (), config_page, PAGE_SIZE);
+	  continue;
+	}
+
+      /* We have valid trampoline and config pages */
+      table = calloc (1, sizeof (ffi_trampoline_table));
+      table->free_count = FFI_TRAMPOLINE_COUNT;
+      table->config_page = config_page;
+      table->trampoline_page = trampoline_page;
+
+      /* Create and initialize the free list */
+      table->free_list_pool =
+	calloc (FFI_TRAMPOLINE_COUNT, sizeof (ffi_trampoline_table_entry));
+
+      uint16_t i;
+      for (i = 0; i < table->free_count; i++)
+	{
+	  ffi_trampoline_table_entry *entry = &table->free_list_pool[i];
+	  entry->trampoline =
+	    (void *) (table->trampoline_page + (i * FFI_TRAMPOLINE_SIZE));
+
+	  if (i < table->free_count - 1)
+	    entry->next = &table->free_list_pool[i + 1];
+	}
+
+      table->free_list = table->free_list_pool;
+    }
+
+  return table;
+}
+
+void *
+ffi_closure_alloc (size_t size, void **code)
+{
+  /* Create the closure */
+  ffi_closure *closure = malloc (size);
+  if (closure == NULL)
+    return NULL;
+
+  pthread_mutex_lock (&ffi_trampoline_lock);
+
+  /* Check for an active trampoline table with available entries. */
+  ffi_trampoline_table *table = ffi_trampoline_tables;
+  if (table == NULL || table->free_list == NULL)
+    {
+      table = ffi_trampoline_table_alloc ();
+      if (table == NULL)
+	{
+	  free (closure);
+	  return NULL;
+	}
+
+      /* Insert the new table at the top of the list */
+      table->next = ffi_trampoline_tables;
+      if (table->next != NULL)
+	table->next->prev = table;
+
+      ffi_trampoline_tables = table;
+    }
+
+  /* Claim the free entry */
+  ffi_trampoline_table_entry *entry = ffi_trampoline_tables->free_list;
+  ffi_trampoline_tables->free_list = entry->next;
+  ffi_trampoline_tables->free_count--;
+  entry->next = NULL;
+
+  pthread_mutex_unlock (&ffi_trampoline_lock);
+
+  /* Initialize the return values */
+  *code = entry->trampoline;
+  closure->trampoline_table = table;
+  closure->trampoline_table_entry = entry;
+
+  return closure;
+}
+
+void
+ffi_closure_free (void *ptr)
+{
+  ffi_closure *closure = ptr;
+
+  pthread_mutex_lock (&ffi_trampoline_lock);
+
+  /* Fetch the table and entry references */
+  ffi_trampoline_table *table = closure->trampoline_table;
+  ffi_trampoline_table_entry *entry = closure->trampoline_table_entry;
+
+  /* Return the entry to the free list */
+  entry->next = table->free_list;
+  table->free_list = entry;
+  table->free_count++;
+
+  /* If all trampolines within this table are free, and at least one other table exists, deallocate
+   * the table */
+  if (table->free_count == FFI_TRAMPOLINE_COUNT
+      && ffi_trampoline_tables != table)
+    {
+      /* Remove from the list */
+      if (table->prev != NULL)
+	table->prev->next = table->next;
+
+      if (table->next != NULL)
+	table->next->prev = table->prev;
+
+      /* Deallocate pages */
+      kern_return_t kt;
+      kt = vm_deallocate (mach_task_self (), table->config_page, PAGE_SIZE);
+      if (kt != KERN_SUCCESS)
+	fprintf (stderr, "vm_deallocate() failure: %d at %s:%d\n", kt,
+		 __FILE__, __LINE__);
+
+      kt =
+	vm_deallocate (mach_task_self (), table->trampoline_page, PAGE_SIZE);
+      if (kt != KERN_SUCCESS)
+	fprintf (stderr, "vm_deallocate() failure: %d at %s:%d\n", kt,
+		 __FILE__, __LINE__);
+
+      /* Deallocate free list */
+      free (table->free_list_pool);
+      free (table);
+    }
+  else if (ffi_trampoline_tables != table)
+    {
+      /* Otherwise, bump this table to the top of the list */
+      table->prev = NULL;
+      table->next = ffi_trampoline_tables;
+      if (ffi_trampoline_tables != NULL)
+	ffi_trampoline_tables->prev = table;
+
+      ffi_trampoline_tables = table;
+    }
+
+  pthread_mutex_unlock (&ffi_trampoline_lock);
+
+  /* Free the closure */
+  free (closure);
+}
+
+#endif
+
 ffi_status
 ffi_prep_closure_loc (ffi_closure *closure,
                       ffi_cif* cif,
@@ -732,31 +966,39 @@ ffi_prep_closure_loc (ffi_closure *closure,
                       void *user_data,
                       void *codeloc)
 {
+  if (cif->abi != FFI_SYSV)
+    return FFI_BAD_ABI;
+
+  void (*start)(void);
+  
+  if (cif->flags & AARCH64_FLAG_ARG_V)
+    start = ffi_closure_SYSV_V;
+  else
+    start = ffi_closure_SYSV;
+
+#if FFI_EXEC_TRAMPOLINE_TABLE
+  void **config = FFI_TRAMPOLINE_CODELOC_CONFIG (codeloc);
+  config[0] = closure;
+  config[1] = start;
+#else
   static const unsigned char trampoline[16] = {
     0x90, 0x00, 0x00, 0x58,	/* ldr	x16, tramp+16	*/
     0xf1, 0xff, 0xff, 0x10,	/* adr	x17, tramp+0	*/
     0x00, 0x02, 0x1f, 0xd6	/* br	x16		*/
   };
   char *tramp = closure->tramp;
-  void (*start)(void);
+  
+  memcpy (tramp, trampoline, sizeof(trampoline));
+  
+  *(UINT64 *)(tramp + 16) = (uintptr_t)start;
 
-  if (cif->abi != FFI_SYSV)
-    return FFI_BAD_ABI;
+  ffi_clear_cache(tramp, tramp + FFI_TRAMPOLINE_SIZE);
+#endif
 
   closure->cif = cif;
   closure->fun = fun;
   closure->user_data = user_data;
 
-  memcpy (tramp, trampoline, sizeof(trampoline));
-
-  if (cif->flags & AARCH64_FLAG_ARG_V)
-    start = ffi_closure_SYSV_V;
-  else
-    start = ffi_closure_SYSV;
-  *(UINT64 *)(tramp + 16) = (uintptr_t)start;
-
-  ffi_clear_cache(tramp, tramp + FFI_TRAMPOLINE_SIZE);
-
   return FFI_OK;
 }
 
diff --git a/src/aarch64/ffitarget.h b/src/aarch64/ffitarget.h
index 80d09af..fca2811 100644
--- a/src/aarch64/ffitarget.h
+++ b/src/aarch64/ffitarget.h
@@ -42,7 +42,13 @@ typedef enum ffi_abi
 /* ---- Definitions for closures ----------------------------------------- */
 
 #define FFI_CLOSURES 1
+#if defined (__APPLE__)
+#define FFI_TRAMPOLINE_SIZE 20
+#define FFI_TRAMPOLINE_CLOSURE_OFFSET 16
+#else
 #define FFI_TRAMPOLINE_SIZE 24
+#define FFI_TRAMPOLINE_CLOSURE_OFFSET FFI_TRAMPOLINE_SIZE
+#endif
 #define FFI_NATIVE_RAW_API 0
 
 /* ---- Internal ---- */
diff --git a/src/aarch64/sysv.S b/src/aarch64/sysv.S
index 5c9cdda..46f50b9 100644
--- a/src/aarch64/sysv.S
+++ b/src/aarch64/sysv.S
@@ -134,17 +134,17 @@ CNAME(ffi_call_SYSV):
 	ret
 7:	brk	#1000			/* UNUSED */
 	ret
-8:	st4	{ v0.s-v3.s }[0], [x3]	/* S4 */
+8:	st4	{ v0.s, v1.s, v2.s, v3.s }[0], [x3]	/* S4 */
 	ret
-9:	st3	{ v0.s-v2.s }[0], [x3]	/* S3 */
+9:	st3	{ v0.s, v1.s, v2.s }[0], [x3]	/* S3 */
 	ret
 10:	stp	s0, s1, [x3]		/* S2 */
 	ret
 11:	str	s0, [x3]		/* S1 */
 	ret
-12:	st4	{ v0.d-v3.d }[0], [x3]	/* D4 */
+12:	st4	{ v0.d, v1.d, v2.d, v3.d }[0], [x3]	/* D4 */
 	ret
-13:	st3	{ v0.d-v2.d }[0], [x3]	/* D3 */
+13:	st3	{ v0.d, v1.d, v2.d }[0], [x3]	/* D3 */
 	ret
 14:	stp	d0, d1, [x3]		/* D2 */
 	ret
@@ -248,8 +248,8 @@ CNAME(ffi_closure_SYSV):
 	stp     x6, x7, [sp, #16 + 16*N_V_ARG_REG + 48]
 
 	/* Load ffi_closure_inner arguments.  */
-	ldp	x0, x1, [x17, #FFI_TRAMPOLINE_SIZE]	/* load cif, fn */
-	ldr	x2, [x17, #FFI_TRAMPOLINE_SIZE+16]	/* load user_data */
+	ldp	x0, x1, [x17, #FFI_TRAMPOLINE_CLOSURE_OFFSET]	/* load cif, fn */
+	ldr	x2, [x17, #FFI_TRAMPOLINE_CLOSURE_OFFSET+16]	/* load user_data */
 .Ldo_closure:
 	add	x3, sp, #16				/* load context */
 	add	x4, sp, #ffi_closure_SYSV_FS		/* load stack */
@@ -343,6 +343,25 @@ CNAME(ffi_closure_SYSV):
 	.size	CNAME(ffi_closure_SYSV), . - CNAME(ffi_closure_SYSV)
 #endif
 
+#if FFI_EXEC_TRAMPOLINE_TABLE
+    .align 12
+CNAME(ffi_closure_trampoline_table_page):
+    .rept 16384 / FFI_TRAMPOLINE_SIZE
+    adr	x17, -16384
+    adr	x16, -16380
+    ldr x16, [x16]
+    ldr x17, [x17]
+    br	x16
+    .endr
+    
+    .globl CNAME(ffi_closure_trampoline_table_page)
+    #ifdef __ELF__
+    	.type	CNAME(ffi_closure_trampoline_table_page), #function
+    	.hidden	CNAME(ffi_closure_trampoline_table_page)
+    	.size	CNAME(ffi_closure_trampoline_table_page), . - CNAME(ffi_closure_trampoline_table_page)
+    #endif
+#endif
+
 #ifdef FFI_GO_CLOSURES
 	.align 4
 CNAME(ffi_go_closure_SYSV_V):