Commit 83038cf24aa1a92b62b91ffee1dcc25d79243484

Landon Fuller 2010-09-19T14:36:45

Implement FFI_EXEC_TRAMPOLINE_TABLE allocator for iOS/ARM. This provides working closure support on iOS/ARM devices where PROT_WRITE|PROT_EXEC is not permitted. The code passes basic smoke tests, but requires further review.

diff --git a/src/arm/ffi.c b/src/arm/ffi.c
index 7da69c1..4b8e6de 100644
--- a/src/arm/ffi.c
+++ b/src/arm/ffi.c
@@ -273,6 +273,220 @@ ffi_prep_incoming_args_SYSV(char *stack, void **rvalue,
 
 /* How to make a trampoline.  */
 
+#if FFI_EXEC_TRAMPOLINE_TABLE
+
+#include <mach/mach.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+extern void *ffi_closure_trampoline_table_page;
+
+typedef struct ffi_trampoline_table ffi_trampoline_table;
+typedef struct ffi_trampoline_table_entry ffi_trampoline_table_entry;
+
+struct ffi_trampoline_table {
+  /* contigious writable and executable pages */
+  vm_address_t config_page;
+  vm_address_t trampoline_page;
+
+  /* free list tracking */
+  uint16_t free_count;
+  ffi_trampoline_table_entry *free_list;
+  ffi_trampoline_table_entry *free_list_pool;
+
+  ffi_trampoline_table *prev;
+  ffi_trampoline_table *next;
+};
+
+struct ffi_trampoline_table_entry {
+  void *(*trampoline)();
+  ffi_trampoline_table_entry *next;
+};
+
+/* Override the standard architecture trampoline size */
+// XXX TODO - Fix
+#undef FFI_TRAMPOLINE_SIZE
+#define FFI_TRAMPOLINE_SIZE 12
+
+/* The trampoline configuration is placed at 4080 bytes prior to the trampoline's entry point */
+#define FFI_TRAMPOLINE_CODELOC_CONFIG(codeloc) ((void **) (((uint8_t *) codeloc) - 4080));
+
+/* The first 16 bytes of the config page are unused, as they are unaddressable from the trampoline page. */
+#define FFI_TRAMPOLINE_CONFIG_PAGE_OFFSET 16
+
+/* Total number of trampolines that fit in one trampoline table */
+#define FFI_TRAMPOLINE_COUNT ((PAGE_SIZE - FFI_TRAMPOLINE_CONFIG_PAGE_OFFSET) / FFI_TRAMPOLINE_SIZE)
+
+static pthread_mutex_t ffi_trampoline_lock = PTHREAD_MUTEX_INITIALIZER;
+static ffi_trampoline_table *ffi_trampoline_tables = NULL;
+
+static ffi_trampoline_table *
+ffi_trampoline_table_alloc ()
+{
+  ffi_trampoline_table *table = NULL;
+
+  /* Loop until we can allocate two contigious pages */
+  while (table == NULL) {
+    vm_address_t config_page = 0x0;
+    kern_return_t kt;
+
+    /* Try to allocate two pages */
+    kt = vm_allocate (mach_task_self (), &config_page, PAGE_SIZE*2, VM_FLAGS_ANYWHERE);
+    if (kt != KERN_SUCCESS) {
+      fprintf(stderr, "vm_allocate() failure: %d at %s:%d\n", kt, __FILE__, __LINE__);
+      break;
+    }
+  
+    /* Now drop the second half of the allocation to make room for the trampoline table */
+    vm_address_t trampoline_page = config_page+PAGE_SIZE;
+    kt = vm_deallocate (mach_task_self (), trampoline_page, PAGE_SIZE);
+    if (kt != KERN_SUCCESS) {
+      fprintf(stderr, "vm_deallocate() failure: %d at %s:%d\n", kt, __FILE__, __LINE__);
+      break;
+    }
+
+    /* Remap the trampoline table to directly follow the config page */
+    vm_prot_t cur_prot;
+    vm_prot_t max_prot;
+
+    kt = vm_remap (mach_task_self (), &trampoline_page, PAGE_SIZE, 0x0, FALSE, mach_task_self (), (vm_address_t) &ffi_closure_trampoline_table_page, FALSE, &cur_prot, &max_prot, VM_INHERIT_SHARE);
+
+    /* If we lost access to the destination trampoline page, drop our config allocation mapping and retry */
+    if (kt != KERN_SUCCESS) {
+      /* Log unexpected failures */
+      if (kt != KERN_NO_SPACE) {
+        fprintf(stderr, "vm_remap() failure: %d at %s:%d\n", kt, __FILE__, __LINE__);
+      }
+
+      vm_deallocate (mach_task_self (), config_page, PAGE_SIZE);
+      continue;
+    }
+
+    /* We have valid trampoline and config pages */
+    table = calloc (1, sizeof(ffi_trampoline_table));
+    table->free_count = FFI_TRAMPOLINE_COUNT;
+    table->config_page = config_page;
+    table->trampoline_page = trampoline_page;
+    
+    /* Create and initialize the free list */
+    table->free_list_pool = calloc(FFI_TRAMPOLINE_COUNT, sizeof(ffi_trampoline_table_entry));
+
+    uint16_t i;
+    for (i = 0; i < table->free_count; i++) {
+      ffi_trampoline_table_entry *entry = &table->free_list_pool[i];
+      entry->trampoline = (void *) (table->trampoline_page + (i * FFI_TRAMPOLINE_SIZE));
+
+      if (i < table->free_count - 1)
+        entry->next = &table->free_list_pool[i+1];
+    }
+
+    table->free_list = table->free_list_pool;
+  }
+
+  return table;
+}
+
+void *
+ffi_closure_alloc (size_t size, void **code)
+{
+  /* Create the closure */
+  ffi_closure *closure = malloc(size);
+  if (closure == NULL)
+    return NULL;
+
+  pthread_mutex_lock(&ffi_trampoline_lock);
+
+  /* Check for an active trampoline table with available entries. */
+  ffi_trampoline_table *table = ffi_trampoline_tables;
+  if (table == NULL || table->free_list == NULL) {
+    table = ffi_trampoline_table_alloc ();
+    if (table == NULL) {
+      free(closure);
+      return NULL;
+    }
+
+    /* Insert the new table at the top of the list */
+    table->next = ffi_trampoline_tables;
+    if (table->next != NULL)
+        table->next->prev = table;
+
+    ffi_trampoline_tables = table;
+  }
+
+  /* Claim the free entry */
+  ffi_trampoline_table_entry *entry = ffi_trampoline_tables->free_list;
+  ffi_trampoline_tables->free_list = entry->next;
+  ffi_trampoline_tables->free_count--;
+  entry->next = NULL;
+
+  pthread_mutex_unlock(&ffi_trampoline_lock);
+
+  /* Initialize the return values */
+  *code = entry->trampoline;
+  closure->trampoline_table = table;
+  closure->trampoline_table_entry = entry;
+
+  return closure;
+}
+
+void
+ffi_closure_free (void *ptr)
+{
+  ffi_closure *closure = ptr;
+
+  pthread_mutex_lock(&ffi_trampoline_lock);
+
+  /* Fetch the table and entry references */
+  ffi_trampoline_table *table = closure->trampoline_table;
+  ffi_trampoline_table_entry *entry = closure->trampoline_table_entry;
+
+  /* Return the entry to the free list */
+  entry->next = table->free_list;
+  table->free_list = entry;
+  table->free_count++;
+
+  /* If all trampolines within this table are free, and at least one other table exists, deallocate
+   * the table */
+  if (table->free_count == FFI_TRAMPOLINE_COUNT && ffi_trampoline_tables != table) {
+    /* Remove from the list */
+    if (table->prev != NULL)
+      table->prev->next = table->next;
+  
+    if (table->next != NULL)
+      table->next->prev = table->prev;
+
+    /* Deallocate pages */
+    kern_return_t kt;
+    kt = vm_deallocate (mach_task_self (), table->config_page, PAGE_SIZE);
+    if (kt != KERN_SUCCESS)
+      fprintf(stderr, "vm_deallocate() failure: %d at %s:%d\n", kt, __FILE__, __LINE__);
+
+    kt = vm_deallocate (mach_task_self (), table->trampoline_page, PAGE_SIZE);
+    if (kt != KERN_SUCCESS)
+      fprintf(stderr, "vm_deallocate() failure: %d at %s:%d\n", kt, __FILE__, __LINE__);
+
+    /* Deallocate free list */
+    free (table->free_list_pool);
+    free (table);
+  } else if (ffi_trampoline_tables != table) {
+    /* Otherwise, bump this table to the top of the list */
+    table->prev = NULL;
+    table->next = ffi_trampoline_tables;
+    if (ffi_trampoline_tables != NULL)
+      ffi_trampoline_tables->prev = table;
+
+    ffi_trampoline_tables = table;
+  }
+
+  pthread_mutex_unlock (&ffi_trampoline_lock);
+
+  /* Free the closure */
+  free (closure);
+}
+
+#else
+
 #define FFI_INIT_TRAMPOLINE(TRAMP,FUN,CTX)				\
 ({ unsigned char *__tramp = (unsigned char*)(TRAMP);			\
    unsigned int  __fun = (unsigned int)(FUN);				\
@@ -285,6 +499,7 @@ ffi_prep_incoming_args_SYSV(char *stack, void **rvalue,
    __clear_cache((&__tramp[0]), (&__tramp[19]));			\
  })
 
+#endif
 
 /* the cif must already be prep'ed */
 
@@ -298,8 +513,7 @@ ffi_prep_closure_loc (ffi_closure* closure,
   FFI_ASSERT (cif->abi == FFI_SYSV);
 
 #if FFI_EXEC_TRAMPOLINE_TABLE
-  // XXX - hardcoded offset
-  void **config = (void **) (((uint8_t *) codeloc) - 4080);
+  void **config = FFI_TRAMPOLINE_CODELOC_CONFIG(codeloc);
   config[0] = closure;
   config[1] = ffi_closure_SYSV;
 #else
diff --git a/src/arm/trampoline.S b/src/arm/trampoline.S
index 9f5891e..83be0dc 100644
--- a/src/arm/trampoline.S
+++ b/src/arm/trampoline.S
@@ -3,8 +3,8 @@
 
 .text
 .align 12
-.globl _ffi_closure_trampoline_table
-_ffi_closure_trampoline_table:
+.globl _ffi_closure_trampoline_table_page
+_ffi_closure_trampoline_table_page:
 
     // trampoline
     // Save to stack
diff --git a/src/closures.c b/src/closures.c
index c214196..ac878df 100644
--- a/src/closures.c
+++ b/src/closures.c
@@ -65,85 +65,7 @@
 
 # if FFI_EXEC_TRAMPOLINE_TABLE
 
-// XXX - non-thread-safe, non-portable implementation, intended for initial testing
-#include <mach/mach.h>
-#include <stdio.h>
-#include <stdlib.h>
-
-extern void *ffi_closure_trampoline_table;
-
-static void *tramp_table = NULL;
-static void **config_table = NULL;
-static int tramp_table_free = 0;
-
-void *
-ffi_closure_alloc (size_t size, void **code)
-{
-  if (!code)
-    return NULL;
-
-  /* Allocate our config page and remap the trampoline table */
-  while (tramp_table == NULL) {
-    vm_address_t config_page = 0x0;
-    kern_return_t kt;
-
-    /* Try to allocate two pages */
-    kt = vm_allocate(mach_task_self(), &config_page, PAGE_SIZE*2, VM_FLAGS_ANYWHERE);
-    if (kt != KERN_SUCCESS) {
-      fprintf(stderr, "vm_allocate() failure: %d at %s:%d\n", kt, __FILE__, __LINE__);
-      break;
-    }
-    
-    /* Now drop the second half of the allocation to make room for the trampoline table */
-    kt = vm_deallocate(mach_task_self(), config_page+PAGE_SIZE, PAGE_SIZE);
-    if (kt != KERN_SUCCESS) {
-      fprintf(stderr, "vm_deallocate() failure: %d at %s:%d\n", kt, __FILE__, __LINE__);
-      break;
-    }
-
-    /* Remap the trampoline table to directly follow the config page */
-    vm_address_t table_page = config_page+PAGE_SIZE;
-    vm_prot_t cur_prot;
-    vm_prot_t max_prot;
-
-    kt = vm_remap(mach_task_self(), &table_page, PAGE_SIZE, 0x0, FALSE, mach_task_self(), (vm_address_t) &ffi_closure_trampoline_table, FALSE, &cur_prot, &max_prot, VM_INHERIT_SHARE);
-
-    /* If we lost access to the destination trampoline page, drop our config allocation mapping and retry */
-    if (kt != KERN_SUCCESS) {
-      /* Log unexpected failures */
-      if (kt != KERN_NO_SPACE) {
-        fprintf(stderr, "vm_remap() failure: %d at %s:%d\n", kt, __FILE__, __LINE__);
-      }
-
-      vm_deallocate(mach_task_self(), config_page, PAGE_SIZE);
-      continue;
-    }
-
-    tramp_table = (void *) table_page;
-    config_table = (void **) config_page;
-    config_table += 4; // XXX - there's a 16 byte offset into the config page on ARM
-
-    fprintf(stderr, "[DEBUG] Allocated config page at %p, trampoline page at %p, configs start at %p\n", (void *) config_page, (void *) table_page, config_table);
-  }
-
-  /* Check for permanent allocation failure */
-  if (tramp_table == NULL)
-    return *code = NULL;
-
-  *code = tramp_table + (3 * tramp_table_free);
-  tramp_table_free++;
-
-  void *closure = malloc(size);
-
-  return closure;
-}
-
-void
-ffi_closure_free (void *ptr)
-{
-  // TODO
-  //free (ptr);
-}
+// Per-target implementation; It's unclear what can reasonable be shared between two OS/architecture implementations.
 
 # elif FFI_MMAP_EXEC_WRIT /* !FFI_EXEC_TRAMPOLINE_TABLE */