Commit eafab2356e8dcf5f01d2bcfa311cafba3b395a7e

Jeremy Huddleston Sequoia 2021-03-24T11:38:36

arm64e: Pull in pointer authentication code from Apple's arm64e libffi port (#565) NOTES: This changes the ptrauth support from #548 to match what Apple is shipping in its libffi-27 tag. Signed-off-by: Jeremy Huddleston Sequoia <jeremyhu@apple.com>

diff --git a/src/aarch64/ffi.c b/src/aarch64/ffi.c
index fd7f9d1..5c85fcd 100644
--- a/src/aarch64/ffi.c
+++ b/src/aarch64/ffi.c
@@ -617,11 +617,12 @@ ffi_call_int (ffi_cif *cif, void (*fn)(void), void *orig_rvalue,
   else if (flags & AARCH64_RET_NEED_COPY)
     rsize = 16;
 
-  /* Allocate consectutive stack for everything we'll need.  */
-  context = alloca (sizeof(struct call_context) + stack_bytes + 32 + rsize);
+  /* Allocate consecutive stack for everything we'll need.
+     The frame uses 40 bytes for: lr, fp, rvalue, flags, sp */
+  context = alloca (sizeof(struct call_context) + stack_bytes + 40 + rsize);
   stack = context + 1;
   frame = (void*)((uintptr_t)stack + (uintptr_t)stack_bytes);
-  rvalue = (rsize ? (void*)((uintptr_t)frame + 32) : orig_rvalue);
+  rvalue = (rsize ? (void*)((uintptr_t)frame + 40) : orig_rvalue);
 
   arg_init (&state);
   for (i = 0, nargs = cif->nargs; i < nargs; i++)
@@ -810,7 +811,7 @@ ffi_prep_closure_loc (ffi_closure *closure,
 #if FFI_EXEC_TRAMPOLINE_TABLE
 #ifdef __MACH__
 #ifdef HAVE_PTRAUTH
-  codeloc = ptrauth_strip (codeloc, ptrauth_key_asia);
+  codeloc = ptrauth_auth_data(codeloc, ptrauth_key_function_pointer, 0);
 #endif
   void **config = (void **)((uint8_t *)codeloc - PAGE_MAX_SIZE);
   config[0] = closure;
diff --git a/src/aarch64/internal.h b/src/aarch64/internal.h
index de55755..b5d102b 100644
--- a/src/aarch64/internal.h
+++ b/src/aarch64/internal.h
@@ -75,4 +75,26 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.  */
 #define AARCH64_TRAMP_MAP_SHIFT	14
 #define AARCH64_TRAMP_MAP_SIZE	(1 << AARCH64_TRAMP_MAP_SHIFT)
 #define AARCH64_TRAMP_SIZE	32
+
+#endif
+
+/* Helpers for writing assembly compatible with arm ptr auth */
+#ifdef LIBFFI_ASM
+
+#ifdef HAVE_PTRAUTH
+#define SIGN_LR pacibsp
+#define SIGN_LR_WITH_REG(x) pacib lr, x
+#define AUTH_LR_AND_RET retab
+#define AUTH_LR_WITH_REG(x) autib lr, x
+#define BRANCH_AND_LINK_TO_REG blraaz
+#define BRANCH_TO_REG braaz
+#else
+#define SIGN_LR
+#define SIGN_LR_WITH_REG(x)
+#define AUTH_LR_AND_RET ret
+#define AUTH_LR_WITH_REG(x)
+#define BRANCH_AND_LINK_TO_REG blr
+#define BRANCH_TO_REG br
+#endif
+
 #endif
diff --git a/src/aarch64/sysv.S b/src/aarch64/sysv.S
index bc10da8..eeaf3f8 100644
--- a/src/aarch64/sysv.S
+++ b/src/aarch64/sysv.S
@@ -58,14 +58,6 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.  */
 #define PTR_SIZE	8
 #endif
 
-#if FFI_EXEC_TRAMPOLINE_TABLE && defined(__MACH__) && defined(HAVE_PTRAUTH)
-# define BR(r)  braaz r
-# define BLR(r) blraaz r
-#else
-# define BR(r)  br r
-# define BLR(r) blr r
-#endif
-
 	.text
 	.align 4
 
@@ -86,9 +78,22 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.  */
 
 	cfi_startproc
 CNAME(ffi_call_SYSV):
+	/* Sign the lr with x1 since that is where it will be stored */
+	SIGN_LR_WITH_REG(x1)
+
 	/* Use a stack frame allocated by our caller.  */
-	cfi_def_cfa(x1, 32);
+#if defined(HAVE_PTRAUTH) && defined(__APPLE__)
+	/* Darwin's libunwind assumes that the cfa is the sp and that's the data
+	 * used to sign the lr.  In order to allow unwinding through this
+	 * function it is necessary to point the cfa at the signing register.
+	 */
+	cfi_def_cfa(x1, 0);
+#else
+	cfi_def_cfa(x1, 40);
+#endif
 	stp	x29, x30, [x1]
+	mov	x9, sp
+	str	x9, [x1, #32]
 	mov	x29, x1
 	mov	sp, x0
 	cfi_def_cfa_register(x29)
@@ -119,13 +124,15 @@ CNAME(ffi_call_SYSV):
 	/* Deallocate the context, leaving the stacked arguments.  */
 	add	sp, sp, #CALL_CONTEXT_SIZE
 
-	BLR(x9)				/* call fn */
+	BRANCH_AND_LINK_TO_REG     x9			/* call fn */
 
 	ldp	x3, x4, [x29, #16]	/* reload rvalue and flags */
 
 	/* Partially deconstruct the stack frame.  */
-	mov     sp, x29
+	ldr	x9, [x29, #32]
+	mov	sp, x9
 	cfi_def_cfa_register (sp)
+	mov	x2, x29			/* Preserve for auth */
 	ldp     x29, x30, [x29]
 
 	/* Save the return value as directed.  */
@@ -139,71 +146,76 @@ CNAME(ffi_call_SYSV):
 	   and therefore we want to extend to 64 bits; these types
 	   have two consecutive entries allocated for them.  */
 	.align	4
-0:	ret				/* VOID */
+0:	b 99f				/* VOID */
 	nop
 1:	str	x0, [x3]		/* INT64 */
-	ret
+	b 99f
 2:	stp	x0, x1, [x3]		/* INT128 */
-	ret
+	b 99f
 3:	brk	#1000			/* UNUSED */
-	ret
+	b 99f
 4:	brk	#1000			/* UNUSED */
-	ret
+	b 99f
 5:	brk	#1000			/* UNUSED */
-	ret
+	b 99f
 6:	brk	#1000			/* UNUSED */
-	ret
+	b 99f
 7:	brk	#1000			/* UNUSED */
-	ret
+	b 99f
 8:	st4	{ v0.s, v1.s, v2.s, v3.s }[0], [x3]	/* S4 */
-	ret
+	b 99f
 9:	st3	{ v0.s, v1.s, v2.s }[0], [x3]	/* S3 */
-	ret
+	b 99f
 10:	stp	s0, s1, [x3]		/* S2 */
-	ret
+	b 99f
 11:	str	s0, [x3]		/* S1 */
-	ret
+	b 99f
 12:	st4	{ v0.d, v1.d, v2.d, v3.d }[0], [x3]	/* D4 */
-	ret
+	b 99f
 13:	st3	{ v0.d, v1.d, v2.d }[0], [x3]	/* D3 */
-	ret
+	b 99f
 14:	stp	d0, d1, [x3]		/* D2 */
-	ret
+	b 99f
 15:	str	d0, [x3]		/* D1 */
-	ret
+	b 99f
 16:	str	q3, [x3, #48]		/* Q4 */
 	nop
 17:	str	q2, [x3, #32]		/* Q3 */
 	nop
 18:	stp	q0, q1, [x3]		/* Q2 */
-	ret
+	b 99f
 19:	str	q0, [x3]		/* Q1 */
-	ret
+	b 99f
 20:	uxtb	w0, w0			/* UINT8 */
 	str	x0, [x3]
-21:	ret				/* reserved */
+21:	b 99f				/* reserved */
 	nop
 22:	uxth	w0, w0			/* UINT16 */
 	str	x0, [x3]
-23:	ret				/* reserved */
+23:	b 99f				/* reserved */
 	nop
 24:	mov	w0, w0			/* UINT32 */
 	str	x0, [x3]
-25:	ret				/* reserved */
+25:	b 99f				/* reserved */
 	nop
 26:	sxtb	x0, w0			/* SINT8 */
 	str	x0, [x3]
-27:	ret				/* reserved */
+27:	b 99f				/* reserved */
 	nop
 28:	sxth	x0, w0			/* SINT16 */
 	str	x0, [x3]
-29:	ret				/* reserved */
+29:	b 99f				/* reserved */
 	nop
 30:	sxtw	x0, w0			/* SINT32 */
 	str	x0, [x3]
-31:	ret				/* reserved */
+31:	b 99f				/* reserved */
 	nop
 
+	/* Return now that result has been populated. */
+99:
+	AUTH_LR_WITH_REG(x2)
+	ret
+
 	cfi_endproc
 
 	.globl	CNAME(ffi_call_SYSV)
@@ -234,6 +246,7 @@ CNAME(ffi_call_SYSV):
 	.align 4
 CNAME(ffi_closure_SYSV_V):
 	cfi_startproc
+	SIGN_LR
 	stp     x29, x30, [sp, #-ffi_closure_SYSV_FS]!
 	cfi_adjust_cfa_offset (ffi_closure_SYSV_FS)
 	cfi_rel_offset (x29, 0)
@@ -257,6 +270,7 @@ CNAME(ffi_closure_SYSV_V):
 	.align	4
 	cfi_startproc
 CNAME(ffi_closure_SYSV):
+	SIGN_LR
 	stp     x29, x30, [sp, #-ffi_closure_SYSV_FS]!
 	cfi_adjust_cfa_offset (ffi_closure_SYSV_FS)
 	cfi_rel_offset (x29, 0)
@@ -283,9 +297,6 @@ CNAME(ffi_closure_SYSV):
 	bl      CNAME(ffi_closure_SYSV_inner)
 
 	/* Load the return value as directed.  */
-#if FFI_EXEC_TRAMPOLINE_TABLE && defined(__MACH__) && defined(HAVE_PTRAUTH)
-	autiza	x1
-#endif
 	adr	x1, 0f
 	and	w0, w0, #AARCH64_RET_MASK
 	add	x1, x1, x0, lsl #3
@@ -361,7 +372,7 @@ CNAME(ffi_closure_SYSV):
 	cfi_adjust_cfa_offset (-ffi_closure_SYSV_FS)
 	cfi_restore (x29)
 	cfi_restore (x30)
-	ret
+	AUTH_LR_AND_RET
 	cfi_endproc
 
 	.globl	CNAME(ffi_closure_SYSV)
@@ -450,7 +461,7 @@ CNAME(ffi_closure_trampoline_table_page):
     .rept PAGE_MAX_SIZE / FFI_TRAMPOLINE_SIZE
     adr x16, -PAGE_MAX_SIZE
     ldp x17, x16, [x16]
-    BR(x16)
+    br x16
 	nop		/* each entry in the trampoline config page is 2*sizeof(void*) so the trampoline itself cannot be smaller than 16 bytes */
     .endr
 
diff --git a/src/closures.c b/src/closures.c
index 3558d78..f7bead6 100644
--- a/src/closures.c
+++ b/src/closures.c
@@ -173,7 +173,6 @@ struct ffi_trampoline_table
 {
   /* contiguous writable and executable pages */
   vm_address_t config_page;
-  vm_address_t trampoline_page;
 
   /* free list tracking */
   uint16_t free_count;
@@ -217,7 +216,13 @@ ffi_trampoline_table_alloc (void)
 
   /* Remap the trampoline table on top of the placeholder page */
   trampoline_page = config_page + PAGE_MAX_SIZE;
+
+#ifdef HAVE_PTRAUTH
+  trampoline_page_template = (vm_address_t)(uintptr_t)ptrauth_auth_data((void *)&ffi_closure_trampoline_table_page, ptrauth_key_function_pointer, 0);
+#else
   trampoline_page_template = (vm_address_t)&ffi_closure_trampoline_table_page;
+#endif
+
 #ifdef __arm__
   /* ffi_closure_trampoline_table_page can be thumb-biased on some ARM archs */
   trampoline_page_template &= ~1UL;
@@ -225,7 +230,7 @@ ffi_trampoline_table_alloc (void)
   kt = vm_remap (mach_task_self (), &trampoline_page, PAGE_MAX_SIZE, 0x0,
 		 VM_FLAGS_OVERWRITE, mach_task_self (), trampoline_page_template,
 		 FALSE, &cur_prot, &max_prot, VM_INHERIT_SHARE);
-  if (kt != KERN_SUCCESS)
+  if (kt != KERN_SUCCESS || !(cur_prot & VM_PROT_EXECUTE))
     {
       vm_deallocate (mach_task_self (), config_page, PAGE_MAX_SIZE * 2);
       return NULL;
@@ -235,7 +240,6 @@ ffi_trampoline_table_alloc (void)
   table = calloc (1, sizeof (ffi_trampoline_table));
   table->free_count = FFI_TRAMPOLINE_COUNT;
   table->config_page = config_page;
-  table->trampoline_page = trampoline_page;
 
   /* Create and initialize the free list */
   table->free_list_pool =
@@ -245,7 +249,10 @@ ffi_trampoline_table_alloc (void)
     {
       ffi_trampoline_table_entry *entry = &table->free_list_pool[i];
       entry->trampoline =
-	(void *) (table->trampoline_page + (i * FFI_TRAMPOLINE_SIZE));
+	(void *) (trampoline_page + (i * FFI_TRAMPOLINE_SIZE));
+#ifdef HAVE_PTRAUTH
+      entry->trampoline = ptrauth_sign_unauthenticated(entry->trampoline, ptrauth_key_function_pointer, 0);
+#endif
 
       if (i < table->free_count - 1)
 	entry->next = &table->free_list_pool[i + 1];
@@ -314,9 +321,6 @@ ffi_closure_alloc (size_t size, void **code)
 
   /* Initialize the return values */
   *code = entry->trampoline;
-#ifdef HAVE_PTRAUTH
-  *code = ptrauth_sign_unauthenticated (*code, ptrauth_key_asia, 0);
-#endif
   closure->trampoline_table = table;
   closure->trampoline_table_entry = entry;