Commit 325471ea6a7bf954943485458a1bd391635dfaa8

Richard Henderson 2014-10-22T13:58:59

aarch64: Merge prep_args with ffi_call. Use the trick to allocate the stack frame for ffi_call_SYSV within ffi_call itself.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
diff --git a/src/aarch64/ffi.c b/src/aarch64/ffi.c
index d19384b..a067303 100644
--- a/src/aarch64/ffi.c
+++ b/src/aarch64/ffi.c
@@ -72,14 +72,6 @@ ffi_clear_cache (void *start, void *end)
 }
 
 extern void
-ffi_call_SYSV (unsigned (*)(struct call_context *context, unsigned char *,
-			    extended_cif *),
-               struct call_context *context,
-               extended_cif *,
-               size_t,
-               void (*fn)(void));
-
-extern void
 ffi_closure_SYSV (ffi_closure *);
 
 /* Test for an FFI floating point representation.  */
@@ -311,12 +303,11 @@ struct arg_state
 
 /* Initialize a procedure call argument marshalling state.  */
 static void
-arg_init (struct arg_state *state, size_t call_frame_size)
+arg_init (struct arg_state *state)
 {
   state->ngrn = 0;
   state->nsrn = 0;
   state->nsaa = 0;
-
 #if defined (__APPLE__)
   state->allocating_variadic = 0;
 #endif
@@ -529,27 +520,88 @@ allocate_int_to_reg_or_stack (struct call_context *context,
   return allocate_to_stack (state, stack, size, size);
 }
 
-/* Marshall the arguments from FFI representation to procedure call
-   context and stack.  */
+ffi_status
+ffi_prep_cif_machdep (ffi_cif *cif)
+{
+  /* Round the stack up to a multiple of the stack alignment requirement. */
+  cif->bytes = ALIGN(cif->bytes, 16);
 
-static unsigned
-aarch64_prep_args (struct call_context *context, unsigned char *stack,
-		   extended_cif *ecif)
+  /* Initialize our flags. We are interested if this CIF will touch a
+     vector register, if so we will enable context save and load to
+     those registers, otherwise not. This is intended to be friendly
+     to lazy float context switching in the kernel.  */
+  cif->aarch64_flags = 0;
+
+  if (is_v_register_candidate (cif->rtype))
+    {
+      cif->aarch64_flags |= AARCH64_FLAG_ARG_V;
+    }
+  else
+    {
+      int i;
+      for (i = 0; i < cif->nargs; i++)
+        if (is_v_register_candidate (cif->arg_types[i]))
+          {
+            cif->aarch64_flags |= AARCH64_FLAG_ARG_V;
+            break;
+          }
+    }
+
+#if defined (__APPLE__)
+  cif->aarch64_nfixedargs = 0;
+#endif
+
+  return FFI_OK;
+}
+
+#if defined (__APPLE__)
+
+/* Perform Apple-specific cif processing for variadic calls.  */
+ffi_status ffi_prep_cif_machdep_var(ffi_cif *cif,
+				    unsigned int nfixedargs,
+				    unsigned int ntotalargs)
 {
-  ffi_cif *cif = ecif->cif;
-  void **avalue = ecif->avalue;
-  int i, nargs = cif->nargs;
+  ffi_status status;
+
+  status = ffi_prep_cif_machdep (cif);
+
+  cif->aarch64_nfixedargs = nfixedargs;
+
+  return status;
+}
+
+#endif
+
+extern void ffi_call_SYSV (void *stack, void *frame,
+			   void (*fn)(void), int flags) FFI_HIDDEN;
+
+/* Call a function with the provided arguments and capture the return
+   value.  */
+void
+ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
+{
+  struct call_context *context;
+  void *stack, *frame;
   struct arg_state state;
+  size_t stack_bytes;
+  int i, nargs = cif->nargs;
+  int h, t;
+  ffi_type *rtype;
 
-  arg_init (&state, cif->bytes);
+  /* Allocate consecutive stack for everything we'll need.  */
+  stack_bytes = cif->bytes;
+  stack = alloca (stack_bytes + 32 + sizeof(struct call_context));
+  frame = stack + stack_bytes;
+  context = frame + 32;
 
+  arg_init (&state);
   for (i = 0; i < nargs; i++)
     {
       ffi_type *ty = cif->arg_types[i];
       size_t s = ty->size;
-      int h, t = ty->type;
       void *a = avalue[i];
 
+      t = ty->type;
       switch (t)
 	{
 	case FFI_TYPE_VOID:
@@ -665,83 +717,12 @@ aarch64_prep_args (struct call_context *context, unsigned char *stack,
 #endif
     }
 
-  return cif->aarch64_flags;
-}
-
-ffi_status
-ffi_prep_cif_machdep (ffi_cif *cif)
-{
-  /* Round the stack up to a multiple of the stack alignment requirement. */
-  cif->bytes = ALIGN(cif->bytes, 16);
-
-  /* Initialize our flags. We are interested if this CIF will touch a
-     vector register, if so we will enable context save and load to
-     those registers, otherwise not. This is intended to be friendly
-     to lazy float context switching in the kernel.  */
-  cif->aarch64_flags = 0;
-
-  if (is_v_register_candidate (cif->rtype))
-    {
-      cif->aarch64_flags |= AARCH64_FLAG_ARG_V;
-    }
-  else
-    {
-      int i;
-      for (i = 0; i < cif->nargs; i++)
-        if (is_v_register_candidate (cif->arg_types[i]))
-          {
-            cif->aarch64_flags |= AARCH64_FLAG_ARG_V;
-            break;
-          }
-    }
-
-#if defined (__APPLE__)
-  cif->aarch64_nfixedargs = 0;
-#endif
-
-  return FFI_OK;
-}
-
-#if defined (__APPLE__)
-
-/* Perform Apple-specific cif processing for variadic calls */
-ffi_status ffi_prep_cif_machdep_var(ffi_cif *cif,
-				    unsigned int nfixedargs,
-				    unsigned int ntotalargs)
-{
-  ffi_status status;
-
-  status = ffi_prep_cif_machdep (cif);
-
-  cif->aarch64_nfixedargs = nfixedargs;
-
-  return status;
-}
-
-#endif
-
-/* Call a function with the provided arguments and capture the return
-   value.  */
-void
-ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
-{
-  extended_cif ecif;
-  struct call_context context;
-  size_t stack_bytes;
-  int h, t;
-
-  ecif.cif = cif;
-  ecif.avalue = avalue;
-  ecif.rvalue = rvalue;
-
-  stack_bytes = cif->bytes;
-
-  memset (&context, 0, sizeof (context));
-  if (is_register_candidate (cif->rtype))
+  rtype = cif->rtype;
+  if (is_register_candidate (rtype))
     {
-      ffi_call_SYSV (aarch64_prep_args, &context, &ecif, stack_bytes, fn);
+      ffi_call_SYSV (stack, frame, fn, cif->aarch64_flags);
 
-      t = cif->rtype->type;
+      t = rtype->type;
       switch (t)
 	{
 	case FFI_TYPE_INT:
@@ -754,33 +735,35 @@ ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
 	case FFI_TYPE_POINTER:
 	case FFI_TYPE_UINT64:
 	case FFI_TYPE_SINT64:
-	  *(ffi_arg *)rvalue = extend_integer_type (&context.x[0], t);
+	  *(ffi_arg *)rvalue = extend_integer_type (&context->x[0], t);
 	  break;
 
 	case FFI_TYPE_FLOAT:
 	case FFI_TYPE_DOUBLE:
 	case FFI_TYPE_LONGDOUBLE:
-	  compress_hfa_type (rvalue, &context.v[0], 0x100 + t);
+	  compress_hfa_type (rvalue, &context->v[0], 0x100 + t);
 	  break;
 
 	case FFI_TYPE_STRUCT:
 	  h = is_hfa (cif->rtype);
 	  if (h)
-	    compress_hfa_type (rvalue, &context.v[0], h);
-	  else if ((cif->rtype->size + 7) / 8 < N_X_ARG_REG)
-	    memcpy (rvalue, &context.x[0], cif->rtype->size);
+	    compress_hfa_type (rvalue, &context->v[0], h);
 	  else
-	    abort();
+	    {
+	      FFI_ASSERT (rtype->size <= 16);
+	      memcpy (rvalue, &context->x[0], rtype->size);
+	    }
 	  break;
 
 	default:
-	  abort();
+	  FFI_ASSERT (0);
+	  break;
 	}
     }
   else
     {
-      context.x8 = (uintptr_t)rvalue;
-      ffi_call_SYSV (aarch64_prep_args, &context, &ecif, stack_bytes, fn);
+      context->x8 = (uintptr_t)rvalue;
+      ffi_call_SYSV (stack, frame, fn, cif->aarch64_flags);
     }
 }
 
@@ -851,7 +834,7 @@ ffi_closure_SYSV_inner (ffi_closure *closure, struct call_context *context,
   struct arg_state state;
   ffi_type *rtype;
 
-  arg_init (&state, ALIGN(cif->bytes, 16));
+  arg_init (&state);
 
   for (i = 0; i < nargs; i++)
     {
diff --git a/src/aarch64/sysv.S b/src/aarch64/sysv.S
index fa7ff5b..a5f636a 100644
--- a/src/aarch64/sysv.S
+++ b/src/aarch64/sysv.S
@@ -22,6 +22,7 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.  */
 #define LIBFFI_ASM
 #include <fficonfig.h>
 #include <ffi.h>
+#include <ffi_cfi.h>
 #include "internal.h"
 
 #ifdef HAVE_MACHINE_ASM_H
@@ -38,158 +39,77 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.  */
 #endif
 #endif
 
-#define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
-#define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
-#define cfi_restore(reg)		.cfi_restore reg
-#define cfi_def_cfa_register(reg)	.cfi_def_cfa_register reg
+	.text
+	.align 2
 
-        .text
-        .align 2
-
-        .globl CNAME(ffi_call_SYSV)
+	.globl CNAME(ffi_call_SYSV)
 #ifdef __ELF__
-        .type CNAME(ffi_call_SYSV), #function
+	.type	CNAME(ffi_call_SYSV), #function
+	.hidden	CNAME(ffi_call_SYSV)
 #endif
 
-/* ffi_call_SYSV()
-
-   Create a stack frame, setup an argument context, call the callee
-   and extract the result.
-
-   The maximum required argument stack size is provided,
-   ffi_call_SYSV() allocates that stack space then calls the
-   prepare_fn to populate register context and stack.  The
-   argument passing registers are loaded from the register
-   context and the callee called, on return the register passing
-   register are saved back to the context.  Our caller will
-   extract the return value from the final state of the saved
-   register context.
-
-   Prototype:
-
-   extern unsigned
-   ffi_call_SYSV (void (*)(struct call_context *context, unsigned char *,
-			   extended_cif *),
-                  struct call_context *context,
-                  extended_cif *,
-                  size_t required_stack_size,
-                  void (*fn)(void));
+/* ffi_call_SYSV
+   extern void ffi_call_SYSV (void *stack, void *frame,
+			      void (*fn)(void), int flags);
 
    Therefore on entry we have:
 
-   x0 prepare_fn
-   x1 &context
-   x2 &ecif
-   x3 bytes
-   x4 fn
-
-   This function uses the following stack frame layout:
+   x0 stack
+   x1 frame
+   x2 fn
+   x3 flags
+*/
 
-   ==
-                saved x30(lr)
-   x29(fp)->    saved x29(fp)
-                saved x24
-                saved x23
-                saved x22
-   sp'    ->    saved x21
-                ...
-   sp     ->    (constructed callee stack arguments)
-   ==
-
-   Voila! */
-
-#define ffi_call_SYSV_FS (8 * 4)
-
-        .cfi_startproc
+	cfi_startproc
 CNAME(ffi_call_SYSV):
-        stp     x29, x30, [sp, #-16]!
-	cfi_adjust_cfa_offset (16)
-        cfi_rel_offset (x29, 0)
-        cfi_rel_offset (x30, 8)
-
-        mov     x29, sp
-	cfi_def_cfa_register (x29)
-        sub     sp, sp, #ffi_call_SYSV_FS
-
-        stp     x21, x22, [sp, #0]
-        cfi_rel_offset (x21, 0 - ffi_call_SYSV_FS)
-        cfi_rel_offset (x22, 8 - ffi_call_SYSV_FS)
-
-        stp     x23, x24, [sp, #16]
-        cfi_rel_offset (x23, 16 - ffi_call_SYSV_FS)
-        cfi_rel_offset (x24, 24 - ffi_call_SYSV_FS)
-
-        mov     x21, x1
-        mov     x22, x2
-        mov     x24, x4
-
-        /* Allocate the stack space for the actual arguments, many
-           arguments will be passed in registers, but we assume
-           worst case and allocate sufficient stack for ALL of
-           the arguments.  */
-        sub     sp, sp, x3
-
-        /* unsigned (*prepare_fn) (struct call_context *context,
-				   unsigned char *stack, extended_cif *ecif);
-	 */
-        mov     x23, x0
-        mov     x0, x1
-        mov     x1, sp
-        /* x2 already in place */
-        blr     x23
-
-        /* Preserve the flags returned.  */
-        mov     x23, x0
-
-        /* Figure out if we should touch the vector registers.  */
-        tbz     x23, #AARCH64_FLAG_ARG_V_BIT, 1f
-
-        /* Load the vector argument passing registers.  */
-        ldp     q0, q1, [x21, #0]
-        ldp     q2, q3, [x21, #32]
-        ldp     q4, q5, [x21, #64]
-        ldp     q6, q7, [x21, #96]
+	/* Use a stack frame allocated by our caller.  */
+	cfi_def_cfa(x1, 32);
+	stp	x29, x30, [x1]
+	mov	x29, x1
+	mov	sp, x0
+	cfi_def_cfa_register(x29)
+	cfi_rel_offset (x29, 0)
+	cfi_rel_offset (x30, 8)
+
+	str	w3, [x29, #16]		/* save flags */
+	mov	x9, x2			/* save fn */
+
+	/* Load the vector argument passing registers, if necessary.  */
+	tbz	w3, #AARCH64_FLAG_ARG_V_BIT, 1f
+	ldp     q0, q1, [x29, #32 + 0]
+	ldp     q2, q3, [x29, #32 + 32]
+	ldp     q4, q5, [x29, #32 + 64]
+	ldp     q6, q7, [x29, #32 + 96]
 1:
-        /* Load the core argument passing registers, including
+	/* Load the core argument passing registers, including
 	   the structure return pointer.  */
-        ldp     x0, x1, [x21, #16*N_V_ARG_REG + 0]
-        ldp     x2, x3, [x21, #16*N_V_ARG_REG + 16]
-        ldp     x4, x5, [x21, #16*N_V_ARG_REG + 32]
-        ldp     x6, x7, [x21, #16*N_V_ARG_REG + 48]
-        ldr     x8,     [x21, #16*N_V_ARG_REG + 64]
-
-        blr     x24
+	ldp     x0, x1, [x29, #32 + 16*N_V_ARG_REG + 0]
+	ldp     x2, x3, [x29, #32 + 16*N_V_ARG_REG + 16]
+	ldp     x4, x5, [x29, #32 + 16*N_V_ARG_REG + 32]
+	ldp     x6, x7, [x29, #32 + 16*N_V_ARG_REG + 48]
+	ldr     x8,     [x29, #32 + 16*N_V_ARG_REG + 64]
 
-        /* Save the core return registers.  */
-        stp     x0, x1, [x21, #16*N_V_ARG_REG]
+	blr     x9			/* call fn */
 
-        /* Figure out if we should touch the vector registers.  */
-        tbz     x23, #AARCH64_FLAG_ARG_V_BIT, 1f
+	ldr	w3, [x29, #16]		/* reload flags */
 
-        /* Save the vector return registers.  */
-        stp     q0, q1, [x21, #0]
-        stp     q2, q3, [x21, #32]
-1:
-        /* All done, unwind our stack frame.  */
-        ldp     x21, x22, [x29,  # - ffi_call_SYSV_FS]
-        cfi_restore (x21)
-        cfi_restore (x22)
-
-        ldp     x23, x24, [x29,  # - ffi_call_SYSV_FS + 16]
-        cfi_restore (x23)
-        cfi_restore (x24)
-
-        mov     sp, x29
+	/* Partially deconstruct the stack frame.  */
+	mov     sp, x29
 	cfi_def_cfa_register (sp)
+	ldp     x29, x30, [x29]
 
-        ldp     x29, x30, [sp], #16
-	cfi_adjust_cfa_offset (-16)
-        cfi_restore (x29)
-        cfi_restore (x30)
+	/* Save the core return registers.  */
+	stp     x0, x1, [sp, #32 + 16*N_V_ARG_REG]
 
-        ret
+	/* Save the vector return registers, if necessary.  */
+	tbz     w3, #AARCH64_FLAG_ARG_V_BIT, 1f
+	stp     q0, q1, [sp, #32 + 0]
+	stp     q2, q3, [sp, #32 + 32]
+1:
+	/* All done.  */
+	ret
 
-        .cfi_endproc
+	cfi_endproc
 #ifdef __ELF__
         .size CNAME(ffi_call_SYSV), .-CNAME(ffi_call_SYSV)
 #endif
@@ -237,7 +157,7 @@ CNAME(ffi_call_SYSV):
         .align 2
 
         .globl CNAME(ffi_closure_SYSV)
-        .cfi_startproc
+        cfi_startproc
 CNAME(ffi_closure_SYSV):
         stp     x29, x30, [sp, #-16]!
 	cfi_adjust_cfa_offset (16)
@@ -310,7 +230,7 @@ CNAME(ffi_closure_SYSV):
         cfi_restore (x30)
 
         ret
-        .cfi_endproc
+	cfi_endproc
 #ifdef __ELF__
         .size CNAME(ffi_closure_SYSV), .-CNAME(ffi_closure_SYSV)
 #endif