Commit dc33cb3c998da521a960385c1269c3aef552f69f

Anthony Green 2014-04-05T23:41:22

Merge pull request #114 from joshtriplett/bounce-on-a-tiny-trampoline Fix ABI on 32-bit non-Windows x86: go back to trampoline size 10

diff --git a/src/x86/ffi.c b/src/x86/ffi.c
index 79407ae..72bed06 100644
--- a/src/x86/ffi.c
+++ b/src/x86/ffi.c
@@ -439,9 +439,11 @@ unsigned int FFI_HIDDEN ffi_closure_SYSV_inner (ffi_closure *, void **, void *)
      __attribute__ ((regparm(1)));
 void FFI_HIDDEN ffi_closure_raw_SYSV (ffi_raw_closure *)
      __attribute__ ((regparm(1)));
-#ifndef X86_WIN64
+#ifdef X86_WIN32
 void FFI_HIDDEN ffi_closure_raw_THISCALL (ffi_raw_closure *)
      __attribute__ ((regparm(1)));
+#endif
+#ifndef X86_WIN64
 void FFI_HIDDEN ffi_closure_STDCALL (ffi_closure *)
      __attribute__ ((regparm(1)));
 void FFI_HIDDEN ffi_closure_THISCALL (ffi_closure *)
@@ -605,7 +607,7 @@ ffi_prep_incoming_args_SYSV(char *stack, void **rvalue, void **avalue,
    *(unsigned int*)  &__tramp[6] = __dis; /* jmp __fun  */ \
  }
 
-#define FFI_INIT_TRAMPOLINE_THISCALL(TRAMP,FUN,CTX,SIZE) \
+#define FFI_INIT_TRAMPOLINE_RAW_THISCALL(TRAMP,FUN,CTX,SIZE) \
 { unsigned char *__tramp = (unsigned char*)(TRAMP); \
    unsigned int  __fun = (unsigned int)(FUN); \
    unsigned int  __ctx = (unsigned int)(CTX); \
@@ -632,18 +634,15 @@ ffi_prep_incoming_args_SYSV(char *stack, void **rvalue, void **avalue,
    *(unsigned short*)  &__tramp[50] = (__size + 8); /* ret (__size + 8)  */ \
  }
 
-#define FFI_INIT_TRAMPOLINE_STDCALL(TRAMP,FUN,CTX,SIZE)  \
+#define FFI_INIT_TRAMPOLINE_STDCALL(TRAMP,FUN,CTX)  \
 { unsigned char *__tramp = (unsigned char*)(TRAMP); \
    unsigned int  __fun = (unsigned int)(FUN); \
    unsigned int  __ctx = (unsigned int)(CTX); \
    unsigned int  __dis = __fun - (__ctx + 10); \
-   unsigned short __size = (unsigned short)(SIZE); \
    *(unsigned char*) &__tramp[0] = 0xb8; \
    *(unsigned int*)  &__tramp[1] = __ctx; /* movl __ctx, %eax */ \
    *(unsigned char *)  &__tramp[5] = 0xe8; \
    *(unsigned int*)  &__tramp[6] = __dis; /* call __fun  */ \
-   *(unsigned char *)  &__tramp[10] = 0xc2; \
-   *(unsigned short*)  &__tramp[11] = __size; /* ret __size  */ \
  }
 
 /* the cif must already be prep'ed */
@@ -675,16 +674,15 @@ ffi_prep_closure_loc (ffi_closure* closure,
     }
   else if (cif->abi == FFI_THISCALL)
     {
-      FFI_INIT_TRAMPOLINE_THISCALL (&closure->tramp[0],
-				    &ffi_closure_THISCALL,
-				    (void*)codeloc,
-				    cif->bytes);
+      FFI_INIT_TRAMPOLINE_STDCALL (&closure->tramp[0],
+				   &ffi_closure_THISCALL,
+				   (void*)codeloc);
     }
   else if (cif->abi == FFI_STDCALL)
     {
       FFI_INIT_TRAMPOLINE_STDCALL (&closure->tramp[0],
                                    &ffi_closure_STDCALL,
-                                   (void*)codeloc, cif->bytes);
+                                   (void*)codeloc);
     }
 #ifdef X86_WIN32
   else if (cif->abi == FFI_MS_CDECL)
@@ -721,7 +719,7 @@ ffi_prep_raw_closure_loc (ffi_raw_closure* closure,
   int i;
 
   if (cif->abi != FFI_SYSV
-#ifndef X86_WIN64
+#ifdef X86_WIN32
       && cif->abi != FFI_THISCALL
 #endif
      )
@@ -738,18 +736,17 @@ ffi_prep_raw_closure_loc (ffi_raw_closure* closure,
       FFI_ASSERT (cif->arg_types[i]->type != FFI_TYPE_LONGDOUBLE);
     }
   
-#ifndef X86_WIN64
+#ifdef X86_WIN32
   if (cif->abi == FFI_SYSV)
     {
 #endif
   FFI_INIT_TRAMPOLINE (&closure->tramp[0], &ffi_closure_raw_SYSV,
                        codeloc);
-#ifndef X86_WIN64
+#ifdef X86_WIN32
     }
   else if (cif->abi == FFI_THISCALL)
     {
-      FFI_INIT_TRAMPOLINE_THISCALL (&closure->tramp[0], &ffi_closure_raw_THISCALL,
-				    codeloc, cif->bytes);
+      FFI_INIT_TRAMPOLINE_RAW_THISCALL (&closure->tramp[0], &ffi_closure_raw_THISCALL, codeloc, cif->bytes); /* macro takes SIZE as its 4th argument */
     }
 #endif
   closure->cif  = cif;
diff --git a/src/x86/ffitarget.h b/src/x86/ffitarget.h
index d2aaf9d..b2afe91 100644
--- a/src/x86/ffitarget.h
+++ b/src/x86/ffitarget.h
@@ -122,14 +122,22 @@ typedef enum ffi_abi {
 #if defined (X86_64) || (defined (__x86_64__) && defined (X86_DARWIN))
 #define FFI_TRAMPOLINE_SIZE 24
 #define FFI_NATIVE_RAW_API 0
-#elif defined(X86_WIN64)
+#else
+#ifdef X86_WIN32
+#define FFI_TRAMPOLINE_SIZE 52
+#else
+#ifdef X86_WIN64
 #define FFI_TRAMPOLINE_SIZE 29
 #define FFI_NATIVE_RAW_API 0
 #define FFI_NO_RAW_API 1
 #else
-#define FFI_TRAMPOLINE_SIZE 52
+#define FFI_TRAMPOLINE_SIZE 10
+#endif
+#endif
+#ifndef X86_WIN64
 #define FFI_NATIVE_RAW_API 1	/* x86 has native raw api support */
 #endif
+#endif
 
 #endif
 
diff --git a/src/x86/win32.S b/src/x86/win32.S
index d71c8b8..0a655c4 100644
--- a/src/x86/win32.S
+++ b/src/x86/win32.S
@@ -33,8 +33,13 @@
 #include <fficonfig.h>
 #include <ffi.h>
 
+#define CIF_ABI_OFFSET 0
+#define CIF_BYTES_OFFSET 16
+
 #ifdef _MSC_VER
 
+#define CLOSURE_CIF_OFFSET ((FFI_TRAMPOLINE_SIZE + 3) AND NOT 3)
+
 .386
 .MODEL FLAT, C
 
@@ -188,12 +193,12 @@ ca_epilogue:
         ret
 ffi_call_win32 ENDP
 
-ffi_closure_THISCALL PROC NEAR FORCEFRAME
-	sub	esp, 40
-	lea	edx, [ebp -24]
-	mov	[ebp - 12], edx	/* resp */
-	lea	edx, [ebp + 12]  /* account for stub return address on stack */
-	jmp	stub
+ffi_closure_THISCALL PROC NEAR
+	;; Insert the register argument on the stack as the first argument
+	xchg	DWORD PTR [esp+4], ecx
+	xchg	DWORD PTR [esp], ecx
+	push	ecx
+	jmp	ffi_closure_STDCALL
 ffi_closure_THISCALL ENDP
 
 ffi_closure_SYSV PROC NEAR FORCEFRAME
@@ -464,8 +469,18 @@ cd_retlongdouble:
         jmp   cd_epilogue
 
 cd_epilogue:
-        ;; Epilogue code is autogenerated.
-        ret
+        mov   esp, ebp
+        pop   ebp
+        pop   ecx
+        mov   ecx, DWORD PTR [ecx + (CLOSURE_CIF_OFFSET-10)]
+        cmp   DWORD PTR [ecx + CIF_ABI_OFFSET], 3
+        mov   ecx, DWORD PTR [ecx + CIF_BYTES_OFFSET]
+        jne   cd_not_thiscall
+        add   ecx, 4
+cd_not_thiscall:
+        pop   edx
+        add   esp, ecx
+        jmp   edx
 ffi_closure_STDCALL ENDP
 
 _TEXT ENDS
@@ -473,6 +488,8 @@ END
 
 #else
 
+#define CLOSURE_CIF_OFFSET ((FFI_TRAMPOLINE_SIZE + 3) & ~3)
+
 #if defined(SYMBOL_UNDERSCORE)
 #define USCORE_SYMBOL(x) _##x
 #else
@@ -657,13 +674,11 @@ FFI_HIDDEN(ffi_closure_THISCALL)
 	.def	_ffi_closure_THISCALL;	.scl	2;	.type	32;	.endef
 #endif
 USCORE_SYMBOL(ffi_closure_THISCALL):
-	pushl	%ebp
-	movl	%esp, %ebp
-	subl	$40, %esp
-	leal	-24(%ebp), %edx
-	movl	%edx, -12(%ebp)	/* resp */
-	leal	12(%ebp), %edx  /* account for stub return address on stack */
-	jmp	.stub
+	/* Insert the register argument on the stack as the first argument */
+	xchg	%ecx, 4(%esp)
+	xchg	%ecx, (%esp)
+	push	%ecx
+	jmp	.ffi_closure_STDCALL_internal
 .LFE1:
 
         # This assumes we are using gas.
@@ -685,7 +700,6 @@ USCORE_SYMBOL(ffi_closure_SYSV):
 	leal	-24(%ebp), %edx
 	movl	%edx, -12(%ebp)	/* resp */
 	leal	8(%ebp), %edx
-.stub:
 	movl	%edx, 4(%esp)	/* args = __builtin_dwarf_cfa () */
 	leal	-12(%ebp), %edx
 	movl	%edx, (%esp)	/* &resp */
@@ -811,6 +825,8 @@ USCORE_SYMBOL(ffi_closure_SYSV):
 #define RAW_CLOSURE_FUN_OFFSET (RAW_CLOSURE_CIF_OFFSET + 4)
 #define RAW_CLOSURE_USER_DATA_OFFSET (RAW_CLOSURE_FUN_OFFSET + 4)
 #define CIF_FLAGS_OFFSET 20
+
+#ifdef X86_WIN32
         .balign 16
 FFI_HIDDEN(ffi_closure_raw_THISCALL)
 	.globl	USCORE_SYMBOL(ffi_closure_raw_THISCALL)
@@ -827,6 +843,8 @@ USCORE_SYMBOL(ffi_closure_raw_THISCALL):
 	movl	%edx, 12(%esp)	/* user_data */
 	leal	12(%ebp), %edx	/* __builtin_dwarf_cfa () */
 	jmp	.stubraw
+#endif /* X86_WIN32 */
+
         # This assumes we are using gas.
         .balign 16
 #if defined(X86_WIN32)
@@ -958,6 +976,7 @@ FFI_HIDDEN(ffi_closure_STDCALL)
 	.def	_ffi_closure_STDCALL;	.scl	2;	.type	32;	.endef
 #endif
 USCORE_SYMBOL(ffi_closure_STDCALL):
+.ffi_closure_STDCALL_internal:
 .LFB5:
 	pushl	%ebp
 .LCFI9:
@@ -1070,7 +1089,15 @@ USCORE_SYMBOL(ffi_closure_STDCALL):
 .Lscls_epilogue:
 	movl	%ebp, %esp
 	popl	%ebp
-	ret
+	popl	%ecx
+	movl	(CLOSURE_CIF_OFFSET-10)(%ecx), %ecx
+	cmpl	$3, CIF_ABI_OFFSET(%ecx) /* FFI_THISCALL */
+	movl	CIF_BYTES_OFFSET(%ecx), %ecx
+	jne	1f
+	addl	$4, %ecx
+1:	popl	%edx
+	addl	%ecx, %esp
+	jmp	*%edx
 .ffi_closure_STDCALL_end:
 .LFE5: