Commit b5fed601948237037513a9b7f967c8fc6c9ff1f6

Josh Triplett 2014-04-05T17:33:42

Fix ABI on 32-bit non-Windows x86: go back to trampoline size 10. The trampoline size is part of the ABI, so it cannot change. Move the logic from the stdcall and thiscall trampolines to the functions they call, to reduce them both to 10 bytes. This drops the previously added support for raw THISCALL closures on non-Windows. (Non-raw THISCALL closures still work.)

diff --git a/src/x86/ffi.c b/src/x86/ffi.c
index 79407ae..72bed06 100644
--- a/src/x86/ffi.c
+++ b/src/x86/ffi.c
@@ -439,9 +439,11 @@ unsigned int FFI_HIDDEN ffi_closure_SYSV_inner (ffi_closure *, void **, void *)
      __attribute__ ((regparm(1)));
 void FFI_HIDDEN ffi_closure_raw_SYSV (ffi_raw_closure *)
      __attribute__ ((regparm(1)));
-#ifndef X86_WIN64
+#ifdef X86_WIN32
 void FFI_HIDDEN ffi_closure_raw_THISCALL (ffi_raw_closure *)
      __attribute__ ((regparm(1)));
+#endif
+#ifndef X86_WIN64
 void FFI_HIDDEN ffi_closure_STDCALL (ffi_closure *)
      __attribute__ ((regparm(1)));
 void FFI_HIDDEN ffi_closure_THISCALL (ffi_closure *)
@@ -605,7 +607,7 @@ ffi_prep_incoming_args_SYSV(char *stack, void **rvalue, void **avalue,
    *(unsigned int*)  &__tramp[6] = __dis; /* jmp __fun  */ \
  }
 
-#define FFI_INIT_TRAMPOLINE_THISCALL(TRAMP,FUN,CTX,SIZE) \
+#define FFI_INIT_TRAMPOLINE_RAW_THISCALL(TRAMP,FUN,CTX,SIZE) \
 { unsigned char *__tramp = (unsigned char*)(TRAMP); \
    unsigned int  __fun = (unsigned int)(FUN); \
    unsigned int  __ctx = (unsigned int)(CTX); \
@@ -632,18 +634,15 @@ ffi_prep_incoming_args_SYSV(char *stack, void **rvalue, void **avalue,
    *(unsigned short*)  &__tramp[50] = (__size + 8); /* ret (__size + 8)  */ \
  }
 
-#define FFI_INIT_TRAMPOLINE_STDCALL(TRAMP,FUN,CTX,SIZE)  \
+#define FFI_INIT_TRAMPOLINE_STDCALL(TRAMP,FUN,CTX)  \
 { unsigned char *__tramp = (unsigned char*)(TRAMP); \
    unsigned int  __fun = (unsigned int)(FUN); \
    unsigned int  __ctx = (unsigned int)(CTX); \
    unsigned int  __dis = __fun - (__ctx + 10); \
-   unsigned short __size = (unsigned short)(SIZE); \
    *(unsigned char*) &__tramp[0] = 0xb8; \
    *(unsigned int*)  &__tramp[1] = __ctx; /* movl __ctx, %eax */ \
    *(unsigned char *)  &__tramp[5] = 0xe8; \
    *(unsigned int*)  &__tramp[6] = __dis; /* call __fun  */ \
-   *(unsigned char *)  &__tramp[10] = 0xc2; \
-   *(unsigned short*)  &__tramp[11] = __size; /* ret __size  */ \
  }
 
 /* the cif must already be prep'ed */
@@ -675,16 +674,15 @@ ffi_prep_closure_loc (ffi_closure* closure,
     }
   else if (cif->abi == FFI_THISCALL)
     {
-      FFI_INIT_TRAMPOLINE_THISCALL (&closure->tramp[0],
-				    &ffi_closure_THISCALL,
-				    (void*)codeloc,
-				    cif->bytes);
+      FFI_INIT_TRAMPOLINE_STDCALL (&closure->tramp[0],
+				   &ffi_closure_THISCALL,
+				   (void*)codeloc);
     }
   else if (cif->abi == FFI_STDCALL)
     {
       FFI_INIT_TRAMPOLINE_STDCALL (&closure->tramp[0],
                                    &ffi_closure_STDCALL,
-                                   (void*)codeloc, cif->bytes);
+                                   (void*)codeloc);
     }
 #ifdef X86_WIN32
   else if (cif->abi == FFI_MS_CDECL)
@@ -721,7 +719,7 @@ ffi_prep_raw_closure_loc (ffi_raw_closure* closure,
   int i;
 
   if (cif->abi != FFI_SYSV
-#ifndef X86_WIN64
+#ifdef X86_WIN32
       && cif->abi != FFI_THISCALL
 #endif
      )
@@ -738,18 +736,17 @@ ffi_prep_raw_closure_loc (ffi_raw_closure* closure,
       FFI_ASSERT (cif->arg_types[i]->type != FFI_TYPE_LONGDOUBLE);
     }
   
-#ifndef X86_WIN64
+#ifdef X86_WIN32
   if (cif->abi == FFI_SYSV)
     {
 #endif
   FFI_INIT_TRAMPOLINE (&closure->tramp[0], &ffi_closure_raw_SYSV,
                        codeloc);
-#ifndef X86_WIN64
+#ifdef X86_WIN32
     }
   else if (cif->abi == FFI_THISCALL)
     {
-      FFI_INIT_TRAMPOLINE_THISCALL (&closure->tramp[0], &ffi_closure_raw_THISCALL,
-				    codeloc, cif->bytes);
+      FFI_INIT_TRAMPOLINE_RAW_THISCALL (&closure->tramp[0], &ffi_closure_raw_THISCALL, codeloc, cif->bytes); /* SIZE is mandatory: the macro takes (TRAMP,FUN,CTX,SIZE) and emits "ret (SIZE + 8)" */
     }
 #endif
   closure->cif  = cif;
diff --git a/src/x86/ffitarget.h b/src/x86/ffitarget.h
index d2aaf9d..b2afe91 100644
--- a/src/x86/ffitarget.h
+++ b/src/x86/ffitarget.h
@@ -122,14 +122,22 @@ typedef enum ffi_abi {
 #if defined (X86_64) || (defined (__x86_64__) && defined (X86_DARWIN))
 #define FFI_TRAMPOLINE_SIZE 24
 #define FFI_NATIVE_RAW_API 0
-#elif defined(X86_WIN64)
+#else
+#ifdef X86_WIN32
+#define FFI_TRAMPOLINE_SIZE 52
+#else
+#ifdef X86_WIN64
 #define FFI_TRAMPOLINE_SIZE 29
 #define FFI_NATIVE_RAW_API 0
 #define FFI_NO_RAW_API 1
 #else
-#define FFI_TRAMPOLINE_SIZE 52
+#define FFI_TRAMPOLINE_SIZE 10
+#endif
+#endif
+#ifndef X86_WIN64
 #define FFI_NATIVE_RAW_API 1	/* x86 has native raw api support */
 #endif
+#endif
 
 #endif
 
diff --git a/src/x86/win32.S b/src/x86/win32.S
index d71c8b8..0a655c4 100644
--- a/src/x86/win32.S
+++ b/src/x86/win32.S
@@ -33,8 +33,13 @@
 #include <fficonfig.h>
 #include <ffi.h>
 
+#define CIF_ABI_OFFSET 0
+#define CIF_BYTES_OFFSET 16
+
 #ifdef _MSC_VER
 
+#define CLOSURE_CIF_OFFSET ((FFI_TRAMPOLINE_SIZE + 3) AND NOT 3)
+
 .386
 .MODEL FLAT, C
 
@@ -188,12 +193,12 @@ ca_epilogue:
         ret
 ffi_call_win32 ENDP
 
-ffi_closure_THISCALL PROC NEAR FORCEFRAME
-	sub	esp, 40
-	lea	edx, [ebp -24]
-	mov	[ebp - 12], edx	/* resp */
-	lea	edx, [ebp + 12]  /* account for stub return address on stack */
-	jmp	stub
+ffi_closure_THISCALL PROC NEAR
+	;; Insert the register argument (ecx) on the stack as the first argument, then continue on the STDCALL closure path. Presumably on entry [esp] = return address pushed by the trampoline call and [esp+4] = return address of the original caller (TODO confirm).
+	xchg	DWORD PTR [esp+4], ecx	; [esp+4] <- register argument; ecx <- previous [esp+4]
+	xchg	DWORD PTR [esp], ecx	; [esp] <- previous [esp+4]; ecx <- previous [esp]
+	push	ecx			; previous [esp] goes back on top, so both saved words end up one slot lower
+	jmp	ffi_closure_STDCALL	; tail jump (no new return address is pushed)
 ffi_closure_THISCALL ENDP
 
 ffi_closure_SYSV PROC NEAR FORCEFRAME
@@ -464,8 +469,18 @@ cd_retlongdouble:
         jmp   cd_epilogue
 
 cd_epilogue:
-        ;; Epilogue code is autogenerated.
-        ret
+        mov   esp, ebp                  ; manual epilogue: discard locals
+        pop   ebp                       ; restore the saved frame pointer
+        pop   ecx                       ; presumably the return address pushed by the trampoline call, i.e. closure + 10 (TODO confirm)
+        mov   ecx, DWORD PTR [ecx + (CLOSURE_CIF_OFFSET-10)] ; -10 rebases to the closure start; loads the cif pointer stored after the trampoline
+        cmp   DWORD PTR [ecx + CIF_ABI_OFFSET], 3 ; 3 is presumably FFI_THISCALL (the gas epilogue labels it so) - confirm against the ffi_abi enum
+        mov   ecx, DWORD PTR [ecx + CIF_BYTES_OFFSET] ; ecx = cif bytes; mov leaves the flags from cmp intact
+        jne   cd_not_thiscall           ; branch still tests the cmp above
+        add   ecx, 4                    ; thiscall: 4 more bytes for the register argument that ffi_closure_THISCALL inserted on the stack
+cd_not_thiscall:
+        pop   edx                       ; edx = next word on the stack, used as the return target
+        add   esp, ecx                  ; callee cleanup: drop ecx bytes of arguments
+        jmp   edx                       ; return through edx, since the return address is already popped
 ffi_closure_STDCALL ENDP
 
 _TEXT ENDS
@@ -473,6 +488,8 @@ END
 
 #else
 
+#define CLOSURE_CIF_OFFSET ((FFI_TRAMPOLINE_SIZE + 3) & ~3)
+
 #if defined(SYMBOL_UNDERSCORE)
 #define USCORE_SYMBOL(x) _##x
 #else
@@ -657,13 +674,11 @@ FFI_HIDDEN(ffi_closure_THISCALL)
 	.def	_ffi_closure_THISCALL;	.scl	2;	.type	32;	.endef
 #endif
 USCORE_SYMBOL(ffi_closure_THISCALL):
-	pushl	%ebp
-	movl	%esp, %ebp
-	subl	$40, %esp
-	leal	-24(%ebp), %edx
-	movl	%edx, -12(%ebp)	/* resp */
-	leal	12(%ebp), %edx  /* account for stub return address on stack */
-	jmp	.stub
+	/* Insert the register argument (%ecx) on the stack as the first argument, then continue on the STDCALL closure path. Presumably on entry (%esp) = return address pushed by the trampoline call and 4(%esp) = return address of the original caller (TODO confirm). */
+	xchg	%ecx, 4(%esp)	/* 4(%esp) <- register argument; %ecx <- previous 4(%esp) */
+	xchg	%ecx, (%esp)	/* (%esp) <- previous 4(%esp); %ecx <- previous (%esp) */
+	push	%ecx		/* previous (%esp) goes back on top, so both saved words end up one slot lower */
+	jmp	.ffi_closure_STDCALL_internal	/* tail jump (no new return address is pushed) */
 .LFE1:
 
         # This assumes we are using gas.
@@ -685,7 +700,6 @@ USCORE_SYMBOL(ffi_closure_SYSV):
 	leal	-24(%ebp), %edx
 	movl	%edx, -12(%ebp)	/* resp */
 	leal	8(%ebp), %edx
-.stub:
 	movl	%edx, 4(%esp)	/* args = __builtin_dwarf_cfa () */
 	leal	-12(%ebp), %edx
 	movl	%edx, (%esp)	/* &resp */
@@ -811,6 +825,8 @@ USCORE_SYMBOL(ffi_closure_SYSV):
 #define RAW_CLOSURE_FUN_OFFSET (RAW_CLOSURE_CIF_OFFSET + 4)
 #define RAW_CLOSURE_USER_DATA_OFFSET (RAW_CLOSURE_FUN_OFFSET + 4)
 #define CIF_FLAGS_OFFSET 20
+
+#ifdef X86_WIN32
         .balign 16
 FFI_HIDDEN(ffi_closure_raw_THISCALL)
 	.globl	USCORE_SYMBOL(ffi_closure_raw_THISCALL)
@@ -827,6 +843,8 @@ USCORE_SYMBOL(ffi_closure_raw_THISCALL):
 	movl	%edx, 12(%esp)	/* user_data */
 	leal	12(%ebp), %edx	/* __builtin_dwarf_cfa () */
 	jmp	.stubraw
+#endif /* X86_WIN32 */
+
         # This assumes we are using gas.
         .balign 16
 #if defined(X86_WIN32)
@@ -958,6 +976,7 @@ FFI_HIDDEN(ffi_closure_STDCALL)
 	.def	_ffi_closure_STDCALL;	.scl	2;	.type	32;	.endef
 #endif
 USCORE_SYMBOL(ffi_closure_STDCALL):
+.ffi_closure_STDCALL_internal:
 .LFB5:
 	pushl	%ebp
 .LCFI9:
@@ -1070,7 +1089,15 @@ USCORE_SYMBOL(ffi_closure_STDCALL):
 .Lscls_epilogue:
 	movl	%ebp, %esp
 	popl	%ebp
-	ret
+	popl	%ecx	/* presumably the return address pushed by the trampoline call, i.e. closure + 10 (TODO confirm) */
+	movl	(CLOSURE_CIF_OFFSET-10)(%ecx), %ecx	/* -10 rebases to the closure start; loads the cif pointer stored after the trampoline */
+	cmpl	$3, CIF_ABI_OFFSET(%ecx) /* FFI_THISCALL */
+	movl	CIF_BYTES_OFFSET(%ecx), %ecx	/* %ecx = cif bytes; movl leaves the flags from cmpl intact */
+	jne	1f	/* branch still tests the cmpl above */
+	addl	$4, %ecx	/* thiscall: 4 more bytes for the register argument that ffi_closure_THISCALL inserted on the stack */
+1:	popl	%edx	/* %edx = next word on the stack, used as the return target */
+	addl	%ecx, %esp	/* callee cleanup: drop %ecx bytes of arguments */
+	jmp	*%edx	/* return through %edx, since the return address is already popped */
 .ffi_closure_STDCALL_end:
 .LFE5: