Commit 6d8711057bc3a3befa37eed6765231ea5d244078

Richard Henderson 2016-05-01T11:02:40

Merge pull request #241 from rth7680/fix-win64: Fix win64 ABI calling from unix64

diff --git a/src/x86/ffiw64.c b/src/x86/ffiw64.c
index 0029be0..fd47c58 100644
--- a/src/x86/ffiw64.c
+++ b/src/x86/ffiw64.c
@@ -231,7 +231,11 @@ struct win64_closure_frame
   UINT64 args[];
 };
 
-int FFI_HIDDEN
+/* Force the inner function to use the MS ABI.  When compiling on win64
+   this is a no-op.  When compiling on unix, it simplifies the assembly
+   and leaves saving the extra MS-ABI call-saved registers (%rdi, %rsi,
+   %xmm6-%xmm15) to the compiler.  */
+int FFI_HIDDEN __attribute__((ms_abi))
 ffi_closure_win64_inner(ffi_cif *cif,
 			void (*fun)(ffi_cif*, void*, void**, void*),
 			void *user_data,
diff --git a/src/x86/win64.S b/src/x86/win64.S
index 9d4f8b9..1f82a3e 100644
--- a/src/x86/win64.S
+++ b/src/x86/win64.S
@@ -22,10 +22,15 @@
 #define arg3	%rcx
 #endif
 
-.macro E which
-	.align	8
-	.org	0b + \which * 8
-.endm
+/* E(BASE, X) starts entry X of a jump table made of 8-byte code slots
+   (there is no separate table); .org asserts the slot is at BASE + X * 8.
+   clang, Apple and Solaris (SVR4) builds skip the .org and only align.  */
+/* ??? The clang assembler doesn't handle .org with symbolic expressions.  */
+#if defined(__clang__) || defined(__APPLE__) || (defined (__sun__) && defined(__svr4__))
+# define E(BASE, X)	.balign 8
+#else
+# define E(BASE, X)	.balign 8; .org BASE + X * 8
+#endif
 
 	.text
 
@@ -88,62 +93,62 @@ ffi_call_win64:
 
 	.align	8
 0:
-E FFI_TYPE_VOID
+E(0b, FFI_TYPE_VOID)
 	epilogue
-E FFI_TYPE_INT
+E(0b, FFI_TYPE_INT)
 	movslq	%eax, %rax
 	movq	%rax, (%r8)
 	epilogue
-E FFI_TYPE_FLOAT
+E(0b, FFI_TYPE_FLOAT)
 	movss	%xmm0, (%r8)
 	epilogue
-E FFI_TYPE_DOUBLE
+E(0b, FFI_TYPE_DOUBLE)
 	movsd	%xmm0, (%r8)
 	epilogue
-E FFI_TYPE_LONGDOUBLE
+E(0b, FFI_TYPE_LONGDOUBLE)
 	call	PLT(C(abort))
-E FFI_TYPE_UINT8
+E(0b, FFI_TYPE_UINT8)
 	movzbl	%al, %eax
 	movq	%rax, (%r8)
 	epilogue
-E FFI_TYPE_SINT8
+E(0b, FFI_TYPE_SINT8)
 	movsbq	%al, %rax
 	jmp	98f
-E FFI_TYPE_UINT16
+E(0b, FFI_TYPE_UINT16)
 	movzwl	%ax, %eax
 	movq	%rax, (%r8)
 	epilogue
-E FFI_TYPE_SINT16
+E(0b, FFI_TYPE_SINT16)
 	movswq	%ax, %rax
 	jmp	98f
-E FFI_TYPE_UINT32
+E(0b, FFI_TYPE_UINT32)
 	movl	%eax, %eax
 	movq	%rax, (%r8)
 	epilogue
-E FFI_TYPE_SINT32
+E(0b, FFI_TYPE_SINT32)
 	movslq	%eax, %rax
 	movq	%rax, (%r8)
 	epilogue
-E FFI_TYPE_UINT64
+E(0b, FFI_TYPE_UINT64)
 98:	movq	%rax, (%r8)
 	epilogue
-E FFI_TYPE_SINT64
+E(0b, FFI_TYPE_SINT64)
 	movq	%rax, (%r8)
 	epilogue
-E FFI_TYPE_STRUCT
+E(0b, FFI_TYPE_STRUCT)
 	epilogue
-E FFI_TYPE_POINTER
+E(0b, FFI_TYPE_POINTER)
 	movq	%rax, (%r8)
 	epilogue
-E FFI_TYPE_COMPLEX
+E(0b, FFI_TYPE_COMPLEX)
 	call	PLT(C(abort))
-E FFI_TYPE_SMALL_STRUCT_1B
+E(0b, FFI_TYPE_SMALL_STRUCT_1B)
 	movb	%al, (%r8)
 	epilogue
-E FFI_TYPE_SMALL_STRUCT_2B
+E(0b, FFI_TYPE_SMALL_STRUCT_2B)
 	movw	%ax, (%r8)
 	epilogue
-E FFI_TYPE_SMALL_STRUCT_4B
+E(0b, FFI_TYPE_SMALL_STRUCT_4B)
 	movl	%eax, (%r8)
 	epilogue
 
@@ -174,9 +179,9 @@ ffi_go_closure_win64:
 	movq	%r8, 24(%rsp)
 	movq	%r9, 32(%rsp)
 
-	movq	8(%r10), arg0			/* load cif */
-	movq	16(%r10), arg1			/* load fun */
-	movq	%r10, arg2			/* closure is user_data */
+	movq	8(%r10), %rcx			/* load cif */
+	movq	16(%r10), %rdx			/* load fun */
+	movq	%r10, %r8			/* closure is user_data */
 	jmp	0f
 	cfi_endproc
 	SEH(.seh_endproc)
@@ -193,9 +198,9 @@ ffi_closure_win64:
 	movq	%r8, 24(%rsp)
 	movq	%r9, 32(%rsp)
 
-	movq	FFI_TRAMPOLINE_SIZE(%r10), arg0		/* load cif */
-	movq	FFI_TRAMPOLINE_SIZE+8(%r10), arg1	/* load fun */
-	movq	FFI_TRAMPOLINE_SIZE+16(%r10), arg2	/* load user_data */
+	movq	FFI_TRAMPOLINE_SIZE(%r10), %rcx		/* load cif */
+	movq	FFI_TRAMPOLINE_SIZE+8(%r10), %rdx	/* load fun */
+	movq	FFI_TRAMPOLINE_SIZE+16(%r10), %r8	/* load user_data */
 0:
 	subq	$ffi_clo_FS, %rsp
 	cfi_adjust_cfa_offset(ffi_clo_FS)
@@ -208,7 +213,7 @@ ffi_closure_win64:
 	movsd	%xmm2, ffi_clo_OFF_X+16(%rsp)
 	movsd	%xmm3, ffi_clo_OFF_X+24(%rsp)
 
-	leaq	ffi_clo_OFF_R(%rsp), arg3
+	leaq	ffi_clo_OFF_R(%rsp), %r9
 	call	ffi_closure_win64_inner
 
 	/* Load the result into both possible result registers.  */
diff --git a/testsuite/lib/libffi.exp b/testsuite/lib/libffi.exp
index 0d74627..6d19393 100644
--- a/testsuite/lib/libffi.exp
+++ b/testsuite/lib/libffi.exp
@@ -315,6 +315,11 @@ proc run-many-tests { testcases extra_flags } {
                 "-DABI_NUM=FFI_THISCALL -DABI_ATTR=__THISCALL__"
                 "-DABI_NUM=FFI_FASTCALL -DABI_ATTR=__FASTCALL__"
             }
+        } elseif [istarget "x86_64-*-*"] {
+            set targetabis {
+                ""
+                "-DABI_NUM=FFI_WIN64 -DABI_ATTR=__MSABI__"
+            }
         }
     }
 
diff --git a/testsuite/libffi.call/ffitest.h b/testsuite/libffi.call/ffitest.h
index 15d5e44..5e19451 100644
--- a/testsuite/libffi.call/ffitest.h
+++ b/testsuite/libffi.call/ffitest.h
@@ -24,6 +24,7 @@
 #define __STDCALL__ __attribute__((stdcall))
 #define __THISCALL__ __attribute__((thiscall))
 #define __FASTCALL__ __attribute__((fastcall))
+#define __MSABI__ __attribute__((ms_abi))
 #else
 #define __UNUSED__
 #define __STDCALL__ __stdcall