Merge pull request #241 from rth7680/fix-win64

Fix win64 ABI calling from unix64
diff --git a/src/x86/ffiw64.c b/src/x86/ffiw64.c
index 0029be0..fd47c58 100644
--- a/src/x86/ffiw64.c
+++ b/src/x86/ffiw64.c
@@ -231,7 +231,11 @@ struct win64_closure_frame
UINT64 args[];
};
-int FFI_HIDDEN
+/* Force the inner function to use the MS ABI. When compiling on win64
+ this is a no-op. When compiling on unix, this simplifies the assembly
+ and places the burden of saving the registers that are call-saved only
+ under the MS ABI (%rdi, %rsi, %xmm6-%xmm15) on the compiler. */
+int FFI_HIDDEN __attribute__((ms_abi))
ffi_closure_win64_inner(ffi_cif *cif,
void (*fun)(ffi_cif*, void*, void**, void*),
void *user_data,
diff --git a/src/x86/win64.S b/src/x86/win64.S
index 9d4f8b9..1f82a3e 100644
--- a/src/x86/win64.S
+++ b/src/x86/win64.S
@@ -22,10 +22,15 @@
#define arg3 %rcx
#endif
-.macro E which
- .align 8
- .org 0b + \which * 8
-.endm
+/* This macro allows the safe creation of jump tables without an
+ actual table. Each entry occupies its own 8-byte slot, and the use
+ of .org asserts that we're at the correct location (BASE + X * 8). */
+/* ??? The clang assembler doesn't handle .org with symbolic expressions; Apple and Solaris targets skip the check too. */
+#if defined(__clang__) || defined(__APPLE__) || (defined (__sun__) && defined(__svr4__))
+# define E(BASE, X) .balign 8
+#else
+# define E(BASE, X) .balign 8; .org BASE + X * 8
+#endif
.text
@@ -88,62 +93,62 @@ ffi_call_win64:
.align 8
0:
-E FFI_TYPE_VOID
+E(0b, FFI_TYPE_VOID)
epilogue
-E FFI_TYPE_INT
+E(0b, FFI_TYPE_INT)
movslq %eax, %rax
movq %rax, (%r8)
epilogue
-E FFI_TYPE_FLOAT
+E(0b, FFI_TYPE_FLOAT)
movss %xmm0, (%r8)
epilogue
-E FFI_TYPE_DOUBLE
+E(0b, FFI_TYPE_DOUBLE)
movsd %xmm0, (%r8)
epilogue
-E FFI_TYPE_LONGDOUBLE
+E(0b, FFI_TYPE_LONGDOUBLE)
call PLT(C(abort))
-E FFI_TYPE_UINT8
+E(0b, FFI_TYPE_UINT8)
movzbl %al, %eax
movq %rax, (%r8)
epilogue
-E FFI_TYPE_SINT8
+E(0b, FFI_TYPE_SINT8)
movsbq %al, %rax
jmp 98f
-E FFI_TYPE_UINT16
+E(0b, FFI_TYPE_UINT16)
movzwl %ax, %eax
movq %rax, (%r8)
epilogue
-E FFI_TYPE_SINT16
+E(0b, FFI_TYPE_SINT16)
movswq %ax, %rax
jmp 98f
-E FFI_TYPE_UINT32
+E(0b, FFI_TYPE_UINT32)
movl %eax, %eax
movq %rax, (%r8)
epilogue
-E FFI_TYPE_SINT32
+E(0b, FFI_TYPE_SINT32)
movslq %eax, %rax
movq %rax, (%r8)
epilogue
-E FFI_TYPE_UINT64
+E(0b, FFI_TYPE_UINT64)
98: movq %rax, (%r8)
epilogue
-E FFI_TYPE_SINT64
+E(0b, FFI_TYPE_SINT64)
movq %rax, (%r8)
epilogue
-E FFI_TYPE_STRUCT
+E(0b, FFI_TYPE_STRUCT)
epilogue
-E FFI_TYPE_POINTER
+E(0b, FFI_TYPE_POINTER)
movq %rax, (%r8)
epilogue
-E FFI_TYPE_COMPLEX
+E(0b, FFI_TYPE_COMPLEX)
call PLT(C(abort))
-E FFI_TYPE_SMALL_STRUCT_1B
+E(0b, FFI_TYPE_SMALL_STRUCT_1B)
movb %al, (%r8)
epilogue
-E FFI_TYPE_SMALL_STRUCT_2B
+E(0b, FFI_TYPE_SMALL_STRUCT_2B)
movw %ax, (%r8)
epilogue
-E FFI_TYPE_SMALL_STRUCT_4B
+E(0b, FFI_TYPE_SMALL_STRUCT_4B)
movl %eax, (%r8)
epilogue
@@ -174,9 +179,9 @@ ffi_go_closure_win64:
movq %r8, 24(%rsp)
movq %r9, 32(%rsp)
- movq 8(%r10), arg0 /* load cif */
- movq 16(%r10), arg1 /* load fun */
- movq %r10, arg2 /* closure is user_data */
+ movq 8(%r10), %rcx /* load cif */
+ movq 16(%r10), %rdx /* load fun */
+ movq %r10, %r8 /* closure is user_data */
jmp 0f
cfi_endproc
SEH(.seh_endproc)
@@ -193,9 +198,9 @@ ffi_closure_win64:
movq %r8, 24(%rsp)
movq %r9, 32(%rsp)
- movq FFI_TRAMPOLINE_SIZE(%r10), arg0 /* load cif */
- movq FFI_TRAMPOLINE_SIZE+8(%r10), arg1 /* load fun */
- movq FFI_TRAMPOLINE_SIZE+16(%r10), arg2 /* load user_data */
+ movq FFI_TRAMPOLINE_SIZE(%r10), %rcx /* load cif */
+ movq FFI_TRAMPOLINE_SIZE+8(%r10), %rdx /* load fun */
+ movq FFI_TRAMPOLINE_SIZE+16(%r10), %r8 /* load user_data */
0:
subq $ffi_clo_FS, %rsp
cfi_adjust_cfa_offset(ffi_clo_FS)
@@ -208,7 +213,7 @@ ffi_closure_win64:
movsd %xmm2, ffi_clo_OFF_X+16(%rsp)
movsd %xmm3, ffi_clo_OFF_X+24(%rsp)
- leaq ffi_clo_OFF_R(%rsp), arg3
+ leaq ffi_clo_OFF_R(%rsp), %r9
call ffi_closure_win64_inner
/* Load the result into both possible result registers. */
diff --git a/testsuite/lib/libffi.exp b/testsuite/lib/libffi.exp
index 0d74627..6d19393 100644
--- a/testsuite/lib/libffi.exp
+++ b/testsuite/lib/libffi.exp
@@ -315,6 +315,11 @@ proc run-many-tests { testcases extra_flags } {
"-DABI_NUM=FFI_THISCALL -DABI_ATTR=__THISCALL__"
"-DABI_NUM=FFI_FASTCALL -DABI_ATTR=__FASTCALL__"
}
+ } elseif [istarget "x86_64-*-*"] {
+ set targetabis {
+ ""
+ "-DABI_NUM=FFI_WIN64 -DABI_ATTR=__MSABI__"
+ }
}
}
diff --git a/testsuite/libffi.call/ffitest.h b/testsuite/libffi.call/ffitest.h
index 15d5e44..5e19451 100644
--- a/testsuite/libffi.call/ffitest.h
+++ b/testsuite/libffi.call/ffitest.h
@@ -24,6 +24,7 @@
#define __STDCALL__ __attribute__((stdcall))
#define __THISCALL__ __attribute__((thiscall))
#define __FASTCALL__ __attribute__((fastcall))
+#define __MSABI__ __attribute__((ms_abi))
#else
#define __UNUSED__
#define __STDCALL__ __stdcall