Fix building for arm windows with mingw toolchains (#631)

* arm: Check _WIN32 instead of _M_ARM or _MSC_VER for detecting windows

  This matches what was done for ARM64 in
  c06468fa6674d3783a0edb1d0fae9afc8bc28513.

* arm: Only use armasm source when building with MSVC

  When building for windows/arm with clang, the normal gas style .S
  source works fine (if fixed up to support thumb and other windows
  specifics).

  This matches what was done for ARM64 in
  c06468fa6674d3783a0edb1d0fae9afc8bc28513.

* arm: Fix sysv.S to work in thumb mode

  Align cases in jump tables (adding nop padding to make sure each
  case starts where expected).

  Rewrite instructions that add directly to the pc register.

  For ffi_closure_ret, factor out a call_epilogue subroutine that
  restores both sp and pc from the stack; the thumb version of ldm
  can't load into the sp register. To avoid excessive ifdeffing, keep
  using call_epilogue in arm mode, but keep the shorter
  "ldm sp, {sp, pc}" epilogue in that case.

* arm: Add win32 version of trampoline to sysv.S

  This matches the version of it in sysv_msvc_arm32.S. The calling C
  code expects a specific form of the trampoline on windows; make sure
  these work the same on windows regardless of the form of assembly
  used.

* arm: Avoid optimizing out clearing the thumb bit of ffi_arm_trampoline

  We clear the thumb bit of ffi_arm_trampoline with a bitmask before
  memcpying its instructions into closure->tramp.

  If the bit isn't cleared, the memcpy of the trampoline function
  copies the wrong instructions.

  If the ffi_arm_trampoline symbol is declared as an array of int, the
  compiler can assume that it is aligned to a 4 byte boundary and the
  bitmask operation is a no-op, and optimize it out.

  See https://godbolt.org/z/dE3jE1WTz; both Clang and GCC optimize out
  the bitmask as it is, while MSVC doesn't. By declaring the trampoline
  as an array of unsigned char, the bitmask works as intended.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303
diff --git a/configure.host b/configure.host
index 257b784..2682671 100644
--- a/configure.host
+++ b/configure.host
@@ -32,7 +32,9 @@ case "${host}" in
arm*-*-cygwin* | arm*-*-mingw* | arm*-*-win* )
TARGET=ARM_WIN32; TARGETDIR=arm
- MSVC=1
+ if test "${ax_cv_c_compiler_vendor}" = "microsoft"; then
+ MSVC=1
+ fi
;;
arm*-*-*)
@@ -264,7 +266,11 @@ esac
# ... but some of the cases above share configury.
case "${TARGET}" in
ARM_WIN32)
- SOURCES="ffi.c sysv_msvc_arm32.S"
+ if test "$MSVC" = 1; then
+ SOURCES="ffi.c sysv_msvc_arm32.S"
+ else
+ SOURCES="ffi.c sysv.S"
+ fi
;;
ARM_WIN64)
if test "$MSVC" = 1; then
diff --git a/src/arm/ffi.c b/src/arm/ffi.c
index b2f60d1..593ab4d 100644
--- a/src/arm/ffi.c
+++ b/src/arm/ffi.c
@@ -37,7 +37,7 @@
#include <tramp.h>
#include "internal.h"
-#if defined(_MSC_VER) && defined(_M_ARM)
+#if defined(_WIN32)
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
#endif
@@ -49,10 +49,13 @@
#endif
#else
-#ifndef _M_ARM
+#ifndef _WIN32
extern unsigned int ffi_arm_trampoline[2] FFI_HIDDEN;
#else
-extern unsigned int ffi_arm_trampoline[3] FFI_HIDDEN;
+// Declare this as an array of char, instead of array of int,
+// otherwise Clang optimizes out the "& 0xFFFFFFFE" for clearing
+// the thumb bit.
+extern unsigned char ffi_arm_trampoline[12] FFI_HIDDEN;
#endif
#endif
@@ -104,13 +107,13 @@ ffi_put_arg (ffi_type *ty, void *src, void *dst)
case FFI_TYPE_SINT32:
case FFI_TYPE_UINT32:
case FFI_TYPE_POINTER:
-#ifndef _MSC_VER
+#ifndef _WIN32
case FFI_TYPE_FLOAT:
#endif
*(UINT32 *)dst = *(UINT32 *)src;
break;
-#ifdef _MSC_VER
+#ifdef _WIN32
// casting a float* to a UINT32* doesn't work on Windows
case FFI_TYPE_FLOAT:
*(uintptr_t *)dst = 0;
@@ -633,7 +636,7 @@ ffi_prep_closure_loc (ffi_closure * closure,
#endif
/* Initialize the dynamic trampoline. */
-#ifndef _M_ARM
+#ifndef _WIN32
memcpy(closure->tramp, ffi_arm_trampoline, 8);
#else
// cast away function type so MSVC doesn't set the lower bit of the function pointer
@@ -643,13 +646,13 @@ ffi_prep_closure_loc (ffi_closure * closure,
#if defined (__QNX__)
msync(closure->tramp, 8, 0x1000000); /* clear data map */
msync(codeloc, 8, 0x1000000); /* clear insn map */
-#elif defined(_MSC_VER)
+#elif defined(_WIN32)
FlushInstructionCache(GetCurrentProcess(), closure->tramp, FFI_TRAMPOLINE_SIZE);
#else
__clear_cache(closure->tramp, closure->tramp + 8); /* clear data map */
__clear_cache(codeloc, codeloc + 8); /* clear insn map */
#endif
-#ifdef _M_ARM
+#ifdef _WIN32
*(void(**)(void))(closure->tramp + FFI_TRAMPOLINE_CLOSURE_FUNCTION) = closure_func;
#else
*(void (**)(void))(closure->tramp + 8) = closure_func;
diff --git a/src/arm/ffitarget.h b/src/arm/ffitarget.h
index cb57b84..12d5d20 100644
--- a/src/arm/ffitarget.h
+++ b/src/arm/ffitarget.h
@@ -43,7 +43,7 @@ typedef enum ffi_abi {
FFI_SYSV,
FFI_VFP,
FFI_LAST_ABI,
-#if defined(__ARM_PCS_VFP) || defined(_M_ARM)
+#if defined(__ARM_PCS_VFP) || defined(_WIN32)
FFI_DEFAULT_ABI = FFI_VFP,
#else
FFI_DEFAULT_ABI = FFI_SYSV,
@@ -57,7 +57,7 @@ typedef enum ffi_abi {
signed char vfp_args[16] \
#define FFI_TARGET_SPECIFIC_VARIADIC
-#ifndef _M_ARM
+#ifndef _WIN32
#define FFI_TARGET_HAS_COMPLEX_TYPE
#endif
@@ -77,7 +77,7 @@ typedef enum ffi_abi {
#endif
#else
-#ifdef _MSC_VER
+#ifdef _WIN32
#define FFI_TRAMPOLINE_SIZE 16
#define FFI_TRAMPOLINE_CLOSURE_FUNCTION 12
#else
diff --git a/src/arm/sysv.S b/src/arm/sysv.S
index e816e32..fb36213 100644
--- a/src/arm/sysv.S
+++ b/src/arm/sysv.S
@@ -92,9 +92,25 @@
#define ARM_FUNC_END(name) \
SIZE(name)
+ .text
+ .syntax unified
+#if defined(_WIN32)
+ /* Windows on ARM is thumb-only */
+ .thumb
+#else
+ /* Keep the assembly in ARM mode in other cases, for simplicity
+ * (to avoid interworking issues). */
+#undef __thumb__
+ .arm
+#endif
+
/* Aid in defining a jump table with 8 bytes between entries. */
+#ifdef __thumb__
+/* In thumb mode, instructions can be shorter than expected in arm mode, so
+ * we need to align the start of each case. */
+# define E(index) .align 3
+#elif defined(__clang__)
/* ??? The clang assembler doesn't handle .if with symbolic expressions. */
-#ifdef __clang__
# define E(index)
#else
# define E(index) \
@@ -103,9 +119,6 @@
.endif
#endif
- .text
- .syntax unified
- .arm
#ifndef __clang__
/* We require interworking on LDM, which implies ARMv5T,
@@ -128,6 +141,7 @@ ARM_FUNC_START(ffi_call_VFP)
cfi_startproc
cmp r3, #3 @ load only d0 if possible
+ ite le
#ifdef __clang__
vldrle d0, [r0]
vldmgt r0, {d0-d7}
@@ -167,9 +181,16 @@ ARM_FUNC_START(ffi_call_SYSV)
cfi_def_cfa_register(sp)
@ Store values stored in registers.
+#ifndef __thumb__
.align 3
add pc, pc, r3, lsl #3
nop
+#else
+ adr ip, 0f
+ add ip, ip, r3, lsl #3
+ mov pc, ip
+ .align 3
+#endif
0:
E(ARM_TYPE_VFP_S)
#ifdef __clang__
@@ -228,6 +249,9 @@ ARM_FUNC_END(ffi_go_closure_SYSV)
ARM_FUNC_START(ffi_closure_SYSV)
UNWIND(.fnstart)
cfi_startproc
+#ifdef _WIN32
+ ldmfd sp!, {r0, ip} @ restore fp (r0 is used for stack alignment)
+#endif
stmdb sp!, {r0-r3} @ save argument regs
cfi_adjust_cfa_offset(16)
@@ -256,7 +280,12 @@ ARM_FUNC_START(ffi_closure_SYSV)
@ Load values returned in registers.
add r2, sp, #8+64 @ load result
adr r3, CNAME(ffi_closure_ret)
+#ifndef __thumb__
add pc, r3, r0, lsl #3
+#else
+ add r3, r3, r0, lsl #3
+ mov pc, r3
+#endif
cfi_endproc
UNWIND(.fnend)
ARM_FUNC_END(ffi_closure_SYSV)
@@ -275,6 +304,9 @@ ARM_FUNC_END(ffi_go_closure_VFP)
ARM_FUNC_START(ffi_closure_VFP)
UNWIND(.fnstart)
cfi_startproc
+#ifdef _WIN32
+ ldmfd sp!, {r0, ip} @ restore fp (r0 is used for stack alignment)
+#endif
stmdb sp!, {r0-r3} @ save argument regs
cfi_adjust_cfa_offset(16)
@@ -306,7 +338,12 @@ ARM_FUNC_START(ffi_closure_VFP)
@ Load values returned in registers.
add r2, sp, #8+64 @ load result
adr r3, CNAME(ffi_closure_ret)
+#ifndef __thumb__
add pc, r3, r0, lsl #3
+#else
+ add r3, r3, r0, lsl #3
+ mov pc, r3
+#endif
cfi_endproc
UNWIND(.fnend)
ARM_FUNC_END(ffi_closure_VFP)
@@ -326,32 +363,40 @@ E(ARM_TYPE_VFP_S)
#else
ldc p10, cr0, [r2] @ vldr s0, [r2]
#endif
- ldm sp, {sp,pc}
+ b call_epilogue
E(ARM_TYPE_VFP_D)
#ifdef __clang__
vldr d0, [r2]
#else
ldc p11, cr0, [r2] @ vldr d0, [r2]
#endif
- ldm sp, {sp,pc}
+ b call_epilogue
E(ARM_TYPE_VFP_N)
#ifdef __clang__
vldm r2, {d0-d3}
#else
ldc p11, cr0, [r2], {8} @ vldm r2, {d0-d3}
#endif
- ldm sp, {sp,pc}
+ b call_epilogue
E(ARM_TYPE_INT64)
ldr r1, [r2, #4]
nop
E(ARM_TYPE_INT)
ldr r0, [r2]
- ldm sp, {sp,pc}
+ b call_epilogue
E(ARM_TYPE_VOID)
- ldm sp, {sp,pc}
+ b call_epilogue
nop
E(ARM_TYPE_STRUCT)
+ b call_epilogue
+call_epilogue:
+#ifndef __thumb__
ldm sp, {sp,pc}
+#else
+ ldm sp, {ip,lr}
+ mov sp, ip
+ bx lr
+#endif
cfi_endproc
ARM_FUNC_END(ffi_closure_ret)
@@ -419,6 +464,15 @@ ARM_FUNC_START(ffi_closure_trampoline_table_page)
ARM_FUNC_END(ffi_closure_trampoline_table_page)
#endif
+#elif defined(_WIN32)
+
+ARM_FUNC_START(ffi_arm_trampoline)
+0: adr ip, 0b
+ stmdb sp!, {r0, ip}
+ ldr pc, 1f
+1: .long 0
+ARM_FUNC_END(ffi_arm_trampoline)
+
#else
ARM_FUNC_START(ffi_arm_trampoline)