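x86: Use .balign, not .align

The Apple assembler interprets the operand of .align as a power-of-two exponent (.align 8 requests 2^8 = 256-byte alignment), whereas GNU as on x86 ELF targets interprets it as a byte count. Use .balign, whose operand is always a byte count, so that the requested alignment is the same under every assembler.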
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223
diff --git a/src/x86/sysv.S b/src/x86/sysv.S
index 47e73b6..bb9d568 100644
--- a/src/x86/sysv.S
+++ b/src/x86/sysv.S
@@ -61,13 +61,13 @@
The use of ORG asserts that we're at the correct location. */
/* ??? The clang assembler doesn't handle .org with symbolic expressions. */
#if defined(__clang__) || defined(__APPLE__)
-# define E(X) .align 8
+# define E(X) .balign 8
#else
-# define E(X) .align 8; .org 0b + X * 8
+# define E(X) .balign 8; .org 0b + X * 8
#endif
.text
- .align 16
+ .balign 16
.globl ffi_call_i386
FFI_HIDDEN(ffi_call_i386)
@@ -120,7 +120,7 @@ ffi_call_i386:
movl 16(%ebp), %ecx /* load result address */
jmp *%ebx
- .align 8
+ .balign 8
0:
E(X86_RET_FLOAT)
fstps (%ecx)
@@ -250,7 +250,7 @@ ENDF(ffi_call_i386)
#endif /* __PIC__ */
#define FFI_GO_CLOSURE(suffix, chain, t1, t2) \
- .align 16; \
+ .balign 16; \
.globl C(C1(ffi_go_closure_,suffix)); \
FFI_HIDDEN(C(C1(ffi_go_closure_,suffix))); \
C(C1(ffi_go_closure_,suffix)): \
@@ -274,7 +274,7 @@ FFI_GO_CLOSURE(ECX, %ecx, %edx, %eax)
/* The closure entry points are reached from the ffi_closure trampoline.
On entry, %eax contains the address of the ffi_closure. */
- .align 16
+ .balign 16
.globl C(ffi_closure_i386)
FFI_HIDDEN(C(ffi_closure_i386))
@@ -292,7 +292,7 @@ C(ffi_closure_i386):
FFI_CLOSURE_CALL_INNER
FFI_CLOSURE_MASK_AND_JUMP
- .align 8
+ .balign 8
0:
E(X86_RET_FLOAT)
flds (%esp)
@@ -355,7 +355,7 @@ FFI_GO_CLOSURE(STDCALL, %ecx, %edx, %eax)
/* For REGISTER, we have no available parameter registers, and so we
enter here having pushed the closure onto the stack. */
- .align 16
+ .balign 16
.globl C(ffi_closure_REGISTER)
FFI_HIDDEN(C(ffi_closure_REGISTER))
C(ffi_closure_REGISTER):
@@ -380,7 +380,7 @@ ENDF(C(ffi_closure_REGISTER))
the stack following the closure. The amount needing to be popped
is returned to us from ffi_closure_inner. */
- .align 16
+ .balign 16
.globl C(ffi_closure_STDCALL)
FFI_HIDDEN(C(ffi_closure_STDCALL))
C(ffi_closure_STDCALL):
@@ -418,7 +418,7 @@ C(ffi_closure_STDCALL):
FFI_CLOSURE_MASK_AND_JUMP
- .align 8
+ .balign 8
0:
E(X86_RET_FLOAT)
flds (%esp)
@@ -489,7 +489,7 @@ ENDF(C(ffi_closure_STDCALL))
#define raw_closure_S_FS (16+16+12)
- .align 16
+ .balign 16
.globl C(ffi_closure_raw_SYSV)
FFI_HIDDEN(C(ffi_closure_raw_SYSV))
C(ffi_closure_raw_SYSV):
@@ -522,7 +522,7 @@ C(ffi_closure_raw_SYSV):
cfi_restore(%ebx)
jmp *%eax
- .align 8
+ .balign 8
0:
E(X86_RET_FLOAT)
flds 16(%esp)
@@ -583,7 +583,7 @@ ENDF(C(ffi_closure_raw_SYSV))
#undef raw_closure_S_FS
#define raw_closure_T_FS (16+16+8)
- .align 16
+ .balign 16
.globl C(ffi_closure_raw_THISCALL)
FFI_HIDDEN(C(ffi_closure_raw_THISCALL))
C(ffi_closure_raw_THISCALL):
@@ -626,7 +626,7 @@ C(ffi_closure_raw_THISCALL):
cfi_restore(%ebx)
jmp *%eax
- .align 8
+ .balign 8
0:
E(X86_RET_FLOAT)
flds 16(%esp)
diff --git a/src/x86/unix64.S b/src/x86/unix64.S
index ce19ba5..42880d5 100644
--- a/src/x86/unix64.S
+++ b/src/x86/unix64.S
@@ -56,7 +56,7 @@
The use of ORG asserts that we're at the correct location. */
/* ??? The clang assembler doesn't handle .org with symbolic expressions. */
.macro E index
- .align 8
+ .balign 8
#if !defined(__clang__) && !defined(__APPLE__)
.org 0b + \index * 8, 0x90
#endif
@@ -69,7 +69,7 @@
for this function. This has been allocated by ffi_call. We also
deallocate some of the stack that has been alloca'd. */
- .align 8
+ .balign 8
.globl C(ffi_call_unix64)
FFI_HIDDEN(C(ffi_call_unix64))
@@ -137,7 +137,7 @@ C(ffi_call_unix64):
leaq -20(%rsp), %rsi
jmp *%r10
- .align 8
+ .balign 8
0:
E UNIX64_RET_VOID
ret
@@ -196,7 +196,7 @@ E UNIX64_RET_ST_RAX_RDX
shrl $UNIX64_SIZE_SHIFT, %ecx
rep movsb
ret
- .align 8
+ .balign 8
3: movq %xmm0, (%rsi)
shrl $UNIX64_SIZE_SHIFT, %ecx
rep movsb
@@ -207,7 +207,7 @@ E UNIX64_RET_ST_RAX_RDX
/* Many times we can avoid loading any SSE registers at all.
It's not worth an indirect jump to load the exact set of
SSE registers needed; zero or all is a good compromise. */
- .align 2
+ .balign 2
cfi_restore_state
.Lload_sse:
movdqa 0x30(%r10), %xmm0
@@ -233,7 +233,7 @@ ENDF(C(ffi_call_unix64))
/* The location of rvalue within the red zone after deallocating the frame. */
#define ffi_closure_RED_RVALUE (ffi_closure_OFS_RVALUE - ffi_closure_FS)
- .align 2
+ .balign 2
.globl C(ffi_closure_unix64_sse)
FFI_HIDDEN(C(ffi_closure_unix64_sse))
@@ -256,7 +256,7 @@ C(ffi_closure_unix64_sse):
cfi_endproc
ENDF(C(ffi_closure_unix64_sse))
- .align 2
+ .balign 2
.globl C(ffi_closure_unix64)
FFI_HIDDEN(C(ffi_closure_unix64))
@@ -301,7 +301,7 @@ C(ffi_closure_unix64):
leaq ffi_closure_RED_RVALUE(%rsp), %rsi
jmp *%r10
- .align 8
+ .balign 8
0:
E UNIX64_RET_VOID
ret
@@ -352,7 +352,7 @@ E UNIX64_RET_ST_RAX_RDX
movq 8(%rsi), %rdx
2: movq (%rsi), %rax
ret
- .align 8
+ .balign 8
3: movq (%rsi), %xmm0
ret
@@ -361,7 +361,7 @@ E UNIX64_RET_ST_RAX_RDX
cfi_endproc
ENDF(C(ffi_closure_unix64))
- .align 2
+ .balign 2
.globl C(ffi_go_closure_unix64_sse)
FFI_HIDDEN(C(ffi_go_closure_unix64_sse))
@@ -384,7 +384,7 @@ C(ffi_go_closure_unix64_sse):
cfi_endproc
ENDF(C(ffi_go_closure_unix64_sse))
- .align 2
+ .balign 2
.globl C(ffi_go_closure_unix64)
FFI_HIDDEN(C(ffi_go_closure_unix64))