Commit 042b8dafeeee82667e00660fb1edeab72fd9de47

Richard Henderson 2014-11-24T11:24:02

x86: Use .balign, not .align. The Apple assembler interprets the operand of .align as a power of two, rather than as a byte count like everyone else. Force byte alignment by using the proper directive.

diff --git a/src/x86/sysv.S b/src/x86/sysv.S
index 47e73b6..bb9d568 100644
--- a/src/x86/sysv.S
+++ b/src/x86/sysv.S
@@ -61,13 +61,13 @@
    The use of ORG asserts that we're at the correct location.  */
 /* ??? The clang assembler doesn't handle .org with symbolic expressions.  */
 #if defined(__clang__) || defined(__APPLE__)
-# define E(X)	.align 8
+# define E(X)	.balign 8
 #else
-# define E(X)	.align 8; .org 0b + X * 8
+# define E(X)	.balign 8; .org 0b + X * 8
 #endif
 
 	.text
-	.align	16
+	.balign	16
 	.globl	ffi_call_i386
 	FFI_HIDDEN(ffi_call_i386)
 
@@ -120,7 +120,7 @@ ffi_call_i386:
 	movl	16(%ebp), %ecx		/* load result address */
 	jmp	*%ebx
 
-	.align	8
+	.balign	8
 0:
 E(X86_RET_FLOAT)
 	fstps	(%ecx)
@@ -250,7 +250,7 @@ ENDF(ffi_call_i386)
 #endif /* __PIC__ */
 
 #define FFI_GO_CLOSURE(suffix, chain, t1, t2)				\
-	.align	16;							\
+	.balign	16;							\
 	.globl	C(C1(ffi_go_closure_,suffix));				\
 	FFI_HIDDEN(C(C1(ffi_go_closure_,suffix)));			\
 C(C1(ffi_go_closure_,suffix)):						\
@@ -274,7 +274,7 @@ FFI_GO_CLOSURE(ECX, %ecx, %edx, %eax)
 /* The closure entry points are reached from the ffi_closure trampoline.
    On entry, %eax contains the address of the ffi_closure.  */
 
-	.align	16
+	.balign	16
 	.globl	C(ffi_closure_i386)
 	FFI_HIDDEN(C(ffi_closure_i386))
 
@@ -292,7 +292,7 @@ C(ffi_closure_i386):
 	FFI_CLOSURE_CALL_INNER
 	FFI_CLOSURE_MASK_AND_JUMP
 
-	.align	8
+	.balign	8
 0:
 E(X86_RET_FLOAT)
 	flds	(%esp)
@@ -355,7 +355,7 @@ FFI_GO_CLOSURE(STDCALL, %ecx, %edx, %eax)
 /* For REGISTER, we have no available parameter registers, and so we
    enter here having pushed the closure onto the stack.  */
 
-	.align	16
+	.balign	16
 	.globl	C(ffi_closure_REGISTER)
 	FFI_HIDDEN(C(ffi_closure_REGISTER))
 C(ffi_closure_REGISTER):
@@ -380,7 +380,7 @@ ENDF(C(ffi_closure_REGISTER))
    the stack following the closure.  The amount needing to be popped
    is returned to us from ffi_closure_inner.  */
 
-	.align	16
+	.balign	16
 	.globl	C(ffi_closure_STDCALL)
 	FFI_HIDDEN(C(ffi_closure_STDCALL))
 C(ffi_closure_STDCALL):
@@ -418,7 +418,7 @@ C(ffi_closure_STDCALL):
 
 	FFI_CLOSURE_MASK_AND_JUMP
 
-	.align	8
+	.balign	8
 0:
 E(X86_RET_FLOAT)
 	flds    (%esp)
@@ -489,7 +489,7 @@ ENDF(C(ffi_closure_STDCALL))
 
 #define raw_closure_S_FS	(16+16+12)
 
-	.align	16
+	.balign	16
 	.globl	C(ffi_closure_raw_SYSV)
 	FFI_HIDDEN(C(ffi_closure_raw_SYSV))
 C(ffi_closure_raw_SYSV):
@@ -522,7 +522,7 @@ C(ffi_closure_raw_SYSV):
 	cfi_restore(%ebx)
 	jmp	*%eax
 
-	.align	8
+	.balign	8
 0:
 E(X86_RET_FLOAT)
 	flds	16(%esp)
@@ -583,7 +583,7 @@ ENDF(C(ffi_closure_raw_SYSV))
 #undef	raw_closure_S_FS
 #define raw_closure_T_FS	(16+16+8)
 
-	.align	16
+	.balign	16
 	.globl	C(ffi_closure_raw_THISCALL)
 	FFI_HIDDEN(C(ffi_closure_raw_THISCALL))
 C(ffi_closure_raw_THISCALL):
@@ -626,7 +626,7 @@ C(ffi_closure_raw_THISCALL):
 	cfi_restore(%ebx)
 	jmp	*%eax
 
-	.align	8
+	.balign	8
 0:
 E(X86_RET_FLOAT)
 	flds	16(%esp)
diff --git a/src/x86/unix64.S b/src/x86/unix64.S
index ce19ba5..42880d5 100644
--- a/src/x86/unix64.S
+++ b/src/x86/unix64.S
@@ -56,7 +56,7 @@
    The use of ORG asserts that we're at the correct location.  */
 /* ??? The clang assembler doesn't handle .org with symbolic expressions.  */
 .macro E index
-	.align	8
+	.balign	8
 #if !defined(__clang__) && !defined(__APPLE__)
 	.org	0b + \index * 8, 0x90
 #endif
@@ -69,7 +69,7 @@
    for this function.  This has been allocated by ffi_call.  We also
    deallocate some of the stack that has been alloca'd.  */
 
-	.align	8
+	.balign	8
 	.globl	C(ffi_call_unix64)
 	FFI_HIDDEN(C(ffi_call_unix64))
 
@@ -137,7 +137,7 @@ C(ffi_call_unix64):
 	leaq	-20(%rsp), %rsi
 	jmp	*%r10
 
-	.align	8
+	.balign	8
 0:
 E UNIX64_RET_VOID
 	ret
@@ -196,7 +196,7 @@ E UNIX64_RET_ST_RAX_RDX
 	shrl	$UNIX64_SIZE_SHIFT, %ecx
 	rep movsb
 	ret
-	.align 8
+	.balign 8
 3:	movq	%xmm0, (%rsi)
 	shrl	$UNIX64_SIZE_SHIFT, %ecx
 	rep movsb
@@ -207,7 +207,7 @@ E UNIX64_RET_ST_RAX_RDX
 	/* Many times we can avoid loading any SSE registers at all.
 	   It's not worth an indirect jump to load the exact set of
 	   SSE registers needed; zero or all is a good compromise.  */
-	.align 2
+	.balign 2
 	cfi_restore_state
 .Lload_sse:
 	movdqa	0x30(%r10), %xmm0
@@ -233,7 +233,7 @@ ENDF(C(ffi_call_unix64))
 /* The location of rvalue within the red zone after deallocating the frame.  */
 #define ffi_closure_RED_RVALUE	(ffi_closure_OFS_RVALUE - ffi_closure_FS)
 
-	.align	2
+	.balign	2
 	.globl	C(ffi_closure_unix64_sse)
 	FFI_HIDDEN(C(ffi_closure_unix64_sse))
 
@@ -256,7 +256,7 @@ C(ffi_closure_unix64_sse):
 	cfi_endproc
 ENDF(C(ffi_closure_unix64_sse))
 
-	.align	2
+	.balign	2
 	.globl	C(ffi_closure_unix64)
 	FFI_HIDDEN(C(ffi_closure_unix64))
 
@@ -301,7 +301,7 @@ C(ffi_closure_unix64):
 	leaq	ffi_closure_RED_RVALUE(%rsp), %rsi
 	jmp	*%r10
 
-	.align	8
+	.balign	8
 0:
 E UNIX64_RET_VOID
 	ret
@@ -352,7 +352,7 @@ E UNIX64_RET_ST_RAX_RDX
 	movq	8(%rsi), %rdx
 2:	movq	(%rsi), %rax
 	ret
-	.align	8
+	.balign	8
 3:	movq	(%rsi), %xmm0
 	ret
 
@@ -361,7 +361,7 @@ E UNIX64_RET_ST_RAX_RDX
 	cfi_endproc
 ENDF(C(ffi_closure_unix64))
 
-	.align	2
+	.balign	2
 	.globl	C(ffi_go_closure_unix64_sse)
 	FFI_HIDDEN(C(ffi_go_closure_unix64_sse))
 
@@ -384,7 +384,7 @@ C(ffi_go_closure_unix64_sse):
 	cfi_endproc
 ENDF(C(ffi_go_closure_unix64_sse))
 
-	.align	2
+	.balign	2
 	.globl	C(ffi_go_closure_unix64)
 	FFI_HIDDEN(C(ffi_go_closure_unix64))