Commit 3fa5d70cbb18b39a5e44f1c7984dedf73446bf6c

Richard Henderson 2015-01-05T13:03:06

x86: Avoid fastcall when building with pcc

Apparently, PCC doesn't support the fastcall calling convention. Nor does it issue a warning or error for the attribute that it does not understand.

diff --git a/src/x86/ffi.c b/src/x86/ffi.c
index 1d474e3..3885e39 100644
--- a/src/x86/ffi.c
+++ b/src/x86/ffi.c
@@ -235,7 +235,10 @@ static const struct abi_params abi_params[FFI_LAST_ABI] = {
 };
 
 extern void ffi_call_i386(struct call_frame *, char *)
-	FFI_HIDDEN __declspec(fastcall);
+#if HAVE_FASTCALL
+	__declspec(fastcall)
+#endif
+	FFI_HIDDEN;
 
 static void
 ffi_call_int (ffi_cif *cif, void (*fn)(void), void *rvalue,
@@ -392,7 +395,10 @@ struct closure_frame
   void *user_data;				/* 36 */
 };
 
-int FFI_HIDDEN __declspec(fastcall)
+int FFI_HIDDEN
+#if HAVE_FASTCALL
+__declspec(fastcall)
+#endif
 ffi_closure_inner (struct closure_frame *frame, char *stack)
 {
   ffi_cif *cif = frame->cif;
diff --git a/src/x86/internal.h b/src/x86/internal.h
index 480c1d0..09771ba 100644
--- a/src/x86/internal.h
+++ b/src/x86/internal.h
@@ -21,3 +21,9 @@
 #define R_EAX	0
 #define R_EDX	1
 #define R_ECX	2
+
+#ifdef __PCC__
+# define HAVE_FASTCALL 0
+#else
+# define HAVE_FASTCALL 1
+#endif
diff --git a/src/x86/sysv.S b/src/x86/sysv.S
index 36e73b2..ebbea5d 100644
--- a/src/x86/sysv.S
+++ b/src/x86/sysv.S
@@ -90,6 +90,10 @@
 ffi_call_i386:
 L(UW0):
 	# cfi_startproc
+#if !HAVE_FASTCALL
+	movl	4(%esp), %ecx
+	movl	8(%esp), %edx
+#endif
 	movl	(%esp), %eax		/* move the return address */
 	movl	%ebp, (%ecx)		/* store %ebp into local frame */
 	movl	%eax, 4(%ecx)		/* store retaddr into local frame */
@@ -210,29 +214,46 @@ ENDF(ffi_call_i386)
 
 /* Macros to help setting up the closure_data structure.  */
 
-#define closure_FS	(16 + 3*4 + 3*4 + 4)
+#if HAVE_FASTCALL
+# define closure_FS	(40 + 4)
+# define closure_CF	0
+#else
+# define closure_FS	(8 + 40 + 12)
+# define closure_CF	8
+#endif
 
 #define FFI_CLOSURE_SAVE_REGS		\
-	movl	%eax, 16+R_EAX*4(%esp);	\
-	movl	%edx, 16+R_EDX*4(%esp);	\
-	movl	%ecx, 16+R_ECX*4(%esp)
+	movl	%eax, closure_CF+16+R_EAX*4(%esp);	\
+	movl	%edx, closure_CF+16+R_EDX*4(%esp);	\
+	movl	%ecx, closure_CF+16+R_ECX*4(%esp)
 
 #define FFI_CLOSURE_COPY_TRAMP_DATA					\
 	movl	FFI_TRAMPOLINE_SIZE(%eax), %edx;	/* copy cif */	\
 	movl	FFI_TRAMPOLINE_SIZE+4(%eax), %ecx;	/* copy fun */	\
 	movl	FFI_TRAMPOLINE_SIZE+8(%eax), %eax;	/* copy user_data */ \
-	movl	%edx, 28(%esp);						\
-	movl	%ecx, 32(%esp);						\
-	movl	%eax, 36(%esp)
+	movl	%edx, closure_CF+28(%esp);				\
+	movl	%ecx, closure_CF+32(%esp);				\
+	movl	%eax, closure_CF+36(%esp)
 
-# define FFI_CLOSURE_CALL_INNER(UW)					\
+#if HAVE_FASTCALL
+# define FFI_CLOSURE_PREP_CALL						\
 	movl	%esp, %ecx;			/* load closure_data */	\
+	leal	closure_FS+4(%esp), %edx;	/* load incoming stack */
+#else
+# define FFI_CLOSURE_PREP_CALL						\
+	leal	closure_CF(%esp), %ecx;		/* load closure_data */	\
 	leal	closure_FS+4(%esp), %edx;	/* load incoming stack */ \
+	movl	%ecx, (%esp);						\
+	movl	%edx, 4(%esp)
+#endif
+
+#define FFI_CLOSURE_CALL_INNER(UWN) \
 	call	ffi_closure_inner
+
 #define FFI_CLOSURE_MASK_AND_JUMP(N, UW)				\
 	andl	$X86_RET_TYPE_MASK, %eax;				\
 	leal	L(C1(load_table,N))(, %eax, 8), %edx;			\
-	movl	(%esp), %eax;			/* optimiztic load */	\
+	movl	closure_CF(%esp), %eax;		/* optimiztic load */	\
 	jmp	*%edx
 
 #ifdef __PIC__
@@ -243,14 +264,12 @@ ENDF(ffi_call_i386)
 	call	C(__x86.get_pc_thunk.dx);				\
 L(C1(pc,N)):								\
 	leal	L(C1(load_table,N))-L(C1(pc,N))(%edx, %eax, 8), %edx;	\
-	movl	(%esp), %eax;			/* optimiztic load */	\
+	movl	closure_CF(%esp), %eax;		/* optimiztic load */	\
 	jmp	*%edx
 # else
 #  define FFI_CLOSURE_CALL_INNER_SAVE_EBX
 #  undef FFI_CLOSURE_CALL_INNER
 #  define FFI_CLOSURE_CALL_INNER(UWN)					\
-	movl	%esp, %ecx;			/* load closure_data */	\
-	leal	closure_FS+4(%esp), %edx;	/* load incoming stack */ \
 	movl	%ebx, 40(%esp);			/* save ebx */		\
 L(C1(UW,UWN)):								\
 	# cfi_rel_offset(%ebx, 40);					\
@@ -264,7 +283,7 @@ L(C1(UW,UWN)):								\
 	movl	40(%esp), %ebx;			/* restore ebx */	\
 L(C1(UW,UWN)):								\
 	# cfi_restore(%ebx);						\
-	movl	(%esp), %eax;			/* optimiztic load */	\
+	movl	closure_CF(%esp), %eax;		/* optimiztic load */	\
 	jmp	*%edx
 # endif /* DARWIN || HIDDEN */
 #endif /* __PIC__ */
@@ -279,11 +298,11 @@ L(UW6):
 L(UW7):
 	# cfi_def_cfa_offset(closure_FS + 4)
 	FFI_CLOSURE_SAVE_REGS
-	movl	4(%eax), %edx		/* copy cif */
-	movl	8(%eax), %ecx		/* copy fun */
-	movl	%edx, 28(%esp)
-	movl	%ecx, 32(%esp)
-	movl	%eax, 36(%esp)		/* closure is user_data */
+	movl	4(%eax), %edx			/* copy cif */
+	movl	8(%eax), %ecx			/* copy fun */
+	movl	%edx, closure_CF+28(%esp)
+	movl	%ecx, closure_CF+32(%esp)
+	movl	%eax, closure_CF+36(%esp)	/* closure is user_data */
 	jmp	L(do_closure_i386)
 L(UW8):
 	# cfi_endproc
@@ -299,11 +318,11 @@ L(UW9):
 L(UW10):
 	# cfi_def_cfa_offset(closure_FS + 4)
 	FFI_CLOSURE_SAVE_REGS
-	movl	4(%ecx), %edx		/* copy cif */
-	movl	8(%ecx), %eax		/* copy fun */
-	movl	%edx, 28(%esp)
-	movl	%eax, 32(%esp)
-	movl	%ecx, 36(%esp)		/* closure is user_data */
+	movl	4(%ecx), %edx			/* copy cif */
+	movl	8(%ecx), %eax			/* copy fun */
+	movl	%edx, closure_CF+28(%esp)
+	movl	%eax, closure_CF+32(%esp)
+	movl	%ecx, closure_CF+36(%esp)	/* closure is user_data */
 	jmp	L(do_closure_i386)
 L(UW11):
 	# cfi_endproc
@@ -329,19 +348,20 @@ L(UW13):
 	/* Entry point from preceeding Go closures.  */
 L(do_closure_i386):
 
+	FFI_CLOSURE_PREP_CALL
 	FFI_CLOSURE_CALL_INNER(14)
 	FFI_CLOSURE_MASK_AND_JUMP(2, 15)
 
 	.balign	8
 L(load_table2):
 E(L(load_table2), X86_RET_FLOAT)
-	flds	(%esp)
+	flds	closure_CF(%esp)
 	jmp	L(e2)
 E(L(load_table2), X86_RET_DOUBLE)
-	fldl	(%esp)
+	fldl	closure_CF(%esp)
 	jmp	L(e2)
 E(L(load_table2), X86_RET_LDOUBLE)
-	fldt	(%esp)
+	fldt	closure_CF(%esp)
 	jmp	L(e2)
 E(L(load_table2), X86_RET_SINT8)
 	movsbl	%al, %eax
@@ -356,7 +376,7 @@ E(L(load_table2), X86_RET_UINT16)
 	movzwl	%ax, %eax
 	jmp	L(e2)
 E(L(load_table2), X86_RET_INT64)
-	movl	4(%esp), %edx
+	movl	closure_CF+4(%esp), %edx
 	jmp	L(e2)
 E(L(load_table2), X86_RET_INT32)
 	nop
@@ -405,11 +425,11 @@ L(UW21):
 L(UW22):
 	# cfi_def_cfa_offset(closure_FS + 4)
 	FFI_CLOSURE_SAVE_REGS
-	movl	4(%ecx), %edx		/* copy cif */
-	movl	8(%ecx), %eax		/* copy fun */
-	movl	%edx, 28(%esp)
-	movl	%eax, 32(%esp)
-	movl	%ecx, 36(%esp)		/* closure is user_data */
+	movl	4(%ecx), %edx			/* copy cif */
+	movl	8(%ecx), %eax			/* copy fun */
+	movl	%edx, closure_CF+28(%esp)
+	movl	%eax, closure_CF+32(%esp)
+	movl	%ecx, closure_CF+36(%esp)	/* closure is user_data */
 	jmp	L(do_closure_STDCALL)
 L(UW23):
 	# cfi_endproc
@@ -462,6 +482,7 @@ L(do_closure_REGISTER):
 	/* Entry point from preceeding Go closure.  */
 L(do_closure_STDCALL):
 
+	FFI_CLOSURE_PREP_CALL
 	FFI_CLOSURE_CALL_INNER(29)
 
 	movl	%eax, %ecx
@@ -481,15 +502,15 @@ L(do_closure_STDCALL):
 	.balign	8
 L(load_table3):
 E(L(load_table3), X86_RET_FLOAT)
-	flds    (%esp)
+	flds    closure_CF(%esp)
 	movl    %ecx, %esp
 	ret
 E(L(load_table3), X86_RET_DOUBLE)
-	fldl    (%esp)
+	fldl    closure_CF(%esp)
 	movl    %ecx, %esp
 	ret
 E(L(load_table3), X86_RET_LDOUBLE)
-	fldt    (%esp)
+	fldt    closure_CF(%esp)
 	movl    %ecx, %esp
 	ret
 E(L(load_table3), X86_RET_SINT8)
@@ -509,8 +530,7 @@ E(L(load_table3), X86_RET_UINT16)
 	movl    %ecx, %esp
 	ret
 E(L(load_table3), X86_RET_INT64)
-	popl    %eax
-	popl    %edx
+	movl	closure_CF+4(%esp), %edx
 	movl    %ecx, %esp
 	ret
 E(L(load_table3), X86_RET_INT32)