Commit 610c90bf7131de70318dc981a529a63ae36981b8

Richard Henderson 2014-10-28T11:21:50

x86_64: Add support for complex types

diff --git a/src/x86/ffi64.c b/src/x86/ffi64.c
index a03061b..650f7bb 100644
--- a/src/x86/ffi64.c
+++ b/src/x86/ffi64.c
@@ -171,6 +171,7 @@ classify_argument (ffi_type *type, enum x86_64_reg_class classes[],
     case FFI_TYPE_UINT64:
     case FFI_TYPE_SINT64:
     case FFI_TYPE_POINTER:
+    do_integer:
       {
 	size_t size = byte_offset + type->size;
 
@@ -301,11 +302,42 @@ classify_argument (ffi_type *type, enum x86_64_reg_class classes[],
 	  }
 	return words;
       }
-
-    default:
-      FFI_ASSERT(0);
+    case FFI_TYPE_COMPLEX:
+      {
+	ffi_type *inner = type->elements[0];
+	switch (inner->type)
+	  {
+	  case FFI_TYPE_INT:
+	  case FFI_TYPE_UINT8:
+	  case FFI_TYPE_SINT8:
+	  case FFI_TYPE_UINT16:
+	  case FFI_TYPE_SINT16:
+	  case FFI_TYPE_UINT32:
+	  case FFI_TYPE_SINT32:
+	  case FFI_TYPE_UINT64:
+	  case FFI_TYPE_SINT64:
+	    goto do_integer;
+
+	  case FFI_TYPE_FLOAT:
+	    classes[0] = X86_64_SSE_CLASS;
+	    if (byte_offset % 8)
+	      {
+		classes[1] = X86_64_SSESF_CLASS;
+		return 2;
+	      }
+	    return 1;
+	  case FFI_TYPE_DOUBLE:
+	    classes[0] = classes[1] = X86_64_SSEDF_CLASS;
+	    return 2;
+#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
+	  case FFI_TYPE_LONGDOUBLE:
+	    classes[0] = X86_64_COMPLEX_X87_CLASS;
+	    return 1;
+#endif
+	  }
+      }
     }
-  return 0; /* Never reached.  */
+  abort();
 }
 
 /* Examine the argument and return set number of register required in each
@@ -360,7 +392,7 @@ ffi_prep_cif_machdep (ffi_cif *cif)
 {
   int gprcount, ssecount, i, avn, ngpr, nsse, flags;
   enum x86_64_reg_class classes[MAX_CLASSES];
-  size_t bytes, n;
+  size_t bytes, n, rtype_size;
   ffi_type *rtype;
 
   if (cif->abi != FFI_UNIX64)
@@ -369,6 +401,7 @@ ffi_prep_cif_machdep (ffi_cif *cif)
   gprcount = ssecount = 0;
 
   rtype = cif->rtype;
+  rtype_size = rtype->size;
   switch (rtype->type)
     {
     case FFI_TYPE_VOID:
@@ -421,16 +454,54 @@ ffi_prep_cif_machdep (ffi_cif *cif)
 	}
       else
 	{
-	  /* Mark which registers the result appears in.  */
 	  _Bool sse0 = SSE_CLASS_P (classes[0]);
-	  _Bool sse1 = n == 2 && SSE_CLASS_P (classes[1]);
-	  if (sse0)
-	    flags = (sse1 ? UNIX64_RET_ST_XMM0_XMM1 : UNIX64_RET_ST_XMM0_RAX);
-	  else
-	    flags = (sse1 ? UNIX64_RET_ST_RAX_XMM0 : UNIX64_RET_ST_RAX_RDX);
 
-	  /* Mark the true size of the structure.  */
-	  flags |= rtype->size << UNIX64_SIZE_SHIFT;
+	  if (rtype_size == 4 && sse0)
+	    flags = UNIX64_RET_XMM32;
+	  else if (rtype_size == 8)
+	    flags = sse0 ? UNIX64_RET_XMM64 : UNIX64_RET_INT64;
+	  else
+	    {
+	      _Bool sse1 = n == 2 && SSE_CLASS_P (classes[1]);
+	      if (sse0 && sse1)
+		flags = UNIX64_RET_ST_XMM0_XMM1;
+	      else if (sse0)
+		flags = UNIX64_RET_ST_XMM0_RAX;
+	      else if (sse1)
+		flags = UNIX64_RET_ST_RAX_XMM0;
+	      else
+		flags = UNIX64_RET_ST_RAX_RDX;
+	      flags |= rtype_size << UNIX64_SIZE_SHIFT;
+	    }
+	}
+      break;
+    case FFI_TYPE_COMPLEX:
+      switch (rtype->elements[0]->type)
+	{
+	case FFI_TYPE_UINT8:
+	case FFI_TYPE_SINT8:
+	case FFI_TYPE_UINT16:
+	case FFI_TYPE_SINT16:
+	case FFI_TYPE_INT:
+	case FFI_TYPE_UINT32:
+	case FFI_TYPE_SINT32:
+	case FFI_TYPE_UINT64:
+	case FFI_TYPE_SINT64:
+	  flags = UNIX64_RET_ST_RAX_RDX | (rtype_size << UNIX64_SIZE_SHIFT);
+	  break;
+	case FFI_TYPE_FLOAT:
+	  flags = UNIX64_RET_XMM64;
+	  break;
+	case FFI_TYPE_DOUBLE:
+	  flags = UNIX64_RET_ST_XMM0_XMM1 | (16 << UNIX64_SIZE_SHIFT);
+	  break;
+#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
+	case FFI_TYPE_LONGDOUBLE:
+	  flags = UNIX64_RET_X87_2;
+	  break;
+#endif
+	default:
+	  return FFI_BAD_TYPEDEF;
 	}
       break;
     default:
diff --git a/src/x86/internal64.h b/src/x86/internal64.h
index 07b1b10..512e955 100644
--- a/src/x86/internal64.h
+++ b/src/x86/internal64.h
@@ -9,11 +9,13 @@
 #define UNIX64_RET_XMM32	8
 #define UNIX64_RET_XMM64	9
 #define UNIX64_RET_X87		10
-#define UNIX64_RET_ST_RAX_RDX	11
+#define UNIX64_RET_X87_2	11
 #define UNIX64_RET_ST_XMM0_RAX	12
 #define UNIX64_RET_ST_RAX_XMM0	13
 #define UNIX64_RET_ST_XMM0_XMM1	14
-#define UNIX64_RET_LAST		14
+#define UNIX64_RET_ST_RAX_RDX	15
+
+#define UNIX64_RET_LAST		15
 
 #define UNIX64_FLAG_RET_IN_MEM	(1 << 10)
 #define UNIX64_FLAG_XMM_ARGS	(1 << 11)
diff --git a/src/x86/unix64.S b/src/x86/unix64.S
index 0151229..6066bbf 100644
--- a/src/x86/unix64.S
+++ b/src/x86/unix64.S
@@ -156,9 +156,10 @@ E UNIX64_RET_XMM64
 E UNIX64_RET_X87
 	fstpt	(%rdi)
 	ret
-E UNIX64_RET_ST_RAX_RDX
-	movq	%rdx, 8(%rsi)
-	jmp	2f
+E UNIX64_RET_X87_2
+	fstpt	(%rdi)
+	fstpt	16(%rdi)
+	ret
 E UNIX64_RET_ST_XMM0_RAX
 	movq	%rax, 8(%rsi)
 	jmp	3f
@@ -167,14 +168,15 @@ E UNIX64_RET_ST_RAX_XMM0
 	jmp	2f
 E UNIX64_RET_ST_XMM0_XMM1
 	movq	%xmm1, 8(%rsi)
-
-	.align 8
-3:	movq	%xmm0, (%rsi)
+	jmp	3f
+E UNIX64_RET_ST_RAX_RDX
+	movq	%rdx, 8(%rsi)
+2:	movq	%rax, (%rsi)
 	shrl	$UNIX64_SIZE_SHIFT, %ecx
 	rep movsb
 	ret
 	.align 8
-2:	movq	%rax, (%rsi)
+3:	movq	%xmm0, (%rsi)
 	shrl	$UNIX64_SIZE_SHIFT, %ecx
 	rep movsb
 	ret
@@ -201,11 +203,11 @@ E UNIX64_RET_ST_XMM0_XMM1
 	.size    ffi_call_unix64,.-ffi_call_unix64
 
 /* 6 general registers, 8 vector registers,
-   16 bytes of rvalue, 8 bytes of alignment.  */
+   32 bytes of rvalue, 8 bytes of alignment.  */
 #define ffi_closure_OFS_G	0
 #define ffi_closure_OFS_V	(6*8)
 #define ffi_closure_OFS_RVALUE	(ffi_closure_OFS_V + 8*16)
-#define ffi_closure_FS		(ffi_closure_OFS_RVALUE + 16 + 8)
+#define ffi_closure_FS		(ffi_closure_OFS_RVALUE + 32 + 8)
 
 /* The location of rvalue within the red zone after deallocating the frame.  */
 #define ffi_closure_RED_RVALUE	(ffi_closure_OFS_RVALUE - ffi_closure_FS)
@@ -275,6 +277,7 @@ ffi_closure_unix64:
 	leaq	0f(%rip), %r11
 	ja	9f
 	leaq	(%r11, %r10, 8), %r10
+	leaq	ffi_closure_RED_RVALUE(%rsp), %rsi
 	jmp	*%r10
 
 	.align	8
@@ -282,52 +285,54 @@ ffi_closure_unix64:
 E UNIX64_RET_VOID
 	ret
 E UNIX64_RET_UINT8
-	movzbl	ffi_closure_RED_RVALUE(%rsp), %eax
+	movzbl	(%rsi), %eax
 	ret
 E UNIX64_RET_UINT16
-	movzwl	ffi_closure_RED_RVALUE(%rsp), %eax
+	movzwl	(%rsi), %eax
 	ret
 E UNIX64_RET_UINT32
-	movl	ffi_closure_RED_RVALUE(%rsp), %eax
+	movl	(%rsi), %eax
 	ret
 E UNIX64_RET_SINT8
-	movsbl	ffi_closure_RED_RVALUE(%rsp), %eax
+	movsbl	(%rsi), %eax
 	ret
 E UNIX64_RET_SINT16
-	movswl	ffi_closure_RED_RVALUE(%rsp), %eax
+	movswl	(%rsi), %eax
 	ret
 E UNIX64_RET_SINT32
-	movl	ffi_closure_RED_RVALUE(%rsp), %eax
+	movl	(%rsi), %eax
 	ret
 E UNIX64_RET_INT64
-	movq	ffi_closure_RED_RVALUE(%rsp), %rax
+	movq	(%rsi), %rax
 	ret
 E UNIX64_RET_XMM32
-	movd	ffi_closure_RED_RVALUE(%rsp), %xmm0
+	movd	(%rsi), %xmm0
 	ret
 E UNIX64_RET_XMM64
-	movq	ffi_closure_RED_RVALUE(%rsp), %xmm0
+	movq	(%rsi), %xmm0
 	ret
 E UNIX64_RET_X87
-	fldt	ffi_closure_RED_RVALUE(%rsp)
+	fldt	(%rsi)
+	ret
+E UNIX64_RET_X87_2
+	fldt	16(%rsi)
+	fldt	(%rsi)
 	ret
-E UNIX64_RET_ST_RAX_RDX
-	movq	ffi_closure_RED_RVALUE+8(%rsp), %rdx
-	jmp	2f
 E UNIX64_RET_ST_XMM0_RAX
-	movq	ffi_closure_RED_RVALUE+8(%rsp), %rax
+	movq	8(%rsi), %rax
 	jmp	3f
 E UNIX64_RET_ST_RAX_XMM0
-	movq	ffi_closure_RED_RVALUE+8(%rsp), %xmm0
+	movq	8(%rsi), %xmm0
 	jmp	2f
 E UNIX64_RET_ST_XMM0_XMM1
-	movq	ffi_closure_RED_RVALUE+8(%rsp), %xmm1
-
-	.align	8
-3:	movq	ffi_closure_RED_RVALUE(%rsp), %xmm0
+	movq	8(%rsi), %xmm1
+	jmp	3f
+E UNIX64_RET_ST_RAX_RDX
+	movq	8(%rsi), %rdx
+2:	movq	(%rsi), %rax
 	ret
 	.align	8
-2:	movq	ffi_closure_RED_RVALUE(%rsp), %rax
+3:	movq	(%rsi), %xmm0
 	ret
 
 9:	call	abort@PLT