Commit ea9b6639c69cbffeacd1ce0c1953c1997cf29d2e

Samuel Holland 2019-10-08T05:57:28

PowerPC bugfixes (#520) * powerpc: Silence warnings about unused labels * powerpc: Fix a couple of comments * powerpc: Fix alignment after float structs * powerpc: Don't pad rvalues copied from FP regs * powerpc: Add missing check in struct alignment * powerpc: Support homogeneous long double structs

diff --git a/src/powerpc/ffi.c b/src/powerpc/ffi.c
index 7eb543e..94a1170 100644
--- a/src/powerpc/ffi.c
+++ b/src/powerpc/ffi.c
@@ -121,8 +121,9 @@ ffi_call_int (ffi_cif *cif,
 # endif
 	/* The SYSV ABI returns a structure of up to 8 bytes in size
 	   left-padded in r3/r4, and the ELFv2 ABI similarly returns a
-	   structure of up to 8 bytes in size left-padded in r3.  */
-	if (rsize <= 8)
+	   structure of up to 8 bytes in size left-padded in r3. But
+	   note that a structure of a single float is not paddded.  */
+	if (rsize <= 8 && (cif->flags & FLAG_RETURNS_FP) == 0)
 	  memcpy (rvalue, (char *) smst_buffer + 8 - rsize, rsize);
 	else
 #endif
diff --git a/src/powerpc/ffi_linux64.c b/src/powerpc/ffi_linux64.c
index 93a31f9..4cf59a4 100644
--- a/src/powerpc/ffi_linux64.c
+++ b/src/powerpc/ffi_linux64.c
@@ -63,10 +63,30 @@ ffi_prep_types_linux64 (ffi_abi abi)
 
 
 static unsigned int
-discover_homogeneous_aggregate (const ffi_type *t, unsigned int *elnum)
+discover_homogeneous_aggregate (ffi_abi abi,
+                                const ffi_type *t,
+                                unsigned int *elnum)
 {
   switch (t->type)
     {
+#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
+    case FFI_TYPE_LONGDOUBLE:
+      /* 64-bit long doubles are equivalent to doubles. */
+      if ((abi & FFI_LINUX_LONG_DOUBLE_128) == 0)
+        {
+          *elnum = 1;
+          return FFI_TYPE_DOUBLE;
+        }
+      /* IBM extended precision values use unaligned pairs
+         of FPRs, but according to the ABI must be considered
+         distinct from doubles. They are also limited to a
+         maximum of four members in a homogeneous aggregate. */
+      else
+        {
+          *elnum = 2;
+          return FFI_TYPE_LONGDOUBLE;
+        }
+#endif
     case FFI_TYPE_FLOAT:
     case FFI_TYPE_DOUBLE:
       *elnum = 1;
@@ -79,7 +99,7 @@ discover_homogeneous_aggregate (const ffi_type *t, unsigned int *elnum)
 	while (*el)
 	  {
 	    unsigned int el_elt, el_elnum = 0;
-	    el_elt = discover_homogeneous_aggregate (*el, &el_elnum);
+	    el_elt = discover_homogeneous_aggregate (abi, *el, &el_elnum);
 	    if (el_elt == 0
 		|| (base_elt && base_elt != el_elt))
 	      return 0;
@@ -112,7 +132,7 @@ ffi_prep_cif_linux64_core (ffi_cif *cif)
   unsigned bytes;
   unsigned i, fparg_count = 0, intarg_count = 0;
   unsigned flags = cif->flags;
-  unsigned int elt, elnum;
+  unsigned elt, elnum, rtype;
 
 #if FFI_TYPE_LONGDOUBLE == FFI_TYPE_DOUBLE
   /* If compiled without long double support..  */
@@ -138,7 +158,11 @@ ffi_prep_cif_linux64_core (ffi_cif *cif)
 #endif
 
   /* Return value handling.  */
-  switch (cif->rtype->type)
+  rtype = cif->rtype->type;
+#if _CALL_ELF == 2
+homogeneous:
+#endif
+  switch (rtype)
     {
 #if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
     case FFI_TYPE_LONGDOUBLE:
@@ -164,19 +188,18 @@ ffi_prep_cif_linux64_core (ffi_cif *cif)
 
     case FFI_TYPE_STRUCT:
 #if _CALL_ELF == 2
-      elt = discover_homogeneous_aggregate (cif->rtype, &elnum);
+      elt = discover_homogeneous_aggregate (cif->abi, cif->rtype, &elnum);
       if (elt)
-	{
-	  if (elt == FFI_TYPE_DOUBLE)
-	    flags |= FLAG_RETURNS_64BITS;
-	  flags |= FLAG_RETURNS_FP | FLAG_RETURNS_SMST;
-	  break;
-	}
+        {
+          flags |= FLAG_RETURNS_SMST;
+          rtype = elt;
+          goto homogeneous;
+        }
       if (cif->rtype->size <= 16)
-	{
-	  flags |= FLAG_RETURNS_SMST;
-	  break;
-	}
+        {
+          flags |= FLAG_RETURNS_SMST;
+          break;
+        }
 #endif
       intarg_count++;
       flags |= FLAG_RETVAL_REFERENCE;
@@ -224,7 +247,7 @@ ffi_prep_cif_linux64_core (ffi_cif *cif)
 		intarg_count = FFI_ALIGN (intarg_count, align);
 	    }
 	  intarg_count += ((*ptr)->size + 7) / 8;
-	  elt = discover_homogeneous_aggregate (*ptr, &elnum);
+	  elt = discover_homogeneous_aggregate (cif->abi, *ptr, &elnum);
 	  if (elt)
 	    {
 	      fparg_count += elnum;
@@ -391,7 +414,7 @@ ffi_prep_args64 (extended_cif *ecif, unsigned long *const stack)
   valp rest;
   valp next_arg;
 
-  /* 'fpr_base' points at the space for fpr3, and grows upwards as
+  /* 'fpr_base' points at the space for f1, and grows upwards as
      we use FPR registers.  */
   valp fpr_base;
   unsigned int fparg_count;
@@ -492,7 +515,9 @@ ffi_prep_args64 (extended_cif *ecif, unsigned long *const stack)
 	  /* Fall through.  */
 #endif
 	case FFI_TYPE_DOUBLE:
+#if _CALL_ELF != 2
 	do_double:
+#endif
 	  double_tmp = **p_argv.d;
 	  if (fparg_count < NUM_FPR_ARG_REGISTERS64 && i < nfixedargs)
 	    {
@@ -511,7 +536,9 @@ ffi_prep_args64 (extended_cif *ecif, unsigned long *const stack)
 	  break;
 
 	case FFI_TYPE_FLOAT:
+#if _CALL_ELF != 2
 	do_float:
+#endif
 	  double_tmp = **p_argv.f;
 	  if (fparg_count < NUM_FPR_ARG_REGISTERS64 && i < nfixedargs)
 	    {
@@ -548,9 +575,13 @@ ffi_prep_args64 (extended_cif *ecif, unsigned long *const stack)
 	      if (align > 16)
 		align = 16;
 	      if (align > 1)
-		next_arg.p = FFI_ALIGN (next_arg.p, align);
+                {
+                  next_arg.p = FFI_ALIGN (next_arg.p, align);
+                  if (next_arg.ul == gpr_end.ul)
+                    next_arg.ul = rest.ul;
+                }
 	    }
-	  elt = discover_homogeneous_aggregate (*ptr, &elnum);
+	  elt = discover_homogeneous_aggregate (ecif->cif->abi, *ptr, &elnum);
 	  if (elt)
 	    {
 #if _CALL_ELF == 2
@@ -576,11 +607,9 @@ ffi_prep_args64 (extended_cif *ecif, unsigned long *const stack)
 		      fparg_count++;
 		    }
 		  while (--elnum != 0);
-		  if ((next_arg.p & 3) != 0)
-		    {
-		      if (++next_arg.f == gpr_end.f)
-			next_arg.f = rest.f;
-		    }
+		  if ((next_arg.p & 7) != 0)
+                    if (++next_arg.f == gpr_end.f)
+                      next_arg.f = rest.f;
 		}
 	      else
 		do
@@ -813,7 +842,7 @@ ffi_closure_helper_LINUX64 (ffi_cif *cif,
 	      if (align > 1)
 		pst = (unsigned long *) FFI_ALIGN ((size_t) pst, align);
 	    }
-	  elt = discover_homogeneous_aggregate (arg_types[i], &elnum);
+	  elt = discover_homogeneous_aggregate (cif->abi, arg_types[i], &elnum);
 	  if (elt)
 	    {
 #if _CALL_ELF == 2
@@ -915,7 +944,9 @@ ffi_closure_helper_LINUX64 (ffi_cif *cif,
 	  /* Fall through.  */
 #endif
 	case FFI_TYPE_DOUBLE:
+#if _CALL_ELF != 2
 	do_double:
+#endif
 	  /* On the outgoing stack all values are aligned to 8 */
 	  /* there are 13 64bit floating point registers */
 
@@ -930,7 +961,9 @@ ffi_closure_helper_LINUX64 (ffi_cif *cif,
 	  break;
 
 	case FFI_TYPE_FLOAT:
+#if _CALL_ELF != 2
 	do_float:
+#endif
 	  if (pfr < end_pfr && i < nfixedargs)
 	    {
 	      /* Float values are stored as doubles in the
diff --git a/src/powerpc/linux64_closure.S b/src/powerpc/linux64_closure.S
index 6487d2a..7f2a214 100644
--- a/src/powerpc/linux64_closure.S
+++ b/src/powerpc/linux64_closure.S
@@ -143,7 +143,7 @@ ffi_closure_LINUX64:
 	stfd	%f12, -104+(11*8)(%r1)
 	stfd	%f13, -104+(12*8)(%r1)
 
-	# load up the pointer to the saved fpr registers */
+	# load up the pointer to the saved fpr registers
 	addi	%r8, %r1, -104
 
 	# load up the pointer to the result storage