Commit af18df2bc2f52df81e7b5c619bd86db8489dc873

Anthony Green 2011-11-12T15:52:08

Remove use of ppc string instructions

diff --git a/ChangeLog b/ChangeLog
index d193641..148af6d 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,9 @@
+2011-11-12  Kyle Moffett <Kyle.D.Moffett@boeing.com>
+	    Anthony Green <green@moxielogic.com>
+
+	* src/ppc/sysv.S, src/ppc/ffi.c: Remove use of ppc string
+	instructions (not available on some cores, like the PPC440).
+
 2011-11-12  Kimura Wataru  <kimuraw@i.nifty.jp>
 
 	* m4/ax_enable_builddir: Change from string comparison to numeric
diff --git a/patches/powerpc-sysv-without-string-ops b/patches/powerpc-sysv-without-string-ops
new file mode 100644
index 0000000..a1261b9
--- /dev/null
+++ b/patches/powerpc-sysv-without-string-ops
@@ -0,0 +1,171 @@
+Index: libffi/ChangeLog
+===================================================================
+--- libffi.orig/ChangeLog
++++ libffi/ChangeLog
+@@ -1,3 +1,9 @@
++2011-11-12  Kyle Moffett <Kyle.D.Moffett@boeing.com>
++	    Anthony Green <green@moxielogic.com>
++
++	* src/ppc/sysv.S, src/ppc/ffi.c: Remove use of ppc string
++	instructions (not available on some cores, like the PPC440).
++
+ 2011-11-12  Kimura Wataru  <kimuraw@i.nifty.jp>
+ 
+ 	* m4/ax_enable_builddir: Change from string comparison to numeric
+Index: libffi/src/powerpc/ffi.c
+===================================================================
+--- libffi.orig/src/powerpc/ffi.c
++++ libffi/src/powerpc/ffi.c
+@@ -1,5 +1,6 @@
+ /* -----------------------------------------------------------------------
+    ffi.c - Copyright (C) 2011 Anthony Green
++           Copyright (C) 2011 Kyle Moffett
+            Copyright (C) 2008 Red Hat, Inc
+            Copyright (C) 2007, 2008 Free Software Foundation, Inc
+ 	   Copyright (c) 1998 Geoffrey Keating
+@@ -44,11 +45,6 @@ enum {
+   FLAG_RETURNS_64BITS   = 1 << (31-28),
+ 
+   FLAG_RETURNS_128BITS  = 1 << (31-27), /* cr6  */
+-  FLAG_SYSV_SMST_R4     = 1 << (31-26), /* use r4 for FFI_SYSV 8 byte
+-					   structs.  */
+-  FLAG_SYSV_SMST_R3     = 1 << (31-25), /* use r3 for FFI_SYSV 4 byte
+-					   structs.  */
+-  /* Bits (31-24) through (31-19) store shift value for SMST */
+ 
+   FLAG_ARG_NEEDS_COPY   = 1 << (31- 7),
+   FLAG_FP_ARGUMENTS     = 1 << (31- 6), /* cr1.eq; specified by ABI */
+@@ -672,34 +668,19 @@ ffi_prep_cif_machdep (ffi_cif *cif)
+       break;
+ 
+     case FFI_TYPE_STRUCT:
+-      if (cif->abi == FFI_SYSV)
+-	{
+-	  /* The final SYSV ABI says that structures smaller or equal 8 bytes
+-	     are returned in r3/r4. The FFI_GCC_SYSV ABI instead returns them
+-	     in memory.  */
+-
+-	  /* Treat structs with size <= 8 bytes.  */
+-	  if (size <= 8)
+-	    {
+-	      flags |= FLAG_RETURNS_SMST;
+-	      /* These structs are returned in r3. We pack the type and the
+-		 precalculated shift value (needed in the sysv.S) into flags.
+-		 The same applies for the structs returned in r3/r4.  */
+-	      if (size <= 4)
+-		{
+-		  flags |= FLAG_SYSV_SMST_R3;
+-		  flags |= 8 * (4 - size) << 8;
+-		  break;
+-		}
+-	      /* These structs are returned in r3 and r4. See above.   */
+-	      if  (size <= 8)
+-		{
+-		  flags |= FLAG_SYSV_SMST_R3 | FLAG_SYSV_SMST_R4;
+-		  flags |= 8 * (8 - size) << 8;
+-		  break;
+-		}
+-	    }
+-	}
++      /*
++       * The final SYSV ABI says that structures smaller or equal 8 bytes
++       * are returned in r3/r4. The FFI_GCC_SYSV ABI instead returns them
++       * in memory.
++       *
++       * NOTE: The assembly code can safely assume that it just needs to
++       *       store both r3 and r4 into a 8-byte word-aligned buffer, as
++       *       we allocate a temporary buffer in ffi_call() if this flag is
++       *       set.
++       */
++      if (cif->abi == FFI_SYSV && size <= 8)
++	flags |= FLAG_RETURNS_SMST;
++      
+ #if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
+     byref:
+ #endif
+@@ -887,21 +868,32 @@ extern void FFI_HIDDEN ffi_call_LINUX64(
+ void
+ ffi_call(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
+ {
++  /*
++   * The final SYSV ABI says that structures smaller or equal 8 bytes
++   * are returned in r3/r4. The FFI_GCC_SYSV ABI instead returns them
++   * in memory.
++   *
++   * Just to keep things simple for the assembly code, we will always
++   * bounce-buffer struct return values less than or equal to 8 bytes.
++   * This allows the ASM to handle SYSV small structures by directly
++   * writing r3 and r4 to memory without worrying about struct size.
++   */
++  unsigned int smst_buffer[2];
+   extended_cif ecif;
++  unsigned int rsize;
+ 
+   ecif.cif = cif;
+   ecif.avalue = avalue;
+ 
+-  /* If the return value is a struct and we don't have a return	*/
+-  /* value address then we need to make one		        */
+-
+-  if ((rvalue == NULL) && (cif->rtype->type == FFI_TYPE_STRUCT))
+-    {
+-      ecif.rvalue = alloca(cif->rtype->size);
+-    }
+-  else
+-    ecif.rvalue = rvalue;
+-
++  /* Ensure that we have a valid struct return value */
++  ecif.rvalue = rvalue;
++  if (cif->rtype->type == FFI_TYPE_STRUCT) {
++    rsize = cif->rtype->size;
++    if (rsize <= 8)
++      ecif.rvalue = smst_buffer;
++    else if (!rvalue)
++      ecif.rvalue = alloca(rsize);
++  }
+ 
+   switch (cif->abi)
+     {
+@@ -921,6 +913,10 @@ ffi_call(ffi_cif *cif, void (*fn)(void),
+       FFI_ASSERT (0);
+       break;
+     }
++
++  /* Check for a bounce-buffered return value */
++  if (rvalue && ecif.rvalue == smst_buffer)
++    memcpy(rvalue, smst_buffer, rsize);
+ }
+ 
+ 
+Index: libffi/src/powerpc/sysv.S
+===================================================================
+--- libffi.orig/src/powerpc/sysv.S
++++ libffi/src/powerpc/sysv.S
+@@ -136,19 +136,14 @@ L(float_return_value):
+ 	b	L(done_return_value)
+ 
+ L(small_struct_return_value):
+-	extrwi	%r6,%r31,2,19         /* number of bytes padding = shift/8 */
+-	mtcrf	0x02,%r31	      /* copy flags to cr[24:27] (cr6) */
+-	extrwi	%r5,%r31,5,19         /* r5 <- number of bits of padding */
+-	subfic  %r6,%r6,4             /* r6 <- number of useful bytes in r3 */
+-	bf-	25,L(done_return_value) /* struct in r3 ? if not, done. */
+-/* smst_one_register: */
+-	slw	%r3,%r3,%r5           /* Left-justify value in r3 */
+-	mtxer	%r6                   /* move byte count to XER ... */
+-	stswx	%r3,0,%r30            /* ... and store that many bytes */
+-	bf+	26,L(done_return_value)  /* struct in r3:r4 ? */
+-	add	%r6,%r6,%r30          /* adjust pointer */
+-	stswi	%r4,%r6,4             /* store last four bytes */
+-	b	L(done_return_value)
++	/*
++	 * The C code always allocates a properly-aligned 8-byte bounce
++	 * buffer to make this assembly code very simple.  Just write out
++	 * r3 and r4 to the buffer to allow the C code to handle the rest.
++	 */
++	stw %r3, 0(%r30)
++	stw %r4, 4(%r30)
++	b L(done_return_value)
+ 
+ .LFE1:
+ END(ffi_call_SYSV)
diff --git a/patches/series b/patches/series
index 3ebccee..86a000e 100644
--- a/patches/series
+++ b/patches/series
@@ -28,6 +28,7 @@ darwin-EH-fix
 more-openbsd
 more-openbsd-mips
 darwin11
+powerpc-sysv-without-string-ops
 minix
 
 interix-patch
diff --git a/src/powerpc/ffi.c b/src/powerpc/ffi.c
index fb2a39f..53c7e8e 100644
--- a/src/powerpc/ffi.c
+++ b/src/powerpc/ffi.c
@@ -1,5 +1,6 @@
 /* -----------------------------------------------------------------------
    ffi.c - Copyright (C) 2011 Anthony Green
+           Copyright (C) 2011 Kyle Moffett
            Copyright (C) 2008 Red Hat, Inc
            Copyright (C) 2007, 2008 Free Software Foundation, Inc
 	   Copyright (c) 1998 Geoffrey Keating
@@ -44,11 +45,6 @@ enum {
   FLAG_RETURNS_64BITS   = 1 << (31-28),
 
   FLAG_RETURNS_128BITS  = 1 << (31-27), /* cr6  */
-  FLAG_SYSV_SMST_R4     = 1 << (31-26), /* use r4 for FFI_SYSV 8 byte
-					   structs.  */
-  FLAG_SYSV_SMST_R3     = 1 << (31-25), /* use r3 for FFI_SYSV 4 byte
-					   structs.  */
-  /* Bits (31-24) through (31-19) store shift value for SMST */
 
   FLAG_ARG_NEEDS_COPY   = 1 << (31- 7),
   FLAG_FP_ARGUMENTS     = 1 << (31- 6), /* cr1.eq; specified by ABI */
@@ -672,34 +668,19 @@ ffi_prep_cif_machdep (ffi_cif *cif)
       break;
 
     case FFI_TYPE_STRUCT:
-      if (cif->abi == FFI_SYSV)
-	{
-	  /* The final SYSV ABI says that structures smaller or equal 8 bytes
-	     are returned in r3/r4. The FFI_GCC_SYSV ABI instead returns them
-	     in memory.  */
-
-	  /* Treat structs with size <= 8 bytes.  */
-	  if (size <= 8)
-	    {
-	      flags |= FLAG_RETURNS_SMST;
-	      /* These structs are returned in r3. We pack the type and the
-		 precalculated shift value (needed in the sysv.S) into flags.
-		 The same applies for the structs returned in r3/r4.  */
-	      if (size <= 4)
-		{
-		  flags |= FLAG_SYSV_SMST_R3;
-		  flags |= 8 * (4 - size) << 8;
-		  break;
-		}
-	      /* These structs are returned in r3 and r4. See above.   */
-	      if  (size <= 8)
-		{
-		  flags |= FLAG_SYSV_SMST_R3 | FLAG_SYSV_SMST_R4;
-		  flags |= 8 * (8 - size) << 8;
-		  break;
-		}
-	    }
-	}
+      /*
+       * The final SYSV ABI says that structures smaller or equal 8 bytes
+       * are returned in r3/r4. The FFI_GCC_SYSV ABI instead returns them
+       * in memory.
+       *
+       * NOTE: The assembly code can safely assume that it just needs to
+       *       store both r3 and r4 into a 8-byte word-aligned buffer, as
+       *       we allocate a temporary buffer in ffi_call() if this flag is
+       *       set.
+       */
+      if (cif->abi == FFI_SYSV && size <= 8)
+	flags |= FLAG_RETURNS_SMST;
+      
 #if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
     byref:
 #endif
@@ -887,21 +868,32 @@ extern void FFI_HIDDEN ffi_call_LINUX64(extended_cif *, unsigned long,
 void
 ffi_call(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
 {
+  /*
+   * The final SYSV ABI says that structures smaller or equal 8 bytes
+   * are returned in r3/r4. The FFI_GCC_SYSV ABI instead returns them
+   * in memory.
+   *
+   * Just to keep things simple for the assembly code, we will always
+   * bounce-buffer struct return values less than or equal to 8 bytes.
+   * This allows the ASM to handle SYSV small structures by directly
+   * writing r3 and r4 to memory without worrying about struct size.
+   */
+  unsigned int smst_buffer[2];
   extended_cif ecif;
+  unsigned int rsize;
 
   ecif.cif = cif;
   ecif.avalue = avalue;
 
-  /* If the return value is a struct and we don't have a return	*/
-  /* value address then we need to make one		        */
-
-  if ((rvalue == NULL) && (cif->rtype->type == FFI_TYPE_STRUCT))
-    {
-      ecif.rvalue = alloca(cif->rtype->size);
-    }
-  else
-    ecif.rvalue = rvalue;
-
+  /* Ensure that we have a valid struct return value */
+  ecif.rvalue = rvalue;
+  if (cif->rtype->type == FFI_TYPE_STRUCT) {
+    rsize = cif->rtype->size;
+    if (rsize <= 8)
+      ecif.rvalue = smst_buffer;
+    else if (!rvalue)
+      ecif.rvalue = alloca(rsize);
+  }
 
   switch (cif->abi)
     {
@@ -921,6 +913,10 @@ ffi_call(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
       FFI_ASSERT (0);
       break;
     }
+
+  /* Check for a bounce-buffered return value */
+  if (rvalue && ecif.rvalue == smst_buffer)
+    memcpy(rvalue, smst_buffer, rsize);
 }
 
 
diff --git a/src/powerpc/sysv.S b/src/powerpc/sysv.S
index 96ea22b..9ad8d9f 100644
--- a/src/powerpc/sysv.S
+++ b/src/powerpc/sysv.S
@@ -136,19 +136,14 @@ L(float_return_value):
 	b	L(done_return_value)
 
 L(small_struct_return_value):
-	extrwi	%r6,%r31,2,19         /* number of bytes padding = shift/8 */
-	mtcrf	0x02,%r31	      /* copy flags to cr[24:27] (cr6) */
-	extrwi	%r5,%r31,5,19         /* r5 <- number of bits of padding */
-	subfic  %r6,%r6,4             /* r6 <- number of useful bytes in r3 */
-	bf-	25,L(done_return_value) /* struct in r3 ? if not, done. */
-/* smst_one_register: */
-	slw	%r3,%r3,%r5           /* Left-justify value in r3 */
-	mtxer	%r6                   /* move byte count to XER ... */
-	stswx	%r3,0,%r30            /* ... and store that many bytes */
-	bf+	26,L(done_return_value)  /* struct in r3:r4 ? */
-	add	%r6,%r6,%r30          /* adjust pointer */
-	stswi	%r4,%r6,4             /* store last four bytes */
-	b	L(done_return_value)
+	/*
+	 * The C code always allocates a properly-aligned 8-byte bounce
+	 * buffer to make this assembly code very simple.  Just write out
+	 * r3 and r4 to the buffer to allow the C code to handle the rest.
+	 */
+	stw %r3, 0(%r30)
+	stw %r4, 4(%r30)
+	b L(done_return_value)
 
 .LFE1:
 END(ffi_call_SYSV)