Commit f47d263f1bb01fe7701249fe5df4ac2e1534bd51

David Turner 2008-09-02T02:21:58

* include/freetype/config/ftoption.h, include/freetype/config/ftconfig.h, builds/unix/ftconfig.in, include/freetype/freetype.h, src/base/ftcalc.c: Make FT_MulFix an inlined function. Also provide an assembler implementation for ARM architectures. This is done to speed up FreeType a little (on x86, 3% when loading+hinting and 10% when rendering; the ARM savings are larger, though). Disable this by undefining FT_CONFIG_OPTION_INLINE_MULFIX in ftoption.h.

diff --git a/ChangeLog b/ChangeLog
index 5210b35..f35316a 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,15 @@
 2008-09-01  david turner <david@freetype.org>
 
+	* include/freetype/config/ftoption.h,
+	include/freetype/config/ftconfig.h, builds/unix/ftconfig.in,
+	include/freetype/freetype.h, src/base/ftcalc.c:
+	Make FT_MulFix an inlined function.  Also provide an assembler
+	implementation for ARM architectures.  This is done to speed up
+	FreeType a little (on x86, 3% when loading+hinting and 10% when
+	rendering; the ARM savings are larger, though).
+	Disable this by undefining FT_CONFIG_OPTION_INLINE_MULFIX in
+	ftoption.h.
+
 	* include/freetype/ftadvanc.h, src/base/ftadvanc.c,
 	include/freetype/config/ftheader.h, include/freetype/freetype.h,
 	src/base/Jamfile, src/base/rules.mk, src/cff/cffdrivr.c,
diff --git a/builds/unix/ftconfig.in b/builds/unix/ftconfig.in
index 1a96264..6430abf 100644
--- a/builds/unix/ftconfig.in
+++ b/builds/unix/ftconfig.in
@@ -197,6 +197,67 @@ FT_BEGIN_HEADER
 
 #endif /* FT_SIZEOF_LONG == 8 */
 
+#if !defined(FT_CONFIG_OPTION_NO_ASSEMBLER)
+/* Provide assembler fragments for performance-critical
+ * functions.  These must be defined `static __inline__'
+ * with GCC.
+ */
+#if defined(__GNUC__)
+
+#  if defined(__arm__) && !defined(__thumb__)
+#    define FT_MULFIX_ASSEMBLER   FT_MulFix_arm
+    static __inline__ FT_Int32   /* returns (a*b + rounding) >> 16 */
+    FT_MulFix_arm( FT_Int32  a, FT_Int32  b )
+    {
+        register FT_Int32  t, t2;   /* scratch: t = high word, t2 = low word */
+        __asm__ __volatile__ (      /* `__asm__' is also accepted with -ansi */
+            "smull  %1, %2, %4, %3\n\t"       /* (lo=%1,hi=%2) = a*b */
+            "mov    %0, %2, asr #31\n\t"      /* %0  = (hi >> 31) */
+            "add    %0, %0, #0x8000\n\t"      /* %0 += 0x8000 */
+            "adds   %1, %1, %0\n\t"           /* %1 += %0 */
+            "adc    %2, %2, #0\n\t"           /* %2 += carry */
+            "mov    %0, %1, lsr #16\n\t"      /* %0  = %1 >> 16 */
+            "orr    %0, %0, %2, lsl #16\n\t"  /* %0 |= %2 << 16 */
+            : "=r"(a), "=&r"(t2), "=&r"(t)
+            : "r"(a), "r"(b)
+            : "cc" );   /* `adds' and `adc' modify the condition flags */
+        return a;
+    }
+#  endif /* __arm__ */
+
+#  if defined(i386)
+#    define FT_MULFIX_ASSEMBLER  FT_MulFix_i386
+    static __inline__ FT_Int32   /* returns (a*b + rounding) >> 16 */
+    FT_MulFix_i386( FT_Int32  a, FT_Int32  b )
+    {
+        register FT_Int32  result;
+
+        __asm__ __volatile__ (
+          "imul  %%edx\n"             /* edx:eax = eax * edx, i.e. a*b */
+          "movl  %%edx, %%ecx\n"      /* ecx  = high word of the product */
+          "sarl  $31, %%ecx\n"        /* ecx  = 0 or -1 (sign of product) */
+          "addl  $0x8000, %%ecx\n"    /* ecx += 0x8000 (rounding term) */
+          "addl  %%ecx, %%eax\n"      /* low word += rounding term */
+          "adcl  $0, %%edx\n"         /* high word += carry */
+          "shrl  $16, %%eax\n"        /* eax  = low word >> 16 */
+          "shll  $16, %%edx\n"        /* edx  = high word << 16 */
+          "addl  %%edx, %%eax\n"      /* eax  = combined 32-bit result */
+          : "=a"(result), "+d"(b)     /* result leaves in eax; edx is read/written */
+          : "a"(a)                    /* a enters in eax */
+          : "%ecx"                    /* ecx is used as scratch */
+        );
+        return result;
+    }
+#  endif /* i386 */
+#endif /* __GNUC__ */
+#endif /* !NO_ASSEMBLER */
+
+#ifdef FT_CONFIG_OPTION_INLINE_MULFIX
+#  ifdef FT_MULFIX_ASSEMBLER
+#    define FT_MULFIX_INLINED   FT_MULFIX_ASSEMBLER
+#  endif
+#endif
+
 
 #define FT_BEGIN_STMNT  do {
 #define FT_END_STMNT    } while ( 0 )
diff --git a/include/freetype/config/ftconfig.h b/include/freetype/config/ftconfig.h
index 09b2cf9..0e9daf3 100644
--- a/include/freetype/config/ftconfig.h
+++ b/include/freetype/config/ftconfig.h
@@ -225,6 +225,67 @@ FT_BEGIN_HEADER
 
 #endif
 
+#if !defined(FT_CONFIG_OPTION_NO_ASSEMBLER)
+/* Provide assembler fragments for performance-critical
+ * functions.  These must be defined `static __inline__'
+ * with GCC.
+ */
+#if defined(__GNUC__)
+
+#  if defined(__arm__) && !defined(__thumb__)
+#    define FT_MULFIX_ASSEMBLER   FT_MulFix_arm
+    static __inline__ FT_Int32   /* returns (a*b + rounding) >> 16 */
+    FT_MulFix_arm( FT_Int32  a, FT_Int32  b )
+    {
+        register FT_Int32  t, t2;   /* scratch: t = high word, t2 = low word */
+        __asm__ __volatile__ (      /* `__asm__' is also accepted with -ansi */
+            "smull  %1, %2, %4, %3\n\t"       /* (lo=%1,hi=%2) = a*b */
+            "mov    %0, %2, asr #31\n\t"      /* %0  = (hi >> 31) */
+            "add    %0, %0, #0x8000\n\t"      /* %0 += 0x8000 */
+            "adds   %1, %1, %0\n\t"           /* %1 += %0 */
+            "adc    %2, %2, #0\n\t"           /* %2 += carry */
+            "mov    %0, %1, lsr #16\n\t"      /* %0  = %1 >> 16 */
+            "orr    %0, %0, %2, lsl #16\n\t"  /* %0 |= %2 << 16 */
+            : "=r"(a), "=&r"(t2), "=&r"(t)
+            : "r"(a), "r"(b)
+            : "cc" );   /* `adds' and `adc' modify the condition flags */
+        return a;
+    }
+#  endif /* __arm__ */
+
+#  if defined(i386)
+#    define FT_MULFIX_ASSEMBLER  FT_MulFix_i386
+    static __inline__ FT_Int32   /* returns (a*b + rounding) >> 16 */
+    FT_MulFix_i386( FT_Int32  a, FT_Int32  b )
+    {
+        register FT_Int32  result;
+
+        __asm__ __volatile__ (
+          "imul  %%edx\n"             /* edx:eax = eax * edx, i.e. a*b */
+          "movl  %%edx, %%ecx\n"      /* ecx  = high word of the product */
+          "sarl  $31, %%ecx\n"        /* ecx  = 0 or -1 (sign of product) */
+          "addl  $0x8000, %%ecx\n"    /* ecx += 0x8000 (rounding term) */
+          "addl  %%ecx, %%eax\n"      /* low word += rounding term */
+          "adcl  $0, %%edx\n"         /* high word += carry */
+          "shrl  $16, %%eax\n"        /* eax  = low word >> 16 */
+          "shll  $16, %%edx\n"        /* edx  = high word << 16 */
+          "addl  %%edx, %%eax\n"      /* eax  = combined 32-bit result */
+          : "=a"(result), "+d"(b)     /* result leaves in eax; edx is read/written */
+          : "a"(a)                    /* a enters in eax */
+          : "%ecx"                    /* ecx is used as scratch */
+        );
+        return result;
+    }
+#  endif /* i386 */
+#endif /* __GNUC__ */
+#endif /* !NO_ASSEMBLER */
+
+#ifdef FT_CONFIG_OPTION_INLINE_MULFIX
+#  ifdef FT_MULFIX_ASSEMBLER
+#    define FT_MULFIX_INLINED   FT_MULFIX_ASSEMBLER
+#  endif
+#endif
+
 
   /* determine whether we have a 64-bit int type for platforms without */
   /* Autoconf                                                          */
diff --git a/include/freetype/config/ftoption.h b/include/freetype/config/ftoption.h
index a2d61f9..a92e19b 100644
--- a/include/freetype/config/ftoption.h
+++ b/include/freetype/config/ftoption.h
@@ -117,6 +117,26 @@ FT_BEGIN_HEADER
 
   /*************************************************************************/
   /*                                                                       */
+  /* When this macro is defined, do not try to use an assembler version    */
+  /* of performance-critical functions (e.g. FT_MulFix).  You should only  */
+  /* define it to verify that the assembler functions work properly, or to */
+  /* benchmark the various implementations.                                */
+/* #define FT_CONFIG_OPTION_NO_ASSEMBLER */
+
+  /*************************************************************************/
+  /*                                                                       */
+  /* When this macro is defined, try to use an inlined assembler version   */
+  /* of the FT_MulFix function, which appears to be a hotspot when loading */
+  /* and hinting glyphs.                                                   */
+  /*                                                                       */
+  /* Note that if your compiler/CPU isn't supported, this will default to  */
+  /* the standard and portable implementation found in src/base/ftcalc.c.  */
+  /*                                                                       */
+#define FT_CONFIG_OPTION_INLINE_MULFIX
+
+
+  /*************************************************************************/
+  /*                                                                       */
   /* LZW-compressed file support.                                          */
   /*                                                                       */
   /*   FreeType now handles font files that have been compressed with the  */
diff --git a/include/freetype/freetype.h b/include/freetype/freetype.h
index b0193c7..9289ca5 100644
--- a/include/freetype/freetype.h
+++ b/include/freetype/freetype.h
@@ -3468,10 +3468,13 @@ FT_BEGIN_HEADER
   /*    _second_ argument of this function; this can make a great          */
   /*    difference.                                                        */
   /*                                                                       */
+#ifdef FT_MULFIX_INLINED
+#  define  FT_MulFix(a,b)  FT_MULFIX_INLINED(a,b)
+#else
   FT_EXPORT( FT_Long )
   FT_MulFix( FT_Long  a,
              FT_Long  b );
-
+#endif
 
   /*************************************************************************/
   /*                                                                       */
diff --git a/src/base/ftcalc.c b/src/base/ftcalc.c
index 7d2381b..75e89c2 100644
--- a/src/base/ftcalc.c
+++ b/src/base/ftcalc.c
@@ -38,6 +38,9 @@
 #include FT_INTERNAL_DEBUG_H
 #include FT_INTERNAL_OBJECTS_H
 
+#ifdef  FT_MULFIX_INLINED
+#undef  FT_MulFix
+#endif
 
 /* we need to define a 64-bits data type here */
 
@@ -193,6 +196,9 @@
   FT_MulFix( FT_Long  a,
              FT_Long  b )
   {
+#ifdef FT_MULFIX_ASSEMBLER
+    return FT_MULFIX_ASSEMBLER(a,b);
+#else
     FT_Int   s = 1;
     FT_Long  c;
 
@@ -202,6 +208,7 @@
 
     c = (FT_Long)( ( (FT_Int64)a * b + 0x8000L ) >> 16 );
     return ( s > 0 ) ? c : -c ;
+#endif
   }
 
 
@@ -413,30 +420,8 @@
   FT_MulFix( FT_Long  a,
              FT_Long  b )
   {
-    /* use inline assembly to speed up things a bit */
-
-#if defined( __GNUC__ ) && defined( i386 )
-
-    FT_Long  result;
-
-
-    __asm__ __volatile__ (
-      "imul  %%edx\n"
-      "movl  %%edx, %%ecx\n"
-      "sarl  $31, %%ecx\n"
-      "addl  $0x8000, %%ecx\n"
-      "addl  %%ecx, %%eax\n"
-      "adcl  $0, %%edx\n"
-      "shrl  $16, %%eax\n"
-      "shll  $16, %%edx\n"
-      "addl  %%edx, %%eax\n"
-      "mov   %%eax, %0\n"
-      : "=a"(result), "+d"(b)
-      : "a"(a)
-      : "%ecx"
-    );
-    return result;
-
+#ifdef FT_MULFIX_ASSEMBLER
+    return FT_MULFIX_ASSEMBLER(a,b);
 #elif 0
 
     /*