* include/freetype/config/ftoption.h, include/freetype/config/ftconfig.h, builds/unix/ftconfig.in, include/freetype/freetype.h, src/base/ftcalc.c: Make FT_MulFix an inlined function. Also provide an assembler implementation for ARM architectures. This is done to speed up FreeType a little (on x86 3% when loading+hinting, 10% when rendering; ARM savings are more significant, though). Disable this by undefining FT_CONFIG_OPTION_INLINE_MULFIX in ftoption.h.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279
diff --git a/ChangeLog b/ChangeLog
index 5210b35..f35316a 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,15 @@
2008-09-01 david turner <david@freetype.org>
+ * include/freetype/config/ftoption.h,
+ include/freetype/config/ftconfig.h, builds/unix/ftconfig.in,
+ include/freetype/freetype.h, src/base/ftcalc.c:
+ Make FT_MulFix an inlined function. Also provide an assembler
+ implementation for ARM architectures. This is done to speed up
+ FreeType a little (on x86 3% when loading+hinting, 10% when
+ rendering; ARM savings are more significant, though).
+ Disable this by undefining FT_CONFIG_OPTION_INLINE_MULFIX in
+ ftoption.h.
+
* include/freetype/ftadvanc.h, src/base/ftadvanc.c,
include/freetype/config/ftheader.h, include/freetype/freetype.h,
src/base/Jamfile, src/base/rules.mk, src/cff/cffdrivr.c,
diff --git a/builds/unix/ftconfig.in b/builds/unix/ftconfig.in
index 1a96264..6430abf 100644
--- a/builds/unix/ftconfig.in
+++ b/builds/unix/ftconfig.in
@@ -197,6 +197,67 @@ FT_BEGIN_HEADER
#endif /* FT_SIZEOF_LONG == 8 */
+#if !defined(FT_CONFIG_OPTION_NO_ASSEMBLER)
+/* Provide assembler fragments for performance-critical
+ * functions.  These must be defined `static __inline__'
+ * with GCC.
+ */
+#if defined(__GNUC__)
+
+# if defined(__arm__) && !defined(__thumb__)
+# define FT_MULFIX_ASSEMBLER FT_MulFix_arm
+  /* ARM (non-Thumb) inline-assembler multiply: forms the signed 64-bit  */
+  /* product a*b with `smull', adds 0x8000 plus the sign of the high     */
+  /* word (0 or -1) to the low word, propagates the carry into the high  */
+  /* word, and returns bits 16..47 of the adjusted product.              */
+  static __inline__ FT_Int32
+  FT_MulFix_arm( FT_Int32  a,
+                 FT_Int32  b )
+  {
+    register FT_Int32  t, t2;
+
+
+    /* `__asm__' instead of `asm': the plain spelling is not a keyword */
+    /* when GCC runs in a strict ISO mode (-ansi, -std=c99).           */
+    __asm__ __volatile__ (
+      "smull  %1, %2, %4, %3\n\t"       /* (lo=%1,hi=%2) = a*b */
+      "mov    %0, %2, asr #31\n\t"      /* %0  = (hi >> 31) */
+      "add    %0, %0, #0x8000\n\t"      /* %0 += 0x8000 */
+      "adds   %1, %1, %0\n\t"           /* %1 += %0 */
+      "adc    %2, %2, #0\n\t"           /* %2 += carry */
+      "mov    %0, %1, lsr #16\n\t"      /* %0  = %1 >> 16 */
+      "orr    %0, %0, %2, lsl #16\n\t"  /* %0 |= %2 << 16 */
+      : "=r"(a), "=&r"(t2), "=&r"(t)
+      : "r"(a), "r"(b)
+      : "cc" );                         /* `adds' sets the condition flags */
+    return a;
+  }
+# endif /* __arm__ */
+
+# if defined(i386)
+# define FT_MULFIX_ASSEMBLER FT_MulFix_i386
+ static __inline__ FT_Int32
+ FT_MulFix_i386( FT_Int32 a, FT_Int32 b ) /* GCC/i386 inline-asm helper: returns bits 16..47 of the rounded 64-bit product a*b */
+ {
+ register FT_Int32 result;
+
+ __asm__ __volatile__ (
+ "imul %%edx\n" /* edx:eax = eax * edx (signed 64-bit product) */
+ "movl %%edx, %%ecx\n" /* ecx = high word of the product */
+ "sarl $31, %%ecx\n" /* ecx = 0 or -1, the sign of the product */
+ "addl $0x8000, %%ecx\n" /* ecx = rounding term (0x8000 or 0x7FFF) */
+ "addl %%ecx, %%eax\n" /* low word += rounding term */
+ "adcl $0, %%edx\n" /* high word += carry out of the low word */
+ "shrl $16, %%eax\n" /* eax = low word >> 16 (logical shift) */
+ "shll $16, %%edx\n" /* edx = high word << 16 */
+ "addl %%edx, %%eax\n" /* eax = combined 32-bit result */
+ : "=a"(result), "+d"(b) /* result leaves in eax; b lives in edx and is overwritten */
+ : "a"(a) /* a enters in eax */
+ : "%ecx" /* ecx is used as scratch */
+ );
+ return result;
+ }
+# endif /* i386 */
+#endif /* __GNUC__ */
+#endif /* !NO_ASSEMBLER */
+
+#ifdef FT_CONFIG_OPTION_INLINE_MULFIX
+# ifdef FT_MULFIX_ASSEMBLER
+# define FT_MULFIX_INLINED FT_MULFIX_ASSEMBLER
+# endif
+#endif
+
#define FT_BEGIN_STMNT do {
#define FT_END_STMNT } while ( 0 )
diff --git a/include/freetype/config/ftconfig.h b/include/freetype/config/ftconfig.h
index 09b2cf9..0e9daf3 100644
--- a/include/freetype/config/ftconfig.h
+++ b/include/freetype/config/ftconfig.h
@@ -225,6 +225,67 @@ FT_BEGIN_HEADER
#endif
+#if !defined(FT_CONFIG_OPTION_NO_ASSEMBLER)
+/* Provide assembler fragments for performance-critical
+ * functions.  These must be defined `static __inline__'
+ * with GCC.
+ */
+#if defined(__GNUC__)
+
+# if defined(__arm__) && !defined(__thumb__)
+# define FT_MULFIX_ASSEMBLER FT_MulFix_arm
+  /* ARM (non-Thumb) inline-assembler multiply: forms the signed 64-bit  */
+  /* product a*b with `smull', adds 0x8000 plus the sign of the high     */
+  /* word (0 or -1) to the low word, propagates the carry into the high  */
+  /* word, and returns bits 16..47 of the adjusted product.              */
+  static __inline__ FT_Int32
+  FT_MulFix_arm( FT_Int32  a,
+                 FT_Int32  b )
+  {
+    register FT_Int32  t, t2;
+
+
+    /* `__asm__' instead of `asm': the plain spelling is not a keyword */
+    /* when GCC runs in a strict ISO mode (-ansi, -std=c99).           */
+    __asm__ __volatile__ (
+      "smull  %1, %2, %4, %3\n\t"       /* (lo=%1,hi=%2) = a*b */
+      "mov    %0, %2, asr #31\n\t"      /* %0  = (hi >> 31) */
+      "add    %0, %0, #0x8000\n\t"      /* %0 += 0x8000 */
+      "adds   %1, %1, %0\n\t"           /* %1 += %0 */
+      "adc    %2, %2, #0\n\t"           /* %2 += carry */
+      "mov    %0, %1, lsr #16\n\t"      /* %0  = %1 >> 16 */
+      "orr    %0, %0, %2, lsl #16\n\t"  /* %0 |= %2 << 16 */
+      : "=r"(a), "=&r"(t2), "=&r"(t)
+      : "r"(a), "r"(b)
+      : "cc" );                         /* `adds' sets the condition flags */
+    return a;
+  }
+# endif /* __arm__ */
+
+# if defined(i386)
+# define FT_MULFIX_ASSEMBLER FT_MulFix_i386
+ static __inline__ FT_Int32
+ FT_MulFix_i386( FT_Int32 a, FT_Int32 b ) /* GCC/i386 inline-asm helper: returns bits 16..47 of the rounded 64-bit product a*b */
+ {
+ register FT_Int32 result;
+
+ __asm__ __volatile__ (
+ "imul %%edx\n" /* edx:eax = eax * edx (signed 64-bit product) */
+ "movl %%edx, %%ecx\n" /* ecx = high word of the product */
+ "sarl $31, %%ecx\n" /* ecx = 0 or -1, the sign of the product */
+ "addl $0x8000, %%ecx\n" /* ecx = rounding term (0x8000 or 0x7FFF) */
+ "addl %%ecx, %%eax\n" /* low word += rounding term */
+ "adcl $0, %%edx\n" /* high word += carry out of the low word */
+ "shrl $16, %%eax\n" /* eax = low word >> 16 (logical shift) */
+ "shll $16, %%edx\n" /* edx = high word << 16 */
+ "addl %%edx, %%eax\n" /* eax = combined 32-bit result */
+ : "=a"(result), "+d"(b) /* result leaves in eax; b lives in edx and is overwritten */
+ : "a"(a) /* a enters in eax */
+ : "%ecx" /* ecx is used as scratch */
+ );
+ return result;
+ }
+# endif /* i386 */
+#endif /* __GNUC__ */
+#endif /* !NO_ASSEMBLER */
+
+#ifdef FT_CONFIG_OPTION_INLINE_MULFIX
+# ifdef FT_MULFIX_ASSEMBLER
+# define FT_MULFIX_INLINED FT_MULFIX_ASSEMBLER
+# endif
+#endif
+
/* determine whether we have a 64-bit int type for platforms without */
/* Autoconf */
diff --git a/include/freetype/config/ftoption.h b/include/freetype/config/ftoption.h
index a2d61f9..a92e19b 100644
--- a/include/freetype/config/ftoption.h
+++ b/include/freetype/config/ftoption.h
@@ -117,6 +117,26 @@ FT_BEGIN_HEADER
/*************************************************************************/
/* */
+ /* When this macro is defined, do not try to use an assembler version */
+ /* of performance-critical functions (e.g. FT_MulFix). You should only */
+ /* do that to verify that the assembler function works properly, or */
+ /* to benchmark the various implementations. */
+/* #define FT_CONFIG_OPTION_NO_ASSEMBLER */
+
+ /*************************************************************************/
+ /* */
+ /* When this macro is defined, try to use an inlined assembler version */
+ /* of the FT_MulFix function, which appears to be a hotspot when loading */
+ /* and hinting glyphs. */
+ /* */
+ /* Note that if your compiler or CPU isn't supported, this defaults to */
+ /* the standard and portable implementation found in src/base/ftcalc.c */
+ /* */
+#define FT_CONFIG_OPTION_INLINE_MULFIX
+
+
+ /*************************************************************************/
+ /* */
/* LZW-compressed file support. */
/* */
/* FreeType now handles font files that have been compressed with the */
diff --git a/include/freetype/freetype.h b/include/freetype/freetype.h
index b0193c7..9289ca5 100644
--- a/include/freetype/freetype.h
+++ b/include/freetype/freetype.h
@@ -3468,10 +3468,13 @@ FT_BEGIN_HEADER
/* _second_ argument of this function; this can make a great */
/* difference. */
/* */
+#ifdef FT_MULFIX_INLINED /* an inline implementation was selected: route FT_MulFix calls to it */
+# define FT_MulFix(a,b) FT_MULFIX_INLINED(a,b)
+#else /* no inline implementation: declare the exported function */
FT_EXPORT( FT_Long )
FT_MulFix( FT_Long a,
FT_Long b );
-
+#endif /* FT_MULFIX_INLINED */
/*************************************************************************/
/* */
diff --git a/src/base/ftcalc.c b/src/base/ftcalc.c
index 7d2381b..75e89c2 100644
--- a/src/base/ftcalc.c
+++ b/src/base/ftcalc.c
@@ -38,6 +38,9 @@
#include FT_INTERNAL_DEBUG_H
#include FT_INTERNAL_OBJECTS_H
+#ifdef FT_MULFIX_INLINED /* drop the FT_MulFix macro so this file can define the function FT_MulFix itself */
+#undef FT_MulFix
+#endif /* FT_MULFIX_INLINED */
/* we need to define a 64-bits data type here */
@@ -193,6 +196,9 @@
FT_MulFix( FT_Long a,
FT_Long b )
{
+#ifdef FT_MULFIX_ASSEMBLER
+ return FT_MULFIX_ASSEMBLER(a,b);
+#else
FT_Int s = 1;
FT_Long c;
@@ -202,6 +208,7 @@
c = (FT_Long)( ( (FT_Int64)a * b + 0x8000L ) >> 16 );
return ( s > 0 ) ? c : -c ;
+#endif
}
@@ -413,30 +420,8 @@
FT_MulFix( FT_Long a,
FT_Long b )
{
- /* use inline assembly to speed up things a bit */
-
-#if defined( __GNUC__ ) && defined( i386 )
-
- FT_Long result;
-
-
- __asm__ __volatile__ (
- "imul %%edx\n"
- "movl %%edx, %%ecx\n"
- "sarl $31, %%ecx\n"
- "addl $0x8000, %%ecx\n"
- "addl %%ecx, %%eax\n"
- "adcl $0, %%edx\n"
- "shrl $16, %%eax\n"
- "shll $16, %%edx\n"
- "addl %%edx, %%eax\n"
- "mov %%eax, %0\n"
- : "=a"(result), "+d"(b)
- : "a"(a)
- : "%ecx"
- );
- return result;
-
+#ifdef FT_MULFIX_ASSEMBLER
+ return FT_MULFIX_ASSEMBLER(a,b);
#elif 0
/*