Speed up ARMv7 support. When building for ARMv7 with thumb2 instructions, the optimized `FT_MulFix_arm' assembly routine was not being used. The reason for this is in the `ftconfig.h' header, namely: - The assembly routine uses the `smull' instruction which is not available when generating Thumb-1 machine code. It is available in Thumb-2 mode, though. - The header was written a long time ago before Thumb-2 became widely popular (e.g. with Android). So it simply doesn't use the assembly routine if the `__thumb__' built-in macro is defined. - When compiling in Thumb-2 mode, the compiler will define both `__thumb__' and `__thumb2__'. By checking for `(__thumb2__ || !__thumb__)', we ensure that the assembly routine is only avoided when generating Thumb-1 code. Given that this is a performance-sensitive function, this improves `ftbench' as follows on a Galaxy Nexus: Before (us/op) After (us/op) - loading Arial.ttf glyphs at 14 ppem [1] Load 34.285 33.098 - same operation with the light auto-hinter [2] Load 31.317 29.590 - same operation without hinting [3] Load 6.143 5.376 - loading Arial.ttf advances at 14 ppem [4] Load_Advances (normal) 34.216 33.016 Load_Advances (fast) 0.176 0.176 [1] ftbench -t 5 -p -s 14 -b a -f 0008 Arial.ttf [2] ftbench -t 5 -p -s 14 -b a -r 1 -f 0028 Arial.ttf [3] ftbench -t 5 -p -s 14 -b a -f 000a Arial.ttf [4] ftbench -t 5 -p -s 14 -b b -f 0008 Arial.ttf * builds/unix/ftconfig.in, include/freetype/config/ftconfig.h (FT_MULFIX_ASSEMBLER): Fix handling for ARMv7.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113
diff --git a/ChangeLog b/ChangeLog
index 053d80b..41c0d0d 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,56 @@
+2013-07-16 David Turner <digit@google.com>
+
+ Speed up ARMv7 support.
+
+ When building for ARMv7 with thumb2 instructions, the optimized
+ `FT_MulFix_arm' assembly routine was not being used.
+
+ The reason for this is in the `ftconfig.h' header, namely:
+
+ - The assembly routine uses the `smull' instruction which is not
+ available when generating Thumb-1 machine code. It is available
+ in Thumb-2 mode, though.
+
+ - The header was written a long time ago before Thumb-2 became
+ widely popular (e.g. with Android). So it simply doesn't use the
+ assembly routine if the `__thumb__' built-in macro is defined.
+
+ - When compiling in Thumb-2 mode, the compiler will define both
+ `__thumb__' and `__thumb2__'.
+
+ By checking for `(__thumb2__ || !__thumb__)', we ensure that the
+ assembly routine is only avoided when generating Thumb-1 code.
+
+ Given that this is a performance-sensitive function, this improves
+ `ftbench' as follows on a Galaxy Nexus:
+
+ Before (us/op) After (us/op)
+
+ - loading Arial.ttf glyphs at 14 ppem [1]
+
+ Load 34.285 33.098
+
+ - same operation with the light auto-hinter [2]
+
+ Load 31.317 29.590
+
+ - same operation without hinting [3]
+
+ Load 6.143 5.376
+
+ - loading Arial.ttf advances at 14 ppem [4]
+
+ Load_Advances (normal) 34.216 33.016
+ Load_Advances (fast) 0.176 0.176
+
+ [1] ftbench -t 5 -p -s 14 -b a -f 0008 Arial.ttf
+ [2] ftbench -t 5 -p -s 14 -b a -r 1 -f 0028 Arial.ttf
+ [3] ftbench -t 5 -p -s 14 -b a -f 000a Arial.ttf
+ [4] ftbench -t 5 -p -s 14 -b b -f 0008 Arial.ttf
+
+ * builds/unix/ftconfig.in, include/freetype/config/ftconfig.h
+ (FT_MULFIX_ASSEMBLER): Fix handling for ARMv7.
+
2013-06-28 Werner Lemberg <wl@gnu.org>
* docs/CHANGES: Updated.
diff --git a/builds/unix/ftconfig.in b/builds/unix/ftconfig.in
index d171e24..c82fe5d 100644
--- a/builds/unix/ftconfig.in
+++ b/builds/unix/ftconfig.in
@@ -395,8 +395,10 @@ FT_BEGIN_HEADER
#ifdef __GNUC__
-#if defined( __arm__ ) && !defined( __thumb__ ) && \
+#if defined( __arm__ ) && \
+ ( defined( __thumb2__ ) || !defined( __thumb__ ) ) && \
!( defined( __CC_ARM ) || defined( __ARMCC__ ) )
+
#define FT_MULFIX_ASSEMBLER FT_MulFix_arm
/* documentation is in freetype.h */
@@ -422,7 +424,9 @@ FT_BEGIN_HEADER
return a;
}
-#endif /* __arm__ && !__thumb__ && !( __CC_ARM || __ARMCC__ ) */
+#endif /* __arm__ && */
+ /* ( __thumb2__ || !__thumb__ ) && */
+ /* !( __CC_ARM || __ARMCC__ ) */
#if defined( __i386__ )
#define FT_MULFIX_ASSEMBLER FT_MulFix_i386
diff --git a/include/freetype/config/ftconfig.h b/include/freetype/config/ftconfig.h
index 5dce30e..3349e29 100644
--- a/include/freetype/config/ftconfig.h
+++ b/include/freetype/config/ftconfig.h
@@ -367,7 +367,8 @@ FT_BEGIN_HEADER
#ifdef __GNUC__
-#if defined( __arm__ ) && !defined( __thumb__ ) && \
+#if defined( __arm__ ) && \
+ ( !defined( __thumb__ ) || defined( __thumb2__ ) ) && \
!( defined( __CC_ARM ) || defined( __ARMCC__ ) )
#define FT_MULFIX_ASSEMBLER FT_MulFix_arm
@@ -394,7 +395,9 @@ FT_BEGIN_HEADER
return a;
}
-#endif /* __arm__ && !__thumb__ && !( __CC_ARM || __ARMCC__ ) */
+#endif /* __arm__ && */
+ /* ( __thumb2__ || !__thumb__ ) && */
+ /* !( __CC_ARM || __ARMCC__ ) */
#if defined( __i386__ )
#define FT_MULFIX_ASSEMBLER FT_MulFix_i386