Commit 89929ec6b961456dba8fe3126815a0d49e2e9cb8

David Turner 2013-07-16T13:36:07

[smooth] Improve performance. Provide a work-around for an ARM-specific performance bug in GCC. This speeds up the rasterizer by more than 5%. Also slightly optimize `set_gray_cell' and `gray_record_cell' (which also improves performance on other platforms by a tiny bit (<1%). * src/smooth/ftgrays.c (FT_DIV_MOD): New macro. Use it where appropriate. (gray_record_cell, gray_set_cell, gray_move_to, gray_convert_glyph_inner): Streamline condition handling.

diff --git a/ChangeLog b/ChangeLog
index 807a81c..d167937 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,21 @@
 2013-07-16  David Turner  <digit@google.com>
 
+	[smooth] Improve performance.
+
+	Provide a work-around for an ARM-specific performance bug in GCC. 
+	This speeds up the rasterizer by more than 5%.
+
+	Also slightly optimize `set_gray_cell' and `gray_record_cell' (which
+	also improves performance on other platforms by a tiny bit (<1%).
+
+	* src/smooth/ftgrays.c (FT_DIV_MOD): New macro.
+	Use it where appropriate.
+
+	(gray_record_cell, gray_set_cell, gray_move_to,
+	gray_convert_glyph_inner): Streamline condition handling.
+
+2013-07-16  David Turner  <digit@google.com>
+
 	[truetype] Add assembler code for TT_MulFix14 and TT_DotFix14.
 
 	This patch provides slightly optimized versions for ARM, x86, and
diff --git a/src/smooth/ftgrays.c b/src/smooth/ftgrays.c
index 7532a35..ab76829 100644
--- a/src/smooth/ftgrays.c
+++ b/src/smooth/ftgrays.c
@@ -310,6 +310,40 @@ typedef ptrdiff_t  FT_PtrDist;
 #endif
 
 
+  /* Compute `divident / divisor' and return both its quotient and     */
+  /* remainder, cast to a specific type.  This macro also ensures that */
+  /* the remainder is always positive.                                 */
+#define FT_DIV_MOD( type, dividend, divisor, quotient, remainder ) \
+  FT_BEGIN_STMNT                                                   \
+    (quotient)  = (type)( (dividend) / (divisor) );                \
+    (remainder) = (type)( (dividend) % (divisor) );                \
+    if ( (remainder) < 0 )                                         \
+    {                                                              \
+      (quotient)--;                                                \
+      (remainder) += (type)(divisor);                              \
+    }                                                              \
+  FT_END_STMNT
+
+#ifdef  __arm__
+  /* Work around a bug specific to GCC which make the compiler fail to */
+  /* optimize a division and modulo operation on the same parameters   */
+  /* into a single call to `__aeabi_idivmod'.  See                     */
+  /*                                                                   */
+  /*  http://gcc.gnu.org/bugzilla/show_bug.cgi?id=43721                */
+#undef FT_DIV_MOD
+#define FT_DIV_MOD( type, dividend, divisor, quotient, remainder ) \
+  FT_BEGIN_STMNT                                                   \
+    (quotient)  = (type)( (dividend) / (divisor) );                \
+    (remainder) = (type)( (dividend) - (quotient) * (divisor) );   \
+    if ( (remainder) < 0 )                                         \
+    {                                                              \
+      (quotient)--;                                                \
+      (remainder) += (type)(divisor);                              \
+    }                                                              \
+  FT_END_STMNT
+#endif /* __arm__ */
+
+
   /*************************************************************************/
   /*                                                                       */
   /*   TYPE DEFINITIONS                                                    */
@@ -548,7 +582,7 @@ typedef ptrdiff_t  FT_PtrDist;
   static void
   gray_record_cell( RAS_ARG )
   {
-    if ( !ras.invalid && ( ras.area | ras.cover ) )
+    if ( ras.area | ras.cover )
     {
       PCell  cell = gray_find_cell( RAS_VAR );
 
@@ -597,12 +631,12 @@ typedef ptrdiff_t  FT_PtrDist;
 
       ras.area  = 0;
       ras.cover = 0;
-    }
+      ras.ex    = ex;
+      ras.ey    = ey;
 
-    ras.ex      = ex;
-    ras.ey      = ey;
-    ras.invalid = ( (unsigned)ey >= (unsigned)ras.count_ey ||
-                              ex >= ras.count_ex           );
+      ras.invalid = ( (unsigned)ey >= (unsigned)ras.count_ey ||
+                                ex >= ras.count_ex           );
+    }
   }
 
 
@@ -686,13 +720,7 @@ typedef ptrdiff_t  FT_PtrDist;
       dx    = -dx;
     }
 
-    delta = (TCoord)( p / dx );
-    mod   = (TCoord)( p % dx );
-    if ( mod < 0 )
-    {
-      delta--;
-      mod += (TCoord)dx;
-    }
+    FT_DIV_MOD( TCoord, p, dx, delta, mod );
 
     ras.area  += (TArea)(( fx1 + first ) * delta);
     ras.cover += delta;
@@ -706,14 +734,8 @@ typedef ptrdiff_t  FT_PtrDist;
       TCoord  lift, rem;
 
 
-      p    = ONE_PIXEL * ( y2 - y1 + delta );
-      lift = (TCoord)( p / dx );
-      rem  = (TCoord)( p % dx );
-      if ( rem < 0 )
-      {
-        lift--;
-        rem += (TCoord)dx;
-      }
+      p = ONE_PIXEL * ( y2 - y1 + delta );
+      FT_DIV_MOD( TCoord, p, dx, lift, rem );
 
       mod -= (int)dx;
 
@@ -763,9 +785,6 @@ typedef ptrdiff_t  FT_PtrDist;
     dx = to_x - ras.x;
     dy = to_y - ras.y;
 
-    /* XXX: we should do something about the trivial case where dx == 0, */
-    /*      as it happens very often!                                    */
-
     /* perform vertical clipping */
     {
       TCoord  min, max;
@@ -844,13 +863,7 @@ typedef ptrdiff_t  FT_PtrDist;
       dy    = -dy;
     }
 
-    delta = (int)( p / dy );
-    mod   = (int)( p % dy );
-    if ( mod < 0 )
-    {
-      delta--;
-      mod += (TCoord)dy;
-    }
+    FT_DIV_MOD( int, p, dy, delta, mod );
 
     x = ras.x + delta;
     gray_render_scanline( RAS_VAR_ ey1, ras.x, fy1, x, (TCoord)first );
@@ -861,13 +874,7 @@ typedef ptrdiff_t  FT_PtrDist;
     if ( ey1 != ey2 )
     {
       p     = ONE_PIXEL * dx;
-      lift  = (int)( p / dy );
-      rem   = (int)( p % dy );
-      if ( rem < 0 )
-      {
-        lift--;
-        rem += (int)dy;
-      }
+      FT_DIV_MOD( int, p, dy, lift, rem );
       mod -= (int)dy;
 
       while ( ey1 != ey2 )
@@ -1171,7 +1178,8 @@ typedef ptrdiff_t  FT_PtrDist;
 
 
     /* record current cell, if any */
-    gray_record_cell( RAS_VAR );
+    if ( !ras.invalid )
+      gray_record_cell( RAS_VAR );
 
     /* start to a new position */
     x = UPSCALE( to->x );
@@ -1781,7 +1789,8 @@ typedef ptrdiff_t  FT_PtrDist;
     if ( ft_setjmp( ras.jump_buffer ) == 0 )
     {
       error = FT_Outline_Decompose( &ras.outline, &func_interface, &ras );
-      gray_record_cell( RAS_VAR );
+      if ( !ras.invalid )
+        gray_record_cell( RAS_VAR );
     }
     else
       error = FT_THROW( Memory_Overflow );