* src/smooth/ftgrays.c: minor source cleanups and optimization
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237
diff --git a/ChangeLog b/ChangeLog
index ab3762a..1ae251b 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,7 +1,10 @@
 2006-09-05  David Turner  <david@freetype.org>
 
+        * src/smooth/ftgrays.c: minor source cleanups and optimization
+
         * src/smooth/ftgrays.c (gray_sweep): Avoid buffer overwrites
-        when to the end of a bitmap scanline. The fun never ends ;-)
+        when drawing to the end of a bitmap scanline. The fun never ends ;-)
+
 
 2006-09-04  David Turner  <david@freetype.org>
 
diff --git a/src/smooth/ftgrays.c b/src/smooth/ftgrays.c
index bb590c6..600c967 100644
--- a/src/smooth/ftgrays.c
+++ b/src/smooth/ftgrays.c
@@ -207,13 +207,6 @@
 #define DOWNSCALE( x )  ( (x) << ( 6 - PIXEL_BITS ) )
 #endif
 
-  /* Define this if you want to use a more compact storage scheme.  This   */
-  /* increases the number of cells available in the render pool but slows  */
-  /* down the rendering a bit.  It is useful if you have a really tiny     */
-  /* render pool.                                                          */
-#undef GRAYS_COMPACT
-
-
   /*************************************************************************/
   /*                                                                       */
   /*   TYPE DEFINITIONS                                                    */
@@ -264,18 +257,19 @@ typedef struct TCell_
 
   typedef struct  TRaster_
   {
-    PCell   cells;
-    int     max_cells;
-    int     num_cells;
-
+    TCoord  ex, ey;
     TPos    min_ex, max_ex;
     TPos    min_ey, max_ey;
+    TPos    count_ex, count_ey;
 
     TArea   area;
     int     cover;
     int     invalid;
 
-    TCoord  ex, ey;
+    PCell   cells;
+    int     max_cells;
+    int     num_cells;
+
     TCoord  cx, cy;
     TPos    x,  y;
 
@@ -382,13 +376,13 @@ typedef struct TCell_
   /* Record the current cell in the table.                                 */
   /*                                                                       */
   static PCell
-  gray_find_cell( RAS_ARG_ TCoord  x,
-                           TCoord  y )
+  gray_find_cell( RAS_ARG )
   {
     PCell  *pcell, cell;
+    int     x = ras.ex;
 
 
-    pcell = &ras.ycells[y];
+    pcell = &ras.ycells[ ras.ey ];
     for (;;)
     {
       cell = *pcell;
@@ -422,9 +416,7 @@ typedef struct TCell_
   {
     if ( !ras.invalid && (ras.area | ras.cover) )
     {
-      TCoord  x    = (TCoord)( ras.ex - ras.min_ex );
-      TCoord  y    = (TCoord)( ras.ey - ras.min_ey );
-      PCell   cell = gray_find_cell( RAS_VAR_ x, y );
+      PCell   cell = gray_find_cell( RAS_VAR );
 
       cell->area  += ras.area;
       cell->cover += ras.cover;
@@ -440,9 +432,6 @@ typedef struct TCell_
   gray_set_cell( RAS_ARG_ TCoord  ex,
                           TCoord  ey )
   {
-    int  invalid, record, clean;
-
-
     /* Move the cell pointer to a new position.  We set the `invalid'      */
     /* flag to indicate that the cell isn't part of those we're interested */
     /* in during the render phase.  This means that:                       */
@@ -455,8 +444,10 @@ typedef struct TCell_
 
     /* All cells that are on the left of the clipping region go to the */
     /* min_ex - 1 horizontal position.                                 */
-    if ( ex < ras.min_ex )
-      ex = (TCoord)(ras.min_ex - 1);
+    ey -= ras.min_ey;
+    ex -= ras.min_ex;
+    if ( ex < 0 )
+      ex = -1;
 
     /* are we moving to a different cell ? */
     if ( ex != ras.ex || ey != ras.ey )
@@ -469,9 +460,10 @@ typedef struct TCell_
       ras.cover = 0;
     }
 
-    ras.invalid = ( ey < ras.min_ey || ey >= ras.max_ey || ex >= ras.max_ex );
     ras.ex      = ex;
     ras.ey      = ey;
+    ras.invalid = ( (unsigned)ey >= (unsigned)ras.count_ey ||
+                              ex >= ras.count_ex );
   }
 
 
@@ -488,8 +480,8 @@ typedef struct TCell_
 
     ras.area    = 0;
     ras.cover   = 0;
-    ras.ex      = ex;
-    ras.ey      = ey;
+    ras.ex      = ex - ras.min_ex;
+    ras.ey      = ey - ras.min_ey;
     ras.last_ey = SUBPIXELS( ey );
     ras.invalid = 0;
 
@@ -515,8 +507,8 @@ typedef struct TCell_
 
     dx = x2 - x1;
 
-    ex1 = TRUNC( x1 ); /* if (ex1 >= ras.max_ex) ex1 = ras.max_ex-1; */
-    ex2 = TRUNC( x2 ); /* if (ex2 >= ras.max_ex) ex2 = ras.max_ex-1; */
+    ex1 = TRUNC( x1 );
+    ex2 = TRUNC( x2 );
     fx1 = (TCoord)( x1 - SUBPIXELS( ex1 ) );
     fx2 = (TCoord)( x2 - SUBPIXELS( ex2 ) );
 
@@ -1113,16 +1105,31 @@ typedef struct TCell_
 
 
       if ( coverage )
-#if 1
-        FT_MEM_SET( p + spans->x, (unsigned char)coverage, spans->len );
-#else /* 1 */
       {
-        q     = p + spans->x;
-        limit = q + spans->len;
-        for ( ; q < limit; q++ )
-          q[0] = (unsigned char)coverage;
+       /* For small spans, it is faster to do it ourselves than to
+        * call memset.  This is mainly due to the cost of the
+        * function call.
+        */
+        if ( spans->len >= 8 )
+          FT_MEM_SET( p + spans->x, (unsigned char)coverage, spans->len );
+        else
+        {
+          unsigned char*  q = p + spans->x;
+
+          switch ( spans->len )
+          {
+            case 7: *q++ = (unsigned char)coverage;
+            case 6: *q++ = (unsigned char)coverage;
+            case 5: *q++ = (unsigned char)coverage;
+            case 4: *q++ = (unsigned char)coverage;
+            case 3: *q++ = (unsigned char)coverage;
+            case 2: *q++ = (unsigned char)coverage;
+            case 1: *q   = (unsigned char)coverage;
+            default:
+               ;
+          }
+        }
       }
-#endif /* 1 */
     }
   }
 
@@ -1223,7 +1230,9 @@ typedef struct TCell_
   }
 
 
-#if 1
+#ifdef DEBUG_GRAYS
+
+  /* to be called while in the debugger */
   gray_dump_cells( RAS_ARG )
   {
     int  yindex;
@@ -1239,7 +1248,9 @@ typedef struct TCell_
       printf( "\n" );
     }
   }
-#endif
+
+#endif /* DEBUG_GRAYS */
+
 
   static void
   gray_sweep( RAS_ARG_ const FT_Bitmap*  target )
@@ -1281,7 +1292,7 @@ typedef struct TCell_
 
       if ( cover != 0 )
         gray_hline( RAS_VAR_ x, yindex, cover * ( ONE_PIXEL * 2 ),
-                    (ras.max_ex - ras.min_ex) - x );
+                    ras.count_ex - x );
     }
 
     if ( ras.render_span && ras.num_gray_spans > 0 )
@@ -1597,6 +1608,9 @@ typedef struct TCell_
     if ( ras.max_ex > clip->xMax ) ras.max_ex = clip->xMax;
     if ( ras.max_ey > clip->yMax ) ras.max_ey = clip->yMax;
 
+    ras.count_ex = ras.max_ex - ras.min_ex;
+    ras.count_ey = ras.max_ey - ras.min_ey;
+
     /* simple heuristic used to speed-up the bezier decomposition -- see */
     /* the code in gray_render_conic() and gray_render_cubic() for more  */
     /* details                                                           */
@@ -1607,9 +1621,9 @@ typedef struct TCell_
       int level = 0;
 
 
-      if ( ras.max_ex > 24 || ras.max_ey > 24 )
+      if ( ras.count_ex > 24 || ras.count_ey > 24 )
         level++;
-      if ( ras.max_ex > 120 || ras.max_ey > 120 )
+      if ( ras.count_ex > 120 || ras.count_ey > 120 )
         level++;
 
       ras.conic_level <<= level;