[smooth] Minor speedup to smooth rasterizer This speeds up the smooth rasterizer by avoiding conditional branches in the hot path. Namely: - Define a fixed "null cell" which will be pointed to whenever the current cell is outside of the current target region. This avoids a "ras.cell != NULL" check in the FT_INTEGRATE() macro. - Also use the null cell as a sentinel at the end of all ycells[] linked-lists, by setting its x coordinate to INT_MAX. This avoids an 'if (!cell)' check in gray_set_cell() as well. - Slightly change the worker struct fields to perform slightly fewer operations during rendering. Example results (on a 2013 Core i5-3337U CPU) out/ftbench -p -s10 -t5 -bc /usr/share/fonts/truetype/droid/DroidSansFallbackFull.ttf Before: 5.472 us/op After: 5.275 us/op out/ftbench -p -s60 -t5 -bc /usr/share/fonts/truetype/droid/DroidSansFallbackFull.ttf Before: 17.988 us/op After: 17.389 us/op
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219
diff --git a/ChangeLog b/ChangeLog
index 790ef56..ea269da 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,17 @@
2021-07-15 David Turner <david@freetype.org>
+ [smooth] Minor speedup to smooth rasterizer
+
+ This speeds up the smooth rasterizer by avoiding conditional
+ branches in the hot path.
+
+ * src/smooth/ftgrays.c: Define a null cell used both as a
+ sentinel for all linked-lists, and to accumulate coverage and
+ area values for "out-of-bounds" cell positions without a
+ conditional check.
+
+2021-07-15 David Turner <david@freetype.org>
+
Replaces download-test-fonts.sh with download-test-fonts.py which
does the same work, and also avoids downloading anything if the
files are already installed with the right content.
diff --git a/src/smooth/ftgrays.c b/src/smooth/ftgrays.c
index 60cd5e0..e66ec34 100644
--- a/src/smooth/ftgrays.c
+++ b/src/smooth/ftgrays.c
@@ -479,19 +479,24 @@ typedef ptrdiff_t FT_PtrDist;
{
ft_jmp_buf jump_buffer;
- TCoord min_ex, max_ex;
+ TCoord min_ex, max_ex; /* min and max integer pixel coordinates */
TCoord min_ey, max_ey;
+ TCoord count_ey; /* same as (max_ey - min_ey) */
- PCell cell;
- PCell* ycells;
- PCell cells;
- FT_PtrDist max_cells;
- FT_PtrDist num_cells;
+ PCell cell; /* current cell */
+ PCell cell_free; /* cell allocation next free slot */
+ PCell cell_limit; /* cell allocation limit */
- TPos x, y;
+ PCell* ycells; /* array of cell linked-lists, one per */
+ /* vertical coordinate in the current band. */
- FT_Outline outline;
- TPixmap target;
+ PCell cells; /* cell storage area */
+ FT_PtrDist max_cells; /* cell storage capacity */
+
+ TPos x, y; /* last point position */
+
+ FT_Outline outline; /* input outline */
+ TPixmap target; /* target pixmap */
FT_Raster_Span_Func render_span;
void* render_span_data;
@@ -502,21 +507,34 @@ typedef ptrdiff_t FT_PtrDist;
#pragma warning( pop )
#endif
-
#ifndef FT_STATIC_RASTER
#define ras (*worker)
#else
static gray_TWorker ras;
#endif
-#define FT_INTEGRATE( ras, a, b ) \
- if ( ras.cell ) \
- ras.cell->cover += (a), ras.cell->area += (a) * (TArea)(b)
+/* Return a pointer to the "null cell", used as a sentinel at the end */
+/* of all ycells[] linked lists. Its x coordinate should be maximal */
+/* to ensure no NULL checks are necessary when looking for an insertion */
+/* point in gray_set_cell(). Other loops should check the cell pointer */
+/* with CELL_IS_NULL() to detect the end of the list. */
+#define NULL_CELL_PTR(ras) (ras).cells
+
+/* The |x| value of the null cell. Must be the largest possible */
+/* integer value stored in a TCell.x field. */
+#define CELL_MAX_X_VALUE INT_MAX
+
+/* Return true iff |cell| points to the null cell. */
+#define CELL_IS_NULL(cell) ((cell)->x == CELL_MAX_X_VALUE)
+
+
+#define FT_INTEGRATE( ras, a, b ) \
+ ras.cell->cover += (a), ras.cell->area += (a) * (TArea)(b)
typedef struct gray_TRaster_
{
- void* memory;
+ void* memory;
} gray_TRaster, *gray_PRaster;
@@ -538,7 +556,7 @@ typedef ptrdiff_t FT_PtrDist;
printf( "%3d:", y );
- for ( ; cell != NULL; cell = cell->next )
+ for ( ; !CELL_IS_NULL(cell); cell = cell->next )
printf( " (%3d, c:%4d, a:%6d)",
cell->x, cell->cover, cell->area );
printf( "\n" );
@@ -566,11 +584,12 @@ typedef ptrdiff_t FT_PtrDist;
/* Note that if a cell is to the left of the clipping region, it is */
/* actually set to the (min_ex-1) horizontal position. */
- if ( ey >= ras.max_ey || ey < ras.min_ey || ex >= ras.max_ex )
- ras.cell = NULL;
+ TCoord ey_index = ey - ras.min_ey;
+ if ( ey_index < 0 || ey_index >= ras.count_ey || ex >= ras.max_ex )
+ ras.cell = NULL_CELL_PTR(ras);
else
{
- PCell* pcell = ras.ycells + ey - ras.min_ey;
+ PCell* pcell = ras.ycells + ey_index;
PCell cell;
@@ -580,7 +599,7 @@ typedef ptrdiff_t FT_PtrDist;
{
cell = *pcell;
- if ( !cell || cell->x > ex )
+ if ( cell->x > ex )
break;
if ( cell->x == ex )
@@ -589,11 +608,11 @@ typedef ptrdiff_t FT_PtrDist;
pcell = &cell->next;
}
- if ( ras.num_cells >= ras.max_cells )
+ /* insert new cell */
+ cell = ras.cell_free++;
+ if (cell >= ras.cell_limit)
ft_longjmp( ras.jump_buffer, 1 );
- /* insert new cell */
- cell = ras.cells + ras.num_cells++;
cell->x = ex;
cell->area = 0;
cell->cover = 0;
@@ -1218,7 +1237,7 @@ typedef ptrdiff_t FT_PtrDist;
unsigned char* line = ras.target.origin - ras.target.pitch * y;
- for ( ; cell != NULL; cell = cell->next )
+ for ( ; !CELL_IS_NULL(cell); cell = cell->next )
{
if ( cover != 0 && cell->x > x )
{
@@ -1266,7 +1285,7 @@ typedef ptrdiff_t FT_PtrDist;
TArea area;
- for ( ; cell != NULL; cell = cell->next )
+ for ( ; !CELL_IS_NULL(cell); cell = cell->next )
{
if ( cover != 0 && cell->x > x )
{
@@ -1646,8 +1665,8 @@ typedef ptrdiff_t FT_PtrDist;
FT_TRACE7(( "band [%d..%d]: %ld cell%s\n",
ras.min_ey,
ras.max_ey,
- ras.num_cells,
- ras.num_cells == 1 ? "" : "s" ));
+ ras.cell_free - ras.cells.,
+ ras.cell_free - ras.cells == 1 ? "" : "s" ));
}
else
{
@@ -1690,8 +1709,18 @@ typedef ptrdiff_t FT_PtrDist;
ras.cells = buffer + n;
ras.max_cells = (FT_PtrDist)( FT_MAX_GRAY_POOL - n );
+ ras.cell_limit = ras.cells + ras.max_cells;
ras.ycells = (PCell*)buffer;
+ /* Initialize the null cell at the start of the 'cells' array. */
+ /* Note that this requires ras.cell_free initialization to skip */
+ /* over the first entry in the array. */
+ PCell null_cell = NULL_CELL_PTR(ras);
+ null_cell->x = CELL_MAX_X_VALUE;
+ null_cell->area = 0;
+ null_cell->cover = 0;
+ null_cell->next = NULL;;
+
for ( y = yMin; y < yMax; )
{
ras.min_ey = y;
@@ -1705,15 +1734,17 @@ typedef ptrdiff_t FT_PtrDist;
do
{
TCoord width = band[0] - band[1];
+ TCoord w;
int error;
+ for (w = 0; w < width; ++w)
+ ras.ycells[w] = null_cell;
- FT_MEM_ZERO( ras.ycells, height * sizeof ( PCell ) );
-
- ras.num_cells = 0;
- ras.cell = NULL;
+ ras.cell_free = ras.cells + 1; /* NOTE: Skip over the null cell. */
+ ras.cell = null_cell;
ras.min_ey = band[1];
ras.max_ey = band[0];
+ ras.count_ey = width;
error = gray_convert_glyph_inner( RAS_VAR, continued );
continued = 1;