Commit 6139f2b647cd7b432da3877714befbbf7515e607

Alexei Podtelezhnikov 2022-11-06T13:12:47

[bdf, pfr, psnames] Accelarate charmap searches. The binary searches within charmaps can be accelerated because they often contain dense continuous blocks of character codes. Within such blocks, you can predict matches based on misses. This method has been deployed in `bdf` since 0f122fef34; we only refactor it there. We now use it in `pfr` and `psnames`, which speeds up the unicode charmap access by about 50% in PFR and Type 1 fonts. * src/bdf/bdfdrivr.c (bdf_cmap_char_{index,next}): Refactor. * src/pfr/pfrcmap.c (pfr_cmap_char_{index,next}): Predict `mid` based on the mismatch distance. * src/psnames/psmodule.c (ps_unicodes_char_{index,next}): Ditto.

diff --git a/src/bdf/bdfdrivr.c b/src/bdf/bdfdrivr.c
index eb73a7c..d7e8e0e 100644
--- a/src/bdf/bdfdrivr.c
+++ b/src/bdf/bdfdrivr.c
@@ -92,24 +92,18 @@ THE SOFTWARE.
   {
     BDF_CMap          cmap      = (BDF_CMap)bdfcmap;
     BDF_encoding_el*  encodings = cmap->encodings;
-    FT_ULong          min, max, mid; /* num_encodings */
     FT_UShort         result    = 0; /* encodings->glyph */
 
+    FT_ULong  min = 0;
+    FT_ULong  max = cmap->num_encodings;
+    FT_ULong  mid = ( min + max ) >> 1;
 
-    min = 0;
-    max = cmap->num_encodings;
-    mid = ( min + max ) >> 1;
 
     while ( min < max )
     {
-      FT_ULong  code;
+      FT_ULong  code = encodings[mid].enc;
 
 
-      if ( mid >= max || mid < min )
-        mid = ( min + max ) >> 1;
-
-      code = encodings[mid].enc;
-
       if ( charcode == code )
       {
         /* increase glyph index by 1 --              */
@@ -123,8 +117,10 @@ THE SOFTWARE.
       else
         min = mid + 1;
 
-      /* prediction in a continuous block */
+      /* reasonable prediction in a continuous block */
       mid += charcode - code;
+      if ( mid >= max || mid < min )
+        mid = ( min + max ) >> 1;
     }
 
     return result;
@@ -137,25 +133,19 @@ THE SOFTWARE.
   {
     BDF_CMap          cmap      = (BDF_CMap)bdfcmap;
     BDF_encoding_el*  encodings = cmap->encodings;
-    FT_ULong          min, max, mid; /* num_encodings */
     FT_UShort         result   = 0;  /* encodings->glyph */
     FT_ULong          charcode = *acharcode + 1;
 
+    FT_ULong  min = 0;
+    FT_ULong  max = cmap->num_encodings;
+    FT_ULong  mid = ( min + max ) >> 1;
 
-    min = 0;
-    max = cmap->num_encodings;
-    mid = ( min + max ) >> 1;
 
     while ( min < max )
     {
-      FT_ULong  code; /* same as BDF_encoding_el.enc */
+      FT_ULong  code = encodings[mid].enc;
 
 
-      if ( mid >= max || mid < min )
-        mid = ( min + max ) >> 1;
-
-      code = encodings[mid].enc;
-
       if ( charcode == code )
       {
         /* increase glyph index by 1 --              */
@@ -171,6 +161,8 @@ THE SOFTWARE.
 
       /* prediction in a continuous block */
       mid += charcode - code;
+      if ( mid >= max || mid < min )
+        mid = ( min + max ) >> 1;
     }
 
     charcode = 0;
diff --git a/src/pfr/pfrcmap.c b/src/pfr/pfrcmap.c
index 6fa2417..0dd0e23 100644
--- a/src/pfr/pfrcmap.c
+++ b/src/pfr/pfrcmap.c
@@ -69,17 +69,14 @@
   pfr_cmap_char_index( PFR_CMap   cmap,
                        FT_UInt32  char_code )
   {
-    FT_UInt  min = 0;
-    FT_UInt  max = cmap->num_chars;
+    FT_UInt   min = 0;
+    FT_UInt   max = cmap->num_chars;
+    FT_UInt   mid = min + ( max - min ) / 2;
+    PFR_Char  gchar;
 
 
     while ( min < max )
     {
-      PFR_Char  gchar;
-      FT_UInt   mid;
-
-
-      mid   = min + ( max - min ) / 2;
       gchar = cmap->chars + mid;
 
       if ( gchar->char_code == char_code )
@@ -89,6 +86,11 @@
         min = mid + 1;
       else
         max = mid;
+
+      /* reasonable prediction in a continuous block */
+      mid += char_code - gchar->char_code;
+      if ( mid >= max || mid < min )
+        mid = min + ( max - min ) / 2;
     }
     return 0;
   }
@@ -106,13 +108,12 @@
     {
       FT_UInt   min = 0;
       FT_UInt   max = cmap->num_chars;
-      FT_UInt   mid;
+      FT_UInt   mid = min + ( max - min ) / 2;
       PFR_Char  gchar;
 
 
       while ( min < max )
       {
-        mid   = min + ( ( max - min ) >> 1 );
         gchar = cmap->chars + mid;
 
         if ( gchar->char_code == char_code )
@@ -132,6 +133,11 @@
           min = mid + 1;
         else
           max = mid;
+
+        /* reasonable prediction in a continuous block */
+        mid += char_code - gchar->char_code;
+        if ( mid >= max || mid < min )
+          mid = min + ( max - min ) / 2;
       }
 
       /* we didn't find it, but we have a pair just above it */
diff --git a/src/psnames/psmodule.c b/src/psnames/psmodule.c
index e7d51e9..4730d8b 100644
--- a/src/psnames/psmodule.c
+++ b/src/psnames/psmodule.c
@@ -412,21 +412,18 @@
   ps_unicodes_char_index( PS_Unicodes  table,
                           FT_UInt32    unicode )
   {
-    PS_UniMap  *min, *max, *mid, *result = NULL;
+    PS_UniMap  *result = NULL;
+    PS_UniMap  *min = table->maps;
+    PS_UniMap  *max = min + table->num_maps;
+    PS_UniMap  *mid = min + ( ( max - min ) >> 1 );
 
 
     /* Perform a binary search on the table. */
-
-    min = table->maps;
-    max = min + table->num_maps - 1;
-
-    while ( min <= max )
+    while ( min < max )
     {
       FT_UInt32  base_glyph;
 
 
-      mid = min + ( ( max - min ) >> 1 );
-
       if ( mid->unicode == unicode )
       {
         result = mid;
@@ -438,13 +435,15 @@
       if ( base_glyph == unicode )
         result = mid; /* remember match but continue search for base glyph */
 
-      if ( min == max )
-        break;
-
       if ( base_glyph < unicode )
         min = mid + 1;
       else
-        max = mid - 1;
+        max = mid;
+
+      /* reasonable prediction in a continuous block */
+      mid += unicode - base_glyph;
+      if ( mid >= max || mid < min )
+        mid = min + ( ( max - min ) >> 1 );
     }
 
     if ( result )
@@ -465,14 +464,13 @@
     {
       FT_UInt     min = 0;
       FT_UInt     max = table->num_maps;
-      FT_UInt     mid;
+      FT_UInt     mid = min + ( ( max - min ) >> 1 );
       PS_UniMap*  map;
       FT_UInt32   base_glyph;
 
 
       while ( min < max )
       {
-        mid = min + ( ( max - min ) >> 1 );
         map = table->maps + mid;
 
         if ( map->unicode == char_code )
@@ -490,6 +488,11 @@
           min = mid + 1;
         else
           max = mid;
+
+        /* reasonable prediction in a continuous block */
+        mid += char_code - base_glyph;
+        if ( mid >= max || mid < min )
+          mid = min + ( max - min ) / 2;
       }
 
       if ( result )