Commit e75da4576e77ad2113f06905d8f8479a92cf687c

Werner Lemberg 2008-07-22T20:59:59

* src/sfnt/ttcmap.c (tt_cmap4_validate, tt_cmap4_char_map_linear, tt_cmap4_char_map_binary): Handle fonts that treat the last segment specially, i.e., a final segment with start == end == 0xFFFF and delta == 1 whose range offset is ignored. According to the specification, such fonts are invalid, but acroread accepts them.

diff --git a/ChangeLog b/ChangeLog
index 9d2bd2a..84b3bd0 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,10 @@
+2008-07-22  Martin McBride  <mmcbride@emtex.com>
+
+	* src/sfnt/ttcmap.c (tt_cmap4_validate, tt_cmap4_char_map_linear,
+	tt_cmap4_char_map_binary): Handle fonts which treat the last segment
+	specially.  According to the specification, such fonts would be
+	invalid but acroread accepts them.
+
 2008-07-16  Jon Foster  <Jon.Foster@cabot.co.uk>
 
 	* src/pfr/pfrdrivr.c (pfr_get_advance): Fix off-by-one error.
@@ -9,7 +16,7 @@
 
 2008-07-16  Werner Lemberg  <wl@gnu.org>
 
-	Handle CID-keyed fonts wrapped in a SFNT (with cmaps) correctly.
+	Handle CID-keyed fonts wrapped in an SFNT (with cmaps) correctly.
 
 	* src/cff/cffload.c (cff_font_load): Pass `pure_cff'.
 	Invert sids table only if `pure_cff' is set.
@@ -18,6 +25,8 @@
 	* src/cff/cffobjs.c (cff_face_init): Updated.
 	Set FT_FACE_FLAG_CID_KEYED only if pure_cff is set.
 
+	* docs/CHANGES: Updated.
+
 2008-07-09  Werner Lemberg  <wl@gnu.org>
 
 	* src/truetype/ttpload.c (tt_face_load_loca): Handle buggy fonts
diff --git a/src/sfnt/ttcmap.c b/src/sfnt/ttcmap.c
index b70b64c..21387b5 100644
--- a/src/sfnt/ttcmap.c
+++ b/src/sfnt/ttcmap.c
@@ -4,7 +4,7 @@
 /*                                                                         */
 /*    TrueType character mapping table (cmap) support (body).              */
 /*                                                                         */
-/*  Copyright 2002, 2003, 2004, 2005, 2006, 2007 by                        */
+/*  Copyright 2002, 2003, 2004, 2005, 2006, 2007, 2008 by                  */
 /*  David Turner, Robert Wilhelm, and Werner Lemberg.                      */
 /*                                                                         */
 /*  This file is part of the FreeType project, and may only be used,       */
@@ -134,7 +134,7 @@
     FT_UInt    gindex   = 0;
 
 
-    table += 6;  /* go to glyph ids */
+    table += 6;  /* go to glyph IDs */
     while ( ++charcode < 256 )
     {
       gindex = table[charcode];
@@ -260,14 +260,14 @@
   /*                                                                       */
   /* * The value of `offset' is read.  This is a _byte_ distance from the  */
   /*   location of the `offset' field itself into a slice of the           */
-  /*   `glyph_ids' table.  Let's call it `slice' (it's a USHORT[] too).    */
+  /*   `glyph_ids' table.  Let's call it `slice' (it is a USHORT[] too).   */
   /*                                                                       */
   /* * The value `slice[char.lo - first]' is read.  If it is 0, there is   */
   /*   no glyph for the charcode.  Otherwise, the value of `delta' is      */
   /*   added to it (modulo 65536) to form a new glyph index.               */
   /*                                                                       */
   /* It is up to the validation routine to check that all offsets fall     */
-  /* within the glyph ids table (and not within the `subs' table itself or */
+  /* within the glyph IDs table (and not within the `subs' table itself or */
   /* outside of the CMap).                                                 */
   /*                                                                       */
 
@@ -342,7 +342,7 @@
         if ( ids < glyph_ids || ids + code_count*2 > table + length )
           FT_INVALID_OFFSET;
 
-        /* check glyph ids */
+        /* check glyph IDs */
         if ( valid->level >= FT_VALIDATE_TIGHT )
         {
           FT_Byte*  limit = p + code_count * 2;
@@ -393,7 +393,7 @@
         sub = subs;  /* jump to first sub-header */
 
         /* check that the sub-header for this byte is 0, which */
-        /* indicates that it's really a valid one-byte value   */
+        /* indicates that it is really a valid one-byte value  */
         /* Otherwise, return 0                                 */
         /*                                                     */
         p += char_lo * 2;
@@ -621,7 +621,7 @@
   /* charcode within the segment is obtained by adding the value of        */
   /* `idDelta' directly to the charcode, modulo 65536.                     */
   /*                                                                       */
-  /* Otherwise, a glyph index is taken from the glyph ids sub-array for    */
+  /* Otherwise, a glyph index is taken from the glyph IDs sub-array for    */
   /* the segment, and the value of `idDelta' is added to it.               */
   /*                                                                       */
   /*                                                                       */
@@ -831,7 +831,7 @@
     /*                                                             */
     if ( valid->level >= FT_VALIDATE_PARANOID )
     {
-      /* check the values of 'searchRange', 'entrySelector', 'rangeShift' */
+      /* check the values of `searchRange', `entrySelector', `rangeShift' */
       FT_UInt  search_range   = TT_NEXT_USHORT( p );
       FT_UInt  entry_selector = TT_NEXT_USHORT( p );
       FT_UInt  range_shift    = TT_NEXT_USHORT( p );
@@ -858,7 +858,7 @@
     offsets   = deltas  + num_segs * 2;
     glyph_ids = offsets + num_segs * 2;
 
-    /* check last segment, its end count must be FFFF */
+    /* check last segment, its end count must be 0xFFFF */
     if ( valid->level >= FT_VALIDATE_PARANOID )
     {
       p = ends + ( num_segs - 1 ) * 2;
@@ -867,9 +867,9 @@
     }
 
     {
-      FT_UInt  start, end, offset, n;
-      FT_UInt  last_start = 0, last_end = 0;
-      FT_Int   delta;
+      FT_UInt   start, end, offset, n;
+      FT_UInt   last_start = 0, last_end = 0;
+      FT_Int    delta;
       FT_Byte*  p_start   = starts;
       FT_Byte*  p_end     = ends;
       FT_Byte*  p_delta   = deltas;
@@ -911,14 +911,18 @@
         {
           p += offset;  /* start of glyph id array */
 
-          /* check that we point within the glyph ids table only */
+          /* check that we point within the glyph IDs table only */
           if ( valid->level >= FT_VALIDATE_TIGHT )
           {
             if ( p < glyph_ids                                ||
                  p + ( end - start + 1 ) * 2 > table + length )
               FT_INVALID_DATA;
           }
-          else
+          /* some fonts handle the last segment incorrectly */
+          else if ( n != num_segs - 1     ||
+                    !( start == 0xFFFFU &&
+                       end   == 0xFFFFU &&
+                       delta == 0x1U    ) )
           {
             if ( p < glyph_ids                              ||
                  p + ( end - start + 1 ) * 2 > valid->limit )
@@ -946,7 +950,7 @@
         }
         else if ( offset == 0xFFFFU )
         {
-          /* Some fonts (erroneously?) use a range offset of 0xFFFF */
+          /* some fonts (erroneously?) use a range offset of 0xFFFF */
           /* to mean missing glyph in cmap table                    */
           /*                                                        */
           if ( valid->level >= FT_VALIDATE_PARANOID                     ||
@@ -1009,6 +1013,12 @@
           p      += num_segs2;
           offset  = TT_PEEK_USHORT( p );
 
+          /* some fonts handle the last segment incorrectly; */
+          /* we have to catch it                             */
+          if ( i >= num_segs - 1                                   &&
+               start == 0xFFFFU && end == 0xFFFFU && delta == 0x1U )
+            offset = 0;
+
           if ( offset == 0xFFFFU )
             continue;
 
@@ -1088,6 +1098,12 @@
         p     += num_segs2;
         offset = TT_PEEK_USHORT( p );
 
+        /* some fonts handle the last segment incorrectly; */
+        /* we have to catch it                             */
+        if ( mid >= num_segs - 1                                 &&
+             start == 0xFFFFU && end == 0xFFFFU && delta == 0x1U )
+          offset = 0;
+
         /* search the first segment containing `charcode' */
         if ( cmap->flags & TT_CMAP_FLAG_OVERLAPPING )
         {
@@ -1359,7 +1375,7 @@
   /*                                                                       */
   /*   first        6              USHORT           first segment code     */
   /*   count        8              USHORT           segment size in chars  */
-  /*   glyphIds     10             USHORT[count]    glyph ids              */
+  /*   glyphIds     10             USHORT[count]    glyph IDs              */
   /*                                                                       */
   /* A very simplified segment mapping.                                    */
   /*                                                                       */
@@ -1506,7 +1522,7 @@
   /*****                                                               *****/
   /*****                          FORMAT 8                             *****/
   /*****                                                               *****/
-  /***** It's hard to completely understand what the OpenType spec     *****/
+  /***** It is hard to completely understand what the OpenType spec    *****/
   /***** says about this format, but here is my conclusion.            *****/
   /*****                                                               *****/
   /***** The purpose of this format is to easily map UTF-16 text to    *****/
@@ -1521,7 +1537,7 @@
   /*****     `char_hi' and `char_lo' must be in the Surrogates Area.   *****/
   /*****      Area.                                                    *****/
   /*****                                                               *****/
-  /***** The 'is32' table embedded in the charmap indicates whether a  *****/
+  /***** The `is32' table embedded in the charmap indicates whether a  *****/
   /***** given 16-bit value is in the surrogates area or not.          *****/
   /*****                                                               *****/
   /***** So, for any given `char_code', we can assert the following:   *****/
@@ -1548,7 +1564,7 @@
   /*   is32        12             BYTE[8192]  32-bitness bitmap            */
   /*   count       8204           ULONG       number of groups             */
   /*                                                                       */
-  /* This header is followed by 'count' groups of the following format:    */
+  /* This header is followed by `count' groups of the following format:    */
   /*                                                                       */
   /*   start       0              ULONG       first charcode               */
   /*   end         4              ULONG       last charcode                */
@@ -2727,7 +2743,7 @@
     FT_UInt    tot       = 0;
 
 
-    p += 3;  /* point to the first 'cnt' field */
+    p += 3;  /* point to the first `cnt' field */
     for ( ; numRanges > 0; numRanges-- )
     {
       tot += 1 + p[0];