Commit 15fef219d6ed191ef09efad4d8a3a4f4d2b7158b

Dominik Röttsches 2022-06-29T11:48:10

New function `FT_MulAddFix` to compute the sum of fixed-point products. This function, based on the code of `FT_MulFix`, uses 64-bit precision internally for intermediate computations. * include/freetype/internal/ftcalc.h, src/base/ftcalc.c (FT_MulAddFix): Implement it.

diff --git a/include/freetype/internal/ftcalc.h b/include/freetype/internal/ftcalc.h
index e6a87db..96a52b5 100644
--- a/include/freetype/internal/ftcalc.h
+++ b/include/freetype/internal/ftcalc.h
@@ -278,6 +278,40 @@ FT_BEGIN_HEADER
                       FT_Long  c );
 
 
+  /**************************************************************************
+   *
+   * @function:
+   *   FT_MulAddFix
+   *
+   * @description:
+   *   Compute `(s[0] * f[0] + s[1] * f[1] + ...) / 0x10000`, where `s[n]` is
+   *   usually a 16.16 scalar.
+   *
+   * @input:
+   *   s ::
+   *     The array of scalars.
+   *   f ::
+   *     The array of factors.
+   *   count ::
+   *     The number of entries in each of the two arrays.
+   *
+   * @return:
+   *   The result of `(s[0] * f[0] + s[1] * f[1] + ...) / 0x10000`.
+   *
+   * @note:
+   *   This function is currently used for the scaled delta computation of
+   *   variation stores.  It internally uses 64-bit data types when
+   *   available; otherwise it emulates 64-bit math with 32-bit operations,
+   *   which produces a correct result but is most likely slower than the
+   *   implementation based on `int64_t`.
+   *
+   */
+  FT_BASE( FT_Int32 )
+  FT_MulAddFix( FT_Fixed*  s,
+                FT_Int32*  f,
+                FT_UInt    count );
+
+
   /*
    * A variant of FT_Matrix_Multiply which scales its result afterwards.  The
    * idea is that both `a' and `b' are scaled by factors of 10 so that the
diff --git a/src/base/ftcalc.c b/src/base/ftcalc.c
index 6c1e7fb..0f1395d 100644
--- a/src/base/ftcalc.c
+++ b/src/base/ftcalc.c
@@ -1085,4 +1085,64 @@
   }
 
 
+  /* documented in ftcalc.h */
+  FT_BASE_DEF( FT_Int32 )
+  FT_MulAddFix( FT_Fixed*  s,
+                FT_Int32*  f,
+                FT_UInt    count )
+  {
+    FT_UInt   i;
+    FT_Int64  temp;
+
+
+#ifdef FT_INT64
+    /* Start from zero; `temp` is not initialized by its declaration. */
+    temp = 0;
+
+    for ( i = 0; i < count; ++i )
+      temp += (FT_Int64)s[i] * f[i];
+
+    /* Drop the 16 low bits of the sum; no rounding is applied. */
+    return (FT_Int32)( temp >> 16 );
+
+#else
+    temp.hi = 0;
+    temp.lo = 0;
+
+    for ( i = 0; i < count; ++i )
+    {
+      FT_Int64  multResult;
+
+      FT_Int     sign   = 1;
+      FT_UInt32  carry  = 0;
+      FT_UInt32  scalar = (FT_UInt32)s[i];
+      FT_UInt32  factor = (FT_UInt32)f[i];
+
+
+      /* A negative `sign` afterwards requests negating the product. */
+      FT_MOVE_SIGN( s[i], scalar, sign );
+      FT_MOVE_SIGN( f[i], factor, sign );
+
+      ft_multo64( scalar, factor, &multResult );
+
+      if ( sign < 0 )
+      {
+        /* Emulated `FT_Int64` negation. */
+        carry = ( multResult.lo == 0 );
+
+        multResult.lo = ~multResult.lo + 1;
+        multResult.hi = ~multResult.hi + carry;
+      }
+
+      FT_Add64( &temp, &multResult, &temp );
+    }
+
+    /* Shift as unsigned: a signed `<< 16` could overflow (UB). */
+    return (FT_Int32)( ( (FT_UInt32)( temp.hi & 0xFFFF ) << 16 ) |
+                                    ( temp.lo >> 16 )            );
+
+#endif /* !FT_INT64 */
+  }
+
+
 /* END */