[base] CORDIC improvements. The scaling between the hypotenuse and its CORDIC approximation is based on regression analysis. The smaller padding for `theta' is justifed by its maximum error of less than 6. * src/base/fttrigon.c (ft_trig_downscale): Borrow code from ./ftcalc.c (ft_multo64), change linear intercept. (ft_trig_pseudo_polarize): Decrease `theta' padding.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134
diff --git a/ChangeLog b/ChangeLog
index 386103d..7cc5f07 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,15 @@
+2014-11-10 Alexei Podtelezhnikov <apodtele@gmail.com>
+
+ [base] CORDIC improvements.
+
+ The scaling between the hypotenuse and its CORDIC approximation is
+ based on regression analysis. The smaller padding for `theta' is
+ justifed by its maximum error of less than 6.
+
+ * src/base/fttrigon.c (ft_trig_downscale): Borrow code from
+ ./ftcalc.c (ft_multo64), change linear intercept.
+ (ft_trig_pseudo_polarize): Decrease `theta' padding.
+
2014-11-09 Werner Lemberg <wl@gnu.org>
* src/base/ftstroke.c (ft_stroker_inside): Fix border intersections.
diff --git a/src/base/fttrigon.c b/src/base/fttrigon.c
index e3ce8a6..eaa25af 100644
--- a/src/base/fttrigon.c
+++ b/src/base/fttrigon.c
@@ -60,15 +60,18 @@
static FT_Fixed
ft_trig_downscale( FT_Fixed val )
{
- FT_Fixed s;
- FT_Int64 v;
+ FT_Int s = 1;
- s = val;
- val = FT_ABS( val );
+ if ( val < 0 )
+ {
+ val = -val;
+ s = -1;
+ }
- v = ( val * (FT_Int64)FT_TRIG_SCALE ) + 0x100000000UL;
- val = (FT_Fixed)( v >> 32 );
+ /* 0x40000000 comes from regression analysis between true */
+ /* and CORDIC hypotenuse, so it minimizes the error */
+ val = (FT_Fixed)( ( (FT_Int64)val * FT_TRIG_SCALE + 0x40000000UL ) >> 32 );
return ( s >= 0 ) ? val : -val;
}
@@ -79,29 +82,43 @@
static FT_Fixed
ft_trig_downscale( FT_Fixed val )
{
- FT_Fixed s;
- FT_UInt32 v1, v2, k1, k2, hi, lo1, lo2, lo3;
+ FT_Int s = 1;
+ FT_UInt32 lo1, hi1, lo2, hi2, lo, hi, i1, i2;
+
+
+ if ( val < 0 )
+ {
+ val = -val;
+ s = -1;
+ }
+ lo1 = val & 0x0000FFFFU;
+ hi1 = val >> 16;
+ lo2 = FT_TRIG_SCALE & 0x0000FFFFU;
+ hi2 = FT_TRIG_SCALE >> 16;
- s = val;
- val = FT_ABS( val );
+ lo = lo1 * lo2;
+ i1 = lo1 * hi2;
+ i2 = lo2 * hi1;
+ hi = hi1 * hi2;
- v1 = (FT_UInt32)val >> 16;
- v2 = (FT_UInt32)( val & 0xFFFFL );
+ /* Check carry overflow of i1 + i2 */
+ i1 += i2;
+ hi += (FT_UInt32)( i1 < i2 ) << 16;
- k1 = (FT_UInt32)FT_TRIG_SCALE >> 16; /* constant */
- k2 = (FT_UInt32)( FT_TRIG_SCALE & 0xFFFFL ); /* constant */
+ hi += i1 >> 16;
+ i1 = i1 << 16;
- hi = k1 * v1;
- lo1 = k1 * v2 + k2 * v1; /* can't overflow */
+ /* Check carry overflow of i1 + lo */
+ lo += i1;
+ hi += ( lo < i1 );
- lo2 = ( k2 * v2 ) >> 16;
- lo3 = FT_MAX( lo1, lo2 );
- lo1 += lo2;
+ /* 0x40000000 comes from regression analysis between true */
+ /* and CORDIC hypotenuse, so it minimizes the error */
- hi += lo1 >> 16;
- if ( lo1 < lo3 )
- hi += (FT_UInt32)0x10000UL;
+ /* Check carry overflow of lo + 0x40000000 */
+ lo += 0x40000000U;
+ hi += ( lo < 0x40000000U );
val = (FT_Fixed)hi;
@@ -111,7 +128,7 @@
#endif /* !FT_LONG64 */
- /* undefined and never called for zero vector */
+ /* undefined and never called for zero vector */
static FT_Int
ft_trig_prenorm( FT_Vector* vec )
{
@@ -262,11 +279,12 @@
}
}
- /* round theta */
+ /* round theta to acknowledge its error that mostly comes */
+ /* from accumulated rounding errors in the arctan table */
if ( theta >= 0 )
- theta = FT_PAD_ROUND( theta, 32 );
+ theta = FT_PAD_ROUND( theta, 16 );
else
- theta = -FT_PAD_ROUND( -theta, 32 );
+ theta = -FT_PAD_ROUND( -theta, 16 );
vec->x = x;
vec->y = theta;