Fix dbl-64 atan2 in non-default rounding modes (bug 18210, bug 18211).

The dbl-64 implementation of atan2 does computations that expect to run in round-to-nearest mode, and in other modes the errors can accumulate to more than the maximum accepted 9 ulps. This patch makes it use FE_TONEAREST internally, similar to other functions with such issues. Tests that previously produced large errors are added for atan2 and the closely related carg, clog and clog10 functions.

Tested for x86_64 and x86, with ulps updated accordingly.

[BZ #18210]
[BZ #18211]
* sysdeps/ieee754/dbl-64/e_atan2.c: Include <fenv.h>.
(__ieee754_atan2): Set FE_TONEAREST mode for internal computations.
* math/auto-libm-test-in: Add more tests of atan2, carg, clog and clog10.
* math/auto-libm-test-out: Regenerated.
* sysdeps/i386/fpu/libm-test-ulps: Update.
* sysdeps/x86_64/fpu/libm-test-ulps: Likewise.
git-mirror · Apr 8, 2015 · 8431838 · 8431838
1 parent ae63c7e
commit 8431838
Show file tree

Hide file tree

Showing 7 changed files with 893 additions and 37 deletions.
diff --git a/ChangeLog b/ChangeLog
@@ -1,5 +1,16 @@
 2015-04-08  Joseph Myers  <joseph@codesourcery.com>
 
+	[BZ #18210]
+	[BZ #18211]
+	* sysdeps/ieee754/dbl-64/e_atan2.c: Include <fenv.h>.
+	(__ieee754_atan2): Set FE_TONEAREST mode for internal
+	computations.
+	* math/auto-libm-test-in: Add more tests of atan2, carg, clog and
+	clog10.
+	* math/auto-libm-test-out: Regenerated.
+	* sysdeps/i386/fpu/libm-test-ulps: Update.
+	* sysdeps/x86_64/fpu/libm-test-ulps: Likewise.
+
 	[BZ #18197]
 	* sysdeps/ieee754/dbl-64/s_atan.c: Include <fenv.h>.
 	(atan): Set FE_TONEAREST mode for internal computations.

diff --git a/NEWS b/NEWS
@@ -15,7 +15,8 @@ Version 2.22
   17836, 17912, 17916, 17930, 17932, 17944, 17949, 17964, 17965, 17967,
   17969, 17978, 17987, 17991, 17996, 17998, 17999, 18019, 18020, 18029,
   18030, 18032, 18036, 18038, 18039, 18042, 18043, 18046, 18047, 18068,
-  18080, 18093, 18100, 18104, 18110, 18111, 18128, 18138, 18185, 18197.
+  18080, 18093, 18100, 18104, 18110, 18111, 18128, 18138, 18185, 18197,
+  18210, 18211.
 
 * A powerpc and powerpc64 optimization for TLS, similar to TLS descriptors
   for LD and GD on x86 and x86-64, has been implemented.  You will need

diff --git a/math/auto-libm-test-in b/math/auto-libm-test-in
@@ -287,6 +287,14 @@ atan2 0.390625 .00029
 atan2 1.390625 0.9296875
 atan2 -0.00756827042671106339 -.001792735857538728036
 atan2 0x1.00000000000001p0 0x1.00000000000001p0
+atan2 0x4.c3841p-4 0x2.f2f308p+0
+atan2 -0xe.cf143p-40 0xd.3de7ap-36
+atan2 0x5.576cf8p-4 0x2.21e65p+0
+atan2 -0x4.29411p-4 0x1.f4755cp+0
+atan2 -0xa.b4101p+20 -0xf.9c4c8p-4
+atan2 0x4.251bb8p-4 0x7.40ac68p+0
+atan2 0x1.47239ep+68 0xa.3ac3cp+68
+atan2 -0x6.b0794p-4 0x3.8ff10cp+0
 atan2 min min
 atan2 min -min
 atan2 -min min
@@ -409,6 +417,14 @@ carg -0 2.0
 carg 0 -2.0
 # carg (-0 + i y) == -pi/2 for y < 0.
 carg -0 -2.0
+carg 0x2.f2f308p+0 0x4.c3841p-4
+carg 0xd.3de7ap-36 -0xe.cf143p-40
+carg 0x2.21e65p+0 0x5.576cf8p-4
+carg 0x1.f4755cp+0 -0x4.29411p-4
+carg -0xf.9c4c8p-4 -0xa.b4101p+20
+carg 0x7.40ac68p+0 0x4.251bb8p-4
+carg 0xa.3ac3cp+68 0x1.47239ep+68
+carg 0x3.8ff10cp+0 -0x6.b0794p-4
 
 cbrt 0.0
 cbrt -0
@@ -524,6 +540,15 @@ cexp min -min_subnorm spurious-underflow:ldbl-96-intel:x86 spurious-underflow:ld
 clog 0.75 1.25
 clog -2 -3
 
+clog 0x2.f2f308p+0 0x4.c3841p-4
+clog 0xd.3de7ap-36 -0xe.cf143p-40
+clog 0x2.21e65p+0 0x5.576cf8p-4
+clog 0x1.f4755cp+0 -0x4.29411p-4
+clog -0xf.9c4c8p-4 -0xa.b4101p+20
+clog 0x7.40ac68p+0 0x4.251bb8p-4
+clog 0xa.3ac3cp+68 0x1.47239ep+68
+clog 0x3.8ff10cp+0 -0x6.b0794p-4
+
 clog 0x1.fffffep+127 0x1.fffffep+127
 clog 0x1.fffffep+127 1.0
 clog 0x1p-149 0x1p-149
@@ -643,6 +668,15 @@ clog 0x1415bcaf2105940d49a636e98ae59p-115 0x7e6a150adfcd1b0921d44b31f40f4p-115
 clog10 0.75 1.25
 clog10 -2 -3
 
+clog10 0x2.f2f308p+0 0x4.c3841p-4
+clog10 0xd.3de7ap-36 -0xe.cf143p-40
+clog10 0x2.21e65p+0 0x5.576cf8p-4
+clog10 0x1.f4755cp+0 -0x4.29411p-4
+clog10 -0xf.9c4c8p-4 -0xa.b4101p+20
+clog10 0x7.40ac68p+0 0x4.251bb8p-4
+clog10 0xa.3ac3cp+68 0x1.47239ep+68
+clog10 0x3.8ff10cp+0 -0x6.b0794p-4
+
 clog10 0x1.fffffep+127 0x1.fffffep+127
 clog10 0x1.fffffep+127 1.0
 clog10 0x1p-149 0x1p-149

diff --git a/math/auto-libm-test-out b/math/auto-libm-test-out
diff --git a/sysdeps/i386/fpu/libm-test-ulps b/sysdeps/i386/fpu/libm-test-ulps
@@ -283,6 +283,10 @@ ifloat: 2
 ildouble: 2
 ldouble: 2
 
+Function: "carg":
+ildouble: 1
+ldouble: 1
+
 Function: "carg_downward":
 double: 1
 float: 1
@@ -827,9 +831,9 @@ ldouble: 1
 
 Function: Real part of "clog_upward":
 double: 1
-float: 1
+float: 2
 idouble: 1
-ifloat: 1
+ifloat: 2
 ildouble: 2
 ldouble: 2
 

diff --git a/sysdeps/ieee754/dbl-64/e_atan2.c b/sysdeps/ieee754/dbl-64/e_atan2.c
@@ -41,6 +41,7 @@
 #include "MathLib.h"
 #include "uatan.tbl"
 #include "atnat2.h"
+#include <fenv.h>
 #include <float.h>
 #include <math.h>
 #include <math_private.h>
@@ -192,6 +193,7 @@ __ieee754_atan2 (double y, double x)
 	return mhpi.d;
     }
 
+  SET_RESTORE_ROUND (FE_TONEAREST);
   /* either x/y or y/x is very close to zero */
   ax = (x < 0) ? -x : x;
   ay = (y < 0) ? -y : y;

diff --git a/sysdeps/x86_64/fpu/libm-test-ulps b/sysdeps/x86_64/fpu/libm-test-ulps
@@ -228,9 +228,9 @@ ildouble: 2
 ldouble: 2
 
 Function: Real part of "cacos_downward":
-double: 1
+double: 2
 float: 2
-idouble: 1
+idouble: 2
 ifloat: 2
 ildouble: 2
 ldouble: 2
@@ -244,9 +244,9 @@ ildouble: 5
 ldouble: 5
 
 Function: Real part of "cacos_towardzero":
-double: 1
+double: 2
 float: 2
-idouble: 1
+idouble: 2
 ifloat: 2
 ildouble: 2
 ldouble: 2
@@ -300,9 +300,9 @@ ildouble: 5
 ldouble: 5
 
 Function: Imaginary part of "cacosh_downward":
-double: 1
+double: 2
 float: 2
-idouble: 1
+idouble: 2
 ifloat: 2
 ildouble: 2
 ldouble: 2
@@ -316,9 +316,9 @@ ildouble: 5
 ldouble: 5
 
 Function: Imaginary part of "cacosh_towardzero":
-double: 1
+double: 2
 float: 2
-idouble: 1
+idouble: 2
 ifloat: 2
 ildouble: 2
 ldouble: 2
@@ -339,6 +339,12 @@ ifloat: 2
 ildouble: 2
 ldouble: 2
 
+Function: "carg":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
 Function: "carg_downward":
 double: 1
 float: 2
@@ -348,8 +354,10 @@ ildouble: 1
 ldouble: 1
 
 Function: "carg_towardzero":
-float: 1
-ifloat: 1
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
 ildouble: 1
 ldouble: 1
 
@@ -410,9 +418,9 @@ ildouble: 5
 ldouble: 5
 
 Function: Real part of "casin_upward":
-double: 1
+double: 2
 float: 1
-idouble: 1
+idouble: 2
 ifloat: 1
 ildouble: 2
 ldouble: 2
@@ -482,9 +490,9 @@ ildouble: 5
 ldouble: 5
 
 Function: Imaginary part of "casinh_upward":
-double: 1
+double: 2
 float: 2
-idouble: 1
+idouble: 2
 ifloat: 2
 ildouble: 2
 ldouble: 2
@@ -534,9 +542,7 @@ ildouble: 4
 ldouble: 4
 
 Function: Real part of "catan_upward":
-double: 1
 float: 1
-idouble: 1
 ifloat: 1
 ildouble: 1
 ldouble: 1
@@ -602,9 +608,7 @@ ildouble: 4
 ldouble: 4
 
 Function: Imaginary part of "catanh_upward":
-double: 1
 float: 1
-idouble: 1
 ifloat: 1
 ildouble: 1
 ldouble: 1
@@ -820,9 +824,9 @@ ildouble: 4
 ldouble: 4
 
 Function: Imaginary part of "clog10_downward":
-double: 3
+double: 1
 float: 2
-idouble: 3
+idouble: 1
 ifloat: 2
 ildouble: 2
 ldouble: 2
@@ -836,9 +840,9 @@ ildouble: 4
 ldouble: 4
 
 Function: Imaginary part of "clog10_towardzero":
-double: 3
+double: 1
 float: 2
-idouble: 3
+idouble: 1
 ifloat: 2
 ildouble: 2
 ldouble: 2
@@ -852,10 +856,10 @@ ildouble: 4
 ldouble: 4
 
 Function: Imaginary part of "clog10_upward":
-double: 2
-float: 2
-idouble: 2
-ifloat: 2
+double: 1
+float: 3
+idouble: 1
+ifloat: 3
 ildouble: 2
 ldouble: 2
 
@@ -893,16 +897,16 @@ ldouble: 1
 
 Function: Real part of "clog_upward":
 double: 2
-float: 1
+float: 2
 idouble: 2
-ifloat: 1
+ifloat: 2
 ildouble: 2
 ldouble: 2
 
 Function: Imaginary part of "clog_upward":
-double: 2
+double: 1
 float: 2
-idouble: 2
+idouble: 1
 ifloat: 2
 ildouble: 1
 ldouble: 1
@@ -984,9 +988,9 @@ ildouble: 7
 ldouble: 7
 
 Function: Imaginary part of "cpow_downward":
-double: 2
+double: 1
 float: 2
-idouble: 2
+idouble: 1
 ifloat: 2
 ildouble: 2
 ldouble: 2
@@ -1000,9 +1004,9 @@ ildouble: 7
 ldouble: 7
 
 Function: Imaginary part of "cpow_towardzero":
-double: 2
+double: 1
 float: 2
-idouble: 2
+idouble: 1
 ifloat: 2
 ildouble: 1
 ldouble: 1