Upstream-Status: Pending 2011-03-22 Joseph Myers Merge from SG++ 2.11: 2010-10-05 Nathan Froyd Issue #9382 * sysdeps/powerpc/powerpc32/603e/: New directory. * sysdeps/unix/sysv/linux/powerpc/powerpc32/e500mc/: New directory. * sysdeps/unix/sysv/linux/powerpc/powerpc32/603e/: New directory. * sysdeps/unix/sysv/linux/powerpc/powerpc32/7400/: New directory. * sysdeps/powerpc/powerpc64/e5500/fpu/e_sqrtf.c: Update. * sysdeps/powerpc/powerpc64/e5500/fpu/e_sqrt.c: Update. * sysdeps/powerpc/powerpc64/e5500/fpu/Implies: New file. Index: libc/sysdeps/powerpc/powerpc32/603e/fpu/e_sqrt.c =================================================================== --- /dev/null +++ libc/sysdeps/powerpc/powerpc32/603e/fpu/e_sqrt.c @@ -0,0 +1,134 @@ +/* Double-precision floating point square root. + Copyright (C) 2010 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. 
*/
+
+#include <math.h>
+#include <math_private.h>
+#include <fenv_libc.h>
+#include <inttypes.h>
+
+#include <sysdep.h>
+#include <ldsodefs.h>
+
+static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 };
+static const ieee_float_shape_type a_inf = {.word = 0x7f800000 };
+static const float two108 = 3.245185536584267269e+32;  /* 2^108 */
+static const float twom54 = 5.551115123125782702e-17;  /* 2^-54 */
+static const float half = 0.5;
+
+/* Return the square root of B.  For positive finite B the root and half
+   of its reciprocal are refined together from the frsqrte estimate, as
+   described in _The Handbook of Floating-Point Arithmetic_ by Muller
+   et al., chapter 5, and _IA-64 and Elementary Functions: Speed and
+   Precision_ by Markstein, chapter 9.  B < 0 raises FE_INVALID and is
+   replaced by the NaN in a_nan; that NaN and every other B (zero, NaN,
+   +Inf) are returned through f_wash.  */
+
+#ifdef __STDC__
+double
+__ieee754_sqrt (double b)
+#else
+double
+__ieee754_sqrt (b)
+     double b;
+#endif
+{
+  if (__builtin_expect (b > 0, 1))
+    {
+      double y, g, h, d, r;
+      ieee_double_shape_type u;
+
+      if (__builtin_expect (b != a_inf.value, 1))
+        {
+          fenv_t fe;
+
+          /* Save the caller's FP environment before relaxing it.  */
+          fe = fegetenv_register ();
+          u.value = b;
+          relax_fenv_state ();
+
+          __asm__ ("frsqrte %[estimate], %[x]\n"
+                   : [estimate] "=f" (y) : [x] "f" (b));
+
+          /* Following Muller et al, page 168, equation 5.20:
+             h goes to 1/(2*sqrt(b)) and g goes to sqrt(b).
+             We need three iterations to get within 1ulp.  */
+
+          /* These are meant to be performed prior to the branch, to cover
+             any latency from storing the argument and loading its high
+             word; GCC insists on sinking them below the branch, however.  */
+
+          g = b * y;
+          h = 0.5 * y;
+
+          /* Handle small numbers by scaling.  Restore the saved environment
+             first, or this return would leave the relaxed state installed.  */
+          if (__builtin_expect ((u.parts.msw & 0x7ff00000) <= 0x02000000, 0))
+            {
+              fesetenv_register (fe);
+              return __ieee754_sqrt (b * two108) * twom54;
+            }
+
+#define FMADD(a_, c_, b_) \
+  ({ double __r; \
+     __asm__ ("fmadd %[r], %[a], %[c], %[b]\n" \
+              : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
+     __r;})
+#define FNMSUB(a_, c_, b_) \
+  ({ double __r; \
+     __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n" \
+              : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
+     __r;})
+
+          r = FNMSUB (g, h, half);
+          g = FMADD (g, r, g);
+          h = FMADD (h, r, h);
+
+          r = FNMSUB (g, h, half);
+          g = FMADD (g, r, g);
+          h = FMADD (h, r, h);
+
+          r = FNMSUB (g, h, half);
+          g = FMADD (g, r, g);
+          h = FMADD (h, r, h);
+
+          /* g is now +/- 1ulp, or exactly equal to, the square root of b.  */
+
+          /* Final refinement: correct g by the residual b - g*g times h.  */
+          d = FNMSUB (g, g, b);
+
+          fesetenv_register (fe);
+          return FMADD (d, h, g);
+        }
+    }
+  else if (b < 0)
+    {
+      /* For some reason, some PowerPC32 processors don't implement
+         FE_INVALID_SQRT, so fall back to raising plain FE_INVALID when
+         the first raise did not leave FE_INVALID set.  */
+#ifdef FE_INVALID_SQRT
+      feraiseexcept (FE_INVALID_SQRT);
+
+      fenv_union_t u = { .fenv = fegetenv_register () };
+      if ((u.l[1] & FE_INVALID) == 0)
+#endif
+        feraiseexcept (FE_INVALID);
+      b = a_nan.value;
+    }
+  return f_wash (b);
+}
Index: libc/sysdeps/powerpc/powerpc32/603e/fpu/e_sqrtf.c
===================================================================
--- /dev/null
+++ libc/sysdeps/powerpc/powerpc32/603e/fpu/e_sqrtf.c
@@ -0,0 +1,101 @@
+/* Single-precision floating point square root.
+   Copyright (C) 2010 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include <math.h>
+#include <math_private.h>
+#include <fenv_libc.h>
+#include <inttypes.h>
+
+#include <sysdep.h>
+#include <ldsodefs.h>
+
+static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 };
+static const ieee_float_shape_type a_inf = {.word = 0x7f800000 };
+static const float threehalf = 1.5;
+
+/* Return the square root of B.  For positive finite B the reciprocal
+   square root is refined from the frsqrte estimate and then multiplied
+   by B, as described in _The Handbook of Floating-Point Arithmetic_ by
+   Muller et al., chapter 5, and _IA-64 and Elementary Functions: Speed
+   and Precision_ by Markstein, chapter 9.  B < 0 raises FE_INVALID and
+   is replaced by the NaN in a_nan; that NaN and every other B (zero,
+   NaN, +Inf) are returned through f_washf.  */
+
+#ifdef __STDC__
+float
+__ieee754_sqrtf (float b)
+#else
+float
+__ieee754_sqrtf (b)
+     float b;
+#endif
+{
+  if (__builtin_expect (b > 0, 1))
+    {
+#define FMSUB(a_, c_, b_) \
+  ({ double __r; \
+     __asm__ ("fmsub %[r], %[a], %[c], %[b]\n" \
+              : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
+     __r;})
+#define FNMSUB(a_, c_, b_) \
+  ({ double __r; \
+     __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n" \
+              : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
+     __r;})
+
+      if (__builtin_expect (b != a_inf.value, 1))
+        {
+          double y, x;
+          fenv_t fe;
+
+          /* Save the caller's FP environment before relaxing it.  */
+          fe = fegetenv_register ();
+          relax_fenv_state ();
+
+          /* Compute y = 1.5 * b - b.  Uses fewer constants than y = 0.5 * b.  */
+          y = FMSUB (threehalf, b, b);
+
+          /* Initial estimate.  */
+          __asm__ ("frsqrte %[x], %[b]\n" : [x] "=f" (x) : [b] "f" (b));
+
+          /* Iterate.  x_{n+1} = x_n * (1.5 - y * (x_n * x_n)).  */
+          x = x * FNMSUB (y, x * x, threehalf);
+          x = x * FNMSUB (y, x * x, threehalf);
+          x = x * FNMSUB (y, x * x, threehalf);
+
+          /* All done; the final multiply runs in the caller's environment.  */
+          fesetenv_register (fe);
+          return x * b;
+        }
+    }
+  else if (b < 0)
+    {
+      /* Some PowerPC32 processors don't implement FE_INVALID_SQRT, so
+         raise plain FE_INVALID if the first raise did not set it.  */
+#ifdef FE_INVALID_SQRT
+      feraiseexcept (FE_INVALID_SQRT);
+
+      fenv_union_t u = { .fenv = fegetenv_register () };
+      if ((u.l[1] & FE_INVALID) == 0)
+#endif
+        feraiseexcept (FE_INVALID);
+      b = a_nan.value;
+    }
+  return f_washf (b);
+}
Index: libc/sysdeps/powerpc/powerpc64/e5500/fpu/e_sqrt.c
===================================================================
--- /dev/null
+++ libc/sysdeps/powerpc/powerpc64/e5500/fpu/e_sqrt.c
@@ -0,0 +1,134 @@
+/* Double-precision floating point square root.
+   Copyright (C) 2010 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.
*/
+
+#include <math.h>
+#include <math_private.h>
+#include <fenv_libc.h>
+#include <inttypes.h>
+
+#include <sysdep.h>
+#include <ldsodefs.h>
+
+static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 };
+static const ieee_float_shape_type a_inf = {.word = 0x7f800000 };
+static const float two108 = 3.245185536584267269e+32;  /* 2^108 */
+static const float twom54 = 5.551115123125782702e-17;  /* 2^-54 */
+static const float half = 0.5;
+
+/* Return the square root of B.  For positive finite B the root and half
+   of its reciprocal are refined together from the frsqrte estimate, as
+   described in _The Handbook of Floating-Point Arithmetic_ by Muller
+   et al., chapter 5, and _IA-64 and Elementary Functions: Speed and
+   Precision_ by Markstein, chapter 9.  B < 0 raises FE_INVALID and is
+   replaced by the NaN in a_nan; that NaN and every other B (zero, NaN,
+   +Inf) are returned through f_wash.  */
+
+#ifdef __STDC__
+double
+__ieee754_sqrt (double b)
+#else
+double
+__ieee754_sqrt (b)
+     double b;
+#endif
+{
+  if (__builtin_expect (b > 0, 1))
+    {
+      double y, g, h, d, r;
+      ieee_double_shape_type u;
+
+      if (__builtin_expect (b != a_inf.value, 1))
+        {
+          fenv_t fe;
+
+          /* Save the caller's FP environment before relaxing it.  */
+          fe = fegetenv_register ();
+          u.value = b;
+          relax_fenv_state ();
+
+          __asm__ ("frsqrte %[estimate], %[x]\n"
+                   : [estimate] "=f" (y) : [x] "f" (b));
+
+          /* Following Muller et al, page 168, equation 5.20:
+             h goes to 1/(2*sqrt(b)) and g goes to sqrt(b).
+             We need three iterations to get within 1ulp.  */
+
+          /* These are meant to be performed prior to the branch, to cover
+             any latency from storing the argument and loading its high
+             word; GCC insists on sinking them below the branch, however.  */
+
+          g = b * y;
+          h = 0.5 * y;
+
+          /* Handle small numbers by scaling.  Restore the saved environment
+             first, or this return would leave the relaxed state installed.  */
+          if (__builtin_expect ((u.parts.msw & 0x7ff00000) <= 0x02000000, 0))
+            {
+              fesetenv_register (fe);
+              return __ieee754_sqrt (b * two108) * twom54;
+            }
+
+#define FMADD(a_, c_, b_) \
+  ({ double __r; \
+     __asm__ ("fmadd %[r], %[a], %[c], %[b]\n" \
+              : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
+     __r;})
+#define FNMSUB(a_, c_, b_) \
+  ({ double __r; \
+     __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n" \
+              : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
+     __r;})
+
+          r = FNMSUB (g, h, half);
+          g = FMADD (g, r, g);
+          h = FMADD (h, r, h);
+
+          r = FNMSUB (g, h, half);
+          g = FMADD (g, r, g);
+          h = FMADD (h, r, h);
+
+          r = FNMSUB (g, h, half);
+          g = FMADD (g, r, g);
+          h = FMADD (h, r, h);
+
+          /* g is now +/- 1ulp, or exactly equal to, the square root of b.  */
+
+          /* Final refinement: correct g by the residual b - g*g times h.  */
+          d = FNMSUB (g, g, b);
+
+          fesetenv_register (fe);
+          return FMADD (d, h, g);
+        }
+    }
+  else if (b < 0)
+    {
+      /* For some reason, some PowerPC32 processors don't implement
+         FE_INVALID_SQRT, so fall back to raising plain FE_INVALID when
+         the first raise did not leave FE_INVALID set.  */
+#ifdef FE_INVALID_SQRT
+      feraiseexcept (FE_INVALID_SQRT);
+
+      fenv_union_t u = { .fenv = fegetenv_register () };
+      if ((u.l[1] & FE_INVALID) == 0)
+#endif
+        feraiseexcept (FE_INVALID);
+      b = a_nan.value;
+    }
+  return f_wash (b);
+}
Index: libc/sysdeps/powerpc/powerpc64/e5500/fpu/e_sqrtf.c
===================================================================
--- /dev/null
+++ libc/sysdeps/powerpc/powerpc64/e5500/fpu/e_sqrtf.c
@@ -0,0 +1,101 @@
+/* Single-precision floating point square root.
+   Copyright (C) 2010 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include <math.h>
+#include <math_private.h>
+#include <fenv_libc.h>
+#include <inttypes.h>
+
+#include <sysdep.h>
+#include <ldsodefs.h>
+
+static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 };
+static const ieee_float_shape_type a_inf = {.word = 0x7f800000 };
+static const float threehalf = 1.5;
+
+/* Return the square root of B.  For positive finite B the reciprocal
+   square root is refined from the frsqrte estimate and then multiplied
+   by B, as described in _The Handbook of Floating-Point Arithmetic_ by
+   Muller et al., chapter 5, and _IA-64 and Elementary Functions: Speed
+   and Precision_ by Markstein, chapter 9.  B < 0 raises FE_INVALID and
+   is replaced by the NaN in a_nan; that NaN and every other B (zero,
+   NaN, +Inf) are returned through f_washf.  */
+
+#ifdef __STDC__
+float
+__ieee754_sqrtf (float b)
+#else
+float
+__ieee754_sqrtf (b)
+     float b;
+#endif
+{
+  if (__builtin_expect (b > 0, 1))
+    {
+#define FMSUB(a_, c_, b_) \
+  ({ double __r; \
+     __asm__ ("fmsub %[r], %[a], %[c], %[b]\n" \
+              : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
+     __r;})
+#define FNMSUB(a_, c_, b_) \
+  ({ double __r; \
+     __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n" \
+              : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
+     __r;})
+
+      if (__builtin_expect (b != a_inf.value, 1))
+        {
+          double y, x;
+          fenv_t fe;
+
+          /* Save the caller's FP environment before relaxing it.  */
+          fe = fegetenv_register ();
+          relax_fenv_state ();
+
+          /* Compute y = 1.5 * b - b.  Uses fewer constants than y = 0.5 * b.  */
+          y = FMSUB (threehalf, b, b);
+
+          /* Initial estimate.  */
+          __asm__ ("frsqrte %[x], %[b]\n" : [x] "=f" (x) : [b] "f" (b));
+
+          /* Iterate.  x_{n+1} = x_n * (1.5 - y * (x_n * x_n)).  */
+          x = x * FNMSUB (y, x * x, threehalf);
+          x = x * FNMSUB (y, x * x, threehalf);
+          x = x * FNMSUB (y, x * x, threehalf);
+
+          /* All done; the final multiply runs in the caller's environment.  */
+          fesetenv_register (fe);
+          return x * b;
+        }
+    }
+  else if (b < 0)
+    {
+      /* Some PowerPC32 processors don't implement FE_INVALID_SQRT, so
+         raise plain FE_INVALID if the first raise did not set it.  */
+#ifdef FE_INVALID_SQRT
+      feraiseexcept (FE_INVALID_SQRT);
+
+      fenv_union_t u = { .fenv = fegetenv_register () };
+      if ((u.l[1] & FE_INVALID) == 0)
+#endif
+        feraiseexcept (FE_INVALID);
+      b = a_nan.value;
+    }
+  return f_washf (b);
+}
Index: libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/603e/fpu/Implies
===================================================================
--- /dev/null
+++ libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/603e/fpu/Implies
@@ -0,0 +1 @@
+powerpc/powerpc32/603e/fpu
Index: libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/7400/fpu/Implies
===================================================================
--- /dev/null
+++ libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/7400/fpu/Implies
@@ -0,0 +1 @@
+powerpc/powerpc32/603e/fpu
Index: libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/e500mc/fpu/Implies
===================================================================
--- /dev/null
+++ libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/e500mc/fpu/Implies
@@ -0,0 +1 @@
+powerpc/powerpc32/603e/fpu
Index: libc/sysdeps/unix/sysv/linux/powerpc/powerpc64/e5500/fpu/Implies
===================================================================
--- /dev/null
+++ libc/sysdeps/unix/sysv/linux/powerpc/powerpc64/e5500/fpu/Implies
@@ -0,0 +1 @@
+powerpc/powerpc64/e5500/fpu
Index: libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/e5500/fpu/Implies
===================================================================
--- /dev/null
+++ libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/e5500/fpu/Implies
@@ -0,0 +1 @@
+powerpc/powerpc32/603e/fpu