New module 'fma'.
authorBruno Haible <bruno@clisp.org>
Mon, 17 Oct 2011 21:48:01 +0000 (23:48 +0200)
committerBruno Haible <bruno@clisp.org>
Sun, 6 Nov 2011 01:36:13 +0000 (02:36 +0100)
* lib/math.in.h (fma): New declaration.
* lib/fma.c: New file.
* m4/fma.m4: New file.
* m4/fegetround.m4: New file.
* m4/math_h.m4 (gl_MATH_H): Test whethern fma is declared.
(gl_MATH_H_DEFAULTS): Initialize GNULIB_FMA, HAVE_FMA, REPLACE_FMA.
* modules/math (Makefile.am): Substitute GNULIB_FMA, HAVE_FMA,
REPLACE_FMA.
* modules/fma: New file.
* doc/posix-functions/fma.texi: Mention the new module and the various
bugs.

ChangeLog
doc/posix-functions/fma.texi
lib/fma.c [new file with mode: 0644]
lib/math.in.h
m4/fegetround.m4 [new file with mode: 0644]
m4/fma.m4 [new file with mode: 0644]
m4/math_h.m4
modules/fma [new file with mode: 0644]
modules/math

index f46a0a3..3f5b617 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,18 @@
 2011-11-05  Bruno Haible  <bruno@clisp.org>
 
+       New module 'fma'.
+       * lib/math.in.h (fma): New declaration.
+       * lib/fma.c: New file.
+       * m4/fma.m4: New file.
+       * m4/fegetround.m4: New file.
+       * m4/math_h.m4 (gl_MATH_H): Test whethern fma is declared.
+       (gl_MATH_H_DEFAULTS): Initialize GNULIB_FMA, HAVE_FMA, REPLACE_FMA.
+       * modules/math (Makefile.am): Substitute GNULIB_FMA, HAVE_FMA,
+       REPLACE_FMA.
+       * modules/fma: New file.
+       * doc/posix-functions/fma.texi: Mention the new module and the various
+       bugs.
+
        Extend gl_MATHFUNC.
        * m4/mathfunc.m4 (gl_MATHFUNC): Accept an 4th parameter of INCLUDES.
        Support 'void' as argument type.
index 83ff777..4e3abe6 100644 (file)
@@ -4,15 +4,18 @@
 
 POSIX specification:@* @url{http://www.opengroup.org/onlinepubs/9699919799/functions/fma.html}
 
-Gnulib module: ---
+Gnulib module: fma
 
 Portability problems fixed by Gnulib:
 @itemize
+@item
+This function is missing on some platforms:
+FreeBSD 5.2.1, NetBSD 5.0, OpenBSD 3.8, Minix 3.1.8, AIX 5.1, HP-UX 11, IRIX 6.5, OSF/1 4.0, Solaris 9, MSVC 9, Interix 3.5.
+@item
+This function produces wrong results on some platforms:
+glibc 2.11, MacOS X 10.5, FreeBSD 6.4/x86, OSF/1 5.1, Cygwin 1.5, mingw.
 @end itemize
 
 Portability problems not fixed by Gnulib:
 @itemize
-@item
-This function is missing on some platforms:
-FreeBSD 5.2.1, NetBSD 5.0, OpenBSD 3.8, Minix 3.1.8, AIX 5.1, HP-UX 11, IRIX 6.5, OSF/1 4.0, Solaris 9, MSVC 9, Interix 3.5.
 @end itemize
diff --git a/lib/fma.c b/lib/fma.c
new file mode 100644 (file)
index 0000000..d3fe25a
--- /dev/null
+++ b/lib/fma.c
@@ -0,0 +1,896 @@
+/* Fused multiply-add.
+   Copyright (C) 2007, 2009, 2011 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+/* Written by Bruno Haible <bruno@clisp.org>, 2011.  */
+
+#if ! defined USE_LONG_DOUBLE
+# include <config.h>
+#endif
+
+/* Specification.  */
+#include <math.h>
+
+#include <stdbool.h>
+#include <stdlib.h>
+
+#if HAVE_FEGETROUND
+# include <fenv.h>
+#endif
+
+#include "float+.h"
+#include "integer_length.h"
+#include "verify.h"
+
+#ifdef USE_LONG_DOUBLE
+# define FUNC fmal
+# define DOUBLE long double
+# define FREXP frexpl
+# define LDEXP ldexpl
+# define MIN_EXP LDBL_MIN_EXP
+# define MANT_BIT LDBL_MANT_BIT
+# define DECL_ROUNDING DECL_LONG_DOUBLE_ROUNDING
+# define BEGIN_ROUNDING() BEGIN_LONG_DOUBLE_ROUNDING ()
+# define END_ROUNDING() END_LONG_DOUBLE_ROUNDING ()
+# define L_(literal) literal##L
+#elif ! defined USE_FLOAT
+# define FUNC fma
+# define DOUBLE double
+# define FREXP frexp
+# define LDEXP ldexp
+# define MIN_EXP DBL_MIN_EXP
+# define MANT_BIT DBL_MANT_BIT
+# define DECL_ROUNDING
+# define BEGIN_ROUNDING()
+# define END_ROUNDING()
+# define L_(literal) literal
+#else /* defined USE_FLOAT */
+# define FUNC fmaf
+# define DOUBLE float
+# define FREXP frexpf
+# define LDEXP ldexpf
+# define MIN_EXP FLT_MIN_EXP
+# define MANT_BIT FLT_MANT_BIT
+# define DECL_ROUNDING
+# define BEGIN_ROUNDING()
+# define END_ROUNDING()
+# define L_(literal) literal##f
+#endif
+
+#undef MAX
+#define MAX(a,b) ((a) > (b) ? (a) : (b))
+
+#undef MIN
+#define MIN(a,b) ((a) < (b) ? (a) : (b))
+
+/* It is possible to write an implementation of fused multiply-add with
+   floating-point operations alone.  See
+     Sylvie Boldo, Guillaume Melquiond:
+     Emulation of FMA and correctly-rounded sums: proved algorithms using
+     rounding to odd.
+     <http://www.lri.fr/~melquion/doc/08-tc.pdf>
+   But is it complicated.
+   Here we take the simpler (and probably slower) approach of doing
+   multi-precision arithmetic.  */
+
+/* We use the naming conventions of GNU gmp, but vastly simpler (and slower)
+   algorithms.  */
+
+typedef unsigned int mp_limb_t;
+#define GMP_LIMB_BITS 32
+verify (sizeof (mp_limb_t) * CHAR_BIT == GMP_LIMB_BITS);
+
+typedef unsigned long long mp_twolimb_t;
+#define GMP_TWOLIMB_BITS 64
+verify (sizeof (mp_twolimb_t) * CHAR_BIT == GMP_TWOLIMB_BITS);
+
+/* Number of limbs needed for a single DOUBLE.  */
+#define NLIMBS1 ((MANT_BIT + GMP_LIMB_BITS - 1) / GMP_LIMB_BITS)
+
+/* Number of limbs needed for the accumulator.  */
+#define NLIMBS3 (3 * NLIMBS1 + 1)
+
+/* Assuming 0.5 <= x < 1.0:
+   Convert the mantissa (x * 2^DBL_MANT_BIT) to a sequence of limbs.  */
+static void
+decode (DOUBLE x, mp_limb_t limbs[NLIMBS1])
+{
+  /* I'm not sure whether it's safe to cast a 'double' value between
+     2^31 and 2^32 to 'unsigned int', therefore play safe and cast only
+     'double' values between 0 and 2^31 (to 'unsigned int' or 'int',
+     doesn't matter).
+     So, we split the MANT_BIT bits of x into a number of chunks of
+     at most 31 bits.  */
+  enum { chunk_count = (MANT_BIT - 1) / 31 + 1 };
+  /* Variables used for storing the bits limb after limb.  */
+  mp_limb_t *p = limbs + NLIMBS1 - 1;
+  mp_limb_t accu = 0;
+  unsigned int bits_needed = MANT_BIT - (NLIMBS1 - 1) * GMP_LIMB_BITS;
+  /* The bits bits_needed-1...0 need to be ORed into the accu.
+     1 <= bits_needed <= GMP_LIMB_BITS.  */
+  /* Unroll the first 4 loop rounds.  */
+  if (chunk_count > 0)
+    {
+      /* Here we still have MANT_BIT-0*31 bits to extract from x.  */
+      enum { chunk_bits = MIN (31, MANT_BIT - 0 * 31) }; /* > 0, <= 31 */
+      mp_limb_t d;
+      x *= (mp_limb_t) 1 << chunk_bits;
+      d = (int) x; /* 0 <= d < 2^chunk_bits.  */
+      x -= d;
+      if (!(x >= L_(0.0) && x < L_(1.0)))
+        abort ();
+      if (bits_needed < chunk_bits)
+        {
+          /* store bits_needed bits */
+          accu |= d >> (chunk_bits - bits_needed);
+          *p = accu;
+          if (p == limbs)
+            goto done;
+          p--;
+          /* hold (chunk_bits - bits_needed) bits */
+          accu = d << (GMP_LIMB_BITS - (chunk_bits - bits_needed));
+          bits_needed = GMP_LIMB_BITS - (chunk_bits - bits_needed);
+        }
+      else
+        {
+          /* store chunk_bits bits */
+          accu |= d << (bits_needed - chunk_bits);
+          bits_needed -= chunk_bits;
+          if (bits_needed == 0)
+            {
+              *p = accu;
+              if (p == limbs)
+                goto done;
+              p--;
+              accu = 0;
+              bits_needed = GMP_LIMB_BITS;
+            }
+        }
+    }
+  if (chunk_count > 1)
+    {
+      /* Here we still have MANT_BIT-1*31 bits to extract from x.  */
+      enum { chunk_bits = MIN (31, MAX (MANT_BIT - 1 * 31, 0)) }; /* > 0, <= 31 */
+      mp_limb_t d;
+      x *= (mp_limb_t) 1 << chunk_bits;
+      d = (int) x; /* 0 <= d < 2^chunk_bits.  */
+      x -= d;
+      if (!(x >= L_(0.0) && x < L_(1.0)))
+        abort ();
+      if (bits_needed < chunk_bits)
+        {
+          /* store bits_needed bits */
+          accu |= d >> (chunk_bits - bits_needed);
+          *p = accu;
+          if (p == limbs)
+            goto done;
+          p--;
+          /* hold (chunk_bits - bits_needed) bits */
+          accu = d << (GMP_LIMB_BITS - (chunk_bits - bits_needed));
+          bits_needed = GMP_LIMB_BITS - (chunk_bits - bits_needed);
+        }
+      else
+        {
+          /* store chunk_bits bits */
+          accu |= d << (bits_needed - chunk_bits);
+          bits_needed -= chunk_bits;
+          if (bits_needed == 0)
+            {
+              *p = accu;
+              if (p == limbs)
+                goto done;
+              p--;
+              accu = 0;
+              bits_needed = GMP_LIMB_BITS;
+            }
+        }
+    }
+  if (chunk_count > 2)
+    {
+      /* Here we still have MANT_BIT-2*31 bits to extract from x.  */
+      enum { chunk_bits = MIN (31, MAX (MANT_BIT - 2 * 31, 0)) }; /* > 0, <= 31 */
+      mp_limb_t d;
+      x *= (mp_limb_t) 1 << chunk_bits;
+      d = (int) x; /* 0 <= d < 2^chunk_bits.  */
+      x -= d;
+      if (!(x >= L_(0.0) && x < L_(1.0)))
+        abort ();
+      if (bits_needed < chunk_bits)
+        {
+          /* store bits_needed bits */
+          accu |= d >> (chunk_bits - bits_needed);
+          *p = accu;
+          if (p == limbs)
+            goto done;
+          p--;
+          /* hold (chunk_bits - bits_needed) bits */
+          accu = d << (GMP_LIMB_BITS - (chunk_bits - bits_needed));
+          bits_needed = GMP_LIMB_BITS - (chunk_bits - bits_needed);
+        }
+      else
+        {
+          /* store chunk_bits bits */
+          accu |= d << (bits_needed - chunk_bits);
+          bits_needed -= chunk_bits;
+          if (bits_needed == 0)
+            {
+              *p = accu;
+              if (p == limbs)
+                goto done;
+              p--;
+              accu = 0;
+              bits_needed = GMP_LIMB_BITS;
+            }
+        }
+    }
+  if (chunk_count > 3)
+    {
+      /* Here we still have MANT_BIT-3*31 bits to extract from x.  */
+      enum { chunk_bits = MIN (31, MAX (MANT_BIT - 3 * 31, 0)) }; /* > 0, <= 31 */
+      mp_limb_t d;
+      x *= (mp_limb_t) 1 << chunk_bits;
+      d = (int) x; /* 0 <= d < 2^chunk_bits.  */
+      x -= d;
+      if (!(x >= L_(0.0) && x < L_(1.0)))
+        abort ();
+      if (bits_needed < chunk_bits)
+        {
+          /* store bits_needed bits */
+          accu |= d >> (chunk_bits - bits_needed);
+          *p = accu;
+          if (p == limbs)
+            goto done;
+          p--;
+          /* hold (chunk_bits - bits_needed) bits */
+          accu = d << (GMP_LIMB_BITS - (chunk_bits - bits_needed));
+          bits_needed = GMP_LIMB_BITS - (chunk_bits - bits_needed);
+        }
+      else
+        {
+          /* store chunk_bits bits */
+          accu |= d << (bits_needed - chunk_bits);
+          bits_needed -= chunk_bits;
+          if (bits_needed == 0)
+            {
+              *p = accu;
+              if (p == limbs)
+                goto done;
+              p--;
+              accu = 0;
+              bits_needed = GMP_LIMB_BITS;
+            }
+        }
+    }
+  if (chunk_count > 4)
+    {
+      /* Here we still have MANT_BIT-4*31 bits to extract from x.  */
+      /* Generic loop.  */
+      size_t k;
+      for (k = 4; k < chunk_count; k++)
+        {
+          size_t chunk_bits = MIN (31, MANT_BIT - k * 31); /* > 0, <= 31 */
+          mp_limb_t d;
+          x *= (mp_limb_t) 1 << chunk_bits;
+          d = (int) x; /* 0 <= d < 2^chunk_bits.  */
+          x -= d;
+          if (!(x >= L_(0.0) && x < L_(1.0)))
+            abort ();
+          if (bits_needed < chunk_bits)
+            {
+              /* store bits_needed bits */
+              accu |= d >> (chunk_bits - bits_needed);
+              *p = accu;
+              if (p == limbs)
+                goto done;
+              p--;
+              /* hold (chunk_bits - bits_needed) bits */
+              accu = d << (GMP_LIMB_BITS - (chunk_bits - bits_needed));
+              bits_needed = GMP_LIMB_BITS - (chunk_bits - bits_needed);
+            }
+          else
+            {
+              /* store chunk_bits bits */
+              accu |= d << (bits_needed - chunk_bits);
+              bits_needed -= chunk_bits;
+              if (bits_needed == 0)
+                {
+                  *p = accu;
+                  if (p == limbs)
+                    goto done;
+                  p--;
+                  accu = 0;
+                  bits_needed = GMP_LIMB_BITS;
+                }
+            }
+        }
+    }
+  /* We shouldn't get here.  */
+  abort ();
+
+ done: ;
+#ifndef USE_LONG_DOUBLE /* On FreeBSD 6.1/x86, 'long double' numbers sometimes
+                           have excess precision.  */
+  if (!(x == L_(0.0)))
+    abort ();
+#endif
+}
+
+/* Multiply two sequences of limbs.  */
+static void
+multiply (mp_limb_t xlimbs[NLIMBS1], mp_limb_t ylimbs[NLIMBS1],
+          mp_limb_t prod_limbs[2 * NLIMBS1])
+{
+  size_t k, i, j;
+  enum { len1 = NLIMBS1 };
+  enum { len2 = NLIMBS1 };
+
+  for (k = len2; k > 0; )
+    prod_limbs[--k] = 0;
+  for (i = 0; i < len1; i++)
+    {
+      mp_limb_t digit1 = xlimbs[i];
+      mp_twolimb_t carry = 0;
+      for (j = 0; j < len2; j++)
+        {
+          mp_limb_t digit2 = ylimbs[j];
+          carry += (mp_twolimb_t) digit1 * (mp_twolimb_t) digit2;
+          carry += prod_limbs[i + j];
+          prod_limbs[i + j] = (mp_limb_t) carry;
+          carry = carry >> GMP_LIMB_BITS;
+        }
+      prod_limbs[i + len2] = (mp_limb_t) carry;
+    }
+}
+
+DOUBLE
+FUNC (DOUBLE x, DOUBLE y, DOUBLE z)
+{
+  if (isfinite (x) && isfinite (y))
+    {
+      if (isfinite (z))
+        {
+          /* x, y, z are all finite.  */
+          if (x == L_(0.0) || y == L_(0.0))
+            return z;
+          if (z == L_(0.0))
+            return x * y;
+          /* x, y, z are all non-zero.
+             The result is x * y + z.  */
+          {
+            int e;                  /* exponent of x * y + z */
+            int sign;
+            mp_limb_t sum[NLIMBS3];
+            size_t sum_len;
+
+            {
+              int xys;                /* sign of x * y */
+              int zs;                 /* sign of z */
+              int xye;                /* sum of exponents of x and y */
+              int ze;                 /* exponent of z */
+              mp_limb_t summand1[NLIMBS3];
+              size_t summand1_len;
+              mp_limb_t summand2[NLIMBS3];
+              size_t summand2_len;
+
+              {
+                mp_limb_t zlimbs[NLIMBS1];
+                mp_limb_t xylimbs[2 * NLIMBS1];
+
+                {
+                  DOUBLE xn;              /* normalized part of x */
+                  DOUBLE yn;              /* normalized part of y */
+                  DOUBLE zn;              /* normalized part of z */
+                  int xe;                 /* exponent of x */
+                  int ye;                 /* exponent of y */
+                  mp_limb_t xlimbs[NLIMBS1];
+                  mp_limb_t ylimbs[NLIMBS1];
+
+                  xys = 0;
+                  xn = x;
+                  if (x < 0)
+                    {
+                      xys = 1;
+                      xn = - x;
+                    }
+                  yn = y;
+                  if (y < 0)
+                    {
+                      xys = 1 - xys;
+                      yn = - y;
+                    }
+
+                  zs = 0;
+                  zn = z;
+                  if (z < 0)
+                    {
+                      zs = 1;
+                      zn = - z;
+                    }
+
+                  /* xn, yn, zn are all positive.
+                     The result is (-1)^xys * xn * yn + (-1)^zs * zn.  */
+                  xn = FREXP (xn, &xe);
+                  yn = FREXP (yn, &ye);
+                  zn = FREXP (zn, &ze);
+                  xye = xe + ye;
+                  /* xn, yn, zn are all < 1.0 and >= 0.5.
+                     The result is
+                       (-1)^xys * 2^xye * xn * yn + (-1)^zs * 2^ze * zn.  */
+                  if (xye < ze - MANT_BIT)
+                    {
+                      /* 2^xye * xn * yn < 2^xye <= 2^(ze-MANT_BIT-1)  */
+                      return z;
+                    }
+                  if (xye - 2 * MANT_BIT > ze)
+                    {
+                      /* 2^ze * zn < 2^ze <= 2^(xye-2*MANT_BIT-1).
+                         We cannot simply do
+                           return x * y;
+                         because it would round differently: A round-to-even
+                         in the multiplication can be a round-up or round-down
+                         here, due to z.  So replace z with a value that doesn't
+                         require the use of long bignums but that rounds the
+                         same way.  */
+                      zn = L_(0.5);
+                      ze = xye - 2 * MANT_BIT - 1;
+                    }
+                  /* Convert mantissas of xn, yn, zn to limb sequences:
+                     xlimbs = 2^MANT_BIT * xn
+                     ylimbs = 2^MANT_BIT * yn
+                     zlimbs = 2^MANT_BIT * zn  */
+                  decode (xn, xlimbs);
+                  decode (yn, ylimbs);
+                  decode (zn, zlimbs);
+                  /* Multiply the mantissas of xn and yn:
+                     xylimbs = xlimbs * ylimbs  */
+                  multiply (xlimbs, ylimbs, xylimbs);
+                }
+                /* The result is
+                     (-1)^xys * 2^(xye-2*MANT_BIT) * xylimbs
+                     + (-1)^zs * 2^(ze-MANT_BIT) * zlimbs.
+                   Compute
+                     e = min (xye-2*MANT_BIT, ze-MANT_BIT)
+                   then
+                     summand1 = 2^(xye-2*MANT_BIT-e) * xylimbs
+                     summand2 = 2^(ze-MANT_BIT-e) * zlimbs  */
+                e = MIN (xye - 2 * MANT_BIT, ze - MANT_BIT);
+                if (e == xye - 2 * MANT_BIT)
+                  {
+                    /* Simply copy the limbs of xylimbs.  */
+                    size_t i;
+                    for (i = 0; i < 2 * NLIMBS1; i++)
+                      summand1[i] = xylimbs[i];
+                    summand1_len = 2 * NLIMBS1;
+                  }
+                else
+                  {
+                    size_t ediff = xye - 2 * MANT_BIT - e;
+                    /* Left shift the limbs of xylimbs by ediff bits.  */
+                    size_t ldiff = ediff / GMP_LIMB_BITS;
+                    size_t shift = ediff % GMP_LIMB_BITS;
+                    size_t i;
+                    for (i = 0; i < ldiff; i++)
+                      summand1[i] = 0;
+                    if (shift > 0)
+                      {
+                        mp_limb_t carry = 0;
+                        for (i = 0; i < 2 * NLIMBS1; i++)
+                          {
+                            summand1[ldiff + i] = (xylimbs[i] << shift) | carry;
+                            carry = xylimbs[i] >> (GMP_LIMB_BITS - shift);
+                          }
+                        summand1[ldiff + 2 * NLIMBS1] = carry;
+                        summand1_len = ldiff + 2 * NLIMBS1 + 1;
+                      }
+                    else
+                      {
+                        for (i = 0; i < 2 * NLIMBS1; i++)
+                          summand1[ldiff + i] = xylimbs[i];
+                        summand1_len = ldiff + 2 * NLIMBS1;
+                      }
+                    /* Estimation of needed array size:
+                       ediff = (xye - 2 * MANT_BIT) - (ze - MANT_BIT) <= MANT_BIT + 1
+                       therefore
+                       summand1_len
+                         = (ediff + GMP_LIMB_BITS - 1) / GMP_LIMB_BITS + 2 * NLIMBS1
+                         <= (MANT_BIT + GMP_LIMB_BITS) / GMP_LIMB_BITS + 2 * NLIMBS1
+                         <= 3 * NLIMBS1 + 1
+                         = NLIMBS3  */
+                    if (!(summand1_len <= NLIMBS3))
+                      abort ();
+                  }
+                if (e == ze - MANT_BIT)
+                  {
+                    /* Simply copy the limbs of zlimbs.  */
+                    size_t i;
+                    for (i = 0; i < NLIMBS1; i++)
+                      summand2[i] = zlimbs[i];
+                    summand2_len = NLIMBS1;
+                  }
+                else
+                  {
+                    size_t ediff = ze - MANT_BIT - e;
+                    /* Left shift the limbs of zlimbs by ediff bits.  */
+                    size_t ldiff = ediff / GMP_LIMB_BITS;
+                    size_t shift = ediff % GMP_LIMB_BITS;
+                    size_t i;
+                    for (i = 0; i < ldiff; i++)
+                      summand2[i] = 0;
+                    if (shift > 0)
+                      {
+                        mp_limb_t carry = 0;
+                        for (i = 0; i < NLIMBS1; i++)
+                          {
+                            summand2[ldiff + i] = (zlimbs[i] << shift) | carry;
+                            carry = zlimbs[i] >> (GMP_LIMB_BITS - shift);
+                          }
+                        summand2[ldiff + NLIMBS1] = carry;
+                        summand2_len = ldiff + NLIMBS1 + 1;
+                      }
+                    else
+                      {
+                        for (i = 0; i < NLIMBS1; i++)
+                          summand2[ldiff + i] = zlimbs[i];
+                        summand2_len = ldiff + NLIMBS1;
+                      }
+                    /* Estimation of needed array size:
+                       ediff = (ze - MANT_BIT) - (xye - 2 * MANT_BIT) <= 2 * MANT_BIT
+                       therefore
+                       summand2_len
+                         = (ediff + GMP_LIMB_BITS - 1) / GMP_LIMB_BITS + NLIMBS1
+                         <= (2 * MANT_BIT + GMP_LIMB_BITS - 1) / GMP_LIMB_BITS + NLIMBS1
+                         <= 3 * NLIMBS1 + 1
+                         = NLIMBS3  */
+                    if (!(summand2_len <= NLIMBS3))
+                      abort ();
+                  }
+              }
+              /* The result is
+                   (-1)^xys * 2^e * summand1 + (-1)^zs * 2^e * summand2.  */
+              if (xys == zs)
+                {
+                  /* Perform an addition.  */
+                  size_t i;
+                  mp_limb_t carry;
+
+                  sign = xys;
+                  carry = 0;
+                  for (i = 0; i < MIN (summand1_len, summand2_len); i++)
+                    {
+                      mp_limb_t digit1 = summand1[i];
+                      mp_limb_t digit2 = summand2[i];
+                      sum[i] = digit1 + digit2 + carry;
+                      carry = (carry
+                               ? digit1 >= (mp_limb_t)-1 - digit2
+                               : digit1 > (mp_limb_t)-1 - digit2);
+                    }
+                  if (summand1_len > summand2_len)
+                    for (; i < summand1_len; i++)
+                      {
+                        mp_limb_t digit1 = summand1[i];
+                        sum[i] = carry + digit1;
+                        carry = carry && digit1 == (mp_limb_t)-1;
+                      }
+                  else
+                    for (; i < summand2_len; i++)
+                      {
+                        mp_limb_t digit2 = summand2[i];
+                        sum[i] = carry + digit2;
+                        carry = carry && digit2 == (mp_limb_t)-1;
+                      }
+                  if (carry)
+                    sum[i++] = carry;
+                  sum_len = i;
+                }
+              else
+                {
+                  /* Perform a subtraction.  */
+                  /* Compare summand1 and summand2 by magnitude.  */
+                  while (summand1[summand1_len - 1] == 0)
+                    summand1_len--;
+                  while (summand2[summand2_len - 1] == 0)
+                    summand2_len--;
+                  if (summand1_len > summand2_len)
+                    sign = xys;
+                  else if (summand1_len < summand2_len)
+                    sign = zs;
+                  else
+                    {
+                      size_t i = summand1_len;
+                      for (;;)
+                        {
+                          --i;
+                          if (summand1[i] > summand2[i])
+                            {
+                              sign = xys;
+                              break;
+                            }
+                          if (summand1[i] < summand2[i])
+                            {
+                              sign = zs;
+                              break;
+                            }
+                          if (i == 0)
+                            /* summand1 and summand2 are equal.  */
+                            return L_(0.0);
+                        }
+                    }
+                  if (sign == xys)
+                    {
+                      /* Compute summand1 - summand2.  */
+                      size_t i;
+                      mp_limb_t carry;
+
+                      carry = 0;
+                      for (i = 0; i < summand2_len; i++)
+                        {
+                          mp_limb_t digit1 = summand1[i];
+                          mp_limb_t digit2 = summand2[i];
+                          sum[i] = digit1 - digit2 - carry;
+                          carry = (carry ? digit1 <= digit2 : digit1 < digit2);
+                        }
+                      for (; i < summand1_len; i++)
+                        {
+                          mp_limb_t digit1 = summand1[i];
+                          sum[i] = digit1 - carry;
+                          carry = carry && digit1 == 0;
+                        }
+                      if (carry)
+                        abort ();
+                      sum_len = summand1_len;
+                    }
+                  else
+                    {
+                      /* Compute summand2 - summand1.  */
+                      size_t i;
+                      mp_limb_t carry;
+
+                      carry = 0;
+                      for (i = 0; i < summand1_len; i++)
+                        {
+                          mp_limb_t digit1 = summand1[i];
+                          mp_limb_t digit2 = summand2[i];
+                          sum[i] = digit2 - digit1 - carry;
+                          carry = (carry ? digit2 <= digit1 : digit2 < digit1);
+                        }
+                      for (; i < summand2_len; i++)
+                        {
+                          mp_limb_t digit2 = summand2[i];
+                          sum[i] = digit2 - carry;
+                          carry = carry && digit2 == 0;
+                        }
+                      if (carry)
+                        abort ();
+                      sum_len = summand2_len;
+                    }
+                }
+            }
+            /* The result is
+                 (-1)^sign * 2^e * sum.  */
+            /* Now perform the rounding to MANT_BIT mantissa bits.  */
+            while (sum[sum_len - 1] == 0)
+              sum_len--;
+            /* Here we know that the most significant limb, sum[sum_len - 1], is
+               non-zero.  */
+            {
+              /* How many bits the sum has.  */
+              unsigned int sum_bits =
+                integer_length (sum[sum_len - 1]) + (sum_len - 1) * GMP_LIMB_BITS;
+              /* How many bits to keep when rounding.  */
+              unsigned int keep_bits;
+              /* How many bits to round off.  */
+              unsigned int roundoff_bits;
+              if (e + (int) sum_bits >= MIN_EXP)
+                /* 2^e * sum >= 2^(MIN_EXP-1).
+                   result will be a normalized number.  */
+                keep_bits = MANT_BIT;
+              else if (e + (int) sum_bits >= MIN_EXP - MANT_BIT)
+                /* 2^e * sum >= 2^(MIN_EXP-MANT_BIT-1).
+                   result will be a denormalized number or rounded to zero.  */
+                keep_bits = e + (int) sum_bits - (MIN_EXP + MANT_BIT);
+              else
+                /* 2^e * sum < 2^(MIN_EXP-MANT_BIT-1).  Round to zero.  */
+                return L_(0.0);
+              /* Note: 0 <= keep_bits <= MANT_BIT.  */
+              if (sum_bits <= keep_bits)
+                {
+                  /* Nothing to do.  */
+                  roundoff_bits = 0;
+                  keep_bits = sum_bits;
+                }
+              else
+                {
+                  int round_up;
+                  roundoff_bits = sum_bits - keep_bits; /* > 0, <= sum_bits */
+                  {
+#if HAVE_FEGETROUND && defined FE_TOWARDZERO
+                    /* Cf. <http://pubs.opengroup.org/onlinepubs/9699919799/functions/fegetround.html> */
+                    int rounding_mode = fegetround ();
+                    if (rounding_mode == FE_TOWARDZERO)
+                      round_up = 0;
+                    else if (rounding_mode == FE_DOWNWARD)
+                      round_up = sign;
+                    else if (rounding_mode == FE_UPWARD)
+                      round_up = !sign;
+#else
+                    /* Cf. <http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/float.h.html> */
+                    int rounding_mode = FLT_ROUNDS;
+                    if (rounding_mode == 0) /* toward zero */
+                      round_up = 0;
+                    else if (rounding_mode == 3) /* toward negative infinity */
+                      round_up = sign;
+                    else if (rounding_mode == 2) /* toward positive infinity */
+                      round_up = !sign;
+#endif
+                    else
+                      {
+                        /* Round to nearest.  */
+                        round_up = 0;
+                        /* Test bit (roundoff_bits-1).  */
+                        if ((sum[(roundoff_bits - 1) / GMP_LIMB_BITS]
+                             >> ((roundoff_bits - 1) % GMP_LIMB_BITS)) & 1)
+                          {
+                            /* Test bits roundoff_bits-1 .. 0.  */
+                            bool halfway =
+                              ((sum[(roundoff_bits - 1) / GMP_LIMB_BITS]
+                                & (((mp_limb_t) 1 << ((roundoff_bits - 1) % GMP_LIMB_BITS)) - 1))
+                               == 0);
+                            if (halfway)
+                              {
+                                int i;
+                                for (i = (roundoff_bits - 1) / GMP_LIMB_BITS - 1; i >= 0; i--)
+                                  if (sum[i] != 0)
+                                    {
+                                      halfway = false;
+                                      break;
+                                    }
+                              }
+                            if (halfway)
+                              /* Round to even.  Test bit roundoff_bits.  */
+                              round_up = ((sum[roundoff_bits / GMP_LIMB_BITS]
+                                          >> (roundoff_bits % GMP_LIMB_BITS)) & 1);
+                            else
+                              /* Round up.  */
+                              round_up = 1;
+                          }
+                      }
+                  }
+                  /* Perform the rounding.  */
+                  {
+                    size_t i = roundoff_bits / GMP_LIMB_BITS;
+                    {
+                      size_t j = i;
+                      while (j > 0)
+                        sum[--j] = 0;
+                    }
+                    if (round_up)
+                      {
+                        /* Round up.  */
+                        sum[i] =
+                          (sum[i]
+                           | (((mp_limb_t) 1 << (roundoff_bits % GMP_LIMB_BITS)) - 1))
+                          + 1;
+                        if (sum[i] == 0)
+                          {
+                            /* Propagate carry.  */
+                            while (i < sum_len - 1)
+                              {
+                                ++i;
+                                sum[i] += 1;
+                                if (sum[i] != 0)
+                                  break;
+                              }
+                          }
+                        /* sum[i] is the most significant limb that was
+                           incremented.  */
+                        if (i == sum_len - 1 && (sum[i] & (sum[i] - 1)) == 0)
+                          {
+                            /* Through the carry, one more bit is needed.  */
+                            if (sum[i] != 0)
+                              sum_bits += 1;
+                            else
+                              {
+                                /* Instead of requiring one more limb of memory,
+                                   perform a shift by one bit, and adjust the
+                                   exponent.  */
+                                sum[i] = (mp_limb_t) 1 << (GMP_LIMB_BITS - 1);
+                                e += 1;
+                              }
+                            /* The bit sequence has the form 1000...000.  */
+                            keep_bits = 1;
+                          }
+                      }
+                    else
+                      {
+                        /* Round down.  */
+                        sum[i] &= ((mp_limb_t) -1 << (roundoff_bits % GMP_LIMB_BITS));
+                        if (i == sum_len - 1 && sum[i] == 0)
+                          /* The entire sum has become zero.  */
+                          return L_(0.0);
+                      }
+                  }
+                }
+              /* The result is
+                   (-1)^sign * 2^e * sum
+                 and here we know that
+                   2^(sum_bits-1) <= sum < 2^sum_bits,
+                 and sum is a multiple of 2^(sum_bits-keep_bits), where
+                   0 < keep_bits <= MANT_BIT  and  keep_bits <= sum_bits.
+                 (If keep_bits was initially 0, the rounding either returned zero
+                 or produced a bit sequence of the form 1000...000, setting
+                 keep_bits to 1.)  */
+              {
+                /* Split the keep_bits bits into chunks of at most 32 bits.  */
+                unsigned int chunk_count = (keep_bits - 1) / GMP_LIMB_BITS + 1;
+                /* 1 <= chunk_count <= ceil (sum_bits / GMP_LIMB_BITS) = sum_len. */
+                static const DOUBLE chunk_multiplier = /* 2^-GMP_LIMB_BITS */
+                  L_(1.0) / ((DOUBLE) (1 << (GMP_LIMB_BITS / 2))
+                             * (DOUBLE) (1 << ((GMP_LIMB_BITS + 1) / 2)));
+                unsigned int shift = sum_bits % GMP_LIMB_BITS;
+                DOUBLE fsum;
+                if (MANT_BIT <= GMP_LIMB_BITS)
+                  {
+                    /* Since keep_bits <= MANT_BIT <= GMP_LIMB_BITS,
+                       chunk_count is 1.  No need for a loop.  */
+                    if (shift == 0)
+                      fsum = (DOUBLE) sum[sum_len - 1];
+                    else
+                      fsum = (DOUBLE)
+                             ((sum[sum_len - 1] << (GMP_LIMB_BITS - shift))
+                              | (sum_len >= 2 ? sum[sum_len - 2] >> shift : 0));
+                  }
+                else
+                  {
+                    int k;
+                    k = chunk_count - 1;
+                    if (shift == 0)
+                      {
+                        /* First loop round.  */
+                        fsum = (DOUBLE) sum[sum_len - k - 1];
+                        /* General loop.  */
+                        while (--k >= 0)
+                          {
+                            fsum *= chunk_multiplier;
+                            fsum += (DOUBLE) sum[sum_len - k - 1];
+                          }
+                      }
+                    else
+                      {
+                        /* First loop round.  */
+                        fsum = (DOUBLE)
+                               ((sum[sum_len - k - 1] << (GMP_LIMB_BITS - shift))
+                                | (sum_len >= k + 2 ? sum[sum_len - k - 2] >> shift : 0));
+                        /* General loop.  */
+                        while (--k >= 0)
+                          {
+                            fsum *= chunk_multiplier;
+                            fsum += (DOUBLE)
+                                    ((sum[sum_len - k - 1] << (GMP_LIMB_BITS - shift))
+                                     | (sum[sum_len - k - 2] >> shift));
+                          }
+                      }
+                  }
+                fsum = LDEXP (fsum, e + (int) sum_bits - GMP_LIMB_BITS);
+                return (sign ? - fsum : fsum);
+              }
+            }
+          }
+        }
+      else
+        return z;
+    }
+  else
+    return (x * y) + z;
+}
index e0742ec..ab4f411 100644 (file)
@@ -507,6 +507,30 @@ _GL_WARN_ON_USE (floorl, "floorl is unportable - "
 #endif
 
 
+#if @GNULIB_FMA@
+# if @REPLACE_FMA@
+#  if !(defined __cplusplus && defined GNULIB_NAMESPACE)
+#   undef fma
+#   define fma rpl_fma
+#  endif
+_GL_FUNCDECL_RPL (fma, double, (double x, double y, double z));
+_GL_CXXALIAS_RPL (fma, double, (double x, double y, double z));
+# else
+#  if !@HAVE_FMA@
+_GL_FUNCDECL_SYS (fma, double, (double x, double y, double z));
+#  endif
+_GL_CXXALIAS_SYS (fma, double, (double x, double y, double z));
+# endif
+_GL_CXXALIASWARN (fma);
+#elif defined GNULIB_POSIXCHECK
+# undef fma
+# if HAVE_RAW_DECL_FMA
+_GL_WARN_ON_USE (fma, "fma is unportable - "
+                 "use gnulib module fma for portability");
+# endif
+#endif
+
+
 #if @GNULIB_FMODF@
 # if !@HAVE_FMODF@
 #  undef fmodf
diff --git a/m4/fegetround.m4 b/m4/fegetround.m4
new file mode 100644 (file)
index 0000000..37db5c8
--- /dev/null
@@ -0,0 +1,20 @@
+# fegetround.m4 serial 1
+dnl Copyright (C) 2011 Free Software Foundation, Inc.
+dnl This file is free software; the Free Software Foundation
+dnl gives unlimited permission to copy and/or distribute it,
+dnl with or without modifications, as long as this notice is preserved.
+
+AC_DEFUN([gl_FUNC_FEGETROUND],
+[
+  dnl Determine FEGETROUND_LIBM.
+  gl_MATHFUNC([fegetround], [int], [(void)], [#include <fenv.h>])
+  if test $gl_cv_func_fegetround_no_libm = no \
+     && test $gl_cv_func_fegetround_in_libm = no; then
+    HAVE_FEGETROUND=0
+  else
+    HAVE_FEGETROUND=1
+    AC_DEFINE([HAVE_FEGETROUND], [1],
+      [Define to 1 if you have the 'fegetround' function.])
+  fi
+  AC_SUBST([FEGETROUND_LIBM])
+])
diff --git a/m4/fma.m4 b/m4/fma.m4
new file mode 100644 (file)
index 0000000..3ce3f9a
--- /dev/null
+++ b/m4/fma.m4
@@ -0,0 +1,166 @@
+# fma.m4 serial 1
+dnl Copyright (C) 2011 Free Software Foundation, Inc.
+dnl This file is free software; the Free Software Foundation
+dnl gives unlimited permission to copy and/or distribute it,
+dnl with or without modifications, as long as this notice is preserved.
+
+AC_DEFUN([gl_FUNC_FMA],
+[
+  AC_REQUIRE([gl_MATH_H_DEFAULTS])
+
+  dnl Determine FMA_LIBM.
+  gl_MATHFUNC([fma], [double], [(double, double, double)])
+  if test $gl_cv_func_fma_no_libm = yes \
+     || test $gl_cv_func_fma_in_libm = yes; then
+    gl_FUNC_FMA_WORKS
+    case "$gl_cv_func_fma_works" in
+      *no) REPLACE_FMA=1 ;;
+    esac
+  else
+    HAVE_FMA=0
+  fi
+  if test $HAVE_FMA = 0 || test $REPLACE_FMA = 1; then
+    dnl Find libraries needed to link lib/fmal.c.
+    AC_REQUIRE([gl_FUNC_FREXP])
+    AC_REQUIRE([gl_FUNC_LDEXP])
+    AC_REQUIRE([gl_FUNC_FEGETROUND])
+    FMA_LIBM=
+    dnl Append $FREXP_LIBM to FMA_LIBM, avoiding gratuitous duplicates.
+    case " $FMA_LIBM " in
+      *" $FREXP_LIBM "*) ;;
+      *) FMA_LIBM="$FMA_LIBM $FREXP_LIBM" ;;
+    esac
+    dnl Append $LDEXP_LIBM to FMA_LIBM, avoiding gratuitous duplicates.
+    case " $FMA_LIBM " in
+      *" $LDEXP_LIBM "*) ;;
+      *) FMA_LIBM="$FMA_LIBM $LDEXP_LIBM" ;;
+    esac
+    dnl Append $FEGETROUND_LIBM to FMA_LIBM, avoiding gratuitous duplicates.
+    case " $FMA_LIBM " in
+      *" $FEGETROUND_LIBM "*) ;;
+      *) FMA_LIBM="$FMA_LIBM $FEGETROUND_LIBM" ;;
+    esac
+  fi
+  AC_SUBST([FMA_LIBM])
+])
+
+dnl Test whether fma() has any of the 7 known bugs of glibc 2.11.3 on x86_64.
+AC_DEFUN([gl_FUNC_FMA_WORKS],
+[
+  AC_REQUIRE([AC_PROG_CC])
+  AC_REQUIRE([AC_CANONICAL_HOST]) dnl for cross-compiles
+  AC_REQUIRE([gl_FUNC_LDEXP])
+  save_LIBS="$LIBS"
+  LIBS="$LIBS $FMA_LIBM $LDEXP_LIBM"
+  AC_CACHE_CHECK([whether fma works], [gl_cv_func_fma_works],
+    [
+      AC_RUN_IFELSE(
+        [AC_LANG_SOURCE([[
+#include <float.h>
+#include <math.h>
+double p0 = 0.0;
+int main()
+{
+  int failed_tests = 0;
+  /* These tests fail with glibc 2.11.3 on x86_64.  */
+  {
+    volatile double x = 1.5; /* 3 * 2^-1 */
+    volatile double y = x;
+    volatile double z = ldexp (1.0, DBL_MANT_DIG + 1); /* 2^54 */
+    /* x * y + z with infinite precision: 2^54 + 9 * 2^-2.
+       Lies between (2^52 + 0) * 2^2 and (2^52 + 1) * 2^2
+       and is closer to (2^52 + 1) * 2^2, therefore the rounding
+       must round up and produce (2^52 + 1) * 2^2.  */
+    volatile double expected = z + 4.0;
+    volatile double result = fma (x, y, z);
+    if (result != expected)
+      failed_tests |= 1;
+  }
+  {
+    volatile double x = 1.25; /* 2^0 + 2^-2 */
+    volatile double y = - x;
+    volatile double z = ldexp (1.0, DBL_MANT_DIG + 1); /* 2^54 */
+    /* x * y + z with infinite precision: 2^54 - 2^0 - 2^-1 - 2^-4.
+       Lies between (2^53 - 1) * 2^1 and 2^53 * 2^1
+       and is closer to (2^53 - 1) * 2^1, therefore the rounding
+       must round down and produce (2^53 - 1) * 2^1.  */
+    volatile double expected = (ldexp (1.0, DBL_MANT_DIG) - 1.0) * 2.0;
+    volatile double result = fma (x, y, z);
+    if (result != expected)
+      failed_tests |= 2;
+  }
+  {
+    volatile double x = 1.0 + ldexp (1.0, 1 - DBL_MANT_DIG); /* 2^0 + 2^-52 */
+    volatile double y = x;
+    volatile double z = 4.0; /* 2^2 */
+    /* x * y + z with infinite precision: 2^2 + 2^0 + 2^-51 + 2^-104.
+       Lies between (2^52 + 2^50) * 2^-50 and (2^52 + 2^50 + 1) * 2^-50
+       and is closer to (2^52 + 2^50 + 1) * 2^-50, therefore the rounding
+       must round up and produce (2^52 + 2^50 + 1) * 2^-50.  */
+    volatile double expected = 4.0 + 1.0 + ldexp (1.0, 3 - DBL_MANT_DIG);
+    volatile double result = fma (x, y, z);
+    if (result != expected)
+      failed_tests |= 4;
+  }
+  {
+    volatile double x = 1.0 + ldexp (1.0, 1 - DBL_MANT_DIG); /* 2^0 + 2^-52 */
+    volatile double y = - x;
+    volatile double z = 8.0; /* 2^3 */
+    /* x * y + z with infinite precision: 2^2 + 2^1 + 2^0 - 2^-51 - 2^-104.
+       Lies between (2^52 + 2^51 + 2^50 - 1) * 2^-50 and
+       (2^52 + 2^51 + 2^50) * 2^-50 and is closer to
+       (2^52 + 2^51 + 2^50 - 1) * 2^-50, therefore the rounding
+       must round down and produce (2^52 + 2^51 + 2^50 - 1) * 2^-50.  */
+    volatile double expected = 7.0 - ldexp (1.0, 3 - DBL_MANT_DIG);
+    volatile double result = fma (x, y, z);
+    if (result != expected)
+      failed_tests |= 8;
+  }
+  {
+    volatile double x = 1.25; /* 2^0 + 2^-2 */
+    volatile double y = - 0.75; /* - 2^0 + 2^-2 */
+    volatile double z = ldexp (1.0, DBL_MANT_DIG); /* 2^53 */
+    /* x * y + z with infinite precision: 2^53 - 2^0 + 2^-4.
+       Lies between (2^53 - 2^0) and 2^53 and is closer to (2^53 - 2^0),
+       therefore the rounding must round down and produce (2^53 - 2^0).  */
+    volatile double expected = ldexp (1.0, DBL_MANT_DIG) - 1.0;
+    volatile double result = fma (x, y, z);
+    if (result != expected)
+      failed_tests |= 16;
+  }
+  if ((DBL_MANT_DIG % 2) == 1)
+    {
+      volatile double x = 1.0 + ldexp (1.0, - (DBL_MANT_DIG + 1) / 2); /* 2^0 + 2^-27 */
+      volatile double y = 1.0 - ldexp (1.0, - (DBL_MANT_DIG + 1) / 2); /* 2^0 - 2^-27 */
+      volatile double z = - ldexp (1.0, DBL_MIN_EXP - DBL_MANT_DIG); /* - 2^-1074 */
+      /* x * y + z with infinite precision: 2^0 - 2^-54 - 2^-1074.
+         Lies between (2^53 - 1) * 2^-53 and 2^53 * 2^-53 and is closer to
+         (2^53 - 1) * 2^-53, therefore the rounding must round down and
+         produce (2^53 - 1) * 2^-53.  */
+      volatile double expected = 1.0 - ldexp (1.0, - DBL_MANT_DIG);
+      volatile double result = fma (x, y, z);
+      if (result != expected)
+        failed_tests |= 32;
+    }
+  {
+    double minus_inf = -1.0 / p0;
+    volatile double x = ldexp (1.0, DBL_MAX_EXP - 1);
+    volatile double y = ldexp (1.0, DBL_MAX_EXP - 1);
+    volatile double z = minus_inf;
+    volatile double result = fma (x, y, z);
+    if (!(result == minus_inf))
+      failed_tests |= 64;
+  }
+  return failed_tests;
+}]])],
+        [gl_cv_func_fma_works=yes],
+        [gl_cv_func_fma_works=no],
+        [dnl Guess no, even on glibc systems.
+         gl_cv_func_fma_works="guessing no"
+        ])
+    ])
+  LIBS="$save_LIBS"
+])
+
+# Prerequisites of lib/fma.c.
+AC_DEFUN([gl_PREREQ_FMA], [:])
index e5a2892..878aaff 100644 (file)
@@ -1,4 +1,4 @@
-# math_h.m4 serial 53
+# math_h.m4 serial 54
 dnl Copyright (C) 2007-2011 Free Software Foundation, Inc.
 dnl This file is free software; the Free Software Foundation
 dnl gives unlimited permission to copy and/or distribute it,
@@ -41,7 +41,7 @@ AC_DEFUN([gl_MATH_H],
   gl_WARN_ON_USE_PREPARE([[#include <math.h>]],
     [acosf acosl asinf asinl atanf atanl
      ceilf ceill copysign copysignf copysignl cosf cosl coshf
-     expf expl fabsf floorf floorl fmodf frexpf frexpl
+     expf expl fabsf floorf floorl fma fmodf frexpf frexpl
      ldexpf ldexpl logb logf logl log10f modff powf
      rint rintf rintl round roundf roundl sinf sinl sinhf sqrtf sqrtl
      tanf tanl tanhf trunc truncf truncl])
@@ -80,6 +80,7 @@ AC_DEFUN([gl_MATH_H_DEFAULTS],
   GNULIB_FLOOR=0;     AC_SUBST([GNULIB_FLOOR])
   GNULIB_FLOORF=0;    AC_SUBST([GNULIB_FLOORF])
   GNULIB_FLOORL=0;    AC_SUBST([GNULIB_FLOORL])
+  GNULIB_FMA=0;       AC_SUBST([GNULIB_FMA])
   GNULIB_FMODF=0;     AC_SUBST([GNULIB_FMODF])
   GNULIB_FREXPF=0;    AC_SUBST([GNULIB_FREXPF])
   GNULIB_FREXP=0;     AC_SUBST([GNULIB_FREXP])
@@ -133,6 +134,7 @@ AC_DEFUN([gl_MATH_H_DEFAULTS],
   HAVE_EXPF=1;                 AC_SUBST([HAVE_EXPF])
   HAVE_EXPL=1;                 AC_SUBST([HAVE_EXPL])
   HAVE_FABSF=1;                AC_SUBST([HAVE_FABSF])
+  HAVE_FMA=1;                  AC_SUBST([HAVE_FMA])
   HAVE_FMODF=1;                AC_SUBST([HAVE_FMODF])
   HAVE_FREXPF=1;               AC_SUBST([HAVE_FREXPF])
   HAVE_ISNANF=1;               AC_SUBST([HAVE_ISNANF])
@@ -183,6 +185,7 @@ AC_DEFUN([gl_MATH_H_DEFAULTS],
   REPLACE_FLOOR=0;             AC_SUBST([REPLACE_FLOOR])
   REPLACE_FLOORF=0;            AC_SUBST([REPLACE_FLOORF])
   REPLACE_FLOORL=0;            AC_SUBST([REPLACE_FLOORL])
+  REPLACE_FMA=0;               AC_SUBST([REPLACE_FMA])
   REPLACE_FREXPF=0;            AC_SUBST([REPLACE_FREXPF])
   REPLACE_FREXP=0;             AC_SUBST([REPLACE_FREXP])
   REPLACE_FREXPL=0;            AC_SUBST([REPLACE_FREXPL])
diff --git a/modules/fma b/modules/fma
new file mode 100644 (file)
index 0000000..8c8057a
--- /dev/null
@@ -0,0 +1,41 @@
+Description:
+fma() function: fused multiply-add.
+
+Files:
+lib/fma.c
+lib/float+.h
+m4/fma.m4
+m4/fegetround.m4
+m4/mathfunc.m4
+
+Depends-on:
+math
+float           [test $HAVE_FMA = 0 || test $REPLACE_FMA = 1]
+stdbool         [test $HAVE_FMA = 0 || test $REPLACE_FMA = 1]
+verify          [test $HAVE_FMA = 0 || test $REPLACE_FMA = 1]
+isfinite        [test $HAVE_FMA = 0 || test $REPLACE_FMA = 1]
+integer_length  [test $HAVE_FMA = 0 || test $REPLACE_FMA = 1]
+frexp           [test $HAVE_FMA = 0 || test $REPLACE_FMA = 1]
+ldexp           [test $HAVE_FMA = 0 || test $REPLACE_FMA = 1]
+
+configure.ac:
+gl_FUNC_FMA
+if test $HAVE_FMA = 0 || test $REPLACE_FMA = 1; then
+  AC_LIBOBJ([fma])
+  gl_PREREQ_FMA
+fi
+gl_MATH_MODULE_INDICATOR([fma])
+
+Makefile.am:
+
+Include:
+<math.h>
+
+Link:
+$(FMA_LIBM)
+
+License:
+LGPL
+
+Maintainer:
+Bruno Haible
index d3b14de..6b1958b 100644 (file)
@@ -50,6 +50,7 @@ math.h: math.in.h $(top_builddir)/config.status $(CXXDEFS_H) $(ARG_NONNULL_H) $(
              -e 's/@''GNULIB_FLOOR''@/$(GNULIB_FLOOR)/g' \
              -e 's/@''GNULIB_FLOORF''@/$(GNULIB_FLOORF)/g' \
              -e 's/@''GNULIB_FLOORL''@/$(GNULIB_FLOORL)/g' \
+             -e 's/@''GNULIB_FMA''@/$(GNULIB_FMA)/g' \
              -e 's/@''GNULIB_FMODF''@/$(GNULIB_FMODF)/g' \
              -e 's/@''GNULIB_FREXPF''@/$(GNULIB_FREXPF)/g' \
              -e 's/@''GNULIB_FREXP''@/$(GNULIB_FREXP)/g' \
@@ -103,6 +104,7 @@ math.h: math.in.h $(top_builddir)/config.status $(CXXDEFS_H) $(ARG_NONNULL_H) $(
              -e 's|@''HAVE_EXPF''@|$(HAVE_EXPF)|g' \
              -e 's|@''HAVE_EXPL''@|$(HAVE_EXPL)|g' \
              -e 's|@''HAVE_FABSF''@|$(HAVE_FABSF)|g' \
+             -e 's|@''HAVE_FMA''@|$(HAVE_FMA)|g' \
              -e 's|@''HAVE_FMODF''@|$(HAVE_FMODF)|g' \
              -e 's|@''HAVE_FREXPF''@|$(HAVE_FREXPF)|g' \
              -e 's|@''HAVE_ISNANF''@|$(HAVE_ISNANF)|g' \
@@ -154,6 +156,7 @@ math.h: math.in.h $(top_builddir)/config.status $(CXXDEFS_H) $(ARG_NONNULL_H) $(
              -e 's|@''REPLACE_FLOOR''@|$(REPLACE_FLOOR)|g' \
              -e 's|@''REPLACE_FLOORF''@|$(REPLACE_FLOORF)|g' \
              -e 's|@''REPLACE_FLOORL''@|$(REPLACE_FLOORL)|g' \
+             -e 's|@''REPLACE_FMA''@|$(REPLACE_FMA)|g' \
              -e 's|@''REPLACE_FREXPF''@|$(REPLACE_FREXPF)|g' \
              -e 's|@''REPLACE_FREXP''@|$(REPLACE_FREXP)|g' \
              -e 's|@''REPLACE_FREXPL''@|$(REPLACE_FREXPL)|g' \