lib/fma.c

   1 /* Fused multiply-add.
   2    Copyright (C) 2007, 2009, 2011-2012 Free Software Foundation, Inc.
   3
   4    This program is free software: you can redistribute it and/or modify
   5    it under the terms of the GNU General Public License as published by
   6    the Free Software Foundation; either version 3 of the License, or
   7    (at your option) any later version.
   8
   9    This program is distributed in the hope that it will be useful,
  10    but WITHOUT ANY WARRANTY; without even the implied warranty of
  11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12    GNU General Public License for more details.
  13
  14    You should have received a copy of the GNU General Public License
  15    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
  16
  17 /* Written by Bruno Haible <bruno@clisp.org>, 2011.  */
  18
  19 #if ! defined USE_LONG_DOUBLE
  20 # include <config.h>
  21 #endif
  22
  23 /* Specification.  */
  24 #include <math.h>
  25
  26 #include <stdbool.h>
  27 #include <stdlib.h>
  28
  29 #if HAVE_FEGETROUND
  30 # include <fenv.h>
  31 #endif
  32
  33 #include "float+.h"
  34 #include "integer_length.h"
  35 #include "verify.h"
  36
  37 #ifdef USE_LONG_DOUBLE
  38 # define FUNC fmal
  39 # define DOUBLE long double
  40 # define FREXP frexpl
  41 # define LDEXP ldexpl
  42 # define MIN_EXP LDBL_MIN_EXP
  43 # define MANT_BIT LDBL_MANT_BIT
  44 # define L_(literal) literal##L
  45 #elif ! defined USE_FLOAT
  46 # define FUNC fma
  47 # define DOUBLE double
  48 # define FREXP frexp
  49 # define LDEXP ldexp
  50 # define MIN_EXP DBL_MIN_EXP
  51 # define MANT_BIT DBL_MANT_BIT
  52 # define L_(literal) literal
  53 #else /* defined USE_FLOAT */
  54 # define FUNC fmaf
  55 # define DOUBLE float
  56 # define FREXP frexpf
  57 # define LDEXP ldexpf
  58 # define MIN_EXP FLT_MIN_EXP
  59 # define MANT_BIT FLT_MANT_BIT
  60 # define L_(literal) literal##f
  61 #endif
  62
  63 #undef MAX
  64 #define MAX(a,b) ((a) > (b) ? (a) : (b))
  65
  66 #undef MIN
  67 #define MIN(a,b) ((a) < (b) ? (a) : (b))
  68
  69 /* MSVC with option -fp:strict refuses to compile constant initializers that
  70    contain floating-point operations.  Pacify this compiler.  */
  71 #ifdef _MSC_VER
  72 # pragma fenv_access (off)
  73 #endif
  74
  75 /* It is possible to write an implementation of fused multiply-add with
  76    floating-point operations alone.  See
  77      Sylvie Boldo, Guillaume Melquiond:
  78      Emulation of FMA and correctly-rounded sums: proved algorithms using
  79      rounding to odd.
  80      <http://www.lri.fr/~melquion/doc/08-tc.pdf>
  81    But is it complicated.
  82    Here we take the simpler (and probably slower) approach of doing
  83    multi-precision arithmetic.  */
  84
  85 /* We use the naming conventions of GNU gmp, but vastly simpler (and slower)
  86    algorithms.  */
  87
  88 typedef unsigned int mp_limb_t;
  89 #define GMP_LIMB_BITS 32
  90 verify (sizeof (mp_limb_t) * CHAR_BIT == GMP_LIMB_BITS);
  91
  92 typedef unsigned long long mp_twolimb_t;
  93 #define GMP_TWOLIMB_BITS 64
  94 verify (sizeof (mp_twolimb_t) * CHAR_BIT == GMP_TWOLIMB_BITS);
  95
  96 /* Number of limbs needed for a single DOUBLE.  */
  97 #define NLIMBS1 ((MANT_BIT + GMP_LIMB_BITS - 1) / GMP_LIMB_BITS)
  98
  99 /* Number of limbs needed for the accumulator.  */
 100 #define NLIMBS3 (3 * NLIMBS1 + 1)
 101
 102 /* Assuming 0.5 <= x < 1.0:
 103    Convert the mantissa (x * 2^DBL_MANT_BIT) to a sequence of limbs.  */
 104 static void
 105 decode (DOUBLE x, mp_limb_t limbs[NLIMBS1])
 106 {
 107   /* I'm not sure whether it's safe to cast a 'double' value between
 108      2^31 and 2^32 to 'unsigned int', therefore play safe and cast only
 109      'double' values between 0 and 2^31 (to 'unsigned int' or 'int',
 110      doesn't matter).
 111      So, we split the MANT_BIT bits of x into a number of chunks of
 112      at most 31 bits.  */
 113   enum { chunk_count = (MANT_BIT - 1) / 31 + 1 };
 114   /* Variables used for storing the bits limb after limb.  */
 115   mp_limb_t *p = limbs + NLIMBS1 - 1;
 116   mp_limb_t accu = 0;
 117   unsigned int bits_needed = MANT_BIT - (NLIMBS1 - 1) * GMP_LIMB_BITS;
 118   /* The bits bits_needed-1...0 need to be ORed into the accu.
 119      1 <= bits_needed <= GMP_LIMB_BITS.  */
 120   /* Unroll the first 4 loop rounds.  */
 121   if (chunk_count > 0)
 122     {
 123       /* Here we still have MANT_BIT-0*31 bits to extract from x.  */
 124       enum { chunk_bits = MIN (31, MANT_BIT - 0 * 31) }; /* > 0, <= 31 */
 125       mp_limb_t d;
 126       x *= (mp_limb_t) 1 << chunk_bits;
 127       d = (int) x; /* 0 <= d < 2^chunk_bits.  */
 128       x -= d;
 129       if (!(x >= L_(0.0) && x < L_(1.0)))
 130         abort ();
 131       if (bits_needed < chunk_bits)
 132         {
 133           /* store bits_needed bits */
 134           accu |= d >> (chunk_bits - bits_needed);
 135           *p = accu;
 136           if (p == limbs)
 137             goto done;
 138           p--;
 139           /* hold (chunk_bits - bits_needed) bits */
 140           accu = d << (GMP_LIMB_BITS - (chunk_bits - bits_needed));
 141           bits_needed = GMP_LIMB_BITS - (chunk_bits - bits_needed);
 142         }
 143       else
 144         {
 145           /* store chunk_bits bits */
 146           accu |= d << (bits_needed - chunk_bits);
 147           bits_needed -= chunk_bits;
 148           if (bits_needed == 0)
 149             {
 150               *p = accu;
 151               if (p == limbs)
 152                 goto done;
 153               p--;
 154               accu = 0;
 155               bits_needed = GMP_LIMB_BITS;
 156             }
 157         }
 158     }
 159   if (chunk_count > 1)
 160     {
 161       /* Here we still have MANT_BIT-1*31 bits to extract from x.  */
 162       enum { chunk_bits = MIN (31, MAX (MANT_BIT - 1 * 31, 0)) }; /* > 0, <= 31 */
 163       mp_limb_t d;
 164       x *= (mp_limb_t) 1 << chunk_bits;
 165       d = (int) x; /* 0 <= d < 2^chunk_bits.  */
 166       x -= d;
 167       if (!(x >= L_(0.0) && x < L_(1.0)))
 168         abort ();
 169       if (bits_needed < chunk_bits)
 170         {
 171           /* store bits_needed bits */
 172           accu |= d >> (chunk_bits - bits_needed);
 173           *p = accu;
 174           if (p == limbs)
 175             goto done;
 176           p--;
 177           /* hold (chunk_bits - bits_needed) bits */
 178           accu = d << (GMP_LIMB_BITS - (chunk_bits - bits_needed));
 179           bits_needed = GMP_LIMB_BITS - (chunk_bits - bits_needed);
 180         }
 181       else
 182         {
 183           /* store chunk_bits bits */
 184           accu |= d << (bits_needed - chunk_bits);
 185           bits_needed -= chunk_bits;
 186           if (bits_needed == 0)
 187             {
 188               *p = accu;
 189               if (p == limbs)
 190                 goto done;
 191               p--;
 192               accu = 0;
 193               bits_needed = GMP_LIMB_BITS;
 194             }
 195         }
 196     }
 197   if (chunk_count > 2)
 198     {
 199       /* Here we still have MANT_BIT-2*31 bits to extract from x.  */
 200       enum { chunk_bits = MIN (31, MAX (MANT_BIT - 2 * 31, 0)) }; /* > 0, <= 31 */
 201       mp_limb_t d;
 202       x *= (mp_limb_t) 1 << chunk_bits;
 203       d = (int) x; /* 0 <= d < 2^chunk_bits.  */
 204       x -= d;
 205       if (!(x >= L_(0.0) && x < L_(1.0)))
 206         abort ();
 207       if (bits_needed < chunk_bits)
 208         {
 209           /* store bits_needed bits */
 210           accu |= d >> (chunk_bits - bits_needed);
 211           *p = accu;
 212           if (p == limbs)
 213             goto done;
 214           p--;
 215           /* hold (chunk_bits - bits_needed) bits */
 216           accu = d << (GMP_LIMB_BITS - (chunk_bits - bits_needed));
 217           bits_needed = GMP_LIMB_BITS - (chunk_bits - bits_needed);
 218         }
 219       else
 220         {
 221           /* store chunk_bits bits */
 222           accu |= d << (bits_needed - chunk_bits);
 223           bits_needed -= chunk_bits;
 224           if (bits_needed == 0)
 225             {
 226               *p = accu;
 227               if (p == limbs)
 228                 goto done;
 229               p--;
 230               accu = 0;
 231               bits_needed = GMP_LIMB_BITS;
 232             }
 233         }
 234     }
 235   if (chunk_count > 3)
 236     {
 237       /* Here we still have MANT_BIT-3*31 bits to extract from x.  */
 238       enum { chunk_bits = MIN (31, MAX (MANT_BIT - 3 * 31, 0)) }; /* > 0, <= 31 */
 239       mp_limb_t d;
 240       x *= (mp_limb_t) 1 << chunk_bits;
 241       d = (int) x; /* 0 <= d < 2^chunk_bits.  */
 242       x -= d;
 243       if (!(x >= L_(0.0) && x < L_(1.0)))
 244         abort ();
 245       if (bits_needed < chunk_bits)
 246         {
 247           /* store bits_needed bits */
 248           accu |= d >> (chunk_bits - bits_needed);
 249           *p = accu;
 250           if (p == limbs)
 251             goto done;
 252           p--;
 253           /* hold (chunk_bits - bits_needed) bits */
 254           accu = d << (GMP_LIMB_BITS - (chunk_bits - bits_needed));
 255           bits_needed = GMP_LIMB_BITS - (chunk_bits - bits_needed);
 256         }
 257       else
 258         {
 259           /* store chunk_bits bits */
 260           accu |= d << (bits_needed - chunk_bits);
 261           bits_needed -= chunk_bits;
 262           if (bits_needed == 0)
 263             {
 264               *p = accu;
 265               if (p == limbs)
 266                 goto done;
 267               p--;
 268               accu = 0;
 269               bits_needed = GMP_LIMB_BITS;
 270             }
 271         }
 272     }
 273   if (chunk_count > 4)
 274     {
 275       /* Here we still have MANT_BIT-4*31 bits to extract from x.  */
 276       /* Generic loop.  */
 277       size_t k;
 278       for (k = 4; k < chunk_count; k++)
 279         {
 280           size_t chunk_bits = MIN (31, MANT_BIT - k * 31); /* > 0, <= 31 */
 281           mp_limb_t d;
 282           x *= (mp_limb_t) 1 << chunk_bits;
 283           d = (int) x; /* 0 <= d < 2^chunk_bits.  */
 284           x -= d;
 285           if (!(x >= L_(0.0) && x < L_(1.0)))
 286             abort ();
 287           if (bits_needed < chunk_bits)
 288             {
 289               /* store bits_needed bits */
 290               accu |= d >> (chunk_bits - bits_needed);
 291               *p = accu;
 292               if (p == limbs)
 293                 goto done;
 294               p--;
 295               /* hold (chunk_bits - bits_needed) bits */
 296               accu = d << (GMP_LIMB_BITS - (chunk_bits - bits_needed));
 297               bits_needed = GMP_LIMB_BITS - (chunk_bits - bits_needed);
 298             }
 299           else
 300             {
 301               /* store chunk_bits bits */
 302               accu |= d << (bits_needed - chunk_bits);
 303               bits_needed -= chunk_bits;
 304               if (bits_needed == 0)
 305                 {
 306                   *p = accu;
 307                   if (p == limbs)
 308                     goto done;
 309                   p--;
 310                   accu = 0;
 311                   bits_needed = GMP_LIMB_BITS;
 312                 }
 313             }
 314         }
 315     }
 316   /* We shouldn't get here.  */
 317   abort ();
 318
 319  done: ;
 320 #ifndef USE_LONG_DOUBLE /* On FreeBSD 6.1/x86, 'long double' numbers sometimes
 321                            have excess precision.  */
 322   if (!(x == L_(0.0)))
 323     abort ();
 324 #endif
 325 }
 326
 327 /* Multiply two sequences of limbs.  */
 328 static void
 329 multiply (mp_limb_t xlimbs[NLIMBS1], mp_limb_t ylimbs[NLIMBS1],
 330           mp_limb_t prod_limbs[2 * NLIMBS1])
 331 {
 332   size_t k, i, j;
 333   enum { len1 = NLIMBS1 };
 334   enum { len2 = NLIMBS1 };
 335
 336   for (k = len2; k > 0; )
 337     prod_limbs[--k] = 0;
 338   for (i = 0; i < len1; i++)
 339     {
 340       mp_limb_t digit1 = xlimbs[i];
 341       mp_twolimb_t carry = 0;
 342       for (j = 0; j < len2; j++)
 343         {
 344           mp_limb_t digit2 = ylimbs[j];
 345           carry += (mp_twolimb_t) digit1 * (mp_twolimb_t) digit2;
 346           carry += prod_limbs[i + j];
 347           prod_limbs[i + j] = (mp_limb_t) carry;
 348           carry = carry >> GMP_LIMB_BITS;
 349         }
 350       prod_limbs[i + len2] = (mp_limb_t) carry;
 351     }
 352 }
 353
 354 DOUBLE
 355 FUNC (DOUBLE x, DOUBLE y, DOUBLE z)
 356 {
 357   if (isfinite (x) && isfinite (y))
 358     {
 359       if (isfinite (z))
 360         {
 361           /* x, y, z are all finite.  */
 362           if (x == L_(0.0) || y == L_(0.0))
 363             return z;
 364           if (z == L_(0.0))
 365             return x * y;
 366           /* x, y, z are all non-zero.
 367              The result is x * y + z.  */
 368           {
 369             int e;                  /* exponent of x * y + z */
 370             int sign;
 371             mp_limb_t sum[NLIMBS3];
 372             size_t sum_len;
 373
 374             {
 375               int xys;                /* sign of x * y */
 376               int zs;                 /* sign of z */
 377               int xye;                /* sum of exponents of x and y */
 378               int ze;                 /* exponent of z */
 379               mp_limb_t summand1[NLIMBS3];
 380               size_t summand1_len;
 381               mp_limb_t summand2[NLIMBS3];
 382               size_t summand2_len;
 383
 384               {
 385                 mp_limb_t zlimbs[NLIMBS1];
 386                 mp_limb_t xylimbs[2 * NLIMBS1];
 387
 388                 {
 389                   DOUBLE xn;              /* normalized part of x */
 390                   DOUBLE yn;              /* normalized part of y */
 391                   DOUBLE zn;              /* normalized part of z */
 392                   int xe;                 /* exponent of x */
 393                   int ye;                 /* exponent of y */
 394                   mp_limb_t xlimbs[NLIMBS1];
 395                   mp_limb_t ylimbs[NLIMBS1];
 396
 397                   xys = 0;
 398                   xn = x;
 399                   if (x < 0)
 400                     {
 401                       xys = 1;
 402                       xn = - x;
 403                     }
 404                   yn = y;
 405                   if (y < 0)
 406                     {
 407                       xys = 1 - xys;
 408                       yn = - y;
 409                     }
 410
 411                   zs = 0;
 412                   zn = z;
 413                   if (z < 0)
 414                     {
 415                       zs = 1;
 416                       zn = - z;
 417                     }
 418
 419                   /* xn, yn, zn are all positive.
 420                      The result is (-1)^xys * xn * yn + (-1)^zs * zn.  */
 421                   xn = FREXP (xn, &xe);
 422                   yn = FREXP (yn, &ye);
 423                   zn = FREXP (zn, &ze);
 424                   xye = xe + ye;
 425                   /* xn, yn, zn are all < 1.0 and >= 0.5.
 426                      The result is
 427                        (-1)^xys * 2^xye * xn * yn + (-1)^zs * 2^ze * zn.  */
 428                   if (xye < ze - MANT_BIT)
 429                     {
 430                       /* 2^xye * xn * yn < 2^xye <= 2^(ze-MANT_BIT-1)  */
 431                       return z;
 432                     }
 433                   if (xye - 2 * MANT_BIT > ze)
 434                     {
 435                       /* 2^ze * zn < 2^ze <= 2^(xye-2*MANT_BIT-1).
 436                          We cannot simply do
 437                            return x * y;
 438                          because it would round differently: A round-to-even
 439                          in the multiplication can be a round-up or round-down
 440                          here, due to z.  So replace z with a value that doesn't
 441                          require the use of long bignums but that rounds the
 442                          same way.  */
 443                       zn = L_(0.5);
 444                       ze = xye - 2 * MANT_BIT - 1;
 445                     }
 446                   /* Convert mantissas of xn, yn, zn to limb sequences:
 447                      xlimbs = 2^MANT_BIT * xn
 448                      ylimbs = 2^MANT_BIT * yn
 449                      zlimbs = 2^MANT_BIT * zn  */
 450                   decode (xn, xlimbs);
 451                   decode (yn, ylimbs);
 452                   decode (zn, zlimbs);
 453                   /* Multiply the mantissas of xn and yn:
 454                      xylimbs = xlimbs * ylimbs  */
 455                   multiply (xlimbs, ylimbs, xylimbs);
 456                 }
 457                 /* The result is
 458                      (-1)^xys * 2^(xye-2*MANT_BIT) * xylimbs
 459                      + (-1)^zs * 2^(ze-MANT_BIT) * zlimbs.
 460                    Compute
 461                      e = min (xye-2*MANT_BIT, ze-MANT_BIT)
 462                    then
 463                      summand1 = 2^(xye-2*MANT_BIT-e) * xylimbs
 464                      summand2 = 2^(ze-MANT_BIT-e) * zlimbs  */
 465                 e = MIN (xye - 2 * MANT_BIT, ze - MANT_BIT);
 466                 if (e == xye - 2 * MANT_BIT)
 467                   {
 468                     /* Simply copy the limbs of xylimbs.  */
 469                     size_t i;
 470                     for (i = 0; i < 2 * NLIMBS1; i++)
 471                       summand1[i] = xylimbs[i];
 472                     summand1_len = 2 * NLIMBS1;
 473                   }
 474                 else
 475                   {
 476                     size_t ediff = xye - 2 * MANT_BIT - e;
 477                     /* Left shift the limbs of xylimbs by ediff bits.  */
 478                     size_t ldiff = ediff / GMP_LIMB_BITS;
 479                     size_t shift = ediff % GMP_LIMB_BITS;
 480                     size_t i;
 481                     for (i = 0; i < ldiff; i++)
 482                       summand1[i] = 0;
 483                     if (shift > 0)
 484                       {
 485                         mp_limb_t carry = 0;
 486                         for (i = 0; i < 2 * NLIMBS1; i++)
 487                           {
 488                             summand1[ldiff + i] = (xylimbs[i] << shift) | carry;
 489                             carry = xylimbs[i] >> (GMP_LIMB_BITS - shift);
 490                           }
 491                         summand1[ldiff + 2 * NLIMBS1] = carry;
 492                         summand1_len = ldiff + 2 * NLIMBS1 + 1;
 493                       }
 494                     else
 495                       {
 496                         for (i = 0; i < 2 * NLIMBS1; i++)
 497                           summand1[ldiff + i] = xylimbs[i];
 498                         summand1_len = ldiff + 2 * NLIMBS1;
 499                       }
 500                     /* Estimation of needed array size:
 501                        ediff = (xye - 2 * MANT_BIT) - (ze - MANT_BIT) <= MANT_BIT + 1
 502                        therefore
 503                        summand1_len
 504                          = (ediff + GMP_LIMB_BITS - 1) / GMP_LIMB_BITS + 2 * NLIMBS1
 505                          <= (MANT_BIT + GMP_LIMB_BITS) / GMP_LIMB_BITS + 2 * NLIMBS1
 506                          <= 3 * NLIMBS1 + 1
 507                          = NLIMBS3  */
 508                     if (!(summand1_len <= NLIMBS3))
 509                       abort ();
 510                   }
 511                 if (e == ze - MANT_BIT)
 512                   {
 513                     /* Simply copy the limbs of zlimbs.  */
 514                     size_t i;
 515                     for (i = 0; i < NLIMBS1; i++)
 516                       summand2[i] = zlimbs[i];
 517                     summand2_len = NLIMBS1;
 518                   }
 519                 else
 520                   {
 521                     size_t ediff = ze - MANT_BIT - e;
 522                     /* Left shift the limbs of zlimbs by ediff bits.  */
 523                     size_t ldiff = ediff / GMP_LIMB_BITS;
 524                     size_t shift = ediff % GMP_LIMB_BITS;
 525                     size_t i;
 526                     for (i = 0; i < ldiff; i++)
 527                       summand2[i] = 0;
 528                     if (shift > 0)
 529                       {
 530                         mp_limb_t carry = 0;
 531                         for (i = 0; i < NLIMBS1; i++)
 532                           {
 533                             summand2[ldiff + i] = (zlimbs[i] << shift) | carry;
 534                             carry = zlimbs[i] >> (GMP_LIMB_BITS - shift);
 535                           }
 536                         summand2[ldiff + NLIMBS1] = carry;
 537                         summand2_len = ldiff + NLIMBS1 + 1;
 538                       }
 539                     else
 540                       {
 541                         for (i = 0; i < NLIMBS1; i++)
 542                           summand2[ldiff + i] = zlimbs[i];
 543                         summand2_len = ldiff + NLIMBS1;
 544                       }
 545                     /* Estimation of needed array size:
 546                        ediff = (ze - MANT_BIT) - (xye - 2 * MANT_BIT) <= 2 * MANT_BIT
 547                        therefore
 548                        summand2_len
 549                          = (ediff + GMP_LIMB_BITS - 1) / GMP_LIMB_BITS + NLIMBS1
 550                          <= (2 * MANT_BIT + GMP_LIMB_BITS - 1) / GMP_LIMB_BITS + NLIMBS1
 551                          <= 3 * NLIMBS1 + 1
 552                          = NLIMBS3  */
 553                     if (!(summand2_len <= NLIMBS3))
 554                       abort ();
 555                   }
 556               }
 557               /* The result is
 558                    (-1)^xys * 2^e * summand1 + (-1)^zs * 2^e * summand2.  */
 559               if (xys == zs)
 560                 {
 561                   /* Perform an addition.  */
 562                   size_t i;
 563                   mp_limb_t carry;
 564
 565                   sign = xys;
 566                   carry = 0;
 567                   for (i = 0; i < MIN (summand1_len, summand2_len); i++)
 568                     {
 569                       mp_limb_t digit1 = summand1[i];
 570                       mp_limb_t digit2 = summand2[i];
 571                       sum[i] = digit1 + digit2 + carry;
 572                       carry = (carry
 573                                ? digit1 >= (mp_limb_t)-1 - digit2
 574                                : digit1 > (mp_limb_t)-1 - digit2);
 575                     }
 576                   if (summand1_len > summand2_len)
 577                     for (; i < summand1_len; i++)
 578                       {
 579                         mp_limb_t digit1 = summand1[i];
 580                         sum[i] = carry + digit1;
 581                         carry = carry && digit1 == (mp_limb_t)-1;
 582                       }
 583                   else
 584                     for (; i < summand2_len; i++)
 585                       {
 586                         mp_limb_t digit2 = summand2[i];
 587                         sum[i] = carry + digit2;
 588                         carry = carry && digit2 == (mp_limb_t)-1;
 589                       }
 590                   if (carry)
 591                     sum[i++] = carry;
 592                   sum_len = i;
 593                 }
 594               else
 595                 {
 596                   /* Perform a subtraction.  */
 597                   /* Compare summand1 and summand2 by magnitude.  */
 598                   while (summand1[summand1_len - 1] == 0)
 599                     summand1_len--;
 600                   while (summand2[summand2_len - 1] == 0)
 601                     summand2_len--;
 602                   if (summand1_len > summand2_len)
 603                     sign = xys;
 604                   else if (summand1_len < summand2_len)
 605                     sign = zs;
 606                   else
 607                     {
 608                       size_t i = summand1_len;
 609                       for (;;)
 610                         {
 611                           --i;
 612                           if (summand1[i] > summand2[i])
 613                             {
 614                               sign = xys;
 615                               break;
 616                             }
 617                           if (summand1[i] < summand2[i])
 618                             {
 619                               sign = zs;
 620                               break;
 621                             }
 622                           if (i == 0)
 623                             /* summand1 and summand2 are equal.  */
 624                             return L_(0.0);
 625                         }
 626                     }
 627                   if (sign == xys)
 628                     {
 629                       /* Compute summand1 - summand2.  */
 630                       size_t i;
 631                       mp_limb_t carry;
 632
 633                       carry = 0;
 634                       for (i = 0; i < summand2_len; i++)
 635                         {
 636                           mp_limb_t digit1 = summand1[i];
 637                           mp_limb_t digit2 = summand2[i];
 638                           sum[i] = digit1 - digit2 - carry;
 639                           carry = (carry ? digit1 <= digit2 : digit1 < digit2);
 640                         }
 641                       for (; i < summand1_len; i++)
 642                         {
 643                           mp_limb_t digit1 = summand1[i];
 644                           sum[i] = digit1 - carry;
 645                           carry = carry && digit1 == 0;
 646                         }
 647                       if (carry)
 648                         abort ();
 649                       sum_len = summand1_len;
 650                     }
 651                   else
 652                     {
 653                       /* Compute summand2 - summand1.  */
 654                       size_t i;
 655                       mp_limb_t carry;
 656
 657                       carry = 0;
 658                       for (i = 0; i < summand1_len; i++)
 659                         {
 660                           mp_limb_t digit1 = summand1[i];
 661                           mp_limb_t digit2 = summand2[i];
 662                           sum[i] = digit2 - digit1 - carry;
 663                           carry = (carry ? digit2 <= digit1 : digit2 < digit1);
 664                         }
 665                       for (; i < summand2_len; i++)
 666                         {
 667                           mp_limb_t digit2 = summand2[i];
 668                           sum[i] = digit2 - carry;
 669                           carry = carry && digit2 == 0;
 670                         }
 671                       if (carry)
 672                         abort ();
 673                       sum_len = summand2_len;
 674                     }
 675                 }
 676             }
 677             /* The result is
 678                  (-1)^sign * 2^e * sum.  */
 679             /* Now perform the rounding to MANT_BIT mantissa bits.  */
 680             while (sum[sum_len - 1] == 0)
 681               sum_len--;
 682             /* Here we know that the most significant limb, sum[sum_len - 1], is
 683                non-zero.  */
 684             {
 685               /* How many bits the sum has.  */
 686               unsigned int sum_bits =
 687                 integer_length (sum[sum_len - 1]) + (sum_len - 1) * GMP_LIMB_BITS;
 688               /* How many bits to keep when rounding.  */
 689               unsigned int keep_bits;
 690               /* How many bits to round off.  */
 691               unsigned int roundoff_bits;
 692               if (e + (int) sum_bits >= MIN_EXP)
 693                 /* 2^e * sum >= 2^(MIN_EXP-1).
 694                    result will be a normalized number.  */
 695                 keep_bits = MANT_BIT;
 696               else if (e + (int) sum_bits >= MIN_EXP - MANT_BIT)
 697                 /* 2^e * sum >= 2^(MIN_EXP-MANT_BIT-1).
 698                    result will be a denormalized number or rounded to zero.  */
 699                 keep_bits = e + (int) sum_bits - (MIN_EXP + MANT_BIT);
 700               else
 701                 /* 2^e * sum < 2^(MIN_EXP-MANT_BIT-1).  Round to zero.  */
 702                 return L_(0.0);
 703               /* Note: 0 <= keep_bits <= MANT_BIT.  */
 704               if (sum_bits <= keep_bits)
 705                 {
 706                   /* Nothing to do.  */
 707                   roundoff_bits = 0;
 708                   keep_bits = sum_bits;
 709                 }
 710               else
 711                 {
 712                   int round_up;
 713                   roundoff_bits = sum_bits - keep_bits; /* > 0, <= sum_bits */
 714                   {
 715 #if HAVE_FEGETROUND && defined FE_TOWARDZERO
 716                     /* Cf. <http://pubs.opengroup.org/onlinepubs/9699919799/functions/fegetround.html> */
 717                     int rounding_mode = fegetround ();
 718                     if (rounding_mode == FE_TOWARDZERO)
 719                       round_up = 0;
 720                     else if (rounding_mode == FE_DOWNWARD)
 721                       round_up = sign;
 722                     else if (rounding_mode == FE_UPWARD)
 723                       round_up = !sign;
 724 #else
 725                     /* Cf. <http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/float.h.html> */
 726                     int rounding_mode = FLT_ROUNDS;
 727                     if (rounding_mode == 0) /* toward zero */
 728                       round_up = 0;
 729                     else if (rounding_mode == 3) /* toward negative infinity */
 730                       round_up = sign;
 731                     else if (rounding_mode == 2) /* toward positive infinity */
 732                       round_up = !sign;
 733 #endif
 734                     else
 735                       {
 736                         /* Round to nearest.  */
 737                         round_up = 0;
 738                         /* Test bit (roundoff_bits-1).  */
 739                         if ((sum[(roundoff_bits - 1) / GMP_LIMB_BITS]
 740                              >> ((roundoff_bits - 1) % GMP_LIMB_BITS)) & 1)
 741                           {
 742                             /* Test bits roundoff_bits-1 .. 0.  */
 743                             bool halfway =
 744                               ((sum[(roundoff_bits - 1) / GMP_LIMB_BITS]
 745                                 & (((mp_limb_t) 1 << ((roundoff_bits - 1) % GMP_LIMB_BITS)) - 1))
 746                                == 0);
 747                             if (halfway)
 748                               {
 749                                 int i;
 750                                 for (i = (roundoff_bits - 1) / GMP_LIMB_BITS - 1; i >= 0; i--)
 751                                   if (sum[i] != 0)
 752                                     {
 753                                       halfway = false;
 754                                       break;
 755                                     }
 756                               }
 757                             if (halfway)
 758                               /* Round to even.  Test bit roundoff_bits.  */
 759                               round_up = ((sum[roundoff_bits / GMP_LIMB_BITS]
 760                                           >> (roundoff_bits % GMP_LIMB_BITS)) & 1);
 761                             else
 762                               /* Round up.  */
 763                               round_up = 1;
 764                           }
 765                       }
 766                   }
 767                   /* Perform the rounding.  */
 768                   {
 769                     size_t i = roundoff_bits / GMP_LIMB_BITS;
 770                     {
 771                       size_t j = i;
 772                       while (j > 0)
 773                         sum[--j] = 0;
 774                     }
 775                     if (round_up)
 776                       {
 777                         /* Round up.  */
 778                         sum[i] =
 779                           (sum[i]
 780                            | (((mp_limb_t) 1 << (roundoff_bits % GMP_LIMB_BITS)) - 1))
 781                           + 1;
 782                         if (sum[i] == 0)
 783                           {
 784                             /* Propagate carry.  */
 785                             while (i < sum_len - 1)
 786                               {
 787                                 ++i;
 788                                 sum[i] += 1;
 789                                 if (sum[i] != 0)
 790                                   break;
 791                               }
 792                           }
 793                         /* sum[i] is the most significant limb that was
 794                            incremented.  */
 795                         if (i == sum_len - 1 && (sum[i] & (sum[i] - 1)) == 0)
 796                           {
 797                             /* Through the carry, one more bit is needed.  */
 798                             if (sum[i] != 0)
 799                               sum_bits += 1;
 800                             else
 801                               {
 802                                 /* Instead of requiring one more limb of memory,
 803                                    perform a shift by one bit, and adjust the
 804                                    exponent.  */
 805                                 sum[i] = (mp_limb_t) 1 << (GMP_LIMB_BITS - 1);
 806                                 e += 1;
 807                               }
 808                             /* The bit sequence has the form 1000...000.  */
 809                             keep_bits = 1;
 810                           }
 811                       }
 812                     else
 813                       {
 814                         /* Round down.  */
 815                         sum[i] &= ((mp_limb_t) -1 << (roundoff_bits % GMP_LIMB_BITS));
 816                         if (i == sum_len - 1 && sum[i] == 0)
 817                           /* The entire sum has become zero.  */
 818                           return L_(0.0);
 819                       }
 820                   }
 821                 }
 822               /* The result is
 823                    (-1)^sign * 2^e * sum
 824                  and here we know that
 825                    2^(sum_bits-1) <= sum < 2^sum_bits,
 826                  and sum is a multiple of 2^(sum_bits-keep_bits), where
 827                    0 < keep_bits <= MANT_BIT  and  keep_bits <= sum_bits.
 828                  (If keep_bits was initially 0, the rounding either returned zero
 829                  or produced a bit sequence of the form 1000...000, setting
 830                  keep_bits to 1.)  */
 831               {
 832                 /* Split the keep_bits bits into chunks of at most 32 bits.  */
 833                 unsigned int chunk_count = (keep_bits - 1) / GMP_LIMB_BITS + 1;
 834                 /* 1 <= chunk_count <= ceil (sum_bits / GMP_LIMB_BITS) = sum_len. */
 835                 static const DOUBLE chunk_multiplier = /* 2^-GMP_LIMB_BITS */
 836                   L_(1.0) / ((DOUBLE) (1 << (GMP_LIMB_BITS / 2))
 837                              * (DOUBLE) (1 << ((GMP_LIMB_BITS + 1) / 2)));
 838                 unsigned int shift = sum_bits % GMP_LIMB_BITS;
 839                 DOUBLE fsum;
 840                 if (MANT_BIT <= GMP_LIMB_BITS)
 841                   {
 842                     /* Since keep_bits <= MANT_BIT <= GMP_LIMB_BITS,
 843                        chunk_count is 1.  No need for a loop.  */
 844                     if (shift == 0)
 845                       fsum = (DOUBLE) sum[sum_len - 1];
 846                     else
 847                       fsum = (DOUBLE)
 848                              ((sum[sum_len - 1] << (GMP_LIMB_BITS - shift))
 849                               | (sum_len >= 2 ? sum[sum_len - 2] >> shift : 0));
 850                   }
 851                 else
 852                   {
 853                     int k;
 854                     k = chunk_count - 1;
 855                     if (shift == 0)
 856                       {
 857                         /* First loop round.  */
 858                         fsum = (DOUBLE) sum[sum_len - k - 1];
 859                         /* General loop.  */
 860                         while (--k >= 0)
 861                           {
 862                             fsum *= chunk_multiplier;
 863                             fsum += (DOUBLE) sum[sum_len - k - 1];
 864                           }
 865                       }
 866                     else
 867                       {
 868                         /* First loop round.  */
 869                         fsum = (DOUBLE)
 870                                ((sum[sum_len - k - 1] << (GMP_LIMB_BITS - shift))
 871                                 | (sum_len >= k + 2 ? sum[sum_len - k - 2] >> shift : 0));
 872                         /* General loop.  */
 873                         while (--k >= 0)
 874                           {
 875                             fsum *= chunk_multiplier;
 876                             fsum += (DOUBLE)
 877                                     ((sum[sum_len - k - 1] << (GMP_LIMB_BITS - shift))
 878                                      | (sum[sum_len - k - 2] >> shift));
 879                           }
 880                       }
 881                   }
 882                 fsum = LDEXP (fsum, e + (int) sum_bits - GMP_LIMB_BITS);
 883                 return (sign ? - fsum : fsum);
 884               }
 885             }
 886           }
 887         }
 888       else
 889         return z;
 890     }
 891   else
 892     return (x * y) + z;
 893 }