lib/fma.c

   1 /* Fused multiply-add.
   2    Copyright (C) 2007, 2009, 2011-2012 Free Software Foundation, Inc.
   3
   4    This program is free software: you can redistribute it and/or modify
   5    it under the terms of the GNU General Public License as published by
   6    the Free Software Foundation; either version 3 of the License, or
   7    (at your option) any later version.
   8
   9    This program is distributed in the hope that it will be useful,
  10    but WITHOUT ANY WARRANTY; without even the implied warranty of
  11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12    GNU General Public License for more details.
  13
  14    You should have received a copy of the GNU General Public License
  15    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
  16
  17 /* Written by Bruno Haible <bruno@clisp.org>, 2011.  */
  18
  19 #if ! defined USE_LONG_DOUBLE
  20 # include <config.h>
  21 #endif
  22
  23 /* Specification.  */
  24 #include <math.h>
  25
  26 #include <stdbool.h>
  27 #include <stdlib.h>
  28
  29 #if HAVE_FEGETROUND
  30 # include <fenv.h>
  31 #endif
  32
  33 #include "float+.h"
  34 #include "integer_length.h"
  35 #include "verify.h"
  36
  37 #ifdef USE_LONG_DOUBLE
  38 # define FUNC fmal
  39 # define DOUBLE long double
  40 # define FREXP frexpl
  41 # define LDEXP ldexpl
  42 # define MIN_EXP LDBL_MIN_EXP
  43 # define MANT_BIT LDBL_MANT_BIT
  44 # define L_(literal) literal##L
  45 #elif ! defined USE_FLOAT
  46 # define FUNC fma
  47 # define DOUBLE double
  48 # define FREXP frexp
  49 # define LDEXP ldexp
  50 # define MIN_EXP DBL_MIN_EXP
  51 # define MANT_BIT DBL_MANT_BIT
  52 # define L_(literal) literal
  53 #else /* defined USE_FLOAT */
  54 # define FUNC fmaf
  55 # define DOUBLE float
  56 # define FREXP frexpf
  57 # define LDEXP ldexpf
  58 # define MIN_EXP FLT_MIN_EXP
  59 # define MANT_BIT FLT_MANT_BIT
  60 # define L_(literal) literal##f
  61 #endif
  62
  63 #undef MAX
  64 #define MAX(a,b) ((a) > (b) ? (a) : (b))
  65
  66 #undef MIN
  67 #define MIN(a,b) ((a) < (b) ? (a) : (b))
  68
  69 /* It is possible to write an implementation of fused multiply-add with
  70    floating-point operations alone.  See
  71      Sylvie Boldo, Guillaume Melquiond:
  72      Emulation of FMA and correctly-rounded sums: proved algorithms using
  73      rounding to odd.
  74      <http://www.lri.fr/~melquion/doc/08-tc.pdf>
  75    But is it complicated.
  76    Here we take the simpler (and probably slower) approach of doing
  77    multi-precision arithmetic.  */
  78
  79 /* We use the naming conventions of GNU gmp, but vastly simpler (and slower)
  80    algorithms.  */
  81
  82 typedef unsigned int mp_limb_t;
  83 #define GMP_LIMB_BITS 32
  84 verify (sizeof (mp_limb_t) * CHAR_BIT == GMP_LIMB_BITS);
  85
  86 typedef unsigned long long mp_twolimb_t;
  87 #define GMP_TWOLIMB_BITS 64
  88 verify (sizeof (mp_twolimb_t) * CHAR_BIT == GMP_TWOLIMB_BITS);
  89
  90 /* Number of limbs needed for a single DOUBLE.  */
  91 #define NLIMBS1 ((MANT_BIT + GMP_LIMB_BITS - 1) / GMP_LIMB_BITS)
  92
  93 /* Number of limbs needed for the accumulator.  */
  94 #define NLIMBS3 (3 * NLIMBS1 + 1)
  95
  96 /* Assuming 0.5 <= x < 1.0:
  97    Convert the mantissa (x * 2^DBL_MANT_BIT) to a sequence of limbs.  */
  98 static void
  99 decode (DOUBLE x, mp_limb_t limbs[NLIMBS1])
 100 {
 101   /* I'm not sure whether it's safe to cast a 'double' value between
 102      2^31 and 2^32 to 'unsigned int', therefore play safe and cast only
 103      'double' values between 0 and 2^31 (to 'unsigned int' or 'int',
 104      doesn't matter).
 105      So, we split the MANT_BIT bits of x into a number of chunks of
 106      at most 31 bits.  */
 107   enum { chunk_count = (MANT_BIT - 1) / 31 + 1 };
 108   /* Variables used for storing the bits limb after limb.  */
 109   mp_limb_t *p = limbs + NLIMBS1 - 1;
 110   mp_limb_t accu = 0;
 111   unsigned int bits_needed = MANT_BIT - (NLIMBS1 - 1) * GMP_LIMB_BITS;
 112   /* The bits bits_needed-1...0 need to be ORed into the accu.
 113      1 <= bits_needed <= GMP_LIMB_BITS.  */
 114   /* Unroll the first 4 loop rounds.  */
 115   if (chunk_count > 0)
 116     {
 117       /* Here we still have MANT_BIT-0*31 bits to extract from x.  */
 118       enum { chunk_bits = MIN (31, MANT_BIT - 0 * 31) }; /* > 0, <= 31 */
 119       mp_limb_t d;
 120       x *= (mp_limb_t) 1 << chunk_bits;
 121       d = (int) x; /* 0 <= d < 2^chunk_bits.  */
 122       x -= d;
 123       if (!(x >= L_(0.0) && x < L_(1.0)))
 124         abort ();
 125       if (bits_needed < chunk_bits)
 126         {
 127           /* store bits_needed bits */
 128           accu |= d >> (chunk_bits - bits_needed);
 129           *p = accu;
 130           if (p == limbs)
 131             goto done;
 132           p--;
 133           /* hold (chunk_bits - bits_needed) bits */
 134           accu = d << (GMP_LIMB_BITS - (chunk_bits - bits_needed));
 135           bits_needed = GMP_LIMB_BITS - (chunk_bits - bits_needed);
 136         }
 137       else
 138         {
 139           /* store chunk_bits bits */
 140           accu |= d << (bits_needed - chunk_bits);
 141           bits_needed -= chunk_bits;
 142           if (bits_needed == 0)
 143             {
 144               *p = accu;
 145               if (p == limbs)
 146                 goto done;
 147               p--;
 148               accu = 0;
 149               bits_needed = GMP_LIMB_BITS;
 150             }
 151         }
 152     }
 153   if (chunk_count > 1)
 154     {
 155       /* Here we still have MANT_BIT-1*31 bits to extract from x.  */
 156       enum { chunk_bits = MIN (31, MAX (MANT_BIT - 1 * 31, 0)) }; /* > 0, <= 31 */
 157       mp_limb_t d;
 158       x *= (mp_limb_t) 1 << chunk_bits;
 159       d = (int) x; /* 0 <= d < 2^chunk_bits.  */
 160       x -= d;
 161       if (!(x >= L_(0.0) && x < L_(1.0)))
 162         abort ();
 163       if (bits_needed < chunk_bits)
 164         {
 165           /* store bits_needed bits */
 166           accu |= d >> (chunk_bits - bits_needed);
 167           *p = accu;
 168           if (p == limbs)
 169             goto done;
 170           p--;
 171           /* hold (chunk_bits - bits_needed) bits */
 172           accu = d << (GMP_LIMB_BITS - (chunk_bits - bits_needed));
 173           bits_needed = GMP_LIMB_BITS - (chunk_bits - bits_needed);
 174         }
 175       else
 176         {
 177           /* store chunk_bits bits */
 178           accu |= d << (bits_needed - chunk_bits);
 179           bits_needed -= chunk_bits;
 180           if (bits_needed == 0)
 181             {
 182               *p = accu;
 183               if (p == limbs)
 184                 goto done;
 185               p--;
 186               accu = 0;
 187               bits_needed = GMP_LIMB_BITS;
 188             }
 189         }
 190     }
 191   if (chunk_count > 2)
 192     {
 193       /* Here we still have MANT_BIT-2*31 bits to extract from x.  */
 194       enum { chunk_bits = MIN (31, MAX (MANT_BIT - 2 * 31, 0)) }; /* > 0, <= 31 */
 195       mp_limb_t d;
 196       x *= (mp_limb_t) 1 << chunk_bits;
 197       d = (int) x; /* 0 <= d < 2^chunk_bits.  */
 198       x -= d;
 199       if (!(x >= L_(0.0) && x < L_(1.0)))
 200         abort ();
 201       if (bits_needed < chunk_bits)
 202         {
 203           /* store bits_needed bits */
 204           accu |= d >> (chunk_bits - bits_needed);
 205           *p = accu;
 206           if (p == limbs)
 207             goto done;
 208           p--;
 209           /* hold (chunk_bits - bits_needed) bits */
 210           accu = d << (GMP_LIMB_BITS - (chunk_bits - bits_needed));
 211           bits_needed = GMP_LIMB_BITS - (chunk_bits - bits_needed);
 212         }
 213       else
 214         {
 215           /* store chunk_bits bits */
 216           accu |= d << (bits_needed - chunk_bits);
 217           bits_needed -= chunk_bits;
 218           if (bits_needed == 0)
 219             {
 220               *p = accu;
 221               if (p == limbs)
 222                 goto done;
 223               p--;
 224               accu = 0;
 225               bits_needed = GMP_LIMB_BITS;
 226             }
 227         }
 228     }
 229   if (chunk_count > 3)
 230     {
 231       /* Here we still have MANT_BIT-3*31 bits to extract from x.  */
 232       enum { chunk_bits = MIN (31, MAX (MANT_BIT - 3 * 31, 0)) }; /* > 0, <= 31 */
 233       mp_limb_t d;
 234       x *= (mp_limb_t) 1 << chunk_bits;
 235       d = (int) x; /* 0 <= d < 2^chunk_bits.  */
 236       x -= d;
 237       if (!(x >= L_(0.0) && x < L_(1.0)))
 238         abort ();
 239       if (bits_needed < chunk_bits)
 240         {
 241           /* store bits_needed bits */
 242           accu |= d >> (chunk_bits - bits_needed);
 243           *p = accu;
 244           if (p == limbs)
 245             goto done;
 246           p--;
 247           /* hold (chunk_bits - bits_needed) bits */
 248           accu = d << (GMP_LIMB_BITS - (chunk_bits - bits_needed));
 249           bits_needed = GMP_LIMB_BITS - (chunk_bits - bits_needed);
 250         }
 251       else
 252         {
 253           /* store chunk_bits bits */
 254           accu |= d << (bits_needed - chunk_bits);
 255           bits_needed -= chunk_bits;
 256           if (bits_needed == 0)
 257             {
 258               *p = accu;
 259               if (p == limbs)
 260                 goto done;
 261               p--;
 262               accu = 0;
 263               bits_needed = GMP_LIMB_BITS;
 264             }
 265         }
 266     }
 267   if (chunk_count > 4)
 268     {
 269       /* Here we still have MANT_BIT-4*31 bits to extract from x.  */
 270       /* Generic loop.  */
 271       size_t k;
 272       for (k = 4; k < chunk_count; k++)
 273         {
 274           size_t chunk_bits = MIN (31, MANT_BIT - k * 31); /* > 0, <= 31 */
 275           mp_limb_t d;
 276           x *= (mp_limb_t) 1 << chunk_bits;
 277           d = (int) x; /* 0 <= d < 2^chunk_bits.  */
 278           x -= d;
 279           if (!(x >= L_(0.0) && x < L_(1.0)))
 280             abort ();
 281           if (bits_needed < chunk_bits)
 282             {
 283               /* store bits_needed bits */
 284               accu |= d >> (chunk_bits - bits_needed);
 285               *p = accu;
 286               if (p == limbs)
 287                 goto done;
 288               p--;
 289               /* hold (chunk_bits - bits_needed) bits */
 290               accu = d << (GMP_LIMB_BITS - (chunk_bits - bits_needed));
 291               bits_needed = GMP_LIMB_BITS - (chunk_bits - bits_needed);
 292             }
 293           else
 294             {
 295               /* store chunk_bits bits */
 296               accu |= d << (bits_needed - chunk_bits);
 297               bits_needed -= chunk_bits;
 298               if (bits_needed == 0)
 299                 {
 300                   *p = accu;
 301                   if (p == limbs)
 302                     goto done;
 303                   p--;
 304                   accu = 0;
 305                   bits_needed = GMP_LIMB_BITS;
 306                 }
 307             }
 308         }
 309     }
 310   /* We shouldn't get here.  */
 311   abort ();
 312
 313  done: ;
 314 #ifndef USE_LONG_DOUBLE /* On FreeBSD 6.1/x86, 'long double' numbers sometimes
 315                            have excess precision.  */
 316   if (!(x == L_(0.0)))
 317     abort ();
 318 #endif
 319 }
 320
 321 /* Multiply two sequences of limbs.  */
 322 static void
 323 multiply (mp_limb_t xlimbs[NLIMBS1], mp_limb_t ylimbs[NLIMBS1],
 324           mp_limb_t prod_limbs[2 * NLIMBS1])
 325 {
 326   size_t k, i, j;
 327   enum { len1 = NLIMBS1 };
 328   enum { len2 = NLIMBS1 };
 329
 330   for (k = len2; k > 0; )
 331     prod_limbs[--k] = 0;
 332   for (i = 0; i < len1; i++)
 333     {
 334       mp_limb_t digit1 = xlimbs[i];
 335       mp_twolimb_t carry = 0;
 336       for (j = 0; j < len2; j++)
 337         {
 338           mp_limb_t digit2 = ylimbs[j];
 339           carry += (mp_twolimb_t) digit1 * (mp_twolimb_t) digit2;
 340           carry += prod_limbs[i + j];
 341           prod_limbs[i + j] = (mp_limb_t) carry;
 342           carry = carry >> GMP_LIMB_BITS;
 343         }
 344       prod_limbs[i + len2] = (mp_limb_t) carry;
 345     }
 346 }
 347
 348 DOUBLE
 349 FUNC (DOUBLE x, DOUBLE y, DOUBLE z)
 350 {
 351   if (isfinite (x) && isfinite (y))
 352     {
 353       if (isfinite (z))
 354         {
 355           /* x, y, z are all finite.  */
 356           if (x == L_(0.0) || y == L_(0.0))
 357             return z;
 358           if (z == L_(0.0))
 359             return x * y;
 360           /* x, y, z are all non-zero.
 361              The result is x * y + z.  */
 362           {
 363             int e;                  /* exponent of x * y + z */
 364             int sign;
 365             mp_limb_t sum[NLIMBS3];
 366             size_t sum_len;
 367
 368             {
 369               int xys;                /* sign of x * y */
 370               int zs;                 /* sign of z */
 371               int xye;                /* sum of exponents of x and y */
 372               int ze;                 /* exponent of z */
 373               mp_limb_t summand1[NLIMBS3];
 374               size_t summand1_len;
 375               mp_limb_t summand2[NLIMBS3];
 376               size_t summand2_len;
 377
 378               {
 379                 mp_limb_t zlimbs[NLIMBS1];
 380                 mp_limb_t xylimbs[2 * NLIMBS1];
 381
 382                 {
 383                   DOUBLE xn;              /* normalized part of x */
 384                   DOUBLE yn;              /* normalized part of y */
 385                   DOUBLE zn;              /* normalized part of z */
 386                   int xe;                 /* exponent of x */
 387                   int ye;                 /* exponent of y */
 388                   mp_limb_t xlimbs[NLIMBS1];
 389                   mp_limb_t ylimbs[NLIMBS1];
 390
 391                   xys = 0;
 392                   xn = x;
 393                   if (x < 0)
 394                     {
 395                       xys = 1;
 396                       xn = - x;
 397                     }
 398                   yn = y;
 399                   if (y < 0)
 400                     {
 401                       xys = 1 - xys;
 402                       yn = - y;
 403                     }
 404
 405                   zs = 0;
 406                   zn = z;
 407                   if (z < 0)
 408                     {
 409                       zs = 1;
 410                       zn = - z;
 411                     }
 412
 413                   /* xn, yn, zn are all positive.
 414                      The result is (-1)^xys * xn * yn + (-1)^zs * zn.  */
 415                   xn = FREXP (xn, &xe);
 416                   yn = FREXP (yn, &ye);
 417                   zn = FREXP (zn, &ze);
 418                   xye = xe + ye;
 419                   /* xn, yn, zn are all < 1.0 and >= 0.5.
 420                      The result is
 421                        (-1)^xys * 2^xye * xn * yn + (-1)^zs * 2^ze * zn.  */
 422                   if (xye < ze - MANT_BIT)
 423                     {
 424                       /* 2^xye * xn * yn < 2^xye <= 2^(ze-MANT_BIT-1)  */
 425                       return z;
 426                     }
 427                   if (xye - 2 * MANT_BIT > ze)
 428                     {
 429                       /* 2^ze * zn < 2^ze <= 2^(xye-2*MANT_BIT-1).
 430                          We cannot simply do
 431                            return x * y;
 432                          because it would round differently: A round-to-even
 433                          in the multiplication can be a round-up or round-down
 434                          here, due to z.  So replace z with a value that doesn't
 435                          require the use of long bignums but that rounds the
 436                          same way.  */
 437                       zn = L_(0.5);
 438                       ze = xye - 2 * MANT_BIT - 1;
 439                     }
 440                   /* Convert mantissas of xn, yn, zn to limb sequences:
 441                      xlimbs = 2^MANT_BIT * xn
 442                      ylimbs = 2^MANT_BIT * yn
 443                      zlimbs = 2^MANT_BIT * zn  */
 444                   decode (xn, xlimbs);
 445                   decode (yn, ylimbs);
 446                   decode (zn, zlimbs);
 447                   /* Multiply the mantissas of xn and yn:
 448                      xylimbs = xlimbs * ylimbs  */
 449                   multiply (xlimbs, ylimbs, xylimbs);
 450                 }
 451                 /* The result is
 452                      (-1)^xys * 2^(xye-2*MANT_BIT) * xylimbs
 453                      + (-1)^zs * 2^(ze-MANT_BIT) * zlimbs.
 454                    Compute
 455                      e = min (xye-2*MANT_BIT, ze-MANT_BIT)
 456                    then
 457                      summand1 = 2^(xye-2*MANT_BIT-e) * xylimbs
 458                      summand2 = 2^(ze-MANT_BIT-e) * zlimbs  */
 459                 e = MIN (xye - 2 * MANT_BIT, ze - MANT_BIT);
 460                 if (e == xye - 2 * MANT_BIT)
 461                   {
 462                     /* Simply copy the limbs of xylimbs.  */
 463                     size_t i;
 464                     for (i = 0; i < 2 * NLIMBS1; i++)
 465                       summand1[i] = xylimbs[i];
 466                     summand1_len = 2 * NLIMBS1;
 467                   }
 468                 else
 469                   {
 470                     size_t ediff = xye - 2 * MANT_BIT - e;
 471                     /* Left shift the limbs of xylimbs by ediff bits.  */
 472                     size_t ldiff = ediff / GMP_LIMB_BITS;
 473                     size_t shift = ediff % GMP_LIMB_BITS;
 474                     size_t i;
 475                     for (i = 0; i < ldiff; i++)
 476                       summand1[i] = 0;
 477                     if (shift > 0)
 478                       {
 479                         mp_limb_t carry = 0;
 480                         for (i = 0; i < 2 * NLIMBS1; i++)
 481                           {
 482                             summand1[ldiff + i] = (xylimbs[i] << shift) | carry;
 483                             carry = xylimbs[i] >> (GMP_LIMB_BITS - shift);
 484                           }
 485                         summand1[ldiff + 2 * NLIMBS1] = carry;
 486                         summand1_len = ldiff + 2 * NLIMBS1 + 1;
 487                       }
 488                     else
 489                       {
 490                         for (i = 0; i < 2 * NLIMBS1; i++)
 491                           summand1[ldiff + i] = xylimbs[i];
 492                         summand1_len = ldiff + 2 * NLIMBS1;
 493                       }
 494                     /* Estimation of needed array size:
 495                        ediff = (xye - 2 * MANT_BIT) - (ze - MANT_BIT) <= MANT_BIT + 1
 496                        therefore
 497                        summand1_len
 498                          = (ediff + GMP_LIMB_BITS - 1) / GMP_LIMB_BITS + 2 * NLIMBS1
 499                          <= (MANT_BIT + GMP_LIMB_BITS) / GMP_LIMB_BITS + 2 * NLIMBS1
 500                          <= 3 * NLIMBS1 + 1
 501                          = NLIMBS3  */
 502                     if (!(summand1_len <= NLIMBS3))
 503                       abort ();
 504                   }
 505                 if (e == ze - MANT_BIT)
 506                   {
 507                     /* Simply copy the limbs of zlimbs.  */
 508                     size_t i;
 509                     for (i = 0; i < NLIMBS1; i++)
 510                       summand2[i] = zlimbs[i];
 511                     summand2_len = NLIMBS1;
 512                   }
 513                 else
 514                   {
 515                     size_t ediff = ze - MANT_BIT - e;
 516                     /* Left shift the limbs of zlimbs by ediff bits.  */
 517                     size_t ldiff = ediff / GMP_LIMB_BITS;
 518                     size_t shift = ediff % GMP_LIMB_BITS;
 519                     size_t i;
 520                     for (i = 0; i < ldiff; i++)
 521                       summand2[i] = 0;
 522                     if (shift > 0)
 523                       {
 524                         mp_limb_t carry = 0;
 525                         for (i = 0; i < NLIMBS1; i++)
 526                           {
 527                             summand2[ldiff + i] = (zlimbs[i] << shift) | carry;
 528                             carry = zlimbs[i] >> (GMP_LIMB_BITS - shift);
 529                           }
 530                         summand2[ldiff + NLIMBS1] = carry;
 531                         summand2_len = ldiff + NLIMBS1 + 1;
 532                       }
 533                     else
 534                       {
 535                         for (i = 0; i < NLIMBS1; i++)
 536                           summand2[ldiff + i] = zlimbs[i];
 537                         summand2_len = ldiff + NLIMBS1;
 538                       }
 539                     /* Estimation of needed array size:
 540                        ediff = (ze - MANT_BIT) - (xye - 2 * MANT_BIT) <= 2 * MANT_BIT
 541                        therefore
 542                        summand2_len
 543                          = (ediff + GMP_LIMB_BITS - 1) / GMP_LIMB_BITS + NLIMBS1
 544                          <= (2 * MANT_BIT + GMP_LIMB_BITS - 1) / GMP_LIMB_BITS + NLIMBS1
 545                          <= 3 * NLIMBS1 + 1
 546                          = NLIMBS3  */
 547                     if (!(summand2_len <= NLIMBS3))
 548                       abort ();
 549                   }
 550               }
 551               /* The result is
 552                    (-1)^xys * 2^e * summand1 + (-1)^zs * 2^e * summand2.  */
 553               if (xys == zs)
 554                 {
 555                   /* Perform an addition.  */
 556                   size_t i;
 557                   mp_limb_t carry;
 558
 559                   sign = xys;
 560                   carry = 0;
 561                   for (i = 0; i < MIN (summand1_len, summand2_len); i++)
 562                     {
 563                       mp_limb_t digit1 = summand1[i];
 564                       mp_limb_t digit2 = summand2[i];
 565                       sum[i] = digit1 + digit2 + carry;
 566                       carry = (carry
 567                                ? digit1 >= (mp_limb_t)-1 - digit2
 568                                : digit1 > (mp_limb_t)-1 - digit2);
 569                     }
 570                   if (summand1_len > summand2_len)
 571                     for (; i < summand1_len; i++)
 572                       {
 573                         mp_limb_t digit1 = summand1[i];
 574                         sum[i] = carry + digit1;
 575                         carry = carry && digit1 == (mp_limb_t)-1;
 576                       }
 577                   else
 578                     for (; i < summand2_len; i++)
 579                       {
 580                         mp_limb_t digit2 = summand2[i];
 581                         sum[i] = carry + digit2;
 582                         carry = carry && digit2 == (mp_limb_t)-1;
 583                       }
 584                   if (carry)
 585                     sum[i++] = carry;
 586                   sum_len = i;
 587                 }
 588               else
 589                 {
 590                   /* Perform a subtraction.  */
 591                   /* Compare summand1 and summand2 by magnitude.  */
 592                   while (summand1[summand1_len - 1] == 0)
 593                     summand1_len--;
 594                   while (summand2[summand2_len - 1] == 0)
 595                     summand2_len--;
 596                   if (summand1_len > summand2_len)
 597                     sign = xys;
 598                   else if (summand1_len < summand2_len)
 599                     sign = zs;
 600                   else
 601                     {
 602                       size_t i = summand1_len;
 603                       for (;;)
 604                         {
 605                           --i;
 606                           if (summand1[i] > summand2[i])
 607                             {
 608                               sign = xys;
 609                               break;
 610                             }
 611                           if (summand1[i] < summand2[i])
 612                             {
 613                               sign = zs;
 614                               break;
 615                             }
 616                           if (i == 0)
 617                             /* summand1 and summand2 are equal.  */
 618                             return L_(0.0);
 619                         }
 620                     }
 621                   if (sign == xys)
 622                     {
 623                       /* Compute summand1 - summand2.  */
 624                       size_t i;
 625                       mp_limb_t carry;
 626
 627                       carry = 0;
 628                       for (i = 0; i < summand2_len; i++)
 629                         {
 630                           mp_limb_t digit1 = summand1[i];
 631                           mp_limb_t digit2 = summand2[i];
 632                           sum[i] = digit1 - digit2 - carry;
 633                           carry = (carry ? digit1 <= digit2 : digit1 < digit2);
 634                         }
 635                       for (; i < summand1_len; i++)
 636                         {
 637                           mp_limb_t digit1 = summand1[i];
 638                           sum[i] = digit1 - carry;
 639                           carry = carry && digit1 == 0;
 640                         }
 641                       if (carry)
 642                         abort ();
 643                       sum_len = summand1_len;
 644                     }
 645                   else
 646                     {
 647                       /* Compute summand2 - summand1.  */
 648                       size_t i;
 649                       mp_limb_t carry;
 650
 651                       carry = 0;
 652                       for (i = 0; i < summand1_len; i++)
 653                         {
 654                           mp_limb_t digit1 = summand1[i];
 655                           mp_limb_t digit2 = summand2[i];
 656                           sum[i] = digit2 - digit1 - carry;
 657                           carry = (carry ? digit2 <= digit1 : digit2 < digit1);
 658                         }
 659                       for (; i < summand2_len; i++)
 660                         {
 661                           mp_limb_t digit2 = summand2[i];
 662                           sum[i] = digit2 - carry;
 663                           carry = carry && digit2 == 0;
 664                         }
 665                       if (carry)
 666                         abort ();
 667                       sum_len = summand2_len;
 668                     }
 669                 }
 670             }
 671             /* The result is
 672                  (-1)^sign * 2^e * sum.  */
 673             /* Now perform the rounding to MANT_BIT mantissa bits.  */
 674             while (sum[sum_len - 1] == 0)
 675               sum_len--;
 676             /* Here we know that the most significant limb, sum[sum_len - 1], is
 677                non-zero.  */
 678             {
 679               /* How many bits the sum has.  */
 680               unsigned int sum_bits =
 681                 integer_length (sum[sum_len - 1]) + (sum_len - 1) * GMP_LIMB_BITS;
 682               /* How many bits to keep when rounding.  */
 683               unsigned int keep_bits;
 684               /* How many bits to round off.  */
 685               unsigned int roundoff_bits;
 686               if (e + (int) sum_bits >= MIN_EXP)
 687                 /* 2^e * sum >= 2^(MIN_EXP-1).
 688                    result will be a normalized number.  */
 689                 keep_bits = MANT_BIT;
 690               else if (e + (int) sum_bits >= MIN_EXP - MANT_BIT)
 691                 /* 2^e * sum >= 2^(MIN_EXP-MANT_BIT-1).
 692                    result will be a denormalized number or rounded to zero.  */
 693                 keep_bits = e + (int) sum_bits - (MIN_EXP + MANT_BIT);
 694               else
 695                 /* 2^e * sum < 2^(MIN_EXP-MANT_BIT-1).  Round to zero.  */
 696                 return L_(0.0);
 697               /* Note: 0 <= keep_bits <= MANT_BIT.  */
 698               if (sum_bits <= keep_bits)
 699                 {
 700                   /* Nothing to do.  */
 701                   roundoff_bits = 0;
 702                   keep_bits = sum_bits;
 703                 }
 704               else
 705                 {
 706                   int round_up;
 707                   roundoff_bits = sum_bits - keep_bits; /* > 0, <= sum_bits */
 708                   {
 709 #if HAVE_FEGETROUND && defined FE_TOWARDZERO
 710                     /* Cf. <http://pubs.opengroup.org/onlinepubs/9699919799/functions/fegetround.html> */
 711                     int rounding_mode = fegetround ();
 712                     if (rounding_mode == FE_TOWARDZERO)
 713                       round_up = 0;
 714                     else if (rounding_mode == FE_DOWNWARD)
 715                       round_up = sign;
 716                     else if (rounding_mode == FE_UPWARD)
 717                       round_up = !sign;
 718 #else
 719                     /* Cf. <http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/float.h.html> */
 720                     int rounding_mode = FLT_ROUNDS;
 721                     if (rounding_mode == 0) /* toward zero */
 722                       round_up = 0;
 723                     else if (rounding_mode == 3) /* toward negative infinity */
 724                       round_up = sign;
 725                     else if (rounding_mode == 2) /* toward positive infinity */
 726                       round_up = !sign;
 727 #endif
 728                     else
 729                       {
 730                         /* Round to nearest.  */
 731                         round_up = 0;
 732                         /* Test bit (roundoff_bits-1).  */
 733                         if ((sum[(roundoff_bits - 1) / GMP_LIMB_BITS]
 734                              >> ((roundoff_bits - 1) % GMP_LIMB_BITS)) & 1)
 735                           {
 736                             /* Test bits roundoff_bits-1 .. 0.  */
 737                             bool halfway =
 738                               ((sum[(roundoff_bits - 1) / GMP_LIMB_BITS]
 739                                 & (((mp_limb_t) 1 << ((roundoff_bits - 1) % GMP_LIMB_BITS)) - 1))
 740                                == 0);
 741                             if (halfway)
 742                               {
 743                                 int i;
 744                                 for (i = (roundoff_bits - 1) / GMP_LIMB_BITS - 1; i >= 0; i--)
 745                                   if (sum[i] != 0)
 746                                     {
 747                                       halfway = false;
 748                                       break;
 749                                     }
 750                               }
 751                             if (halfway)
 752                               /* Round to even.  Test bit roundoff_bits.  */
 753                               round_up = ((sum[roundoff_bits / GMP_LIMB_BITS]
 754                                           >> (roundoff_bits % GMP_LIMB_BITS)) & 1);
 755                             else
 756                               /* Round up.  */
 757                               round_up = 1;
 758                           }
 759                       }
 760                   }
 761                   /* Perform the rounding.  */
 762                   {
 763                     size_t i = roundoff_bits / GMP_LIMB_BITS;
 764                     {
 765                       size_t j = i;
 766                       while (j > 0)
 767                         sum[--j] = 0;
 768                     }
 769                     if (round_up)
 770                       {
 771                         /* Round up.  */
 772                         sum[i] =
 773                           (sum[i]
 774                            | (((mp_limb_t) 1 << (roundoff_bits % GMP_LIMB_BITS)) - 1))
 775                           + 1;
 776                         if (sum[i] == 0)
 777                           {
 778                             /* Propagate carry.  */
 779                             while (i < sum_len - 1)
 780                               {
 781                                 ++i;
 782                                 sum[i] += 1;
 783                                 if (sum[i] != 0)
 784                                   break;
 785                               }
 786                           }
 787                         /* sum[i] is the most significant limb that was
 788                            incremented.  */
 789                         if (i == sum_len - 1 && (sum[i] & (sum[i] - 1)) == 0)
 790                           {
 791                             /* Through the carry, one more bit is needed.  */
 792                             if (sum[i] != 0)
 793                               sum_bits += 1;
 794                             else
 795                               {
 796                                 /* Instead of requiring one more limb of memory,
 797                                    perform a shift by one bit, and adjust the
 798                                    exponent.  */
 799                                 sum[i] = (mp_limb_t) 1 << (GMP_LIMB_BITS - 1);
 800                                 e += 1;
 801                               }
 802                             /* The bit sequence has the form 1000...000.  */
 803                             keep_bits = 1;
 804                           }
 805                       }
 806                     else
 807                       {
 808                         /* Round down.  */
 809                         sum[i] &= ((mp_limb_t) -1 << (roundoff_bits % GMP_LIMB_BITS));
 810                         if (i == sum_len - 1 && sum[i] == 0)
 811                           /* The entire sum has become zero.  */
 812                           return L_(0.0);
 813                       }
 814                   }
 815                 }
 816               /* The result is
 817                    (-1)^sign * 2^e * sum
 818                  and here we know that
 819                    2^(sum_bits-1) <= sum < 2^sum_bits,
 820                  and sum is a multiple of 2^(sum_bits-keep_bits), where
 821                    0 < keep_bits <= MANT_BIT  and  keep_bits <= sum_bits.
 822                  (If keep_bits was initially 0, the rounding either returned zero
 823                  or produced a bit sequence of the form 1000...000, setting
 824                  keep_bits to 1.)  */
 825               {
 826                 /* Split the keep_bits bits into chunks of at most 32 bits.  */
 827                 unsigned int chunk_count = (keep_bits - 1) / GMP_LIMB_BITS + 1;
 828                 /* 1 <= chunk_count <= ceil (sum_bits / GMP_LIMB_BITS) = sum_len. */
 829                 static const DOUBLE chunk_multiplier = /* 2^-GMP_LIMB_BITS */
 830                   L_(1.0) / ((DOUBLE) (1 << (GMP_LIMB_BITS / 2))
 831                              * (DOUBLE) (1 << ((GMP_LIMB_BITS + 1) / 2)));
 832                 unsigned int shift = sum_bits % GMP_LIMB_BITS;
 833                 DOUBLE fsum;
 834                 if (MANT_BIT <= GMP_LIMB_BITS)
 835                   {
 836                     /* Since keep_bits <= MANT_BIT <= GMP_LIMB_BITS,
 837                        chunk_count is 1.  No need for a loop.  */
 838                     if (shift == 0)
 839                       fsum = (DOUBLE) sum[sum_len - 1];
 840                     else
 841                       fsum = (DOUBLE)
 842                              ((sum[sum_len - 1] << (GMP_LIMB_BITS - shift))
 843                               | (sum_len >= 2 ? sum[sum_len - 2] >> shift : 0));
 844                   }
 845                 else
 846                   {
 847                     int k;
 848                     k = chunk_count - 1;
 849                     if (shift == 0)
 850                       {
 851                         /* First loop round.  */
 852                         fsum = (DOUBLE) sum[sum_len - k - 1];
 853                         /* General loop.  */
 854                         while (--k >= 0)
 855                           {
 856                             fsum *= chunk_multiplier;
 857                             fsum += (DOUBLE) sum[sum_len - k - 1];
 858                           }
 859                       }
 860                     else
 861                       {
 862                         /* First loop round.  */
 863                         fsum = (DOUBLE)
 864                                ((sum[sum_len - k - 1] << (GMP_LIMB_BITS - shift))
 865                                 | (sum_len >= k + 2 ? sum[sum_len - k - 2] >> shift : 0));
 866                         /* General loop.  */
 867                         while (--k >= 0)
 868                           {
 869                             fsum *= chunk_multiplier;
 870                             fsum += (DOUBLE)
 871                                     ((sum[sum_len - k - 1] << (GMP_LIMB_BITS - shift))
 872                                      | (sum[sum_len - k - 2] >> shift));
 873                           }
 874                       }
 875                   }
 876                 fsum = LDEXP (fsum, e + (int) sum_bits - GMP_LIMB_BITS);
 877                 return (sign ? - fsum : fsum);
 878               }
 879             }
 880           }
 881         }
 882       else
 883         return z;
 884     }
 885   else
 886     return (x * y) + z;
 887 }