lib/fma.c

   1 /* Fused multiply-add.
   2    Copyright (C) 2007, 2009, 2011 Free Software Foundation, Inc.
   3
   4    This program is free software: you can redistribute it and/or modify
   5    it under the terms of the GNU General Public License as published by
   6    the Free Software Foundation; either version 3 of the License, or
   7    (at your option) any later version.
   8
   9    This program is distributed in the hope that it will be useful,
  10    but WITHOUT ANY WARRANTY; without even the implied warranty of
  11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12    GNU General Public License for more details.
  13
  14    You should have received a copy of the GNU General Public License
  15    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
  16
  17 /* Written by Bruno Haible <bruno@clisp.org>, 2011.  */
  18
  19 #if ! defined USE_LONG_DOUBLE
  20 # include <config.h>
  21 #endif
  22
  23 /* Specification.  */
  24 #include <math.h>
  25
  26 #include <stdbool.h>
  27 #include <stdlib.h>
  28
  29 #if HAVE_FEGETROUND
  30 # include <fenv.h>
  31 #endif
  32
  33 #include "float+.h"
  34 #include "integer_length.h"
  35 #include "verify.h"
  36
  37 #ifdef USE_LONG_DOUBLE
  38 # define FUNC fmal
  39 # define DOUBLE long double
  40 # define FREXP frexpl
  41 # define LDEXP ldexpl
  42 # define MIN_EXP LDBL_MIN_EXP
  43 # define MANT_BIT LDBL_MANT_BIT
  44 # define DECL_ROUNDING DECL_LONG_DOUBLE_ROUNDING
  45 # define BEGIN_ROUNDING() BEGIN_LONG_DOUBLE_ROUNDING ()
  46 # define END_ROUNDING() END_LONG_DOUBLE_ROUNDING ()
  47 # define L_(literal) literal##L
  48 #elif ! defined USE_FLOAT
  49 # define FUNC fma
  50 # define DOUBLE double
  51 # define FREXP frexp
  52 # define LDEXP ldexp
  53 # define MIN_EXP DBL_MIN_EXP
  54 # define MANT_BIT DBL_MANT_BIT
  55 # define DECL_ROUNDING
  56 # define BEGIN_ROUNDING()
  57 # define END_ROUNDING()
  58 # define L_(literal) literal
  59 #else /* defined USE_FLOAT */
  60 # define FUNC fmaf
  61 # define DOUBLE float
  62 # define FREXP frexpf
  63 # define LDEXP ldexpf
  64 # define MIN_EXP FLT_MIN_EXP
  65 # define MANT_BIT FLT_MANT_BIT
  66 # define DECL_ROUNDING
  67 # define BEGIN_ROUNDING()
  68 # define END_ROUNDING()
  69 # define L_(literal) literal##f
  70 #endif
  71
  72 #undef MAX
  73 #define MAX(a,b) ((a) > (b) ? (a) : (b))
  74
  75 #undef MIN
  76 #define MIN(a,b) ((a) < (b) ? (a) : (b))
  77
  78 /* It is possible to write an implementation of fused multiply-add with
  79    floating-point operations alone.  See
  80      Sylvie Boldo, Guillaume Melquiond:
  81      Emulation of FMA and correctly-rounded sums: proved algorithms using
  82      rounding to odd.
  83      <http://www.lri.fr/~melquion/doc/08-tc.pdf>
  84    But is it complicated.
  85    Here we take the simpler (and probably slower) approach of doing
  86    multi-precision arithmetic.  */
  87
  88 /* We use the naming conventions of GNU gmp, but vastly simpler (and slower)
  89    algorithms.  */
  90
  91 typedef unsigned int mp_limb_t;
  92 #define GMP_LIMB_BITS 32
  93 verify (sizeof (mp_limb_t) * CHAR_BIT == GMP_LIMB_BITS);
  94
  95 typedef unsigned long long mp_twolimb_t;
  96 #define GMP_TWOLIMB_BITS 64
  97 verify (sizeof (mp_twolimb_t) * CHAR_BIT == GMP_TWOLIMB_BITS);
  98
  99 /* Number of limbs needed for a single DOUBLE.  */
 100 #define NLIMBS1 ((MANT_BIT + GMP_LIMB_BITS - 1) / GMP_LIMB_BITS)
 101
 102 /* Number of limbs needed for the accumulator.  */
 103 #define NLIMBS3 (3 * NLIMBS1 + 1)
 104
 105 /* Assuming 0.5 <= x < 1.0:
 106    Convert the mantissa (x * 2^DBL_MANT_BIT) to a sequence of limbs.  */
 107 static void
 108 decode (DOUBLE x, mp_limb_t limbs[NLIMBS1])
 109 {
 110   /* I'm not sure whether it's safe to cast a 'double' value between
 111      2^31 and 2^32 to 'unsigned int', therefore play safe and cast only
 112      'double' values between 0 and 2^31 (to 'unsigned int' or 'int',
 113      doesn't matter).
 114      So, we split the MANT_BIT bits of x into a number of chunks of
 115      at most 31 bits.  */
 116   enum { chunk_count = (MANT_BIT - 1) / 31 + 1 };
 117   /* Variables used for storing the bits limb after limb.  */
 118   mp_limb_t *p = limbs + NLIMBS1 - 1;
 119   mp_limb_t accu = 0;
 120   unsigned int bits_needed = MANT_BIT - (NLIMBS1 - 1) * GMP_LIMB_BITS;
 121   /* The bits bits_needed-1...0 need to be ORed into the accu.
 122      1 <= bits_needed <= GMP_LIMB_BITS.  */
 123   /* Unroll the first 4 loop rounds.  */
 124   if (chunk_count > 0)
 125     {
 126       /* Here we still have MANT_BIT-0*31 bits to extract from x.  */
 127       enum { chunk_bits = MIN (31, MANT_BIT - 0 * 31) }; /* > 0, <= 31 */
 128       mp_limb_t d;
 129       x *= (mp_limb_t) 1 << chunk_bits;
 130       d = (int) x; /* 0 <= d < 2^chunk_bits.  */
 131       x -= d;
 132       if (!(x >= L_(0.0) && x < L_(1.0)))
 133         abort ();
 134       if (bits_needed < chunk_bits)
 135         {
 136           /* store bits_needed bits */
 137           accu |= d >> (chunk_bits - bits_needed);
 138           *p = accu;
 139           if (p == limbs)
 140             goto done;
 141           p--;
 142           /* hold (chunk_bits - bits_needed) bits */
 143           accu = d << (GMP_LIMB_BITS - (chunk_bits - bits_needed));
 144           bits_needed = GMP_LIMB_BITS - (chunk_bits - bits_needed);
 145         }
 146       else
 147         {
 148           /* store chunk_bits bits */
 149           accu |= d << (bits_needed - chunk_bits);
 150           bits_needed -= chunk_bits;
 151           if (bits_needed == 0)
 152             {
 153               *p = accu;
 154               if (p == limbs)
 155                 goto done;
 156               p--;
 157               accu = 0;
 158               bits_needed = GMP_LIMB_BITS;
 159             }
 160         }
 161     }
 162   if (chunk_count > 1)
 163     {
 164       /* Here we still have MANT_BIT-1*31 bits to extract from x.  */
 165       enum { chunk_bits = MIN (31, MAX (MANT_BIT - 1 * 31, 0)) }; /* > 0, <= 31 */
 166       mp_limb_t d;
 167       x *= (mp_limb_t) 1 << chunk_bits;
 168       d = (int) x; /* 0 <= d < 2^chunk_bits.  */
 169       x -= d;
 170       if (!(x >= L_(0.0) && x < L_(1.0)))
 171         abort ();
 172       if (bits_needed < chunk_bits)
 173         {
 174           /* store bits_needed bits */
 175           accu |= d >> (chunk_bits - bits_needed);
 176           *p = accu;
 177           if (p == limbs)
 178             goto done;
 179           p--;
 180           /* hold (chunk_bits - bits_needed) bits */
 181           accu = d << (GMP_LIMB_BITS - (chunk_bits - bits_needed));
 182           bits_needed = GMP_LIMB_BITS - (chunk_bits - bits_needed);
 183         }
 184       else
 185         {
 186           /* store chunk_bits bits */
 187           accu |= d << (bits_needed - chunk_bits);
 188           bits_needed -= chunk_bits;
 189           if (bits_needed == 0)
 190             {
 191               *p = accu;
 192               if (p == limbs)
 193                 goto done;
 194               p--;
 195               accu = 0;
 196               bits_needed = GMP_LIMB_BITS;
 197             }
 198         }
 199     }
 200   if (chunk_count > 2)
 201     {
 202       /* Here we still have MANT_BIT-2*31 bits to extract from x.  */
 203       enum { chunk_bits = MIN (31, MAX (MANT_BIT - 2 * 31, 0)) }; /* > 0, <= 31 */
 204       mp_limb_t d;
 205       x *= (mp_limb_t) 1 << chunk_bits;
 206       d = (int) x; /* 0 <= d < 2^chunk_bits.  */
 207       x -= d;
 208       if (!(x >= L_(0.0) && x < L_(1.0)))
 209         abort ();
 210       if (bits_needed < chunk_bits)
 211         {
 212           /* store bits_needed bits */
 213           accu |= d >> (chunk_bits - bits_needed);
 214           *p = accu;
 215           if (p == limbs)
 216             goto done;
 217           p--;
 218           /* hold (chunk_bits - bits_needed) bits */
 219           accu = d << (GMP_LIMB_BITS - (chunk_bits - bits_needed));
 220           bits_needed = GMP_LIMB_BITS - (chunk_bits - bits_needed);
 221         }
 222       else
 223         {
 224           /* store chunk_bits bits */
 225           accu |= d << (bits_needed - chunk_bits);
 226           bits_needed -= chunk_bits;
 227           if (bits_needed == 0)
 228             {
 229               *p = accu;
 230               if (p == limbs)
 231                 goto done;
 232               p--;
 233               accu = 0;
 234               bits_needed = GMP_LIMB_BITS;
 235             }
 236         }
 237     }
 238   if (chunk_count > 3)
 239     {
 240       /* Here we still have MANT_BIT-3*31 bits to extract from x.  */
 241       enum { chunk_bits = MIN (31, MAX (MANT_BIT - 3 * 31, 0)) }; /* > 0, <= 31 */
 242       mp_limb_t d;
 243       x *= (mp_limb_t) 1 << chunk_bits;
 244       d = (int) x; /* 0 <= d < 2^chunk_bits.  */
 245       x -= d;
 246       if (!(x >= L_(0.0) && x < L_(1.0)))
 247         abort ();
 248       if (bits_needed < chunk_bits)
 249         {
 250           /* store bits_needed bits */
 251           accu |= d >> (chunk_bits - bits_needed);
 252           *p = accu;
 253           if (p == limbs)
 254             goto done;
 255           p--;
 256           /* hold (chunk_bits - bits_needed) bits */
 257           accu = d << (GMP_LIMB_BITS - (chunk_bits - bits_needed));
 258           bits_needed = GMP_LIMB_BITS - (chunk_bits - bits_needed);
 259         }
 260       else
 261         {
 262           /* store chunk_bits bits */
 263           accu |= d << (bits_needed - chunk_bits);
 264           bits_needed -= chunk_bits;
 265           if (bits_needed == 0)
 266             {
 267               *p = accu;
 268               if (p == limbs)
 269                 goto done;
 270               p--;
 271               accu = 0;
 272               bits_needed = GMP_LIMB_BITS;
 273             }
 274         }
 275     }
 276   if (chunk_count > 4)
 277     {
 278       /* Here we still have MANT_BIT-4*31 bits to extract from x.  */
 279       /* Generic loop.  */
 280       size_t k;
 281       for (k = 4; k < chunk_count; k++)
 282         {
 283           size_t chunk_bits = MIN (31, MANT_BIT - k * 31); /* > 0, <= 31 */
 284           mp_limb_t d;
 285           x *= (mp_limb_t) 1 << chunk_bits;
 286           d = (int) x; /* 0 <= d < 2^chunk_bits.  */
 287           x -= d;
 288           if (!(x >= L_(0.0) && x < L_(1.0)))
 289             abort ();
 290           if (bits_needed < chunk_bits)
 291             {
 292               /* store bits_needed bits */
 293               accu |= d >> (chunk_bits - bits_needed);
 294               *p = accu;
 295               if (p == limbs)
 296                 goto done;
 297               p--;
 298               /* hold (chunk_bits - bits_needed) bits */
 299               accu = d << (GMP_LIMB_BITS - (chunk_bits - bits_needed));
 300               bits_needed = GMP_LIMB_BITS - (chunk_bits - bits_needed);
 301             }
 302           else
 303             {
 304               /* store chunk_bits bits */
 305               accu |= d << (bits_needed - chunk_bits);
 306               bits_needed -= chunk_bits;
 307               if (bits_needed == 0)
 308                 {
 309                   *p = accu;
 310                   if (p == limbs)
 311                     goto done;
 312                   p--;
 313                   accu = 0;
 314                   bits_needed = GMP_LIMB_BITS;
 315                 }
 316             }
 317         }
 318     }
 319   /* We shouldn't get here.  */
 320   abort ();
 321
 322  done: ;
 323 #ifndef USE_LONG_DOUBLE /* On FreeBSD 6.1/x86, 'long double' numbers sometimes
 324                            have excess precision.  */
 325   if (!(x == L_(0.0)))
 326     abort ();
 327 #endif
 328 }
 329
 330 /* Multiply two sequences of limbs.  */
 331 static void
 332 multiply (mp_limb_t xlimbs[NLIMBS1], mp_limb_t ylimbs[NLIMBS1],
 333           mp_limb_t prod_limbs[2 * NLIMBS1])
 334 {
 335   size_t k, i, j;
 336   enum { len1 = NLIMBS1 };
 337   enum { len2 = NLIMBS1 };
 338
 339   for (k = len2; k > 0; )
 340     prod_limbs[--k] = 0;
 341   for (i = 0; i < len1; i++)
 342     {
 343       mp_limb_t digit1 = xlimbs[i];
 344       mp_twolimb_t carry = 0;
 345       for (j = 0; j < len2; j++)
 346         {
 347           mp_limb_t digit2 = ylimbs[j];
 348           carry += (mp_twolimb_t) digit1 * (mp_twolimb_t) digit2;
 349           carry += prod_limbs[i + j];
 350           prod_limbs[i + j] = (mp_limb_t) carry;
 351           carry = carry >> GMP_LIMB_BITS;
 352         }
 353       prod_limbs[i + len2] = (mp_limb_t) carry;
 354     }
 355 }
 356
 357 DOUBLE
 358 FUNC (DOUBLE x, DOUBLE y, DOUBLE z)
 359 {
 360   if (isfinite (x) && isfinite (y))
 361     {
 362       if (isfinite (z))
 363         {
 364           /* x, y, z are all finite.  */
 365           if (x == L_(0.0) || y == L_(0.0))
 366             return z;
 367           if (z == L_(0.0))
 368             return x * y;
 369           /* x, y, z are all non-zero.
 370              The result is x * y + z.  */
 371           {
 372             int e;                  /* exponent of x * y + z */
 373             int sign;
 374             mp_limb_t sum[NLIMBS3];
 375             size_t sum_len;
 376
 377             {
 378               int xys;                /* sign of x * y */
 379               int zs;                 /* sign of z */
 380               int xye;                /* sum of exponents of x and y */
 381               int ze;                 /* exponent of z */
 382               mp_limb_t summand1[NLIMBS3];
 383               size_t summand1_len;
 384               mp_limb_t summand2[NLIMBS3];
 385               size_t summand2_len;
 386
 387               {
 388                 mp_limb_t zlimbs[NLIMBS1];
 389                 mp_limb_t xylimbs[2 * NLIMBS1];
 390
 391                 {
 392                   DOUBLE xn;              /* normalized part of x */
 393                   DOUBLE yn;              /* normalized part of y */
 394                   DOUBLE zn;              /* normalized part of z */
 395                   int xe;                 /* exponent of x */
 396                   int ye;                 /* exponent of y */
 397                   mp_limb_t xlimbs[NLIMBS1];
 398                   mp_limb_t ylimbs[NLIMBS1];
 399
 400                   xys = 0;
 401                   xn = x;
 402                   if (x < 0)
 403                     {
 404                       xys = 1;
 405                       xn = - x;
 406                     }
 407                   yn = y;
 408                   if (y < 0)
 409                     {
 410                       xys = 1 - xys;
 411                       yn = - y;
 412                     }
 413
 414                   zs = 0;
 415                   zn = z;
 416                   if (z < 0)
 417                     {
 418                       zs = 1;
 419                       zn = - z;
 420                     }
 421
 422                   /* xn, yn, zn are all positive.
 423                      The result is (-1)^xys * xn * yn + (-1)^zs * zn.  */
 424                   xn = FREXP (xn, &xe);
 425                   yn = FREXP (yn, &ye);
 426                   zn = FREXP (zn, &ze);
 427                   xye = xe + ye;
 428                   /* xn, yn, zn are all < 1.0 and >= 0.5.
 429                      The result is
 430                        (-1)^xys * 2^xye * xn * yn + (-1)^zs * 2^ze * zn.  */
 431                   if (xye < ze - MANT_BIT)
 432                     {
 433                       /* 2^xye * xn * yn < 2^xye <= 2^(ze-MANT_BIT-1)  */
 434                       return z;
 435                     }
 436                   if (xye - 2 * MANT_BIT > ze)
 437                     {
 438                       /* 2^ze * zn < 2^ze <= 2^(xye-2*MANT_BIT-1).
 439                          We cannot simply do
 440                            return x * y;
 441                          because it would round differently: A round-to-even
 442                          in the multiplication can be a round-up or round-down
 443                          here, due to z.  So replace z with a value that doesn't
 444                          require the use of long bignums but that rounds the
 445                          same way.  */
 446                       zn = L_(0.5);
 447                       ze = xye - 2 * MANT_BIT - 1;
 448                     }
 449                   /* Convert mantissas of xn, yn, zn to limb sequences:
 450                      xlimbs = 2^MANT_BIT * xn
 451                      ylimbs = 2^MANT_BIT * yn
 452                      zlimbs = 2^MANT_BIT * zn  */
 453                   decode (xn, xlimbs);
 454                   decode (yn, ylimbs);
 455                   decode (zn, zlimbs);
 456                   /* Multiply the mantissas of xn and yn:
 457                      xylimbs = xlimbs * ylimbs  */
 458                   multiply (xlimbs, ylimbs, xylimbs);
 459                 }
 460                 /* The result is
 461                      (-1)^xys * 2^(xye-2*MANT_BIT) * xylimbs
 462                      + (-1)^zs * 2^(ze-MANT_BIT) * zlimbs.
 463                    Compute
 464                      e = min (xye-2*MANT_BIT, ze-MANT_BIT)
 465                    then
 466                      summand1 = 2^(xye-2*MANT_BIT-e) * xylimbs
 467                      summand2 = 2^(ze-MANT_BIT-e) * zlimbs  */
 468                 e = MIN (xye - 2 * MANT_BIT, ze - MANT_BIT);
 469                 if (e == xye - 2 * MANT_BIT)
 470                   {
 471                     /* Simply copy the limbs of xylimbs.  */
 472                     size_t i;
 473                     for (i = 0; i < 2 * NLIMBS1; i++)
 474                       summand1[i] = xylimbs[i];
 475                     summand1_len = 2 * NLIMBS1;
 476                   }
 477                 else
 478                   {
 479                     size_t ediff = xye - 2 * MANT_BIT - e;
 480                     /* Left shift the limbs of xylimbs by ediff bits.  */
 481                     size_t ldiff = ediff / GMP_LIMB_BITS;
 482                     size_t shift = ediff % GMP_LIMB_BITS;
 483                     size_t i;
 484                     for (i = 0; i < ldiff; i++)
 485                       summand1[i] = 0;
 486                     if (shift > 0)
 487                       {
 488                         mp_limb_t carry = 0;
 489                         for (i = 0; i < 2 * NLIMBS1; i++)
 490                           {
 491                             summand1[ldiff + i] = (xylimbs[i] << shift) | carry;
 492                             carry = xylimbs[i] >> (GMP_LIMB_BITS - shift);
 493                           }
 494                         summand1[ldiff + 2 * NLIMBS1] = carry;
 495                         summand1_len = ldiff + 2 * NLIMBS1 + 1;
 496                       }
 497                     else
 498                       {
 499                         for (i = 0; i < 2 * NLIMBS1; i++)
 500                           summand1[ldiff + i] = xylimbs[i];
 501                         summand1_len = ldiff + 2 * NLIMBS1;
 502                       }
 503                     /* Estimation of needed array size:
 504                        ediff = (xye - 2 * MANT_BIT) - (ze - MANT_BIT) <= MANT_BIT + 1
 505                        therefore
 506                        summand1_len
 507                          = (ediff + GMP_LIMB_BITS - 1) / GMP_LIMB_BITS + 2 * NLIMBS1
 508                          <= (MANT_BIT + GMP_LIMB_BITS) / GMP_LIMB_BITS + 2 * NLIMBS1
 509                          <= 3 * NLIMBS1 + 1
 510                          = NLIMBS3  */
 511                     if (!(summand1_len <= NLIMBS3))
 512                       abort ();
 513                   }
 514                 if (e == ze - MANT_BIT)
 515                   {
 516                     /* Simply copy the limbs of zlimbs.  */
 517                     size_t i;
 518                     for (i = 0; i < NLIMBS1; i++)
 519                       summand2[i] = zlimbs[i];
 520                     summand2_len = NLIMBS1;
 521                   }
 522                 else
 523                   {
 524                     size_t ediff = ze - MANT_BIT - e;
 525                     /* Left shift the limbs of zlimbs by ediff bits.  */
 526                     size_t ldiff = ediff / GMP_LIMB_BITS;
 527                     size_t shift = ediff % GMP_LIMB_BITS;
 528                     size_t i;
 529                     for (i = 0; i < ldiff; i++)
 530                       summand2[i] = 0;
 531                     if (shift > 0)
 532                       {
 533                         mp_limb_t carry = 0;
 534                         for (i = 0; i < NLIMBS1; i++)
 535                           {
 536                             summand2[ldiff + i] = (zlimbs[i] << shift) | carry;
 537                             carry = zlimbs[i] >> (GMP_LIMB_BITS - shift);
 538                           }
 539                         summand2[ldiff + NLIMBS1] = carry;
 540                         summand2_len = ldiff + NLIMBS1 + 1;
 541                       }
 542                     else
 543                       {
 544                         for (i = 0; i < NLIMBS1; i++)
 545                           summand2[ldiff + i] = zlimbs[i];
 546                         summand2_len = ldiff + NLIMBS1;
 547                       }
 548                     /* Estimation of needed array size:
 549                        ediff = (ze - MANT_BIT) - (xye - 2 * MANT_BIT) <= 2 * MANT_BIT
 550                        therefore
 551                        summand2_len
 552                          = (ediff + GMP_LIMB_BITS - 1) / GMP_LIMB_BITS + NLIMBS1
 553                          <= (2 * MANT_BIT + GMP_LIMB_BITS - 1) / GMP_LIMB_BITS + NLIMBS1
 554                          <= 3 * NLIMBS1 + 1
 555                          = NLIMBS3  */
 556                     if (!(summand2_len <= NLIMBS3))
 557                       abort ();
 558                   }
 559               }
 560               /* The result is
 561                    (-1)^xys * 2^e * summand1 + (-1)^zs * 2^e * summand2.  */
 562               if (xys == zs)
 563                 {
 564                   /* Perform an addition.  */
 565                   size_t i;
 566                   mp_limb_t carry;
 567
 568                   sign = xys;
 569                   carry = 0;
 570                   for (i = 0; i < MIN (summand1_len, summand2_len); i++)
 571                     {
 572                       mp_limb_t digit1 = summand1[i];
 573                       mp_limb_t digit2 = summand2[i];
 574                       sum[i] = digit1 + digit2 + carry;
 575                       carry = (carry
 576                                ? digit1 >= (mp_limb_t)-1 - digit2
 577                                : digit1 > (mp_limb_t)-1 - digit2);
 578                     }
 579                   if (summand1_len > summand2_len)
 580                     for (; i < summand1_len; i++)
 581                       {
 582                         mp_limb_t digit1 = summand1[i];
 583                         sum[i] = carry + digit1;
 584                         carry = carry && digit1 == (mp_limb_t)-1;
 585                       }
 586                   else
 587                     for (; i < summand2_len; i++)
 588                       {
 589                         mp_limb_t digit2 = summand2[i];
 590                         sum[i] = carry + digit2;
 591                         carry = carry && digit2 == (mp_limb_t)-1;
 592                       }
 593                   if (carry)
 594                     sum[i++] = carry;
 595                   sum_len = i;
 596                 }
 597               else
 598                 {
 599                   /* Perform a subtraction.  */
 600                   /* Compare summand1 and summand2 by magnitude.  */
 601                   while (summand1[summand1_len - 1] == 0)
 602                     summand1_len--;
 603                   while (summand2[summand2_len - 1] == 0)
 604                     summand2_len--;
 605                   if (summand1_len > summand2_len)
 606                     sign = xys;
 607                   else if (summand1_len < summand2_len)
 608                     sign = zs;
 609                   else
 610                     {
 611                       size_t i = summand1_len;
 612                       for (;;)
 613                         {
 614                           --i;
 615                           if (summand1[i] > summand2[i])
 616                             {
 617                               sign = xys;
 618                               break;
 619                             }
 620                           if (summand1[i] < summand2[i])
 621                             {
 622                               sign = zs;
 623                               break;
 624                             }
 625                           if (i == 0)
 626                             /* summand1 and summand2 are equal.  */
 627                             return L_(0.0);
 628                         }
 629                     }
 630                   if (sign == xys)
 631                     {
 632                       /* Compute summand1 - summand2.  */
 633                       size_t i;
 634                       mp_limb_t carry;
 635
 636                       carry = 0;
 637                       for (i = 0; i < summand2_len; i++)
 638                         {
 639                           mp_limb_t digit1 = summand1[i];
 640                           mp_limb_t digit2 = summand2[i];
 641                           sum[i] = digit1 - digit2 - carry;
 642                           carry = (carry ? digit1 <= digit2 : digit1 < digit2);
 643                         }
 644                       for (; i < summand1_len; i++)
 645                         {
 646                           mp_limb_t digit1 = summand1[i];
 647                           sum[i] = digit1 - carry;
 648                           carry = carry && digit1 == 0;
 649                         }
 650                       if (carry)
 651                         abort ();
 652                       sum_len = summand1_len;
 653                     }
 654                   else
 655                     {
 656                       /* Compute summand2 - summand1.  */
 657                       size_t i;
 658                       mp_limb_t carry;
 659
 660                       carry = 0;
 661                       for (i = 0; i < summand1_len; i++)
 662                         {
 663                           mp_limb_t digit1 = summand1[i];
 664                           mp_limb_t digit2 = summand2[i];
 665                           sum[i] = digit2 - digit1 - carry;
 666                           carry = (carry ? digit2 <= digit1 : digit2 < digit1);
 667                         }
 668                       for (; i < summand2_len; i++)
 669                         {
 670                           mp_limb_t digit2 = summand2[i];
 671                           sum[i] = digit2 - carry;
 672                           carry = carry && digit2 == 0;
 673                         }
 674                       if (carry)
 675                         abort ();
 676                       sum_len = summand2_len;
 677                     }
 678                 }
 679             }
 680             /* The result is
 681                  (-1)^sign * 2^e * sum.  */
 682             /* Now perform the rounding to MANT_BIT mantissa bits.  */
 683             while (sum[sum_len - 1] == 0)
 684               sum_len--;
 685             /* Here we know that the most significant limb, sum[sum_len - 1], is
 686                non-zero.  */
 687             {
 688               /* How many bits the sum has.  */
 689               unsigned int sum_bits =
 690                 integer_length (sum[sum_len - 1]) + (sum_len - 1) * GMP_LIMB_BITS;
 691               /* How many bits to keep when rounding.  */
 692               unsigned int keep_bits;
 693               /* How many bits to round off.  */
 694               unsigned int roundoff_bits;
 695               if (e + (int) sum_bits >= MIN_EXP)
 696                 /* 2^e * sum >= 2^(MIN_EXP-1).
 697                    result will be a normalized number.  */
 698                 keep_bits = MANT_BIT;
 699               else if (e + (int) sum_bits >= MIN_EXP - MANT_BIT)
 700                 /* 2^e * sum >= 2^(MIN_EXP-MANT_BIT-1).
 701                    result will be a denormalized number or rounded to zero.  */
 702                 keep_bits = e + (int) sum_bits - (MIN_EXP + MANT_BIT);
 703               else
 704                 /* 2^e * sum < 2^(MIN_EXP-MANT_BIT-1).  Round to zero.  */
 705                 return L_(0.0);
 706               /* Note: 0 <= keep_bits <= MANT_BIT.  */
 707               if (sum_bits <= keep_bits)
 708                 {
 709                   /* Nothing to do.  */
 710                   roundoff_bits = 0;
 711                   keep_bits = sum_bits;
 712                 }
 713               else
 714                 {
 715                   int round_up;
 716                   roundoff_bits = sum_bits - keep_bits; /* > 0, <= sum_bits */
 717                   {
 718 #if HAVE_FEGETROUND && defined FE_TOWARDZERO
 719                     /* Cf. <http://pubs.opengroup.org/onlinepubs/9699919799/functions/fegetround.html> */
 720                     int rounding_mode = fegetround ();
 721                     if (rounding_mode == FE_TOWARDZERO)
 722                       round_up = 0;
 723                     else if (rounding_mode == FE_DOWNWARD)
 724                       round_up = sign;
 725                     else if (rounding_mode == FE_UPWARD)
 726                       round_up = !sign;
 727 #else
 728                     /* Cf. <http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/float.h.html> */
 729                     int rounding_mode = FLT_ROUNDS;
 730                     if (rounding_mode == 0) /* toward zero */
 731                       round_up = 0;
 732                     else if (rounding_mode == 3) /* toward negative infinity */
 733                       round_up = sign;
 734                     else if (rounding_mode == 2) /* toward positive infinity */
 735                       round_up = !sign;
 736 #endif
 737                     else
 738                       {
 739                         /* Round to nearest.  */
 740                         round_up = 0;
 741                         /* Test bit (roundoff_bits-1).  */
 742                         if ((sum[(roundoff_bits - 1) / GMP_LIMB_BITS]
 743                              >> ((roundoff_bits - 1) % GMP_LIMB_BITS)) & 1)
 744                           {
 745                             /* Test bits roundoff_bits-1 .. 0.  */
 746                             bool halfway =
 747                               ((sum[(roundoff_bits - 1) / GMP_LIMB_BITS]
 748                                 & (((mp_limb_t) 1 << ((roundoff_bits - 1) % GMP_LIMB_BITS)) - 1))
 749                                == 0);
 750                             if (halfway)
 751                               {
 752                                 int i;
 753                                 for (i = (roundoff_bits - 1) / GMP_LIMB_BITS - 1; i >= 0; i--)
 754                                   if (sum[i] != 0)
 755                                     {
 756                                       halfway = false;
 757                                       break;
 758                                     }
 759                               }
 760                             if (halfway)
 761                               /* Round to even.  Test bit roundoff_bits.  */
 762                               round_up = ((sum[roundoff_bits / GMP_LIMB_BITS]
 763                                           >> (roundoff_bits % GMP_LIMB_BITS)) & 1);
 764                             else
 765                               /* Round up.  */
 766                               round_up = 1;
 767                           }
 768                       }
 769                   }
 770                   /* Perform the rounding.  */
 771                   {
 772                     size_t i = roundoff_bits / GMP_LIMB_BITS;
 773                     {
 774                       size_t j = i;
 775                       while (j > 0)
 776                         sum[--j] = 0;
 777                     }
 778                     if (round_up)
 779                       {
 780                         /* Round up.  */
 781                         sum[i] =
 782                           (sum[i]
 783                            | (((mp_limb_t) 1 << (roundoff_bits % GMP_LIMB_BITS)) - 1))
 784                           + 1;
 785                         if (sum[i] == 0)
 786                           {
 787                             /* Propagate carry.  */
 788                             while (i < sum_len - 1)
 789                               {
 790                                 ++i;
 791                                 sum[i] += 1;
 792                                 if (sum[i] != 0)
 793                                   break;
 794                               }
 795                           }
 796                         /* sum[i] is the most significant limb that was
 797                            incremented.  */
 798                         if (i == sum_len - 1 && (sum[i] & (sum[i] - 1)) == 0)
 799                           {
 800                             /* Through the carry, one more bit is needed.  */
 801                             if (sum[i] != 0)
 802                               sum_bits += 1;
 803                             else
 804                               {
 805                                 /* Instead of requiring one more limb of memory,
 806                                    perform a shift by one bit, and adjust the
 807                                    exponent.  */
 808                                 sum[i] = (mp_limb_t) 1 << (GMP_LIMB_BITS - 1);
 809                                 e += 1;
 810                               }
 811                             /* The bit sequence has the form 1000...000.  */
 812                             keep_bits = 1;
 813                           }
 814                       }
 815                     else
 816                       {
 817                         /* Round down.  */
 818                         sum[i] &= ((mp_limb_t) -1 << (roundoff_bits % GMP_LIMB_BITS));
 819                         if (i == sum_len - 1 && sum[i] == 0)
 820                           /* The entire sum has become zero.  */
 821                           return L_(0.0);
 822                       }
 823                   }
 824                 }
 825               /* The result is
 826                    (-1)^sign * 2^e * sum
 827                  and here we know that
 828                    2^(sum_bits-1) <= sum < 2^sum_bits,
 829                  and sum is a multiple of 2^(sum_bits-keep_bits), where
 830                    0 < keep_bits <= MANT_BIT  and  keep_bits <= sum_bits.
 831                  (If keep_bits was initially 0, the rounding either returned zero
 832                  or produced a bit sequence of the form 1000...000, setting
 833                  keep_bits to 1.)  */
 834               {
 835                 /* Split the keep_bits bits into chunks of at most 32 bits.  */
 836                 unsigned int chunk_count = (keep_bits - 1) / GMP_LIMB_BITS + 1;
 837                 /* 1 <= chunk_count <= ceil (sum_bits / GMP_LIMB_BITS) = sum_len. */
 838                 static const DOUBLE chunk_multiplier = /* 2^-GMP_LIMB_BITS */
 839                   L_(1.0) / ((DOUBLE) (1 << (GMP_LIMB_BITS / 2))
 840                              * (DOUBLE) (1 << ((GMP_LIMB_BITS + 1) / 2)));
 841                 unsigned int shift = sum_bits % GMP_LIMB_BITS;
 842                 DOUBLE fsum;
 843                 if (MANT_BIT <= GMP_LIMB_BITS)
 844                   {
 845                     /* Since keep_bits <= MANT_BIT <= GMP_LIMB_BITS,
 846                        chunk_count is 1.  No need for a loop.  */
 847                     if (shift == 0)
 848                       fsum = (DOUBLE) sum[sum_len - 1];
 849                     else
 850                       fsum = (DOUBLE)
 851                              ((sum[sum_len - 1] << (GMP_LIMB_BITS - shift))
 852                               | (sum_len >= 2 ? sum[sum_len - 2] >> shift : 0));
 853                   }
 854                 else
 855                   {
 856                     int k;
 857                     k = chunk_count - 1;
 858                     if (shift == 0)
 859                       {
 860                         /* First loop round.  */
 861                         fsum = (DOUBLE) sum[sum_len - k - 1];
 862                         /* General loop.  */
 863                         while (--k >= 0)
 864                           {
 865                             fsum *= chunk_multiplier;
 866                             fsum += (DOUBLE) sum[sum_len - k - 1];
 867                           }
 868                       }
 869                     else
 870                       {
 871                         /* First loop round.  */
 872                         fsum = (DOUBLE)
 873                                ((sum[sum_len - k - 1] << (GMP_LIMB_BITS - shift))
 874                                 | (sum_len >= k + 2 ? sum[sum_len - k - 2] >> shift : 0));
 875                         /* General loop.  */
 876                         while (--k >= 0)
 877                           {
 878                             fsum *= chunk_multiplier;
 879                             fsum += (DOUBLE)
 880                                     ((sum[sum_len - k - 1] << (GMP_LIMB_BITS - shift))
 881                                      | (sum[sum_len - k - 2] >> shift));
 882                           }
 883                       }
 884                   }
 885                 fsum = LDEXP (fsum, e + (int) sum_bits - GMP_LIMB_BITS);
 886                 return (sign ? - fsum : fsum);
 887               }
 888             }
 889           }
 890         }
 891       else
 892         return z;
 893     }
 894   else
 895     return (x * y) + z;
 896 }