gopher.r-36.net

       s_fma.c - vx32 - Local 9vx git repository for patches.
 (HTM) git clone git://r-36.net/vx32
 (DIR) Log
 (DIR) Files
 (DIR) Refs
       ---
       s_fma.c (5003B)
       ---
            1 /*-
            2  * Copyright (c) 2005 David Schultz <das@FreeBSD.ORG>
            3  * All rights reserved.
            4  *
            5  * Redistribution and use in source and binary forms, with or without
            6  * modification, are permitted provided that the following conditions
            7  * are met:
            8  * 1. Redistributions of source code must retain the above copyright
            9  *    notice, this list of conditions and the following disclaimer.
           10  * 2. Redistributions in binary form must reproduce the above copyright
           11  *    notice, this list of conditions and the following disclaimer in the
           12  *    documentation and/or other materials provided with the distribution.
           13  *
           14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
           15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
           16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
           17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
           18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
           19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
           20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
           21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
           22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
           23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
           24  * SUCH DAMAGE.
           25  */
           26 
           27 #include <fenv.h>
           28 #include <float.h>
           29 #include <math.h>
           30 
           31 /*
           32  * Fused multiply-add: Compute x * y + z with a single rounding error.
           33  *
           34  * We use scaling to avoid overflow/underflow, along with the
           35  * canonical precision-doubling technique adapted from:
           36  *
           37  *        Dekker, T.  A Floating-Point Technique for Extending the
           38  *        Available Precision.  Numer. Math. 18, 224-242 (1971).
           39  *
           40  * This algorithm is sensitive to the rounding precision.  FPUs such
           41  * as the i387 must be set in double-precision mode if variables are
           42  * to be stored in FP registers in order to avoid incorrect results.
           43  * This is the default on FreeBSD, but not on many other systems.
           44  *
           45  * Tests on an Itanium 2 indicate that the hardware's FMA instruction
           46  * is almost twice as fast as this implementation.  The hardware
           47  * instruction should be used on platforms that support it.
           48  *
           49  * XXX May incur an absolute error of 0x1p-1074 for subnormal results
           50  *     due to double rounding induced by the final scaling operation.
           51  *
           52  * XXX On machines supporting quad precision, we should use that, but
           53  *     see the caveat in s_fmaf.c.
           54  */
           55 double
           56 fma(double x, double y, double z)
           57 {
           58         static const double split = 0x1p27 + 1.0;
           59         double xs, ys, zs;
           60         double c, cc, hx, hy, p, q, tx, ty;
           61         double r, rr, s;
           62         int oround;
           63         int ex, ey, ez;
           64         int spread;
           65 
           66         if (x == 0.0 || y == 0.0)
           67                 return (z);
           68         if (z == 0.0)
           69                 return (x * y);
           70 
           71         /* Results of frexp() are undefined for these cases. */
           72         if (!isfinite(x) || !isfinite(y) || !isfinite(z))
           73                 return (x * y + z);
           74 
           75         xs = frexp(x, &ex);
           76         ys = frexp(y, &ey);
           77         zs = frexp(z, &ez);
           78         oround = fegetround();
           79         spread = ex + ey - ez;
           80 
           81         /*
           82          * If x * y and z are many orders of magnitude apart, the scaling
           83          * will overflow, so we handle these cases specially.  Rounding
           84          * modes other than FE_TONEAREST are painful.
           85          */
           86         if (spread > DBL_MANT_DIG * 2) {
           87                 fenv_t env;
           88                 feraiseexcept(FE_INEXACT);
           89                 switch(oround) {
           90                 case FE_TONEAREST:
           91                         return (x * y);
           92                 case FE_TOWARDZERO:
           93                         if (x > 0.0 ^ y < 0.0 ^ z < 0.0)
           94                                 return (x * y);
           95                         feholdexcept(&env);
           96                         r = x * y;
           97                         if (!fetestexcept(FE_INEXACT))
           98                                 r = nextafter(r, 0);
           99                         feupdateenv(&env);
          100                         return (r);
          101                 case FE_DOWNWARD:
          102                         if (z > 0.0)
          103                                 return (x * y);
          104                         feholdexcept(&env);
          105                         r = x * y;
          106                         if (!fetestexcept(FE_INEXACT))
          107                                 r = nextafter(r, -INFINITY);
          108                         feupdateenv(&env);
          109                         return (r);
          110                 default:        /* FE_UPWARD */
          111                         if (z < 0.0)
          112                                 return (x * y);
          113                         feholdexcept(&env);
          114                         r = x * y;
          115                         if (!fetestexcept(FE_INEXACT))
          116                                 r = nextafter(r, INFINITY);
          117                         feupdateenv(&env);
          118                         return (r);
          119                 }
          120         }
          121         if (spread < -DBL_MANT_DIG) {
          122                 feraiseexcept(FE_INEXACT);
          123                 if (!isnormal(z))
          124                         feraiseexcept(FE_UNDERFLOW);
          125                 switch (oround) {
          126                 case FE_TONEAREST:
          127                         return (z);
          128                 case FE_TOWARDZERO:
          129                         if (x > 0.0 ^ y < 0.0 ^ z < 0.0)
          130                                 return (z);
          131                         else
          132                                 return (nextafter(z, 0));
          133                 case FE_DOWNWARD:
          134                         if (x > 0.0 ^ y < 0.0)
          135                                 return (z);
          136                         else
          137                                 return (nextafter(z, -INFINITY));
          138                 default:        /* FE_UPWARD */
          139                         if (x > 0.0 ^ y < 0.0)
          140                                 return (nextafter(z, INFINITY));
          141                         else
          142                                 return (z);
          143                 }
          144         }
          145 
          146         /*
          147          * Use Dekker's algorithm to perform the multiplication and
          148          * subsequent addition in twice the machine precision.
          149          * Arrange so that x * y = c + cc, and x * y + z = r + rr.
          150          */
          151         fesetround(FE_TONEAREST);
          152 
          153         p = xs * split;
          154         hx = xs - p;
          155         hx += p;
          156         tx = xs - hx;
          157 
          158         p = ys * split;
          159         hy = ys - p;
          160         hy += p;
          161         ty = ys - hy;
          162 
          163         p = hx * hy;
          164         q = hx * ty + tx * hy;
          165         c = p + q;
          166         cc = p - c + q + tx * ty;
          167 
          168         zs = ldexp(zs, -spread);
          169         r = c + zs;
          170         s = r - c;
          171         rr = (c - (r - s)) + (zs - s) + cc;
          172 
          173         fesetround(oround);
          174         return (ldexp(r + rr, ex + ey));
          175 }
          176