30位有效数字的浮点数结构解决double数据类型多次累加后的明显的误差

类别:编程语言 点击:0 评论:0 推荐:

30位有效数字的浮点数结构解决double数据类型多次累加后的明显的误差
标准的c或者c++的double数据类型只有15位有效数字(好像有这么回事,参看IEEE 754 ),
因此产生了大的数字多次累加后的明显的误差,在财务计算中,这种误差是不能接受的。
参看http://blog.csdn.net/l1t/archive/2004/10/09/129110.aspx
来自Lcc-win32 的src\doubledouble\doubledouble.c
(作者主页:http://members.lycos.co.uk/keithmbriggs/doubledouble.html
上还有一个c++封装的类,有兴趣的人可以去看。)
利用2个double变量构造出一个doubledouble结构,解决了这个问题。
测试办法:把main()函数的内容替换成下面内容。
int main(void)
{
 doubledouble a = new_doubledouble(5.0,0.0);
 doubledouble b = new_doubledouble(4.0,0.0);
 doubledouble c = a*b;
 double x1,x2;
 char buffer[256];
 int i;
 a=a-a+99999999.99;
 c=c-c+0.0;
 for(i=0;i<81920;i++)
 {
  c=c+a;
 }
 doubledoubletoa(c,buffer);
 printf("sum() =%s\n",buffer);
 c=a*81920.0;
 doubledoubletoa(c,buffer);
 printf("mult()=%s\n",buffer);
 x1=c.hi+c.lo;
 printf("mult()=%18.2lf\n",x1);
 return 0;
}
运行结果:
sum() =8.191999999180799560546875000000000e12
mult()=8.191999999180799560546875000000000e12
mult()=  8191999999180.80
在Lcc-win32 中,可以不加修改地通过编译。
如果在gcc下编译,需要定义宏#define GCC
在gcc和vs.net 2003 中,函数
doubledouble ddexp(const doubledouble& x)
{
...
 sum1=y*((((ysq+3960.)*ysq+2162160.)*ysq+302702400.)*ysq+8821612800.);
 sum2=(((90.*ysq+110880.)*ysq+30270240.)*ysq+2075673600.)*ysq+17643225600.;

...
}
上述2行不能通过编译,如果不需要这个函数,可以删除。

/* doubledouble.c */
#include <string.h>
#include <stdio.h>
#include <math.h>
#include <stdlib.h>
/*
This software was written by the mathematician Keith Martin Briggs. It implements
an extended precision data type "doubledouble", that is made up of two floating
point numbers. I have rewritten it for lcc-win32 and extensiveley used this code
to test the compiler's operator subclassing module.

The code should compile without problems in any C++ compiler.
---------------------------------------------------------------jacob
Copyright (C) 1997 Keith Martin Briggs

Except where otherwise indicated,
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with This program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
*/

// 97 Aug 04 KMB added ldexp
// 97 Jul 11 Moved This stuff out of quad.h, created inline.h so it can
// be #included even if we're not inlining, by just "#define inline"
// 97 Jul 12 Added all combos of doubledouble/double/int +-*/.  Only a couple actually
// written; most just call the others by swapping arguments.  Generates
// equivalent code with a good inlining compiler (tested with g++ 2.7.2).
// - e.g., all subtraction is now done by addition of unary minus.
// - removed if's from doubledouble*int. Zero-branch code is 2.5 faster (tested)
// - generally cleaned up and re-organized the order of the functions,
//   now they're grouped nicely by function.
// - tested Newton's division.  Works but is terribly slow, because
//   it needs to do several doubledouble + and * operations, and doubledouble /
//   without it is about the same speed at doubledouble * anyway, so no win.
// - moved recip here as an inline.
// - checked and tested doubledouble/double (BUG?), seems fine.
typedef struct doubledouble {
 double hi;
 double lo;
} doubledouble;
#ifdef __LCC__
#define x86_FIX \
unsigned short __old_cw, __new_cw;\
  _asm("\tfnstcw\t%__old_cw");\
  __new_cw = (__old_cw & ~0x300) | 0x200;\
  _asm ("\tfldcw\t%__new_cw")

#define END_x86_FIX  _asm ("\tfldcw\t%__old_cw")
#else
#define x86_FIX unsigned short __old_cw,__new_cw; _asm {fnstcw __old_cw}\
  __new_cw = (__old_cw & ~0x300) | 0x200;\
  _asm { fldcw __new_cw};
#define END_x86_FIX _asm { fldcw __new_cw}
#endif
#ifdef GCC
#undef x86_FIX
#undef END_x86_FIX
#define x86_FIX
#define END_x86_FIX
#endif

static double Split = 134217729.0;
doubledouble new_doubledouble(double x,double y);

doubledouble operator-(const doubledouble &x)
{
 doubledouble z;

 z.hi = -x.hi;
 z.lo = -x.lo;
 return z;
}

// Absolute value
doubledouble ddfabs(const doubledouble& x)
{
 if (x.hi>=0.0) return x;
 else {
  doubledouble result;

  result.hi = -x.hi;
  result.lo = -x.lo;
  return result;
 }
}

// Addition
/*      (C) W. Kahan 1989
*       NOTICE:
*       Copyrighted programs may not be translated, used, nor
*       reproduced without the author's permission.  Normally that
*       permission is granted freely for academic and scientific
*       purposes subject to the following three requirements:
*       1. This NOTICE and the copyright notices must remain attached
*          to the programs and their translations.
*       2. Users of such a program should regard themselves as voluntary
*          participants in the author's researches and so are obliged
*          to report their experience with the program back to the author.
*       3. Neither the author nor the institution that employs him will
*          be held responsible for the consequences of using a program
*          for which neither has received payment.
*       Would-be commercial users of these programs must correspond
*       with the author to arrange terms of payment and warranty.
*/


///////////////////////////////////////////////////////////////
///////////////////  Addition and Subtraction /////////////////
///////////////////////////////////////////////////////////////

// Binary addition
doubledouble operator +(const doubledouble& x,const doubledouble& y)
{
 double H, h, T, t, S, s, e, f;
 doubledouble z;
 x86_FIX;
 S = x.hi+y.hi;
 T = x.lo+y.lo;
 e = S-x.hi;
 f = T-x.lo;
 // s = S-e;
 t = T-f;
 s = (y.hi-e)+(x.hi-(S-e));
 t = (y.lo-f)+(x.lo-(T-f));
 e = s+T;
 H = S+e;
 h = e+(S-H);
 e = t+h;
 z.hi = H+e;
 z.lo = e+(double)(H-z.hi);
 END_x86_FIX;
 return z;
}


doubledouble operator +(const double& x,const doubledouble& y)
{
 double H, h, S, s, e;
 doubledouble z;
 x86_FIX;
 S = x+y.hi;
 e = S-x;
 s = S-e;
 s = (y.hi-e)+(x-s);
 H = S+(s+y.lo);
 h = (s+y.lo)+(S-H);
 z.hi = H+h;
 z.lo = h+(double)(H-z.hi);
 END_x86_FIX;
 return z;
}
doubledouble operator +(const doubledouble& x,const double& y )
{
 return y+x;
}
doubledouble operator +(int x,const doubledouble& y)
{
 return (double)(x)+y;
}
doubledouble operator +(doubledouble& x, int y) {
 return y+x;
}

// Subtraction
doubledouble operator -(const doubledouble& x,const doubledouble& y)
{
 return x+(-y);
}
doubledouble operator -(const double& x, const doubledouble& y)
{
 return x+(-y);
}
doubledouble operator -(const int x, const doubledouble& y)
{
 return x+(-y);
}
doubledouble operator -(const doubledouble& x, const double& y)
{
 return x+(-y);
}
doubledouble operator -(const doubledouble& x,const int y)
{
 return x+(-y);
}


//////////////////////////  Self-Addition  ///////////////////////
doubledouble& operator +=(doubledouble &This,const doubledouble& y)
{
 double H, h, T, t, S, s, e, f;
 x86_FIX;
 S =This.hi+y.hi;
 T = This.lo+y.lo;
 e = S-This.hi;
 f = T-This.lo;
 s = S-e;
 t = T-f;
 s = (y.hi-e)+(This.hi-s);
 t = (y.lo-f)+(This.lo-t);
 f = s+T;
 H = S+f;
 h = f+(S-H);
 This.hi = H+(t+h);
 This.lo = (t+h)+(double)(H-This.hi);
 END_x86_FIX;
 return This;
}

doubledouble& operator +=(doubledouble &This,const double& y)
{
 double H, h, S, s, e, f;
 x86_FIX;

 S =This.hi+y;
 e = S-This.hi;
 s = S-e;
 s = (y-e)+(This.hi-s);
 f = s+This.lo;
 H = S+f;
 h = f+(S-H);
 This.hi = H+h;
 This.lo = h+(double)(H-This.hi);
 END_x86_FIX;
 return This;
}
doubledouble& operator +=(doubledouble &This,const int y)
{
 return This += (double)(y);
}

// Self-Subtraction
doubledouble& operator -=(doubledouble &This,const doubledouble& y)
{
 return This += -y;
}
doubledouble& operator -=(doubledouble &This,const double& y)
{
 return This += -y;
}
doubledouble& operator -=(doubledouble &This,const int y)
{
 return This += -y;
}


/////////////////////////////////////////////////////////////
////////////////////  Multiplication  ///////////////////////
/////////////////////////////////////////////////////////////

// Binary Multiplication
doubledouble operator *(const doubledouble& x,const doubledouble& y)
{
 double hx, tx, hy, ty, C, c;
 doubledouble z;
 x86_FIX;

 C = Split*x.hi;
 hx = C-x.hi;
 c = Split*y.hi;
 hx = C-hx;
 tx = x.hi-hx;
 hy = c-y.hi;
 C = x.hi*y.hi;
 hy = c-hy;
 ty = y.hi-hy;
 c = ((((hx*hy-C)+hx*ty)+tx*hy)+tx*ty)+(x.hi*y.lo+x.lo*y.hi);
 z.hi = C+c;
 hx=C-z.hi;
 z.lo = c+hx;
 END_x86_FIX;
 return z;
}

// double*doubledouble
doubledouble operator *(const double& x, const doubledouble& y)
{
 double hx, tx, hy, ty, C, c;
 doubledouble z;
 x86_FIX;
 C = Split*x;
 hx = C-x;
 c = Split*y.hi;
 hx = C-hx ;
 tx = x-hx;
 hy = c-y.hi;
 C = x*y.hi;
 hy = c-hy;
 ty = y.hi - hy;
 c = ((((hx*hy-C)+hx*ty)+tx*hy)+tx*ty)+x*y.lo;
 z.hi = C+c;
 z.lo = c+(double)(C-z.hi);
 END_x86_FIX;
 return z;
}

doubledouble operator *(int x, const doubledouble& y )
{
 return ((const double)(x))*y;
}
doubledouble operator *(const doubledouble& x,const double& y )
{
 return y*x;
}
doubledouble operator *(const doubledouble& x,const int y )
{
 return y*x;
}

// Self-multiplication
doubledouble& operator *=(doubledouble &This,const doubledouble& y )
{
 double hx, tx, hy, ty, C, c;
 x86_FIX;
 C = Split*This.hi;
 hx = C-This.hi;
 c = Split*y.hi;
 hx = C-hx;
 tx =This.hi-hx;
 hy = c-y.hi;
 C =This.hi*y.hi;
 hy = c-hy;
 ty = y.hi-hy;
 c = ((((hx*hy-C)+hx*ty)+tx*hy)+tx*ty)+(This.hi*y.lo+This.lo*y.hi);
 hx = C+c;
 This.hi = hx;
 This.lo = c+(double)(C-hx);
 END_x86_FIX;
 return This;
}

doubledouble& operator *=(doubledouble &This,const double& y )
{
 double hx, tx, hy, ty, C, c;
 x86_FIX;
 C = Split*This.hi;
 hx = C-This.hi;
 c = Split*y;
 hx = C-hx;
 tx =This.hi-hx;
 hy = c-y;
 C =This.hi*y;
 hy = c-hy;
 ty = y-hy;
 c = ((((hx*hy-C)+hx*ty)+tx*hy)+tx*ty)+(This.lo*y);
 hx = C+c;
 This.hi = hx;
 This.lo = c+(double)(C-hx);
 END_x86_FIX;
 return This;
}

doubledouble& operator *=(doubledouble &This,const int y )
{
 return This *= (const double)(y);
}


////////////////////////////////////////////////////////////////
//////////////////////////  Division  //////////////////////////
////////////////////////////////////////////////////////////////

// Reciprocal
doubledouble recip(const doubledouble& y) {
 double  hc, tc, hy, ty, C, c, U, u;
 doubledouble z;
 x86_FIX;

 C = 1.0/y.hi;
 c = Split*C;
 hc =c-C;
 u = Split*y.hi;
 hc = c-hc;
 tc = C-hc;
 hy = u-y.hi;
 U = C*y.hi;
 hy = u-hy;
 ty = y.hi-hy;
 u = (((hc*hy-U)+hc*ty)+tc*hy)+tc*ty;
 c = ((((1.0-U)-u))-C*y.lo)/y.hi;
 z.hi = C+c;
 z.lo = (double)(C-z.hi)+c;
 END_x86_FIX;
 return z;
}

doubledouble operator /(const doubledouble& x,const doubledouble& y )
{
 double hc, tc, hy, ty, C, c, U, u;
 doubledouble z;
 x86_FIX;
 C = x.hi/y.hi;
 c =Split*C;
 hc =c-C;
 u = Split*y.hi;
 hc = c-hc;
 tc = C-hc;
 hy = u-y.hi;
 U = C * y.hi;
 hy = u-hy;
 ty = y.hi-hy;
 u = (((hc*hy-U)+hc*ty)+tc*hy)+tc*ty;
 c = ((((x.hi-U)-u)+x.lo)-C*y.lo)/y.hi;
 z.hi = C+c;
 z.lo = (double)(C-z.hi)+c;
 END_x86_FIX;
 return z;
}

// double/doubledouble:
doubledouble operator /(const double& x,const doubledouble& y )
{
 double  hc, tc, hy, ty, C, c, U, u;
 doubledouble z;
 x86_FIX;
 C = x/y.hi;
 c = Split*C;
 hc =c-C;
 u = Split*y.hi;
 hc = c-hc;
 tc = C-hc;
 hy = u-y.hi;
 U = C*y.hi;
 hy = u-hy;
 ty = y.hi-hy;
 u = (((hc*hy-U)+hc*ty)+tc*hy)+tc*ty;
 c = ((((x-U)-u))-C*y.lo)/y.hi;
 z.hi = C+c;
 z.lo = (double)(C-z.hi)+c;
 END_x86_FIX;
 return z;
}

doubledouble operator /(const doubledouble& x,const double& y )
{
 double hc, tc, hy, ty, C, c, U, u;
 doubledouble z;

 x86_FIX;
 C = x.hi/y;
 c = Split*C;
 hc = c-C;
 u = Split*y;
 hc = c-hc;
 tc = C-hc;
 hy = u-y;
 U = C*y;
 hy = u-hy;
 ty = y-hy;
 u = (((hc*hy-U)+hc*ty)+tc*hy)+tc*ty;
 c = (((x.hi-U)-u)+x.lo)/y;
 z.hi = C+c;
 z.lo = (double)(C-z.hi)+c;
 END_x86_FIX;
 return z;
}

// doubledouble/int
doubledouble operator /(const doubledouble& x,const int y)
{
 return x/(const double)(y);
}
doubledouble operator /(const int x,const doubledouble& y)
{
 return (const double)(x)/y;
}

// Self-division.
doubledouble& operator /=(doubledouble &This,const doubledouble& y)
{
 double hc, tc, hy, ty, C, c, U, u;
 x86_FIX;
 C =This.hi/y.hi;
 c = Split*C;
 hc =c-C;
 u = Split*y.hi;
 hc = c-hc;
 tc = C-hc;
 hy = u-y.hi;
 U = C * y.hi;
 hy = u-hy;
 ty = y.hi-hy;
 u = (((hc*hy-U)+hc*ty)+tc*hy)+tc*ty;
 c = ((((This.hi-U)-u)+This.lo)-C*y.lo)/y.hi;
 u = C+c;
 This.hi = u;
 This.lo = (double)(C-u)+c;
 END_x86_FIX;
 return This;
}

doubledouble& operator /=(doubledouble &This,const double& y)
{
 double hc, tc, hy, ty, C, c, U, u;
 x86_FIX;

 C =This.hi/y;
 c = Split*C;
 hc =c-C;
 u = Split*y;
 hc = c-hc;
 tc = C-hc;
 hy = u-y;
 U = C * y;
 hy = u-hy;
 ty = y-hy;
 u = (((hc*hy-U)+hc*ty)+tc*hy)+tc*ty;
 c = (((This.hi-U)-u)+This.lo)/y;
 u = C+c;
 This.hi = u;
 This.lo = (double)(C-u)+c;
 END_x86_FIX;
 return This;
}

doubledouble& operator /=(doubledouble &This,const int y)
{
 return (This) /=(const double)(y);
}
doubledouble Qcopysign(const doubledouble& x,const double y)
{
 if (y>=0.0) return ddfabs(x);
 else return -ddfabs(x);
}

doubledouble atodd(const char *s)
{
 doubledouble result = {
  0.0,0.0  };
 int n, sign, ex = 0;
 /* eat whitespace */
 while (*s == ' ' || *s == '\t' || *s == '\n') s++;
 switch (*s) { // get sign of mantissa

 case '-':
  {
   sign = -1;
   s++;
   break;
  }
 case '+':
  s++;  // no break

 default:
  sign = 1;
 }
 /* get digits before decimal point */
 while (n=(*s++)-'0', n>=0 && n<10) result = 10.0*result+n;
 s--;
 if (*s == '.')   /* get digits after decimal point */ {
  s++;
  while (n=(*s++)-'0', n>=0 && n<10) {
   result = 10.0*result+n;
   --ex;
  }
  s--;
 }
 if (*s == 'e' || *s == 'E') /* get exponent */ ex+=atoi(++s);
 /* exponent adjustment */
 while (ex-- > 0) result *= 10.0;
 while (++ex < 0) result /= 10.0;
 return (sign>=0) ? result : -result;
}
// Square  (faster than x*x)
doubledouble sqr(const doubledouble& x)
{
 double hx,tx,C,c;
 doubledouble r;
 x86_FIX;
 C=Split*x.hi;
 hx=C-x.hi;
 hx=C-hx;
 tx=x.hi-hx;
 C=x.hi*x.hi;
 c=((((hx*hx-C)+2.0*hx*tx))+tx*tx)+2.0*x.hi*x.lo;
 hx=C+c;
 r.hi=hx;
 r.lo = c+(C-hx);
 END_x86_FIX;
 return r;
}
extern double pow(double,double);
// doubledouble^int
doubledouble powint(const doubledouble& u, int c)
{
 doubledouble result;
 if (c<0) return recip(powint(u,-c));
 switch (c) {
 case 0:
  result.hi = result.lo = 0.0;
  if (u.hi == 0.0)
   result.lo = pow(0.0,0.0);
  else
      result.lo = 1.0;
  return result;
  // let math.h handle 0^0.
 case 1:
  return u;
 case 2:
  return sqr(u);
 case 3:
  return sqr(u)*u;
 default:
  { // binary method

   int n=c, m;
   doubledouble y={
    1.0,0.0      }
   , z=u;
   if (n<0) n=-n;
   while (1) {
    m=n;
    n/=2;
    if (n+n!=m) { // if m odd

     y*=z;
     if (0==n) return y;
    }
    z=sqr(z);
   }
  }
 }
 /* not reached */
 return u;
}

doubledouble new_doubledouble(double x,double y)
{
 doubledouble result;

 x86_FIX;
 result.hi = x+y;
 result.lo = y+(x-result.hi);
 END_x86_FIX;
 return result;
}


// Floor.  See Graham, Knuth and Patashnik `Concrete Mathematics' page 70.
// Greatest integer <= x
// maybe floor_s is better?
doubledouble ddfloor(const doubledouble& x)
{
 double fh=floor(x.hi), fl=floor(x.lo);
 double t1=x.hi-fh;
 double t2=x.lo-fl;
 double t3=t1+t2;
 int t;
 t3 = x.hi-fh+x.lo-fl;
 t=(int)(floor(t3));
 switch (t) {
 case 0:
  return new_doubledouble(fh,0.0)+new_doubledouble(fl,0.0);
 case 1:
  return new_doubledouble(fh,0.0)+new_doubledouble(fl+1,0.0);
  // case 2 can only arise when fp({hi})<=1, fp({lo})<=1, but

fp({hi}+{lo})=2
 case 2:
  return new_doubledouble(fh,0.0)+new_doubledouble(fl+1.0,0.0);
 }
 // never get here
  return new_doubledouble(0.0,0.0);
}
// Signum
int sign(doubledouble& x)
{
 if (x.hi>0.0) return 1;
 else if (x.hi<0.0) return -1;
 else return 0;
}

char *doubledoubletoa(doubledouble &x,char *s)
{
 int Digits;
 doubledouble ten = new_doubledouble(10.0,0.0),l,y,z;
 double q;
 int m,n,d,i;
 if (x.hi==0.0)   {
  strcpy(s, "0.0 ");
  return s;
 }
 if (x.hi!=x.hi) {
  strcpy(s , "NaN ");
  return s;
 }
 Digits=34;
 y=ddfabs(x);
 q=log10(y.hi);
 n=(int)(floor(q));
 if (n<0) n++;
 if (n == 0)
  l = new_doubledouble(1.0,0.0);
 else
     l = powint(ten,n);
 y = y/l;
 if (sign(x)<0) *s++ = '-'; //else s<<" ";

 d = Digits>34 ? 34 : Digits;
 d = d<3 ? 3 : d;
 for (i=1; i<=d; i++) {
  if (2==i) *s++ = '.';
  m = (int)(ddfloor(y).hi);
  if (m<0 || m>9) {
   // fprintf(stderr,
//  "Internal error in doubledouble.cc: digit=%d",m);
     }
  sprintf(s,"%d",m);
  s += strlen(s);
  //    z.lo = 0.0;
//    z.hi = (double)m;
 z = new_doubledouble((double)m,0.0);
  y = (y-z)*ten;
  if (y.hi<=0.0) break; // x must be an integer

  //if (y.hi==0.0) break; // ???????????
  }
 if (n!=0) {
  *s++ = 'e';
  sprintf(s,"%d",n);
  s += strlen(s);
 }
 else *s++ = 0;
 return s;
}
int operator> (const doubledouble& x, const doubledouble& y)
{
 return (x.hi> y.hi) || (x.hi==y.hi && x.lo> y.lo);
}
int operator>=(const doubledouble& x, const doubledouble& y)
{
 return (x.hi>y.hi) || (x.hi==y.hi && x.lo>=y.lo);
}
int operator< (const doubledouble& x, const doubledouble& y)
{
 return (x.hi< y.hi) || (x.hi==y.hi && x.lo< y.lo);
}
int operator<=(const doubledouble& x, const doubledouble& y)
{
 return (x.hi<y.hi) || (x.hi==y.hi && x.lo<=y.lo);
}
int operator==(const doubledouble& x, const doubledouble& y)
{
 return x.hi==y.hi && x.lo==y.lo;
}
int operator!=(const doubledouble& x, const doubledouble& y)
{
 return x.hi!=y.hi || x.lo!=y.lo;
}


struct ddConstants {
 doubledouble Log2;
 doubledouble Log10;
 doubledouble Pi;
 doubledouble TwoPi;
 doubledouble Pion2;
 doubledouble Pion4;
 doubledouble _Pi;
} ddconst;

void InitConstants(void)
{
 ddconst.Log2 =atodd("0.6931471805599453094172321214581765680755");
 ddconst.Log10=atodd("2.302585092994045684017991454684364207601");
 ddconst.Pi   =atodd("3.1415926535897932384626433832795028841972");
 ddconst.TwoPi=atodd("6.2831853071795864769252867665590057683943");
 ddconst.Pion2=atodd("1.5707963267948966192313216916397514420985");
 ddconst.Pion4=atodd("0.7853981633974483096156608458198757210493");
 ddconst._Pi  =atodd("0.3183098861837906715377675267450287240689");
}

doubledouble ddldexp(const doubledouble x, const int exp)
{ // x*2^exp

 return new_doubledouble(ldexp(x.hi,exp),ldexp(x.lo,exp));
}
// Another floor. V. Shoup 97 Sep 23...
doubledouble ddfloor_s(const doubledouble& x)
{
 double fhi=floor(x.hi);
 if (fhi!=x.hi)
  return new_doubledouble(fhi,0.0);
 else
     return new_doubledouble(fhi,floor(x.lo));
}

// rint (round to nearest int)
doubledouble ddrint(const doubledouble& x)
{
 const doubledouble &rx = x+new_doubledouble(0.5,0.0);
 return ddfloor(rx);
}

doubledouble ddexp(const doubledouble& x)
{
 /* Uses some ideas by Alan Miller
 Method:
 x    x.log2(e)    nint[x.log2(e)] + frac[x.log2(e)]
 e  = 2          = 2

 iy    fy
 = 2   . 2
 Then
 fy    y.loge(2)
 2   = e

 Now y.loge(2) will be less than 0.3466 in absolute value.
 This is halved and a Pade' approximant is used to approximate e^x over
 the region (-0.1733, +0.1733).   This approximation is then squared.
 */
 int iy;
 doubledouble y,temp,ysq,sum1,sum2;

 if (x.hi<-744.4400719213812) return new_doubledouble(0.0,0.0); //

exp(x)<1e-300

 y=x/ddconst.Log2;
 iy = (int)(ddrint(y)).hi;
 temp=new_doubledouble((double)iy,0.0);
 y=(y-temp)*ddconst.Log2;
 y=ddldexp(y,-1);
 ysq=sqr(y);
 sum1=y*((((ysq+3960.)*ysq+2162160.)*ysq+302702400.)*ysq+8821612800.);
 sum2=(((90.*ysq+110880.)*ysq+30270240.)*ysq+2075673600.)*ysq+17643225600.;
 /*
 sum2 + sum1         2.sum1
 Now approximation = ----------- = 1 + ----------- = 1 + 2.temp
 sum2 - sum1       sum2 - sum1

 Then (1 + 2.temp)^2 = 4.temp.(1 + temp) + 1
 */
 temp=sum1/(sum2-sum1);
 y=temp*(temp+1);
 y=ddldexp(y,2);
 return ddldexp(y+1,iy);
}

// Natural logarithm
doubledouble ddlog(const doubledouble& x)
{ // Newton method

 doubledouble s,e;
 if (x.hi<0.0) {
  fprintf(stderr,"\ndoubledouble: attempt to take log of negative

argument!\n");
  return new_doubledouble(0.0,0.0);
 }
 if (x.hi==0.0) {
  fprintf(stderr,"\ndoubledouble: attempt to take log(0)!\n");
  return new_doubledouble(0.0,0.0);
 }
 s=new_doubledouble(log(x.hi),0.0), e=ddexp(s); // s=double approximation to

result

 return s+(x-e)/e;  // Newton correction, good enough

 //doubledouble d=(x-e)/e; return s+d-0.5*sqr(d);  // or 2nd order correction
}

void base_and_prec(void) { // Linnainmaa ACM TOMS 7, 272 Thm 3

 int p;
 x86_FIX;
 printf("Base and precision determination by Linnainmaa's method:\n");
 {
  double U,R,u,r,beta;
  U=4.0/3.0;
  U-=1;
  U*=3;
  U-=1;
  U=fabs(U);
  R=U/2+1;
  R-=1;
  if (R!=0.0) U=R;
  u=2;
  u/=3;
  u-=0.5;
  u*=3;
  u-=0.5;
  u=fabs(u);
  r=u/2+0.5;
  r-=0.5;
  if (r!=0.0) u=r;
  beta=U/u;
  p=(int)(-log(u)/log(beta)+0.5);
  printf("Type double: base is %g, precision %d\n",beta,p);
 }
 {
  doubledouble ddU,ddR,u,r,beta;
  ddU=new_doubledouble(4.0,0.0);
  ddU/=new_doubledouble(3.0,0.0);
  ddU-=1;
  ddU*=3;
  ddU-=1;
  ddU=ddfabs(ddU);
  ddR=ddU/2+1;
  ddR-=1;
  if (ddR.hi!=0.0) ddU=ddR;
  u=new_doubledouble(2.0,0.0);
  u/=3.0;
  u-=0.5;
  u*=3;
  u-=0.5;
  u=ddfabs(u);
  r=u/2+0.5;
  r-=0.5;
  if (r.hi!=0.0) u=r;
  beta=ddU/u;
  p=(int)(-ddlog(u)/ddlog(beta)+0.5).hi;
  printf("Type doubledouble:   base is %g, precision %d\n",beta.hi,p);
 }
 END_x86_FIX;
}
// Assumption: if it works, don't even bother doing output
int Verify(char msg[], doubledouble correct, doubledouble computed)
{
 // default fractional error allowed.
 doubledouble delta = new_doubledouble(1.1e-31,0.0);
 if(correct == computed)
  return 1;
 else if(correct.hi == 0.0) // can't do a relative test
 {
error:
  printf("%s", msg);
  printf(" corret.hi %g correct.lo %g", correct.hi,correct.lo);
  printf("\ngot hi %g, lo %g\n",computed.hi,computed.lo);
  return 0;
 }
 else if(ddfabs((correct-computed)/correct) <= delta)
  return 1;
 else goto error;
}


int main(void)
{
 doubledouble a = new_doubledouble(5.0,0.0);
 doubledouble b = new_doubledouble(4.0,0.0);
 doubledouble c = a*b;
 double x1,x2;
 char buffer[256];
 int i;

 InitConstants();
 base_and_prec();
 doubledoubletoa(ddconst.Log2,buffer);
 printf("%s\n",buffer);
 x1=(pow(2,53)-2)/pow(2,53);
 x2=(3.0)/pow(2,55);
 Verify("Test -2: should be 0: ",
  new_doubledouble(0.0,0.0),
  ddfloor(new_doubledouble(x1,0.0)+new_doubledouble(x2,0.0)));
 Verify("Test -1: should be 0: ", new_doubledouble(0.0,0.0),
      ddfloor(atodd("0.9999999999999997779553950749686919152737")
  +

atodd("0.00000000000000008326672684688674053177237510681152343750")));
 for (i=0; i<5;i++) {
  c = c*c;
  doubledoubletoa(c,buffer);
  printf("%s\n",buffer);
  a = c - 1.0;
  b = new_doubledouble(-1.0,0.0);
  a = c + b;
  doubledoubletoa(a,buffer);
  printf("%s\n",buffer);
 }
 return 0;
}

本文地址:http://com.8s8s.com/it/it24454.htm