make type punning of floating point/int in float_math and double_math use union to avoid warnings (#600)

This commit is contained in:
Graham Sanderson
2021-10-12 09:07:36 -05:00
committed by GitHub
parent 30673fb6d0
commit d50e743e0e
2 changed files with 90 additions and 64 deletions

View File

@ -41,11 +41,25 @@ typedef int64_t i64;
#define DUNPACK(x,e,m) e=((x)>>52)&0x7ff,m=((x)&0x000fffffffffffffULL)|0x0010000000000000ULL
#define DUNPACKS(x,s,e,m) s=((x)>>63),DUNPACK((x),(e),(m))
_Pragma("GCC diagnostic push")
_Pragma("GCC diagnostic ignored \"-Wstrict-aliasing\"")
// Overlay of a double and a 64-bit unsigned integer in the same storage. The helper functions
// that follow write one member and read the other to move a bit pattern between the two types
// without casting pointers.
typedef union {
double d; // the storage viewed as a floating-point value
ui64 ix; // the same storage viewed as an integer (ui64 is declared outside this view)
} double_ui64;
// Reinterprets the 64-bit pattern `ix` as a double by storing it in the integer member of a
// double_ui64 and reading back the floating-point member. No numeric conversion takes place.
static inline double ui642double(ui64 ix) {
    double_ui64 pun = { .ix = ix };
    return pun.d;
}
static inline ui64 double2ui64(double d) {
double_ui64 tmp;
tmp.d = d;
return tmp.ix;
}
// Returns true when x is treated as a NaN, judged from its bit pattern alone.
// Shifting right by 31 and truncating to 32 bits discards the sign bit and leaves the 11 exponent
// bits above mantissa bits 51..31, so the comparison is true exactly when the exponent field is
// all ones and at least one of those mantissa bits is set.
// NOTE(review): ix is declared twice below, once via a pointer cast and once via double2ui64();
// this looks like the removed/added pair of a diff shown without +/- markers. Confirm which is live.
static inline bool disnan(double x) {
ui64 ix=*(ui64*)&x;
ui64 ix= double2ui64(x);
// Only mantissa bits 31 and above are examined (bit 31 is the top bit of the low 32-bit word);
// mantissa bits 30..0 are ignored, which the original author judged to be acceptable.
return ((uint32_t)(ix >> 31)) > 0xffe00000u;
}
@ -59,17 +73,17 @@ static inline bool disnan(double x) {
#endif
// Returns the top 12 bits of x's bit pattern: the sign bit (worth 0x800) together with the
// 11-bit biased exponent field in the low bits.
// NOTE(review): ix is declared twice below, once via a pointer cast and once via double2ui64();
// this looks like the removed/added pair of a diff shown without +/- markers. Confirm which is live.
static inline int dgetsignexp(double x) {
ui64 ix=*(ui64*)&x;
ui64 ix=double2ui64(x);
return (ix>>52)&0xfff;
}
// Returns the 11-bit biased exponent field of x (bits 62..52 of its bit pattern); the sign bit is
// masked off.
// NOTE(review): ix is declared twice below, once via a pointer cast and once via double2ui64();
// this looks like the removed/added pair of a diff shown without +/- markers. Confirm which is live.
static inline int dgetexp(double x) {
ui64 ix=*(ui64*)&x;
ui64 ix=double2ui64(x);
return (ix>>52)&0x7ff;
}
static inline double dldexp(double x,int de) {
ui64 ix=*(ui64*)&x,iy;
ui64 ix=double2ui64(x),iy;
int e;
e=dgetexp(x);
if(e==0||e==0x7ff) return x;
@ -77,7 +91,7 @@ static inline double dldexp(double x,int de) {
if(e<=0) iy=ix&0x8000000000000000ULL; // signed zero for underflow
else if(e>=0x7ff) iy=(ix&0x8000000000000000ULL)|0x7ff0000000000000ULL; // signed infinity on overflow
else iy=ix+((ui64)de<<52);
return *(double*)&iy;
return ui642double(iy);
}
double WRAPPER_FUNC(ldexp)(double x, int de) {
@ -87,9 +101,9 @@ double WRAPPER_FUNC(ldexp)(double x, int de) {
// Returns a double made of the low 63 bits (exponent and mantissa) of x and the sign bit of y.
// NOTE(review): the declaration and the return each appear twice below, once with pointer casts
// and once with the double2ui64()/ui642double() helpers; this looks like the removed/added pairs
// of a diff shown without +/- markers. Confirm which lines are live.
static inline double dcopysign(double x,double y) {
ui64 ix=*(ui64*)&x,iy=*(ui64*)&y;
ui64 ix=double2ui64(x),iy=double2ui64(y);
// keep everything but the sign from x, take the sign bit from y
ix=((ix&0x7fffffffffffffffULL)|(iy&0x8000000000000000ULL));
return *(double*)&ix;
return ui642double(ix);
}
double WRAPPER_FUNC(copysign)(double x, double y) {
@ -104,7 +118,7 @@ static inline int dispinf(double x) { return dgetsignexp(x)==0x7ff; }
// Non-zero when the sign bit of x is set and its exponent field is all ones, i.e. the combined
// sign+exponent value from dgetsignexp() is 0xfff. The mantissa is not inspected.
static inline int disminf(double x) {
    int sign_and_exp = dgetsignexp(x);
    return sign_and_exp == 0xfff;
}
static inline int disint(double x) {
ui64 ix=*(ui64*)&x,m;
ui64 ix=double2ui64(x),m;
int e=dgetexp(x);
if(e==0) return 1; // 0 is an integer
e-=0x3ff; // remove exponent bias
@ -117,7 +131,7 @@ static inline int disint(double x) {
}
static inline int disoddint(double x) {
ui64 ix=*(ui64*)&x,m;
ui64 ix=double2ui64(x),m;
int e=dgetexp(x);
e-=0x3ff; // remove exponent bias
if(e<0) return 0; // |x|<1; 0 is not odd
@ -130,24 +144,24 @@ static inline int disoddint(double x) {
}
// Returns 0 when diszero(x) reports x as zero; otherwise returns the sign bit of x (1 if set).
// NOTE(review): ix is declared twice below, once via a pointer cast and once via double2ui64();
// this looks like the removed/added pair of a diff shown without +/- markers. Confirm which is live.
static inline int disstrictneg(double x) {
ui64 ix=*(ui64*)&x;
ui64 ix=double2ui64(x);
if(diszero(x)) return 0; // diszero() is defined outside this view
return ix>>63;
}
// Returns the sign bit of x (bit 63 of its bit pattern): 1 if set, 0 otherwise. No special case is
// made for zero here.
// NOTE(review): ix is declared twice below, once via a pointer cast and once via double2ui64();
// this looks like the removed/added pair of a diff shown without +/- markers. Confirm which is live.
static inline int disneg(double x) {
ui64 ix=*(ui64*)&x;
ui64 ix=double2ui64(x);
return ix>>63;
}
// Returns x with its sign bit (bit 63) inverted; all other bits are left unchanged.
// NOTE(review): the declaration and the return each appear twice below, once with pointer casts
// and once with the double2ui64()/ui642double() helpers; this looks like the removed/added pairs
// of a diff shown without +/- markers. Confirm which lines are live.
static inline double dneg(double x) {
ui64 ix=*(ui64*)&x;
ui64 ix=double2ui64(x);
ix^=0x8000000000000000ULL; // toggle the sign bit
return *(double*)&ix;
return ui642double(ix);
}
static inline int dispo2(double x) {
ui64 ix=*(ui64*)&x;
ui64 ix=double2ui64(x);
if(diszero(x)) return 0;
if(disinf(x)) return 0;
ix&=0x000fffffffffffffULL;
@ -164,33 +178,33 @@ static inline double dnan_or(double x) {
// trunc wrapper: rounds x toward zero by clearing the mantissa bits whose significance is below 1.
// Magnitudes below 1 yield a zero carrying x's sign; values whose exponent leaves no fractional
// mantissa bits are returned unchanged.
// NOTE(review): the declaration and each return appear twice below, once with pointer casts and
// once with the double2ui64()/ui642double() helpers; this looks like the removed/added pairs of a
// diff shown without +/- markers. Confirm which lines are live.
double WRAPPER_FUNC(trunc)(double x) {
// check_nan_d1 is defined outside this view; presumably it handles NaN input before the bit work. Confirm.
check_nan_d1(x);
ui64 ix=*(ui64*)&x,m;
ui64 ix=double2ui64(x),m;
int e=dgetexp(x);
e-=0x3ff; // remove exponent bias
if(e<0) { // |x|<1
ix&=0x8000000000000000ULL; // keep only the sign bit: result is a signed zero
return *(double*)&ix;
return ui642double(ix);
}
e=52-e; // bit position in mantissa with significance 1
if(e<=0) return x; // |x| large, so must be an integer
m=(1ULL<<e)-1; // mask for bits of significance <1
ix&=~m; // drop the fractional bits
return *(double*)&ix;
return ui642double(ix);
}
double WRAPPER_FUNC(round)(double x) {
check_nan_d1(x);
ui64 ix=*(ui64*)&x,m;
ui64 ix=double2ui64(x),m;
int e=dgetexp(x);
e-=0x3ff; // remove exponent bias
if(e<-1) { // |x|<0.5
ix&=0x8000000000000000ULL;
return *(double*)&ix;
return ui642double(ix);
}
if(e==-1) { // 0.5<=|x|<1
ix&=0x8000000000000000ULL;
ix|=0x3ff0000000000000ULL; // ±1
return *(double*)&ix;
return ui642double(ix);
}
e=52-e; // bit position in mantissa with significance 1, <=52
if(e<=0) return x; // |x| large, so must be an integer
@ -198,16 +212,16 @@ double WRAPPER_FUNC(round)(double x) {
ix+=m;
m=m+m-1; // mask for bits of significance <1
ix&=~m;
return *(double*)&ix;
return ui642double(ix);
}
double WRAPPER_FUNC(floor)(double x) {
check_nan_d1(x);
ui64 ix=*(ui64*)&x,m;
ui64 ix=double2ui64(x),m;
int e=dgetexp(x);
if(e==0) { // x==0
ix&=0x8000000000000000ULL;
return *(double*)&ix;
return ui642double(ix);
}
e-=0x3ff; // remove exponent bias
if(e<0) { // |x|<1, not zero
@ -219,16 +233,16 @@ double WRAPPER_FUNC(floor)(double x) {
m=(1ULL<<e)-1; // mask for bit of significance <1
if(disneg(x)) ix+=m; // add 1-ε to magnitude if negative
ix&=~m; // truncate
return *(double*)&ix;
return ui642double(ix);
}
double WRAPPER_FUNC(ceil)(double x) {
check_nan_d1(x);
ui64 ix=*(ui64*)&x,m;
ui64 ix=double2ui64(x),m;
int e=dgetexp(x);
if(e==0) { // x==0
ix&=0x8000000000000000ULL;
return *(double*)&ix;
return ui642double(ix);
}
e-=0x3ff; // remove exponent bias
if(e<0) { // |x|<1, not zero
@ -240,7 +254,7 @@ double WRAPPER_FUNC(ceil)(double x) {
m=(1ULL<<e)-1; // mask for bit of significance <1
if(!disneg(x)) ix+=m; // add 1-ε to magnitude if positive
ix&=~m; // truncate
return *(double*)&ix;
return ui642double(ix);
}
double WRAPPER_FUNC(asin)(double x) {
@ -549,7 +563,7 @@ static i64 drem_0(i64 mx,i64 my,int e,int*pquo) {
double WRAPPER_FUNC(fmod)(double x,double y) {
check_nan_d2(x, y);
ui64 ix=*(ui64*)&x,iy=*(ui64*)&y;
ui64 ix=double2ui64(x),iy=double2ui64(y);
int sx,ex,ey;
i64 mx,my;
DUNPACKS(ix,sx,ex,mx);
@ -568,7 +582,7 @@ double WRAPPER_FUNC(fmod)(double x,double y) {
double WRAPPER_FUNC(remquo)(double x,double y,int*quo) {
check_nan_d2(x, y);
ui64 ix=*(ui64*)&x,iy=*(ui64*)&y;
ui64 ix=double2ui64(x),iy=double2ui64(y);
int sx,sy,ex,ey,q;
i64 mx,my;
DUNPACKS(ix,sx,ex,mx);
@ -609,5 +623,4 @@ double WRAPPER_FUNC(drem)(double x,double y) { check_nan_d2(x, y); return remquo
// remainder wrapper: applies the file's two-argument NaN check (check_nan_d2, defined outside this
// view) and then returns remquo(x, y, 0), passing 0 as the quotient pointer.
double WRAPPER_FUNC(remainder)(double x, double y) {
    check_nan_d2(x, y);
    double rem = remquo(x, y, 0);
    return rem;
}
_Pragma("GCC diagnostic pop") // strict-aliasing
_Pragma("GCC diagnostic pop") // conversion