DOSBox-X: include/fpu.h Source File

00001 /*
00002  *  Copyright (C) 2002-2020  The DOSBox Team
00003  *
00004  *  This program is free software; you can redistribute it and/or modify
00005  *  it under the terms of the GNU General Public License as published by
00006  *  the Free Software Foundation; either version 2 of the License, or
00007  *  (at your option) any later version.
00008  *
00009  *  This program is distributed in the hope that it will be useful,
00010  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
00011  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012  *  GNU General Public License for more details.
00013  *
00014  *  You should have received a copy of the GNU General Public License along
00015  *  with this program; if not, write to the Free Software Foundation, Inc.,
00016  *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
00017  */
00018 
00019 #ifndef DOSBOX_FPU_H
00020 #define DOSBOX_FPU_H
00021 
00022 #ifndef DOSBOX_MEM_H
00023 #include "mem.h"
00024 #endif
00025 
00026 #include "mmx.h"
00027 
00028 void FPU_ESC0_Normal(Bitu rm);
00029 void FPU_ESC0_EA(Bitu rm,PhysPt addr);
00030 void FPU_ESC1_Normal(Bitu rm);
00031 void FPU_ESC1_EA(Bitu rm,PhysPt addr);
00032 void FPU_ESC2_Normal(Bitu rm);
00033 void FPU_ESC2_EA(Bitu rm,PhysPt addr);
00034 void FPU_ESC3_Normal(Bitu rm);
00035 void FPU_ESC3_EA(Bitu rm,PhysPt addr);
00036 void FPU_ESC4_Normal(Bitu rm);
00037 void FPU_ESC4_EA(Bitu rm,PhysPt addr);
00038 void FPU_ESC5_Normal(Bitu rm);
00039 void FPU_ESC5_EA(Bitu rm,PhysPt addr);
00040 void FPU_ESC6_Normal(Bitu rm);
00041 void FPU_ESC6_EA(Bitu rm,PhysPt addr);
00042 void FPU_ESC7_Normal(Bitu rm);
00043 void FPU_ESC7_EA(Bitu rm,PhysPt addr);
00044 
00045 /* Floating point register, in the form the native host uses for "double".
00046  * This is slightly less precise than the 80-bit extended IEEE used by Intel,
00047  * but can be faster using the host processor "double" support. Most DOS games
00048  * using the FPU for 3D rendering are unaffected by the loss of precision.
00049  * However, there are cases where the full 80-bit precision is required such
00050  * as the "Fast Pentium memcpy trick" using the 80-bit versions of FLD/FST to
00051  * copy memory. */
00052 typedef union {
00053     double d;
00054 #ifndef WORDS_BIGENDIAN
00055     struct {
00056         Bit32u lower;
00057         Bit32s upper;
00058     } l;
00059 #else
00060     struct {
00061         Bit32s upper;
00062         Bit32u lower;
00063     } l;
00064 #endif
00065     Bit64s ll;
00066         MMX_reg reg_mmx;
00067 } FPU_Reg;
00068 
00069 // dynamic x86 core needs this
00070 typedef struct {
00071     Bit32u m1;
00072     Bit32u m2;
00073     Bit16u m3;
00074 
00075     Bit16u d1;
00076     Bit32u d2;
00077 } FPU_P_Reg;
00078 
// Compiler-level memory barrier.
//
// Ensures that reads/stores to one half of an FPU register struct are not
// reordered across reads/stores of the other half. Without it the optimizer
// may, for example, write the mantissa, then load the whole register as a
// float, and only afterwards store the exponent.
//
// This is NOT a hardware memory barrier; it only constrains the compiler's
// optimizer (an empty asm statement with a "memory" clobber).
//
// The guard tests __GNUC__, the macro GCC-compatible compilers actually
// predefine. It previously tested __GCC__, which those compilers never
// define, so the barrier always expanded to nothing.
// On other compilers the macro is intentionally a no-op.
#if defined(__GNUC__)
# define FPU_Reg_m_barrier()    __asm__ __volatile__ ("":::"memory")
#else
# define FPU_Reg_m_barrier()
#endif
00089 
// 80-bit extended precision register image, byte-packed.
//
// Unlike the 32-bit and 64-bit formats there is no "implied bit": the 64-bit
// mantissa holds both the integer bit and the fraction.
//
// TODO: The configure script needs to use "long double" on x86/x86_64 and
//       verify sizeof(long double) == 10, else undef a macro to let the code
//       emulate long double 80-bit IEEE. It also needs to determine host byte
//       order here so the host long double matches this struct.
#pragma pack(push,1)
typedef union {
        // field view
        struct {
                uint64_t        mantissa;       // bits [63:0]
                unsigned int    exponent:15;    // bits [78:64]
                unsigned int    sign:1;         // bit  [79]
        } f;
#ifdef HAS_LONG_DOUBLE
        // host view, bits [79:0]
        long double             v;
#endif
        // raw view: low 64 bits and high 16 bits
        struct {
                uint64_t        l;
                uint16_t        h;
        } raw;
} FPU_Reg_80;
#pragma pack(pop)

// exponent bias of the 80-bit format
#define FPU_Reg_80_exponent_bias        (16383)
00113 
// 64-bit (double precision) register image, byte-packed.
// The same 8 bytes can be read as bit fields (f), as a host double (v),
// or as a plain 64-bit integer (raw).
#pragma pack(push,1)
typedef union {
        struct {
                uint64_t        mantissa:52;    // bits [51:0]
                uint64_t        exponent:11;    // bits [62:52]
                uint64_t        sign:1;         // bit  [63]
        } f;
        double                  v;
        uint64_t                raw;
} FPU_Reg_64;
#pragma pack(pop)

// exponent bias of the 64-bit format
#define FPU_Reg_64_exponent_bias        (1023)
// bit 52: sits just above the 52 stored mantissa bits
static const uint64_t FPU_Reg_64_implied_bit = ((uint64_t)1 << 52);
00128 
// 32-bit (single precision) register image, byte-packed.
// The same 4 bytes can be read as bit fields (f), as a host float (v),
// or as a plain 32-bit integer (raw).
#pragma pack(push,1)
typedef union {
        struct {
                uint32_t        mantissa:23;    // bits [22:0]
                uint32_t        exponent:8;     // bits [30:23]
                uint32_t        sign:1;         // bit  [31]
        } f;
        float                   v;
        uint32_t                raw;
} FPU_Reg_32;
#pragma pack(pop)

// exponent bias of the 32-bit format
#define FPU_Reg_32_exponent_bias        (127)
// bit 23: sits just above the 23 stored mantissa bits
static const uint32_t FPU_Reg_32_implied_bit = ((uint32_t)1 << 23);
00143 
// Per-register tag. The numeric values are the 2-bit codes that FPU_SetTag()
// unpacks from a 16-bit tag word, so they must not be renumbered.
enum FPU_Tag {
        TAG_Valid = 0,  // code 0
        TAG_Zero  = 1,  // code 1
        TAG_Weird = 2,  // code 2
        TAG_Empty = 3   // code 3
};
00150 
// Rounding mode. The numeric values are the 2-bit field that FPU_SetCW()
// takes from bits 10-11 of the control word, so they must not be renumbered.
enum FPU_Round {
        ROUND_Nearest = 0,
        ROUND_Down    = 1,
        ROUND_Up      = 2,
        ROUND_Chop    = 3
};
00157 
00158 typedef struct {
00159 #if defined(HAS_LONG_DOUBLE)//probably shouldn't allow struct to change size based on this
00160         FPU_Reg         _do_not_use__regs[9];
00161 #else
00162         FPU_Reg         regs[9];
00163 #endif
00164         FPU_P_Reg       p_regs[9];
00165         FPU_Reg_80      regs_80[9];
00166 #if defined(HAS_LONG_DOUBLE)//probably shouldn't allow struct to change size based on this
00167         bool            _do_not_use__use80[9];          // if set, use the 80-bit precision version
00168 #else
00169         bool            use80[9];               // if set, use the 80-bit precision version
00170 #endif
00171         FPU_Tag         tags[9];
00172         Bit16u          cw,cw_mask_all;
00173         Bit16u          sw;
00174         Bit32u          top;
00175         FPU_Round       round;
00176 } FPU_rec;
00177 
00178 
// TODO: take these constants from a real math library instead of literals.
#define PI              3.14159265358979323846  // pi
#define L2E             1.4426950408889634      // log2(e)
#define L2T             3.3219280948873623      // log2(10)
#define LN2             0.69314718055994531     // ln(2)
#define LG2             0.3010299956639812      // log10(2)
00185 
00186 
00187 extern FPU_rec fpu;
00188 
00189 #define TOP fpu.top
00190 #define STV(i)  ( (fpu.top+ (i) ) & 7 )
00191 
00192 
00193 Bit16u FPU_GetTag(void);
00194 void FPU_FLDCW(PhysPt addr);
00195 
00196 static INLINE void FPU_SetTag(Bit16u tag){
00197         for(Bitu i=0;i<8;i++)
00198                 fpu.tags[i] = static_cast<FPU_Tag>((tag >>(2*i))&3);
00199 }
00200 
00201 static INLINE void FPU_SetCW(Bitu word){
00202         // HACK: Bits 13-15 are not defined. Apparently, one program likes to test for
00203         //       Cyrix EMC87 by trying to set bit 15. We want the test program to see
00204         //       us as an Intel 287 when cputype == 286.
00205         word &= 0x7FFF;
00206 
00207         fpu.cw = (Bit16u)word;
00208         fpu.cw_mask_all = (Bit16u)(word | 0x3f);
00209         fpu.round = (FPU_Round)((word >> 10) & 3);
00210 }
00211 
00212 
00213 static INLINE Bit8u FPU_GET_TOP(void) {
00214         return (fpu.sw & 0x3800U) >> 11U;
00215 }
00216 
00217 static INLINE void FPU_SET_TOP(Bitu val){
00218         fpu.sw &= ~0x3800U;
00219         fpu.sw |= (val & 7U) << 11U;
00220 }
00221 
00222 
00223 static INLINE void FPU_SET_C0(Bitu C){
00224         fpu.sw &= ~0x0100U;
00225         if(C) fpu.sw |=  0x0100U;
00226 }
00227 
00228 static INLINE void FPU_SET_C1(Bitu C){
00229         fpu.sw &= ~0x0200U;
00230         if(C) fpu.sw |=  0x0200U;
00231 }
00232 
00233 static INLINE void FPU_SET_C2(Bitu C){
00234         fpu.sw &= ~0x0400U;
00235         if(C) fpu.sw |=  0x0400U;
00236 }
00237 
00238 static INLINE void FPU_SET_C3(Bitu C){
00239         fpu.sw &= ~0x4000U;
00240         if(C) fpu.sw |= 0x4000U;
00241 }
00242 
00243 static INLINE void FPU_SET_D(Bitu C){
00244         fpu.sw &= ~0x0002U;
00245         if(C) fpu.sw |= 0x0002U;
00246 }
00247 
00248 
00249 #endif