DOSBox-X
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator
src/cpu/core_prefetch.cpp
00001 /*
00002  *  Copyright (C) 2002-2015  The DOSBox Team
00003  *
00004  *  This program is free software; you can redistribute it and/or modify
00005  *  it under the terms of the GNU General Public License as published by
00006  *  the Free Software Foundation; either version 2 of the License, or
00007  *  (at your option) any later version.
00008  *
00009  *  This program is distributed in the hope that it will be useful,
00010  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
00011  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012  *  GNU General Public License for more details.
00013  *
00014  *  You should have received a copy of the GNU General Public License
00015  *  along with this program; if not, write to the Free Software
00016  *  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
00017  */
00018 
00019 
00020 #include <stdio.h>
00021 #include <string.h>
00022 
00023 #include "dosbox.h"
00024 #include "mem.h"
00025 #include "cpu.h"
00026 #include "lazyflags.h"
00027 #include "inout.h"
00028 #include "callback.h"
00029 #include "pic.h"
00030 #include "fpu.h"
00031 #include "paging.h"
00032 #include "mmx.h"
00033 
00034 using namespace std;
00035 
00036 #include <algorithm>
00037 
00038 #define CPU_CORE CPU_ARCHTYPE_386
00039 
00040 #define DoString DoString_Prefetch
00041 
00042 extern bool ignore_opcode_63;
00043 
00044 #if C_DEBUG
00045 #include "debug.h"
00046 #endif
00047 
/* Guest memory access helpers used by the instruction handlers.
 *
 * Two implementations are selected by C_CORE_INLINE: the plain mem_read / mem_write
 * functions, or their _inline counterparts.
 * The 64-bit variants are built from two 32-bit accesses: the low DWORD lives at
 * `off` and the high DWORD at `off+4`.
 *
 * Macro arguments are parenthesized wherever they take part in an expression so that
 * callers may pass compound expressions (e.g. `a|b` or `c ? x : y`) safely.
 * NOTE: LoadMq and SaveMq still evaluate their arguments twice; do not pass
 * expressions with side effects. */
#if (!C_CORE_INLINE)
#define LoadMb(off) mem_readb(off)
#define LoadMw(off) mem_readw(off)
#define LoadMd(off) mem_readd(off)
#define LoadMq(off) ((Bit64u)(((Bit64u)mem_readd((off)+4)<<32) | (Bit64u)mem_readd(off)))
#define SaveMb(off,val) mem_writeb(off,val)
#define SaveMw(off,val) mem_writew(off,val)
#define SaveMd(off,val) mem_writed(off,val)
#define SaveMq(off,val) {mem_writed((off),(val)&0xffffffff);mem_writed((off)+4,((val)>>32)&0xffffffff);}
#else
#include "paging.h"
#define LoadMb(off) mem_readb_inline(off)
#define LoadMw(off) mem_readw_inline(off)
#define LoadMd(off) mem_readd_inline(off)
#define LoadMq(off) ((Bit64u)(((Bit64u)mem_readd_inline((off)+4)<<32) | (Bit64u)mem_readd_inline(off)))
#define SaveMb(off,val) mem_writeb_inline(off,val)
#define SaveMw(off,val) mem_writew_inline(off,val)
#define SaveMd(off,val) mem_writed_inline(off,val)
#define SaveMq(off,val) {mem_writed_inline((off),(val)&0xffffffff);mem_writed_inline((off)+4,((val)>>32)&0xffffffff);}
#endif
00068 
00069 extern Bitu cycle_count;
00070 
00071 #if C_FPU
00072 #define CPU_FPU 1u                                              //Enable FPU escape instructions
00073 #endif
00074 
00075 #define CPU_PIC_CHECK 1u
00076 #define CPU_TRAP_CHECK 1u
00077 
00078 #define OPCODE_NONE                     0x000u
00079 #define OPCODE_0F                       0x100u
00080 #define OPCODE_SIZE                     0x200u
00081 
00082 #define PREFIX_ADDR                     0x1u
00083 #define PREFIX_REP                      0x2u
00084 
00085 #define TEST_PREFIX_ADDR        (core.prefixes & PREFIX_ADDR)
00086 #define TEST_PREFIX_REP         (core.prefixes & PREFIX_REP)
00087 
/* Segment-override prefix: use _SEG as the base for both the DS-relative and the
 * SS-relative accesses, record which segment was chosen, then jump back to
 * restart_opcode so the next byte is decoded as part of the same instruction. */
#define DO_PREFIX_SEG(_SEG)                     \
        BaseDS=SegBase(_SEG);                   \
        BaseSS=SegBase(_SEG);                   \
        core.base_val_ds=_SEG;                  \
        goto restart_opcode;

/* Address-size prefix: replace the PREFIX_ADDR bit with the inverse of the code
 * segment's default (cpu.code.big), reselect the matching half of EATable, then
 * continue decoding at restart_opcode. */
#define DO_PREFIX_ADDR()                                                \
        core.prefixes=(core.prefixes & ~PREFIX_ADDR) |                  \
        (cpu.code.big ^ PREFIX_ADDR);                                   \
        core.ea_table=&EATable[(core.prefixes&PREFIX_ADDR) * 256u];     \
        goto restart_opcode;

/* REP-family prefix: flag the repeat, store the caller-supplied _ZERO value in
 * core.rep_zero, then continue decoding at restart_opcode. */
#define DO_PREFIX_REP(_ZERO)                    \
        core.prefixes|=PREFIX_REP;              \
        core.rep_zero=_ZERO;                    \
        goto restart_opcode;
00104 
00105 typedef PhysPt (*GetEAHandler)(void);
00106 
00107 static const Bit32u AddrMaskTable[2]={0x0000ffffu,0xffffffffu};
00108 
00109 static struct {
00110         Bitu opcode_index;
00111         PhysPt cseip;
00112         PhysPt base_ds,base_ss;
00113         SegNames base_val_ds;
00114         bool rep_zero;
00115         Bitu prefixes;
00116         GetEAHandler * ea_table;
00117 } core;
00118 
00119 #define GETIP           (core.cseip-SegBase(cs))
00120 #define SAVEIP          reg_eip=GETIP;
00121 #define LOADIP          core.cseip=(SegBase(cs)+reg_eip);
00122 
00123 #define SegBase(c)      SegPhys(c)
00124 #define BaseDS          core.base_ds
00125 #define BaseSS          core.base_ss
00126 
00127 
00128 #define MAX_PQ_SIZE 32
00129 static Bit8u prefetch_buffer[MAX_PQ_SIZE];
00130 static bool pq_valid=false;
00131 static Bitu pq_start;
00132 static Bitu pq_fill;
00133 static Bitu pq_limit;
00134 static Bitu pq_reload;
00135 #ifdef PREFETCH_DEBUG
00136 static double pq_next_dbg=0;
00137 static unsigned int pq_hit=0,pq_miss=0;
00138 #endif
00139 
00140 //#define PREFETCH_DEBUG
00141 
00142 /* WARNING: This code needs MORE TESTING. So far, it seems to work fine. */
00143 
00144 template <class T> static inline bool prefetch_hit(const Bitu w) {
00145     return pq_valid && (w >= pq_start && (w + sizeof(T)) <= pq_fill);
00146 }
00147 
00148 template <class T> static inline T prefetch_read(const Bitu w);
00149 
00150 template <class T> static inline void prefetch_read_check(const Bitu w) {
00151     (void)w;//POSSIBLY UNUSED
00152 #ifdef PREFETCH_DEBUG
00153     if (!pq_valid) E_Exit("CPU: Prefetch read when not valid!");
00154     if (w < pq_start) E_Exit("CPU: Prefetch read below prefetch base");
00155     if ((w+sizeof(T)) > pq_fill) E_Exit("CPU: Prefetch read beyond prefetch fill");
00156 #endif
00157 }
00158 
00159 template <> uint8_t prefetch_read<uint8_t>(const Bitu w) {
00160     prefetch_read_check<uint8_t>(w);
00161     return prefetch_buffer[w - pq_start];
00162 }
00163 
00164 template <> uint16_t prefetch_read<uint16_t>(const Bitu w) {
00165     prefetch_read_check<uint16_t>(w);
00166     return host_readw(&prefetch_buffer[w - pq_start]);
00167 }
00168 
00169 template <> uint32_t prefetch_read<uint32_t>(const Bitu w) {
00170     prefetch_read_check<uint32_t>(w);
00171     return host_readd(&prefetch_buffer[w - pq_start]);
00172 }
00173 
00174 static inline void prefetch_init(const Bitu start) {
00175     /* start must be DWORD aligned */
00176     pq_start = pq_fill = start;
00177     pq_valid = true;
00178 }
00179 
00180 static inline void prefetch_filldword(void) {
00181     host_writed(&prefetch_buffer[pq_fill - pq_start],LoadMd(pq_fill));
00182     pq_fill += 4/*DWORD*/;
00183 }
00184 
00185 static inline void prefetch_refill(const Bitu stop) {
00186     while (pq_fill < stop) prefetch_filldword();
00187 }
00188 
00189 static inline void prefetch_lazyflush(const Bitu w) {
00190     /* assume: prefetch buffer hit.
00191      * assume: w >= pq_start + sizeof(T) and w + sizeof(T) <= pq_fill
00192      * assume: prefetch buffer is full.
00193      * assume: w is the memory address + sizeof(T)
00194      * assume: pq_start is DWORD aligned.
00195      * assume: CPU_PrefetchQueueSize >= 4 */
00196     if ((w - pq_start) >= pq_limit) {
00197         memmove(prefetch_buffer,prefetch_buffer+4,pq_limit-4);
00198         pq_start += 4;
00199 
00200         prefetch_filldword();
00201 #ifdef PREFETCH_DEBUG
00202         assert(pq_start+pq_limit == pq_fill);
00203 #endif
00204     }
00205 }
00206 
00207 /* this implementation follows what I think the Intel 80386/80486 is more likely
00208  * to do when fetching from prefetch and refilling prefetch --J.C. */
00209 template <class T> static inline T Fetch(void) {
00210     T temp;
00211 
00212     if (prefetch_hit<T>(core.cseip)) {
00213         /* as long as prefetch hits are occurring, keep loading more! */
00214         if ((pq_fill - pq_start) < pq_limit) {
00215             prefetch_filldword();
00216             if (sizeof(T) >= 4 && (pq_fill - pq_start) < pq_limit)
00217                 prefetch_filldword();
00218         }
00219         else {
00220             prefetch_lazyflush(core.cseip + 4 + sizeof(T));
00221         }
00222 
00223         temp = prefetch_read<T>(core.cseip);
00224 #ifdef PREFETCH_DEBUG
00225         pq_hit++;
00226 #endif
00227     }
00228     else {
00229         prefetch_init(core.cseip & (~0x3)); /* fill prefetch starting on DWORD boundary */
00230         prefetch_refill(pq_start + pq_reload); /* perhaps in the time it takes for a prefetch miss the 80486 can load two DWORDs */
00231         temp = prefetch_read<T>(core.cseip);
00232 #ifdef PREFETCH_DEBUG
00233         pq_miss++;
00234 #endif
00235     }
00236 
00237 #ifdef PREFETCH_DEBUG
00238     if (pq_valid) {
00239         assert(core.cseip >= pq_start && (core.cseip+sizeof(T)) <= pq_fill);
00240         assert(pq_fill >= pq_start && (pq_fill - pq_start) <= pq_limit);
00241     }
00242 #endif
00243 
00244     core.cseip += sizeof(T);
00245     return temp;
00246 }
00247 
00248 static Bit8u Fetchb() {
00249         return Fetch<uint8_t>();
00250 }
00251 
00252 static Bit16u Fetchw() {
00253         return Fetch<uint16_t>();
00254 }
00255 
00256 static Bit32u Fetchd() {
00257         return Fetch<uint32_t>();
00258 }
00259 
00260 bool CPU_RDMSR();
00261 bool CPU_WRMSR();
00262 
00263 #define Push_16 CPU_Push16
00264 #define Push_32 CPU_Push32
00265 #define Pop_16 CPU_Pop16
00266 #define Pop_32 CPU_Pop32
00267 
00268 #include "instructions.h"
00269 #include "core_normal/support.h"
00270 #include "core_normal/string.h"
00271 
00272 
00273 #define EALookupTable (core.ea_table)
00274 
00275 void CPU_Core_Prefetch_reset(void) {
00276     pq_valid=false;
00277     prefetch_init(0);
00278 }
00279 
00280 Bits CPU_Core_Prefetch_Run(void) {
00281         bool invalidate_pq=false;
00282 
00283     pq_limit = CPU_PrefetchQueueSize & (~0x3u);
00284     pq_reload = min(pq_limit,(Bitu)8u);
00285 
00286         while (CPU_Cycles-->0) {
00287                 if (invalidate_pq) {
00288                         pq_valid=false;
00289                         invalidate_pq=false;
00290                 }
00291                 LOADIP;
00292                 core.opcode_index=cpu.code.big*0x200u;
00293                 core.prefixes=cpu.code.big;
00294                 core.ea_table=&EATable[cpu.code.big*256u];
00295                 BaseDS=SegBase(ds);
00296                 BaseSS=SegBase(ss);
00297                 core.base_val_ds=ds;
00298 #if C_DEBUG
00299 #if C_HEAVY_DEBUG
00300                 if (DEBUG_HeavyIsBreakpoint()) {
00301                         FillFlags();
00302                         return (Bits)debugCallback;
00303                 };
00304 #endif
00305                 cycle_count++;
00306 #endif
00307 restart_opcode:
00308                 Bit8u next_opcode=Fetchb();
00309                 invalidate_pq=false;
00310                 if (core.opcode_index&OPCODE_0F) invalidate_pq=true;
00311                 else switch (next_opcode) {
00312                         case 0x70:      case 0x71:      case 0x72:      case 0x73:
00313                         case 0x74:      case 0x75:      case 0x76:      case 0x77:
00314                         case 0x78:      case 0x79:      case 0x7a:      case 0x7b:
00315                         case 0x7c:      case 0x7d:      case 0x7e:      case 0x7f:      // jcc
00316                         case 0x9a:      // call
00317                         case 0xc2:      case 0xc3:      // retn
00318                         case 0xc8:      // enter
00319                         case 0xc9:      // leave
00320                         case 0xca:      case 0xcb:      // retf
00321                         case 0xcc:      // int3
00322                         case 0xcd:      // int
00323                         case 0xce:      // into
00324                         case 0xcf:      // iret
00325                         case 0xe0:      // loopnz
00326                         case 0xe1:      // loopz
00327                         case 0xe2:      // loop
00328                         case 0xe3:      // jcxz
00329                         case 0xe8:      // call
00330                         case 0xe9:      case 0xea:      case 0xeb:      // jmp
00331                         case 0xff:
00332                                 invalidate_pq=true;
00333                                 break;
00334                         default:
00335                                 break;
00336                 }
00337                 switch (core.opcode_index+next_opcode) {
00338                 #include "core_normal/prefix_none.h"
00339                 #include "core_normal/prefix_0f.h"
00340                 #include "core_normal/prefix_66.h"
00341                 #include "core_normal/prefix_66_0f.h"
00342                 default:
00343                 illegal_opcode:
00344 #if C_DEBUG
00345                         {
00346                                 bool ignore=false;
00347                                 Bitu len=(GETIP-reg_eip);
00348                                 LOADIP;
00349                                 if (len>16) len=16;
00350                                 char tempcode[16*2+1];char * writecode=tempcode;
00351                                 if (ignore_opcode_63 && mem_readb(core.cseip) == 0x63)
00352                                         ignore = true;
00353                                 for (;len>0;len--) {
00354                                         sprintf(writecode,"%02X",mem_readb(core.cseip++));
00355                                         writecode+=2;
00356                                 }
00357                                 if (!ignore)
00358                                         LOG(LOG_CPU,LOG_NORMAL)("Illegal/Unhandled opcode %s",tempcode);
00359                         }
00360 #endif
00361                         CPU_Exception(6,0);
00362                         invalidate_pq=true;
00363                         continue;
00364                 gp_fault:
00365                         CPU_Exception(EXCEPTION_GP,0);
00366                         continue;
00367                 }
00368                 SAVEIP;
00369         }
00370 
00371 #ifdef PREFETCH_DEBUG
00372     if (PIC_FullIndex() > pq_next_dbg) {
00373         LOG_MSG("Prefetch core debug: prefetch cache hit=%u miss=%u",pq_hit,pq_miss);
00374         pq_next_dbg += 500.0;
00375     }
00376 #endif
00377 
00378         FillFlags();
00379         return CBRET_NONE;
00380 decode_end:
00381         SAVEIP;
00382         FillFlags();
00383         return CBRET_NONE;
00384 }
00385 
00386 Bits CPU_Core_Prefetch_Trap_Run(void) {
00387         Bits oldCycles = CPU_Cycles;
00388         CPU_Cycles = 1;
00389         cpu.trap_skip = false;
00390 
00391         Bits ret=CPU_Core_Prefetch_Run();
00392         if (!cpu.trap_skip) CPU_HW_Interrupt(1);
00393         CPU_Cycles = oldCycles-1;
00394         cpudecoder = &CPU_Core_Prefetch_Run;
00395 
00396         return ret;
00397 }
00398 
00399 
00400 
/* Initialization hook for the prefetch core. It currently has nothing to set up. */
void CPU_Core_Prefetch_Init(void) {
        /* intentionally empty */
}
00404