DOSBox-X
|
00001 00002 #define PREFETCH_CORE 00003 00004 /* WARNING: This code needs MORE TESTING. So far, it seems to work fine. */ 00005 00006 template <class T> static inline bool prefetch_hit(const Bitu w) { 00007 return pq_valid && (w >= pq_start && (w + sizeof(T)) <= pq_fill); 00008 } 00009 00010 template <class T> static inline T prefetch_read(const Bitu w); 00011 00012 template <class T> static inline void prefetch_read_check(const Bitu w) { 00013 (void)w;//POSSIBLY UNUSED 00014 #ifdef PREFETCH_DEBUG 00015 if (!pq_valid) E_Exit("CPU: Prefetch read when not valid!"); 00016 if (w < pq_start) E_Exit("CPU: Prefetch read below prefetch base"); 00017 if ((w+sizeof(T)) > pq_fill) E_Exit("CPU: Prefetch read beyond prefetch fill"); 00018 #endif 00019 } 00020 00021 template <> uint8_t prefetch_read<uint8_t>(const Bitu w) { 00022 prefetch_read_check<uint8_t>(w); 00023 return prefetch_buffer[w - pq_start]; 00024 } 00025 00026 template <> uint16_t prefetch_read<uint16_t>(const Bitu w) { 00027 prefetch_read_check<uint16_t>(w); 00028 return host_readw(&prefetch_buffer[w - pq_start]); 00029 } 00030 00031 template <> uint32_t prefetch_read<uint32_t>(const Bitu w) { 00032 prefetch_read_check<uint32_t>(w); 00033 return host_readd(&prefetch_buffer[w - pq_start]); 00034 } 00035 00036 static inline void prefetch_init(const Bitu start) { 00037 /* start must be DWORD aligned */ 00038 pq_start = pq_fill = start; 00039 pq_valid = true; 00040 } 00041 00042 static inline void prefetch_filldword(void) { 00043 host_writed(&prefetch_buffer[pq_fill - pq_start],LoadMd((PhysPt)pq_fill)); 00044 pq_fill += prefetch_unit; 00045 } 00046 00047 static inline void prefetch_refill(const Bitu stop) { 00048 while (pq_fill < stop) prefetch_filldword(); 00049 } 00050 00051 static inline void prefetch_lazyflush(const Bitu w) { 00052 /* assume: prefetch buffer hit. 00053 * assume: w >= pq_start + sizeof(T) and w + sizeof(T) <= pq_fill 00054 * assume: prefetch buffer is full. 00055 * assume: w is the memory address + sizeof(T) 00056 * assume: pq_start is DWORD aligned. 00057 * assume: CPU_PrefetchQueueSize >= 4 */ 00058 if ((w - pq_start) >= pq_limit) { 00059 memmove(prefetch_buffer,prefetch_buffer+prefetch_unit,pq_limit-prefetch_unit); 00060 pq_start += prefetch_unit; 00061 00062 prefetch_filldword(); 00063 } 00064 00065 #ifdef PREFETCH_DEBUG 00066 assert(pq_fill >= pq_start); 00067 assert((pq_fill - pq_start) <= pq_limit); 00068 #endif 00069 } 00070 00071 /* this implementation follows what I think the Intel 80386/80486 is more likely 00072 * to do when fetching from prefetch and refilling prefetch --J.C. */ 00073 template <class T> static inline T Fetch(void) { 00074 T temp; 00075 00076 if (prefetch_hit<T>(core.cseip)) { 00077 /* as long as prefetch hits are occurring, keep loading more! */ 00078 prefetch_lazyflush(core.cseip+sizeof(T)); 00079 if ((pq_fill - pq_start) < pq_limit) 00080 prefetch_filldword(); 00081 00082 if (sizeof(T) >= prefetch_unit) { 00083 if ((pq_fill - pq_start) < pq_limit) 00084 prefetch_filldword(); 00085 } 00086 00087 temp = prefetch_read<T>(core.cseip); 00088 #ifdef PREFETCH_DEBUG 00089 pq_hit++; 00090 #endif 00091 } 00092 else { 00093 prefetch_init(core.cseip & (~(prefetch_unit-1ul))); /* fill prefetch starting on DWORD boundary */ 00094 prefetch_refill(pq_start + pq_reload); /* perhaps in the time it takes for a prefetch miss the 80486 can load two DWORDs */ 00095 temp = prefetch_read<T>(core.cseip); 00096 #ifdef PREFETCH_DEBUG 00097 pq_miss++; 00098 #endif 00099 } 00100 00101 #ifdef PREFETCH_DEBUG 00102 if (pq_valid) { 00103 assert(core.cseip >= pq_start && (core.cseip+sizeof(T)) <= pq_fill); 00104 assert(pq_fill >= pq_start && (pq_fill - pq_start) <= pq_limit); 00105 } 00106 #endif 00107 00108 core.cseip += sizeof(T); 00109 return temp; 00110 } 00111 00112 template <class T> static inline void FetchDiscard(void) { 00113 core.cseip += sizeof(T); 00114 } 00115 00116 template <class T> static inline T FetchPeek(void) { 00117 T temp; 00118 00119 if (prefetch_hit<T>(core.cseip)) { 00120 /* as long as prefetch hits are occurring, keep loading more! */ 00121 prefetch_lazyflush(core.cseip+sizeof(T)); 00122 if ((pq_fill - pq_start) < pq_limit) 00123 prefetch_filldword(); 00124 00125 if (sizeof(T) >= prefetch_unit) { 00126 if ((pq_fill - pq_start) < pq_limit) 00127 prefetch_filldword(); 00128 } 00129 00130 temp = prefetch_read<T>(core.cseip); 00131 #ifdef PREFETCH_DEBUG 00132 pq_hit++; 00133 #endif 00134 } 00135 else { 00136 prefetch_init(core.cseip & (~(prefetch_unit-1ul))); /* fill prefetch starting on DWORD boundary */ 00137 prefetch_refill(pq_start + pq_reload); /* perhaps in the time it takes for a prefetch miss the 80486 can load two DWORDs */ 00138 temp = prefetch_read<T>(core.cseip); 00139 #ifdef PREFETCH_DEBUG 00140 pq_miss++; 00141 #endif 00142 } 00143 00144 #ifdef PREFETCH_DEBUG 00145 if (pq_valid) { 00146 assert(core.cseip >= pq_start && (core.cseip+sizeof(T)) <= pq_fill); 00147 assert(pq_fill >= pq_start && (pq_fill - pq_start) <= pq_limit); 00148 } 00149 #endif 00150 00151 return temp; 00152 } 00153