DOSBox-X
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Defines
src/cpu/core_prefetch_buf.h
00001 
00002 #define PREFETCH_CORE
00003 
00004 /* WARNING: This code needs MORE TESTING. So far, it seems to work fine. */
00005 
00006 template <class T> static inline bool prefetch_hit(const Bitu w) {
00007     return pq_valid && (w >= pq_start && (w + sizeof(T)) <= pq_fill);
00008 }
00009 
00010 template <class T> static inline T prefetch_read(const Bitu w);
00011 
00012 template <class T> static inline void prefetch_read_check(const Bitu w) {
00013     (void)w;//POSSIBLY UNUSED
00014 #ifdef PREFETCH_DEBUG
00015     if (!pq_valid) E_Exit("CPU: Prefetch read when not valid!");
00016     if (w < pq_start) E_Exit("CPU: Prefetch read below prefetch base");
00017     if ((w+sizeof(T)) > pq_fill) E_Exit("CPU: Prefetch read beyond prefetch fill");
00018 #endif
00019 }
00020 
00021 template <> uint8_t prefetch_read<uint8_t>(const Bitu w) {
00022     prefetch_read_check<uint8_t>(w);
00023     return prefetch_buffer[w - pq_start];
00024 }
00025 
00026 template <> uint16_t prefetch_read<uint16_t>(const Bitu w) {
00027     prefetch_read_check<uint16_t>(w);
00028     return host_readw(&prefetch_buffer[w - pq_start]);
00029 }
00030 
00031 template <> uint32_t prefetch_read<uint32_t>(const Bitu w) {
00032     prefetch_read_check<uint32_t>(w);
00033     return host_readd(&prefetch_buffer[w - pq_start]);
00034 }
00035 
00036 static inline void prefetch_init(const Bitu start) {
00037     /* start must be DWORD aligned */
00038     pq_start = pq_fill = start;
00039     pq_valid = true;
00040 }
00041 
00042 static inline void prefetch_filldword(void) {
00043     host_writed(&prefetch_buffer[pq_fill - pq_start],LoadMd((PhysPt)pq_fill));
00044     pq_fill += prefetch_unit;
00045 }
00046 
00047 static inline void prefetch_refill(const Bitu stop) {
00048     while (pq_fill < stop) prefetch_filldword();
00049 }
00050 
00051 static inline void prefetch_lazyflush(const Bitu w) {
00052     /* assume: prefetch buffer hit.
00053      * assume: w >= pq_start + sizeof(T) and w + sizeof(T) <= pq_fill
00054      * assume: prefetch buffer is full.
00055      * assume: w is the memory address + sizeof(T)
00056      * assume: pq_start is DWORD aligned.
00057      * assume: CPU_PrefetchQueueSize >= 4 */
00058     if ((w - pq_start) >= pq_limit) {
00059         memmove(prefetch_buffer,prefetch_buffer+prefetch_unit,pq_limit-prefetch_unit);
00060         pq_start += prefetch_unit;
00061 
00062         prefetch_filldword();
00063     }
00064 
00065 #ifdef PREFETCH_DEBUG
00066     assert(pq_fill >= pq_start);
00067     assert((pq_fill - pq_start) <= pq_limit);
00068 #endif
00069 }
00070 
00071 /* this implementation follows what I think the Intel 80386/80486 is more likely
00072  * to do when fetching from prefetch and refilling prefetch --J.C. */
00073 template <class T> static inline T Fetch(void) {
00074     T temp;
00075 
00076     if (prefetch_hit<T>(core.cseip)) {
00077         /* as long as prefetch hits are occurring, keep loading more! */
00078         prefetch_lazyflush(core.cseip+sizeof(T));
00079         if ((pq_fill - pq_start) < pq_limit)
00080             prefetch_filldword();
00081 
00082         if (sizeof(T) >= prefetch_unit) {
00083             if ((pq_fill - pq_start) < pq_limit)
00084                 prefetch_filldword();
00085         }
00086 
00087         temp = prefetch_read<T>(core.cseip);
00088 #ifdef PREFETCH_DEBUG
00089         pq_hit++;
00090 #endif
00091     }
00092     else {
00093         prefetch_init(core.cseip & (~(prefetch_unit-1ul))); /* fill prefetch starting on DWORD boundary */
00094         prefetch_refill(pq_start + pq_reload); /* perhaps in the time it takes for a prefetch miss the 80486 can load two DWORDs */
00095         temp = prefetch_read<T>(core.cseip);
00096 #ifdef PREFETCH_DEBUG
00097         pq_miss++;
00098 #endif
00099     }
00100 
00101 #ifdef PREFETCH_DEBUG
00102     if (pq_valid) {
00103         assert(core.cseip >= pq_start && (core.cseip+sizeof(T)) <= pq_fill);
00104         assert(pq_fill >= pq_start && (pq_fill - pq_start) <= pq_limit);
00105     }
00106 #endif
00107 
00108     core.cseip += sizeof(T);
00109     return temp;
00110 }
00111 
00112 template <class T> static inline void FetchDiscard(void) {
00113     core.cseip += sizeof(T);
00114 }
00115 
00116 template <class T> static inline T FetchPeek(void) {
00117     T temp;
00118 
00119     if (prefetch_hit<T>(core.cseip)) {
00120         /* as long as prefetch hits are occurring, keep loading more! */
00121         prefetch_lazyflush(core.cseip+sizeof(T));
00122         if ((pq_fill - pq_start) < pq_limit)
00123             prefetch_filldword();
00124 
00125         if (sizeof(T) >= prefetch_unit) {
00126             if ((pq_fill - pq_start) < pq_limit)
00127                 prefetch_filldword();
00128         }
00129 
00130         temp = prefetch_read<T>(core.cseip);
00131 #ifdef PREFETCH_DEBUG
00132         pq_hit++;
00133 #endif
00134     }
00135     else {
00136         prefetch_init(core.cseip & (~(prefetch_unit-1ul))); /* fill prefetch starting on DWORD boundary */
00137         prefetch_refill(pq_start + pq_reload); /* perhaps in the time it takes for a prefetch miss the 80486 can load two DWORDs */
00138         temp = prefetch_read<T>(core.cseip);
00139 #ifdef PREFETCH_DEBUG
00140         pq_miss++;
00141 #endif
00142     }
00143 
00144 #ifdef PREFETCH_DEBUG
00145     if (pq_valid) {
00146         assert(core.cseip >= pq_start && (core.cseip+sizeof(T)) <= pq_fill);
00147         assert(pq_fill >= pq_start && (pq_fill - pq_start) <= pq_limit);
00148     }
00149 #endif
00150 
00151     return temp;
00152 }
00153