DOSBox-X
src/cpu/core_dynrec/risc_armv4le-thumb-niw.h
/*
 *  Copyright (C) 2002-2020  The DOSBox Team
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License along
 *  with this program; if not, write to the Free Software Foundation, Inc.,
 *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 */



/* ARMv4 (little endian) backend by M-HT (thumb version with data pool) */


// temporary "lo" registers
#define templo1 HOST_v3
#define templo2 HOST_v4
#define templo3 HOST_v2

// register that holds function return values
#define FC_RETOP HOST_a1

// register used for address calculations
#define FC_ADDR HOST_v1                 // has to be saved across calls, see DRC_PROTECT_ADDR_REG

// register that holds the first parameter
#define FC_OP1 HOST_a1

// register that holds the second parameter
#define FC_OP2 HOST_a2

// special register that holds the third parameter for _R3 calls (byte accessible)
#define FC_OP3 HOST_a4

// register that holds byte-accessible temporary values
#define FC_TMP_BA1 HOST_a1

// register that holds byte-accessible temporary values
#define FC_TMP_BA2 HOST_a2

// temporary register for LEA
#define TEMP_REG_DRC HOST_a4

// used to hold the address of "cpu_regs" - preferably filled in function gen_run_code
#define FC_REGS_ADDR HOST_v7

// used to hold the address of "Segs" - preferably filled in function gen_run_code
#define FC_SEGS_ADDR HOST_v8

// used to hold the address of "core_dynrec.readdata" - filled in function gen_run_code
#define readdata_addr HOST_v5


// instruction encodings

// move
// mov dst, #imm                @       0 <= imm <= 255
#define MOV_IMM(dst, imm) (0x2000 + ((dst) << 8) + (imm) )
// mov dst, src
#define MOV_REG(dst, src) ADD_IMM3(dst, src, 0)
// mov dst, src
#define MOV_LO_HI(dst, src) (0x4640 + (dst) + (((src) - HOST_r8) << 3) )
// mov dst, src
#define MOV_HI_LO(dst, src) (0x4680 + ((dst) - HOST_r8) + ((src) << 3) )
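
// Worked example (added for illustration): assuming the register numbering
// from the shared ARM backend header (HOST_a1 == HOST_r0 == 0),
// MOV_IMM(HOST_a1, 0x40) evaluates to
//     0x2000 + (0 << 8) + 0x40 = 0x2040
// which is the Thumb format-3 encoding "001 00 ddd iiiiiiii" of
//     mov r0, #0x40
// Each macro here builds one such 16-bit halfword; the emitters below write
// it to the code cache with cache_addw().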

// arithmetic
// add dst, src, #imm           @       0 <= imm <= 7
#define ADD_IMM3(dst, src, imm) (0x1c00 + (dst) + ((src) << 3) + ((imm) << 6) )
// add dst, #imm                @       0 <= imm <= 255
#define ADD_IMM8(dst, imm) (0x3000 + ((dst) << 8) + (imm) )
// add dst, src1, src2
#define ADD_REG(dst, src1, src2) (0x1800 + (dst) + ((src1) << 3) + ((src2) << 6) )
// add dst, pc, #imm            @       0 <= imm < 1024 &       imm mod 4 = 0
#define ADD_LO_PC_IMM(dst, imm) (0xa000 + ((dst) << 8) + ((imm) >> 2) )
// sub dst, src1, src2
#define SUB_REG(dst, src1, src2) (0x1a00 + (dst) + ((src1) << 3) + ((src2) << 6) )
// sub dst, src, #imm           @       0 <= imm <= 7
#define SUB_IMM3(dst, src, imm) (0x1e00 + (dst) + ((src) << 3) + ((imm) << 6) )
// sub dst, #imm                @       0 <= imm <= 255
#define SUB_IMM8(dst, imm) (0x3800 + ((dst) << 8) + (imm) )
// neg dst, src
#define NEG(dst, src) (0x4240 + (dst) + ((src) << 3) )
// cmp dst, #imm                @       0 <= imm <= 255
#define CMP_IMM(dst, imm) (0x2800 + ((dst) << 8) + (imm) )
// nop
#define NOP (0x46c0)

// logical
// and dst, src
#define AND(dst, src) (0x4000 + (dst) + ((src) << 3) )
// bic dst, src
#define BIC(dst, src) (0x4380 + (dst) + ((src) << 3) )
// eor dst, src
#define EOR(dst, src) (0x4040 + (dst) + ((src) << 3) )
// orr dst, src
#define ORR(dst, src) (0x4300 + (dst) + ((src) << 3) )
// mvn dst, src
#define MVN(dst, src) (0x43c0 + (dst) + ((src) << 3) )

// shift/rotate
// lsl dst, src, #imm
#define LSL_IMM(dst, src, imm) (0x0000 + (dst) + ((src) << 3) + ((imm) << 6) )
// lsl dst, reg
#define LSL_REG(dst, reg) (0x4080 + (dst) + ((reg) << 3) )
// lsr dst, src, #imm
#define LSR_IMM(dst, src, imm) (0x0800 + (dst) + ((src) << 3) + ((imm) << 6) )
// lsr dst, reg
#define LSR_REG(dst, reg) (0x40c0 + (dst) + ((reg) << 3) )
// asr dst, src, #imm
#define ASR_IMM(dst, src, imm) (0x1000 + (dst) + ((src) << 3) + ((imm) << 6) )
// asr dst, reg
#define ASR_REG(dst, reg) (0x4100 + (dst) + ((reg) << 3) )
// ror dst, reg
#define ROR_REG(dst, reg) (0x41c0 + (dst) + ((reg) << 3) )

// load
// ldr reg, [addr, #imm]                @       0 <= imm < 128  &       imm mod 4 = 0
#define LDR_IMM(reg, addr, imm) (0x6800 + (reg) + ((addr) << 3) + ((imm) << 4) )
// ldrh reg, [addr, #imm]               @       0 <= imm < 64   &       imm mod 2 = 0
#define LDRH_IMM(reg, addr, imm) (0x8800 + (reg) + ((addr) << 3) + ((imm) << 5) )
// ldrb reg, [addr, #imm]               @       0 <= imm < 32
#define LDRB_IMM(reg, addr, imm) (0x7800 + (reg) + ((addr) << 3) + ((imm) << 6) )
// ldr reg, [pc, #imm]          @       0 <= imm < 1024 &       imm mod 4 = 0
#define LDR_PC_IMM(reg, imm) (0x4800 + ((reg) << 8) + ((imm) >> 2) )
// ldr reg, [addr1, addr2]
#define LDR_REG(reg, addr1, addr2) (0x5800 + (reg) + ((addr1) << 3) + ((addr2) << 6) )

// store
// str reg, [addr, #imm]                @       0 <= imm < 128  &       imm mod 4 = 0
#define STR_IMM(reg, addr, imm) (0x6000 + (reg) + ((addr) << 3) + ((imm) << 4) )
// strh reg, [addr, #imm]               @       0 <= imm < 64   &       imm mod 2 = 0
#define STRH_IMM(reg, addr, imm) (0x8000 + (reg) + ((addr) << 3) + ((imm) << 5) )
// strb reg, [addr, #imm]               @       0 <= imm < 32
#define STRB_IMM(reg, addr, imm) (0x7000 + (reg) + ((addr) << 3) + ((imm) << 6) )

// branch
// beq pc+imm           @       0 <= imm < 256  &       imm mod 2 = 0
#define BEQ_FWD(imm) (0xd000 + ((imm) >> 1) )
// bne pc+imm           @       0 <= imm < 256  &       imm mod 2 = 0
#define BNE_FWD(imm) (0xd100 + ((imm) >> 1) )
// bgt pc+imm           @       0 <= imm < 256  &       imm mod 2 = 0
#define BGT_FWD(imm) (0xdc00 + ((imm) >> 1) )
// b pc+imm             @       0 <= imm < 2048 &       imm mod 2 = 0
#define B_FWD(imm) (0xe000 + ((imm) >> 1) )
// bx reg
#define BX(reg) (0x4700 + ((reg) << 3) )


// arm instructions

// arithmetic
// add dst, src, #(imm ror rimm)                @       0 <= imm <= 255 &       rimm mod 2 = 0
#define ARM_ADD_IMM(dst, src, imm, rimm) (0xe2800000 + ((dst) << 12) + ((src) << 16) + (imm) + ((rimm) << 7) )

// load
// ldr reg, [addr, #imm]                @       0 <= imm < 4096
#define ARM_LDR_IMM(reg, addr, imm) (0xe5900000 + ((reg) << 12) + ((addr) << 16) + (imm) )

// store
// str reg, [addr, #-(imm)]!            @       0 <= imm < 4096
#define ARM_STR_IMM_M_W(reg, addr, imm) (0xe5200000 + ((reg) << 12) + ((addr) << 16) + (imm) )

// branch
// bx reg
#define ARM_BX(reg) (0xe12fff10 + (reg) )


// data pool defines
#define CACHE_DATA_JUMP  (2)
#define CACHE_DATA_ALIGN (32)
#define CACHE_DATA_MIN   (32)
#define CACHE_DATA_MAX   (288)

// data pool variables
static Bit8u * cache_datapos = NULL;    // position of data pool in the cache block
static Bit32u cache_datasize = 0;               // total size of data pool
static Bit32u cache_dataindex = 0;              // used size of data pool = index of free data item (in bytes) in data pool
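
// Rough layout sketch (added for illustration) of a cache block while a
// data pool is in use:
//
//     [ thumb code ... | b past_pool | data pool (4-byte items) | thumb code ... ]
//                                      ^ cache_datapos (CACHE_DATA_ALIGN-aligned)
//
// Thumb "ldr reg, [pc, #imm]" only reaches forward, up to 1020 bytes, so
// 32-bit constants are kept in small pools placed shortly after the code
// that references them rather than in one pool at the end of the block.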


// forward declaration
static void INLINE gen_create_branch_short(void * func);

// function to check the distance to the data pool;
// if the code position is too close, generate a jump past the data pool
static void cache_checkinstr(Bit32u size) {
        if (cache_datasize == 0) {
                if (cache_datapos != NULL) {
                        if (cache.pos + size + CACHE_DATA_JUMP >= cache_datapos) {
                                cache_datapos = NULL;
                        }
                }
                return;
        }

        if (cache.pos + size + CACHE_DATA_JUMP <= cache_datapos) return;

        {
                register Bit8u * newcachepos;

                newcachepos = cache_datapos + cache_datasize;
                gen_create_branch_short(newcachepos);
                cache.pos = newcachepos;
        }

        if (cache.pos + CACHE_DATA_MAX + CACHE_DATA_ALIGN >= cache.block.active->cache.start + cache.block.active->cache.size &&
                cache.pos + CACHE_DATA_MIN + CACHE_DATA_ALIGN + (CACHE_DATA_ALIGN - CACHE_ALIGN) < cache.block.active->cache.start + cache.block.active->cache.size)
        {
                cache_datapos = (Bit8u *) (((Bitu)cache.block.active->cache.start + cache.block.active->cache.size - CACHE_DATA_ALIGN) & ~(CACHE_DATA_ALIGN - 1));
        } else {
                register Bit32u cachemodsize;

                cachemodsize = (cache.pos - cache.block.active->cache.start) & (CACHE_MAXSIZE - 1);

                if (cachemodsize + CACHE_DATA_MAX + CACHE_DATA_ALIGN <= CACHE_MAXSIZE ||
                        cachemodsize + CACHE_DATA_MIN + CACHE_DATA_ALIGN + (CACHE_DATA_ALIGN - CACHE_ALIGN) > CACHE_MAXSIZE)
                {
                        cache_datapos = (Bit8u *) (((Bitu)cache.pos + CACHE_DATA_MAX) & ~(CACHE_DATA_ALIGN - 1));
                } else {
                        cache_datapos = (Bit8u *) (((Bitu)cache.pos + (CACHE_MAXSIZE - CACHE_DATA_ALIGN) - cachemodsize) & ~(CACHE_DATA_ALIGN - 1));
                }
        }

        cache_datasize = 0;
        cache_dataindex = 0;
}
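
// Typical emitter pattern (added for illustration): room is reserved before
// any halfwords are written, so cache.pos can never run into the data pool:
//
//     cache_checkinstr(4);                    // about to emit 2 halfwords
//     cache_addw( MOV_IMM(dest_reg, 0) );     // mov dest_reg, #0
//     cache_addw( MVN(dest_reg, dest_reg) );  // mvn dest_reg, dest_reg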

// function to reserve item in data pool
// returns address of item
static Bit8u * cache_reservedata(void) {
        // if data pool not yet initialized, then initialize data pool
        if (GCC_UNLIKELY(cache_datapos == NULL)) {
                if (cache.pos + CACHE_DATA_MIN + CACHE_DATA_ALIGN < cache.block.active->cache.start + CACHE_DATA_MAX) {
                        cache_datapos = (Bit8u *) (((Bitu)cache.block.active->cache.start + CACHE_DATA_MAX) & ~(CACHE_DATA_ALIGN - 1));
                }
        }

        // if data pool not yet used, then position the data pool
        if (cache_datasize == 0) {
                // if the data pool address is too close to (or behind) cache.pos, then choose a new data pool position
                if (cache.pos + CACHE_DATA_MIN + CACHE_DATA_JUMP /*+ CACHE_DATA_ALIGN*/ > cache_datapos) {
                        if (cache.pos + CACHE_DATA_MAX + CACHE_DATA_ALIGN >= cache.block.active->cache.start + cache.block.active->cache.size &&
                                cache.pos + CACHE_DATA_MIN + CACHE_DATA_ALIGN + (CACHE_DATA_ALIGN - CACHE_ALIGN) < cache.block.active->cache.start + cache.block.active->cache.size)
                        {
                                cache_datapos = (Bit8u *) (((Bitu)cache.block.active->cache.start + cache.block.active->cache.size - CACHE_DATA_ALIGN) & ~(CACHE_DATA_ALIGN - 1));
                        } else {
                                register Bit32u cachemodsize;

                                cachemodsize = (cache.pos - cache.block.active->cache.start) & (CACHE_MAXSIZE - 1);

                                if (cachemodsize + CACHE_DATA_MAX + CACHE_DATA_ALIGN <= CACHE_MAXSIZE ||
                                        cachemodsize + CACHE_DATA_MIN + CACHE_DATA_ALIGN + (CACHE_DATA_ALIGN - CACHE_ALIGN) > CACHE_MAXSIZE)
                                {
                                        cache_datapos = (Bit8u *) (((Bitu)cache.pos + CACHE_DATA_MAX) & ~(CACHE_DATA_ALIGN - 1));
                                } else {
                                        cache_datapos = (Bit8u *) (((Bitu)cache.pos + (CACHE_MAXSIZE - CACHE_DATA_ALIGN) - cachemodsize) & ~(CACHE_DATA_ALIGN - 1));
                                }
                        }
                }
                // set initial data pool size
                cache_datasize = CACHE_DATA_ALIGN;
        }

        // if data pool is full, then enlarge data pool
        if (cache_dataindex == cache_datasize) {
                cache_datasize += CACHE_DATA_ALIGN;
        }

        cache_dataindex += 4;
        return (cache_datapos + (cache_dataindex - 4));
}
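
// Usage sketch (mirrors gen_mov_dword_to_reg_imm and
// gen_call_function_helper below): reserve a 4-byte slot, store a constant
// there, and load it pc-relative from the pool:
//
//     Bit8u *datapos = cache_reservedata();
//     *(Bit32u*)datapos = imm;
//     cache_addw( LDR_PC_IMM(reg, datapos - (cache.pos + 4)) );   // ldr reg, [pc, #off]
//
// (the +4 accounts for the pc read-ahead when cache.pos is word-aligned;
// +2 is used when it is not)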

static void cache_block_before_close(void) {
        // if data pool in use, then resize cache block to include the data pool
        if (cache_datasize != 0)
        {
                cache.pos = cache_datapos + cache_dataindex;
        }

        // clear the values before next use
        cache_datapos = NULL;
        cache_datasize = 0;
        cache_dataindex = 0;
}


// move a full register from reg_src to reg_dst
static void gen_mov_regs(HostReg reg_dst,HostReg reg_src) {
        if(reg_src == reg_dst) return;
        cache_checkinstr(2);
        cache_addw( MOV_REG(reg_dst, reg_src) );      // mov reg_dst, reg_src
}

// helper function
static bool val_single_shift(Bit32u value, Bit32u *val_shift) {
        Bit32u shift;

        if (GCC_UNLIKELY(value == 0)) {
                *val_shift = 0;
                return true;
        }

        shift = 0;
        while ((value & 1) == 0) {
                value>>=1;
                shift+=1;
        }

        if ((value >> 8) != 0) return false;

        *val_shift = shift;
        return true;
}
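
// Worked example (added for clarity): value == 0x1800 has 11 trailing zero
// bits, so the loop leaves value == 3 and shift == 11; 3 < 256, so the
// function returns true with *val_shift == 11 and a caller can materialize
// 0x1800 as "mov rX, #3; lsl rX, rX, #11" instead of spending a pool slot.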

// move a 32bit constant value into dest_reg
static void gen_mov_dword_to_reg_imm(HostReg dest_reg,Bit32u imm) {
        Bit32u scale;

        if (imm < 256) {
                cache_checkinstr(2);
                cache_addw( MOV_IMM(dest_reg, imm) );      // mov dest_reg, #(imm)
        } else if ((~imm) < 256) {
                cache_checkinstr(4);
                cache_addw( MOV_IMM(dest_reg, ~imm) );      // mov dest_reg, #(~imm)
                cache_addw( MVN(dest_reg, dest_reg) );      // mvn dest_reg, dest_reg
        } else if (val_single_shift(imm, &scale)) {
                cache_checkinstr(4);
                cache_addw( MOV_IMM(dest_reg, imm >> scale) );      // mov dest_reg, #(imm >> scale)
                cache_addw( LSL_IMM(dest_reg, dest_reg, scale) );      // lsl dest_reg, dest_reg, #scale
        } else {
                Bit32u diff;

                cache_checkinstr(4);

                diff = imm - ((Bit32u)cache.pos+4);

                if ((diff < 1024) && ((imm & 0x03) == 0)) {
                        if (((Bit32u)cache.pos & 0x03) == 0) {
                                cache_addw( ADD_LO_PC_IMM(dest_reg, diff >> 2) );      // add dest_reg, pc, #(diff >> 2)
                        } else {
                                cache_addw( NOP );      // nop
                                cache_addw( ADD_LO_PC_IMM(dest_reg, (diff - 2) >> 2) );      // add dest_reg, pc, #((diff - 2) >> 2)
                        }
                } else {
                        Bit8u *datapos;

                        datapos = cache_reservedata();
                        *(Bit32u*)datapos=imm;

                        if (((Bit32u)cache.pos & 0x03) == 0) {
                                cache_addw( LDR_PC_IMM(dest_reg, datapos - (cache.pos + 4)) );      // ldr dest_reg, [pc, datapos]
                        } else {
                                cache_addw( LDR_PC_IMM(dest_reg, datapos - (cache.pos + 2)) );      // ldr dest_reg, [pc, datapos]
                        }
                }
        }
}
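
// Example of the fallback ladder above (added for illustration): for
// imm == 0xffffff00 the second case applies because ~imm == 0xff fits in
// 8 bits, so the emitted sequence is
//     mov dest_reg, #0xff
//     mvn dest_reg, dest_reg
// Only constants that match none of the cheap patterns (and are not close
// enough to pc) cost a data-pool slot plus a pc-relative load.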

// helper function
static bool gen_mov_memval_to_reg_helper(HostReg dest_reg, Bit32u data, Bitu size, HostReg addr_reg, Bit32u addr_data) {
        switch (size) {
                case 4:
#if !defined(C_UNALIGNED_MEMORY)
                        if ((data & 3) == 0)
#endif
                        {
                                if ((data >= addr_data) && (data < addr_data + 128) && (((data - addr_data) & 3) == 0)) {
                                        cache_checkinstr(4);
                                        cache_addw( MOV_LO_HI(templo2, addr_reg) );      // mov templo2, addr_reg
                                        cache_addw( LDR_IMM(dest_reg, templo2, data - addr_data) );      // ldr dest_reg, [templo2, #(data - addr_data)]
                                        return true;
                                }
                        }
                        break;
                case 2:
#if !defined(C_UNALIGNED_MEMORY)
                        if ((data & 1) == 0)
#endif
                        {
                                if ((data >= addr_data) && (data < addr_data + 64) && (((data - addr_data) & 1) == 0)) {
                                        cache_checkinstr(4);
                                        cache_addw( MOV_LO_HI(templo2, addr_reg) );      // mov templo2, addr_reg
                                        cache_addw( LDRH_IMM(dest_reg, templo2, data - addr_data) );      // ldrh dest_reg, [templo2, #(data - addr_data)]
                                        return true;
                                }
                        }
                        break;
                case 1:
                        if ((data >= addr_data) && (data < addr_data + 32)) {
                                cache_checkinstr(4);
                                cache_addw( MOV_LO_HI(templo2, addr_reg) );      // mov templo2, addr_reg
                                cache_addw( LDRB_IMM(dest_reg, templo2, data - addr_data) );      // ldrb dest_reg, [templo2, #(data - addr_data)]
                                return true;
                        }
                default:
                        break;
        }
        return false;
}

// helper function
static bool gen_mov_memval_to_reg(HostReg dest_reg, void *data, Bitu size) {
        if (gen_mov_memval_to_reg_helper(dest_reg, (Bit32u)data, size, FC_REGS_ADDR, (Bit32u)&cpu_regs)) return true;
        if (gen_mov_memval_to_reg_helper(dest_reg, (Bit32u)data, size, readdata_addr, (Bit32u)&core_dynrec.readdata)) return true;
        if (gen_mov_memval_to_reg_helper(dest_reg, (Bit32u)data, size, FC_SEGS_ADDR, (Bit32u)&Segs)) return true;
        return false;
}
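
// Net effect (added for illustration): a frequently accessed location such
// as a cpu_regs field becomes two instructions instead of a full 32-bit
// address load, e.g. for a 4-byte-aligned field within 128 bytes of the
// base:
//     mov templo2, FC_REGS_ADDR     @ copy high register to a low register
//     ldr dest_reg, [templo2, #off]
// The extra mov is needed because Thumb loads/stores with an immediate
// offset can only address through the low registers r0-r7.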

// helper function for gen_mov_word_to_reg
static void gen_mov_word_to_reg_helper(HostReg dest_reg,void* data,bool dword,HostReg data_reg) {
        // alignment....
        if (dword) {
#if !defined(C_UNALIGNED_MEMORY)
                if ((Bit32u)data & 3) {
                        if ( ((Bit32u)data & 3) == 2 ) {
                                cache_checkinstr(8);
                                cache_addw( LDRH_IMM(dest_reg, data_reg, 0) );      // ldrh dest_reg, [data_reg]
                                cache_addw( LDRH_IMM(templo1, data_reg, 2) );      // ldrh templo1, [data_reg, #2]
                                cache_addw( LSL_IMM(templo1, templo1, 16) );      // lsl templo1, templo1, #16
                                cache_addw( ORR(dest_reg, templo1) );      // orr dest_reg, templo1
                        } else {
                                cache_checkinstr(16);
                                cache_addw( LDRB_IMM(dest_reg, data_reg, 0) );      // ldrb dest_reg, [data_reg]
                                cache_addw( ADD_IMM3(templo1, data_reg, 1) );      // add templo1, data_reg, #1
                                cache_addw( LDRH_IMM(templo1, templo1, 0) );      // ldrh templo1, [templo1]
                                cache_addw( LSL_IMM(templo1, templo1, 8) );      // lsl templo1, templo1, #8
                                cache_addw( ORR(dest_reg, templo1) );      // orr dest_reg, templo1
                                cache_addw( LDRB_IMM(templo1, data_reg, 3) );      // ldrb templo1, [data_reg, #3]
                                cache_addw( LSL_IMM(templo1, templo1, 24) );      // lsl templo1, templo1, #24
                                cache_addw( ORR(dest_reg, templo1) );      // orr dest_reg, templo1
                        }
                } else
#endif
                {
                        cache_checkinstr(2);
                        cache_addw( LDR_IMM(dest_reg, data_reg, 0) );      // ldr dest_reg, [data_reg]
                }
        } else {
#if !defined(C_UNALIGNED_MEMORY)
                if ((Bit32u)data & 1) {
                        cache_checkinstr(8);
                        cache_addw( LDRB_IMM(dest_reg, data_reg, 0) );      // ldrb dest_reg, [data_reg]
                        cache_addw( LDRB_IMM(templo1, data_reg, 1) );      // ldrb templo1, [data_reg, #1]
                        cache_addw( LSL_IMM(templo1, templo1, 8) );      // lsl templo1, templo1, #8
                        cache_addw( ORR(dest_reg, templo1) );      // orr dest_reg, templo1
                } else
#endif
                {
                        cache_checkinstr(2);
                        cache_addw( LDRH_IMM(dest_reg, data_reg, 0) );      // ldrh dest_reg, [data_reg]
                }
        }
}

// move a 32bit (dword==true) or 16bit (dword==false) value from memory into dest_reg
// 16bit moves may destroy the upper 16bit of the destination register
static void gen_mov_word_to_reg(HostReg dest_reg,void* data,bool dword) {
        if (!gen_mov_memval_to_reg(dest_reg, data, (dword)?4:2)) {
                gen_mov_dword_to_reg_imm(templo2, (Bit32u)data);
                gen_mov_word_to_reg_helper(dest_reg, data, dword, templo2);
        }
}

// move a 16bit constant value into dest_reg
// the upper 16bit of the destination register may be destroyed
static void INLINE gen_mov_word_to_reg_imm(HostReg dest_reg,Bit16u imm) {
        gen_mov_dword_to_reg_imm(dest_reg, (Bit32u)imm);
}

// helper function
static bool gen_mov_memval_from_reg_helper(HostReg src_reg, Bit32u data, Bitu size, HostReg addr_reg, Bit32u addr_data) {
        switch (size) {
                case 4:
#if !defined(C_UNALIGNED_MEMORY)
                        if ((data & 3) == 0)
#endif
                        {
                                if ((data >= addr_data) && (data < addr_data + 128) && (((data - addr_data) & 3) == 0)) {
                                        cache_checkinstr(4);
                                        cache_addw( MOV_LO_HI(templo2, addr_reg) );      // mov templo2, addr_reg
                                        cache_addw( STR_IMM(src_reg, templo2, data - addr_data) );      // str src_reg, [templo2, #(data - addr_data)]
                                        return true;
                                }
                        }
                        break;
                case 2:
#if !defined(C_UNALIGNED_MEMORY)
                        if ((data & 1) == 0)
#endif
                        {
                                if ((data >= addr_data) && (data < addr_data + 64) && (((data - addr_data) & 1) == 0)) {
                                        cache_checkinstr(4);
                                        cache_addw( MOV_LO_HI(templo2, addr_reg) );      // mov templo2, addr_reg
                                        cache_addw( STRH_IMM(src_reg, templo2, data - addr_data) );      // strh src_reg, [templo2, #(data - addr_data)]
                                        return true;
                                }
                        }
                        break;
                case 1:
                        if ((data >= addr_data) && (data < addr_data + 32)) {
                                cache_checkinstr(4);
                                cache_addw( MOV_LO_HI(templo2, addr_reg) );      // mov templo2, addr_reg
                                cache_addw( STRB_IMM(src_reg, templo2, data - addr_data) );      // strb src_reg, [templo2, #(data - addr_data)]
                                return true;
                        }
                default:
                        break;
        }
        return false;
}

// helper function
static bool gen_mov_memval_from_reg(HostReg src_reg, void *dest, Bitu size) {
        if (gen_mov_memval_from_reg_helper(src_reg, (Bit32u)dest, size, FC_REGS_ADDR, (Bit32u)&cpu_regs)) return true;
        if (gen_mov_memval_from_reg_helper(src_reg, (Bit32u)dest, size, readdata_addr, (Bit32u)&core_dynrec.readdata)) return true;
        if (gen_mov_memval_from_reg_helper(src_reg, (Bit32u)dest, size, FC_SEGS_ADDR, (Bit32u)&Segs)) return true;
        return false;
}

// helper function for gen_mov_word_from_reg
static void gen_mov_word_from_reg_helper(HostReg src_reg,void* dest,bool dword, HostReg data_reg) {
        // alignment....
        if (dword) {
#if !defined(C_UNALIGNED_MEMORY)
                if ((Bit32u)dest & 3) {
                        if ( ((Bit32u)dest & 3) == 2 ) {
                                cache_checkinstr(8);
                                cache_addw( STRH_IMM(src_reg, data_reg, 0) );      // strh src_reg, [data_reg]
                                cache_addw( MOV_REG(templo1, src_reg) );      // mov templo1, src_reg
                                cache_addw( LSR_IMM(templo1, templo1, 16) );      // lsr templo1, templo1, #16
                                cache_addw( STRH_IMM(templo1, data_reg, 2) );      // strh templo1, [data_reg, #2]
                        } else {
                                cache_checkinstr(20);
                                cache_addw( STRB_IMM(src_reg, data_reg, 0) );      // strb src_reg, [data_reg]
                                cache_addw( MOV_REG(templo1, src_reg) );      // mov templo1, src_reg
                                cache_addw( LSR_IMM(templo1, templo1, 8) );      // lsr templo1, templo1, #8
                                cache_addw( STRB_IMM(templo1, data_reg, 1) );      // strb templo1, [data_reg, #1]
                                cache_addw( MOV_REG(templo1, src_reg) );      // mov templo1, src_reg
                                cache_addw( LSR_IMM(templo1, templo1, 16) );      // lsr templo1, templo1, #16
                                cache_addw( STRB_IMM(templo1, data_reg, 2) );      // strb templo1, [data_reg, #2]
                                cache_addw( MOV_REG(templo1, src_reg) );      // mov templo1, src_reg
                                cache_addw( LSR_IMM(templo1, templo1, 24) );      // lsr templo1, templo1, #24
                                cache_addw( STRB_IMM(templo1, data_reg, 3) );      // strb templo1, [data_reg, #3]
                        }
                } else
#endif
                {
                        cache_checkinstr(2);
                        cache_addw( STR_IMM(src_reg, data_reg, 0) );      // str src_reg, [data_reg]
                }
        } else {
#if !defined(C_UNALIGNED_MEMORY)
                if ((Bit32u)dest & 1) {
                        cache_checkinstr(8);
                        cache_addw( STRB_IMM(src_reg, data_reg, 0) );      // strb src_reg, [data_reg]
                        cache_addw( MOV_REG(templo1, src_reg) );      // mov templo1, src_reg
                        cache_addw( LSR_IMM(templo1, templo1, 8) );      // lsr templo1, templo1, #8
                        cache_addw( STRB_IMM(templo1, data_reg, 1) );      // strb templo1, [data_reg, #1]
                } else
#endif
                {
                        cache_checkinstr(2);
                        cache_addw( STRH_IMM(src_reg, data_reg, 0) );      // strh src_reg, [data_reg]
                }
        }
}

// move 32bit (dword==true) or 16bit (dword==false) of a register into memory
static void gen_mov_word_from_reg(HostReg src_reg,void* dest,bool dword) {
        if (!gen_mov_memval_from_reg(src_reg, dest, (dword)?4:2)) {
                gen_mov_dword_to_reg_imm(templo2, (Bit32u)dest);
                gen_mov_word_from_reg_helper(src_reg, dest, dword, templo2);
        }
}

// move an 8bit value from memory into dest_reg
// the upper 24bit of the destination register can be destroyed
// this function does not use FC_OP1/FC_OP2 as dest_reg as these
// registers might not be directly byte-accessible on some architectures
static void gen_mov_byte_to_reg_low(HostReg dest_reg,void* data) {
        if (!gen_mov_memval_to_reg(dest_reg, data, 1)) {
                gen_mov_dword_to_reg_imm(templo1, (Bit32u)data);
                cache_checkinstr(2);
                cache_addw( LDRB_IMM(dest_reg, templo1, 0) );      // ldrb dest_reg, [templo1]
        }
}

// move an 8bit value from memory into dest_reg
// the upper 24bit of the destination register can be destroyed
// this function can use FC_OP1/FC_OP2 as dest_reg which are
// not directly byte-accessible on some architectures
static void INLINE gen_mov_byte_to_reg_low_canuseword(HostReg dest_reg,void* data) {
        gen_mov_byte_to_reg_low(dest_reg, data);
}

// move an 8bit constant value into dest_reg
// the upper 24bit of the destination register can be destroyed
// this function does not use FC_OP1/FC_OP2 as dest_reg as these
// registers might not be directly byte-accessible on some architectures
static void gen_mov_byte_to_reg_low_imm(HostReg dest_reg,Bit8u imm) {
        cache_checkinstr(2);
        cache_addw( MOV_IMM(dest_reg, imm) );      // mov dest_reg, #(imm)
}

// move an 8bit constant value into dest_reg
// the upper 24bit of the destination register can be destroyed
// this function can use FC_OP1/FC_OP2 as dest_reg which are
// not directly byte-accessible on some architectures
static void INLINE gen_mov_byte_to_reg_low_imm_canuseword(HostReg dest_reg,Bit8u imm) {
        gen_mov_byte_to_reg_low_imm(dest_reg, imm);
}

// move the lowest 8bit of a register into memory
static void gen_mov_byte_from_reg_low(HostReg src_reg,void* dest) {
        if (!gen_mov_memval_from_reg(src_reg, dest, 1)) {
                gen_mov_dword_to_reg_imm(templo1, (Bit32u)dest);
                cache_checkinstr(2);
                cache_addw( STRB_IMM(src_reg, templo1, 0) );      // strb src_reg, [templo1]
        }
}



// convert an 8bit word to a 32bit dword
// the register is zero-extended (sign==false) or sign-extended (sign==true)
static void gen_extend_byte(bool sign,HostReg reg) {
        cache_checkinstr(4);
        cache_addw( LSL_IMM(reg, reg, 24) );      // lsl reg, reg, #24

        if (sign) {
                cache_addw( ASR_IMM(reg, reg, 24) );      // asr reg, reg, #24
        } else {
                cache_addw( LSR_IMM(reg, reg, 24) );      // lsr reg, reg, #24
        }
}

// convert a 16bit word to a 32bit dword
// the register is zero-extended (sign==false) or sign-extended (sign==true)
static void gen_extend_word(bool sign,HostReg reg) {
        cache_checkinstr(4);
        cache_addw( LSL_IMM(reg, reg, 16) );      // lsl reg, reg, #16

        if (sign) {
                cache_addw( ASR_IMM(reg, reg, 16) );      // asr reg, reg, #16
        } else {
                cache_addw( LSR_IMM(reg, reg, 16) );      // lsr reg, reg, #16
        }
}
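
// Note (added): ARMv4 has no sxtb/uxtb/sxth/uxth (those appear in ARMv6),
// so the extensions above use the classic shift pair, e.g. zero-extending
// a byte in r0:
//     lsl r0, r0, #24
//     lsr r0, r0, #24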

// add a 32bit value from memory to a full register
static void gen_add(HostReg reg,void* op) {
        gen_mov_word_to_reg(templo3, op, 1);
        cache_checkinstr(2);
        cache_addw( ADD_REG(reg, reg, templo3) );      // add reg, reg, templo3
}

// add a 32bit constant value to a full register
static void gen_add_imm(HostReg reg,Bit32u imm) {
        Bit32u imm2, scale;

        if(!imm) return;

        imm2 = (Bit32u) (-((Bit32s)imm));

        if (imm <= 255) {
                cache_checkinstr(2);
                cache_addw( ADD_IMM8(reg, imm) );      // add reg, #imm
        } else if (imm2 <= 255) {
                cache_checkinstr(2);
                cache_addw( SUB_IMM8(reg, imm2) );      // sub reg, #(-imm)
        } else {
                if (val_single_shift(imm2, &scale)) {
                        cache_checkinstr((scale)?6:4);
                        cache_addw( MOV_IMM(templo1, imm2 >> scale) );      // mov templo1, #(-imm >> scale)
                        if (scale) {
                                cache_addw( LSL_IMM(templo1, templo1, scale) );      // lsl templo1, templo1, #scale
                        }
                        cache_addw( SUB_REG(reg, reg, templo1) );      // sub reg, reg, templo1
                } else {
                        gen_mov_dword_to_reg_imm(templo1, imm);
                        cache_checkinstr(2);
                        cache_addw( ADD_REG(reg, reg, templo1) );      // add reg, reg, templo1
                }
        }
}

// and a 32bit constant value with a full register
static void gen_and_imm(HostReg reg,Bit32u imm) {
        Bit32u imm2, scale;

        imm2 = ~imm;
        if(!imm2) return;

        if (!imm) {
                cache_checkinstr(2);
                cache_addw( MOV_IMM(reg, 0) );      // mov reg, #0
        } else {
                if (val_single_shift(imm2, &scale)) {
                        cache_checkinstr((scale)?6:4);
                        cache_addw( MOV_IMM(templo1, imm2 >> scale) );      // mov templo1, #(~imm >> scale)
                        if (scale) {
                                cache_addw( LSL_IMM(templo1, templo1, scale) );      // lsl templo1, templo1, #scale
                        }
                        cache_addw( BIC(reg, templo1) );      // bic reg, templo1
                } else {
                        gen_mov_dword_to_reg_imm(templo1, imm);
                        cache_checkinstr(2);
                        cache_addw( AND(reg, templo1) );      // and reg, templo1
                }
        }
}


// move a 32bit constant value into memory
static void gen_mov_direct_dword(void* dest,Bit32u imm) {
        gen_mov_dword_to_reg_imm(templo3, imm);
        gen_mov_word_from_reg(templo3, dest, 1);
}

// move an address into memory
static void INLINE gen_mov_direct_ptr(void* dest,DRC_PTR_SIZE_IM imm) {
        gen_mov_direct_dword(dest,(Bit32u)imm);
}

// add a 32bit (dword==true) or 16bit (dword==false) constant value to a memory value
static void gen_add_direct_word(void* dest,Bit32u imm,bool dword) {
        if (!dword) imm &= 0xffff;
        if(!imm) return;

        if (!gen_mov_memval_to_reg(templo3, dest, (dword)?4:2)) {
                gen_mov_dword_to_reg_imm(templo2, (Bit32u)dest);
                gen_mov_word_to_reg_helper(templo3, dest, dword, templo2);
        }
        gen_add_imm(templo3, imm);
        if (!gen_mov_memval_from_reg(templo3, dest, (dword)?4:2)) {
                gen_mov_word_from_reg_helper(templo3, dest, dword, templo2);
        }
}

// add an 8bit constant value to a dword memory value
static void gen_add_direct_byte(void* dest,Bit8s imm) {
        gen_add_direct_word(dest, (Bit32s)imm, 1);
}

// subtract a 32bit (dword==true) or 16bit (dword==false) constant value from a memory value
static void gen_sub_direct_word(void* dest,Bit32u imm,bool dword) {
        Bit32u imm2, scale;

        if (!dword) imm &= 0xffff;
        if(!imm) return;

        if (!gen_mov_memval_to_reg(templo3, dest, (dword)?4:2)) {
                gen_mov_dword_to_reg_imm(templo2, (Bit32u)dest);
                gen_mov_word_to_reg_helper(templo3, dest, dword, templo2);
        }

        imm2 = (Bit32u) (-((Bit32s)imm));

        if (imm <= 255) {
                cache_checkinstr(2);
                cache_addw( SUB_IMM8(templo3, imm) );      // sub templo3, #imm
        } else if (imm2 <= 255) {
                cache_checkinstr(2);
                cache_addw( ADD_IMM8(templo3, imm2) );      // add templo3, #(-imm)
        } else {
                if (val_single_shift(imm2, &scale)) {
                        cache_checkinstr((scale)?6:4);
                        cache_addw( MOV_IMM(templo1, imm2 >> scale) );      // mov templo1, #(-imm >> scale)
                        if (scale) {
                                cache_addw( LSL_IMM(templo1, templo1, scale) );      // lsl templo1, templo1, #scale
                        }
                        cache_addw( ADD_REG(templo3, templo3, templo1) );      // add templo3, templo3, templo1
                } else {
                        gen_mov_dword_to_reg_imm(templo1, imm);
                        cache_checkinstr(2);
                        cache_addw( SUB_REG(templo3, templo3, templo1) );      // sub templo3, templo3, templo1
                }
        }

        if (!gen_mov_memval_from_reg(templo3, dest, (dword)?4:2)) {
                gen_mov_word_from_reg_helper(templo3, dest, dword, templo2);
        }
}

// subtract an 8bit constant value from a dword memory value
static void gen_sub_direct_byte(void* dest,Bit8s imm) {
        gen_sub_direct_word(dest, (Bit32s)imm, 1);
}

// effective address calculation, destination is dest_reg
// scale_reg is scaled by scale (scale_reg*(2^scale)) and
// added to dest_reg, then the immediate value is added
static INLINE void gen_lea(HostReg dest_reg,HostReg scale_reg,Bitu scale,Bits imm) {
        if (scale) {
                cache_checkinstr(4);
                cache_addw( LSL_IMM(templo1, scale_reg, scale) );      // lsl templo1, scale_reg, #(scale)
                cache_addw( ADD_REG(dest_reg, dest_reg, templo1) );      // add dest_reg, dest_reg, templo1
        } else {
                cache_checkinstr(2);
                cache_addw( ADD_REG(dest_reg, dest_reg, scale_reg) );      // add dest_reg, dest_reg, scale_reg
        }
        gen_add_imm(dest_reg, imm);
}

// effective address calculation, destination is dest_reg
// dest_reg is scaled by scale (dest_reg*(2^scale)),
// then the immediate value is added
static INLINE void gen_lea(HostReg dest_reg,Bitu scale,Bits imm) {
        if (scale) {
                cache_checkinstr(2);
                cache_addw( LSL_IMM(dest_reg, dest_reg, scale) );      // lsl dest_reg, dest_reg, #(scale)
        }
        gen_add_imm(dest_reg, imm);
}

// helper function for gen_call_function_raw and gen_call_function_setup
template <typename T> static void gen_call_function_helper(const T func) {
    Bit8u *datapos;

    datapos = cache_reservedata();
    *(Bit32u*)datapos=(Bit32u)func;

    if (((Bit32u)cache.pos & 0x03) == 0) {
        cache_addw( LDR_PC_IMM(templo1, datapos - (cache.pos + 4)) );      // ldr templo1, [pc, datapos]
        cache_addw( ADD_LO_PC_IMM(templo2, 4) );      // adr templo2, after_call (add templo2, pc, #4)
        cache_addw( MOV_HI_LO(HOST_lr, templo2) );      // mov lr, templo2
        cache_addw( BX(templo1) );      // bx templo1     --- switch to arm state
    } else {
        cache_addw( LDR_PC_IMM(templo1, datapos - (cache.pos + 2)) );      // ldr templo1, [pc, datapos]
        cache_addw( ADD_LO_PC_IMM(templo2, 4) );      // adr templo2, after_call (add templo2, pc, #4)
        cache_addw( MOV_HI_LO(HOST_lr, templo2) );      // mov lr, templo2
        cache_addw( BX(templo1) );      // bx templo1     --- switch to arm state
        cache_addw( NOP );      // nop
    }
    // after_call:

    // switch from arm to thumb state
    cache_addd(0xe2800000 + (templo1 << 12) + (HOST_pc << 16) + (1));      // add templo1, pc, #1
    cache_addd(0xe12fff10 + (templo1));      // bx templo1

    // thumb state from now on
}
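
// The emitted call sequence, annotated (added for illustration):
//
//     ldr  templo1, [pc, #pool]   @ function address from the data pool
//     add  templo2, pc, #4        @ templo2 = address of after_call
//     mov  lr, templo2            @ set the return address
//     bx   templo1                @ call; the callee is plain ARM code
//   after_call:                   @ returned to in ARM state (bit 0 of lr
//     add  templo1, pc, #1        @ is clear), so two 32-bit instructions
//     bx   templo1                @ set bit 0 and branch back into thumb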

// generate a call to a parameterless function
template <typename T> static void INLINE gen_call_function_raw(const T func) {
    cache_checkinstr(18);
    gen_call_function_helper(func);
}

// generate a call to a function with paramcount parameters
// note: the parameters are loaded in the architecture specific way
// using the gen_load_param_ functions below
template <typename T> static Bit32u INLINE gen_call_function_setup(const T func,Bitu paramcount,bool fastcall=false) {
        cache_checkinstr(18);
        Bit32u proc_addr = (Bit32u)cache.pos;
        gen_call_function_helper(func);
        return proc_addr;
        // if proc_addr is on a word boundary ((proc_addr & 0x03) == 0)
        //   then the length of the generated code is 16 bytes
        //   otherwise the length of the generated code is 18 bytes
}

#if (1)
// max of 4 parameters in a1-a4

// load an immediate value as param'th function parameter
static void INLINE gen_load_param_imm(Bitu imm,Bitu param) {
        gen_mov_dword_to_reg_imm(param, imm);
}

// load an address as param'th function parameter
static void INLINE gen_load_param_addr(Bitu addr,Bitu param) {
        gen_mov_dword_to_reg_imm(param, addr);
}

// load a host-register as param'th function parameter
static void INLINE gen_load_param_reg(Bitu reg,Bitu param) {
        gen_mov_regs(param, reg);
}

// load a value from memory as param'th function parameter
static void INLINE gen_load_param_mem(Bitu mem,Bitu param) {
        gen_mov_word_to_reg(param, (void *)mem, 1);
}
#else
        other arm abis
#endif

// jump to an address pointed at by ptr, offset is in imm
static void gen_jmp_ptr(void * ptr,Bits imm=0) {
        gen_mov_word_to_reg(templo3, ptr, 1);

#if !defined(C_UNALIGNED_MEMORY)
// (*ptr) should be word aligned
        if ((imm & 0x03) == 0) {
#endif
                if ((imm >= 0) && (imm < 128) && ((imm & 3) == 0)) {
                        cache_checkinstr(6);
                        cache_addw( LDR_IMM(templo2, templo3, imm) );      // ldr templo2, [templo3, #imm]
                } else {
                        gen_mov_dword_to_reg_imm(templo2, imm);
                        cache_checkinstr(6);
                        cache_addw( LDR_REG(templo2, templo3, templo2) );      // ldr templo2, [templo3, templo2]
                }
#if !defined(C_UNALIGNED_MEMORY)
        } else {
                gen_add_imm(templo3, imm);

                cache_checkinstr(24);
                cache_addw( LDRB_IMM(templo2, templo3, 0) );      // ldrb templo2, [templo3]
                cache_addw( LDRB_IMM(templo1, templo3, 1) );      // ldrb templo1, [templo3, #1]
                cache_addw( LSL_IMM(templo1, templo1, 8) );      // lsl templo1, templo1, #8
                cache_addw( ORR(templo2, templo1) );      // orr templo2, templo1
                cache_addw( LDRB_IMM(templo1, templo3, 2) );      // ldrb templo1, [templo3, #2]
                cache_addw( LSL_IMM(templo1, templo1, 16) );      // lsl templo1, templo1, #16
                cache_addw( ORR(templo2, templo1) );      // orr templo2, templo1
                cache_addw( LDRB_IMM(templo1, templo3, 3) );      // ldrb templo1, [templo3, #3]
                cache_addw( LSL_IMM(templo1, templo1, 24) );      // lsl templo1, templo1, #24
                cache_addw( ORR(templo2, templo1) );      // orr templo2, templo1
        }
#endif

        // set bit 0 of the jump address to stay in thumb state
        cache_addw( ADD_IMM3(templo2, templo2, 1) );      // add templo2, templo2, #1

        cache_addw( BX(templo2) );      // bx templo2
}

// short conditional jump (+-127 bytes) if register is zero
// the destination is set by gen_fill_branch() later
static Bit32u gen_create_branch_on_zero(HostReg reg,bool dword) {
        cache_checkinstr(4);
        if (dword) {
                cache_addw( CMP_IMM(reg, 0) );      // cmp reg, #0
        } else {
                cache_addw( LSL_IMM(templo1, reg, 16) );      // lsl templo1, reg, #16
        }
        cache_addw( BEQ_FWD(0) );      // beq j
        return ((Bit32u)cache.pos-2);
}

// short conditional jump (+-127 bytes) if register is nonzero
// the destination is set by gen_fill_branch() later
static Bit32u gen_create_branch_on_nonzero(HostReg reg,bool dword) {
        cache_checkinstr(4);
        if (dword) {
                cache_addw( CMP_IMM(reg, 0) );      // cmp reg, #0
        } else {
                cache_addw( LSL_IMM(templo1, reg, 16) );      // lsl templo1, reg, #16
        }
        cache_addw( BNE_FWD(0) );      // bne j
        return ((Bit32u)cache.pos-2);
}

// calculate relative offset and fill it into the location pointed to by data
static void INLINE gen_fill_branch(DRC_PTR_SIZE_IM data) {
#if C_DEBUG
        Bits len=(Bit32u)cache.pos-(data+4);
        if (len<0) len=-len;
        if (len>252) LOG_MSG("Big jump %d",len);
#endif
        *(Bit8u*)data=(Bit8u)( ((Bit32u)cache.pos-(data+4)) >> 1 );
}
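
// Offset math (added for clarity): a Thumb conditional branch encodes
// (target - (branch_address + 4)) / 2 in its low byte, and "data" as
// returned by the two functions above points at exactly that low byte of
// the beq/bne halfword, so data + 4 is the branch's pc value.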


// conditional jump if register is nonzero
// for isdword==true all 32 bits of the register are tested
// for isdword==false only the lowest 8 bits of the register are tested
static Bit32u gen_create_branch_long_nonzero(HostReg reg,bool isdword) {
        Bit8u *datapos;

        cache_checkinstr(8);
        datapos = cache_reservedata();

        if (isdword) {
                cache_addw( CMP_IMM(reg, 0) );      // cmp reg, #0
        } else {
                cache_addw( LSL_IMM(templo2, reg, 24) );      // lsl templo2, reg, #24
        }
        cache_addw( BEQ_FWD(2) );      // beq nobranch (pc+2)
        if (((Bit32u)cache.pos & 0x03) == 0) {
                cache_addw( LDR_PC_IMM(templo1, datapos - (cache.pos + 4)) );      // ldr templo1, [pc, datapos]
        } else {
                cache_addw( LDR_PC_IMM(templo1, datapos - (cache.pos + 2)) );      // ldr templo1, [pc, datapos]
        }
        cache_addw( BX(templo1) );      // bx templo1
        // nobranch:
        return ((Bit32u)datapos);
}

// compare a 32bit register against zero and jump if the value is less than or equal to zero
static Bit32u gen_create_branch_long_leqzero(HostReg reg) {
        Bit8u *datapos;

        cache_checkinstr(8);
        datapos = cache_reservedata();

        cache_addw( CMP_IMM(reg, 0) );      // cmp reg, #0
        cache_addw( BGT_FWD(2) );      // bgt nobranch (pc+2)
        if (((Bit32u)cache.pos & 0x03) == 0) {
                cache_addw( LDR_PC_IMM(templo1, datapos - (cache.pos + 4)) );      // ldr templo1, [pc, datapos]
        } else {
                cache_addw( LDR_PC_IMM(templo1, datapos - (cache.pos + 2)) );      // ldr templo1, [pc, datapos]
        }
        cache_addw( BX(templo1) );      // bx templo1
        // nobranch:
        return ((Bit32u)datapos);
}

// fill the destination of a long branch into the location pointed to by data
static void INLINE gen_fill_branch_long(Bit32u data) {
        // this is an absolute branch
        *(Bit32u*)data=((Bit32u)cache.pos) + 1; // add 1 to keep processor in thumb state
}
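
// Consequence (added for clarity): "long" branches are not pc-relative at
// all. The pool slot returned by gen_create_branch_long_* simply receives
// the absolute target address with bit 0 set, and the emitted
// "ldr templo1, [pc, ...]; bx templo1" pair jumps there in thumb state.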

static void gen_run_code(void) {
        Bit8u *pos1, *pos2, *pos3;

#if (__ARM_EABI__)
        // 8-byte stack alignment
        cache_addd(0xe92d4ff0);                 // stmfd sp!, {v1-v8,lr}
#else
        cache_addd(0xe92d4df0);                 // stmfd sp!, {v1-v5,v7,v8,lr}
#endif

        cache_addd( ARM_ADD_IMM(HOST_r0, HOST_r0, 1, 0) );      // add r0, r0, #1

        pos1 = cache.pos;
        cache_addd( 0 );
        pos2 = cache.pos;
        cache_addd( 0 );
        pos3 = cache.pos;
        cache_addd( 0 );

        cache_addd( ARM_ADD_IMM(HOST_lr, HOST_pc, 4, 0) );                      // add lr, pc, #4
        cache_addd( ARM_STR_IMM_M_W(HOST_lr, HOST_sp, 4) );      // str lr, [sp, #-4]!
        cache_addd( ARM_BX(HOST_r0) );                  // bx r0

#if (__ARM_EABI__)
        cache_addd(0xe8bd4ff0);                 // ldmfd sp!, {v1-v8,lr}
#else
        cache_addd(0xe8bd4df0);                 // ldmfd sp!, {v1-v5,v7,v8,lr}
#endif
        cache_addd( ARM_BX(HOST_lr) );                  // bx lr

        // align cache.pos to 32 bytes
        if ((((Bitu)cache.pos) & 0x1f) != 0) {
                cache.pos = cache.pos + (32 - (((Bitu)cache.pos) & 0x1f));
        }

        *(Bit32u*)pos1 = ARM_LDR_IMM(FC_SEGS_ADDR, HOST_pc, cache.pos - (pos1 + 8));      // ldr FC_SEGS_ADDR, [pc, #(&Segs)]
        cache_addd((Bit32u)&Segs);      // address of "Segs"

        *(Bit32u*)pos2 = ARM_LDR_IMM(FC_REGS_ADDR, HOST_pc, cache.pos - (pos2 + 8));      // ldr FC_REGS_ADDR, [pc, #(&cpu_regs)]
        cache_addd((Bit32u)&cpu_regs);  // address of "cpu_regs"

        *(Bit32u*)pos3 = ARM_LDR_IMM(readdata_addr, HOST_pc, cache.pos - (pos3 + 8));      // ldr readdata_addr, [pc, #(&core_dynrec.readdata)]
        cache_addd((Bit32u)&core_dynrec.readdata);  // address of "core_dynrec.readdata"

        // align cache.pos to 32 bytes
        if ((((Bitu)cache.pos) & 0x1f) != 0) {
                cache.pos = cache.pos + (32 - (((Bitu)cache.pos) & 0x1f));
        }
}
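
// Entry-path sketch (added for illustration; the function-pointer type and
// names are hypothetical, the real ones live in the dynrec core): the
// prologue above is plain ARM code, called roughly like
//
//     Bit32u (*run)(Bit8u *block) = (Bit32u (*)(Bit8u*))cache_start_addr;  // hypothetical names
//     run(block_start);   // "add r0, r0, #1; bx r0" enters the block in thumb state
//
// i.e. the +1 on r0 is what switches the processor into thumb state before
// the first generated instruction executes.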

// return from a function
static void gen_return_function(void) {
        cache_checkinstr(4);
        cache_addw(0xbc08);      // pop {r3}
        cache_addw( BX(HOST_r3) );      // bx r3
}


// short unconditional jump (over data pool)
// must emit at most CACHE_DATA_JUMP bytes
static void INLINE gen_create_branch_short(void * func) {
        cache_addw( B_FWD((Bit32u)func - ((Bit32u)cache.pos + 4)) );      // b func
}


#ifdef DRC_FLAGS_INVALIDATION

// called when a call to a function can be replaced by a
// call to a simpler function
static void gen_fill_function_ptr(Bit8u * pos,void* fct_ptr,Bitu flags_type) {
        if ((*(Bit16u*)pos & 0xf000) == 0xe000) {
                if ((*(Bit16u*)pos & 0x0fff) >= ((CACHE_DATA_ALIGN / 2) - 1) &&
                        (*(Bit16u*)pos & 0x0fff) < 0x0800)
                {
                        pos = (Bit8u *) ( ( ( (Bit32u)(*(Bit16u*)pos & 0x0fff) ) << 1 ) + ((Bit32u)pos + 4) );
                }
        }

#ifdef DRC_FLAGS_INVALIDATION_DCODE
        if (((Bit32u)pos & 0x03) == 0)
        {
                // try to avoid function calls but rather directly fill in code
                switch (flags_type) {
                        case t_ADDb:
                        case t_ADDw:
                        case t_ADDd:
                                *(Bit16u*)pos=ADD_REG(HOST_a1, HOST_a1, HOST_a2);       // add a1, a1, a2
                                *(Bit16u*)(pos+2)=B_FWD(10);                            // b after_call (pc+10)
                                break;
                        case t_ORb:
                        case t_ORw:
                        case t_ORd:
                                *(Bit16u*)pos=ORR(HOST_a1, HOST_a2);                    // orr a1, a2
                                *(Bit16u*)(pos+2)=B_FWD(10);                            // b after_call (pc+10)
                                break;
                        case t_ANDb:
                        case t_ANDw:
                        case t_ANDd:
                                *(Bit16u*)pos=AND(HOST_a1, HOST_a2);                    // and a1, a2
                                *(Bit16u*)(pos+2)=B_FWD(10);                            // b after_call (pc+10)
                                break;
                        case t_SUBb:
                        case t_SUBw:
                        case t_SUBd:
                                *(Bit16u*)pos=SUB_REG(HOST_a1, HOST_a1, HOST_a2);       // sub a1, a1, a2
                                *(Bit16u*)(pos+2)=B_FWD(10);                            // b after_call (pc+10)
                                break;
                        case t_XORb:
                        case t_XORw:
                        case t_XORd:
                                *(Bit16u*)pos=EOR(HOST_a1, HOST_a2);                    // eor a1, a2
                                *(Bit16u*)(pos+2)=B_FWD(10);                            // b after_call (pc+10)
                                break;
                        case t_CMPb:
                        case t_CMPw:
                        case t_CMPd:
                        case t_TESTb:
                        case t_TESTw:
                        case t_TESTd:
                                *(Bit16u*)pos=B_FWD(12);                                // b after_call (pc+12)
                                break;
                        case t_INCb:
                        case t_INCw:
                        case t_INCd:
                                *(Bit16u*)pos=ADD_IMM3(HOST_a1, HOST_a1, 1);            // add a1, a1, #1
                                *(Bit16u*)(pos+2)=B_FWD(10);                            // b after_call (pc+10)
                                break;
                        case t_DECb:
                        case t_DECw:
                        case t_DECd:
                                *(Bit16u*)pos=SUB_IMM3(HOST_a1, HOST_a1, 1);            // sub a1, a1, #1
                                *(Bit16u*)(pos+2)=B_FWD(10);                            // b after_call (pc+10)
                                break;
                        case t_SHLb:
                        case t_SHLw:
                        case t_SHLd:
                                *(Bit16u*)pos=LSL_REG(HOST_a1, HOST_a2);                // lsl a1, a2
                                *(Bit16u*)(pos+2)=B_FWD(10);                            // b after_call (pc+10)
                                break;
                        case t_SHRb:
                                *(Bit16u*)pos=LSL_IMM(HOST_a1, HOST_a1, 24);            // lsl a1, a1, #24
                                *(Bit16u*)(pos+2)=LSR_IMM(HOST_a1, HOST_a1, 24);        // lsr a1, a1, #24
                                *(Bit16u*)(pos+4)=LSR_REG(HOST_a1, HOST_a2);            // lsr a1, a2
                                *(Bit16u*)(pos+6)=B_FWD(6);                             // b after_call (pc+6)
                                break;
                        case t_SHRw:
                                *(Bit16u*)pos=LSL_IMM(HOST_a1, HOST_a1, 16);            // lsl a1, a1, #16
                                *(Bit16u*)(pos+2)=LSR_IMM(HOST_a1, HOST_a1, 16);        // lsr a1, a1, #16
                                *(Bit16u*)(pos+4)=LSR_REG(HOST_a1, HOST_a2);            // lsr a1, a2
                                *(Bit16u*)(pos+6)=B_FWD(6);                             // b after_call (pc+6)
                                break;
                        case t_SHRd:
                                *(Bit16u*)pos=LSR_REG(HOST_a1, HOST_a2);                // lsr a1, a2
01177                                 *(Bit16u*)(pos+2)=B_FWD(10);                                            // b after_call (pc+10)
01178                                 break;
01179                         case t_SARb:
01180                                 *(Bit16u*)pos=LSL_IMM(HOST_a1, HOST_a1, 24);            // lsl a1, a1, #24
01181                                 *(Bit16u*)(pos+2)=ASR_IMM(HOST_a1, HOST_a1, 24);        // asr a1, a1, #24
01182                                 *(Bit16u*)(pos+4)=ASR_REG(HOST_a1, HOST_a2);            // asr a1, a2
01183                                 *(Bit16u*)(pos+6)=B_FWD(6);                                                     // b after_call (pc+6)
01184                                 break;
01185                         case t_SARw:
01186                                 *(Bit16u*)pos=LSL_IMM(HOST_a1, HOST_a1, 16);            // lsl a1, a1, #16
01187                                 *(Bit16u*)(pos+2)=ASR_IMM(HOST_a1, HOST_a1, 16);        // asr a1, a1, #16
01188                                 *(Bit16u*)(pos+4)=ASR_REG(HOST_a1, HOST_a2);            // asr a1, a2
01189                                 *(Bit16u*)(pos+6)=B_FWD(6);                                                     // b after_call (pc+6)
01190                                 break;
01191                         case t_SARd:
01192                                 *(Bit16u*)pos=ASR_REG(HOST_a1, HOST_a2);                        // asr a1, a2
01193                                 *(Bit16u*)(pos+2)=B_FWD(10);                                            // b after_call (pc+10)
01194                                 break;
01195                         case t_RORb:
01196                                 *(Bit16u*)pos=LSL_IMM(HOST_a1, HOST_a1, 24);            // lsl a1, a1, #24
01197                                 *(Bit16u*)(pos+2)=LSR_IMM(templo1, HOST_a1, 8);         // lsr templo1, a1, #8
01198                                 *(Bit16u*)(pos+4)=ORR(HOST_a1, templo1);                        // orr a1, templo1
01199                                 *(Bit16u*)(pos+6)=NOP;                                                          // nop
01200                                 *(Bit16u*)(pos+8)=LSR_IMM(templo1, HOST_a1, 16);        // lsr templo1, a1, #16
01201                                 *(Bit16u*)(pos+10)=NOP;                                                         // nop
01202                                 *(Bit16u*)(pos+12)=ORR(HOST_a1, templo1);                       // orr a1, templo1
01203                                 *(Bit16u*)(pos+14)=ROR_REG(HOST_a1, HOST_a2);           // ror a1, a2
01204                                 break;
01205                         case t_RORw:
01206                                 *(Bit16u*)pos=LSL_IMM(HOST_a1, HOST_a1, 16);            // lsl a1, a1, #16
01207                                 *(Bit16u*)(pos+2)=LSR_IMM(templo1, HOST_a1, 16);        // lsr templo1, a1, #16
01208                                 *(Bit16u*)(pos+4)=ORR(HOST_a1, templo1);                        // orr a1, templo1
01209                                 *(Bit16u*)(pos+6)=ROR_REG(HOST_a1, HOST_a2);            // ror a1, a2
01210                                 *(Bit16u*)(pos+8)=B_FWD(4);                                                     // b after_call (pc+4)
01211                                 break;
01212                         case t_RORd:
01213                                 *(Bit16u*)pos=ROR_REG(HOST_a1, HOST_a2);                        // ror a1, a2
01214                                 *(Bit16u*)(pos+2)=B_FWD(10);                                            // b after_call (pc+10)
01215                                 break;
01216                         case t_ROLb:
01217                                 *(Bit16u*)pos=LSL_IMM(HOST_a1, HOST_a1, 24);            // lsl a1, a1, #24
01218                                 *(Bit16u*)(pos+2)=NEG(HOST_a2, HOST_a2);                        // neg a2, a2
01219                                 *(Bit16u*)(pos+4)=LSR_IMM(templo1, HOST_a1, 8);         // lsr templo1, a1, #8
01220                                 *(Bit16u*)(pos+6)=ADD_IMM8(HOST_a2, 32);                        // add a2, #32
01221                                 *(Bit16u*)(pos+8)=ORR(HOST_a1, templo1);                        // orr a1, templo1
01222                                 *(Bit16u*)(pos+10)=LSR_IMM(templo1, HOST_a1, 16);       // lsr templo1, a1, #16
01223                                 *(Bit16u*)(pos+12)=ORR(HOST_a1, templo1);                       // orr a1, templo1
01224                                 *(Bit16u*)(pos+14)=ROR_REG(HOST_a1, HOST_a2);           // ror a1, a2
01225                                 break;
01226                         case t_ROLw:
01227                                 *(Bit16u*)pos=LSL_IMM(HOST_a1, HOST_a1, 16);            // lsl a1, a1, #16
01228                                 *(Bit16u*)(pos+2)=NEG(HOST_a2, HOST_a2);                        // neg a2, a2
01229                                 *(Bit16u*)(pos+4)=LSR_IMM(templo1, HOST_a1, 16);        // lsr templo1, a1, #16
01230                                 *(Bit16u*)(pos+6)=ADD_IMM8(HOST_a2, 32);                        // add a2, #32
01231                                 *(Bit16u*)(pos+8)=ORR(HOST_a1, templo1);                        // orr a1, templo1
01232                                 *(Bit16u*)(pos+10)=NOP;                                                         // nop
01233                                 *(Bit16u*)(pos+12)=ROR_REG(HOST_a1, HOST_a2);           // ror a1, a2
01234                                 *(Bit16u*)(pos+14)=NOP;                                                         // nop
01235                                 break;
01236                         case t_ROLd:
01237                                 *(Bit16u*)pos=NEG(HOST_a2, HOST_a2);                            // neg a2, a2
01238                                 *(Bit16u*)(pos+2)=ADD_IMM8(HOST_a2, 32);                        // add a2, #32
01239                                 *(Bit16u*)(pos+4)=ROR_REG(HOST_a1, HOST_a2);            // ror a1, a2
01240                                 *(Bit16u*)(pos+6)=B_FWD(6);                                                     // b after_call (pc+6)
01241                                 break;
01242                         case t_NEGb:
01243                         case t_NEGw:
01244                         case t_NEGd:
01245                                 *(Bit16u*)pos=NEG(HOST_a1, HOST_a1);                            // neg a1, a1
01246                                 *(Bit16u*)(pos+2)=B_FWD(10);                                            // b after_call (pc+10)
01247                                 break;
01248                         default:
01249                                 *(Bit32u*)( ( ((Bit32u) (*pos)) << 2 ) + ((Bit32u)pos + 4) ) = (Bit32u)fct_ptr;         // simple_func
01250                                 break;
01251                 }
01252         }
01253         else
01254         {
01255                 // avoid the function call where possible by filling in equivalent code directly
01256                 switch (flags_type) {
01257                         case t_ADDb:
01258                         case t_ADDw:
01259                         case t_ADDd:
01260                                 *(Bit16u*)pos=ADD_REG(HOST_a1, HOST_a1, HOST_a2);       // add a1, a1, a2
01261                                 *(Bit16u*)(pos+2)=B_FWD(12);                                            // b after_call (pc+12)
01262                                 break;
01263                         case t_ORb:
01264                         case t_ORw:
01265                         case t_ORd:
01266                                 *(Bit16u*)pos=ORR(HOST_a1, HOST_a2);                            // orr a1, a2
01267                                 *(Bit16u*)(pos+2)=B_FWD(12);                                            // b after_call (pc+12)
01268                                 break;
01269                         case t_ANDb:
01270                         case t_ANDw:
01271                         case t_ANDd:
01272                                 *(Bit16u*)pos=AND(HOST_a1, HOST_a2);                            // and a1, a2
01273                                 *(Bit16u*)(pos+2)=B_FWD(12);                                            // b after_call (pc+12)
01274                                 break;
01275                         case t_SUBb:
01276                         case t_SUBw:
01277                         case t_SUBd:
01278                                 *(Bit16u*)pos=SUB_REG(HOST_a1, HOST_a1, HOST_a2);       // sub a1, a1, a2
01279                                 *(Bit16u*)(pos+2)=B_FWD(12);                                            // b after_call (pc+12)
01280                                 break;
01281                         case t_XORb:
01282                         case t_XORw:
01283                         case t_XORd:
01284                                 *(Bit16u*)pos=EOR(HOST_a1, HOST_a2);                            // eor a1, a2
01285                                 *(Bit16u*)(pos+2)=B_FWD(12);                                            // b after_call (pc+12)
01286                                 break;
01287                         case t_CMPb:
01288                         case t_CMPw:
01289                         case t_CMPd:
01290                         case t_TESTb:
01291                         case t_TESTw:
01292                         case t_TESTd:
01293                                 *(Bit16u*)pos=B_FWD(14);                                                        // b after_call (pc+14)
01294                                 break;
01295                         case t_INCb:
01296                         case t_INCw:
01297                         case t_INCd:
01298                                 *(Bit16u*)pos=ADD_IMM3(HOST_a1, HOST_a1, 1);            // add a1, a1, #1
01299                                 *(Bit16u*)(pos+2)=B_FWD(12);                                            // b after_call (pc+12)
01300                                 break;
01301                         case t_DECb:
01302                         case t_DECw:
01303                         case t_DECd:
01304                                 *(Bit16u*)pos=SUB_IMM3(HOST_a1, HOST_a1, 1);            // sub a1, a1, #1
01305                                 *(Bit16u*)(pos+2)=B_FWD(12);                                            // b after_call (pc+12)
01306                                 break;
01307                         case t_SHLb:
01308                         case t_SHLw:
01309                         case t_SHLd:
01310                                 *(Bit16u*)pos=LSL_REG(HOST_a1, HOST_a2);                        // lsl a1, a2
01311                                 *(Bit16u*)(pos+2)=B_FWD(12);                                            // b after_call (pc+12)
01312                                 break;
01313                         case t_SHRb:
01314                                 *(Bit16u*)pos=LSL_IMM(HOST_a1, HOST_a1, 24);            // lsl a1, a1, #24
01315                                 *(Bit16u*)(pos+2)=LSR_IMM(HOST_a1, HOST_a1, 24);        // lsr a1, a1, #24
01316                                 *(Bit16u*)(pos+4)=LSR_REG(HOST_a1, HOST_a2);            // lsr a1, a2
01317                                 *(Bit16u*)(pos+6)=B_FWD(8);                                                     // b after_call (pc+8)
01318                                 break;
01319                         case t_SHRw:
01320                                 *(Bit16u*)pos=LSL_IMM(HOST_a1, HOST_a1, 16);            // lsl a1, a1, #16
01321                                 *(Bit16u*)(pos+2)=LSR_IMM(HOST_a1, HOST_a1, 16);        // lsr a1, a1, #16
01322                                 *(Bit16u*)(pos+4)=LSR_REG(HOST_a1, HOST_a2);            // lsr a1, a2
01323                                 *(Bit16u*)(pos+6)=B_FWD(8);                                                     // b after_call (pc+8)
01324                                 break;
01325                         case t_SHRd:
01326                                 *(Bit16u*)pos=LSR_REG(HOST_a1, HOST_a2);                        // lsr a1, a2
01327                                 *(Bit16u*)(pos+2)=B_FWD(12);                                            // b after_call (pc+12)
01328                                 break;
01329                         case t_SARb:
01330                                 *(Bit16u*)pos=LSL_IMM(HOST_a1, HOST_a1, 24);            // lsl a1, a1, #24
01331                                 *(Bit16u*)(pos+2)=ASR_IMM(HOST_a1, HOST_a1, 24);        // asr a1, a1, #24
01332                                 *(Bit16u*)(pos+4)=ASR_REG(HOST_a1, HOST_a2);            // asr a1, a2
01333                                 *(Bit16u*)(pos+6)=B_FWD(8);                                                     // b after_call (pc+8)
01334                                 break;
01335                         case t_SARw:
01336                                 *(Bit16u*)pos=LSL_IMM(HOST_a1, HOST_a1, 16);            // lsl a1, a1, #16
01337                                 *(Bit16u*)(pos+2)=ASR_IMM(HOST_a1, HOST_a1, 16);        // asr a1, a1, #16
01338                                 *(Bit16u*)(pos+4)=ASR_REG(HOST_a1, HOST_a2);            // asr a1, a2
01339                                 *(Bit16u*)(pos+6)=B_FWD(8);                                                     // b after_call (pc+8)
01340                                 break;
01341                         case t_SARd:
01342                                 *(Bit16u*)pos=ASR_REG(HOST_a1, HOST_a2);                        // asr a1, a2
01343                                 *(Bit16u*)(pos+2)=B_FWD(12);                                            // b after_call (pc+12)
01344                                 break;
01345                         case t_RORb:
01346                                 *(Bit16u*)pos=LSL_IMM(HOST_a1, HOST_a1, 24);            // lsl a1, a1, #24
01347                                 *(Bit16u*)(pos+2)=LSR_IMM(templo1, HOST_a1, 8);         // lsr templo1, a1, #8
01348                                 *(Bit16u*)(pos+4)=ORR(HOST_a1, templo1);                        // orr a1, templo1
01349                                 *(Bit16u*)(pos+6)=NOP;                                                          // nop
01350                                 *(Bit16u*)(pos+8)=LSR_IMM(templo1, HOST_a1, 16);        // lsr templo1, a1, #16
01351                                 *(Bit16u*)(pos+10)=NOP;                                                         // nop
01352                                 *(Bit16u*)(pos+12)=ORR(HOST_a1, templo1);                       // orr a1, templo1
01353                                 *(Bit16u*)(pos+14)=NOP;                                                         // nop
01354                                 *(Bit16u*)(pos+16)=ROR_REG(HOST_a1, HOST_a2);           // ror a1, a2
01355                                 break;
01356                         case t_RORw:
01357                                 *(Bit16u*)pos=LSL_IMM(HOST_a1, HOST_a1, 16);            // lsl a1, a1, #16
01358                                 *(Bit16u*)(pos+2)=LSR_IMM(templo1, HOST_a1, 16);        // lsr templo1, a1, #16
01359                                 *(Bit16u*)(pos+4)=ORR(HOST_a1, templo1);                        // orr a1, templo1
01360                                 *(Bit16u*)(pos+6)=ROR_REG(HOST_a1, HOST_a2);            // ror a1, a2
01361                                 *(Bit16u*)(pos+8)=B_FWD(6);                                                     // b after_call (pc+6)
01362                                 break;
01363                         case t_RORd:
01364                                 *(Bit16u*)pos=ROR_REG(HOST_a1, HOST_a2);                        // ror a1, a2
01365                                 *(Bit16u*)(pos+2)=B_FWD(12);                                            // b after_call (pc+12)
01366                                 break;
01367                         case t_ROLb:
01368                                 *(Bit16u*)pos=LSL_IMM(HOST_a1, HOST_a1, 24);            // lsl a1, a1, #24
01369                                 *(Bit16u*)(pos+2)=NEG(HOST_a2, HOST_a2);                        // neg a2, a2
01370                                 *(Bit16u*)(pos+4)=LSR_IMM(templo1, HOST_a1, 8);         // lsr templo1, a1, #8
01371                                 *(Bit16u*)(pos+6)=ADD_IMM8(HOST_a2, 32);                        // add a2, #32
01372                                 *(Bit16u*)(pos+8)=ORR(HOST_a1, templo1);                        // orr a1, templo1
01373                                 *(Bit16u*)(pos+10)=LSR_IMM(templo1, HOST_a1, 16);       // lsr templo1, a1, #16
01374                                 *(Bit16u*)(pos+12)=ORR(HOST_a1, templo1);                       // orr a1, templo1
01375                                 *(Bit16u*)(pos+14)=NOP;                                                         // nop
01376                                 *(Bit16u*)(pos+16)=ROR_REG(HOST_a1, HOST_a2);           // ror a1, a2
01377                                 break;
01378                         case t_ROLw:
01379                                 *(Bit16u*)pos=LSL_IMM(HOST_a1, HOST_a1, 16);            // lsl a1, a1, #16
01380                                 *(Bit16u*)(pos+2)=NEG(HOST_a2, HOST_a2);                        // neg a2, a2
01381                                 *(Bit16u*)(pos+4)=LSR_IMM(templo1, HOST_a1, 16);        // lsr templo1, a1, #16
01382                                 *(Bit16u*)(pos+6)=ADD_IMM8(HOST_a2, 32);                        // add a2, #32
01383                                 *(Bit16u*)(pos+8)=ORR(HOST_a1, templo1);                        // orr a1, templo1
01384                                 *(Bit16u*)(pos+10)=NOP;                                                         // nop
01385                                 *(Bit16u*)(pos+12)=ROR_REG(HOST_a1, HOST_a2);           // ror a1, a2
01386                                 *(Bit16u*)(pos+14)=NOP;                                                         // nop
01387                                 *(Bit16u*)(pos+16)=NOP;                                                         // nop
01388                                 break;
01389                         case t_ROLd:
01390                                 *(Bit16u*)pos=NEG(HOST_a2, HOST_a2);                            // neg a2, a2
01391                                 *(Bit16u*)(pos+2)=ADD_IMM8(HOST_a2, 32);                        // add a2, #32
01392                                 *(Bit16u*)(pos+4)=ROR_REG(HOST_a1, HOST_a2);            // ror a1, a2
01393                                 *(Bit16u*)(pos+6)=B_FWD(8);                                                     // b after_call (pc+8)
01394                                 break;
01395                         case t_NEGb:
01396                         case t_NEGw:
01397                         case t_NEGd:
01398                                 *(Bit16u*)pos=NEG(HOST_a1, HOST_a1);                            // neg a1, a1
01399                                 *(Bit16u*)(pos+2)=B_FWD(12);                                            // b after_call (pc+12)
01400                                 break;
01401                         default:
01402                                 *(Bit32u*)( ( ((Bit32u) (*pos)) << 2 ) + ((Bit32u)pos + 2) ) = (Bit32u)fct_ptr;         // simple_func
01403                                 break;
01404                 }
01405 
01406         }
01407 #else
01408         if (((Bit32u)pos & 0x03) == 0)
01409         {
01410                 *(Bit32u*)( ( ((Bit32u) (*pos)) << 2 ) + ((Bit32u)pos + 4) ) = (Bit32u)fct_ptr;         // simple_func
01411         }
01412         else
01413         {
01414                 *(Bit32u*)( ( ((Bit32u) (*pos)) << 2 ) + ((Bit32u)pos + 2) ) = (Bit32u)fct_ptr;         // simple_func
01415         }
01416 #endif
01417 }
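
// Why the two copies of the switch differ only in branch distances and
// nop padding: the Thumb "b" written at pos+2 targets (pos + 2) + 4 + imm,
// so B_FWD(10) lands at pos + 16 and B_FWD(12) at pos + 18, the
// "after_call" points for a word-aligned and a halfword-aligned call
// site respectively.  The default case instead patches the literal pool
// slot of the original "ldr rX, [pc, #imm8]" at pos: a pc-relative load
// addresses Align(pos + 4, 4) + imm8 * 4, which is (pos + 4) + imm8 * 4
// when pos is word-aligned and (pos + 2) + imm8 * 4 when it is not,
// hence the differing "+ 4" and "+ 2" in the two slot computations.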
01418 #endif
01419 
01420 #ifdef DRC_USE_SEGS_ADDR
01421 
01422 // mov 16bit value from Segs[index] into dest_reg using FC_SEGS_ADDR (index modulo 2 must be zero)
01423 // 16bit moves may destroy the upper 16bit of the destination register
01424 static void gen_mov_seg16_to_reg(HostReg dest_reg,Bitu index) {
01425         cache_checkinstr(4);
01426         cache_addw( MOV_LO_HI(templo1, FC_SEGS_ADDR) );      // mov templo1, FC_SEGS_ADDR
01427         cache_addw( LDRH_IMM(dest_reg, templo1, index) );      // ldrh dest_reg, [templo1, #index]
01428 }
01429 
01430 // mov 32bit value from Segs[index] into dest_reg using FC_SEGS_ADDR (index modulo 4 must be zero)
01431 static void gen_mov_seg32_to_reg(HostReg dest_reg,Bitu index) {
01432         cache_checkinstr(4);
01433         cache_addw( MOV_LO_HI(templo1, FC_SEGS_ADDR) );      // mov templo1, FC_SEGS_ADDR
01434         cache_addw( LDR_IMM(dest_reg, templo1, index) );      // ldr dest_reg, [templo1, #index]
01435 }
01436 
01437 // add a 32bit value from Segs[index] to a full register using FC_SEGS_ADDR (index modulo 4 must be zero)
01438 static void gen_add_seg32_to_reg(HostReg reg,Bitu index) {
01439         cache_checkinstr(6);
01440         cache_addw( MOV_LO_HI(templo1, FC_SEGS_ADDR) );      // mov templo1, FC_SEGS_ADDR
01441         cache_addw( LDR_IMM(templo2, templo1, index) );      // ldr templo2, [templo1, #index]
01442         cache_addw( ADD_REG(reg, reg, templo2) );      // add reg, reg, templo2
01443 }
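
// Pattern note for the Segs accessors above: Thumb load/store
// instructions can only address memory through the low registers r0-r7,
// so the high register FC_SEGS_ADDR is first copied into a low
// temporary.  The net effect of gen_mov_seg16_to_reg, written as plain C
// (illustrative only):
//
//   dest_reg = *(Bit16u*)((Bit8u*)(&Segs) + index);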
01444 
01445 #endif
01446 
01447 #ifdef DRC_USE_REGS_ADDR
01448 
01449 // mov 16bit value from cpu_regs[index] into dest_reg using FC_REGS_ADDR (index modulo 2 must be zero)
01450 // 16bit moves may destroy the upper 16bit of the destination register
01451 static void gen_mov_regval16_to_reg(HostReg dest_reg,Bitu index) {
01452         cache_checkinstr(4);
01453         cache_addw( MOV_LO_HI(templo2, FC_REGS_ADDR) );      // mov templo2, FC_REGS_ADDR
01454         cache_addw( LDRH_IMM(dest_reg, templo2, index) );      // ldrh dest_reg, [templo2, #index]
01455 }
01456 
01457 // mov 32bit value from cpu_regs[index] into dest_reg using FC_REGS_ADDR (index modulo 4 must be zero)
01458 static void gen_mov_regval32_to_reg(HostReg dest_reg,Bitu index) {
01459         cache_checkinstr(4);
01460         cache_addw( MOV_LO_HI(templo2, FC_REGS_ADDR) );      // mov templo2, FC_REGS_ADDR
01461         cache_addw( LDR_IMM(dest_reg, templo2, index) );      // ldr dest_reg, [templo2, #index]
01462 }
01463 
01464 // move a 32bit (dword==true) or 16bit (dword==false) value from cpu_regs[index] into dest_reg using FC_REGS_ADDR (index modulo 4 must be zero if dword==true, modulo 2 if dword==false)
01465 // 16bit moves may destroy the upper 16bit of the destination register
01466 static void gen_mov_regword_to_reg(HostReg dest_reg,Bitu index,bool dword) {
01467         cache_checkinstr(4);
01468         cache_addw( MOV_LO_HI(templo2, FC_REGS_ADDR) );      // mov templo2, FC_REGS_ADDR
01469         if (dword) {
01470                 cache_addw( LDR_IMM(dest_reg, templo2, index) );      // ldr dest_reg, [templo2, #index]
01471         } else {
01472                 cache_addw( LDRH_IMM(dest_reg, templo2, index) );      // ldrh dest_reg, [templo2, #index]
01473         }
01474 }
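
// Semantics of the register-file loads above, written as plain C
// (illustrative only):
//
//   dest_reg = *(Bit32u*)((Bit8u*)(&cpu_regs) + index);   // dword==true
//   dest_reg = *(Bit16u*)((Bit8u*)(&cpu_regs) + index);   // dword==false
//
// Note that the standard Thumb immediate-offset encodings bound "index":
// ldr reaches word offsets up to 124 and ldrh halfword offsets up to 62,
// limits the cpu_regs layout is assumed to respect.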
01475 
01476 // move an 8bit value from cpu_regs[index] into dest_reg using FC_REGS_ADDR
01477 // the upper 24bit of the destination register can be destroyed
01478 // this function does not use FC_OP1/FC_OP2 as dest_reg, as these
01479 // registers might not be directly byte-accessible on some architectures
01480 static void gen_mov_regbyte_to_reg_low(HostReg dest_reg,Bitu index) {
01481         cache_checkinstr(4);
01482         cache_addw( MOV_LO_HI(templo2, FC_REGS_ADDR) );      // mov templo2, FC_REGS_ADDR
01483         cache_addw( LDRB_IMM(dest_reg, templo2, index) );      // ldrb dest_reg, [templo2, #index]
01484 }
01485 
01486 // move an 8bit value from cpu_regs[index] into dest_reg using FC_REGS_ADDR
01487 // the upper 24bit of the destination register can be destroyed
01488 // this function may use FC_OP1/FC_OP2 as dest_reg even though these
01489 // registers are not directly byte-accessible on some architectures
01490 static void INLINE gen_mov_regbyte_to_reg_low_canuseword(HostReg dest_reg,Bitu index) {
01491         cache_checkinstr(4);
01492         cache_addw( MOV_LO_HI(templo2, FC_REGS_ADDR) );      // mov templo2, FC_REGS_ADDR
01493         cache_addw( LDRB_IMM(dest_reg, templo2, index) );      // ldrb dest_reg, [templo2, #index]
01494 }
01495 
01496 
01497 // add a 32bit value from cpu_regs[index] to a full register using FC_REGS_ADDR (index modulo 4 must be zero)
01498 static void gen_add_regval32_to_reg(HostReg reg,Bitu index) {
01499         cache_checkinstr(6);
01500         cache_addw( MOV_LO_HI(templo2, FC_REGS_ADDR) );      // mov templo2, FC_REGS_ADDR
01501         cache_addw( LDR_IMM(templo1, templo2, index) );      // ldr templo1, [templo2, #index]
01502         cache_addw( ADD_REG(reg, reg, templo1) );      // add reg, reg, templo1
01503 }
01504 
01505 
01506 // move the low 16bit of a register into cpu_regs[index] using FC_REGS_ADDR (index modulo 2 must be zero)
01507 static void gen_mov_regval16_from_reg(HostReg src_reg,Bitu index) {
01508         cache_checkinstr(4);
01509         cache_addw( MOV_LO_HI(templo1, FC_REGS_ADDR) );      // mov templo1, FC_REGS_ADDR
01510         cache_addw( STRH_IMM(src_reg, templo1, index) );      // strh src_reg, [templo1, #index]
01511 }
01512 
01513 // move the 32bit value of a register into cpu_regs[index] using FC_REGS_ADDR (index modulo 4 must be zero)
01514 static void gen_mov_regval32_from_reg(HostReg src_reg,Bitu index) {
01515         cache_checkinstr(4);
01516         cache_addw( MOV_LO_HI(templo1, FC_REGS_ADDR) );      // mov templo1, FC_REGS_ADDR
01517         cache_addw( STR_IMM(src_reg, templo1, index) );      // str src_reg, [templo1, #index]
01518 }
01519 
01520 // move the 32bit (dword==true) or low 16bit (dword==false) value of a register into cpu_regs[index] using FC_REGS_ADDR (index modulo 4 must be zero if dword==true, modulo 2 if dword==false)
01521 static void gen_mov_regword_from_reg(HostReg src_reg,Bitu index,bool dword) {
01522         cache_checkinstr(4);
01523         cache_addw( MOV_LO_HI(templo1, FC_REGS_ADDR) );      // mov templo1, FC_REGS_ADDR
01524         if (dword) {
01525                 cache_addw( STR_IMM(src_reg, templo1, index) );      // str src_reg, [templo1, #index]
01526         } else {
01527                 cache_addw( STRH_IMM(src_reg, templo1, index) );      // strh src_reg, [templo1, #index]
01528         }
01529 }
01530 
01531 // move the lowest 8bit of a register into cpu_regs[index] using FC_REGS_ADDR
01532 static void gen_mov_regbyte_from_reg_low(HostReg src_reg,Bitu index) {
01533         cache_checkinstr(4);
01534         cache_addw( MOV_LO_HI(templo1, FC_REGS_ADDR) );      // mov templo1, FC_REGS_ADDR
01535         cache_addw( STRB_IMM(src_reg, templo1, index) );      // strb src_reg, [templo1, #index]
01536 }
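
// Hypothetical usage sketch (offsets chosen for illustration only): a
// 32-bit read-modify-write of a cpu_regs field at byte offset idx could
// combine the helpers above as
//
//   gen_mov_regval32_to_reg(HOST_a1, idx);     // a1 = field
//   /* ...modify HOST_a1 in place... */
//   gen_mov_regval32_from_reg(HOST_a1, idx);   // store a1 back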
01537 
01538 #endif