DOSBox-X
src/cpu/core_dynrec/risc_armv4le-thumb-iw.h
/*
 *  Copyright (C) 2002-2020  The DOSBox Team
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License along
 *  with this program; if not, write to the Free Software Foundation, Inc.,
 *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 */



/* ARMv4 (little endian) backend by M-HT (thumb version with data pool, requires -mthumb-interwork switch when compiling dosbox) */


// temporary "lo" registers
#define templo1 HOST_v3
#define templo2 HOST_v4
#define templo3 HOST_v2

// register that holds function return values
#define FC_RETOP HOST_a1

// register used for address calculations,
#define FC_ADDR HOST_v1                 // has to be saved across calls, see DRC_PROTECT_ADDR_REG

// register that holds the first parameter
#define FC_OP1 HOST_a1

// register that holds the second parameter
#define FC_OP2 HOST_a2

// special register that holds the third parameter for _R3 calls (byte accessible)
#define FC_OP3 HOST_a4

// register that holds byte-accessible temporary values
#define FC_TMP_BA1 HOST_a1

// register that holds byte-accessible temporary values
#define FC_TMP_BA2 HOST_a2

// temporary register for LEA
#define TEMP_REG_DRC HOST_a4

// used to hold the address of "cpu_regs" - preferably filled in function gen_run_code
#define FC_REGS_ADDR HOST_v7

// used to hold the address of "Segs" - preferably filled in function gen_run_code
#define FC_SEGS_ADDR HOST_v8

// used to hold the address of "core_dynrec.readdata" - filled in function gen_run_code
#define readdata_addr HOST_v5


// instruction encodings

// move
// mov dst, #imm                @       0 <= imm <= 255
#define MOV_IMM(dst, imm) (0x2000 + ((dst) << 8) + (imm) )
// mov dst, src
#define MOV_REG(dst, src) ADD_IMM3(dst, src, 0)
// mov dst, src
#define MOV_LO_HI(dst, src) (0x4640 + (dst) + (((src) - HOST_r8) << 3) )
// mov dst, src
#define MOV_HI_LO(dst, src) (0x4680 + ((dst) - HOST_r8) + ((src) << 3) )
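
// Worked example (illustrative, not from the original source; register
// numbers come from the common ARMv4 backend header, where HOST_a1 is
// register 0): MOV_IMM(HOST_a1, 16) evaluates to 0x2010, the Thumb-1
// encoding of "mov r0, #16" (opcode 00100, Rd in bits 10-8, imm8 in
// bits 7-0). MOV_REG is built on ADD_IMM3 because Thumb-1 encodes a
// lo-to-lo register move as "add dst, src, #0".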

// arithmetic
// add dst, src, #imm           @       0 <= imm <= 7
#define ADD_IMM3(dst, src, imm) (0x1c00 + (dst) + ((src) << 3) + ((imm) << 6) )
// add dst, #imm                @       0 <= imm <= 255
#define ADD_IMM8(dst, imm) (0x3000 + ((dst) << 8) + (imm) )
// add dst, src1, src2
#define ADD_REG(dst, src1, src2) (0x1800 + (dst) + ((src1) << 3) + ((src2) << 6) )
// add dst, pc, #imm            @       0 <= imm < 1024 &       imm mod 4 = 0
#define ADD_LO_PC_IMM(dst, imm) (0xa000 + ((dst) << 8) + ((imm) >> 2) )
// sub dst, src1, src2
#define SUB_REG(dst, src1, src2) (0x1a00 + (dst) + ((src1) << 3) + ((src2) << 6) )
// sub dst, src, #imm           @       0 <= imm <= 7
#define SUB_IMM3(dst, src, imm) (0x1e00 + (dst) + ((src) << 3) + ((imm) << 6) )
// sub dst, #imm                @       0 <= imm <= 255
#define SUB_IMM8(dst, imm) (0x3800 + ((dst) << 8) + (imm) )
// neg dst, src
#define NEG(dst, src) (0x4240 + (dst) + ((src) << 3) )
// cmp dst, #imm                @       0 <= imm <= 255
#define CMP_IMM(dst, imm) (0x2800 + ((dst) << 8) + (imm) )
// nop
#define NOP (0x46c0)
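
// Note (illustrative): the pc-relative form stores its offset in words,
// hence the ">> 2" in ADD_LO_PC_IMM - e.g. ADD_LO_PC_IMM(dst, 8) places
// 8/4 = 2 in the imm8 field, and the hardware computes
// dst = (pc & ~3) + 2*4. Callers must therefore pass a byte offset that
// is a multiple of 4, as the constraint comment above states.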

// logical
// and dst, src
#define AND(dst, src) (0x4000 + (dst) + ((src) << 3) )
// bic dst, src
#define BIC(dst, src) (0x4380 + (dst) + ((src) << 3) )
// eor dst, src
#define EOR(dst, src) (0x4040 + (dst) + ((src) << 3) )
// orr dst, src
#define ORR(dst, src) (0x4300 + (dst) + ((src) << 3) )
// mvn dst, src
#define MVN(dst, src) (0x43c0 + (dst) + ((src) << 3) )

// shift/rotate
// lsl dst, src, #imm
#define LSL_IMM(dst, src, imm) (0x0000 + (dst) + ((src) << 3) + ((imm) << 6) )
// lsl dst, reg
#define LSL_REG(dst, reg) (0x4080 + (dst) + ((reg) << 3) )
// lsr dst, src, #imm
#define LSR_IMM(dst, src, imm) (0x0800 + (dst) + ((src) << 3) + ((imm) << 6) )
// lsr dst, reg
#define LSR_REG(dst, reg) (0x40c0 + (dst) + ((reg) << 3) )
// asr dst, src, #imm
#define ASR_IMM(dst, src, imm) (0x1000 + (dst) + ((src) << 3) + ((imm) << 6) )
// asr dst, reg
#define ASR_REG(dst, reg) (0x4100 + (dst) + ((reg) << 3) )
// ror dst, reg
#define ROR_REG(dst, reg) (0x41c0 + (dst) + ((reg) << 3) )

// load
// ldr reg, [addr, #imm]                @       0 <= imm < 128  &       imm mod 4 = 0
#define LDR_IMM(reg, addr, imm) (0x6800 + (reg) + ((addr) << 3) + ((imm) << 4) )
// ldrh reg, [addr, #imm]               @       0 <= imm < 64   &       imm mod 2 = 0
#define LDRH_IMM(reg, addr, imm) (0x8800 + (reg) + ((addr) << 3) + ((imm) << 5) )
// ldrb reg, [addr, #imm]               @       0 <= imm < 32
#define LDRB_IMM(reg, addr, imm) (0x7800 + (reg) + ((addr) << 3) + ((imm) << 6) )
// ldr reg, [pc, #imm]          @       0 <= imm < 1024 &       imm mod 4 = 0
#define LDR_PC_IMM(reg, imm) (0x4800 + ((reg) << 8) + ((imm) >> 2) )
// ldr reg, [addr1, addr2]
#define LDR_REG(reg, addr1, addr2) (0x5800 + (reg) + ((addr1) << 3) + ((addr2) << 6) )
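
// Note (illustrative): the per-size shift differs because the imm5 field
// at bit 6 holds the offset in units of the access size - LDR shifts the
// byte offset by 4 (== (imm >> 2) << 6), LDRH by 5 (== (imm >> 1) << 6),
// and LDRB by 6 (bytes are used directly). So LDR_IMM(reg, addr, 8)
// places 8/4 = 2 in the offset field.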

// store
// str reg, [addr, #imm]                @       0 <= imm < 128  &       imm mod 4 = 0
#define STR_IMM(reg, addr, imm) (0x6000 + (reg) + ((addr) << 3) + ((imm) << 4) )
// strh reg, [addr, #imm]               @       0 <= imm < 64   &       imm mod 2 = 0
#define STRH_IMM(reg, addr, imm) (0x8000 + (reg) + ((addr) << 3) + ((imm) << 5) )
// strb reg, [addr, #imm]               @       0 <= imm < 32
#define STRB_IMM(reg, addr, imm) (0x7000 + (reg) + ((addr) << 3) + ((imm) << 6) )

// branch
// beq pc+imm           @       0 <= imm < 256  &       imm mod 2 = 0
#define BEQ_FWD(imm) (0xd000 + ((imm) >> 1) )
// bne pc+imm           @       0 <= imm < 256  &       imm mod 2 = 0
#define BNE_FWD(imm) (0xd100 + ((imm) >> 1) )
// bgt pc+imm           @       0 <= imm < 256  &       imm mod 2 = 0
#define BGT_FWD(imm) (0xdc00 + ((imm) >> 1) )
// b pc+imm             @       0 <= imm < 2048 &       imm mod 2 = 0
#define B_FWD(imm) (0xe000 + ((imm) >> 1) )
// bx reg
#define BX(reg) (0x4700 + ((reg) << 3) )
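
// Note (illustrative): the branch macros take an offset relative to the
// Thumb pc, i.e. the address of the branch instruction plus 4, and the
// ">> 1" converts that byte offset into the halfword count the encoding
// stores. See gen_create_branch_short() below, which computes the offset
// as target - (cache.pos + 4).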


// arm instructions

// arithmetic
// add dst, src, #(imm ror rimm)                @       0 <= imm <= 255 &       rimm mod 2 = 0
#define ARM_ADD_IMM(dst, src, imm, rimm) (0xe2800000 + ((dst) << 12) + ((src) << 16) + (imm) + ((rimm) << 7) )

// load
// ldr reg, [addr, #imm]                @       0 <= imm < 4096
#define ARM_LDR_IMM(reg, addr, imm) (0xe5900000 + ((reg) << 12) + ((addr) << 16) + (imm) )

// store
// str reg, [addr, #-(imm)]!            @       0 <= imm < 4096
#define ARM_STR_IMM_M_W(reg, addr, imm) (0xe5200000 + ((reg) << 12) + ((addr) << 16) + (imm) )

// branch
// bx reg
#define ARM_BX(reg) (0xe12fff10 + (reg) )


// data pool defines
#define CACHE_DATA_JUMP  (2)
#define CACHE_DATA_ALIGN (32)
#define CACHE_DATA_MIN   (32)
#define CACHE_DATA_MAX   (288)
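
// How the data pool works (an informal sketch, inferred from the code
// below): 32-bit literals such as constants and call targets cannot be
// encoded in 16-bit Thumb instructions, so they are parked in a small
// "data pool" inside the cache block and loaded pc-relative with
// LDR_PC_IMM. CACHE_DATA_JUMP is the size of the branch that skips the
// pool when code generation reaches it, CACHE_DATA_ALIGN is the pool's
// alignment and growth granularity, and CACHE_DATA_MIN/CACHE_DATA_MAX
// bound where a new pool is placed relative to the current code position
// (pc-relative loads only reach 1024 bytes forward, see LDR_PC_IMM).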

// data pool variables
static Bit8u * cache_datapos = NULL;    // position of data pool in the cache block
static Bit32u cache_datasize = 0;               // total size of data pool
static Bit32u cache_dataindex = 0;              // used size of data pool = index of free data item (in bytes) in data pool


// forwarded function
static void INLINE gen_create_branch_short(void * func);

// checks the remaining distance to the data pool;
// if the next instruction would run into it, generates a jump past the data pool and picks a new pool position
static void cache_checkinstr(Bit32u size) {
        if (cache_datasize == 0) {
                if (cache_datapos != NULL) {
                        if (cache.pos + size + CACHE_DATA_JUMP >= cache_datapos) {
                                cache_datapos = NULL;
                        }
                }
                return;
        }

        if (cache.pos + size + CACHE_DATA_JUMP <= cache_datapos) return;

        {
                register Bit8u * newcachepos;

                newcachepos = cache_datapos + cache_datasize;
                gen_create_branch_short(newcachepos);
                cache.pos = newcachepos;
        }

        if (cache.pos + CACHE_DATA_MAX + CACHE_DATA_ALIGN >= cache.block.active->cache.start + cache.block.active->cache.size &&
                cache.pos + CACHE_DATA_MIN + CACHE_DATA_ALIGN + (CACHE_DATA_ALIGN - CACHE_ALIGN) < cache.block.active->cache.start + cache.block.active->cache.size)
        {
                cache_datapos = (Bit8u *) (((Bitu)cache.block.active->cache.start + cache.block.active->cache.size - CACHE_DATA_ALIGN) & ~(CACHE_DATA_ALIGN - 1));
        } else {
                register Bit32u cachemodsize;

                cachemodsize = (cache.pos - cache.block.active->cache.start) & (CACHE_MAXSIZE - 1);

                if (cachemodsize + CACHE_DATA_MAX + CACHE_DATA_ALIGN <= CACHE_MAXSIZE ||
                        cachemodsize + CACHE_DATA_MIN + CACHE_DATA_ALIGN + (CACHE_DATA_ALIGN - CACHE_ALIGN) > CACHE_MAXSIZE)
                {
                        cache_datapos = (Bit8u *) (((Bitu)cache.pos + CACHE_DATA_MAX) & ~(CACHE_DATA_ALIGN - 1));
                } else {
                        cache_datapos = (Bit8u *) (((Bitu)cache.pos + (CACHE_MAXSIZE - CACHE_DATA_ALIGN) - cachemodsize) & ~(CACHE_DATA_ALIGN - 1));
                }
        }

        cache_datasize = 0;
        cache_dataindex = 0;
}

// function to reserve item in data pool
// returns address of item
static Bit8u * cache_reservedata(void) {
        // if data pool not yet initialized, then initialize data pool
        if (GCC_UNLIKELY(cache_datapos == NULL)) {
                if (cache.pos + CACHE_DATA_MIN + CACHE_DATA_ALIGN < cache.block.active->cache.start + CACHE_DATA_MAX) {
                        cache_datapos = (Bit8u *) (((Bitu)cache.block.active->cache.start + CACHE_DATA_MAX) & ~(CACHE_DATA_ALIGN - 1));
                }
        }

        // if data pool not yet used, then position the data pool
        if (cache_datasize == 0) {
                // if the data pool address is too close to (or behind) cache.pos, then pick a new data pool position
                if (cache.pos + CACHE_DATA_MIN + CACHE_DATA_JUMP /*+ CACHE_DATA_ALIGN*/ > cache_datapos) {
                        if (cache.pos + CACHE_DATA_MAX + CACHE_DATA_ALIGN >= cache.block.active->cache.start + cache.block.active->cache.size &&
                                cache.pos + CACHE_DATA_MIN + CACHE_DATA_ALIGN + (CACHE_DATA_ALIGN - CACHE_ALIGN) < cache.block.active->cache.start + cache.block.active->cache.size)
                        {
                                cache_datapos = (Bit8u *) (((Bitu)cache.block.active->cache.start + cache.block.active->cache.size - CACHE_DATA_ALIGN) & ~(CACHE_DATA_ALIGN - 1));
                        } else {
                                register Bit32u cachemodsize;

                                cachemodsize = (cache.pos - cache.block.active->cache.start) & (CACHE_MAXSIZE - 1);

                                if (cachemodsize + CACHE_DATA_MAX + CACHE_DATA_ALIGN <= CACHE_MAXSIZE ||
                                        cachemodsize + CACHE_DATA_MIN + CACHE_DATA_ALIGN + (CACHE_DATA_ALIGN - CACHE_ALIGN) > CACHE_MAXSIZE)
                                {
                                        cache_datapos = (Bit8u *) (((Bitu)cache.pos + CACHE_DATA_MAX) & ~(CACHE_DATA_ALIGN - 1));
                                } else {
                                        cache_datapos = (Bit8u *) (((Bitu)cache.pos + (CACHE_MAXSIZE - CACHE_DATA_ALIGN) - cachemodsize) & ~(CACHE_DATA_ALIGN - 1));
                                }
                        }
                }
                // set initial data pool size
                cache_datasize = CACHE_DATA_ALIGN;
        }

        // if data pool is full, then enlarge data pool
        if (cache_dataindex == cache_datasize) {
                cache_datasize += CACHE_DATA_ALIGN;
        }

        cache_dataindex += 4;
        return (cache_datapos + (cache_dataindex - 4));
}

static void cache_block_before_close(void) {
        // if data pool in use, then resize cache block to include the data pool
        if (cache_datasize != 0)
        {
                cache.pos = cache_datapos + cache_dataindex;
        }

        // clear the values before next use
        cache_datapos = NULL;
        cache_datasize = 0;
        cache_dataindex = 0;
}


// move a full register from reg_src to reg_dst
static void gen_mov_regs(HostReg reg_dst,HostReg reg_src) {
        if(reg_src == reg_dst) return;
        cache_checkinstr(2);
        cache_addw( MOV_REG(reg_dst, reg_src) );      // mov reg_dst, reg_src
}

// helper function
static bool val_single_shift(Bit32u value, Bit32u *val_shift) {
        Bit32u shift;

        if (GCC_UNLIKELY(value == 0)) {
                *val_shift = 0;
                return true;
        }

        shift = 0;
        while ((value & 1) == 0) {
                value>>=1;
                shift+=1;
        }

        if ((value >> 8) != 0) return false;

        *val_shift = shift;
        return true;
}
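
// Example (illustrative): val_single_shift(0x3fc0, &s) strips six trailing
// zero bits, leaving 0xff, which fits in 8 bits, so it returns true with
// s == 6; val_single_shift(0x101, &s) returns false (no trailing zeros and
// too large for 8 bits). Callers use this to synthesize a constant with
// "mov #imm8" followed by "lsl #s".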

// move a 32bit constant value into dest_reg
static void gen_mov_dword_to_reg_imm(HostReg dest_reg,Bit32u imm) {
        Bit32u scale;

        if (imm < 256) {
                cache_checkinstr(2);
                cache_addw( MOV_IMM(dest_reg, imm) );      // mov dest_reg, #(imm)
        } else if ((~imm) < 256) {
                cache_checkinstr(4);
                cache_addw( MOV_IMM(dest_reg, ~imm) );      // mov dest_reg, #(~imm)
                cache_addw( MVN(dest_reg, dest_reg) );      // mvn dest_reg, dest_reg
        } else if (val_single_shift(imm, &scale)) {
                cache_checkinstr(4);
                cache_addw( MOV_IMM(dest_reg, imm >> scale) );      // mov dest_reg, #(imm >> scale)
                cache_addw( LSL_IMM(dest_reg, dest_reg, scale) );      // lsl dest_reg, dest_reg, #scale
        } else {
                Bit32u diff;

                cache_checkinstr(4);

                diff = imm - ((Bit32u)cache.pos+4);

                if ((diff < 1024) && ((imm & 0x03) == 0)) {
                        if (((Bit32u)cache.pos & 0x03) == 0) {
                                cache_addw( ADD_LO_PC_IMM(dest_reg, diff >> 2) );      // add dest_reg, pc, #(diff >> 2)
                        } else {
                                cache_addw( NOP );      // nop
                                cache_addw( ADD_LO_PC_IMM(dest_reg, (diff - 2) >> 2) );      // add dest_reg, pc, #((diff - 2) >> 2)
                        }
                } else {
                        Bit8u *datapos;

                        datapos = cache_reservedata();
                        *(Bit32u*)datapos=imm;

                        if (((Bit32u)cache.pos & 0x03) == 0) {
                                cache_addw( LDR_PC_IMM(dest_reg, datapos - (cache.pos + 4)) );      // ldr dest_reg, [pc, datapos]
                        } else {
                                cache_addw( LDR_PC_IMM(dest_reg, datapos - (cache.pos + 2)) );      // ldr dest_reg, [pc, datapos]
                        }
                }
        }
}
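
// Summary of the strategies above, cheapest first (illustrative):
//   imm < 256                          -> single mov (2 bytes)
//   ~imm < 256                         -> mov + mvn (4 bytes)
//   imm == imm8 << n                   -> mov + lsl (4 bytes)
//   word-aligned imm within 1024 bytes
//   ahead of pc (e.g. a cache address) -> pc-relative add (2 or 4 bytes)
//   anything else                      -> 32-bit literal in the data pool,
//                                         loaded with a pc-relative ldr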

// helper function
static bool gen_mov_memval_to_reg_helper(HostReg dest_reg, Bit32u data, Bitu size, HostReg addr_reg, Bit32u addr_data) {
        switch (size) {
                case 4:
#if !defined(C_UNALIGNED_MEMORY)
                        if ((data & 3) == 0)
#endif
                        {
                                if ((data >= addr_data) && (data < addr_data + 128) && (((data - addr_data) & 3) == 0)) {
                                        cache_checkinstr(4);
                                        cache_addw( MOV_LO_HI(templo2, addr_reg) );      // mov templo2, addr_reg
                                        cache_addw( LDR_IMM(dest_reg, templo2, data - addr_data) );      // ldr dest_reg, [templo2, #(data - addr_data)]
                                        return true;
                                }
                        }
                        break;
                case 2:
#if !defined(C_UNALIGNED_MEMORY)
                        if ((data & 1) == 0)
#endif
                        {
                                if ((data >= addr_data) && (data < addr_data + 64) && (((data - addr_data) & 1) == 0)) {
                                        cache_checkinstr(4);
                                        cache_addw( MOV_LO_HI(templo2, addr_reg) );      // mov templo2, addr_reg
                                        cache_addw( LDRH_IMM(dest_reg, templo2, data - addr_data) );      // ldrh dest_reg, [templo2, #(data - addr_data)]
                                        return true;
                                }
                        }
                        break;
                case 1:
                        if ((data >= addr_data) && (data < addr_data + 32)) {
                                cache_checkinstr(4);
                                cache_addw( MOV_LO_HI(templo2, addr_reg) );      // mov templo2, addr_reg
                                cache_addw( LDRB_IMM(dest_reg, templo2, data - addr_data) );      // ldrb dest_reg, [templo2, #(data - addr_data)]
                                return true;
                        }
                default:
                        break;
        }
        return false;
}

// helper function
static bool gen_mov_memval_to_reg(HostReg dest_reg, void *data, Bitu size) {
        if (gen_mov_memval_to_reg_helper(dest_reg, (Bit32u)data, size, FC_REGS_ADDR, (Bit32u)&cpu_regs)) return true;
        if (gen_mov_memval_to_reg_helper(dest_reg, (Bit32u)data, size, readdata_addr, (Bit32u)&core_dynrec.readdata)) return true;
        if (gen_mov_memval_to_reg_helper(dest_reg, (Bit32u)data, size, FC_SEGS_ADDR, (Bit32u)&Segs)) return true;
        return false;
}

// helper function for gen_mov_word_to_reg
static void gen_mov_word_to_reg_helper(HostReg dest_reg,void* data,bool dword,HostReg data_reg) {
        // alignment....
        if (dword) {
#if !defined(C_UNALIGNED_MEMORY)
                if ((Bit32u)data & 3) {
                        if ( ((Bit32u)data & 3) == 2 ) {
                                cache_checkinstr(8);
                                cache_addw( LDRH_IMM(dest_reg, data_reg, 0) );      // ldrh dest_reg, [data_reg]
                                cache_addw( LDRH_IMM(templo1, data_reg, 2) );      // ldrh templo1, [data_reg, #2]
                                cache_addw( LSL_IMM(templo1, templo1, 16) );      // lsl templo1, templo1, #16
                                cache_addw( ORR(dest_reg, templo1) );      // orr dest_reg, templo1
                        } else {
                                cache_checkinstr(16);
                                cache_addw( LDRB_IMM(dest_reg, data_reg, 0) );      // ldrb dest_reg, [data_reg]
                                cache_addw( ADD_IMM3(templo1, data_reg, 1) );      // add templo1, data_reg, #1
                                cache_addw( LDRH_IMM(templo1, templo1, 0) );      // ldrh templo1, [templo1]
                                cache_addw( LSL_IMM(templo1, templo1, 8) );      // lsl templo1, templo1, #8
                                cache_addw( ORR(dest_reg, templo1) );      // orr dest_reg, templo1
                                cache_addw( LDRB_IMM(templo1, data_reg, 3) );      // ldrb templo1, [data_reg, #3]
                                cache_addw( LSL_IMM(templo1, templo1, 24) );      // lsl templo1, templo1, #24
                                cache_addw( ORR(dest_reg, templo1) );      // orr dest_reg, templo1
                        }
                } else
#endif
                {
                        cache_checkinstr(2);
                        cache_addw( LDR_IMM(dest_reg, data_reg, 0) );      // ldr dest_reg, [data_reg]
                }
        } else {
#if !defined(C_UNALIGNED_MEMORY)
                if ((Bit32u)data & 1) {
                        cache_checkinstr(8);
                        cache_addw( LDRB_IMM(dest_reg, data_reg, 0) );      // ldrb dest_reg, [data_reg]
                        cache_addw( LDRB_IMM(templo1, data_reg, 1) );      // ldrb templo1, [data_reg, #1]
                        cache_addw( LSL_IMM(templo1, templo1, 8) );      // lsl templo1, templo1, #8
                        cache_addw( ORR(dest_reg, templo1) );      // orr dest_reg, templo1
                } else
#endif
                {
                        cache_checkinstr(2);
                        cache_addw( LDRH_IMM(dest_reg, data_reg, 0) );      // ldrh dest_reg, [data_reg]
                }
        }
}

// move a 32bit (dword==true) or 16bit (dword==false) value from memory into dest_reg
// 16bit moves may destroy the upper 16bit of the destination register
static void gen_mov_word_to_reg(HostReg dest_reg,void* data,bool dword) {
        if (!gen_mov_memval_to_reg(dest_reg, data, (dword)?4:2)) {
                gen_mov_dword_to_reg_imm(templo2, (Bit32u)data);
                gen_mov_word_to_reg_helper(dest_reg, data, dword, templo2);
        }
}

// move a 16bit constant value into dest_reg
// the upper 16bit of the destination register may be destroyed
static void INLINE gen_mov_word_to_reg_imm(HostReg dest_reg,Bit16u imm) {
        gen_mov_dword_to_reg_imm(dest_reg, (Bit32u)imm);
}

// helper function
static bool gen_mov_memval_from_reg_helper(HostReg src_reg, Bit32u data, Bitu size, HostReg addr_reg, Bit32u addr_data) {
        switch (size) {
                case 4:
#if !defined(C_UNALIGNED_MEMORY)
                        if ((data & 3) == 0)
#endif
                        {
                                if ((data >= addr_data) && (data < addr_data + 128) && (((data - addr_data) & 3) == 0)) {
                                        cache_checkinstr(4);
                                        cache_addw( MOV_LO_HI(templo2, addr_reg) );      // mov templo2, addr_reg
                                        cache_addw( STR_IMM(src_reg, templo2, data - addr_data) );      // str src_reg, [templo2, #(data - addr_data)]
                                        return true;
                                }
                        }
                        break;
                case 2:
#if !defined(C_UNALIGNED_MEMORY)
                        if ((data & 1) == 0)
#endif
                        {
                                if ((data >= addr_data) && (data < addr_data + 64) && (((data - addr_data) & 1) == 0)) {
                                        cache_checkinstr(4);
                                        cache_addw( MOV_LO_HI(templo2, addr_reg) );      // mov templo2, addr_reg
                                        cache_addw( STRH_IMM(src_reg, templo2, data - addr_data) );      // strh src_reg, [templo2, #(data - addr_data)]
                                        return true;
                                }
                        }
                        break;
                case 1:
                        if ((data >= addr_data) && (data < addr_data + 32)) {
                                cache_checkinstr(4);
                                cache_addw( MOV_LO_HI(templo2, addr_reg) );      // mov templo2, addr_reg
                                cache_addw( STRB_IMM(src_reg, templo2, data - addr_data) );      // strb src_reg, [templo2, #(data - addr_data)]
                                return true;
                        }
                default:
                        break;
        }
        return false;
}

// helper function
static bool gen_mov_memval_from_reg(HostReg src_reg, void *dest, Bitu size) {
        if (gen_mov_memval_from_reg_helper(src_reg, (Bit32u)dest, size, FC_REGS_ADDR, (Bit32u)&cpu_regs)) return true;
        if (gen_mov_memval_from_reg_helper(src_reg, (Bit32u)dest, size, readdata_addr, (Bit32u)&core_dynrec.readdata)) return true;
        if (gen_mov_memval_from_reg_helper(src_reg, (Bit32u)dest, size, FC_SEGS_ADDR, (Bit32u)&Segs)) return true;
        return false;
}

// helper function for gen_mov_word_from_reg
static void gen_mov_word_from_reg_helper(HostReg src_reg,void* dest,bool dword, HostReg data_reg) {
        // alignment....
        if (dword) {
#if !defined(C_UNALIGNED_MEMORY)
                if ((Bit32u)dest & 3) {
                        if ( ((Bit32u)dest & 3) == 2 ) {
                                cache_checkinstr(8);
                                cache_addw( STRH_IMM(src_reg, data_reg, 0) );      // strh src_reg, [data_reg]
                                cache_addw( MOV_REG(templo1, src_reg) );      // mov templo1, src_reg
                                cache_addw( LSR_IMM(templo1, templo1, 16) );      // lsr templo1, templo1, #16
                                cache_addw( STRH_IMM(templo1, data_reg, 2) );      // strh templo1, [data_reg, #2]
                        } else {
                                cache_checkinstr(20);
                                cache_addw( STRB_IMM(src_reg, data_reg, 0) );      // strb src_reg, [data_reg]
                                cache_addw( MOV_REG(templo1, src_reg) );      // mov templo1, src_reg
                                cache_addw( LSR_IMM(templo1, templo1, 8) );      // lsr templo1, templo1, #8
                                cache_addw( STRB_IMM(templo1, data_reg, 1) );      // strb templo1, [data_reg, #1]
                                cache_addw( MOV_REG(templo1, src_reg) );      // mov templo1, src_reg
                                cache_addw( LSR_IMM(templo1, templo1, 16) );      // lsr templo1, templo1, #16
                                cache_addw( STRB_IMM(templo1, data_reg, 2) );      // strb templo1, [data_reg, #2]
                                cache_addw( MOV_REG(templo1, src_reg) );      // mov templo1, src_reg
                                cache_addw( LSR_IMM(templo1, templo1, 24) );      // lsr templo1, templo1, #24
                                cache_addw( STRB_IMM(templo1, data_reg, 3) );      // strb templo1, [data_reg, #3]
                        }
                } else
#endif
                {
                        cache_checkinstr(2);
                        cache_addw( STR_IMM(src_reg, data_reg, 0) );      // str src_reg, [data_reg]
                }
        } else {
#if !defined(C_UNALIGNED_MEMORY)
                if ((Bit32u)dest & 1) {
                        cache_checkinstr(8);
                        cache_addw( STRB_IMM(src_reg, data_reg, 0) );      // strb src_reg, [data_reg]
                        cache_addw( MOV_REG(templo1, src_reg) );      // mov templo1, src_reg
                        cache_addw( LSR_IMM(templo1, templo1, 8) );      // lsr templo1, templo1, #8
                        cache_addw( STRB_IMM(templo1, data_reg, 1) );      // strb templo1, [data_reg, #1]
                } else
#endif
                {
                        cache_checkinstr(2);
                        cache_addw( STRH_IMM(src_reg, data_reg, 0) );      // strh src_reg, [data_reg]
                }
        }
}

// move 32bit (dword==true) or 16bit (dword==false) of a register into memory
static void gen_mov_word_from_reg(HostReg src_reg,void* dest,bool dword) {
        if (!gen_mov_memval_from_reg(src_reg, dest, (dword)?4:2)) {
                gen_mov_dword_to_reg_imm(templo2, (Bit32u)dest);
                gen_mov_word_from_reg_helper(src_reg, dest, dword, templo2);
        }
}

// move an 8bit value from memory into dest_reg
// the upper 24bit of the destination register can be destroyed
// this function does not use FC_OP1/FC_OP2 as dest_reg as these
// registers might not be directly byte-accessible on some architectures
static void gen_mov_byte_to_reg_low(HostReg dest_reg,void* data) {
        if (!gen_mov_memval_to_reg(dest_reg, data, 1)) {
                gen_mov_dword_to_reg_imm(templo1, (Bit32u)data);
                cache_checkinstr(2);
                cache_addw( LDRB_IMM(dest_reg, templo1, 0) );      // ldrb dest_reg, [templo1]
        }
}

// move an 8bit value from memory into dest_reg
// the upper 24bit of the destination register can be destroyed
// this function can use FC_OP1/FC_OP2 as dest_reg which are
// not directly byte-accessible on some architectures
static void INLINE gen_mov_byte_to_reg_low_canuseword(HostReg dest_reg,void* data) {
        gen_mov_byte_to_reg_low(dest_reg, data);
}

// move an 8bit constant value into dest_reg
// the upper 24bit of the destination register can be destroyed
// this function does not use FC_OP1/FC_OP2 as dest_reg as these
// registers might not be directly byte-accessible on some architectures
static void gen_mov_byte_to_reg_low_imm(HostReg dest_reg,Bit8u imm) {
        cache_checkinstr(2);
        cache_addw( MOV_IMM(dest_reg, imm) );      // mov dest_reg, #(imm)
}

// move an 8bit constant value into dest_reg
// the upper 24bit of the destination register can be destroyed
// this function can use FC_OP1/FC_OP2 as dest_reg which are
// not directly byte-accessible on some architectures
static void INLINE gen_mov_byte_to_reg_low_imm_canuseword(HostReg dest_reg,Bit8u imm) {
        gen_mov_byte_to_reg_low_imm(dest_reg, imm);
}

// move the lowest 8bit of a register into memory
static void gen_mov_byte_from_reg_low(HostReg src_reg,void* dest) {
        if (!gen_mov_memval_from_reg(src_reg, dest, 1)) {
                gen_mov_dword_to_reg_imm(templo1, (Bit32u)dest);
                cache_checkinstr(2);
                cache_addw( STRB_IMM(src_reg, templo1, 0) );      // strb src_reg, [templo1]
        }
}



// convert an 8bit word to a 32bit dword
// the register is zero-extended (sign==false) or sign-extended (sign==true)
static void gen_extend_byte(bool sign,HostReg reg) {
        cache_checkinstr(4);
        cache_addw( LSL_IMM(reg, reg, 24) );      // lsl reg, reg, #24

        if (sign) {
                cache_addw( ASR_IMM(reg, reg, 24) );      // asr reg, reg, #24
        } else {
                cache_addw( LSR_IMM(reg, reg, 24) );      // lsr reg, reg, #24
        }
}

// convert a 16bit word to a 32bit dword
// the register is zero-extended (sign==false) or sign-extended (sign==true)
static void gen_extend_word(bool sign,HostReg reg) {
        cache_checkinstr(4);
        cache_addw( LSL_IMM(reg, reg, 16) );      // lsl reg, reg, #16

        if (sign) {
                cache_addw( ASR_IMM(reg, reg, 16) );      // asr reg, reg, #16
        } else {
                cache_addw( LSR_IMM(reg, reg, 16) );      // lsr reg, reg, #16
        }
}
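
// Example (illustrative): for a register holding 0x00001280,
// gen_extend_byte(true, reg) emits "lsl reg, reg, #24" (0x80000000)
// followed by "asr reg, reg, #24" (0xffffff80); with sign == false the
// final "lsr" yields 0x00000080 instead. ARMv4 has no sxtb/uxtb, so the
// shift pair is the standard idiom.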

// add a 32bit value from memory to a full register
static void gen_add(HostReg reg,void* op) {
        gen_mov_word_to_reg(templo3, op, 1);
        cache_checkinstr(2);
        cache_addw( ADD_REG(reg, reg, templo3) );      // add reg, reg, templo3
}

// add a 32bit constant value to a full register
static void gen_add_imm(HostReg reg,Bit32u imm) {
        Bit32u imm2, scale;

        if(!imm) return;

        imm2 = (Bit32u) (-((Bit32s)imm));

        if (imm <= 255) {
                cache_checkinstr(2);
                cache_addw( ADD_IMM8(reg, imm) );      // add reg, #imm
        } else if (imm2 <= 255) {
                cache_checkinstr(2);
                cache_addw( SUB_IMM8(reg, imm2) );      // sub reg, #(-imm)
        } else {
                if (val_single_shift(imm2, &scale)) {
                        cache_checkinstr((scale)?6:4);
                        cache_addw( MOV_IMM(templo1, imm2 >> scale) );      // mov templo1, #(-imm >> scale)
                        if (scale) {
                                cache_addw( LSL_IMM(templo1, templo1, scale) );      // lsl templo1, templo1, #scale
                        }
                        cache_addw( SUB_REG(reg, reg, templo1) );      // sub reg, reg, templo1
                } else {
                        gen_mov_dword_to_reg_imm(templo1, imm);
                        cache_checkinstr(2);
                        cache_addw( ADD_REG(reg, reg, templo1) );      // add reg, reg, templo1
                }
        }
}

// and a 32bit constant value with a full register
static void gen_and_imm(HostReg reg,Bit32u imm) {
        Bit32u imm2, scale;

        imm2 = ~imm;
        if(!imm2) return;

        if (!imm) {
                cache_checkinstr(2);
                cache_addw( MOV_IMM(reg, 0) );      // mov reg, #0
        } else {
                if (val_single_shift(imm2, &scale)) {
                        cache_checkinstr((scale)?6:4);
                        cache_addw( MOV_IMM(templo1, imm2 >> scale) );      // mov templo1, #(~imm >> scale)
                        if (scale) {
                                cache_addw( LSL_IMM(templo1, templo1, scale) );      // lsl templo1, templo1, #scale
                        }
                        cache_addw( BIC(reg, templo1) );      // bic reg, templo1
                } else {
                        gen_mov_dword_to_reg_imm(templo1, imm);
                        cache_checkinstr(2);
                        cache_addw( AND(reg, templo1) );      // and reg, templo1
                }
        }
}


// move a 32bit constant value into memory
static void gen_mov_direct_dword(void* dest,Bit32u imm) {
        gen_mov_dword_to_reg_imm(templo3, imm);
        gen_mov_word_from_reg(templo3, dest, 1);
}

// move an address into memory
static void INLINE gen_mov_direct_ptr(void* dest,DRC_PTR_SIZE_IM imm) {
        gen_mov_direct_dword(dest,(Bit32u)imm);
}

// add a 32bit (dword==true) or 16bit (dword==false) constant value to a memory value
static void gen_add_direct_word(void* dest,Bit32u imm,bool dword) {
        if (!dword) imm &= 0xffff;
        if(!imm) return;

        if (!gen_mov_memval_to_reg(templo3, dest, (dword)?4:2)) {
                gen_mov_dword_to_reg_imm(templo2, (Bit32u)dest);
                gen_mov_word_to_reg_helper(templo3, dest, dword, templo2);
        }
        gen_add_imm(templo3, imm);
        if (!gen_mov_memval_from_reg(templo3, dest, (dword)?4:2)) {
                gen_mov_word_from_reg_helper(templo3, dest, dword, templo2);
        }
}

// add an 8bit constant value to a dword memory value
static void gen_add_direct_byte(void* dest,Bit8s imm) {
        gen_add_direct_word(dest, (Bit32s)imm, 1);
}

// subtract a 32bit (dword==true) or 16bit (dword==false) constant value from a memory value
static void gen_sub_direct_word(void* dest,Bit32u imm,bool dword) {
        Bit32u imm2, scale;

        if (!dword) imm &= 0xffff;
        if(!imm) return;

        if (!gen_mov_memval_to_reg(templo3, dest, (dword)?4:2)) {
                gen_mov_dword_to_reg_imm(templo2, (Bit32u)dest);
                gen_mov_word_to_reg_helper(templo3, dest, dword, templo2);
        }

        imm2 = (Bit32u) (-((Bit32s)imm));

        if (imm <= 255) {
                cache_checkinstr(2);
                cache_addw( SUB_IMM8(templo3, imm) );      // sub templo3, #imm
        } else if (imm2 <= 255) {
                cache_checkinstr(2);
                cache_addw( ADD_IMM8(templo3, imm2) );      // add templo3, #(-imm)
        } else {
                if (val_single_shift(imm2, &scale)) {
                        cache_checkinstr((scale)?6:4);
                        cache_addw( MOV_IMM(templo1, imm2 >> scale) );      // mov templo1, #(-imm >> scale)
                        if (scale) {
                                cache_addw( LSL_IMM(templo1, templo1, scale) );      // lsl templo1, templo1, #scale
                        }
                        cache_addw( ADD_REG(templo3, templo3, templo1) );      // add templo3, templo3, templo1
                } else {
                        gen_mov_dword_to_reg_imm(templo1, imm);
                        cache_checkinstr(2);
                        cache_addw( SUB_REG(templo3, templo3, templo1) );      // sub templo3, templo3, templo1
                }
        }

        if (!gen_mov_memval_from_reg(templo3, dest, (dword)?4:2)) {
                gen_mov_word_from_reg_helper(templo3, dest, dword, templo2);
        }
}

// subtract an 8bit constant value from a dword memory value
static void gen_sub_direct_byte(void* dest,Bit8s imm) {
        gen_sub_direct_word(dest, (Bit32s)imm, 1);
}

// effective address calculation, destination is dest_reg
// scale_reg is scaled by scale (scale_reg*(2^scale)) and
// added to dest_reg, then the immediate value is added
static INLINE void gen_lea(HostReg dest_reg,HostReg scale_reg,Bitu scale,Bits imm) {
        if (scale) {
                cache_checkinstr(4);
                cache_addw( LSL_IMM(templo1, scale_reg, scale) );      // lsl templo1, scale_reg, #(scale)
                cache_addw( ADD_REG(dest_reg, dest_reg, templo1) );      // add dest_reg, dest_reg, templo1
        } else {
                cache_checkinstr(2);
                cache_addw( ADD_REG(dest_reg, dest_reg, scale_reg) );      // add dest_reg, dest_reg, scale_reg
        }
        gen_add_imm(dest_reg, imm);
}

// effective address calculation, destination is dest_reg
// dest_reg is scaled by scale (dest_reg*(2^scale)),
// then the immediate value is added
static INLINE void gen_lea(HostReg dest_reg,Bitu scale,Bits imm) {
        if (scale) {
                cache_checkinstr(2);
                cache_addw( LSL_IMM(dest_reg, dest_reg, scale) );      // lsl dest_reg, dest_reg, #(scale)
        }
        gen_add_imm(dest_reg, imm);
}

// helper function for gen_call_function_raw and gen_call_function_setup
template <typename T> static void gen_call_function_helper(const T func) {
        Bit8u *datapos;

        datapos = cache_reservedata();
        *(Bit32u*)datapos=(Bit32u)func;

        if (((Bit32u)cache.pos & 0x03) == 0) {
                cache_addw( LDR_PC_IMM(templo1, datapos - (cache.pos + 4)) );      // ldr templo1, [pc, datapos]
                cache_addw( ADD_LO_PC_IMM(templo2, 8) );      // adr templo2, after_call (add templo2, pc, #8)
                cache_addw( ADD_IMM8(templo2, 1) );      // add templo2, #1
                cache_addw( MOV_HI_LO(HOST_lr, templo2) );      // mov lr, templo2
                cache_addw( BX(templo1) );      // bx templo1     --- switch to arm state
                cache_addw( NOP );      // nop
        } else {
                cache_addw( LDR_PC_IMM(templo1, datapos - (cache.pos + 2)) );      // ldr templo1, [pc, datapos]
                cache_addw( ADD_LO_PC_IMM(templo2, 4) );      // adr templo2, after_call (add templo2, pc, #4)
                cache_addw( ADD_IMM8(templo2, 1) );      // add templo2, #1
                cache_addw( MOV_HI_LO(HOST_lr, templo2) );      // mov lr, templo2
                cache_addw( BX(templo1) );      // bx templo1     --- switch to arm state
        }
        // after_call:

        // thumb state from now on
}
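
// Why the dance above (explanatory note, not from the original source):
// Thumb-1 has no blx, so the return address is built by hand. The callee
// address is fetched from the data pool; "adr" computes the word-aligned
// address just past this sequence into templo2; the "add #1" sets bit 0 so
// that returning through "bx lr" re-enters Thumb state (the reason the
// -mthumb-interwork switch is required); then "bx templo1" enters the
// ARM-state callee. The trailing nop in the aligned case pads the sequence
// so the adr-computed return address lands exactly after it.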

// generate a call to a parameterless function
template <typename T> static void INLINE gen_call_function_raw(const T func) {
        cache_checkinstr(12);
        gen_call_function_helper(func);
}

// generate a call to a function with paramcount parameters
// note: the parameters are loaded in the architecture specific way
// using the gen_load_param_ functions below
template <typename T> static Bit32u INLINE gen_call_function_setup(const T func,Bitu paramcount,bool fastcall=false) {
        cache_checkinstr(12);
        Bit32u proc_addr = (Bit32u)cache.pos;
        gen_call_function_helper(func);
        return proc_addr;
        // if proc_addr is on word boundary ((proc_addr & 0x03) == 0)
        //   then length of generated code is 12 bytes
        //   otherwise length of generated code is 10 bytes
}

#if (1)
// max of 4 parameters in a1-a4

// load an immediate value as param'th function parameter
static void INLINE gen_load_param_imm(Bitu imm,Bitu param) {
        gen_mov_dword_to_reg_imm(param, imm);
}

// load an address as param'th function parameter
static void INLINE gen_load_param_addr(Bitu addr,Bitu param) {
        gen_mov_dword_to_reg_imm(param, addr);
}

// load a host-register as param'th function parameter
static void INLINE gen_load_param_reg(Bitu reg,Bitu param) {
        gen_mov_regs(param, reg);
}

// load a value from memory as param'th function parameter
static void INLINE gen_load_param_mem(Bitu mem,Bitu param) {
        gen_mov_word_to_reg(param, (void *)mem, 1);
}
#else
        other arm abis
#endif

// jump to an address pointed at by ptr, offset is in imm
static void gen_jmp_ptr(void * ptr,Bits imm=0) {
        gen_mov_word_to_reg(templo3, ptr, 1);

#if !defined(C_UNALIGNED_MEMORY)
// (*ptr) should be word aligned
        if ((imm & 0x03) == 0) {
#endif
                if ((imm >= 0) && (imm < 128) && ((imm & 3) == 0)) {
                        cache_checkinstr(6);
                        cache_addw( LDR_IMM(templo2, templo3, imm) );      // ldr templo2, [templo3, #imm]
                } else {
                        gen_mov_dword_to_reg_imm(templo2, imm);
                        cache_checkinstr(6);
                        cache_addw( LDR_REG(templo2, templo3, templo2) );      // ldr templo2, [templo3, templo2]
                }
#if !defined(C_UNALIGNED_MEMORY)
        } else {
                gen_add_imm(templo3, imm);

                cache_checkinstr(24);
                cache_addw( LDRB_IMM(templo2, templo3, 0) );      // ldrb templo2, [templo3]
                cache_addw( LDRB_IMM(templo1, templo3, 1) );      // ldrb templo1, [templo3, #1]
                cache_addw( LSL_IMM(templo1, templo1, 8) );      // lsl templo1, templo1, #8
                cache_addw( ORR(templo2, templo1) );      // orr templo2, templo1
                cache_addw( LDRB_IMM(templo1, templo3, 2) );      // ldrb templo1, [templo3, #2]
                cache_addw( LSL_IMM(templo1, templo1, 16) );      // lsl templo1, templo1, #16
                cache_addw( ORR(templo2, templo1) );      // orr templo2, templo1
                cache_addw( LDRB_IMM(templo1, templo3, 3) );      // ldrb templo1, [templo3, #3]
                cache_addw( LSL_IMM(templo1, templo1, 24) );      // lsl templo1, templo1, #24
                cache_addw( ORR(templo2, templo1) );      // orr templo2, templo1
        }
#endif

        // increase jmp address to keep thumb state
        cache_addw( ADD_IMM3(templo2, templo2, 1) );      // add templo2, templo2, #1

        cache_addw( BX(templo2) );      // bx templo2
}

// short conditional jump (+-127 bytes) if register is zero
// the destination is set by gen_fill_branch() later
static Bit32u gen_create_branch_on_zero(HostReg reg,bool dword) {
        cache_checkinstr(4);
        if (dword) {
                cache_addw( CMP_IMM(reg, 0) );      // cmp reg, #0
        } else {
                cache_addw( LSL_IMM(templo1, reg, 16) );      // lsl templo1, reg, #16
        }
        cache_addw( BEQ_FWD(0) );      // beq j
        return ((Bit32u)cache.pos-2);
}

// short conditional jump (+-127 bytes) if register is nonzero
// the destination is set by gen_fill_branch() later
static Bit32u gen_create_branch_on_nonzero(HostReg reg,bool dword) {
        cache_checkinstr(4);
        if (dword) {
                cache_addw( CMP_IMM(reg, 0) );      // cmp reg, #0
        } else {
                cache_addw( LSL_IMM(templo1, reg, 16) );      // lsl templo1, reg, #16
        }
        cache_addw( BNE_FWD(0) );      // bne j
        return ((Bit32u)cache.pos-2);
}

// calculate relative offset and fill it into the location pointed to by data
static void INLINE gen_fill_branch(DRC_PTR_SIZE_IM data) {
#if C_DEBUG
        Bits len=(Bit32u)cache.pos-(data+4);
        if (len<0) len=-len;
        if (len>252) LOG_MSG("Big jump %d",len);
#endif
        *(Bit8u*)data=(Bit8u)( ((Bit32u)cache.pos-(data+4)) >> 1 );
}
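
// Example (illustrative): gen_create_branch_on_zero() returns the address
// A of the 16-bit beq itself; if cache.pos has advanced to A + 10 when
// gen_fill_branch(A) runs, the stored offset byte is
// (A + 10 - (A + 4)) >> 1 == 3, and the processor resolves the branch as
// pc + 4 + 2*3, i.e. exactly A + 10.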


// conditional jump if register is nonzero
// for isdword==true the 32bit of the register are tested
// for isdword==false the lowest 8bit of the register are tested
static Bit32u gen_create_branch_long_nonzero(HostReg reg,bool isdword) {
        Bit8u *datapos;

        cache_checkinstr(8);
        datapos = cache_reservedata();

        if (isdword) {
                cache_addw( CMP_IMM(reg, 0) );      // cmp reg, #0
        } else {
                cache_addw( LSL_IMM(templo2, reg, 24) );      // lsl templo2, reg, #24
        }
        cache_addw( BEQ_FWD(2) );      // beq nobranch (pc+2)
        if (((Bit32u)cache.pos & 0x03) == 0) {
                cache_addw( LDR_PC_IMM(templo1, datapos - (cache.pos + 4)) );      // ldr templo1, [pc, datapos]
        } else {
                cache_addw( LDR_PC_IMM(templo1, datapos - (cache.pos + 2)) );      // ldr templo1, [pc, datapos]
        }
        cache_addw( BX(templo1) );      // bx templo1
        // nobranch:
        return ((Bit32u)datapos);
}

// compare 32bit-register against zero and jump if value less/equal than zero
static Bit32u gen_create_branch_long_leqzero(HostReg reg) {
        Bit8u *datapos;

        cache_checkinstr(8);
        datapos = cache_reservedata();

        cache_addw( CMP_IMM(reg, 0) );      // cmp reg, #0
        cache_addw( BGT_FWD(2) );      // bgt nobranch (pc+2)
        if (((Bit32u)cache.pos & 0x03) == 0) {
                cache_addw( LDR_PC_IMM(templo1, datapos - (cache.pos + 4)) );      // ldr templo1, [pc, datapos]
        } else {
                cache_addw( LDR_PC_IMM(templo1, datapos - (cache.pos + 2)) );      // ldr templo1, [pc, datapos]
        }
        cache_addw( BX(templo1) );      // bx templo1
        // nobranch:
        return ((Bit32u)datapos);
}

// calculate long relative offset and fill it into the location pointed to by data
static void INLINE gen_fill_branch_long(Bit32u data) {
        // this is an absolute branch
        *(Bit32u*)data=((Bit32u)cache.pos) + 1; // add 1 to keep processor in thumb state
}
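
// Note (illustrative): the "long branch" emitted above is really a
// pc-relative ldr plus "bx templo1", so the returned data pool slot later
// receives an absolute target address; gen_fill_branch_long() stores that
// address with bit 0 set, which keeps the processor in Thumb state across
// the bx.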

static void gen_run_code(void) {
        Bit8u *pos1, *pos2, *pos3;

#if (__ARM_EABI__)
        // 8-byte stack alignment
        cache_addd(0xe92d4ff0);                 // stmfd sp!, {v1-v8,lr}
#else
        cache_addd(0xe92d4df0);                 // stmfd sp!, {v1-v5,v7,v8,lr}
#endif

        cache_addd( ARM_ADD_IMM(HOST_r0, HOST_r0, 1, 0) );      // add r0, r0, #1

        pos1 = cache.pos;
        cache_addd( 0 );
        pos2 = cache.pos;
        cache_addd( 0 );
        pos3 = cache.pos;
        cache_addd( 0 );

        cache_addd( ARM_ADD_IMM(HOST_lr, HOST_pc, 4, 0) );                      // add lr, pc, #4
        cache_addd( ARM_STR_IMM_M_W(HOST_lr, HOST_sp, 4) );      // str lr, [sp, #-4]!
        cache_addd( ARM_BX(HOST_r0) );                  // bx r0

#if (__ARM_EABI__)
        cache_addd(0xe8bd4ff0);                 // ldmfd sp!, {v1-v8,lr}
#else
        cache_addd(0xe8bd4df0);                 // ldmfd sp!, {v1-v5,v7,v8,lr}
#endif
        cache_addd( ARM_BX(HOST_lr) );                  // bx lr

        // align cache.pos to 32 bytes
        if ((((Bitu)cache.pos) & 0x1f) != 0) {
                cache.pos = cache.pos + (32 - (((Bitu)cache.pos) & 0x1f));
        }

        *(Bit32u*)pos1 = ARM_LDR_IMM(FC_SEGS_ADDR, HOST_pc, cache.pos - (pos1 + 8));      // ldr FC_SEGS_ADDR, [pc, #(&Segs)]
        cache_addd((Bit32u)&Segs);      // address of "Segs"

        *(Bit32u*)pos2 = ARM_LDR_IMM(FC_REGS_ADDR, HOST_pc, cache.pos - (pos2 + 8));      // ldr FC_REGS_ADDR, [pc, #(&cpu_regs)]
        cache_addd((Bit32u)&cpu_regs);  // address of "cpu_regs"

        *(Bit32u*)pos3 = ARM_LDR_IMM(readdata_addr, HOST_pc, cache.pos - (pos3 + 8));      // ldr readdata_addr, [pc, #(&core_dynrec.readdata)]
        cache_addd((Bit32u)&core_dynrec.readdata);  // address of "core_dynrec.readdata"

        // align cache.pos to 32 bytes
        if ((((Bitu)cache.pos) & 0x1f) != 0) {
                cache.pos = cache.pos + (32 - (((Bitu)cache.pos) & 0x1f));
        }
}
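
// Rough shape of the ARM-state entry stub emitted above (explanatory
// sketch, not from the original source):
//   stmfd sp!, {...}                         save callee-saved regs and lr
//   add   r0, r0, #1                         set bit 0 -> enter block in Thumb state
//   ldr   FC_SEGS_ADDR, =Segs                patched in via pos1
//   ldr   FC_REGS_ADDR, =cpu_regs            patched in via pos2
//   ldr   readdata_addr, =core_dynrec.readdata   patched in via pos3
//   add   lr, pc, #4 ; str lr, [sp, #-4]! ; bx r0    call the code block
//   ldmfd sp!, {...} ; bx lr                 restore and return to the caller
// The three literal words are emitted after the code, 32-byte aligned, and
// the placeholder instructions are back-patched with pc-relative loads
// once their final distance is known.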

// return from a function
static void gen_return_function(void) {
        cache_checkinstr(4);
        cache_addw(0xbc08);      // pop {r3}
        cache_addw( BX(HOST_r3) );      // bx r3
}


// short unconditional jump (over data pool)
// must emit at most CACHE_DATA_JUMP bytes
static void INLINE gen_create_branch_short(void * func) {
        cache_addw( B_FWD((Bit32u)func - ((Bit32u)cache.pos + 4)) );      // b func
}


#ifdef DRC_FLAGS_INVALIDATION

// called when a call to a function can be replaced by a
// call to a simpler function
static void gen_fill_function_ptr(Bit8u * pos,void* fct_ptr,Bitu flags_type) {
        if ((*(Bit16u*)pos & 0xf000) == 0xe000) {
                if ((*(Bit16u*)pos & 0x0fff) >= ((CACHE_DATA_ALIGN / 2) - 1) &&
                        (*(Bit16u*)pos & 0x0fff) < 0x0800)
                {
                        pos = (Bit8u *) ( ( ( (Bit32u)(*(Bit16u*)pos & 0x0fff) ) << 1 ) + ((Bit32u)pos + 4) );
                }
        }
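
        // Explanatory note (not from the original source): the check above
        // detects the case where the recorded call site is actually the
        // unconditional Thumb branch that jumps over a data pool (top bits
        // 11100, plausible forward offset); in that case the real call
        // sequence lives at the branch target, so pos is redirected there
        // before patching.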

#ifdef DRC_FLAGS_INVALIDATION_DCODE
        if (((Bit32u)pos & 0x03) == 0)
        {
                // try to avoid function calls but rather directly fill in code
                switch (flags_type) {
                        case t_ADDb:
                        case t_ADDw:
                        case t_ADDd:
                                *(Bit16u*)pos=ADD_REG(HOST_a1, HOST_a1, HOST_a2);       // add a1, a1, a2
                                *(Bit16u*)(pos+2)=B_FWD(6);                                                     // b after_call (pc+6)
                                break;
                        case t_ORb:
                        case t_ORw:
                        case t_ORd:
                                *(Bit16u*)pos=ORR(HOST_a1, HOST_a2);                            // orr a1, a2
                                *(Bit16u*)(pos+2)=B_FWD(6);                                                     // b after_call (pc+6)
                                break;
                        case t_ANDb:
                        case t_ANDw:
                        case t_ANDd:
                                *(Bit16u*)pos=AND(HOST_a1, HOST_a2);                            // and a1, a2
                                *(Bit16u*)(pos+2)=B_FWD(6);                                                     // b after_call (pc+6)
                                break;
                        case t_SUBb:
                        case t_SUBw:
                        case t_SUBd:
                                *(Bit16u*)pos=SUB_REG(HOST_a1, HOST_a1, HOST_a2);       // sub a1, a1, a2
                                *(Bit16u*)(pos+2)=B_FWD(6);                                                     // b after_call (pc+6)
                                break;
                        case t_XORb:
                        case t_XORw:
                        case t_XORd:
                                *(Bit16u*)pos=EOR(HOST_a1, HOST_a2);                            // eor a1, a2
                                *(Bit16u*)(pos+2)=B_FWD(6);                                                     // b after_call (pc+6)
                                break;
                        case t_CMPb:
                        case t_CMPw:
                        case t_CMPd:
                        case t_TESTb:
                        case t_TESTw:
                        case t_TESTd:
                                *(Bit16u*)pos=B_FWD(8);                                                         // b after_call (pc+8)
                                break;
                        case t_INCb:
                        case t_INCw:
                        case t_INCd:
                                *(Bit16u*)pos=ADD_IMM3(HOST_a1, HOST_a1, 1);            // add a1, a1, #1
                                *(Bit16u*)(pos+2)=B_FWD(6);                                                     // b after_call (pc+6)
                                break;
                        case t_DECb:
                        case t_DECw:
                        case t_DECd:
                                *(Bit16u*)pos=SUB_IMM3(HOST_a1, HOST_a1, 1);            // sub a1, a1, #1
                                *(Bit16u*)(pos+2)=B_FWD(6);                                                     // b after_call (pc+6)
                                break;
                        case t_SHLb:
                        case t_SHLw:
                        case t_SHLd:
                                *(Bit16u*)pos=LSL_REG(HOST_a1, HOST_a2);                        // lsl a1, a2
                                *(Bit16u*)(pos+2)=B_FWD(6);                                                     // b after_call (pc+6)
                                break;
                        case t_SHRb:
                                *(Bit16u*)pos=LSL_IMM(HOST_a1, HOST_a1, 24);            // lsl a1, a1, #24
                                *(Bit16u*)(pos+2)=NOP;                                                          // nop
                                *(Bit16u*)(pos+4)=LSR_IMM(HOST_a1, HOST_a1, 24);        // lsr a1, a1, #24
                                *(Bit16u*)(pos+6)=NOP;                                                          // nop
                                *(Bit16u*)(pos+8)=LSR_REG(HOST_a1, HOST_a2);            // lsr a1, a2
                                *(Bit16u*)(pos+10)=NOP;                                                         // nop
                                break;
                        case t_SHRw:
                                *(Bit16u*)pos=LSL_IMM(HOST_a1, HOST_a1, 16);            // lsl a1, a1, #16
                                *(Bit16u*)(pos+2)=NOP;                                                          // nop
01172                                 *(Bit16u*)(pos+4)=LSR_IMM(HOST_a1, HOST_a1, 16);        // lsr a1, a1, #16
01173                                 *(Bit16u*)(pos+6)=NOP;                                                          // nop
01174                                 *(Bit16u*)(pos+8)=LSR_REG(HOST_a1, HOST_a2);            // lsr a1, a2
01175                                 *(Bit16u*)(pos+10)=NOP;                                                         // nop
01176                                 break;
01177                         case t_SHRd:
01178                                 *(Bit16u*)pos=LSR_REG(HOST_a1, HOST_a2);                        // lsr a1, a2
01179                                 *(Bit16u*)(pos+2)=B_FWD(6);                                                     // b after_call (pc+6)
01180                                 break;
01181                         case t_SARb:
01182                                 *(Bit16u*)pos=LSL_IMM(HOST_a1, HOST_a1, 24);            // lsl a1, a1, #24
01183                                 *(Bit16u*)(pos+2)=NOP;                                                          // nop
01184                                 *(Bit16u*)(pos+4)=ASR_IMM(HOST_a1, HOST_a1, 24);        // asr a1, a1, #24
01185                                 *(Bit16u*)(pos+6)=NOP;                                                          // nop
01186                                 *(Bit16u*)(pos+8)=ASR_REG(HOST_a1, HOST_a2);            // asr a1, a2
01187                                 *(Bit16u*)(pos+10)=NOP;                                                         // nop
01188                                 break;
01189                         case t_SARw:
01190                                 *(Bit16u*)pos=LSL_IMM(HOST_a1, HOST_a1, 16);            // lsl a1, a1, #16
01191                                 *(Bit16u*)(pos+2)=NOP;                                                          // nop
01192                                 *(Bit16u*)(pos+4)=ASR_IMM(HOST_a1, HOST_a1, 16);        // asr a1, a1, #16
01193                                 *(Bit16u*)(pos+6)=NOP;                                                          // nop
01194                                 *(Bit16u*)(pos+8)=ASR_REG(HOST_a1, HOST_a2);            // asr a1, a2
01195                                 *(Bit16u*)(pos+10)=NOP;                                                         // nop
01196                                 break;
01197                         case t_SARd:
01198                                 *(Bit16u*)pos=ASR_REG(HOST_a1, HOST_a2);                        // asr a1, a2
01199                                 *(Bit16u*)(pos+2)=B_FWD(6);                                                     // b after_call (pc+6)
01200                                 break;
01201                         case t_RORb:
01202                                 *(Bit16u*)pos=LSL_IMM(HOST_a1, HOST_a1, 24);            // lsl a1, a1, #24
01203                                 *(Bit16u*)(pos+2)=LSR_IMM(templo1, HOST_a1, 8);         // lsr templo1, a1, #8
01204                                 *(Bit16u*)(pos+4)=ORR(HOST_a1, templo1);                        // orr a1, templo1
01205                                 *(Bit16u*)(pos+6)=LSR_IMM(templo1, HOST_a1, 16);        // lsr templo1, a1, #16
01206                                 *(Bit16u*)(pos+8)=ORR(HOST_a1, templo1);                        // orr a1, templo1
01207                                 *(Bit16u*)(pos+10)=ROR_REG(HOST_a1, HOST_a2);           // ror a1, a2
01208                                 break;
01209                         case t_RORw:
01210                                 *(Bit16u*)pos=LSL_IMM(HOST_a1, HOST_a1, 16);            // lsl a1, a1, #16
01211                                 *(Bit16u*)(pos+2)=LSR_IMM(templo1, HOST_a1, 16);        // lsr templo1, a1, #16
01212                                 *(Bit16u*)(pos+4)=NOP;                                                          // nop
01213                                 *(Bit16u*)(pos+6)=ORR(HOST_a1, templo1);                        // orr a1, templo1
01214                                 *(Bit16u*)(pos+8)=NOP;                                                          // nop
01215                                 *(Bit16u*)(pos+10)=ROR_REG(HOST_a1, HOST_a2);           // ror a1, a2
01216                                 break;
01217                         case t_RORd:
01218                                 *(Bit16u*)pos=ROR_REG(HOST_a1, HOST_a2);                        // ror a1, a2
01219                                 *(Bit16u*)(pos+2)=B_FWD(6);                                                     // b after_call (pc+6)
01220                                 break;
01221                         case t_ROLw:
01222                                 *(Bit16u*)pos=LSL_IMM(HOST_a1, HOST_a1, 16);            // lsl a1, a1, #16
01223                                 *(Bit16u*)(pos+2)=NEG(HOST_a2, HOST_a2);                        // neg a2, a2
01224                                 *(Bit16u*)(pos+4)=LSR_IMM(templo1, HOST_a1, 16);        // lsr templo1, a1, #16
01225                                 *(Bit16u*)(pos+6)=ADD_IMM8(HOST_a2, 32);                        // add a2, #32
01226                                 *(Bit16u*)(pos+8)=ORR(HOST_a1, templo1);                        // orr a1, templo1
01227                                 *(Bit16u*)(pos+10)=ROR_REG(HOST_a1, HOST_a2);           // ror a1, a2
01228                                 break;
01229                         case t_ROLd:
01230                                 *(Bit16u*)pos=NEG(HOST_a2, HOST_a2);                            // neg a2, a2
01231                                 *(Bit16u*)(pos+2)=NOP;                                                          // nop
01232                                 *(Bit16u*)(pos+4)=ADD_IMM8(HOST_a2, 32);                        // add a2, #32
01233                                 *(Bit16u*)(pos+6)=NOP;                                                          // nop
01234                                 *(Bit16u*)(pos+8)=ROR_REG(HOST_a1, HOST_a2);            // ror a1, a2
01235                                 *(Bit16u*)(pos+10)=NOP;                                                         // nop
01236                                 break;
01237                         case t_NEGb:
01238                         case t_NEGw:
01239                         case t_NEGd:
01240                                 *(Bit16u*)pos=NEG(HOST_a1, HOST_a1);                            // neg a1, a1
01241                                 *(Bit16u*)(pos+2)=B_FWD(6);                                                     // b after_call (pc+6)
01242                                 break;
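                              // default: patch the replacement function's address into the
                              // call sequence's data-pool slot. *pos is the low byte of a
                              // Thumb "ldr rX, [pc, #imm8]" and holds its word offset; the
                              // slot sits at Align(pos + 4, 4) + imm8 * 4, which for the
                              // 4-byte-aligned pos here is (imm8 << 2) + pos + 4.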
01243                         default:
01244                                 *(Bit32u*)( ( ((Bit32u) (*pos)) << 2 ) + ((Bit32u)pos + 4) ) = (Bit32u)fct_ptr;         // simple_func
01245                                 break;
01246                 }
01247         }
01248         else
01249         {
01250                 // as above, but for a 2-byte-aligned call site (the patched sequence is one halfword shorter)
01251                 switch (flags_type) {
01252                         case t_ADDb:
01253                         case t_ADDw:
01254                         case t_ADDd:
01255                                 *(Bit16u*)pos=ADD_REG(HOST_a1, HOST_a1, HOST_a2);       // add a1, a1, a2
01256                                 *(Bit16u*)(pos+2)=B_FWD(4);                                                     // b after_call (pc+4)
01257                                 break;
01258                         case t_ORb:
01259                         case t_ORw:
01260                         case t_ORd:
01261                                 *(Bit16u*)pos=ORR(HOST_a1, HOST_a2);                            // orr a1, a2
01262                                 *(Bit16u*)(pos+2)=B_FWD(4);                                                     // b after_call (pc+4)
01263                                 break;
01264                         case t_ANDb:
01265                         case t_ANDw:
01266                         case t_ANDd:
01267                                 *(Bit16u*)pos=AND(HOST_a1, HOST_a2);                            // and a1, a2
01268                                 *(Bit16u*)(pos+2)=B_FWD(4);                                                     // b after_call (pc+4)
01269                                 break;
01270                         case t_SUBb:
01271                         case t_SUBw:
01272                         case t_SUBd:
01273                                 *(Bit16u*)pos=SUB_REG(HOST_a1, HOST_a1, HOST_a2);       // sub a1, a1, a2
01274                                 *(Bit16u*)(pos+2)=B_FWD(4);                                                     // b after_call (pc+4)
01275                                 break;
01276                         case t_XORb:
01277                         case t_XORw:
01278                         case t_XORd:
01279                                 *(Bit16u*)pos=EOR(HOST_a1, HOST_a2);                            // eor a1, a2
01280                                 *(Bit16u*)(pos+2)=B_FWD(4);                                                     // b after_call (pc+4)
01281                                 break;
01282                         case t_CMPb:
01283                         case t_CMPw:
01284                         case t_CMPd:
01285                         case t_TESTb:
01286                         case t_TESTw:
01287                         case t_TESTd:
01288                                 *(Bit16u*)pos=B_FWD(6);                                                         // b after_call (pc+6)
01289                                 break;
01290                         case t_INCb:
01291                         case t_INCw:
01292                         case t_INCd:
01293                                 *(Bit16u*)pos=ADD_IMM3(HOST_a1, HOST_a1, 1);            // add a1, a1, #1
01294                                 *(Bit16u*)(pos+2)=B_FWD(4);                                                     // b after_call (pc+4)
01295                                 break;
01296                         case t_DECb:
01297                         case t_DECw:
01298                         case t_DECd:
01299                                 *(Bit16u*)pos=SUB_IMM3(HOST_a1, HOST_a1, 1);            // sub a1, a1, #1
01300                                 *(Bit16u*)(pos+2)=B_FWD(4);                                                     // b after_call (pc+4)
01301                                 break;
01302                         case t_SHLb:
01303                         case t_SHLw:
01304                         case t_SHLd:
01305                                 *(Bit16u*)pos=LSL_REG(HOST_a1, HOST_a2);                        // lsl a1, a2
01306                                 *(Bit16u*)(pos+2)=B_FWD(4);                                                     // b after_call (pc+4)
01307                                 break;
01308                         case t_SHRb:
01309                                 *(Bit16u*)pos=LSL_IMM(HOST_a1, HOST_a1, 24);            // lsl a1, a1, #24
01310                                 *(Bit16u*)(pos+2)=NOP;                                                          // nop
01311                                 *(Bit16u*)(pos+4)=LSR_IMM(HOST_a1, HOST_a1, 24);        // lsr a1, a1, #24
01312                                 *(Bit16u*)(pos+6)=NOP;                                                          // nop
01313                                 *(Bit16u*)(pos+8)=LSR_REG(HOST_a1, HOST_a2);            // lsr a1, a2
01314                                 break;
01315                         case t_SHRw:
01316                                 *(Bit16u*)pos=LSL_IMM(HOST_a1, HOST_a1, 16);            // lsl a1, a1, #16
01317                                 *(Bit16u*)(pos+2)=NOP;                                                          // nop
01318                                 *(Bit16u*)(pos+4)=LSR_IMM(HOST_a1, HOST_a1, 16);        // lsr a1, a1, #16
01319                                 *(Bit16u*)(pos+6)=NOP;                                                          // nop
01320                                 *(Bit16u*)(pos+8)=LSR_REG(HOST_a1, HOST_a2);            // lsr a1, a2
01321                                 break;
01322                         case t_SHRd:
01323                                 *(Bit16u*)pos=LSR_REG(HOST_a1, HOST_a2);                        // lsr a1, a2
01324                                 *(Bit16u*)(pos+2)=B_FWD(4);                                                     // b after_call (pc+4)
01325                                 break;
01326                         case t_SARb:
01327                                 *(Bit16u*)pos=LSL_IMM(HOST_a1, HOST_a1, 24);            // lsl a1, a1, #24
01328                                 *(Bit16u*)(pos+2)=NOP;                                                          // nop
01329                                 *(Bit16u*)(pos+4)=ASR_IMM(HOST_a1, HOST_a1, 24);        // asr a1, a1, #24
01330                                 *(Bit16u*)(pos+6)=NOP;                                                          // nop
01331                                 *(Bit16u*)(pos+8)=ASR_REG(HOST_a1, HOST_a2);            // asr a1, a2
01332                                 break;
01333                         case t_SARw:
01334                                 *(Bit16u*)pos=LSL_IMM(HOST_a1, HOST_a1, 16);            // lsl a1, a1, #16
01335                                 *(Bit16u*)(pos+2)=NOP;                                                          // nop
01336                                 *(Bit16u*)(pos+4)=ASR_IMM(HOST_a1, HOST_a1, 16);        // asr a1, a1, #16
01337                                 *(Bit16u*)(pos+6)=NOP;                                                          // nop
01338                                 *(Bit16u*)(pos+8)=ASR_REG(HOST_a1, HOST_a2);            // asr a1, a2
01339                                 break;
01340                         case t_SARd:
01341                                 *(Bit16u*)pos=ASR_REG(HOST_a1, HOST_a2);                        // asr a1, a2
01342                                 *(Bit16u*)(pos+2)=B_FWD(4);                                                     // b after_call (pc+4)
01343                                 break;
01344                         case t_RORw:
01345                                 *(Bit16u*)pos=LSL_IMM(HOST_a1, HOST_a1, 16);            // lsl a1, a1, #16
01346                                 *(Bit16u*)(pos+2)=LSR_IMM(templo1, HOST_a1, 16);        // lsr templo1, a1, #16
01347                                 *(Bit16u*)(pos+4)=NOP;                                                          // nop
01348                                 *(Bit16u*)(pos+6)=ORR(HOST_a1, templo1);                        // orr a1, templo1
01349                                 *(Bit16u*)(pos+8)=ROR_REG(HOST_a1, HOST_a2);            // ror a1, a2
01350                                 break;
01351                         case t_RORd:
01352                                 *(Bit16u*)pos=ROR_REG(HOST_a1, HOST_a2);                        // ror a1, a2
01353                                 *(Bit16u*)(pos+2)=B_FWD(4);                                                     // b after_call (pc+4)
01354                                 break;
01355                         case t_ROLd:
01356                                 *(Bit16u*)pos=NEG(HOST_a2, HOST_a2);                            // neg a2, a2
01357                                 *(Bit16u*)(pos+2)=NOP;                                                          // nop
01358                                 *(Bit16u*)(pos+4)=ADD_IMM8(HOST_a2, 32);                        // add a2, #32
01359                                 *(Bit16u*)(pos+6)=NOP;                                                          // nop
01360                                 *(Bit16u*)(pos+8)=ROR_REG(HOST_a1, HOST_a2);            // ror a1, a2
01361                                 break;
01362                         case t_NEGb:
01363                         case t_NEGw:
01364                         case t_NEGd:
01365                                 *(Bit16u*)pos=NEG(HOST_a1, HOST_a1);                            // neg a1, a1
01366                                 *(Bit16u*)(pos+2)=B_FWD(4);                                                     // b after_call (pc+4)
01367                                 break;
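                              // same data-pool patch as in the aligned case, but here pos
                              // is only 2-byte aligned, so Align(pos + 4, 4) = pos + 2.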
01368                         default:
01369                                 *(Bit32u*)( ( ((Bit32u) (*pos)) << 2 ) + ((Bit32u)pos + 2) ) = (Bit32u)fct_ptr;         // simple_func
01370                                 break;
01371                 }
01372 
01373         }
01374 #else
01375         if (((Bit32u)pos & 0x03) == 0)
01376         {
01377                 *(Bit32u*)( ( ((Bit32u) (*pos)) << 2 ) + ((Bit32u)pos + 4) ) = (Bit32u)fct_ptr;         // simple_func
01378         }
01379         else
01380         {
01381                 *(Bit32u*)( ( ((Bit32u) (*pos)) << 2 ) + ((Bit32u)pos + 2) ) = (Bit32u)fct_ptr;         // simple_func
01382         }
01383 #endif
01384 }
01385 #endif
01386 
01387 #ifdef DRC_USE_SEGS_ADDR
01388 
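      // FC_SEGS_ADDR and FC_REGS_ADDR live in high registers (v7/v8), which
      // Thumb load/store instructions cannot address directly; each accessor
      // below therefore copies the base into a temporary lo register first.
      // The immediate-offset forms also bound index: Thumb encodes a 5-bit
      // offset scaled by the access size (so at most 124 for ldr/str, 62 for
      // ldrh/strh, 31 for ldrb/strb).
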
01389 // mov 16bit value from Segs[index] into dest_reg using FC_SEGS_ADDR (index modulo 2 must be zero)
01390 // 16bit moves may destroy the upper 16bit of the destination register
01391 static void gen_mov_seg16_to_reg(HostReg dest_reg,Bitu index) {
01392         cache_checkinstr(4);
01393         cache_addw( MOV_LO_HI(templo1, FC_SEGS_ADDR) );      // mov templo1, FC_SEGS_ADDR
01394         cache_addw( LDRH_IMM(dest_reg, templo1, index) );      // ldrh dest_reg, [templo1, #index]
01395 }
01396 
01397 // mov 32bit value from Segs[index] into dest_reg using FC_SEGS_ADDR (index modulo 4 must be zero)
01398 static void gen_mov_seg32_to_reg(HostReg dest_reg,Bitu index) {
01399         cache_checkinstr(4);
01400         cache_addw( MOV_LO_HI(templo1, FC_SEGS_ADDR) );      // mov templo1, FC_SEGS_ADDR
01401         cache_addw( LDR_IMM(dest_reg, templo1, index) );      // ldr dest_reg, [templo1, #index]
01402 }
01403 
01404 // add a 32bit value from Segs[index] to a full register using FC_SEGS_ADDR (index modulo 4 must be zero)
01405 static void gen_add_seg32_to_reg(HostReg reg,Bitu index) {
01406         cache_checkinstr(6);
01407         cache_addw( MOV_LO_HI(templo1, FC_SEGS_ADDR) );      // mov templo1, FC_SEGS_ADDR
01408         cache_addw( LDR_IMM(templo2, templo1, index) );      // ldr templo2, [templo1, #index]
01409         cache_addw( ADD_REG(reg, reg, templo2) );      // add reg, reg, templo2
01410 }
01411 
01412 #endif
01413 
01414 #ifdef DRC_USE_REGS_ADDR
01415 
01416 // mov 16bit value from cpu_regs[index] into dest_reg using FC_REGS_ADDR (index modulo 2 must be zero)
01417 // 16bit moves may destroy the upper 16bit of the destination register
01418 static void gen_mov_regval16_to_reg(HostReg dest_reg,Bitu index) {
01419         cache_checkinstr(4);
01420         cache_addw( MOV_LO_HI(templo2, FC_REGS_ADDR) );      // mov templo2, FC_REGS_ADDR
01421         cache_addw( LDRH_IMM(dest_reg, templo2, index) );      // ldrh dest_reg, [templo2, #index]
01422 }
01423 
01424 // mov 32bit value from cpu_regs[index] into dest_reg using FC_REGS_ADDR (index modulo 4 must be zero)
01425 static void gen_mov_regval32_to_reg(HostReg dest_reg,Bitu index) {
01426         cache_checkinstr(4);
01427         cache_addw( MOV_LO_HI(templo2, FC_REGS_ADDR) );      // mov templo2, FC_REGS_ADDR
01428         cache_addw( LDR_IMM(dest_reg, templo2, index) );      // ldr dest_reg, [templo2, #index]
01429 }
01430 
01431 // move a 32bit (dword==true) or 16bit (dword==false) value from cpu_regs[index] into dest_reg using FC_REGS_ADDR (index modulo 4 must be zero if dword==true, modulo 2 if dword==false)
01432 // 16bit moves may destroy the upper 16bit of the destination register
01433 static void gen_mov_regword_to_reg(HostReg dest_reg,Bitu index,bool dword) {
01434         cache_checkinstr(4);
01435         cache_addw( MOV_LO_HI(templo2, FC_REGS_ADDR) );      // mov templo2, FC_REGS_ADDR
01436         if (dword) {
01437                 cache_addw( LDR_IMM(dest_reg, templo2, index) );      // ldr dest_reg, [templo2, #index]
01438         } else {
01439                 cache_addw( LDRH_IMM(dest_reg, templo2, index) );      // ldrh dest_reg, [templo2, #index]
01440         }
01441 }
01442 
01443 // move an 8bit value from cpu_regs[index] into dest_reg using FC_REGS_ADDR
01444 // the upper 24bit of the destination register can be destroyed
01445 // this function does not use FC_OP1/FC_OP2 as dest_reg because these
01446 // registers might not be directly byte-accessible on some architectures
01447 static void gen_mov_regbyte_to_reg_low(HostReg dest_reg,Bitu index) {
01448         cache_checkinstr(4);
01449         cache_addw( MOV_LO_HI(templo2, FC_REGS_ADDR) );      // mov templo2, FC_REGS_ADDR
01450         cache_addw( LDRB_IMM(dest_reg, templo2, index) );      // ldrb dest_reg, [templo2, #index]
01451 }
01452 
01453 // move an 8bit value from cpu_regs[index] into dest_reg using FC_REGS_ADDR
01454 // the upper 24bit of the destination register can be destroyed
01455 // this function can use FC_OP1/FC_OP2 as dest_reg, even though these
01456 // registers might not be directly byte-accessible on some architectures
01457 static void INLINE gen_mov_regbyte_to_reg_low_canuseword(HostReg dest_reg,Bitu index) {
01458         cache_checkinstr(4);
01459         cache_addw( MOV_LO_HI(templo2, FC_REGS_ADDR) );      // mov templo2, FC_REGS_ADDR
01460         cache_addw( LDRB_IMM(dest_reg, templo2, index) );      // ldrb dest_reg, [templo2, #index]
01461 }
01462 
01463 
01464 // add a 32bit value from cpu_regs[index] to a full register using FC_REGS_ADDR (index modulo 4 must be zero)
01465 static void gen_add_regval32_to_reg(HostReg reg,Bitu index) {
01466         cache_checkinstr(6);
01467         cache_addw( MOV_LO_HI(templo2, FC_REGS_ADDR) );      // mov templo2, FC_REGS_ADDR
01468         cache_addw( LDR_IMM(templo1, templo2, index) );      // ldr templo1, [templo2, #index]
01469         cache_addw( ADD_REG(reg, reg, templo1) );      // add reg, reg, templo1
01470 }
01471 
01472 
01473 // move 16bit of register into cpu_regs[index] using FC_REGS_ADDR (index modulo 2 must be zero)
01474 static void gen_mov_regval16_from_reg(HostReg src_reg,Bitu index) {
01475         cache_checkinstr(4);
01476         cache_addw( MOV_LO_HI(templo1, FC_REGS_ADDR) );      // mov templo1, FC_REGS_ADDR
01477         cache_addw( STRH_IMM(src_reg, templo1, index) );      // strh src_reg, [templo1, #index]
01478 }
01479 
01480 // move 32bit of register into cpu_regs[index] using FC_REGS_ADDR (index modulo 4 must be zero)
01481 static void gen_mov_regval32_from_reg(HostReg src_reg,Bitu index) {
01482         cache_checkinstr(4);
01483         cache_addw( MOV_LO_HI(templo1, FC_REGS_ADDR) );      // mov templo1, FC_REGS_ADDR
01484         cache_addw( STR_IMM(src_reg, templo1, index) );      // str src_reg, [templo1, #index]
01485 }
01486 
01487 // move 32bit (dword==true) or 16bit (dword==false) of a register into cpu_regs[index] using FC_REGS_ADDR (index modulo 4 must be zero if dword==true, modulo 2 if dword==false)
01488 static void gen_mov_regword_from_reg(HostReg src_reg,Bitu index,bool dword) {
01489         cache_checkinstr(4);
01490         cache_addw( MOV_LO_HI(templo1, FC_REGS_ADDR) );      // mov templo1, FC_REGS_ADDR
01491         if (dword) {
01492                 cache_addw( STR_IMM(src_reg, templo1, index) );      // str src_reg, [templo1, #index]
01493         } else {
01494                 cache_addw( STRH_IMM(src_reg, templo1, index) );      // strh src_reg, [templo1, #index]
01495         }
01496 }
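      // A minimal sketch (hypothetical helper, not part of this backend) that
      // pairs the accessors above to negate a 16-bit value stored at a
      // 2-byte-aligned offset inside cpu_regs; "index" is an assumed offset:
      static void INLINE gen_neg_regval16(Bitu index) {
              gen_mov_regword_to_reg(HOST_a1, index, false);      // ldrh a1, [FC_REGS_ADDR + index]
              cache_checkinstr(2);
              cache_addw( NEG(HOST_a1, HOST_a1) );      // neg a1, a1
              gen_mov_regword_from_reg(HOST_a1, index, false);      // strh a1, [FC_REGS_ADDR + index]
      }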
01497 
01498 // move the lowest 8bit of a register into cpu_regs[index] using FC_REGS_ADDR
01499 static void gen_mov_regbyte_from_reg_low(HostReg src_reg,Bitu index) {
01500         cache_checkinstr(4);
01501         cache_addw( MOV_LO_HI(templo1, FC_REGS_ADDR) );      // mov templo1, FC_REGS_ADDR
01502         cache_addw( STRB_IMM(src_reg, templo1, index) );      // strb src_reg, [templo1, #index]
01503 }
01504 
01505 #endif