/*************************************************************************** * * All modifications in this file to the original code are * (C) Copyright 1992, ..., 2007 the "DOSEMU-Development-Team". * * for details see file COPYING.DOSEMU in the DOSEMU distribution * * * SIMX86 an Intel 80x86 cpu emulator * Copyright (C) 1997,2001 Alberto Vignani, FIAT Research Center * a.vignani@crf.it * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. * * Additional copyright notes: * * 1. The kernel-level vm86 handling was taken out of the Linux kernel * (linux/arch/i386/kernel/vm86.c). This code originally was written by * Linus Torvalds with later enhancements by Lutz Molgedey and Hans Lermen. * ***************************************************************************/ /* * BACK-END for the cpuemu interpreter. * * It translates the intermediate ops (defined in codegen.h) to their * final binary form and stores the generated code into a temporary * buffer (CodeBuf). * These intermediate ops are still being reworked and grow in an * incremental way; I hope they will converge to some better defined * set as soon as I start coding for some other processor. * * There should be other similar modules, one for each target. So you * can have codegen-ppc.c or codegen-emulated.c or whatever else. * * This module generates x86 code. Hey, wait... x86 from x86? 
* Actually the generated code runs always in 32-bit mode, so in a way * the 16-bit V86 mode is "emulated". * * All instructions operate on a virtual CPU image in memory ("TheCPU"), * and are completely self-contained. They read from TheCPU registers, * operate, and store back to the same registers. There's an exception - * FLAGS, which are not stored back until the end of a code block. * In fact, you will note that there's NO flag handling here, because on * x86 we use the real hardware to calculate them, and this speeds up * things a lot compared to a full interpreter. Flags will be a nightmare * for non-x86 host CPUs. * * This only applies to the condition code flags though, OF, SF, ZF, AF, * PF and CF (0x8d5). All other flags are stored in EFLAGS=TheCPU.eflags, * including DF. Normally the real DF is clear for compatibility with C * code; it is only temporarily set during string instructions. * * There is NO optimization for the produced code. It is a very pipeline- * unconscious code full of register dependencies and reloadings. * Clearly we hope that the 1st level cache of the host CPU works as * advertised ;-) * * There are two main functions here: * AddrGen, which implements the AGU (Address Generation Unit). * It calculates the address coming from ModRM and stores * it into a well-defined register (edi in the x86 case) * Gen, which does the ALU work and all the rest. There is no * branch specific unit, as the branches are (in principle) * all interpreted. 
* Both functions use a variable parameter approach, just to make them * hard to follow ;-) * */ /*************************************************************************** * * Registers on entry: * ebx pointer to SynCPU (must not be changed) * flags from cpu->eflags * * Registers used by the 32-bit machine: * eax scratch, data * ebx pointer to SynCPU (must not be changed) * ecx scratch, address/count * edx scratch, data * esi scratch, address * edi memory/register address * esp not modified * flags modified * * Registers on exit: * eax PC for the next instruction * edx flags * edi last memory address * ***************************************************************************/ // #include #include #include #include #include "emu86.h" #include "dlmalloc.h" #ifdef HOST_ARCH_X86 #include "codegen-x86.h" /* Forward declarations of the three back-end entry points that InitGen_x86() stores into the Gen, AddrGen and CloseAndExec hooks. */ static void Gen_x86(int op, int mode, ...); static void AddrGen_x86(int op, int mode, ...); static unsigned int CloseAndExec_x86(unsigned int PC, int mode, int ln); /* Buffer and pointers to store generated code */ unsigned char *CodePtr = NULL; CodeBuf *GenCodeBuf = NULL; unsigned char *BaseGenBuf = NULL; int GenBufSize = 0; hitimer_u TimeStartExec; unsigned int VgaAbsBankBase = 0; ///////////////////////////////////////////////////////////////////////////// #define Offs_From_Arg() (char)(va_arg(ap,int)) /* This code is appended at the end of every instruction sequence. It * passes back the IP of the next instruction after the sequence. * (the one where we switch back to interpreted code). * * movl #return_PC,eax (0xb8 followed by a four-byte immediate, zero in this template) * popl edx (flags) (0x5a) * ret (0xc3) * * The array holds one more byte after the ret, 0xf4 (the x86 hlt opcode); * NOTE(review): presumably a guard or padding byte, and the zero immediate * is presumably patched with the real return PC elsewhere - confirm both. */ unsigned char TailCode[TAILSIZE+1] = { 0xb8,0,0,0,0,0x5a,0xc3,0xf4 }; /* * This function is only here for looking at the generated binary code * with objdump. */ static void _test_(void) __attribute__((used)); static void _test_(void) { __asm__ __volatile__ (" \ nop \ " : : : "memory" ); } ///////////////////////////////////////////////////////////////////////////// /* empirical!! 
*/ /* goodmemref: returns 1 when m is below 0x110000, or when m lies in the inclusive range from (config.dpmi_base - TheCPU.mem_base) up to mMaxMem; returns 0 otherwise. NOTE(review): the two ranges presumably correspond to the DOS first megabyte plus HMA and to the DPMI memory pool - confirm against the callers, which are not visible here. */ static int goodmemref(unsigned int m) { if (m < 0x110000) return 1; if (m >= config.dpmi_base - TheCPU.mem_base && m <= mMaxMem) return 1; return 0; } ///////////////////////////////////////////////////////////////////////////// /* InitGen_x86: selects this x86 back-end. It points the Gen, AddrGen and CloseAndExec hooks at their _x86 implementations, sets UseLinker to USE_LINKER, resets the code-buffer bookkeeping (GenCodeBuf, BaseGenBuf, GenBufSize) and then calls InitTrees(). Takes no arguments and returns nothing. */ void InitGen_x86(void) { Gen = Gen_x86; AddrGen = AddrGen_x86; CloseAndExec = CloseAndExec_x86; UseLinker = USE_LINKER; GenCodeBuf = NULL; BaseGenBuf = NULL; GenBufSize = 0; InitTrees(); } ///////////////////////////////////////////////////////////////////////////// /* NOTE: parameters IG->px must be the last argument in a Gn() macro * because of the OR operator, which would cause trouble if the parameter * is negative */ static void CodeGen(IMeta *I, int j) { /* evil hack, keeping state from MOVS_SavA to MOVS_SetA in a static variable */ static unsigned char * rep_retry_ptr = (unsigned char*)0xdeadbeef; IGen *IG = &(I->gen[j]); register unsigned char *Cp = CodePtr; unsigned char * CpTemp; int mode = IG->mode; int rcod; #ifdef PROFILE hitimer_t t0 = 0; if (debug_level('e')) t0 = GETTSC(); #endif switch(IG->op) { case A_DI_0: // base(32), imm // movl $imm,%%edi G1(0xbf,Cp); G4(IG->p1,Cp); if (!(mode & MLEA)) { // addl offs(%%ebx),%%edi (seg reg offset) G3M(0x03,0x7b,IG->p0,Cp); } break; case A_DI_1: { // base(32), {imm}, reg int idsp=IG->p1; if (mode & ADDR16) { // movzwl offs(%%ebx),%%edi G4M(0x0f,0xb7,0x7b,IG->p2,Cp); if ((mode&IMMED) && (idsp!=0)) { // addw $immed,%%di G3(0xc78166,Cp); G2(idsp,Cp); } } else { // movl offs(%%ebx),%%edi G3M(0x8b,0x7b,IG->p2,Cp); if (idsp!=0) { GenLeaEDI(idsp); } } if (!(mode & MLEA)) { // addl offs(%%ebx),%%edi (seg reg offset) G3M(0x03,0x7b,IG->p0,Cp); } } break; case A_DI_2: { // base(32), {imm}, reg, reg, {shift} int idsp=IG->p1; if (mode & ADDR16) { // movzwl offs(%%ebx),%%edi G4M(0x0f,0xb7,0x7b,IG->p3,Cp); // addw offs(%%ebx),%%di G4M(0x66,0x03,0x7b,IG->p2,Cp); if (idsp!=0) { // addw $immed,%%di G3(0xc78166,Cp); G2(idsp,Cp); } } else { unsigned char sh = IG->p4; // movl offs(%%ebx),%%edi 
G3M(0x8b,0x7b,IG->p3,Cp); if (sh) { // shll $1,%%edi if (sh==1) { G2(0xe7d1,Cp); } // shll $count,%%edi else { G2(0xe7c1,Cp); G1(sh,Cp); } } // addl offs(%%ebx),%%edi G3M(0x03,0x7b,IG->p2,Cp); if (idsp!=0) { GenLeaEDI(idsp); } } if (!(mode & MLEA)) { // addl offs(%%ebx),%%edi (seg reg offset) G3M(0x03,0x7b,IG->p0,Cp); } } break; case A_DI_2D: { // modrm_sibd, 32-bit mode int idsp = IG->p0; unsigned char sh = IG->p2; // movl offs(%%ebx),%%edi G3M(0x8b,0x7b,IG->p1,Cp); // shll $count,%%ecx if (sh) { // shll $1,%%edi if (sh==1) { G2(0xe7d1,Cp); } // shll $count,%%edi else { G2(0xe7c1,Cp); G1(sh,Cp); } } if (idsp!=0) { GenLeaEDI(idsp); } if (!(mode & MLEA)) { // addl offs(%%ebx),%%edi (seg reg offset) G3M(0x03,0x7b,IG->ovds,Cp); } } break; case A_SR_SH4: { // real mode make base addr from seg // movzwl ofs(%%ebx),%%eax G4M(0x0f,0xb7,0x43,IG->p0,Cp); // shll $4,%%eax G3M(0xc1,0xe0,0x04,Cp); // addl Ofs_MEMBASE(%%ebx),%%eax G3M(0x03,0x43,Ofs_MEMBASE,Cp); // movl %%eax,ofs(%%ebx) G3M(0x89,0x43,IG->p1,Cp); // addl $0xffff,%eax G1(0x05,Cp); G4(0x0000ffff,Cp); // movl %%eax,ofs(%%ebx) G3M(0x89,0x43,IG->p1+4,Cp); } break; case L_NOP: G1(0x90,Cp); break; // Special case: CR0&0x3f case L_CR0: // movl Ofs_CR0(%%ebx),%%eax G3M(0x8b,0x43,Ofs_CR0,Cp); // andl $0x3f,%%eax G3(0x3fe083,Cp); break; case O_FOP: if (Fp87_op(IG->p0, IG->p1)) TheCPU.err = -96; Cp = CodePtr; break; case L_REG: { if (mode&(MBYTE|MBYTX)) { // movb offs(%%ebx),%%al G3M(0x8a,0x43,IG->p0,Cp); } else { // mov{wl} offs(%%ebx),%%{e}ax Gen66(mode,Cp); G3M(0x8b,0x43,IG->p0,Cp); } } break; case S_REG: { if (mode&MBYTE) { // movb %%al,offs(%%ebx) G3M(0x88,0x43,IG->p0,Cp); } else { // mov{wl} %%{e}ax,offs(%%ebx) Gen66(mode,Cp); G3M(0x89,0x43,IG->p0,Cp); } } break; case L_REG2REG: { if (mode&MBYTE) { G3M(0x8a,0x43,IG->p0,Cp); // rsrc G3M(0x88,0x43,IG->p1,Cp); // rdest } else { Gen66(mode,Cp); G3M(0x8b,0x43,IG->p0,Cp); Gen66(mode,Cp); G3M(0x89,0x43,IG->p1,Cp); } } break; case S_DI_R: { // mov{wl} %%{e}di,offs(%%ebx) 
Gen66(mode,Cp); G3M(0x89,0x7b,IG->p0,Cp); } break; case S_DI_IMM: { if (mode&MBYTE) { // movb $xx,(%%edi) G1(0xb0,Cp); G1(IG->p0,Cp); STD_WRITE_B; } else { // mov{wl} $xx,(%%edi) G1(0xb8,Cp); G4(IG->p0,Cp); STD_WRITE_WL(mode); } } break; case L_IMM: { if (mode&MBYTE) { // movb $immed,offs(%%ebx) G3M(0xc6,0x43,IG->p0,Cp); G1(IG->p1,Cp); } else { // mov{wl} $immed,offs(%%ebx) Gen66(mode,Cp); G3M(0xc7,0x43,IG->p0,Cp); G2_4(mode,IG->p1,Cp); } } break; case L_IMM_R1: { if (mode&MBYTE) { // movb $immed,%%al G1(0xb0,Cp); G1(IG->p0,Cp); } else { // mov{wl} $immed,%%{e}ax Gen66(mode,Cp); G1(0xb8,Cp); G2_4(mode,IG->p0,Cp); } } break; case L_MOVZS: if (mode & MBYTX) { if (!(mode & DATA16)) { // mov{sz}bw %%al,%%eax G3M(0x0f,(0xb6|IG->p0),0xc0,Cp); } else if (IG->p0) { // movsbw %%al,%%ax = cbw G2M(0x66,0x98,Cp); } else { // movzbw %%al,%%ax = movb $0, %%ah G2M(0xb4,0x00,Cp); } } else { if (mode & DATA16) { // mov{sz}ww %%ax,%%ax G4M(0x66,0x0f,(0xb7|IG->p0),0xc0,Cp); } else if (IG->p0) { // movswl %%ax,%%eax = cwde G1(0x98,Cp); } else { // movzwl %%ax,%%eax G3M(0x0f,0xb7,0xc0,Cp); } } // mov{wl} %%{e}ax,offs(%%ebx) Gen66(mode,Cp); G3M(0x89,0x43,IG->p1,Cp); break; case L_LXS1: { // mov{wl} (%%edi),%%{e}ax Gen66(mode,Cp); G2M(0x8b,0x07,Cp); // mov{wl} %%{e}ax,offs(%%ebx) Gen66(mode,Cp); G3M(0x89,0x43,IG->p0,Cp); // leal {2|4}(%%edi),%%edi G2M(0x8d,0x7f,Cp); G1((mode&DATA16? 
2:4),Cp); } break; case L_LXS2: { /* real mode segment base from segment value */ // movzwl (%%edi),%%eax G3M(0x0f,0xb7,0x07,Cp); // movw %%ax,ofs(%%ebx) G4M(0x66,0x89,0x43,IG->p0,Cp); // shll $4,%%eax G3M(0xc1,0xe0,0x04,Cp); // addl Ofs_MEMBASE(%%ebx),%%eax G3M(0x03,0x43,Ofs_MEMBASE,Cp); // movl %%eax,ofs(%%ebx) G3M(0x89,0x43,IG->p1,Cp); // addl $0xffff,%eax G1(0x05,Cp); G4(0x0000ffff,Cp); // movl %%eax,ofs(%%ebx) G3M(0x89,0x43,IG->p1+4,Cp); } break; case L_ZXAX: // movzwl %%ax,%%eax G3(0xc0b70f,Cp); break; // case L_DI_R1: case L_VGAREAD: if (!(TheCPU.mode&RM_REG) && vga.inst_emu && (IG->ovds!=Ofs_XCS) && (IG->ovds!=Ofs_XSS)) { // movl %%edi,%%eax G2M(0x89,0xf8,Cp); // subl VgaAbsBankBase(%%ebx),%%eax G2(0x832b,Cp); G4((unsigned char *)&VgaAbsBankBase-CPUOFFS(0),Cp); // cmpl vga.bank_len(%%ebx),%%eax G2(0x833b,Cp); G4((unsigned char *)&vga.mem.bank_len-CPUOFFS(0),Cp); // jnb normal_read // pushl mode G3(0x6a1073,Cp); G1(mode,Cp); #ifdef __x86_64__ // pop %%rsi; push %%rdi G5(0x8b8d48575e,Cp); // lea e_VgaRead(%%rbx),%%rcx; call %%rcx; pop %%rdi G4((unsigned char *)e_VgaRead-CPUOFFS(0),Cp); G3(0x5fd1ff,Cp); // must be the same amount of ins bytes as i386!! #else // pushl %%edi G2(0xb957,Cp); // call e_VgaRead G4((long)e_VgaRead,Cp); G2(0xd1ff,Cp); // add $8,%%esp; nop G4(0x9008c483,Cp); #endif // jmp (skip normal read) G2M(0xeb,((mode&(DATA16|MBYTE))==DATA16? 
3:2),Cp); } if (mode&(MBYTE|MBYTX)) { G2(0x078a,Cp); } else { Gen66(mode,Cp); G2(0x078b,Cp); } break; // case S_DI: case L_VGAWRITE: if (!(TheCPU.mode&RM_REG) && vga.inst_emu && (IG->ovds!=Ofs_XCS) && (IG->ovds!=Ofs_XSS)) { // movl %%edi,%%ecx G2M(0x89,0xf9,Cp); // subl VgaBankAbsBase(%%ebx),%%ecx G2(0x8b2b,Cp); G4((unsigned char *)&VgaAbsBankBase-CPUOFFS(0),Cp); // cmpl vga.mem.bank_len(%%ebx),%%ecx G2(0x8b3b,Cp); G4((unsigned char *)&vga.mem.bank_len-CPUOFFS(0),Cp); // jnb normal_write // pushl mode G3(0x6a1273,Cp); G1(mode,Cp); #ifdef __x86_64__ // pop %%rdx; mov %%eax, %%esi; push %%rdi G4(0x57c6895a,Cp); // lea e_VgaWrite(%%ebx),%%ecx; call %%ecx; pop %%rdi G3(0x8b8d48,Cp); G4((unsigned char *)e_VgaWrite-CPUOFFS(0),Cp); G3(0x5fd1ff,Cp); // must be the same amount of ins bytes as i386!! #else // pushl %%eax // pushl %%edi G3(0xb95750,Cp); // call e_VgaWrite G4((long)e_VgaWrite,Cp); G2(0xd1ff,Cp); // add $0c,%%esp; nop; nop G4(0x900cc483,Cp); G1(0x90,Cp); #endif // jmp (skip normal write) G2(0x03eb,Cp); } if (mode&MBYTE) { STD_WRITE_B; } else { STD_WRITE_WL(mode); } break; case O_ADD_R: // acc = acc op reg rcod = ADDbtrm; goto arith0; case O_OR_R: rcod = ORbtrm; goto arith0; case O_ADC_R: rcod = ADCbtrm; goto arith0; case O_SBB_R: rcod = SBBbtrm; goto arith0; case O_AND_R: rcod = ANDbtrm; goto arith0; case O_SUB_R: rcod = SUBbtrm; goto arith0; case O_XOR_R: rcod = XORbtrm; goto arith0; case O_CMP_R: rcod = CMPbtrm; goto arith0; case O_INC_R: rcod = GRP2brm; goto arith0; case O_DEC_R: rcod = 0x08fe; arith0: { G1(POPdx,Cp); // get flags from stack into %%edx switch (IG->op) { case O_ADC_R: // tests carry case O_SBB_R: // tests carry case O_INC_R: // preserves carry case O_DEC_R: // preserves carry // shr $1,%%edx to get carry flag from stack G2M(0xd1,0xea,Cp); } if (mode & MBYTE) { if (mode & IMMED) { // OPb $immed,%%al G1(rcod+2,Cp); G1(IG->p0,Cp); } else { // OPb offs(%%ebx),%%al G2(0x4300|rcod,Cp); G1(IG->p0,Cp); } } else { if (mode & IMMED) { // OP{lw} 
$immed,%%{e}ax Gen66(mode,Cp); G1(rcod+3,Cp); G2_4(mode,IG->p0,Cp); } else { // OP{wl} offs(%%ebx),%%{e}ax Gen66(mode,Cp); G2(0x4301|rcod,Cp); G1(IG->p0,Cp); } } G1(PUSHF,Cp); // flags back on stack } break; case O_CLEAR: G3M(POPdx,0x31,0xc0,Cp); //ignore flags; xorl %%eax,%%eax if (mode & MBYTE) { // movb %%al,offs(%%ebx) G3M(0x88,0x43,IG->p0,Cp); } else { // mov{wl} %%{e}ax,offs(%%ebx) Gen66(mode,Cp); G3M(0x89,0x43,IG->p0,Cp); } G1(PUSHF,Cp); // new flags on stack break; case O_TEST: G1(POPdx,Cp); // ignore flags if (mode & MBYTE) { // testb $0xff,offs(%%ebx) G4M(0xf6,0x43,IG->p0,0xff,Cp); } else if (mode&DATA16) { // testw $0xffff,offs(%%ebx) G4M(0x66,0xf7,0x43,IG->p0,Cp); G2(0xffff,Cp); } else { // test $0xffffffff,offs(%%ebx) G3M(0xf7,0x43,IG->p0,Cp); G4(0xffffffff,Cp); } G1(PUSHF,Cp); // new flags on stack break; case O_SBSELF: // if CY=0 -> reg=0, flag=xx46 // if CY=1 -> reg=-1, flag=xx97 // pop %%edx; shr $1,%%edx to get carry flag from stack G3M(POPdx,0xd1,0xea,Cp); // sbbl %%eax,%%eax G2M(0x19,0xc0,Cp); if (mode & MBYTE) { // movb %%al,offs(%%ebx) G3M(0x88,0x43,IG->p0,Cp); } else { // mov{wl} %%{e}ax,offs(%%ebx) Gen66(mode,Cp); G3M(0x89,0x43,IG->p0,Cp); } G1(PUSHF,Cp); // flags back on stack break; case O_ADD_FR: rcod = ADDbfrm; /* 0x00 */ goto arith1; case O_OR_FR: rcod = ORbfrm; /* 0x08 */ goto arith1; case O_ADC_FR: rcod = ADCbfrm; /* 0x10 */ goto arith1; case O_SBB_FR: rcod = SBBbfrm; /* 0x18 */ goto arith1; case O_AND_FR: rcod = ANDbfrm; /* 0x20 */ goto arith1; case O_SUB_FR: rcod = SUBbfrm; /* 0x28 */ goto arith1; case O_XOR_FR: rcod = XORbfrm; /* 0x30 */ goto arith1; case O_CMP_FR: rcod = CMPbfrm; /* 0x38 */ arith1: G1(POPdx,Cp); // get flags from stack into %%edx if (IG->op == O_ADC_FR || IG->op == O_SBB_FR) { // shr $1,%%edx to get carry flag from stack G2M(0xd1,0xea,Cp); } if (mode & MBYTE) { if (mode & IMMED) { // OPb $immed,offs(%%ebx) G4M(0x80,0x43|rcod,IG->p0,IG->p1,Cp); } else { // OPb %%al,offs(%%ebx) G2(0x4300|rcod,Cp); G1(IG->p0,Cp); } } 
else { if (mode & IMMED) { // OP{wl} $immed,offs(%%ebx) Gen66(mode,Cp); G3M(0x81,0x43|rcod,IG->p0,Cp); G2_4(mode,IG->p1,Cp); } else { // OP{wl} %%eax,offs(%%ebx) Gen66(mode,Cp); G2(0x4301|rcod,Cp); G1(IG->p0,Cp); } } G1(PUSHF,Cp); // flags back on stack break; case O_NOT: if (mode & MBYTE) { // notb %%al G2M(0xf6,0xd0,Cp); } else { // NOT{wl} %%(e)ax Gen66(mode,Cp); G2M(0xf7,0xd0,Cp); } break; case O_NEG: G1(POPdx,Cp); // ignore flags from stack if (mode & MBYTE) { // negb %%al G2M(0xf6,0xd8,Cp); } else { // neg{wl} (%%edi) Gen66(mode,Cp); G2M(0xf7,0xd8,Cp); } G1(PUSHF,Cp); // new flags on stack break; case O_INC: G1(POPdx,Cp); // get flags from stack into %%edx // shr $1,%%edx to get preserved carry flag from stack G2M(0xd1,0xea,Cp); if (mode & MBYTE) { // incb %%al G2M(0xfe,0xc0,Cp); } else if (mode & DATA16) { // inc %%ax #ifdef __x86_64__ // 0x40 is a REX byte, not inc G3M(0x66,0xff,0xc0,Cp); #else G2M(0x66,0x40,Cp); #endif } else { // inc %%eax #ifdef __x86_64__ G2M(0xff,0xc0,Cp); #else G1(0x40,Cp); #endif } G1(PUSHF,Cp); // flags back on stack before writing break; case O_DEC: G1(POPdx,Cp); // get flags from stack into %%edx // shr $1,%%edx to get preserved carry flag from stack G2M(0xd1,0xea,Cp); if (mode & MBYTE) { // decb %%al G2M(0xfe,0xc8,Cp); } else if (mode & DATA16) { // dec %%ax #ifdef __x86_64__ // 0x48 is a REX byte, not dec G3M(0x66,0xff,0xc8,Cp); #else G2M(0x66,0x48,Cp); #endif } else { // dec %%eax #ifdef __x86_64__ G2M(0xff,0xc8,Cp); #else G1(0x48,Cp); #endif } G1(PUSHF,Cp); // flags back on stack break; case O_CMPXCHG: { G1(POPdx,Cp); // ignore flags from stack if (mode & MBYTE) { // movb offs1(%%ebx),%%dl G3M(0x8a,0x53,IG->p0,Cp); if (mode & RM_REG) { // cmpxchgb %%dl,offs2(%%ebx) G4M(0x0f,0xb0,0x53,IG->p1,Cp); } else { // cmpxchgb %%dl,(%%edi) G3M(0x0f,0xb0,0x17,Cp); } } else { // mov{wl} offs1(%%ebx),%%{e}dx Gen66(mode,Cp); G3M(0x8b,0x53,IG->p0,Cp); if (mode & RM_REG) { // cmpxchg{wl} %%{e}dx,offs2(%%ebx) Gen66(mode,Cp); 
G4M(0x0f,0xb1,0x53,IG->p1,Cp); } else { // cmpxchg{wl} %%{e}dx,(%%edi) Gen66(mode,Cp); G3M(0x0f,0xb1,0x17,Cp); } } } G1(PUSHF,Cp); // flags back on stack break; case O_XCHG: { if (mode & MBYTE) { // xchgb offs(%%ebx),%%al G3M(0x86,0x43,IG->p0,Cp); } else { // xchg{wl} offs(%%ebx),%%{e}ax Gen66(mode,Cp); G3M(0x87,0x43,IG->p0,Cp); } } break; case O_XCHG_R: { // mov{wl} offs1(%%ebx),%%{e}ax Gen66(mode,Cp); G3M(0x8b,0x43,IG->p0,Cp); // xchg{wl} offs2(%%ebx),%%{e}ax Gen66(mode,Cp); G3M(0x87,0x43,IG->p1,Cp); // mov{wl} %%{e}ax,offs1(%%ebx) Gen66(mode,Cp); G3M(0x89,0x43,IG->p0,Cp); } break; case O_MUL: G1(POPF,Cp); // get flags from stack if (mode & MBYTE) { // mulb Ofs_AL(%%ebx),%%al G3M(0xf6,0x63,Ofs_AL,Cp); // movw %%ax,Ofs_AX(%%ebx) G4M(OPERoverride,0x89,0x43,Ofs_AX,Cp); } else if (mode&DATA16) { // mulw Ofs_AX(%%ebx),%%ax G4M(OPERoverride,0xf7,0x63,Ofs_AX,Cp); // movw %%ax,Ofs_AX(%%ebx) G4M(OPERoverride,0x89,0x43,Ofs_AX,Cp); // movw %%dx,Ofs_DX(%%ebx) G4M(OPERoverride,0x89,0x53,Ofs_DX,Cp); } else { // mull Ofs_EAX(%%ebx),%%eax G3M(0xf7,0x63,Ofs_EAX,Cp); // movl %%eax,Ofs_EAX(%%ebx) G3M(0x89,0x43,Ofs_EAX,Cp); // movl %%edx,Ofs_EDX(%%ebx) G3M(0x89,0x53,Ofs_EDX,Cp); } G1(PUSHF,Cp); // flags back on stack break; case O_IMUL: G1(POPF,Cp); // get flags from stack if (mode & MBYTE) { if ((mode&(IMMED|DATA16))==(IMMED|DATA16)) { // imul $immed,%%ax,%%ax G3M(OPERoverride,0x6b,0xc0,Cp); G1(IG->p0,Cp); // movw %%ax,offs(%%ebx) G4M(OPERoverride,0x89,0x43,IG->p1,Cp); } else if ((mode&(IMMED|DATA16))==IMMED) { // imul $immed,%%eax,%%eax G2M(0x6b,0xc0,Cp); G1(IG->p0,Cp); // movl %%eax,offs(%%ebx) G3M(0x89,0x43,IG->p1,Cp); } else { // imul Ofs_AL(%%ebx),%%al G3M(0xf6,0x6b,Ofs_AL,Cp); // movw %%ax,Ofs_AX(%%ebx) G4M(OPERoverride,0x89,0x43,Ofs_AX,Cp); } } else if (mode&DATA16) { if (mode&IMMED) { // imul $immed,%%ax,%%ax G3M(OPERoverride,0x69,0xc0,Cp); G2(IG->p0,Cp); // movw %%ax,offs(%%ebx) G4M(OPERoverride,0x89,0x43,IG->p1,Cp); } else if (mode&MEMADR) { // imul offs(%%ebx),%%ax 
G4M(OPERoverride,0x0f,0xaf,0x43,Cp); G1(IG->p0,Cp); // movw %%ax,offs(%%ebx) G4M(OPERoverride,0x89,0x43,IG->p0,Cp); } else { // imul Ofs_AX(%%ebx),%%ax G4M(OPERoverride,0xf7,0x6b,Ofs_AX,Cp); // movw %%ax,Ofs_AX(%%ebx) G4M(OPERoverride,0x89,0x43,Ofs_AX,Cp); // movw %%dx,Ofs_DX(%%ebx) G4M(OPERoverride,0x89,0x53,Ofs_DX,Cp); } } else { if (mode&IMMED) { // imul $immed,%%eax,%%eax G2M(0x69,0xc0,Cp); G4(IG->p0,Cp); // movl %%eax,offs(%%ebx) G3M(0x89,0x43,IG->p1,Cp); } else if (mode&MEMADR) { // imul offs(%%ebx),%%eax G4M(0x0f,0xaf,0x43,IG->p0,Cp); // movl %%eax,offs(%%ebx) G3M(0x89,0x43,IG->p0,Cp); } else { // imul Ofs_EAX(%%ebx),%%eax G3M(0xf7,0x6b,Ofs_EAX,Cp); // movl %%eax,Ofs_EAX(%%ebx) G3M(0x89,0x43,Ofs_EAX,Cp); // movl %%edx,Ofs_EDX(%%ebx) G3M(0x89,0x53,Ofs_EDX,Cp); } } G1(PUSHF,Cp); // flags back on stack break; case O_DIV: { G1(POPF,Cp); // get flags from stack G2(0xc189,Cp); // movl %%eax,%%ecx if (mode & MBYTE) { // movw Ofs_AX(%%ebx),%%ax G4M(OPERoverride,0x8b,0x43,Ofs_AX,Cp); /* exception trap: save current PC */ // movl $eip,Ofs_CR2(%%ebx) G2M(0xc7,0x43,Cp); G1(Ofs_CR2,Cp); G4(IG->p0,Cp); // div %%cl,%%al G2M(0xf6,0xf1,Cp); // movw %%ax,Ofs_AX(%%ebx) G4M(OPERoverride,0x89,0x43,Ofs_AX,Cp); } else if (mode&DATA16) { // movw Ofs_AX(%%ebx),%%ax G4M(OPERoverride,0x8b,0x43,Ofs_AX,Cp); // movw Ofs_DX(%%ebx),%%dx G4M(OPERoverride,0x8b,0x53,Ofs_DX,Cp); /* exception trap: save current PC */ // movl $eip,Ofs_CR2(%%ebx) G2(0x43c7,Cp); G1(Ofs_CR2,Cp); G4(IG->p0,Cp); // div %%cx,%%ax G3M(OPERoverride,0xf7,0xf1,Cp); // movw %%ax,Ofs_AX(%%ebx) G4M(OPERoverride,0x89,0x43,Ofs_AX,Cp); // movw %%dx,Ofs_DX(%%ebx) G4M(OPERoverride,0x89,0x53,Ofs_DX,Cp); } else { // movl Ofs_EAX(%%ebx),%%eax G3M(0x8b,0x43,Ofs_EAX,Cp); // movl Ofs_EDX(%%ebx),%%edx G3M(0x8b,0x53,Ofs_EDX,Cp); /* exception trap: save current PC */ // movl $eip,Ofs_CR2(%%ebx) G2(0x43c7,Cp); G1(Ofs_CR2,Cp); G4(IG->p0,Cp); // div %%ecx,%%eax G2M(0xf7,0xf1,Cp); // movl %%eax,Ofs_EAX(%%ebx) G3M(0x89,0x43,Ofs_EAX,Cp); // 
movl %%edx,Ofs_EDX(%%ebx) G3M(0x89,0x53,Ofs_EDX,Cp); } G1(PUSHF,Cp); // flags back on stack } break; case O_IDIV: { G1(POPF,Cp); // get flags from stack G2(0xc189,Cp); // movw %%eax,%%ecx if (mode & MBYTE) { // movw Ofs_AX(%%ebx),%%ax G4M(OPERoverride,0x8b,0x43,Ofs_AX,Cp); /* exception trap: save current PC */ // movl $eip,Ofs_CR2(%%ebx) G2(0x43c7,Cp); G1(Ofs_CR2,Cp); G4(IG->p0,Cp); // idiv %%cl,%%al G2M(0xf6,0xf9,Cp); // movw %%ax,Ofs_AX(%%ebx) G4M(OPERoverride,0x89,0x43,Ofs_AX,Cp); } else if (mode&DATA16) { // movw Ofs_AX(%%ebx),%%ax G4M(OPERoverride,0x8b,0x43,Ofs_AX,Cp); // movw Ofs_DX(%%ebx),%%dx G4M(OPERoverride,0x8b,0x53,Ofs_DX,Cp); /* exception trap: save current PC */ // movl $eip,Ofs_CR2(%%ebx) G2(0x43c7,Cp); G1(Ofs_CR2,Cp); G4(IG->p0,Cp); // idiv %%cx,%%ax G3M(OPERoverride,0xf7,0xf9,Cp); // movw %%ax,Ofs_AX(%%ebx) G4M(OPERoverride,0x89,0x43,Ofs_AX,Cp); // movw %%dx,Ofs_DX(%%ebx) G4M(OPERoverride,0x89,0x53,Ofs_DX,Cp); } else { // movl Ofs_EAX(%%ebx),%%eax G3M(0x8b,0x43,Ofs_EAX,Cp); // movl Ofs_EDX(%%ebx),%%edx G3M(0x8b,0x53,Ofs_EDX,Cp); /* exception trap: save current PC */ // movl $eip,Ofs_CR2(%%ebx) G2(0x43c7,Cp); G1(Ofs_CR2,Cp); G4(IG->p0,Cp); // idiv %%ecx,%%eax G2M(0xf7,0xf9,Cp); // movl %%eax,Ofs_EAX(%%ebx) G3M(0x89,0x43,Ofs_EAX,Cp); // movl %%edx,Ofs_EDX(%%ebx) G3M(0x89,0x53,Ofs_EDX,Cp); } G1(PUSHF,Cp); // flags back on stack } break; case O_CBWD: // movl Ofs_EAX(%%ebx),%%eax G3M(0x8b,0x43,Ofs_EAX,Cp); if (mode & MBYTE) { /* 0x98: CBW,CWDE */ if (mode & DATA16) { // AL->AX // cbw G2(0x9866,Cp); // movw %%ax,Ofs_AX(%%ebx) G4M(0x66,0x89,0x43,Ofs_AX,Cp); } else { // AX->EAX // cwde // movl %%eax,Ofs_EAX(%%ebx) G4M(0x98,0x89,0x43,Ofs_EAX,Cp); } } else if (mode & DATA16) { /* 0x99: AX->DX:AX */ // cwd G2(0x9966,Cp); // movw %%dx,Ofs_DX(%%ebx) G4M(0x66,0x89,0x53,Ofs_DX,Cp); } else { /* 0x99: EAX->EDX:EAX */ // cdq // movl %%edx,Ofs_EDX(%%ebx) G4M(0x99,0x89,0x53,Ofs_EDX,Cp); } break; case O_XLAT: // movl OVERR_DS(%%ebx),%%edi G2(0x7b8b,Cp); 
G1(IG->ovds,Cp); // movzbl Ofs_AL(%%ebx),%%ecx G4M(0x0f,0xb6,0x4b,Ofs_AL,Cp); // movl Ofs_EBX(%%ebx),%%eax G3M(0x8b,0x43,Ofs_EBX,Cp); // leal (%%ecx,%%eax,1),%%ecx G3(0x010c8d,Cp); if (mode & ADDR16) { // movzwl %%cx,%%ecx G3(0xC9B70F,Cp); } // leal (%%ecx,%%edi,1),%%edi G3M(0x8d,0x3c,0x39,Cp); break; case O_ROL: rcod = 0x00; goto shrot0; case O_ROR: rcod = 0x08; goto shrot0; case O_RCL: rcod = 0x10; goto shrot0; case O_RCR: rcod = 0x18; goto shrot0; case O_SHL: rcod = 0x20; goto shrot0; case O_SHR: rcod = 0x28; goto shrot0; case O_SAR: rcod = 0x38; shrot0: G1(0x9d,Cp); // get flags from stack if (mode & MBYTE) { // op al,1: d0 c0+r // op al,n: c0 c0+r n // op al,cl: d2 c0+r if (mode & IMMED) { unsigned char sh = IG->p0; G1(sh==1? 0xd0:0xc0,Cp); G1(0xc0 | rcod,Cp); if (sh!=1) G1(sh,Cp); } else { // movb Ofs_CL(%%ebx),%%cl G3M(0x8a,0x4b,Ofs_CL,Cp); // OPb %%cl,%%al G1(0xd2,Cp); G1(0xc0 | rcod,Cp); } } else { // op (e)ax,1: (66) d1 c0+r // op (e)ax,n: (66) c1 c0+r n // op (e)ax,cl: (66) d3 c0+r if (mode & IMMED) { unsigned char sh = IG->p0; Gen66(mode,Cp); G1(sh==1? 0xd1:0xc1,Cp); G1(0xc0 | rcod,Cp); if (sh!=1) G1(sh,Cp); } else { // movb Ofs_CL(%%ebx),%%cl G3M(0x8a,0x4b,Ofs_CL,Cp); // OP{wl} %%cl,(%%edi) Gen66(mode,Cp); G1(0xd3,Cp); G1(0xc0 | rcod,Cp); } } G1(0x9c,Cp); // flags back on stack break; case O_OPAX: { /* used by DAA,DAS,AAA,AAS,AAM,AAD */ // movl Ofs_EAX(%%ebx),%%eax G4M(0x9d,0x8b,0x43,Ofs_EAX,Cp); #ifdef __x86_64__ /* have to emulate all of them... */ switch(IG->p1) { case DAA: case DAS: { int op = (IG->p1 == DAS ? 
0x28 : 0); const static char pseq[] = { // pushf; mov %al,%cl; add $0x66,%al 0x9c,0x88,0xc1,0x04,0x66, // pushf; pop %rax; pop %rdx 0x9c,0x58,0x5a, // or %dl,%al; and $0x11,%al // combine AF/CF 0x08,0xd0,0x24,0x11, // mov %al,%dl; rol $4,%al 0x88,0xc2,0xc0,0xc0,0x04, // imul $6,%eax // multiply 0 CF 0 0 0 AF by 6 0x6b,0xc0,0x06}; GNX(Cp, pseq, sizeof(pseq)); // add/sub %al,%cl; pushf // Combine flags from G3M(op,0xc1,0x9c,Cp); // or %dl,(%rsp) // add/sub with AF/CF G3M(0x08,0x14,0x24,Cp); // movb %%cl,Ofs_EAX(%%ebx) G3M(0x88,0x4b,Ofs_EAX,Cp); break; } case AAA: case AAS: { int op = (IG->p1 == AAS ? 0x28 : 0); const static char pseq[] = { // pushf; mov %eax,%ecx; and 0xf,%al 0x9c,0x89,0xc1,0x24,0x0f, // add $6,%al; pop %edx 0x04,0x06,0x5a, // or %dl,%al; and $0xee,%dl // ~(AF|CF) 0x08,0xd0,0x80,0xe2,0xee, // and $0x10,%al; xchg %eax,%ecx; jz 1f 0x24,0x10,0x91,0x74,0x07}; GNX(Cp, pseq, sizeof(pseq)); // add/sub $0x106, %ax G4M(0x66,op+0x05,0x06,0x01,Cp); // or $0x11,%dl; (AF|CF) 1: and $0xf,%al G3M(0x80,0xca,0x11,Cp); G2M(0x24,0x0f,Cp); // push %rdx; movl %%eax,Ofs_EAX(%%ebx) G4M(0x52,0x89,0x43,Ofs_EAX,Cp); break; } case AAM: // mov $0,%ah; mov p2,%cl G4M(0xb4,0x00,0xb1,IG->p2,Cp); // div %cl; xchg %al,%ah G4M(0xf6,0xf1,0x86,0xc4,Cp); // orb %al,%al (for flags) G2M(0x08,0xc0,Cp); // movl %%eax,Ofs_EAX(%%ebx) G4M(0x89,0x43,Ofs_EAX,0x9c,Cp); break; case AAD: // mov %al,%cl; mov %ah,%al G4M(0x88,0xc1,0x88,0xe0,Cp); // mov p2,%ah; mul %ah G4M(0xb4,IG->p2,0xf6,0xe4,Cp); // add %cl,%al; mov $0,%ah G4M(0x00,0xc8,0xb4,0x00,Cp); // movl %%eax,Ofs_EAX(%%ebx) G4M(0x89,0x43,Ofs_EAX,0x9c,Cp); break; default: error("Unimplemented O_OPAX instruction\n"); leavedos(99); } #else // get n>0,n<3 bytes from parameter stack G1(IG->p1,Cp); if (IG->p0==2) { G1(IG->p2,Cp); } // movl %%eax,Ofs_EAX(%%ebx) G4M(0x89,0x43,Ofs_EAX,0x9c,Cp); #endif } break; case O_PUSH: { static char pseq16[] = { // movl Ofs_XSS(%%ebx),%%esi 0x8b,0x73,Ofs_XSS, // movl Ofs_ESP(%%ebx),%%ecx 0x8b,0x4b,Ofs_ESP, // leal 
-2(%%ecx),%%ecx 0x8d,0x49,0xfe, // 16-bit stack seg w/underflow (RM) // andl StackMask(%%ebx),%%ecx 0x23,0x4b,Ofs_STACKM, // movw %%ax,(%%esi,%%ecx,1) 0x66,0x89,0x04,0x0e, // do 16-bit PM apps exist which use a 32-bit stack seg? // movl %%ecx,Ofs_ESP(%%ebx) 0x89,0x4b,Ofs_ESP }; static char pseq32[] = { // movl Ofs_XSS(%%ebx),%%esi 0x8b,0x73,Ofs_XSS, // movl Ofs_ESP(%%ebx),%%ecx 0x8b,0x4b,Ofs_ESP, // leal -4(%%ecx),%%ecx 0x8d,0x49,0xfc, // andl StackMask(%%ebx),%%ecx 0x23,0x4b,Ofs_STACKM, // movl %%eax,(%%esi,%%ecx,1) 0x89,0x04,0x0e, #if 0 /* keep high 16-bits of ESP in small-stack mode */ // movl StackMask(%%ebx),%%edx 0x8b,0x53,Ofs_STACKM, // notl %%edx 0xf7,0xd2, // andl Ofs_ESP(%%ebx),%%edx 0x23,0x53,Ofs_ESP, // orl %%edx,%%ecx 0x09,0xd1, #endif // movl %%ecx,Ofs_ESP(%%ebx) 0x89,0x4b,Ofs_ESP }; register char *p; int sz; if (mode&DATA16) p=pseq16,sz=sizeof(pseq16); else p=pseq32,sz=sizeof(pseq32); GNX(Cp, p, sz); } break; /* PUSH derived (sub-)sequences: */ case O_PUSH1: { static char pseq[] = { // movl Ofs_ESP(%%ebx),%%ecx 0x8b,0x4b,Ofs_ESP, // movl Ofs_XSS(%%ebx),%%esi 0x8b,0x73,Ofs_XSS }; GNX(Cp, pseq, sizeof(pseq)); } break; case O_PUSH2: { /* register push only */ static unsigned char pseq16[] = { // movl offs(%%ebx),%%eax /*00*/ 0x8b,0x43,0x00, // leal -2(%%ecx),%%ecx 0x8d,0x49,0xfe, // andl StackMask(%%ebx),%%ecx 0x23,0x4b,Ofs_STACKM, // movw %%ax,(%%esi,%%ecx,1) 0x66,0x89,0x04,0x0e, }; static unsigned char pseq32[] = { // movl offs(%%ebx),%%eax /*00*/ 0x8b,0x43,0x00, // leal -4(%%ecx),%%ecx 0x8d,0x49,0xfc, // andl StackMask(%%ebx),%%ecx 0x23,0x4b,Ofs_STACKM, // movl %%eax,(%%esi,%%ecx,1) 0x89,0x04,0x0e, }; register unsigned char *p, *q; int sz; if (mode&DATA16) p=pseq16,sz=sizeof(pseq16); else p=pseq32,sz=sizeof(pseq32); q=Cp; GNX(Cp, p, sz); q[2] = IG->p0; } break; case O_PUSH3: // movl %%ecx,Ofs_ESP(%%ebx) G3M(0x89,0x4b,Ofs_ESP,Cp); break; case O_PUSH2F: { static unsigned char pseqpre[] = { // movl Ofs_XSS(%%ebx),%%esi 0x8b,0x73,Ofs_XSS, // movl 
Ofs_ESP(%%ebx),%%ecx 0x8b,0x4b,Ofs_ESP, // leal -4(%%ecx),%%ecx /*08*/ 0x8d,0x49,0xfc, // andl StackMask(%%ebx),%%ecx 0x23,0x4b,Ofs_STACKM, // movl (%%esp),%%edx (get flags on stack) 0x8b,0x14,0x24, // movl Ofs_FLAGS(%%ebx),%%eax 0x8b,0x43,Ofs_EFLAGS, // andw EFLAGS_CC,%%dx (0x8d5: OF/SF/ZF/AF/PF/CF) 0x66,0x81,0xe2,0xd5,0x08, // andw ~EFLAGS_CC,%%ax 0x66,0x25,0x2a,0xf7, // orw %%dx,%%ax 0x66,0x09,0xd0, // movl %%ecx,Ofs_ESP(%%ebx) 0x89,0x4b,Ofs_ESP }; unsigned char *q=Cp; GNX(Cp, pseqpre, sizeof(pseqpre)); if (mode&DATA16) q[8] = 0xfe; /* use -2 in lea ins */ if (in_dpmi) { /* This solves the DOSX 'System test 8' error. * The virtualized IF is pushed instead of the * real one (which is always 1). This way, tests * on the pushed value of the form * cli * pushf * test 0x200,(esp) * don't fail anymore. It is not clear, apart this * special test case, whether pushing the virtual * IF is actually useful; probably not. In any * case, POPF ignores this IF on stack. * Since PUSHF doesn't trap in PM, non-cpuemued * dosemu will always fail this particular test. 
*/ // rcr $10,%%eax (IF->cy) G3M(0xc1,0xd8,0x0a,Cp); // bt $19,(_EFLAGS-TheCPU)(%ebx) (test for VIF) G3M(0x0f,0xba,0xa3,Cp); /* relative ebx offset works on x86-64 too */ G4((unsigned char *)&_EFLAGS-CPUOFFS(0),Cp); // (19 from bt); rcl $10,%%eax G4M(0x13,0xc1,0xd0,0x0a,Cp); } if (mode&DATA16) { // movw %%ax,(%%esi,%%ecx,1) G4M(0x66,0x89,0x04,0x0e,Cp); } else { // movl %%eax,(%%esi,%%ecx,1) G4M(0x89,0x04,0x0e,NOP,Cp); } /* nop to make space for a code patch */ G1(NOP, Cp); } break; case O_PUSHI: { static unsigned char pseq16[] = { // movw $immed,%%ax /*00*/ 0xb8,0,0,0,0, // movl Ofs_XSS(%%ebx),%%esi 0x8b,0x73,Ofs_XSS, // movl Ofs_ESP(%%ebx),%%ecx 0x8b,0x4b,Ofs_ESP, // leal -2(%%ecx),%%ecx 0x8d,0x49,0xfe, // andl StackMask(%%ebx),%%ecx 0x23,0x4b,Ofs_STACKM, // movw %%ax,(%%esi,%%ecx,1) 0x66,0x89,0x04,0x0e, // movl %%ecx,Ofs_ESP(%%ebx) 0x89,0x4b,Ofs_ESP }; static unsigned char pseq32[] = { // movl $immed,%%eax /*00*/ 0xb8,0,0,0,0, // movl Ofs_XSS(%%ebx),%%esi 0x8b,0x73,Ofs_XSS, // movl Ofs_ESP(%%ebx),%%ecx 0x8b,0x4b,Ofs_ESP, // leal -4(%%ecx),%%ecx 0x8d,0x49,0xfc, // andl StackMask(%%ebx),%%ecx 0x23,0x4b,Ofs_STACKM, // movl %%eax,(%%esi,%%ecx,1) 0x89,0x04,0x0e, // movl %%ecx,Ofs_ESP(%%ebx) 0x89,0x4b,Ofs_ESP }; register unsigned char *p, *q; int sz; if (mode&DATA16) { p = pseq16,sz=sizeof(pseq16); } else { p = pseq32,sz=sizeof(pseq32); } q=Cp; GNX(Cp, p, sz); *((int *)(q+1)) = IG->p0; } break; case O_PUSHA: { /* push order: eax ecx edx ebx esp ebp esi edi */ static char pseq16[] = { // wrong if SP wraps! 
// movl Ofs_XSS(%%ebx),%%esi 0x8b,0x73,Ofs_XSS, // movl Ofs_ESP(%%ebx),%%ecx 0x8b,0x4b,Ofs_ESP, // leal -16(%%ecx),%%ecx 0x8d,0x49,0xf0, // andl StackMask(%%ebx),%%ecx 0x23,0x4b,Ofs_STACKM, // pushl %%esi; leal (%%esi,%%ecx,1),%%esi 0x56,0x8d,0x34,0x0e, // movl Ofs_EDI(%%ebx),%%eax // movl Ofs_ESI(%%ebx),%%edx 0x8b,0x43,Ofs_EDI,0x8b,0x53,Ofs_ESI, // movw %%ax,0(%%esi) // movw %%dx,2(%%esi) 0x66,0x89,0x46,0x00,0x66,0x89,0x56,0x02, // movl Ofs_EBP(%%ebx),%%eax // movl Ofs_ESP(%%ebx),%%edx 0x8b,0x43,Ofs_EBP,0x8b,0x53,Ofs_ESP, // movw %%ax,4(%%esi) // movw %%dx,6(%%esi) 0x66,0x89,0x46,0x04,0x66,0x89,0x56,0x06, // movl Ofs_EBX(%%ebx),%%eax // movl Ofs_EDX(%%ebx),%%edx 0x8b,0x43,Ofs_EBX,0x8b,0x53,Ofs_EDX, // movw %%ax,8(%%esi) // movw %%dx,10(%%esi) 0x66,0x89,0x46,0x08,0x66,0x89,0x56,0x0a, // movl Ofs_ECX(%%ebx),%%eax // movl Ofs_EAX(%%ebx),%%edx 0x8b,0x43,Ofs_ECX,0x8b,0x53,Ofs_EAX, // movw %%ax,12(%%esi) // movw %%dx,14(%%esi) 0x66,0x89,0x46,0x0c,0x66,0x89,0x56,0x0e, // popl %%esi; movl %%ecx,Ofs_ESP(%%ebx) 0x5e,0x89,0x4b,Ofs_ESP }; static char pseq32[] = { // movl Ofs_XSS(%%ebx),%%edi 0x8b,0x7b,Ofs_XSS, // movl Ofs_ESP(%%ebx),%%ecx 0x8b,0x4b,Ofs_ESP, // leal -32(%%ecx),%%ecx 0x8d,0x49,0xe0, // andl StackMask(%%ebx),%%ecx 0x23,0x4b,Ofs_STACKM, // leal (%%edi,%%ecx,1),%%edi 0x8d,0x3c,0x0f, // cld; leal Ofs_EDI(%%ebx),%%esi 0xfc,0x8d,0x73,Ofs_EDI, // push %%ecx; mov $8,%%ecx 0x51,0xb9,8,0,0,0, // rep; movsl; pop %%ecx 0xf3,0xa5,0x59, // movl %%ecx,Ofs_ESP(%%ebx) 0x89,0x4b,Ofs_ESP }; register char *p; int sz; if (mode&DATA16) { p = pseq16,sz=sizeof(pseq16); } else { p = pseq32,sz=sizeof(pseq32); } GNX(Cp, p, sz); } break; case O_POP: { static char pseq16[] = { // movl Ofs_XSS(%%ebx),%%esi 0x8b,0x73,Ofs_XSS, // movl Ofs_ESP(%%ebx),%%ecx 0x8b,0x4b,Ofs_ESP, // andl StackMask(%%ebx),%%ecx 0x23,0x4b,Ofs_STACKM, // movw (%%esi,%%ecx,1),%%ax 0x66,0x8b,0x04,0x0e, // leal 2(%%ecx),%%ecx 0x8d,0x49,0x02, #ifdef STACK_WRAP_MP /* mask after incrementing */ // andl 
StackMask(%%ebx),%%ecx 0x23,0x4b,Ofs_STACKM, #endif // movl %%ecx,Ofs_ESP(%%ebx) 0x89,0x4b,Ofs_ESP }; static char pseq32[] = { // movl Ofs_XSS(%%ebx),%%esi 0x8b,0x73,Ofs_XSS, // movl Ofs_ESP(%%ebx),%%ecx 0x8b,0x4b,Ofs_ESP, // andl StackMask(%%ebx),%%ecx 0x23,0x4b,Ofs_STACKM, // movl (%%esi,%%ecx,1),%%eax 0x90,0x8b,0x04,0x0e, // leal 4(%%ecx),%%ecx 0x8d,0x49,0x04, #ifdef STACK_WRAP_MP /* mask after incrementing */ // andl StackMask(%%ebx),%%ecx 0x23,0x4b,Ofs_STACKM, #endif #ifdef KEEP_ESP /* keep high 16-bits of ESP in small-stack mode */ // movl StackMask(%%ebx),%%edx 0x8b,0x53,Ofs_STACKM, // notl %%edx 0xf7,0xd2, // andl Ofs_ESP(%%ebx),%%edx 0x23,0x53,Ofs_ESP, // orl %%edx,%%ecx 0x09,0xd1, #endif // movl %%ecx,Ofs_ESP(%%ebx) 0x89,0x4b,Ofs_ESP }; register char *p; int sz; if (mode&DATA16) p=pseq16,sz=sizeof(pseq16); else p=pseq32,sz=sizeof(pseq32); // for popping into memory the sequence is: // first pop, then adjust stack, then // do address calculation and last store data GNX(Cp, p, sz); } break; /* POP derived (sub-)sequences: */ case O_POP1: { static char pseq[] = { // movl Ofs_XSS(%%ebx),%%esi 0x8b,0x73,Ofs_XSS, // movl Ofs_ESP(%%ebx),%%ecx 0x8b,0x4b,Ofs_ESP }; GNX(Cp, pseq, sizeof(pseq)); } break; case O_POP2: { static unsigned char pseq16[] = { // andl StackMask(%%ebx),%%ecx 0x23,0x4b,Ofs_STACKM, // movw (%%esi,%%ecx,1),%%ax 0x66,0x8b,0x04,0x0e, // movw %%ax,offs(%%ebx) /*07*/ 0x66,0x89,0x43,0x00, // leal 2(%%ecx),%%ecx 0x8d,0x49,0x02 }; static unsigned char pseq32[] = { // andl StackMask(%%ebx),%%ecx 0x23,0x4b,Ofs_STACKM, // movl (%%esi,%%ecx,1),%%eax 0x90,0x8b,0x04,0x0e, // movl %%eax,offs(%%ebx) /*07*/ 0x90,0x89,0x43,0x00, // leal 4(%%ecx),%%ecx 0x8d,0x49,0x04, #ifdef KEEP_ESP /* keep high 16-bits of ESP in small-stack mode */ // movl StackMask(%%ebx),%%edx 0x8b,0x53,Ofs_STACKM, // notl %%edx 0xf7,0xd2, // andl Ofs_ESP(%%ebx),%%edx 0x23,0x53,Ofs_ESP, // orl %%edx,%%ecx 0x09,0xd1, #endif }; register unsigned char *p, *q; int sz; if (mode&DATA16) 
p=pseq16,sz=sizeof(pseq16); else p=pseq32,sz=sizeof(pseq32); // for popping into memory the sequence is: // first pop, then adjust stack, then // do address calculation and last store data q=Cp; GNX(Cp, p, sz); q[0x0a] = IG->p0; } break; case O_POP3: // movl %%ecx,Ofs_ESP(%%ebx) G3M(0x89,0x4b,Ofs_ESP,Cp); break; case O_POPA: { static unsigned char pseq16[] = { // wrong if SP wraps! // movl Ofs_XSS(%%ebx),%%esi 0x8b,0x73,Ofs_XSS, // movl Ofs_ESP(%%ebx),%%ecx 0x8b,0x4b,Ofs_ESP, // andl StackMask(%%ebx),%%ecx 0x23,0x4b,Ofs_STACKM, // pushl %%esi; leal (%%esi,%%ecx,1),%%esi 0x56,0x8d,0x34,0x0e, // movw 0(%%esi),%%ax // movw 2(%%esi),%%dx 0x66,0x8b,0x46,0x00,0x66,0x8b,0x56,0x02, // movw %%ax,Ofs_DI(%%ebx) // movw %%dx,Ofs_SI(%%ebx) 0x66,0x89,0x43,Ofs_DI,0x66,0x89,0x53,Ofs_SI, // movw 4(%%esi),%%ax 0x66,0x8b,0x46,0x04, // movw %%ax,Ofs_BP(%%ebx) 0x66,0x89,0x43,Ofs_BP, // movw 8(%%esi),%%ax // movw 10(%%esi),%%dx 0x66,0x8b,0x46,0x08,0x66,0x8b,0x56,0x0a, // movw %%ax,Ofs_BX(%%ebx) // movw %%dx,Ofs_DX(%%ebx) 0x66,0x89,0x43,Ofs_BX,0x66,0x89,0x53,Ofs_DX, // movw 12(%%esi),%%ax // movw 14(%%esi),%%dx 0x66,0x8b,0x46,0x0c,0x66,0x8b,0x56,0x0e, // movw %%ax,Ofs_CX(%%ebx) // movw %%dx,Ofs_AX(%%ebx) 0x66,0x89,0x43,Ofs_CX,0x66,0x89,0x53,Ofs_AX, // popl %%esi; leal 16(%%ecx),%%ecx 0x5e,0x8d,0x49,0x10, #ifdef STACK_WRAP_MP /* mask after incrementing */ // andl StackMask(%%ebx),%%ecx 0x23,0x4b,Ofs_STACKM, #endif // movl %%ecx,Ofs_ESP(%%ebx) 0x89,0x4b,Ofs_ESP }; static unsigned char pseq32[] = { // movl Ofs_XSS(%%ebx),%%esi 0x8b,0x73,Ofs_XSS, // movl Ofs_ESP(%%ebx),%%ecx 0x8b,0x4b,Ofs_ESP, // andl StackMask(%%ebx),%%ecx 0x23,0x4b,Ofs_STACKM, // leal (%%esi,%%ecx,1),%%esi 0x8d,0x34,0x0e, // cld; leal Ofs_EDI(%%ebx),%%edi 0xfc,0x8d,0x7b,Ofs_EDI, // here ESP is overwritten, BUT it has been saved // locally in %%ebp and will be rewritten later // push %%ecx; mov $8,%%ecx 0x51,0xb9,8,0,0,0, // rep; movsl; pop %%ecx 0xf3,0xa5,0x59, // leal 32(%%ecx),%%ecx 0x8d,0x49,0x20, #ifdef STACK_WRAP_MP 
/* mask after incrementing */ // andl StackMask(%%ebx),%%ecx 0x23,0x4b,Ofs_STACKM, #endif #ifdef KEEP_ESP /* keep high 16-bits of ESP in small-stack mode */ // movl StackMask(%%ebx),%%edx 0x8b,0x53,Ofs_STACKM, // notl %%edx 0xf7,0xd2, // andl Ofs_ESP(%%ebx),%%edx 0x23,0x53,Ofs_ESP, // orl %%edx,%%ecx 0x09,0xd1, #endif // movl %%ecx,Ofs_ESP(%%ebx) 0x89,0x4b,Ofs_ESP }; register unsigned char *p; int sz; if (mode&DATA16) { p = pseq16,sz=sizeof(pseq16); } else { p = pseq32,sz=sizeof(pseq32); } GNX(Cp, p, sz); } break; case O_LEAVE: { static char pseq16[] = { // movzwl Ofs_BP(%%ebx),%%ecx 0x0f,0xb7,0x4b,Ofs_EBP, // movl Ofs_XSS(%%ebx),%%esi 0x8b,0x73,Ofs_XSS, // andl StackMask(%%ebx),%%ecx 0x23,0x4b,Ofs_STACKM, // movw (%%esi,%%ecx,1),%%ax 0x66,0x8b,0x04,0x0e, // movw %%ax,Ofs_BP(%%ebx) 0x66,0x89,0x43,Ofs_BP, // leal 2(%%ecx),%%ecx 0x8d,0x49,0x02, #ifdef STACK_WRAP_MP /* mask after incrementing */ // andl StackMask(%%ebx),%%ecx 0x23,0x4b,Ofs_STACKM, #endif // movl %%ecx,Ofs_ESP(%%ebx) 0x89,0x4b,Ofs_ESP }; static char pseq32[] = { // movl Ofs_EBP(%%ebx),%%ecx 0x8b,0x4b,Ofs_EBP, // movl Ofs_XSS(%%ebx),%%esi 0x8b,0x73,Ofs_XSS, // andl StackMask(%%ebx),%%ecx 0x23,0x4b,Ofs_STACKM, // movl (%%esi,%%ecx,1),%%eax 0x8b,0x04,0x0e, // movl %%eax,Ofs_EBP(%%ebx) 0x89,0x43,Ofs_EBP, // leal 4(%%ecx),%%ecx 0x8d,0x49,0x04, #ifdef STACK_WRAP_MP /* mask after incrementing */ // andl StackMask(%%ebx),%%ecx 0x23,0x4b,Ofs_STACKM, #endif #ifdef KEEP_ESP /* keep high 16-bits of ESP in small-stack mode */ // movl StackMask(%%ebx),%%edx 0x8b,0x53,Ofs_STACKM, // notl %%edx 0xf7,0xd2, // andl Ofs_ESP(%%ebx),%%edx 0x23,0x53,Ofs_ESP, // orl %%edx,%%ecx 0x09,0xd1, #endif // movl %%ecx,Ofs_ESP(%%ebx) 0x89,0x4b,Ofs_ESP }; register char *p; int sz; if (mode&DATA16) p=pseq16,sz=sizeof(pseq16); else p=pseq32,sz=sizeof(pseq32); GNX(Cp, p, sz); } break; case O_MOVS_SetA: if (mode&ADDR16) { /* The CX load has to be before the address reloads */ if (mode&(MREP|MREPNE)) { // movzwl Ofs_CX(%%ebx),%%ecx 
G4M(0x0f,0xb7,0x4b,Ofs_CX,Cp); rep_retry_ptr = Cp; } if (mode&MOVSSRC) { // movzwl Ofs_SI(%%ebx),%%esi G4M(0x0f,0xb7,0x73,Ofs_SI,Cp); if(mode & (MREPNE|MREP)) { /* EAX: iterations possible until address overflow if DF is set. */ // movl %%esi,%%eax G2M(MOVwfrm,0xF0,Cp); if(!(mode & MBYTE)) { if(mode & DATA16) { // shrl $1,%%eax G2M(SHIFTw,0xE8,Cp); } else { // shrl $2,%%eax G3M(SHIFTwi,0xE8,2,Cp); } } // incl %%eax #ifdef __x86_64__ // 0x40 is a REX byte, not inc G2M(0xff,0xc0,Cp); #else G1(INCax,Cp); #endif } // addl OVERR_DS(%%ebx),%%esi G3M(0x03,0x73,IG->ovds,Cp); } if (mode&MOVSDST) { // movzwl Ofs_DI(%%ebx),%%edi G4M(0x0f,0xb7,0x7b,Ofs_DI,Cp); if(mode & (MREPNE|MREP)) { /* EDX: iterations possible until address overflow if DF is set. */ // movl %%edi,%%edx G2M(MOVwfrm,0xFA,Cp); if(!(mode & MBYTE)) { if(mode & DATA16) { // shrl $1,%%edx G2M(SHIFTw,0xEA,Cp); } else { // shrl $2,%%edx G3M(SHIFTwi,0xEA,2,Cp); } } // incl %%edx #ifdef __x86_64__ // 0x42 is a REX byte, not inc G2M(0xff,0xc2,Cp); #else G1(INCdx,Cp); #endif } // addl Ofs_XES(%%ebx),%%edi G3M(0x03,0x7b,Ofs_XES,Cp); } if (mode&(MREP|MREPNE)) { /* Address overflow detection */ // testl $4,Ofs_EFLAGS+1(%%ebx) G4M(GRP1brm,0x43,Ofs_EFLAGS+1,0x4,Cp); // jnz 0f (distance is 8 bytes per limit to adjust) G2M(JNE_JNZ,(mode&(MOVSDST|MOVSSRC)) == (MOVSDST|MOVSSRC) ? 
0x10:0x08,Cp); /* correct for cleared DF */ if(mode&MOVSSRC) { // negl %%eax G2M(GRP1wrm,0xD8,Cp); // addl $(0x10000/opsize+1),%%eax G2M(IMMEDwrm,0xC0,Cp); G4(0x10000/OPSIZE(mode)+1,Cp); } if(mode&MOVSDST) { // negl %%edx G2M(GRP1wrm,0xDA,Cp); // addl $(0x10000/opsize+1),%%edx G2M(IMMEDwrm,0xC2,Cp); G4(0x10000/OPSIZE(mode)+1,Cp); } // 0: /* consolidate limits to edx */ switch(mode&(MOVSDST|MOVSSRC)) { case MOVSDST: /* nothing to do, limit already in edx */ break; case MOVSSRC: /* limit in eax, want it in edx */ // xchg %%eax,%%edx G1(XCHGdx,Cp); break; case MOVSSRC | MOVSDST: /* smaller limit to edx */ // cmp %%eax,%%edx G2M(CMPwtrm,0xD0,Cp); // jb 0f G2M(JB_JNAE,0x01,Cp); // xchg %%eax,%%edx G1(XCHGdx,Cp); break; } // cmp %%ecx,%%edx G2M(CMPwfrm,0xCA,Cp); // jbe 0f G2M(JBE_JNA,0x02,Cp); // mov %%ecx,%%edx G2M(MOVwfrm,0xCA,Cp); // 0: // xchg %%ecx,%%edx G2M(XCHGwrm,0xCA,Cp); // sub %%ecx, %%edx G2M(SUBwfrm,0xCA,Cp); } } else { if (mode&MOVSSRC) { // movl OVERR_DS(%%ebx),%%esi G2(0x738b,Cp); G1(IG->ovds,Cp); // addl Ofs_ESI(%%ebx),%%esi G3M(0x03,0x73,Ofs_ESI,Cp); } if (mode&MOVSDST) { // movl Ofs_XES(%%ebx),%%edi G3M(0x8b,0x7b,Ofs_XES,Cp); // addl Ofs_EDI(%%ebx),%%edi G3M(0x03,0x7b,Ofs_EDI,Cp); } if (mode&(MREP|MREPNE)) { // movl Ofs_ECX(%%ebx),%%ecx G3M(0x8b,0x4b,Ofs_ECX,Cp); } } break; case O_MOVS_MovD: GetDF(Cp); if (mode&(MREP|MREPNE)) { G3M(NOP,NOP,REP,Cp); } if (mode&MBYTE) { G1(MOVSb,Cp); } else { Gen66(mode,Cp); G1(MOVSw,Cp); } if (!(mode&(MREP|MREPNE))) { G4(0x90909090,Cp); } G1(CLD,Cp); break; case O_MOVS_LodD: GetDF(Cp); if (mode&(MREP|MREPNE)) { G1(REP,Cp); } if (mode&MBYTE) { G1(LODSb,Cp); } else { Gen66(mode,Cp); G1(LODSw,Cp); } G1(CLD,Cp); break; case O_MOVS_StoD: GetDF(Cp); if (mode&(MREP|MREPNE)) { G3M(NOP,NOP,REP,Cp); } if (mode&MBYTE) { G1(STOSb,Cp); } else { Gen66(mode,Cp); G1(STOSw,Cp); } if (!(mode&(MREP|MREPNE))) { G4(0x90909090,Cp); } G1(CLD,Cp); break; case O_MOVS_ScaD: CpTemp = NULL; if(mode & (MREP|MREPNE)) { G2M(JCXZ,00,Cp); // Pointer to 
the jecxz distance byte CpTemp = Cp-1; } GetDF(Cp); if (mode&MREP) { G1(REP,Cp); } else if (mode&MREPNE) { G1(REPNE,Cp); } if (mode&MBYTE) { G1(SCASb,Cp); } else { Gen66(mode,Cp); G1(SCASw,Cp); } G3M(CLD,POPsi,PUSHF,Cp); // replace flags back on stack,esi=dummy if(mode & (MREP|MREPNE)) *CpTemp = (Cp-(CpTemp+1)); break; case O_MOVS_CmpD: CpTemp = NULL; if(mode & (MREP|MREPNE)) { G2M(JCXZ,00,Cp); // Pointer to the jecxz distance byte CpTemp = Cp-1; } GetDF(Cp); if (mode&MREP) { G1(REP,Cp); } else if (mode&MREPNE) { G1(REPNE,Cp); } if (mode&MBYTE) { G1(CMPSb,Cp); } else { Gen66(mode,Cp); G1(CMPSw,Cp); } G3M(CLD,POPax,PUSHF,Cp); // replace flags back on stack,eax=dummy if(mode & (MREP|MREPNE)) *CpTemp = (Cp-(CpTemp+1)); break; case O_MOVS_SavA: if (mode&ADDR16) { if(mode & MREPCOND) { /* it is important to *NOT* destroy the flags here, so use lea instead of add. Flags are needed for termination detection */ // lea 0(%%edx,%%ecx),%%ecx ; add remaining to cx G3M(0x8D,0x0C,0x11,Cp); /* terminate immediately if rep was stopped by flags */ // j[n]z 0f G2M((mode&MREP)?JE_JZ:JNE_JNZ,0x02,Cp); // xor %%edx,%%edx ; clear remaining G2M(XORwtrm,0xD2,Cp); // 0: } else if(mode & (MREP|MREPNE)) { /* use shorter add instruction for nonconditional reps */ // add %%edx,%%ecx G2M(ADDwtrm,0xCA,Cp); } if (mode&MOVSSRC) { // esi = base1 + CPU_(e)SI +- n // subl OVERR_DS(%%ebx),%%esi G2(0x732b,Cp); G1(IG->ovds,Cp); // movw %%si,Ofs_SI(%%ebx) G4M(0x66,0x89,0x73,Ofs_SI,Cp); } if (mode&MOVSDST) { // edi = base2 + CPU_(e)DI +- n // subl Ofs_XES(%%ebx),%%edi G3M(0x2b,0x7b,Ofs_XES,Cp); // movw %%di,Ofs_DI(%%ebx) G4M(0x66,0x89,0x7b,Ofs_DI,Cp); } // continue after SI/DI overflow; store ecx if (mode&(MREP|MREPNE)) { unsigned char * jmpbackbase; // or %%edx,%%edx G2M(ORwtrm,0xD2,Cp); // jnz retry jmpbackbase = Cp; G2M(JNE_JNZ,(rep_retry_ptr-jmpbackbase-2)&0xFF,Cp); // movw %%cx,Ofs_CX(%%ebx) G4M(0x66,0x89,0x4b,Ofs_CX,Cp); } } else { if (mode&(MREP|MREPNE)) { // movl %%ecx,Ofs_ECX(%%ebx) 
G3M(0x89,0x4b,Ofs_ECX,Cp); } if (mode&MOVSSRC) { // esi = base1 + CPU_(e)SI +- n // subl OVERR_DS(%%ebx),%%esi G2(0x732b,Cp); G1(IG->ovds,Cp); // movl %%esi,Ofs_ESI(%%ebx) G3M(0x89,0x73,Ofs_ESI,Cp); } if (mode&MOVSDST) { // edi = base2 + CPU_(e)DI +- n // subl Ofs_XES(%%ebx),%%edi G3M(0x2b,0x7b,Ofs_XES,Cp); // movl %%edi,Ofs_EDI(%%ebx) G3M(0x89,0x7b,Ofs_EDI,Cp); } } break; case O_SLAHF: rcod = IG->p0; // 0=LAHF 1=SAHF if (rcod==0) { /* LAHF */ // movb 0(%%esp),%%al G3M(0x8a,0x04,0x24,Cp); // movb %%al,Ofs_AH(%%ebx) G3M(0x88,0x43,Ofs_AH,Cp); } else { /* SAHF */ // movb Ofs_AH(%%ebx),%%al G3M(0x8a,0x43,Ofs_AH,Cp); // movb %%al,0(%%esp) G3M(0x88,0x04,0x24,Cp); } break; case O_SETFL: { unsigned char o1 = IG->p0; switch(o1) { // these are direct on x86 case CMC: // xorb $1,0(%%esp) G4M(0x80,0x34,0x24,0x01,Cp); break; case CLC: // andb $0xfe,(%%esp) G4M(0x80,0x24,0x24,0xfe,Cp); break; case STC: // orb $1,0(%%esp) G4M(0x80,0x0c,0x24,0x01,Cp); break; case CLD: // andb $0xfb,EFLAGS+1(%%ebx) G4M(0x80,0x63,Ofs_EFLAGS+1,0xfb,Cp); break; case STD: // orb $4,EFLAGS+1(%%ebx) G4M(0x80,0x4b,Ofs_EFLAGS+1,0x04,Cp); break; } } break; case O_BSWAP: { // movl offs(%%ebx),%%eax G3M(0x8b,0x43,IG->p0,Cp); // bswap %%eax G2M(0x0f,0xc8,Cp); // movl %%eax,offs(%%ebx) G3M(0x89,0x43,IG->p0,Cp); } break; case O_SETCC: { unsigned char n = IG->p0; PopPushF(Cp); // get flags from stack // setcc %%al G3M(0x0f,(0x90|(n&15)),0xc0,Cp); } break; case O_BITOP: { unsigned char n = IG->p0; G1(0x9d,Cp); // get flags from stack switch (n) { case 0x03: /* BT */ case 0x0b: /* BTS */ case 0x13: /* BTR */ case 0x1b: /* BTC */ // mov{wl} offs(%%ebx),%%{e}dx Gen66(mode,Cp); G3M(0x8b,0x53,IG->p1,Cp); if (mode & RM_REG) { // OP{wl} %%{e}dx,%%{e}ax Gen66(mode,Cp); G3M(0x0f,(n+0xa0),0xd0,Cp); } else { // OP{wl} %%{e}dx,(%%edi) Gen66(mode,Cp); G3M(0x0f,(n+0xa0),0x17,Cp); } break; case 0x1c: /* BSF */ case 0x1d: /* BSR */ // OP{wl} %%{e}ax,%%{e}dx Gen66(mode,Cp); G3M(0x0f,(n+0xa0),0xd0,Cp); // jz 1f 
G2M(0x74,(mode&DATA16)?0x04:0x03,Cp); // mov{wl} %%{e}dx,offs(%%ebx) 1: Gen66(mode,Cp); G3M(0x89,0x53,IG->p1,Cp); break; case 0x20: /* BT imm8 */ case 0x28: /* BTS imm8 */ case 0x30: /* BTR imm8 */ case 0x38: /* BTC imm8 */ // OP{wl} $immed,%%{e}ax Gen66(mode,Cp); G4M(0x0f,0xba,(n|0xc0),IG->p1,Cp); break; } G1(0x9c,Cp); // flags back on stack } break; case O_SHFD: { unsigned char l_r = IG->p0; G1(0x9d,Cp); // get flags from stack // mov{wl} offs(%%ebx),%%{e}dx Gen66(mode,Cp); G3M(0x8b,0x53,IG->p1,Cp); if (mode & IMMED) { unsigned char shc = IG->p2; // sh{lr}d $immed,%%{e}dx,%%{e}ax Gen66(mode,Cp); G4M(0x0f,(0xa4|l_r),0xd0,shc,Cp); } else { // movl Ofs_ECX(%%ebx),%%ecx G3M(0x8b,0x4b,Ofs_ECX,Cp); // sh{lr}d %%cl,%%{e}dx,%%{e}ax Gen66(mode,Cp); G3M(0x0f,(0xa5|l_r),0xd0,Cp); } G1(0x9c,Cp); // flags back on stack } break; case O_RDTSC: { // rdtsc G2(0x310f,Cp); if (eTimeCorrect >= 0) { // movl %%eax,%%ecx // movl %%edx,%%edi G4(0xd789c189,Cp); // subl TimeStartExec.t.tl(%%ebx),%%eax // sbbl TimeStartExec.t.th(%%ebx),%%edx G2(0x832b,Cp); G4((unsigned char *)&TimeStartExec.t.tl-CPUOFFS(0),Cp); G2(0x931b,Cp); G4((unsigned char *)&TimeStartExec.t.th-CPUOFFS(0),Cp); // addl TheCPU.EMUtime(%%ebx),%%eax // adcl TheCPU.EMUtime+4(%%ebx),%%edx G3M(0x03,0x43,Ofs_ETIME,Cp); G3M(0x13,0x53,Ofs_ETIME+4,Cp); // movl %%ecx,TimeStartExec.t.tl(%%ebx) // movl %%edi,TimeStartExec.t.th(%%ebx) G2(0x8b89,Cp); G4((unsigned char *)&TimeStartExec.t.tl-CPUOFFS(0),Cp); G2(0xbb89,Cp); G4((unsigned char *)&TimeStartExec.t.th-CPUOFFS(0),Cp); // movl %%eax,TheCPU.EMUtime(%%ebx) // movl %%edx,TheCPU.EMUtime+4(%%ebx) G3M(0x89,0x43,Ofs_ETIME,Cp); G3M(0x89,0x53,Ofs_ETIME+4,Cp); } // movl %%eax,Ofs_EAX(%%ebx) // movl %%edx,Ofs_EDX(%%ebx) G3M(0x89,0x43,Ofs_EAX,Cp); G3M(0x89,0x53,Ofs_EDX,Cp); } break; case O_INPDX: // movl Ofs_EDX(%%ebx),%%edx G3M(0x8b,0x53,Ofs_EDX,Cp); if (mode&MBYTE) { // inb (%%dx),%%al; movb %%al,Ofs_AL(%%ebx) G4M(0xec,0x88,0x43,Ofs_AL,Cp); } else { // in{wl} (%%dx),%%{e}ax 
Gen66(mode,Cp); G1(0xed,Cp); // mov{wl} %%{e}ax,Ofs_EAX(%%ebx) Gen66(mode,Cp); G3M(0x89,0x43,Ofs_EAX,Cp); } break; case O_OUTPDX: // movl Ofs_EDX(%%ebx),%%edx G3M(0x8b,0x53,Ofs_EDX,Cp); if (mode&MBYTE) { // movb Ofs_AL(%%ebx),%%al; outb %%al,(%%dx) G4M(0x8a,0x43,Ofs_AL,0xee,Cp); } else { // movl Ofs_EAX(%%ebx),%%eax G3M(0x8b,0x43,Ofs_EAX,Cp); // out{wl} %%{e}ax,(%%dx) Gen66(mode,Cp); G1(0xef,Cp); } break; case JMP_LINK: { // cond, dspt, retaddr, link static unsigned char pseq16[] = { // movw $RA,%%ax /*00*/ 0xb8,0,0,0,0, // movl Ofs_XSS(%%ebx),%%esi 0x8b,0x73,Ofs_XSS, // movl Ofs_ESP(%%ebx),%%ecx 0x8b,0x4b,Ofs_ESP, // leal -2(%%ecx),%%ecx 0x8d,0x49,0xfe, // andl StackMask(%%ebx),%%ecx 0x23,0x4b,Ofs_STACKM, // movw %%ax,(%%esi,%%ecx,1) 0x66,0x89,0x04,0x0e, // movl %%ecx,Ofs_ESP(%%ebx) 0x89,0x4b,Ofs_ESP }; static unsigned char pseq32[] = { // movl $RA,%%eax /*00*/ 0xb8,0,0,0,0, // movl Ofs_XSS(%%ebx),%%esi 0x8b,0x73,Ofs_XSS, // movl Ofs_ESP(%%ebx),%%ecx 0x8b,0x4b,Ofs_ESP, // leal -4(%%ecx),%%ecx 0x8d,0x49,0xfc, // andl StackMask(%%ebx),%%ecx 0x23,0x4b,Ofs_STACKM, // movl %%eax,(%%esi,%%ecx,1) 0x89,0x04,0x0e, // movl %%ecx,Ofs_ESP(%%ebx) 0x89,0x4b,Ofs_ESP }; unsigned char cond = IG->p0; int dspt = IG->p1; int dspnt = IG->p2; linkdesc *lt = IG->lt; if (cond == 0x11) { // call register unsigned char *p, *q; int sz; if (mode&DATA16) { p=pseq16,sz=sizeof(pseq16); } else { p=pseq32,sz=sizeof(pseq32); } q = Cp; GNX(Cp, p, sz); *((int *)(q+1)) = dspnt; if (debug_level('e')>1) e_printf("CALL: ret=%08x\n",dspnt); } // t: b8 [exit_pc] 5a c3 G1(0xb8,Cp); lt->t_type = JMP_LINK; /* {n}t_link = offset from codebuf start to immed value */ lt->t_link.rel = Cp-BaseGenBuf; lt->nt_link.abs = 0; G4(dspt,Cp); G2(0xc35a,Cp); if (debug_level('e')>2) e_printf("JMP_Link %08x:%08x lk=%d:%08x:%p\n", dspt,dspnt,lt->t_type,lt->t_link.rel,lt->nt_link.abs); } break; case JF_LINK: case JB_LINK: { // cond, PC, dspt, dspnt, link unsigned char cond = IG->p0; int jpc = IG->p1; int dspt = IG->p2; int 
dspnt = IG->p3; linkdesc *lt = IG->lt; int sz; // JCXZ: 8b 4b Ofs_ECX e3 07 or 0f b7 4b Ofs_ECX e3 07 // JCC: 7x 07 // nt: b8 [nt_pc] 5a c3 // t: 0f b7 4f [sig] e3 07 // b8 [sig_pc] 5a c3 // b8 [t_pc] 5a c3 sz = TAILSIZE + (mode & CKSIGN? 13:0); if (cond==0x31) { if (mode&ADDR16) { // movzwl Ofs_ECX(%%ebx),%%ecx G4M(0x0f,0xb7,0x4b,Ofs_ECX,Cp); } else { // movl Ofs_ECX(%%ebx),%%ecx G3M(0x8b,0x4b,Ofs_ECX,Cp); } G2M(0xe3,sz,Cp); // jecxz } else { PopPushF(Cp); // get flags from stack G2M(0x70|cond,sz,Cp); // normal cond } if (mode & CKSIGN) { // check signal on NOT TAKEN branch // for backjmp-after-jcc: // movzwl Ofs_SIGAPEND(%%ebx),%%ecx G4M(0x0f,0xb7,0x4b,Ofs_SIGAPEND,Cp); // jecxz {continue}: exit if sigpend not 0 G2M(0xe3,TAILSIZE,Cp); // movl {exit_addr},%%eax; pop %%edx; ret G1(0xb8,Cp); G4(jpc,Cp); G2(0xc35a,Cp); } lt->t_type = IG->op; // not taken: continue with next instr G1(0xb8,Cp); /* {n}t_link = offset from codebuf start to immed value */ lt->nt_link.rel = Cp-BaseGenBuf; G4(dspnt,Cp); G2(0xc35a,Cp); // taken if (IG->op==JB_LINK) { // check signal on TAKEN branch for back jumps G4M(0x0f,0xb7,0x4b,Ofs_SIGAPEND,Cp); G2M(0xe3,TAILSIZE,Cp); G1(0xb8,Cp); G4(jpc,Cp); G2(0xc35a,Cp); } G1(0xb8,Cp); lt->t_link.rel = Cp-BaseGenBuf; G4(dspt,Cp); G2(0xc35a,Cp); if (debug_level('e')>2) e_printf("J_Link %08x:%08x lk=%d:%08x:%08x\n", dspt,dspnt,lt->t_type,lt->t_link.rel,lt->nt_link.rel); } break; case JLOOP_LINK: { // cond, PC, dspt, dspnt, link unsigned char cond = IG->p0; int dspt = IG->p1; int dspnt = IG->p2; linkdesc *lt = IG->lt; // {66} dec Ofs_ECX(ebx) // LOOP: jnz t // LOOPZ: jz nt; test 0x40,dl; jnz t // LOOPNZ: jz nt; test 0x40,dl; jz t // nt: b8 [nt_pc] c3 // t: 8b 0d [sig] jcxz t2 // {66} inc Ofs_ECX(ebx) // b8 [sig_pc] c3 // t2: b8 [t_pc] c3 if (mode&ADDR16) { G4M(OPERoverride,0xff,0x4b,Ofs_ECX,Cp); } else { G3M(0xff,0x4b,Ofs_ECX,Cp); } /* * 20 LOOP taken = (e)cx nt=cxz * 24 LOOPZ taken = (e)cx && ZF nt=cxz||!ZF * 25 LOOPNZ taken = (e)cx && !ZF nt=cxz|| ZF 
*/
		if (cond==0x24) {	// loopz
			G2M(0x74,0x06,Cp);	// jz->nt
			// test flags (on stack)
			G4M(0xf6,0x04,0x24,0x40,Cp);
			G2M(0x75,TAILSIZE,Cp);	// jnz->t
		}
		else if (cond==0x25) {	// loopnz
			G2M(0x74,0x06,Cp);	// jz->nt
			// test flags (on stack)
			G4M(0xf6,0x04,0x24,0x40,Cp);
			G2M(0x74,TAILSIZE,Cp);	// jz->t
		}
		else {
			G2M(0x75,TAILSIZE,Cp);	// jnz->t
		}
		lt->t_type = JLOOP_LINK;
		// not taken: continue with next instr
		G1(0xb8,Cp);
		/* {n}t_link = offset from codebuf start to immed value */
		lt->nt_link.rel = Cp-BaseGenBuf;
		G4(dspnt,Cp); G2(0xc35a,Cp);
		// taken
		G1(0xb8,Cp);
		lt->t_link.rel = Cp-BaseGenBuf;
		G4(dspt,Cp); G2(0xc35a,Cp);
		if (debug_level('e')>2) e_printf("JLOOP_Link %08x:%08x lk=%d:%08x:%08x\n",
			dspt,dspnt,lt->t_type,lt->t_link.rel,lt->nt_link.rel);
		}
		break;

	}

	CodePtr = Cp;
#ifdef PROFILE
	if (debug_level('e')) GenTime += (GETTSC() - t0);
#endif
}


/*
 * address generator unit
 * careful - do not use eax, and NEVER change any flag!
 *
 * AddrGen_x86 does not emit any code itself: it appends one IGen record
 * to the instruction currently being assembled (InstrMeta[CurrIMeta]),
 * filling in the op, the mode, the current OVERR_DS value and the
 * op-specific parameters p0..p4 taken from the variable argument list.
 * It also adds GendBytesPerOp[op] to GenBufSize.
 *
 *   op    one of the A_xxx address ops handled in the switch below;
 *         any other value stores only op/mode/ovds
 *   mode  mode flags, stored unchanged in the record
 *   ...   op-dependent arguments, see the per-case comments
 *
 * If CurrIMeta is negative the generator state (CurrIMeta, ngen of the
 * first IMeta, GenCodeBuf, BaseGenBuf, GenBufSize) is reset first.
 * When the record list of the instruction is full (ngen >= NUMGENS)
 * leavedos(0xbac1) is called.
 */
static void AddrGen_x86(int op, int mode, ...)
{
	va_list	ap;
	IMeta *I;
	IGen *IG;
#ifdef PROFILE
	hitimer_t t0 = 0;
	if (debug_level('e')) t0 = GETTSC();
#endif

	/* first op of a new sequence: start from a clean state */
	if (CurrIMeta<0) {
		CurrIMeta=0; InstrMeta[0].ngen=0;
		GenCodeBuf=NULL; BaseGenBuf=NULL; GenBufSize=0;
	}
	I = &InstrMeta[CurrIMeta];
	if (I->ngen >= NUMGENS) leavedos(0xbac1);
	IG = &(I->gen[I->ngen]);	/* next free record */
	if (debug_level('e')>6) dbug_printf("AGEN: %3d %6x\n",op,mode);

	va_start(ap, mode);
	IG->op = op;
	IG->mode = mode;
	IG->ovds = OVERR_DS;
	/* accumulate the per-op size estimate */
	GenBufSize += GendBytesPerOp[op];

	/*
	 * NOTE(review): Offs_From_Arg() is defined elsewhere; it presumably
	 * consumes the next variadic argument through 'ap', so the order of
	 * the va_arg()/Offs_From_Arg() calls below must match the callers.
	 */
	switch(op) {
	case A_DI_0:			// base(32), imm
	case A_DI_1: {			// base(32), {imm}, reg, {shift}
		signed char ofs = (char)va_arg(ap,int);
		signed char o;
		IG->p0 = ofs;
		IG->p1 = va_arg(ap,int);
		/* A_DI_0 takes only the first two arguments */
		if (op==A_DI_0)
			break;
		o = Offs_From_Arg();
		IG->p2 = o;
		}
		break;
	case A_DI_2: {			// base(32), {imm}, reg, reg, {shift}
		signed char o1,o2;
		signed char ofs = (char)va_arg(ap,int);
		unsigned char sh;
		IG->p0 = ofs;
		IG->p1 = va_arg(ap,int);
		o1 = Offs_From_Arg();
		o2 = Offs_From_Arg();
		IG->p2 = o1;
		IG->p3 = o2;
		sh = (unsigned char)(va_arg(ap,int));
		IG->p4 = sh;
		}
		break;
	case A_DI_2D: {			// modrm_sibd, 32-bit mode
		signed char o;
		unsigned char sh;
		IG->p0 = va_arg(ap,int);
		o = Offs_From_Arg();
		IG->p1 = o;
		sh = (unsigned char)(va_arg(ap,int));
		IG->p2 = sh;
		}
		break;
	case A_SR_SH4: {	// real mode make base addr from seg
		signed char o1 = Offs_From_Arg();
		signed char o2 = Offs_From_Arg();
		IG->p0 = o1;
		IG->p1 = o2;
		}
		break;
	}
	va_end(ap);
	I->ngen++;	/* the record is now part of the instruction */
#ifdef PROFILE
	if (debug_level('e')) GenTime += (GETTSC() - t0);
#endif
}


/*
 * Gen_x86 is the counterpart of AddrGen_x86 for all the other
 * intermediate ops: it appends one IGen record (op, mode, OVERR_DS and
 * the op-specific parameters taken from the variable argument list) to
 * InstrMeta[CurrIMeta] and adds GendBytesPerOp[op] to GenBufSize.
 * No code is emitted here.
 *
 * The generator state is reset when CurrIMeta is negative, and
 * leavedos(0xbac2) is called when the instruction already holds
 * NUMGENS records.
 */
static void Gen_x86(int op, int mode, ...)
{
	int rcod=0;
	va_list	ap;
	IMeta *I;
	IGen *IG;
#ifdef PROFILE
	hitimer_t t0 = 0;
	if (debug_level('e')) t0 = GETTSC();
#endif

	/* first op of a new sequence: start from a clean state */
	if (CurrIMeta<0) {
		CurrIMeta=0; InstrMeta[0].ngen=0;
		GenCodeBuf=NULL; BaseGenBuf=NULL; GenBufSize = 0;
	}
	I = &InstrMeta[CurrIMeta];
	if (I->ngen >= NUMGENS) leavedos(0xbac2);
	IG = &(I->gen[I->ngen]);	/* next free record */
	if (debug_level('e')>6) dbug_printf("CGEN: %3d %6x\n",op,mode);

	va_start(ap, mode);
	IG->op = op;
	IG->mode = mode;
	IG->ovds = OVERR_DS;
	GenBufSize += GendBytesPerOp[op];

	switch(op) {
	/* ops without any extra argument */
	case L_NOP:
	case L_CR0:
	case L_ZXAX:
	// case L_DI_R1:
	case L_VGAREAD:
	// case S_DI:
	case L_VGAWRITE:
	case O_NOT:
	case O_NEG:
	case O_INC:
	case O_DEC:
	case O_MUL:
	case O_CBWD:
	case O_XLAT:
	case O_PUSH:
	case O_PUSH1:
	case O_PUSH2F:
	case O_PUSH3:
	case O_PUSHA:
	case O_POP:
	case O_POP1:
	case O_POP3:
	case O_POPA:
	case O_LEAVE:
	case O_MOVS_SetA:
	case O_MOVS_MovD:
	case O_MOVS_LodD:
	case O_MOVS_StoD:
	case O_MOVS_ScaD:
	case O_MOVS_CmpD:
	case O_MOVS_SavA:
	case O_RDTSC:
	case O_INPDX:
	case O_OUTPDX:
		break;

	/* ops with one Offs_From_Arg() parameter, stored in p0 */
	case L_REG:
	case S_REG:
	case S_DI_R:
	case L_LXS1:
	case O_XCHG:
	case O_CLEAR:
	case O_TEST:
	case O_SBSELF:
	case O_BSWAP: {
		signed char o = Offs_From_Arg();
		IG->p0 = o;
		}
		break;

	/* ops with two Offs_From_Arg() parameters, stored in p0 and p1 */
	case L_REG2REG:
	case L_LXS2:	/* real mode segment base from segment value */
	case O_CMPXCHG:
	case O_XCHG_R: {
		signed char o1 = Offs_From_Arg();
		signed char o2 = Offs_From_Arg();
		IG->p0 = o1;
		IG->p1 = o2;
		}
		break;

	case S_DI_IMM:
	case L_IMM_R1:
	case O_ADD_R:		// acc = acc op reg
	case O_OR_R:
	case O_ADC_R:
	case O_SBB_R:
case O_AND_R: case O_SUB_R: case O_XOR_R: case O_CMP_R: case O_INC_R: case O_DEC_R: case O_DIV: case O_IDIV: case O_PUSHI: { int v = va_arg(ap,int); IG->p0 = v; } break; case O_ADD_FR: // reg = reg op acc/imm case O_OR_FR: case O_ADC_FR: case O_SBB_FR: case O_AND_FR: case O_SUB_FR: case O_XOR_FR: case O_CMP_FR: { signed char o = Offs_From_Arg(); IG->p0 = o; if (mode & IMMED) { int v = va_arg(ap,int); IG->p1 = v; } } break; case L_IMM: { signed char o = Offs_From_Arg(); int v = va_arg(ap,int); IG->p0 = o; IG->p1 = v; } break; case L_MOVZS: { signed char o; rcod = (va_arg(ap,int)&1)<<3; // 0=z 8=s o = Offs_From_Arg(); IG->p0 = rcod; IG->p1 = o; } break; case O_IMUL: if (mode&IMMED) { int v = va_arg(ap,int); signed char o = Offs_From_Arg(); IG->p0 = v; IG->p1 = o; } if (!(mode&MBYTE)) { if (mode&MEMADR) { signed char o = Offs_From_Arg(); IG->p0 = o; } } break; case O_ROL: case O_ROR: case O_RCL: case O_RCR: case O_SHL: case O_SHR: case O_SAR: if (mode & IMMED) { unsigned char sh = (unsigned char)va_arg(ap,int); IG->p0 = sh; } break; case O_OPAX: { /* used by DAA..AAD */ int n = va_arg(ap,int); IG->p0 = n; // get n>0,n<3 bytes from parameter stack IG->p1 = va_arg(ap,int); if (n==2) IG->p2 = va_arg(ap,int); } break; case O_PUSH2: case O_POP2: { signed char o = Offs_From_Arg(); IG->p0 = o; } break; case O_SLAHF: rcod = va_arg(ap,int)&1; // 0=LAHF 1=SAHF IG->p0 = rcod; break; case O_SETFL: case O_SETCC: { unsigned char n = (unsigned char)va_arg(ap,int); IG->p0 = n; } break; case O_FOP: { unsigned char exop = (unsigned char)va_arg(ap,int); IG->p0 = exop; IG->p1 = va_arg(ap,int); // reg } break; case O_BITOP: { unsigned char n = (unsigned char)va_arg(ap,int); signed char o = Offs_From_Arg(); IG->p0 = n; IG->p1 = o; } break; case O_SHFD: { unsigned char l_r = (unsigned char)va_arg(ap,int)&8; signed char o = Offs_From_Arg(); IG->p0 = l_r; IG->p1 = o; if (mode & IMMED) { unsigned char shc = (unsigned char)va_arg(ap,int)&0x1f; IG->p2 = shc; } } break; case JMP_LINK: // cond, 
dspt, retaddr, link case JLOOP_LINK: { unsigned char cond = (unsigned char)va_arg(ap,int); IG->p0 = cond; IG->p1 = va_arg(ap,int); // dspt IG->p2 = va_arg(ap,int); // dspnt IG->lt = va_arg(ap,linkdesc *); // lt } break; case JF_LINK: case JB_LINK: { // cond, PC, dspt, dspnt, link unsigned char cond = (unsigned char)va_arg(ap,int); IG->p0 = cond; IG->p1 = va_arg(ap,int); // jpc IG->p2 = va_arg(ap,int); // dspt IG->p3 = va_arg(ap,int); // dspnt IG->lt = va_arg(ap,linkdesc *); // lt } break; } va_end(ap); I->ngen++; #ifdef PROFILE if (debug_level('e')) GenTime += (GETTSC() - t0); #endif } ///////////////////////////////////////////////////////////////////////////// static void ProduceCode(unsigned int PC) { int i,j,nap,mall_req; unsigned int adr_lo=0, adr_hi=0; unsigned char *cp1; IMeta *I0 = &InstrMeta[0]; if (debug_level('e')>1) { e_printf("---------------------------------------------\n"); e_printf("ProduceCode: CurrIMeta=%d\n",CurrIMeta); } if (CurrIMeta < 0) leavedos(0xbac3); /* reserve space for auto-ptr and info structures */ nap = I0->ncount+1; /* allocate the actual code buffer here; size is a worst-case * estimate based on measured bytes per opcode. * * Code buffer layout: * 0000 (GenCodeBuf) pointed from {TNode}.mblock * contains a back pointer to the TNode * 0008/0004 self-pointer (address of this location) * 0010/0008 Addr2Pc table (nap) pointed from {TNode}.pmeta * nap+10/8 actual code produced (BaseGenBuf) * plus tail code * Only the code part is filled here. 
* GenBufSize contain a first guess of the amount of space required * */ mall_req = GenBufSize + offsetof(CodeBuf,meta[nap]) + 32;// 32 for tail GenCodeBuf = dlmalloc(mall_req); /* actual code buffer starts from here */ BaseGenBuf = CodePtr = (unsigned char *)&GenCodeBuf->meta[nap]; I0->addr = BaseGenBuf; if (debug_level('e')>1) e_printf("CodeBuf=%p siz %d CodePtr=%p\n",GenCodeBuf,GenBufSize,CodePtr); for (i=0; inpc; } else { if (I->npc < adr_lo) adr_lo = I->npc; else if (I->npc > adr_hi) adr_hi = I->npc; } I->addr = cp1 = CodePtr; for (j=0; jngen; j++) { CodeGen(I, j); if (debug_level('e')>1) { IGen *IG = &(I->gen[j]); int dg = CodePtr-cp1; e_printf("PGEN(%02d,%02d) %3d %6x %2d %08x %08x %08x %08x %08x\n", i,j,IG->op,IG->mode,dg, IG->p0,IG->p1,IG->p2,IG->p3,IG->p4); cp1 = CodePtr; if (dg > GendBytesPerOp[IG->op]) { /**/ dbug_printf("Gend[%d] = %d\n",IG->op,dg); GendBytesPerOp[IG->op] = dg; } } } I->len = CodePtr - I->addr; if (debug_level('e')>3) GCPrint(I->addr, BaseGenBuf, I->len); } if (debug_level('e')>1) e_printf("Size=%td guess=%d\n",(CodePtr-BaseGenBuf),GenBufSize); /**/ if ((CodePtr-BaseGenBuf) > GenBufSize) leavedos(0x535347); if (PC < adr_lo) adr_lo = PC; else if (PC > adr_hi) adr_hi = PC; InstrMeta[0].seqbase = adr_lo; InstrMeta[0].seqlen = adr_hi - adr_lo; if (debug_level('e')>1) e_printf("---------------------------------------------\n"); } ///////////////////////////////////////////////////////////////////////////// /* * The node linker. * * A code sequence can have one of two termination types: * * 1) straight end (no jump), or unconditional jump or call * * key: | * | * | * mov $next_addr,eax * ret * * 2) conditional jump or loop * * key: | * | * | * jcond taken * mov $not_taken_addr,eax * ret * taken: mov $taken_addr,eax * ret * * In the first case, there's only one linking point; in the second, two. * Linking means replacing the "mov addr,eax" instruction with a direct * jump to the start point of the next code fragment. 
* The parameters used are (t_ means taken, nt_ means not taken): * t_ref,nt_ref pointers to next node * t_link,nt_link addresses of the patch point * t_undo,nt_undo saves the previous data for unlinking * Since a node can be referred from many others, we need to keep * "back-references" in a list in order to unlink it. */ static void _nodelinker2(TNode *LG, TNode *G) { unsigned int *lp; linkdesc *T = &G->clink; backref *B; if (debug_level('e')>8 && LG) e_printf("nodelinker2: %08x->%08x\n",LG->key,G->key); if (LG && (LG->alive>0)) { int ra; linkdesc *L = &LG->clink; if (L->t_type) { // node ends with links lp = L->t_link.abs; // check 'taken' branch if (*lp==G->key && ((unsigned char*)lp)[-1] == 0xb8) { // points to current node? if (L->t_ref!=0) { dbug_printf("Linker: t_ref at %08x busy\n",LG->key); leavedos(0x8102); } L->t_undo = *lp; // b8 [npc] -> e9/eb reladr ra = G->addr - (unsigned char *)L->t_link.abs; if ((ra > -127) && (ra < 128)) { ra -= 1; ((char *)lp)[-1] = 0xeb; } else { ra -= 4; ((char *)lp)[-1] = 0xe9; } *lp = ra; L->t_ref = &G->mblock->bkptr; B = calloc(1,sizeof(backref)); // head insertion B->next = T->bkr.next; T->bkr.next = B; B->ref = &LG->mblock->bkptr; B->branch = 'T'; T->nrefs++; if (G==LG) { G->flags |= F_SLFL; if (debug_level('e')>1) { e_printf("Linker: node (%p:%08x:%p) SELF link\n" "\t\tjmp %08x, undo=%08x, t_ref %d=%p->%p\n", G,G->key,G->addr, ra, L->t_undo, T->nrefs, L->t_ref, *L->t_ref); } } else if (debug_level('e')>1) { e_printf("Linker: previous node (%p:%08x:%p)\n" "\t\tlinked to (%p:%08x:%p)\n" "\t\tjmp %08x, undo=%08x, t_ref %d=%p->%p\n", LG,LG->key,LG->addr, G,G->key,G->addr, ra, L->t_undo, T->nrefs, L->t_ref, *L->t_ref); } if (debug_level('e')>8) { backref *bk = T->bkr.next; #ifdef DEBUG_LINKER if (bk==NULL) { dbug_printf("bkr null\n"); leavedos(0x8108); } #endif while (bk) { dbug_printf("bkref=%c%p->%p\n",bk->branch, bk->ref,*bk->ref); bk=bk->next; } } } if (L->t_type>JMP_LINK) { // if it has a 'not taken' link lp = 
L->nt_link.abs; // check 'not taken' branch if (*lp==G->key && ((unsigned char*)lp)[-1] == 0xb8) { // points to current node? if (L->nt_ref!=0) { dbug_printf("Linker: nt_ref at %08x busy\n",LG->key); leavedos(0x8103); } L->nt_undo = *lp; // b8 [npc] -> e9/eb reladr ra = G->addr - (unsigned char *)L->nt_link.abs; if ((ra > -127) && (ra < 128)) { ra -= 1; ((char *)lp)[-1] = 0xeb; } else { ra -= 4; ((char *)lp)[-1] = 0xe9; } *lp = ra; L->nt_ref = &G->mblock->bkptr; B = calloc(1,sizeof(backref)); // head insertion B->next = T->bkr.next; T->bkr.next = B; B->ref = &LG->mblock->bkptr; B->branch = 'N'; T->nrefs++; if (G==LG) { G->flags |= F_SLFL; if (debug_level('e')>1) { e_printf("Linker: node (%p:%08x:%p) SELF link\n" "\t\tjmp %08x, undo=%08x, nt_ref %d=%p->%p\n", G,G->key,G->addr, ra, L->nt_undo, T->nrefs, L->nt_ref, *L->nt_ref); } } else if (debug_level('e')>1) { e_printf("Linker: previous node (%p:%08x:%p)\n" "\t\tlinked to (%p:%08x:%p)\n" "\t\tjmp %08x, undo=%08x, nt_ref %d=%p->%p\n", LG,LG->key,LG->addr, G,G->key,G->addr, ra, L->nt_undo, T->nrefs, L->nt_ref, *L->nt_ref); } if (debug_level('e')>8) { backref *bk = T->bkr.next; #ifdef DEBUG_LINKER if (bk==NULL) { dbug_printf("bkr null\n"); leavedos(0x8109); } #endif while (bk) { dbug_printf("bkref=%c%p->%p\n",bk->branch, bk->ref,*bk->ref); bk=bk->next; } } } } } } } static void NodeLinker(TNode *G) { #ifdef PROFILE hitimer_t t0 = 0; #endif #if !defined(SINGLESTEP)&&!defined(SINGLEBLOCK) if (!UseLinker) #endif return; #ifdef PROFILE if (debug_level('e')) t0 = GETTSC(); #endif /* check links FROM LastXNode TO current node */ if (G != LastXNode) _nodelinker2(LastXNode, G); /* check links INSIDE current node */ _nodelinker2(G, G); #ifdef PROFILE if (debug_level('e')) LinkTime += (GETTSC() - t0); #endif } void NodeUnlinker(TNode *G) { unsigned int *lp; linkdesc *T = &G->clink; backref *B = T->bkr.next; #ifdef PROFILE hitimer_t t0 = 0; #endif #if !defined(SINGLESTEP)&&!defined(SINGLEBLOCK) if (!UseLinker) #endif return; 
#ifdef PROFILE if (debug_level('e')) t0 = GETTSC(); #endif // unlink backward references (from other nodes to the current // node) if (debug_level('e')>8) e_printf("Unlinker: bkr.next=%p\n",B); while (B) { backref *b2 = B; if (B->branch=='T') { TNode *H = *B->ref; linkdesc *L = &H->clink; if (debug_level('e')>2) e_printf("Unlinking T ref from node %p(%08x) to %08x\n", H, L->t_undo, G->key); if (L->t_undo != G->key) { dbug_printf("Unlinker: BK ref error u=%08x k=%08x\n", L->t_undo, G->key); leavedos(0x8110); } lp = L->t_link.abs; ((char *)lp)[-1] = 0xb8; *lp = L->t_undo; L->t_ref = NULL; L->t_undo = 0; T->nrefs--; } else if (B->branch=='N') { TNode *H = *B->ref; linkdesc *L = &H->clink; if (debug_level('e')>2) e_printf("Unlinking N ref from node %p(%08x) to %08x\n", H, L->nt_undo, G->key); if (L->nt_undo != G->key) { dbug_printf("Unlinker: BK ref error u=%08x k=%08x\n", L->nt_undo, G->key); leavedos(0x8110); } lp = L->nt_link.abs; ((char *)lp)[-1] = 0xb8; *lp = L->nt_undo; L->nt_ref = NULL; L->nt_undo = 0; T->nrefs--; } else { e_printf("Invalid unlink [%c] ref %p from node ?(?) to %08x\n", B->branch, B->ref, G->key); leavedos(0x8116); } B = B->next; free(b2); } if (G==LastXNode) LastXNode=NULL; if (T->nrefs) { dbug_printf("Unlinker: nrefs error\n"); leavedos(0x8115); } // unlink forward references (from the current node to other // nodes), which are backward refs for the other nodes if (debug_level('e')>8) e_printf("Unlinker: refs=T%p N%p\n",T->t_ref,T->nt_ref); if (T->t_ref) { TNode *Gt = *T->t_ref; backref *Btq = &Gt->clink.bkr; backref *Bt = Gt->clink.bkr.next; if (debug_level('e')>2) e_printf("Unlink fwd T ref to node %p(%08x)\n",Gt, Gt->key); while (Bt) { if (*Bt->ref==G) { Btq->next = Bt->next; Gt->clink.nrefs--; free(Bt); break; } Btq = Bt; Bt = Bt->next; } if (Bt==NULL) { // not found... 
dbug_printf("Unlinker: FW T ref error\n"); leavedos(0x8111); } T->t_ref = NULL; } if (T->nt_ref) { TNode *Gn = *T->nt_ref; backref *Bnq = &Gn->clink.bkr; backref *Bn = Gn->clink.bkr.next; if (debug_level('e')>2) e_printf("Unlink fwd N ref to node %p(%08x)\n",Gn, Gn->key); while (Bn) { if (*Bn->ref==G) { Bnq->next = Bn->next; Gn->clink.nrefs--; free(Bn); break; } Bnq = Bn; Bn = Bn->next; } if (Bn==NULL) { // not found... dbug_printf("Unlinker: FW N ref error\n"); leavedos(0x8112); } T->nt_ref = NULL; } memset(T, 0, sizeof(linkdesc)); #ifdef PROFILE if (debug_level('e')) LinkTime += (GETTSC() - t0); #endif } ///////////////////////////////////////////////////////////////////////////// /* * These are the functions which actually executes the generated code. * * There are two paths: * 1) for CloseAndExec_x86 we are ending a code generation phase, and our code * is still in the CodeBuf together with all its detailed info stored * in InstrMeta. First we close the sequence adding the TailCode; * it, and move it to the collecting tree and clear the temporary * structures. Then, in Exec_x86 we execute the code. * The PC parameter is the address in the source code of the next * instruction following the end of the code block. It will be stored * into the TailCode of the block. * 2) We are executing a sequence found in the collecting tree. * Exec_x86 is called directly. * G is the node we found (possibly the start of a chain of linked * code sequences). * * When the code is executed, it returns in eax the source address of the * next instruction to find/parse. 
* */ static unsigned int CloseAndExec_x86(unsigned int PC, int mode, int ln) { IMeta *I0; unsigned char *p; TNode *G; unsigned short seqlen; if (CurrIMeta <= 0) { /**/ e_printf("(X) Nothing to exec at %08x\n",PC); return PC; } // we're creating a new node I0 = &InstrMeta[0]; if (debug_level('e')>2) { e_printf("== (%d) == Closing sequence at %08x\n",ln,PC); } ProduceCode(PC); p = CodePtr; /* If the code doesn't terminate with a jump/loop instruction * it still lacks the tail code; add it here */ if (I0->clink.t_type==0) { /* copy tail instructions to the end of the code block */ memcpy(p, TailCode, TAILSIZE); p += TAILFIX; I0->clink.t_link.abs = (unsigned int *)p; *((unsigned int *)p) = PC; CodePtr += TAILSIZE; } /* show jump+tail code */ if ((debug_level('e')>6) && (CurrIMeta>0)) { IMeta *GL = &InstrMeta[CurrIMeta-1]; unsigned char *pl = GL->addr+GL->len; GCPrint(pl, BaseGenBuf, CodePtr - pl); } I0->totlen = CodePtr - BaseGenBuf; if (debug_level('e')>3) e_printf("Seq len %#x:%#x\n",I0->seqlen,I0->totlen); NodesParsed++; #ifdef PROFILE if (debug_level('e')) TotalNodesParsed++; #endif G = Move2Tree(); /* when is G==NULL? */ /* InstrMeta will be zeroed at this point */ /* mprotect the page here; a page fault will be triggered * if some other code tries to write over the page including * this node */ seqlen = G->seqlen; /* Special case: translated block followed by int instruction. Ints are always interpreted, but sometimes (e.g. for FPU emulators, INT 3x) the int instruction is modified by the int handler to become an actual FPU instruction). 
This makes sure that then the whole block is retranslated so it is as long as possible */ if (Fetch(G->seqbase+seqlen) == INT) seqlen += 2; e_markpage(G->seqbase, seqlen); e_mprotect(G->seqbase, seqlen); return Exec_x86(G, ln); } unsigned int Exec_x86(TNode *G, int ln) { unsigned long flg; unsigned char *ecpu; unsigned int mem_ref; unsigned int ePC; unsigned short seqflg = G->flags; unsigned char *SeqStart = G->addr; hitimer_u TimeEndExec; #ifdef PROFILE if (debug_level('e')) TotalNodesExecd++; #endif /* * LastXNode stuff: history. We keep in every node the address of * the next node executed, in historical order. This speeds up * finding a node in the tree a lot, hits are always in the * 70-85% range! */ if (LastXNode && (LastXNode->alive>0)) { LastXNode->nxnode = G; // can be relocated in the tree! LastXNode->nxkey = G->key; if (debug_level('e')>2) e_printf("History: from %08x to %08x\n",LastXNode->key,G->key); } ecpu = CPUOFFS(0); if (debug_level('e')>1) { if (TheCPU.sigalrm_pending>0) e_printf("** SIGALRM is pending\n"); e_printf("== (%d) == Executing code at %p flg=%04x\n", ln,SeqStart,seqflg); } #ifdef ASM_DUMP fprintf(aLog,"%p: exec\n",G->key); #endif if (seqflg & F_FPOP) { /* mask exceptions in generated code */ unsigned short fpuc; asm ("fstcw %0" : "=m"(TheCPU.fpuc)); fpuc = TheCPU.fpuc | 0x3f; asm ("fldcw %0" :: "m"(fpuc)); } /* get the protected mode flags. Note that RF and VM are cleared * by pushfd (but not by ints and traps) */ flg = getflags(); /* pass TF=0, IF=1, DF=0 */ flg = (flg & ~(EFLAGS_CC|EFLAGS_IF|EFLAGS_DF|EFLAGS_TF)) | (EFLAGS & EFLAGS_CC) | EFLAGS_IF; /* This is for exception processing */ InCompiledCode = 1; /* stack frame for compiled code: * esp+00 TheCPU flags * 04/08 return address * 08/10 dosemu flags * 14/18 ebx * 18/20... 
locals of CloseAndExec */ #ifdef __x86_64__ #define RE_REG(r) "%%r"#r #else #define RE_REG(r) "%%e"#r if (config.cpuprefetcht0) #endif __asm__ __volatile__ ( " prefetcht0 %0\n" : : "m"(*ecpu) ); if (eTimeCorrect >= 0) { __asm__ __volatile__ ( " push "RE_REG(bx)"\n" " call 1f\n" " jmp 2f\n" "1: push %8\n" /* push and get TheCPU flags */ " rdtsc\n" " movl %%eax,%3\n" /* save time before execution */ " movl %%edx,%4\n" " mov %7,"RE_REG(bx)"\n"/* address of TheCPU(+0x80!) */ " jmp *%9\n" /* call SeqStart */ "2: mov "RE_REG(dx)",%0\n"/* save flags */ " movl %%eax,%1\n" /* save PC at block exit */ " rdtsc\n" " pop "RE_REG(bx) /* restore regs */ : "=S"(flg),"=c"(ePC),"=D"(mem_ref), "=m"(TimeStartExec.t.tl),"=m"(TimeStartExec.t.th), "=&a"(TimeEndExec.t.tl),"=&d"(TimeEndExec.t.th) : "c"(ecpu),"0"(flg),"2"(SeqStart) : "memory", "cc" #ifdef __x86_64__ /* Generated code calls C functions which clobber ... */ ,"r8","r9","r10","r11" #endif ); } else { __asm__ __volatile__ ( " push "RE_REG(bx)"\n" " call 1f\n" " jmp 2f\n" "1: push %4\n" /* push and get TheCPU flags */ " mov %3,"RE_REG(bx)"\n"/* address of TheCPU(+0x80!) */ " jmp *%5\n" /* call SeqStart */ "2: mov "RE_REG(dx)",%0\n"/* save flags */ " movl %%eax,%1\n" /* save PC at block exit */ " pop "RE_REG(bx) /* restore regs */ : "=S"(flg),"=c"(ePC),"=D"(mem_ref) : "c"(ecpu),"0"(flg),"2"(SeqStart) : "memory", "cc" #ifdef __x86_64__ /* Generated code calls C functions which clobber ... */ ,"r8","r9","r10","r11" #endif ); } InCompiledCode = 0; EFLAGS = (EFLAGS & ~EFLAGS_CC) | (flg & EFLAGS_CC); TheCPU.mem_ref = mem_ref; /* was there at least one FP op in the sequence? 
*/ if (seqflg & F_FPOP) { int exs; __asm__ __volatile__ ("fstsw %0" : "=m"(exs)); exs &= 0x7f; if (exs) { e_printf("FPU: error status %02x\n",exs); if ((exs & ~TheCPU.fpuc) & 0x3f) { __asm__ __volatile__ ("fnclex\n" ::: "memory"); e_printf("FPU exception\n"); /* TheCPU.err = EXCP10_COPR; */ } } } if (eTimeCorrect >= 0) { TimeEndExec.td -= TimeStartExec.td; TheCPU.EMUtime += TimeEndExec.td; } if (debug_level('e')) { #ifdef PROFILE ExecTime += TimeEndExec.td; #endif if (debug_level('e')>1) { e_printf("** End code, PC=%08x sig=%x\n",ePC, TheCPU.sigalrm_pending); if ((debug_level('e')>3) && (seqflg & F_FPOP)) { e_printf(" %s\n", e_trace_fp()); } /* DANGEROUS - can crash dosemu! */ if ((debug_level('e')>4) && goodmemref(mem_ref)) { TryMemRef = 1; e_printf("*mem_ref [%#08x] = %08x\n",mem_ref, READ_DWORD(mem_ref)); TryMemRef = 0; } } } /* signal_pending at this point is 1 if there was ANY signal, * not just a SIGALRM */ if (signal_pending()) { CEmuStat|=CeS_SIGPEND; } /* sigalrm_pending at this point can be: * 0 - if there was no signal * 1 - if there was a signal * .. so reset it for next try */ TheCPU.sigalrm_pending = 0; #if defined(SINGLESTEP)||defined(SINGLEBLOCK) avltr_delete(G->key); if (debug_level('e')>1) e_printf("\n%s",e_print_regs()); #else /* * After execution comes the linker stage. * So the order is: * 1) build code sequence in the IMeta buffer * 2) move buffer to a newly allocated node in the tree * 3) execute it, always returning back at the end * 4) link it to other nodes * LastXNode stuff: linking. A node is linked with the next * one in execution order, provided that the end source address * of the preceding node matches the start source address of the * following (i.e. no interpreted instructions in between). 
*/ if (G && (G->alive>0)) { if (UseLinker) NodeLinker(G); LastXNode = G; if (debug_level('e')>2) e_printf("New LastXNode=%08x\n",G->key); } else #endif LastXNode = NULL; return ePC; } ///////////////////////////////////////////////////////////////////////////// #endif