//Creates a table descriptor by storing every argument in the field of the same name
// (without the leading underscore).
public TABLE_DESCRIPTOR(byte _min, byte _max, byte _mask, byte _shift, byte _props, OPCODE_DESCRIPTOR[] _opcodes)
{
    //Index limits of the table.
    this.min = _min;
    this.max = _max;

    //Parameters applied to the input byte before it is used as an index.
    this.mask = _mask;
    this.shift = _shift;

    //Table properties and the entries themselves.
    this.props = _props;
    this.opcodes = _opcodes;
}
//Copies MODRM and SIB bytes to struct INSTRUCTION.
// The MODRM byte is read at [offset] only when the opcode carries PROP_MODRM. The SIB byte is
// read at [offset + 1] only when the address size is not 16-bit, the low three bits of MODRM
// are 100b and the top two bits of MODRM are not 11b.
// Returns the number of bytes consumed: 0, 1 (MODRM only) or 2 (MODRM and SIB).
static byte parse_modrm_sib(ulong offset, ref INSTRUCTION instr, OPCODE_DESCRIPTOR opcode)
{
    byte len = 0;

    //No MODRM byte for this opcode: nothing to copy.
    if ((opcode.props & PROP_MODRM) == 0)
    {
        return len;
    }

    len++;
    instr.flags |= INSTR_FLAG_MODRM;
    instr.modrm = assembly.ReadBytes(offset, 1)[0];

    if (instr.addrsize != ADDR_SIZE_16)
    {
        if ((instr.modrm & 0x7) == 0x4 && (instr.modrm & 0xC0) != 0xC0)
        {
            len++;
            instr.flags |= INSTR_FLAG_SIB;
            //Only the SIB field is written here; the MODRM byte stored above must stay intact.
            instr.sib = assembly.ReadBytes(offset, 2)[1];
        }
    }

    return len;
}
//Main function for parsing opcode and prefixes. First it parses all prefixes and then
// looks up the matching struct OPCODE_DESCRIPTOR. The following algorithm is used to handle
// instructions that use a prefix as an opcode extension:
//
// * Is there a prefix that may be an opcode extension?
//     No:  Lookup starts from the 1byte table.
//          * Is the instruction found?
//              No:  Error.
//              Yes: Success.
//     Yes: Lookup starts from the 'ext_table_index' table.
//          * Is the instruction found?
//              No:  Lookup starts from the 1byte table.
//                   * Is the instruction found?
//                       No:  Error.
//                       Yes: Success.
//              Yes: Success.
//
// Returns the number of bytes consumed by prefixes and opcode. Failures are reported through
// idata.severe_err (ERRS.ERR_BADCODE when no descriptor is found).
static UInt32 parse_opcode(ulong offset, ref OPCODE_DESCRIPTOR opcode_descr, ref INSTRUCTION instr, INTERNAL_DATA idata, ref DISASM_INOUT_PARAMS param)
{
    //NOTE(review): both values are passed to parse_prefixes by value, so as written they still
    // hold 0xFF / 0 after the call and the extension-table branch below is never taken.
    // Presumably parse_prefixes was meant to report them back (ref/out) - confirm and fix there.
    byte ext_table_index = 0xFF;
    byte ext_prefix_index = 0;
    UInt32 res;
    UInt32 tmp;

    res = parse_prefixes(offset, ref instr, idata, ext_table_index, ext_prefix_index, ref param);
    if (idata.severe_err==0)
    {
        instr.opcode_offset = (byte)res;
        offset += res;

        //The extension tables are only tried when the byte AT 'offset' is 0x0F.
        // The check must test the stream content, not the stream position.
        if ((ext_table_index != 0xFF) && (assembly.ReadBytes(offset, 1)[0] == 0xF))
        {
            tmp = lookup_opcode(offset, ext_table_index, ref opcode_descr, idata);
            if ((idata.severe_err==0) && (opcode_descr.id != ID_NULL))
            {
                //The prefix was consumed as part of the opcode: drop it from the prefix list.
                idata.prefixes[ext_prefix_index] = 0xFF;
                check_ext_sf_prefixes(idata.prefixes, ref instr, ref param);
                res += tmp;
            }
            else
            {
                //Extension lookup failed: clear the error and retry from the 1byte table.
                idata.severe_err = 0;
                res += lookup_opcode(offset, IDX_1BYTE, ref opcode_descr, idata);
            }
        }
        else
        {
            res += lookup_opcode(offset, IDX_1BYTE, ref opcode_descr, idata);
        }

        if ((idata.severe_err==0) && (opcode_descr.id == ID_NULL))
        {
            idata.severe_err = ERRS.ERR_BADCODE;//error: invalid opcode.
        }
    }

    return res;
}
//Parses the instruction's mnemonic. A simple mnemonic is copied to struct INSTRUCTION as is.
// When the mnemonic is empty or its first character is the multi mnemonic indicator
// (MM_INDICATOR), the function calls get_instruction_opsize and then picks the entry of
// opcode.mnemonic.values at index bsr(instr.opsize) - 1.
static void parse_mnemonic(OPCODE_DESCRIPTOR opcode, INSTRUCTION instr, INTERNAL_DATA idata, DISMODE mode)
{
    bool isMultiMnemonic = (opcode.mnemonic.value.Length <= 0)
                        || (opcode.mnemonic.value[0] == MM_INDICATOR);

    if (!isMultiMnemonic)
    {
        //Simple case: a single fixed mnemonic.
        instr.mnemonic = opcode.mnemonic.value;
        return;
    }

    //The choice depends on the operand size, so it has to be resolved first.
    get_instruction_opsize(opcode.mnemonic, instr, idata, mode);
    instr.mnemonic = opcode.mnemonic.values[bsr(instr.opsize) - 1];
}
//Reads the input stream and walks the tables until the matching struct OPCODE_DESCRIPTOR is
// found. The byte at [offset] - shifted, masked and rebased by the current table's settings -
// is used as the index; table limits and the maximum instruction length are checked on the way.
// Returns the number of bytes consumed. On failure idata.severe_err is set and opcode_descr
// is left untouched.
static UInt32 lookup_opcode(ulong offset, byte table, ref OPCODE_DESCRIPTOR opcode_descr, INTERNAL_DATA idata)
{
    UInt32 consumed = 0;
    byte index;

    while (true)
    {
        byte raw = assembly.ReadBytes(offset, 1)[0];

        //Table descriptors hold "real" min/max values, so the index is rebased to zero here.
        index = (byte)(((raw >> tables[table].shift) & tables[table].mask) - tables[table].min);
        byte limit = (byte)(tables[table].max - tables[table].min);

        if (index > limit)
        {
            idata.severe_err = ERRS.ERR_BADCODE;
            break;
        }

        if (consumed > Dasmer.MAX_INSTRUCTION_LEN)
        {
            idata.severe_err = ERRS.ERR_TOO_LONG;
            break;
        }

        //Tables flagged TBL_PROP_MODRM do not advance the stream position.
        if ((tables[table].props & TBL_PROP_MODRM) == 0)
        {
            consumed++;
            offset++;
        }

        //Stop unless the entry redirects the lookup to another table.
        if ((tables[table].opcodes[index].groups & GRP_SWITCH) == 0)
        {
            break;
        }

        table = (byte)tables[table].opcodes[index].props;
    }

    if (idata.severe_err == ERRS.ERR_OK)
    {
        opcode_descr = tables[table].opcodes[index];
    }

    return consumed;
}
//Copies the instruction's flags from struct OPCODE_DESCRIPTOR to struct INSTRUCTION:
// every PROP_* bit found in opcode.props sets the matching INSTR_FLAG_* bit in instr.flags.
static void copy_instr_flags(ref INSTRUCTION instr, ref OPCODE_DESCRIPTOR opcode)
{
    bool hasIopl = (opcode.props & PROP_IOPL) != 0;
    bool hasRing0 = (opcode.props & PROP_RING0) != 0;
    bool hasSerial = (opcode.props & PROP_SERIAL) != 0;
    bool hasUndoc = (opcode.props & PROP_UNDOC) != 0;

    if (hasIopl)
    {
        instr.flags |= INSTR_FLAG_IOPL;
    }

    if (hasRing0)
    {
        instr.flags |= INSTR_FLAG_RING0;
    }

    if (hasSerial)
    {
        instr.flags |= INSTR_FLAG_SERIAL;
    }

    if (hasUndoc)
    {
        instr.flags |= INSTR_FLAG_UNDOC;
    }
}
//Copies the five eflags fields (tested, modified, set, cleared, undefined) from
// struct OPCODE_DESCRIPTOR to struct INSTRUCTION.
static void copy_eflags(ref INSTRUCTION instr, ref OPCODE_DESCRIPTOR opcode)
{
    //Flags the instruction reads.
    instr.tested_flags = opcode.tested_flags;

    //Flags the instruction writes, one way or another.
    instr.modified_flags = opcode.modified_flags;
    instr.set_flags = opcode.set_flags;
    instr.cleared_flags = opcode.cleared_flags;
    instr.undefined_flags = opcode.undefined_flags;
}
//Disassembles one instruction located at 'offset'.
// instr1 must reference an INSTRUCTION object; it is filled in place.
// param supplies the mode, architecture and options, and receives errcode and
// sf_prefixes_len.
// Returns the instruction length in bytes, or 0 when a severe error occurred or the instruction
// exceeds Dasmer.MAX_INSTRUCTION_LEN (param.errcode then holds the reason). A non-zero length
// may still come with a non-severe param.errcode set by the post checks.
public UInt32 disassemble(ulong offset, ref IInstruction instr1, ref DISASM_INOUT_PARAMS param)
{
    UInt32 len = 0;
    UInt32 res;
    OPCODE_DESCRIPTOR opcode = new OPCODE_DESCRIPTOR();
    INTERNAL_DATA idata = new INTERNAL_DATA(0xFF);
    INSTRUCTION instr = instr1 as INSTRUCTION;

    //'as' yields null for a null or foreign IInstruction; fail with a clear message instead of
    // a NullReferenceException somewhere further down.
    if (instr == null)
    {
        throw new ArgumentException("instr1 must reference a non-null INSTRUCTION instance.", "instr1");
    }

    //Setup everything.
    param.sf_prefixes_len = 0;
    param.errcode = 0;

    //Lookup opcode.
    res = parse_opcode(offset, ref opcode, ref instr, idata, ref param);
    if (idata.severe_err != ERRS.ERR_OK)
    {
        param.errcode = idata.severe_err;
        return 0;
    }
    len += res;
    if (len > Dasmer.MAX_INSTRUCTION_LEN)
    {
        param.errcode = ERRS.ERR_TOO_LONG;
        return 0;
    }

    get_address_size(ref instr, idata.prefixes, param.mode);

    //Parse MODRM and SIB bytes.
    len += parse_modrm_sib(offset + len, ref instr, opcode);
    if (len > Dasmer.MAX_INSTRUCTION_LEN)
    {
        param.errcode = ERRS.ERR_TOO_LONG;
        return 0;
    }

    //Copy flags, eflags, id, groups.
    copy_eflags(ref instr, ref opcode);
    copy_instr_flags(ref instr, ref opcode);
    instr.id = opcode.id;
    instr.groups = opcode.groups;

    //Parse mnemonic.
    parse_mnemonic(opcode, instr, idata, param.mode);
    if (idata.severe_err != ERRS.ERR_OK)
    {
        param.errcode = idata.severe_err;
        return 0;
    }

    //Deal with operands. Each one is parsed at the current end of the instruction and may
    // extend its length.
    res = parse_operand(offset, offset + len, opcode.ops[0], instr, 0, idata, param.mode);
    if (idata.severe_err != ERRS.ERR_OK)
    {
        param.errcode = idata.severe_err;
        return 0;
    }
    len += res;
    if (len > Dasmer.MAX_INSTRUCTION_LEN)
    {
        param.errcode = ERRS.ERR_TOO_LONG;
        return 0;
    }

    res = parse_operand(offset, offset + len, opcode.ops[1], instr, 1, idata, param.mode);
    if (idata.severe_err != ERRS.ERR_OK)
    {
        param.errcode = idata.severe_err;
        return 0;
    }
    len += res;
    if (len > Dasmer.MAX_INSTRUCTION_LEN)
    {
        param.errcode = ERRS.ERR_TOO_LONG;
        return 0;
    }

    res = parse_operand(offset, offset + len, opcode.ops[2], instr, 2, idata, param.mode);
    if (idata.severe_err != ERRS.ERR_OK)
    {
        param.errcode = idata.severe_err;
        return 0;
    }
    len += res;
    if (len > Dasmer.MAX_INSTRUCTION_LEN)
    {
        param.errcode = ERRS.ERR_TOO_LONG;
        return 0;
    }

    //Do postprocessing if necessary. A non-zero result replaces the length computed so far.
    if ((opcode.props & PROP_POST_PROC)!=0)
    {
        res = postprocs[opcode.props >> POST_PROC_SHIFT](offset, offset, ref instr, idata, param.mode);
        if (idata.severe_err != ERRS.ERR_OK)
        {
            param.errcode = idata.severe_err;
            return 0;
        }
        if (res>0)
        {
            len = res;
            if (len > Dasmer.MAX_INSTRUCTION_LEN)
            {
                param.errcode = ERRS.ERR_TOO_LONG;
                return 0;
            }
        }
    }

    //NOTE(review): the three conditions below fire when the prefix WAS used
    // (is_*_used != 0), while "superfluous" suggests they should fire when it was NOT used.
    // Left unchanged because the code that sets these fields is not visible here - confirm.

    //Check if REX is superfluous.
    if ((param.mode == DISMODE.DISASSEMBLE_MODE_64) && (idata.is_rex_used != 0))
        add_sf_prefix_value(idata.prefixes, PREF_REX_INDEX, instr.rex, ref instr, ref param);
    //Check if segment prefix is superfluous.
    check_seg_sf_prefixes(instr, idata.prefixes, param);
    //Check if opsize is superfluous.
    if ((idata.is_opsize_used!=0) && idata.prefixes[PREF_OPSIZE_INDEX] != 0xFF)
        add_sf_prefix(idata.prefixes, PREF_OPSIZE_INDEX, ref instr, ref param);
    //Check if addrsize is superfluous.
    if ((idata.is_addrsize_used!=0) && idata.prefixes[PREF_ADDRSIZE_INDEX] != 0xFF)
        add_sf_prefix(idata.prefixes, PREF_ADDRSIZE_INDEX, ref instr, ref param);

    //Convert prefixes to output representation.
    convert_prefixes(instr, idata.prefixes);

    //Copy error if exists.
    param.errcode = idata.err;

    //And post checks. The first failing check wins and overrides idata.err.
    if ((param.arch & opcode.arch)==0)
        param.errcode = ERRS.ERR_ANOT_ARCH;//error: another architecture (no architecture bit in common).
    else if ( ((instr.prefixes & INSTR_PREFIX_LOCK)!=0) && ((opcode.props & PROP_LOCK)==0) )
        param.errcode = ERRS.ERR_NON_LOCKABLE;//error: prefix lock non-lockable instruction.
    else if (((opcode.props & PROP_I64) != 0) && (param.mode == DISMODE.DISASSEMBLE_MODE_64))
        param.errcode = ERRS.ERR_16_32_ONLY;//error: instruction is 16/32bit mode only.
    else if (((opcode.props & PROP_O64) != 0) && (param.mode != DISMODE.DISASSEMBLE_MODE_64))
        param.errcode = ERRS.ERR_64_ONLY;//error: instruction is 64bit mode only.

    apply_disasm_options(ref instr, len, param);

    //Attach the raw bytes and the address of the decoded instruction.
    instr.bytes = assembly.ReadBytes(offset, (int)len);
    instr.Addr = (ulong)offset;

    return len;
}