Scroll to navigation

x86_disasm(3) libdisasm x86_disasm(3)

NAME

x86_disasm, x86_disasm_forward, x86_disasm_range - disassemble a bytestream to x86 assembly language instructions

SYNOPSIS

#include <libdis.h>
 
typedef void (*DISASM_CALLBACK)( x86_insn_t *, void * );
 
typedef long (*DISASM_RESOLVER)( x86_op_t *, x86_insn_t *, void * );
int x86_disasm( unsigned char *buf, unsigned int buf_len,
 
unsigned long buf_rva , unsigned int offset,
 
x86_insn_t * insn );
 
int x86_disasm_range( unsigned char *buf, unsigned long buf_rva,
 
unsigned int offset , unsigned int len,
 
DISASM_CALLBACK func , void *arg );
 
int x86_disasm_forward( unsigned char *buf, unsigned int buf_len,
 
unsigned long buf_rva , unsigned int offset,
 
DISASM_CALLBACK func , void *arg,
 
DISASM_RESOLVER resolver , void *r_arg );

DESCRIPTION

#define MAX_REGNAME 8
 
#define MAX_PREFIX_STR 32
 
#define MAX_MNEM_STR 16
 
#define MAX_INSN_SIZE 20 /* same as in i386.h */
 
#define MAX_OP_STRING 32 /* max possible operand size in string form */
 
#define MAX_OP_RAW_STRING 64 /* max possible operand size in raw form */
 
#define MAX_OP_XML_STRING 256 /* max possible operand size in xml form */
 
#define MAX_NUM_OPERANDS 8 /* max # implicit and explicit operands */
 
#define MAX_INSN_STRING 512 /* 2 * 8 * MAX_OP_STRING */
 
#define MAX_INSN_RAW_STRING 1024 /* 2 * 8 * MAX_OP_RAW_STRING */
 
#define MAX_INSN_XML_STRING 4096 /* 2 * 8 * MAX_OP_XML_STRING */
enum x86_reg_type { /* NOTE: these may be ORed together */
 
reg_gen = 0x00001, /* general purpose */
 
reg_in = 0x00002, /* incoming args, ala RISC */
 
reg_out = 0x00004, /* args to calls, ala RISC */
 
reg_local = 0x00008, /* local vars, ala RISC */
 
reg_fpu = 0x00010, /* FPU data register */
 
reg_seg = 0x00020, /* segment register */
 
reg_simd = 0x00040, /* SIMD/MMX reg */
 
reg_sys = 0x00080, /* restricted/system register */
 
reg_sp = 0x00100, /* stack pointer */
 
reg_fp = 0x00200, /* frame pointer */
 
reg_pc = 0x00400, /* program counter */
 
reg_retaddr = 0x00800, /* return addr for func */
 
reg_cond = 0x01000, /* condition code / flags */
 
reg_zero = 0x02000, /* zero register, ala RISC */
 
reg_ret = 0x04000, /* return value */
 
reg_src = 0x10000, /* array/rep source */
 
reg_dest = 0x20000, /* array/rep destination */
 
reg_count = 0x40000 /* array/rep/loop counter */
 
};
typedef struct {
 
char name[MAX_REGNAME];
 
enum x86_reg_type type; /* what register is used for */
 
unsigned int size; /* size of register in bytes */
 
unsigned int id; /* register ID #, for quick compares */
 
unsigned int alias; /* ID of reg this is an alias of */
 
unsigned int shift; /* amount to shift aliased reg by */
 
} x86_reg_t;
typedef struct {
 
unsigned int scale; /* scale factor */
 
x86_reg_t index, base; /* index, base registers */
 
long disp; /* displacement */
 
char disp_sign; /* is negative? 1/0 */
 
char disp_size; /* 0, 1, 2, 4 */
 
} x86_ea_t;
enum x86_op_type { /* mutually exclusive */
 
op_unused = 0, /* empty/unused operand */
 
op_register = 1, /* CPU register */
 
op_immediate = 2, /* Immediate Value */
 
op_relative_near = 3, /* Relative offset from IP */
 
op_relative_far = 4,
 
op_absolute = 5, /* Absolute address (ptr16:32) */
 
op_expression = 6, /* Address expression (scale/index/base/disp) */
 
op_offset = 7, /* Offset from start of segment (m32) */
 
op_unknown
 
};
enum x86_op_datatype { /* these use Intel's lame terminology */
 
op_byte = 1, /* 1 byte integer */
 
op_word = 2, /* 2 byte integer */
 
op_dword = 3, /* 4 byte integer */
 
op_qword = 4, /* 8 byte integer */
 
op_dqword = 5, /* 16 byte integer */
 
op_sreal = 6, /* 4 byte real (single real) */
 
op_dreal = 7, /* 8 byte real (double real) */
 
op_extreal = 8, /* 10 byte real (extended real) */
 
op_bcd = 9, /* 10 byte binary-coded decimal */
 
op_simd = 10, /* 16 byte packed (SIMD, MMX) */
 

op_ssimd = 10, /* 16 byte : 4 packed single FP (SIMD, MMX) */
 

op_dsimd = 11, /* 16 byte : 2 packed double FP (SIMD, MMX) */
 

op_sssimd = 12, /* 4 byte : scalar single FP (SIMD, MMX) */
 

op_sdsimd = 13, /* 8 byte : scalar double FP (SIMD, MMX) */
 
op_descr32 = 14, /* 6 byte Intel descriptor 2:4 */
 
op_descr16 = 15, /* 4 byte Intel descriptor 2:2 */
 
op_pdescr32 = 16, /* 6 byte Intel pseudo-descriptor 32:16 */
 
op_pdescr16 = 17, /* 6 byte Intel pseudo-descriptor 8:24:16 */
 
op_fpuenv = 11 /* 28 byte FPU control/environment data */
 
};
enum x86_op_access { /* ORed together */
 
op_read = 1,
 
op_write = 2,
 
op_execute = 4
 
};
enum x86_op_flags { /* ORed together, but segs are mutually exclusive */
 
op_signed = 1, /* signed integer */
 
op_string = 2, /* possible string or array */
 
op_constant = 4, /* symbolic constant */
 
op_pointer = 8, /* operand points to a memory address */
 
op_sysref = 0x010, /* operand is a syscall number */
 
op_implied = 0x020, /* operand is implicit in insn */
 
op_hardcode = 0x040, /* operand is hardcoded in insn */
 
op_es_seg = 0x100, /* ES segment override */
 
op_cs_seg = 0x200, /* CS segment override */
 
op_ss_seg = 0x300, /* SS segment override */
 
op_ds_seg = 0x400, /* DS segment override */
 
op_fs_seg = 0x500, /* FS segment override */
 
op_gs_seg = 0x600 /* GS segment override */
 
};
typedef struct {
 
enum x86_op_type type; /* operand type */
 
enum x86_op_datatype datatype; /* operand size */
 
enum x86_op_access access; /* operand access [RWX] */
 
enum x86_op_flags flags; /* misc flags */
 
union {
 
/* immediate values */
 
char sbyte;
 
short sword;
 
long sdword;
 
qword sqword;
 
unsigned char byte;
 
unsigned short word;
 
unsigned long dword;
 
qword qword;
 
float sreal;
 
double dreal;
 
/* misc large/non-native types */
 
unsigned char extreal[10];
 
unsigned char bcd[10];
 
qword dqword[2];
 
unsigned char simd[16];
 
unsigned char fpuenv[28];
 
/* absolute address */
 
void * address;
 
/* offset from segment */
 
unsigned long offset;
 
/* ID of CPU register */
 
x86_reg_t reg;
 
/* offsets from current insn */
 
char relative_near;
 
long relative_far;
 
/* effective address [expression] */
 
x86_ea_t expression;
 
} data;
 
void * insn;
 
} x86_op_t;
typedef struct x86_operand_list {
 
x86_op_t op;
 
struct x86_operand_list *next;
 
} x86_oplist_t;
enum x86_insn_group {
 
insn_none = 0,
 
insn_controlflow = 1,
 
insn_arithmetic = 2,
 
insn_logic = 3,
 
insn_stack = 4,
 
insn_comparison = 5,
 
insn_move = 6,
 
insn_string = 7,
 
insn_bit_manip = 8,
 
insn_flag_manip = 9,
 
insn_fpu = 10,
 
insn_interrupt = 13,
 
insn_system = 14,
 
insn_other = 15
 
};
enum x86_insn_type {
 
insn_invalid = 0,
 
/* insn_controlflow */
 
insn_jmp = 0x1001,
 
insn_jcc = 0x1002,
 
insn_call = 0x1003,
 
insn_callcc = 0x1004,
 
insn_return = 0x1005,
 
insn_loop = 0x1006,
 
/* insn_arithmetic */
 
insn_add = 0x2001,
 
insn_sub = 0x2002,
 
insn_mul = 0x2003,
 
insn_div = 0x2004,
 
insn_inc = 0x2005,
 
insn_dec = 0x2006,
 
insn_shl = 0x2007,
 
insn_shr = 0x2008,
 
insn_rol = 0x2009,
 
insn_ror = 0x200A,
 
/* insn_logic */
 
insn_and = 0x3001,
 
insn_or = 0x3002,
 
insn_xor = 0x3003,
 
insn_not = 0x3004,
 
insn_neg = 0x3005,
 
/* insn_stack */
 
insn_push = 0x4001,
 
insn_pop = 0x4002,
 
insn_pushregs = 0x4003,
 
insn_popregs = 0x4004,
 
insn_pushflags = 0x4005,
 
insn_popflags = 0x4006,
 
insn_enter = 0x4007,
 
insn_leave = 0x4008,
 
/* insn_comparison */
 
insn_test = 0x5001,
 
insn_cmp = 0x5002,
 
/* insn_move */
 
insn_mov = 0x6001, /* move */
 
insn_movcc = 0x6002, /* conditional move */
 
insn_xchg = 0x6003, /* exchange */
 
insn_xchgcc = 0x6004, /* conditional exchange */
 
/* insn_string */
 
insn_strcmp = 0x7001,
 
insn_strload = 0x7002,
 
insn_strmov = 0x7003,
 
insn_strstore = 0x7004,
 
insn_translate = 0x7005, /* xlat */
 
/* insn_bit_manip */
 
insn_bittest = 0x8001,
 
insn_bitset = 0x8002,
 
insn_bitclear = 0x8003,
 
/* insn_flag_manip */
 
insn_clear_carry = 0x9001,
 
insn_clear_zero = 0x9002,
 
insn_clear_oflow = 0x9003,
 
insn_clear_dir = 0x9004,
 
insn_clear_sign = 0x9005,
 
insn_clear_parity = 0x9006,
 
insn_set_carry = 0x9007,
 
insn_set_zero = 0x9008,
 
insn_set_oflow = 0x9009,
 
insn_set_dir = 0x900A,
 
insn_set_sign = 0x900B,
 
insn_set_parity = 0x900C,
 
insn_tog_carry = 0x9010,
 
insn_tog_zero = 0x9020,
 
insn_tog_oflow = 0x9030,
 
insn_tog_dir = 0x9040,
 
insn_tog_sign = 0x9050,
 
insn_tog_parity = 0x9060,
 
/* insn_fpu */
 
insn_fmov = 0xA001,
 
insn_fmovcc = 0xA002,
 
insn_fneg = 0xA003,
 
insn_fabs = 0xA004,
 
insn_fadd = 0xA005,
 
insn_fsub = 0xA006,
 
insn_fmul = 0xA007,
 
insn_fdiv = 0xA008,
 
insn_fsqrt = 0xA009,
 
insn_fcmp = 0xA00A,
 
insn_fcos = 0xA00C,
 
insn_fldpi = 0xA00D,
 
insn_fldz = 0xA00E,
 
insn_ftan = 0xA00F,
 
insn_fsine = 0xA010,
 
insn_fsys = 0xA020,
 
/* insn_interrupt */
 
insn_int = 0xD001,
 
insn_intcc = 0xD002, /* not present in x86 ISA */
 
insn_iret = 0xD003,
 
insn_bound = 0xD004,
 
insn_debug = 0xD005,
 
insn_trace = 0xD006,
 
insn_invalid_op = 0xD007,
 
insn_oflow = 0xD008,
 
/* insn_system */
 
insn_halt = 0xE001,
 
insn_in = 0xE002, /* input from port/bus */
 
insn_out = 0xE003, /* output to port/bus */
 
insn_cpuid = 0xE004,
 
/* insn_other */
 
insn_nop = 0xF001,
 
insn_bcdconv = 0xF002, /* convert to or from BCD */
 
insn_szconv = 0xF003 /* change size of operand */
 
};
enum x86_insn_note {
 
insn_note_ring0 = 1, /* Only available in ring 0 */
 
insn_note_smm = 2, /* Only available in System Management Mode */
 
insn_note_serial = 4 /* Serializing instruction */
 
};
enum x86_flag_status {
 
insn_carry_set = 0x1,
 
insn_zero_set = 0x2,
 
insn_oflow_set = 0x4,
 
insn_dir_set = 0x8,
 
insn_sign_set = 0x10,
 
insn_parity_set = 0x20,
 
insn_carry_or_zero_set = 0x40,
 
insn_zero_set_or_sign_ne_oflow = 0x80,
 
insn_carry_clear = 0x100,
 
insn_zero_clear = 0x200,
 
insn_oflow_clear = 0x400,
 
insn_dir_clear = 0x800,
 
insn_sign_clear = 0x1000,
 
insn_parity_clear = 0x2000,
 
insn_sign_eq_oflow = 0x4000,
 
insn_sign_ne_oflow = 0x8000
 
};
enum x86_insn_cpu {
 
cpu_8086 = 1, /* Intel */
 
cpu_80286 = 2,
 
cpu_80386 = 3,
 
cpu_80387 = 4,
 
cpu_80486 = 5,
 
cpu_pentium = 6,
 
cpu_pentiumpro = 7,
 
cpu_pentium2 = 8,
 
cpu_pentium3 = 9,
 
cpu_pentium4 = 10,
 
cpu_k6 = 16, /* AMD */
 
cpu_k7 = 32,
 
cpu_athlon = 48
 
};
enum x86_insn_isa {
 
isa_gp = 1, /* general purpose */
 
isa_fp = 2, /* floating point */
 
isa_fpumgt = 3, /* FPU/SIMD management */
 
isa_mmx = 4, /* Intel MMX */
 
isa_sse1 = 5, /* Intel SSE SIMD */
 
isa_sse2 = 6, /* Intel SSE2 SIMD */
 
isa_sse3 = 7, /* Intel SSE3 SIMD */
 
isa_3dnow = 8, /* AMD 3DNow! SIMD */
 
isa_sys = 9 /* system instructions */
 
};
enum x86_insn_prefix {
 
insn_no_prefix = 0,
 
insn_rep_zero = 1,
 
insn_rep_notzero = 2,
 
insn_lock = 4,
 
insn_delay = 8
 
};
typedef struct {
 
/* information about the instruction */
 
unsigned long addr; /* load address */
 
unsigned long offset; /* offset into file/buffer */
 
enum x86_insn_group group; /* meta-type, e.g. INSN_EXEC */
 
enum x86_insn_type type; /* type, e.g. INSN_BRANCH */
 
unsigned char bytes[MAX_INSN_SIZE];
 
unsigned char size; /* size of insn in bytes */
 
enum x86_insn_prefix prefix;
 
enum x86_flag_status flags_set; /* flags set or tested by insn */
 
enum x86_flag_status flags_tested;
 
/* the instruction proper */
 
char prefix_string[32]; /* prefixes [might be truncated] */
 
char mnemonic[8];
 
x86_op_t operands[3];
 
/* convenience fields for user */
 
void *block; /* code block containing this insn */
 
void *function; /* function containing this insn */
 
void *tag; /* tag the insn as seen/processed */
 
} x86_insn_t;
 
#define X86_WILDCARD_BYTE 0xF4
typedef struct {
 

enum x86_op_type type; /* operand type */
 

enum x86_op_datatype datatype; /* operand size */
 

enum x86_op_access access; /* operand access [RWX] */
 

enum x86_op_flags flags; /* misc flags */
 
} x86_invariant_op_t;
typedef struct {
 
unsigned char bytes[64]; /* invariant representation */
 
unsigned int size; /* number of bytes in insn */
 

enum x86_insn_group group; /* meta-type, e.g. INSN_EXEC */
 

enum x86_insn_type type; /* type, e.g. INSN_BRANCH */
 
x86_invariant_op_t operands[3]; /* use same ordering as x86_insn_t */
 
} x86_invariant_t;

EXAMPLES

The following sample callback outputs instructions in raw syntax:
void raw_print( x86_insn_t *insn, void *arg ) {
 

char line[1024];
 

x86_format_insn(insn, line, 1024, raw_syntax);
 

printf( "%s\n", line);
 
}
The following sample resolver performs very limited checking on the operand of a jump or call to determine what program address the operand refers to:
long resolver( x86_op_t *op, x86_insn_t *insn, void *arg ) {
 

long retval = -1;
 

/* this is a flat ripoff of internal_resolver in libdis.c --
 

we don't do any register or stack resolving, or check
 

to see if we have already encountered this RVA */
 

if ( op->type == op_absolute || op->type == op_offset ) {
 

retval = op->data.sdword;
 

} else if ( op->type == op_relative_near || op->type == op_relative_far ) {
 

if ( op->datatype == op_byte ) {
 

retval = insn->addr + insn->size + op->data.sbyte;
 

} else if ( op->datatype == op_word ) {
 

retval = insn->addr + insn->size + op->data.sword;
 

} else if ( op->datatype == op_dword ) {
 

retval = insn->addr + insn->size + op->data.sdword;
 

}
 

}
 

return( retval );
 
}
The following code snippets demonstrate how to use the various disassembly routines:

unsigned char *buf; /* buffer of bytes to disassemble */
 

unsigned int buf_len;/* length of buffer */
 

unsigned long buf_rva; /* load address of start of buffer */
 

unsigned int pos; /* position in buffer */
 

x86_insn_t insn; /* disassembled instruction */

/* disassemble entire buffer, printing automatically */
 

x86_disasm_range( buf, buf_rva, pos, buf_len,
 

raw_print, NULL );

/* disassemble a single instruction, then print it */
 

if (x86_disasm( buf, buf_len, buf_rva, pos, &insn ) ) {
 

raw_print( &insn, NULL );
 

}

/* disassemble forward in 'buf' starting at 'pos' */
 

x86_disasm_forward( buf, buf_len, buf_rva, pos,
 

raw_print, NULL, resolver, NULL );
 

SEE ALSO

libdisasm(7), x86_format_insn(3), x86_init(3), x86dis(1)
0.21 mammon_