diff options
| author | Raymaekers Luca <luca@spacehb.net> | 2025-10-24 12:58:52 +0200 |
|---|---|---|
| committer | Raymaekers Luca <luca@spacehb.net> | 2025-10-24 12:58:52 +0200 |
| commit | 856fd58549e5bf50e800a665f9deb27d967df2fb (patch) | |
| tree | 6950210e5ae3618b501a7045f10f8fc06dd903df /src/libs/reference_decoder | |
| parent | d8b3ca9d02377cf04a09e0f518a3385b7324bc4d (diff) | |
checkpoint
Diffstat (limited to 'src/libs/reference_decoder')
| -rw-r--r-- | src/libs/reference_decoder/sim86.h | 27 | ||||
| -rw-r--r-- | src/libs/reference_decoder/sim86_decode.cpp | 303 | ||||
| -rw-r--r-- | src/libs/reference_decoder/sim86_decode.h | 35 | ||||
| -rw-r--r-- | src/libs/reference_decoder/sim86_instruction.cpp | 84 | ||||
| -rw-r--r-- | src/libs/reference_decoder/sim86_instruction.h | 109 | ||||
| -rw-r--r-- | src/libs/reference_decoder/sim86_instruction_table.cpp | 27 | ||||
| -rw-r--r-- | src/libs/reference_decoder/sim86_instruction_table.h | 60 | ||||
| -rw-r--r-- | src/libs/reference_decoder/sim86_instruction_table.inl | 250 | ||||
| -rw-r--r-- | src/libs/reference_decoder/sim86_lib.cpp | 73 | ||||
| -rw-r--r-- | src/libs/reference_decoder/sim86_memory.cpp | 66 | ||||
| -rw-r--r-- | src/libs/reference_decoder/sim86_memory.h | 28 | ||||
| -rw-r--r-- | src/libs/reference_decoder/sim86_text_table.cpp | 57 |
12 files changed, 1119 insertions, 0 deletions
diff --git a/src/libs/reference_decoder/sim86.h b/src/libs/reference_decoder/sim86.h new file mode 100644 index 0000000..b065a0e --- /dev/null +++ b/src/libs/reference_decoder/sim86.h @@ -0,0 +1,27 @@ +/* ======================================================================== + + (C) Copyright 2023 by Molly Rocket, Inc., All Rights Reserved. + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. + + Please see https://computerenhance.com for more information + + ======================================================================== */ + +typedef char unsigned u8; +typedef short unsigned u16; +typedef int unsigned u32; +typedef long long unsigned u64; + +typedef char s8; +typedef short s16; +typedef int s32; +typedef long long s64; + +typedef s32 b32; + +#define ArrayCount(Array) (sizeof(Array) / sizeof((Array)[0])) + +static u32 const SIM86_VERSION = 4; diff --git a/src/libs/reference_decoder/sim86_decode.cpp b/src/libs/reference_decoder/sim86_decode.cpp new file mode 100644 index 0000000..dc084d5 --- /dev/null +++ b/src/libs/reference_decoder/sim86_decode.cpp @@ -0,0 +1,303 @@ +/* ======================================================================== + + (C) Copyright 2023 by Molly Rocket, Inc., All Rights Reserved. + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. + + Please see https://computerenhance.com for more information + + ======================================================================== */ + +struct decode_context +{ + u32 DefaultSegment; + u32 AdditionalFlags; +}; + +static instruction_operand GetRegOperand(u32 IntelRegIndex, b32 Wide) +{ + // NOTE(casey): This maps Intel's REG and RM field encodings for registers to our encoding for registers. 
+ register_access RegTable[][2] = + { + {{Register_a, 0, 1}, {Register_a, 0, 2}}, + {{Register_c, 0, 1}, {Register_c, 0, 2}}, + {{Register_d, 0, 1}, {Register_d, 0, 2}}, + {{Register_b, 0, 1}, {Register_b, 0, 2}}, + {{Register_a, 1, 1}, {Register_sp, 0, 2}}, + {{Register_c, 1, 1}, {Register_bp, 0, 2}}, + {{Register_d, 1, 1}, {Register_si, 0, 2}}, + {{Register_b, 1, 1}, {Register_di, 0, 2}}, + }; + + instruction_operand Result = {}; + Result.Type = Operand_Register; + Result.Register = RegTable[IntelRegIndex & 0x7][(Wide != 0)]; + + return Result; +} + +// NOTE(casey): ParseDataValue is not a real function, it's basically just a macro that is used in +// TryParse. It should never be called otherwise, but that is not something you can do in C++. +// In other languages it would be a "local function". +static u32 ParseDataValue(segmented_access *Access, b32 Exists, b32 Wide, b32 SignExtended) +{ + u32 Result = {}; + + if(Exists) + { + if(Wide) + { + u8 D0 = *AccessMemory(*Access, 0); + u8 D1 = *AccessMemory(*Access, 1); + Result = (D1 << 8) | D0; + Access->SegmentOffset += 2; + } + else + { + Result = *AccessMemory(*Access); + if(SignExtended) + { + Result = (s32)*(s8 *)&Result; + } + Access->SegmentOffset += 1; + } + } + + return Result; +} + +static instruction TryDecode(decode_context *Context, instruction_encoding *Inst, segmented_access At) +{ + instruction Dest = {}; + b32 Has[Bits_Count] = {}; + u32 Bits[Bits_Count] = {}; + b32 Valid = true; + + u64 StartingAddress = GetAbsoluteAddressOf(At); + + u8 BitsPendingCount = 0; + u8 BitsPending = 0; + for(u32 BitsIndex = 0; Valid && (BitsIndex < ArrayCount(Inst->Bits)); ++BitsIndex) + { + instruction_bits TestBits = Inst->Bits[BitsIndex]; + if(TestBits.Usage == Bits_End) + { + // NOTE(casey): That's the end of the instruction format. 
+ break; + } + + u32 ReadBits = TestBits.Value; + if(TestBits.BitCount != 0) + { + if(BitsPendingCount == 0) + { + BitsPendingCount = 8; + BitsPending = *AccessMemory(At); + ++At.SegmentOffset; + } + + // NOTE(casey): If this assert fires, it means we have an error in our table, + // since there are no 8086 instructions that have bit values straddling a + // byte boundary. + assert(TestBits.BitCount <= BitsPendingCount); + + BitsPendingCount -= TestBits.BitCount; + ReadBits = BitsPending; + ReadBits >>= BitsPendingCount; + ReadBits &= ~(0xff << TestBits.BitCount); + } + + if(TestBits.Usage == Bits_Literal) + { + // NOTE(casey): This is a "required" sequence + Valid = Valid && (ReadBits == TestBits.Value); + } + else + { + Bits[TestBits.Usage] |= (ReadBits << TestBits.Shift); + Has[TestBits.Usage] = true; + } + } + + if(Valid) + { + u32 Mod = Bits[Bits_MOD]; + u32 RM = Bits[Bits_RM]; + u32 W = Bits[Bits_W]; + b32 S = Bits[Bits_S]; + b32 D = Bits[Bits_D]; + + b32 HasDirectAddress = ((Mod == 0b00) && (RM == 0b110)); + Has[Bits_Disp] = ((Has[Bits_Disp]) || (Mod == 0b10) || (Mod == 0b01) || HasDirectAddress); + + b32 DisplacementIsW = ((Bits[Bits_DispAlwaysW]) || (Mod == 0b10) || HasDirectAddress); + b32 DataIsW = ((Bits[Bits_WMakesDataW]) && !S && W); + + Bits[Bits_Disp] |= ParseDataValue(&At, Has[Bits_Disp], DisplacementIsW, (!DisplacementIsW)); + Bits[Bits_Data] |= ParseDataValue(&At, Has[Bits_Data], DataIsW, S); + + Dest.Op = Inst->Op; + Dest.Flags = Context->AdditionalFlags; + Dest.Address = StartingAddress; + Dest.Size = GetAbsoluteAddressOf(At) - StartingAddress; + Dest.SegmentOverride = Context->DefaultSegment; + + if(W) + { + Dest.Flags |= Inst_Wide; + } + + if(Bits[Bits_Far]) + { + Dest.Flags |= Inst_Far; + } + + if(Bits[Bits_Z]) + { + Dest.Flags |= Inst_RepNE; + } + + u32 Disp = Bits[Bits_Disp]; + s16 Displacement = (s16)Disp; + + instruction_operand *RegOperand = &Dest.Operands[D ? 0 : 1]; + instruction_operand *ModOperand = &Dest.Operands[D ? 
1 : 0]; + + if(Has[Bits_SR]) + { + *RegOperand = RegisterOperand(Register_es + (Bits[Bits_SR] & 0x3), 2); + } + + if(Has[Bits_REG]) + { + *RegOperand = GetRegOperand(Bits[Bits_REG], W); + } + + if(Has[Bits_MOD]) + { + if(Mod == 0b11) + { + *ModOperand = GetRegOperand(RM, W || (Bits[Bits_RMRegAlwaysW])); + } + else + { + register_mapping_8086 IntelTerm0[8] = { Register_b, Register_b, Register_bp, Register_bp, Register_si, Register_di, Register_bp, Register_b}; + register_mapping_8086 IntelTerm1[8] = {Register_si, Register_di, Register_si, Register_di}; + + u32 I = RM&0x7; + register_mapping_8086 Term0 = IntelTerm0[I]; + register_mapping_8086 Term1 = IntelTerm1[I]; + if((Mod == 0b00) && (RM == 0b110)) + { + Term0 = {}; + Term1 = {}; + } + + *ModOperand = EffectiveAddressOperand(RegisterAccess(Term0, 0, 2), RegisterAccess(Term1, 0, 2), Displacement); + } + } + + if(Has[Bits_Data] && Has[Bits_Disp] && !Has[Bits_MOD]) + { + Dest.Operands[0] = IntersegmentAddressOperand(Bits[Bits_Data], Bits[Bits_Disp]); + } + else + { + // + // NOTE(casey): Because there are some strange opcodes that do things like have an immediate as + // a _destination_ ("out", for example), I define immediates and other "additional operands" to + // go in "whatever slot was not used by the reg and mod fields". + // + + instruction_operand *LastOperand = &Dest.Operands[0]; + if(LastOperand->Type) + { + LastOperand = &Dest.Operands[1]; + } + + if(Bits[Bits_RelJMPDisp]) + { + *LastOperand = ImmediateOperand(Displacement, Immediate_RelativeJumpDisplacement); + } + else if(Has[Bits_Data]) + { + *LastOperand = ImmediateOperand(Bits[Bits_Data]); + } + else if(Has[Bits_V]) + { + if(Bits[Bits_V]) + { + *LastOperand = RegisterOperand(Register_c, 1); + } + else + { + *LastOperand = ImmediateOperand(1); + } + } + } + } + + return Dest; +} + +static instruction DecodeInstruction(instruction_table Table, segmented_access At) +{ + /* TODO(casey): Hmm. 
It seems like this is a very inefficient way to parse + instructions, isn't it? For every instruction, we check every entry in the + table until we find a match. Is this bad design? Or did the person who wrote + it know what they were doing, and has a plan for how it can be optimized + later? Only time will tell... :) */ + + decode_context Context = {}; + instruction Result = {}; + + u32 StartingAddress = GetAbsoluteAddressOf(At); + u32 TotalSize = 0; + while(TotalSize < Table.MaxInstructionByteCount) + { + Result = {}; + for(u32 Index = 0; Index < Table.EncodingCount; ++Index) + { + instruction_encoding Inst = Table.Encodings[Index]; + Result = TryDecode(&Context, &Inst, At); + if(Result.Op) + { + At.SegmentOffset += Result.Size; + TotalSize += Result.Size; + break; + } + } + + if(Result.Op == Op_lock) + { + Context.AdditionalFlags |= Inst_Lock; + } + else if(Result.Op == Op_rep) + { + Context.AdditionalFlags |= Inst_Rep | (Result.Flags & Inst_RepNE); + } + else if(Result.Op == Op_segment) + { + Context.AdditionalFlags |= Inst_Segment; + Context.DefaultSegment = Result.Operands[1].Register.Index; + } + else + { + break; + } + } + + if(TotalSize <= Table.MaxInstructionByteCount) + { + Result.Address = StartingAddress; + Result.Size = TotalSize; + } + else + { + Result = {}; + } + + return Result; +} diff --git a/src/libs/reference_decoder/sim86_decode.h b/src/libs/reference_decoder/sim86_decode.h new file mode 100644 index 0000000..534ce2d --- /dev/null +++ b/src/libs/reference_decoder/sim86_decode.h @@ -0,0 +1,35 @@ +/* ======================================================================== + + (C) Copyright 2023 by Molly Rocket, Inc., All Rights Reserved. + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. 
+ + Please see https://computerenhance.com for more information + + ======================================================================== */ + +/* Register identifiers stored in register_access.Index. Register_none is 0, so a zero-initialized register_access names no register. The es, cs, ss, ds order is relied upon: the decoder forms a segment register as Register_es plus the 2-bit SR field. */ +enum register_mapping_8086 +{ + Register_none, + + Register_a, + Register_b, + Register_c, + Register_d, + Register_sp, + Register_bp, + Register_si, + Register_di, + Register_es, + Register_cs, + Register_ss, + Register_ds, + Register_ip, + Register_flags, + + Register_count, +}; + +/* Decodes one instruction at At using the encodings in Table; the definition is in sim86_decode.cpp. */ +static instruction DecodeInstruction(instruction_table Table, segmented_access At); diff --git a/src/libs/reference_decoder/sim86_instruction.cpp b/src/libs/reference_decoder/sim86_instruction.cpp new file mode 100644 index 0000000..34fcf1f --- /dev/null +++ b/src/libs/reference_decoder/sim86_instruction.cpp @@ -0,0 +1,84 @@ +/* ======================================================================== + + (C) Copyright 2023 by Molly Rocket, Inc., All Rights Reserved. + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. 
+ + Please see https://computerenhance.com for more information + + ======================================================================== */ + +static b32 OperandIsType(instruction Instruction, u32 Index, operand_type Type) +{ + b32 Result = (Instruction.Operands[Index].Type == Type); + return Result; +} + +static instruction_operand GetOperand(instruction Instruction, u32 Index) +{ + assert(Index < ArrayCount(Instruction.Operands)); + instruction_operand Result = Instruction.Operands[Index]; + return Result; +} + +static register_access RegisterAccess(u32 Index, u32 Offset, u32 Count) +{ + register_access Result = {}; + + Result.Index = Index; + Result.Offset = Offset; + Result.Count = Count; + + return Result; +} + +static instruction_operand IntersegmentAddressOperand(u32 Segment, s32 Displacement) +{ + instruction_operand Result = {}; + + Result.Type = Operand_Memory; + Result.Address.ExplicitSegment = Segment; + Result.Address.Displacement = Displacement; + Result.Address.Flags = Address_ExplicitSegment; + + return Result; +} + +static instruction_operand EffectiveAddressOperand(register_access Term0, register_access Term1, s32 Displacement) +{ + instruction_operand Result = {}; + + Result.Type = Operand_Memory; + Result.Address.Terms[0].Register = Term0; + Result.Address.Terms[0].Scale = 1; + Result.Address.Terms[1].Register = Term1; + Result.Address.Terms[1].Scale = 1; + Result.Address.Displacement = Displacement; + + return Result; +} + +static instruction_operand RegisterOperand(u32 Index, u32 Count) +{ + instruction_operand Result = {}; + + Result.Type = Operand_Register; + Result.Register.Index = Index; + Result.Register.Offset = 0; + Result.Register.Count = Count; + + return Result; +} + +static instruction_operand ImmediateOperand(u32 Value, u32 Flags = 0) +{ + instruction_operand Result = {}; + + Result.Type = Operand_Immediate; + Result.Immediate.Value = Value; + Result.Immediate.Flags = Flags; + + return Result; +} diff --git 
a/src/libs/reference_decoder/sim86_instruction.h b/src/libs/reference_decoder/sim86_instruction.h new file mode 100644 index 0000000..191635a --- /dev/null +++ b/src/libs/reference_decoder/sim86_instruction.h @@ -0,0 +1,109 @@ +/* ======================================================================== + + (C) Copyright 2023 by Molly Rocket, Inc., All Rights Reserved. + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. + + Please see https://computerenhance.com for more information + + ======================================================================== */ + +typedef u32 register_index; + +// NOTE(casey): To make it easier to compile with languages which do not +// have auto-typedef'ing (like C, etc.), all types are manually typedef'd here. +typedef struct register_access register_access; +typedef struct effective_address_term effective_address_term; +typedef struct effective_address_expression effective_address_expression; +typedef struct immediate immediate; +typedef struct instruction_operand instruction_operand; +typedef struct instruction instruction; + +/* One enumerator per INST() entry of sim86_instruction_table.inl. INSTALT is defined to expand to nothing here, so alternate encodings add no enumerators. Op_None is 0. */ +typedef enum operation_type : u32 +{ + Op_None, + +#define INST(Mnemonic, ...) Op_##Mnemonic, +#define INSTALT(...) 
+#include "sim86_instruction_table.inl" + + Op_Count, +} operation_type; + +/* Bit flags OR'd together in instruction.Flags. */ +enum instruction_flag +{ + Inst_Lock = 0x1, + Inst_Rep = 0x2, + Inst_Segment = 0x4, + Inst_Wide = 0x8, + Inst_Far = 0x10, + Inst_RepNE = 0x20, // NOTE(casey): For user convenience, this will be set _in addition to_ Inst_Rep for REPNE/REPNZ +}; + +/* Names a register (Index) and a part of it. NOTE(review): Offset and Count look like a byte offset and byte count within the register (the decoder only produces offset 0 or 1 with count 1 or 2) - confirm. */ +struct register_access +{ + register_index Index; + u32 Offset; + u32 Count; +}; + +struct effective_address_term +{ + register_access Register; + s32 Scale; +}; + +enum effective_address_flag +{ + Address_ExplicitSegment = 0x1, +}; +/* A memory address expression: up to two register terms plus a displacement. ExplicitSegment is filled in together with the Address_ExplicitSegment flag by IntersegmentAddressOperand. */ +struct effective_address_expression +{ + effective_address_term Terms[2]; + u32 ExplicitSegment; + s32 Displacement; + u32 Flags; +}; + +enum immediate_flag +{ + Immediate_RelativeJumpDisplacement = 0x1, +}; +struct immediate +{ + s32 Value; + u32 Flags; +}; + +typedef enum operand_type : u32 +{ + Operand_None, + Operand_Register, + Operand_Memory, + Operand_Immediate, +} operand_type; +/* A single operand. Type tells which union member was filled in: Address for Operand_Memory, Register for Operand_Register, Immediate for Operand_Immediate. */ +struct instruction_operand +{ + operand_type Type; + union + { + effective_address_expression Address; + register_access Register; + immediate Immediate; + }; +}; + +/* A decoded instruction. Address and Size are filled in by the decoder; Flags is a combination of instruction_flag bits; SegmentOverride receives the register index recorded from a segment prefix. */ +struct instruction +{ + u32 Address; + u32 Size; + + operation_type Op; + u32 Flags; + + instruction_operand Operands[2]; + + register_index SegmentOverride; +}; diff --git a/src/libs/reference_decoder/sim86_instruction_table.cpp b/src/libs/reference_decoder/sim86_instruction_table.cpp new file mode 100644 index 0000000..be3ec82 --- /dev/null +++ b/src/libs/reference_decoder/sim86_instruction_table.cpp @@ -0,0 +1,27 @@ +/* ======================================================================== + + (C) Copyright 2023 by Molly Rocket, Inc., All Rights Reserved. + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. 
+ + Please see https://computerenhance.com for more information + + ======================================================================== */ + +static instruction_encoding InstructionTable8086[] = +{ +#include "sim86_instruction_table.inl" +}; + +static instruction_table Get8086InstructionTable() +{ + instruction_table Result = {}; + + Result.EncodingCount = ArrayCount(InstructionTable8086); + Result.Encodings = InstructionTable8086; + Result.MaxInstructionByteCount = 15; // NOTE(casey): This is the "Intel-specified" maximum length of an instruction, including prefixes + + return Result; +} diff --git a/src/libs/reference_decoder/sim86_instruction_table.h b/src/libs/reference_decoder/sim86_instruction_table.h new file mode 100644 index 0000000..ed8aea8 --- /dev/null +++ b/src/libs/reference_decoder/sim86_instruction_table.h @@ -0,0 +1,60 @@ +/* ======================================================================== + + (C) Copyright 2023 by Molly Rocket, Inc., All Rights Reserved. + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. 
+ + Please see https://computerenhance.com for more information + + ======================================================================== */ + +enum instruction_bits_usage : u8 +{ + Bits_End, // NOTE(casey): The 0 value, indicating the end of the instruction encoding array + + Bits_Literal, // NOTE(casey): These are opcode bits that identify instructions + + // NOTE(casey): These bits correspond directly to the 8086 instruction manual + Bits_D, + Bits_S, + Bits_W, + Bits_V, + Bits_Z, + Bits_MOD, + Bits_REG, + Bits_RM, + Bits_SR, + Bits_Disp, + Bits_Data, + + Bits_DispAlwaysW, // NOTE(casey): Tag for instructions where the displacement is always 16 bits + Bits_WMakesDataW, // NOTE(casey): Tag for instructions where SW=01 makes the data field become 16 bits + Bits_RMRegAlwaysW, // NOTE(casey): Tag for instructions where the register encoded in RM is always 16-bit width + Bits_RelJMPDisp, // NOTE(casey): Tag for instructions that require address adjustment to go through NASM properly + Bits_Far, // NOTE(casey): Tag for instructions that require a "far" keyword in their ASM to select the right opcode + + Bits_Count, +}; + +/* One field of an encoding. BitCount is the number of bits taken from the instruction stream (0 means nothing is read and Value is used as given). Shift is the left shift applied before the bits are OR'd into the slot for Usage. For Bits_Literal fields, Value is the pattern the bits read must equal. The initializer macros in sim86_instruction_table.inl rely on this field order. */ +struct instruction_bits +{ + instruction_bits_usage Usage; + u8 BitCount; + u8 Shift; + u8 Value; +}; + +/* One encoding of one operation. Entries of Bits not given by the table are zero, which is Bits_End, and the decoder stops at the first Bits_End entry. */ +struct instruction_encoding +{ + operation_type Op; + instruction_bits Bits[16]; +}; + +/* Describes an encoding array: Encodings points at EncodingCount entries, and MaxInstructionByteCount is the size limit DecodeInstruction applies to an instruction including its prefixes. */ +struct instruction_table +{ + instruction_encoding *Encodings; + u32 EncodingCount; + u32 MaxInstructionByteCount; +}; diff --git a/src/libs/reference_decoder/sim86_instruction_table.inl b/src/libs/reference_decoder/sim86_instruction_table.inl new file mode 100644 index 0000000..576c0f6 --- /dev/null +++ b/src/libs/reference_decoder/sim86_instruction_table.inl @@ -0,0 +1,250 @@ +/* ======================================================================== + + (C) Copyright 2023 by Molly Rocket, Inc., All Rights Reserved. + + This software is provided 'as-is', without any express or implied + warranty. 
In no event will the authors be held liable for any damages + arising from the use of this software. + + Please see https://computerenhance.com for more information + + ======================================================================== */ + +/* + NOTE(casey): This instruction table is a direct translation of table 4-12 in the Intel 8086 manual. + The macros are designed to allow direct transcription, without changing the order or manner + of specification in the table in any way. Additional "implicit" versions of the macros are provided + so that hard-coded fields can be supplied uniformly. + + The table is also designed to allow you to include it multiple times to "pull out" other things + from the table, such as opcode mnemonics as strings or enums, etc. +*/ + +/* Each field macro below expands to an instruction_bits initializer in the order {Usage, BitCount, Shift, Value}. Macros with a BitCount of 0 (Imp*, DISP, ADDR, DATA, DATA_IF_W, Flags) consume no bits from the instruction stream; the decoder records their Value directly. The decoder tries entries in the order they appear here and takes the first one whose literal bits match. */ +#ifndef INST +#define INST(Mnemonic, ...) {Op_##Mnemonic, __VA_ARGS__}, +#endif + +#ifndef INSTALT +#define INSTALT INST +#endif + +#define B(Bits) {Bits_Literal, sizeof(#Bits)-1, 0, 0b##Bits} +#define D {Bits_D, 1} +#define S {Bits_S, 1} +#define W {Bits_W, 1} +#define V {Bits_V, 1} +#define Z {Bits_Z, 1} + +#define XXX {Bits_Data, 3, 0} +#define YYY {Bits_Data, 3, 3} +#define RM {Bits_RM, 3} +#define MOD {Bits_MOD, 2} +#define REG {Bits_REG, 3} +#define SR {Bits_SR, 2} + +#define ImpW(Value) {Bits_W, 0, 0, Value} +#define ImpREG(Value) {Bits_REG, 0, 0, Value} +#define ImpMOD(Value) {Bits_MOD, 0, 0, Value} +#define ImpRM(Value) {Bits_RM, 0, 0, Value} +#define ImpD(Value) {Bits_D, 0, 0, Value} +#define ImpS(Value) {Bits_S, 0, 0, Value} + +#define DISP {Bits_Disp, 0, 0, 0} +#define ADDR {Bits_Disp, 0, 0, 0}, {Bits_DispAlwaysW, 0, 0, 1} +#define DATA {Bits_Data, 0, 0, 0} +#define DATA_IF_W {Bits_WMakesDataW, 0, 0, 1} +#define Flags(F) {F, 0, 0, 1} + +INST(mov, {B(100010), D, W, MOD, REG, RM}) +INSTALT(mov, {B(1100011), W, MOD, B(000), RM, DATA, DATA_IF_W, ImpD(0)}) +INSTALT(mov, {B(1011), W, REG, DATA, DATA_IF_W, ImpD(1)}) +INSTALT(mov, {B(1010000), W, ADDR, ImpREG(0), ImpMOD(0), ImpRM(0b110), 
ImpD(1)}) +INSTALT(mov, {B(1010001), W, ADDR, ImpREG(0), ImpMOD(0), ImpRM(0b110), ImpD(0)}) +INSTALT(mov, {B(100011), D, B(0), MOD, B(0), SR, RM, ImpW(1)}) // NOTE(casey): This collapses 2 entries in the 8086 table by adding an explicit D bit + +INST(push, {B(11111111), MOD, B(110), RM, ImpW(1), ImpD(1)}) +INSTALT(push, {B(01010), REG, ImpW(1), ImpD(1)}) +INSTALT(push, {B(000), SR, B(110), ImpW(1), ImpD(1)}) + +INST(pop, {B(10001111), MOD, B(000), RM, ImpW(1), ImpD(1)}) +INSTALT(pop, {B(01011), REG, ImpW(1), ImpD(1)}) +INSTALT(pop, {B(000), SR, B(111), ImpW(1), ImpD(1)}) + +INST(xchg, {B(1000011), W, MOD, REG, RM, ImpD(1)}) +INSTALT(xchg, {B(10010), REG, ImpMOD(0b11), ImpW(1), ImpRM(0)}) + +INST(in, {B(1110010), W, DATA, ImpREG(0), ImpD(1)}) +INSTALT(in, {B(1110110), W, ImpREG(0), ImpD(1), ImpMOD(0b11), ImpRM(2), Flags(Bits_RMRegAlwaysW)}) +INST(out, {B(1110011), W, DATA, ImpREG(0), ImpD(0)}) +INSTALT(out, {B(1110111), W, ImpREG(0), ImpD(0), ImpMOD(0b11), ImpRM(2), Flags(Bits_RMRegAlwaysW)}) + +INST(xlat, {B(11010111)}) +INST(lea, {B(10001101), MOD, REG, RM, ImpD(1), ImpW(1)}) +INST(lds, {B(11000101), MOD, REG, RM, ImpD(1), ImpW(1)}) +INST(les, {B(11000100), MOD, REG, RM, ImpD(1), ImpW(1)}) +INST(lahf, {B(10011111)}) +INST(sahf, {B(10011110)}) +INST(pushf, {B(10011100)}) +INST(popf, {B(10011101)}) + +INST(add, {B(000000), D, W, MOD, REG, RM}) +INSTALT(add, {B(100000), S, W, MOD, B(000), RM, DATA, DATA_IF_W}) +INSTALT(add, {B(0000010), W, DATA, DATA_IF_W, ImpREG(0), ImpD(1)}) + +INST(adc, {B(000100), D, W, MOD, REG, RM}) +INSTALT(adc, {B(100000), S, W, MOD, B(010), RM, DATA, DATA_IF_W}) +INSTALT(adc, {B(0001010), W, DATA, DATA_IF_W, ImpREG(0), ImpD(1)}) + +INST(inc, {B(1111111), W, MOD, B(000), RM, ImpD(1)}) +INSTALT(inc, {B(01000), REG, ImpW(1), ImpD(1)}) + +INST(aaa, {B(00110111)}) +INST(daa, {B(00100111)}) + +INST(sub, {B(001010), D, W, MOD, REG, RM}) +INSTALT(sub, {B(100000), S, W, MOD, B(101), RM, DATA, DATA_IF_W}) +INSTALT(sub, {B(0010110), W, DATA, DATA_IF_W, 
ImpREG(0), ImpD(1)}) + +INST(sbb, {B(000110), D, W, MOD, REG, RM}) +INSTALT(sbb, {B(100000), S, W, MOD, B(011), RM, DATA, DATA_IF_W}) +INSTALT(sbb, {B(0001110), W, DATA, DATA_IF_W, ImpREG(0), ImpD(1)}) + +INST(dec, {B(1111111), W, MOD, B(001), RM, ImpD(1)}) +INSTALT(dec, {B(01001), REG, ImpW(1), ImpD(1)}) + +INST(neg, {B(1111011), W, MOD, B(011), RM}) + +INST(cmp, {B(001110), D, W, MOD, REG, RM}) +INSTALT(cmp, {B(100000), S, W, MOD, B(111), RM, DATA, DATA_IF_W}) +INSTALT(cmp, {B(0011110), W, DATA, DATA_IF_W, ImpREG(0), ImpD(1)}) // NOTE(casey): The manual table suggests this data is only 8-bit, but wouldn't it be 16 as well? + +INST(aas, {B(00111111)}) +INST(das, {B(00101111)}) +INST(mul, {B(1111011), W, MOD, B(100), RM, ImpS(0)}) +INST(imul, {B(1111011), W, MOD, B(101), RM, ImpS(1)}) +INST(aam, {B(11010100), B(00001010)}) // NOTE(casey): The manual says this has a DISP... but how could it? What for?? +INST(div, {B(1111011), W, MOD, B(110), RM, ImpS(0)}) +INST(idiv, {B(1111011), W, MOD, B(111), RM, ImpS(1)}) +INST(aad, {B(11010101), B(00001010)}) +INST(cbw, {B(10011000)}) +INST(cwd, {B(10011001)}) + +INST(not, {B(1111011), W, MOD, B(010), RM}) +INST(shl, {B(110100), V, W, MOD, B(100), RM}) +INST(shr, {B(110100), V, W, MOD, B(101), RM}) +INST(sar, {B(110100), V, W, MOD, B(111), RM}) +INST(rol, {B(110100), V, W, MOD, B(000), RM}) +INST(ror, {B(110100), V, W, MOD, B(001), RM}) +INST(rcl, {B(110100), V, W, MOD, B(010), RM}) +INST(rcr, {B(110100), V, W, MOD, B(011), RM}) + +INST(and, {B(001000), D, W, MOD, REG, RM}) +INSTALT(and, {B(1000000), W, MOD, B(100), RM, DATA, DATA_IF_W}) +INSTALT(and, {B(0010010), W, DATA, DATA_IF_W, ImpREG(0), ImpD(1)}) + +INST(test, {B(1000010), W, MOD, REG, RM}) // NOTE(casey): The manual suggests there is a D flag here, but it doesn't appear to be true (it would conflict with xchg if it did) +INSTALT(test, {B(1111011), W, MOD, B(000), RM, DATA, DATA_IF_W}) +INSTALT(test, {B(1010100), W, DATA, DATA_IF_W, ImpREG(0), ImpD(1)}) // NOTE(casey): 
The manual table suggests this data is only 8-bit, but it seems like it could be 16 too? + +INST(or, {B(000010), D, W, MOD, REG, RM}) +INSTALT(or, {B(1000000), W, MOD, B(001), RM, DATA, DATA_IF_W}) +INSTALT(or, {B(0000110), W, DATA, DATA_IF_W, ImpREG(0), ImpD(1)}) + +INST(xor, {B(001100), D, W, MOD, REG, RM}) +INSTALT(xor, {B(1000000), W, MOD, B(110), RM, DATA, DATA_IF_W}) // NOTE(casey): The manual has conflicting information about this encoding, but I believe this is the correct binary pattern. +INSTALT(xor, {B(0011010), W, DATA, DATA_IF_W, ImpREG(0), ImpD(1)}) + +INST(rep, {B(1111001), Z}) +INST(movs, {B(1010010), W}) +INST(cmps, {B(1010011), W}) +INST(scas, {B(1010111), W}) +INST(lods, {B(1010110), W}) +INST(stos, {B(1010101), W}) + +INST(call, {B(11101000), ADDR, Flags(Bits_RelJMPDisp)}) +INSTALT(call, {B(11111111), MOD, B(010), RM, ImpW(1)}) +INSTALT(call, {B(10011010), ADDR, DATA, DATA_IF_W, ImpW(1), Flags(Bits_Far)}) +INSTALT(call, {B(11111111), MOD, B(011), RM, ImpW(1), Flags(Bits_Far)}) + +INST(jmp, {B(11101001), ADDR, Flags(Bits_RelJMPDisp)}) +INSTALT(jmp, {B(11101011), DISP, Flags(Bits_RelJMPDisp)}) +INSTALT(jmp, {B(11111111), MOD, B(100), RM, ImpW(1)}) +INSTALT(jmp, {B(11101010), ADDR, DATA, DATA_IF_W, ImpW(1), Flags(Bits_Far)}) +INSTALT(jmp, {B(11111111), MOD, B(101), RM, ImpW(1), Flags(Bits_Far)}) + +// NOTE(casey): The actual Intel manual does not distinguish mnemonics RET and RETF, +// but NASM needs this to reassemble properly, so we do. 
+INST(ret, {B(11000011)}) +INSTALT(ret, {B(11000010), DATA, DATA_IF_W, ImpW(1)}) +INST(retf, {B(11001011), Flags(Bits_Far)}) +INSTALT(retf, {B(11001010), DATA, DATA_IF_W, ImpW(1), Flags(Bits_Far)}) + +INST(je, {B(01110100), DISP, Flags(Bits_RelJMPDisp)}) +INST(jl, {B(01111100), DISP, Flags(Bits_RelJMPDisp)}) +INST(jle, {B(01111110), DISP, Flags(Bits_RelJMPDisp)}) +INST(jb, {B(01110010), DISP, Flags(Bits_RelJMPDisp)}) +INST(jbe, {B(01110110), DISP, Flags(Bits_RelJMPDisp)}) +INST(jp, {B(01111010), DISP, Flags(Bits_RelJMPDisp)}) +INST(jo, {B(01110000), DISP, Flags(Bits_RelJMPDisp)}) +INST(js, {B(01111000), DISP, Flags(Bits_RelJMPDisp)}) +INST(jne, {B(01110101), DISP, Flags(Bits_RelJMPDisp)}) +INST(jnl, {B(01111101), DISP, Flags(Bits_RelJMPDisp)}) +INST(jg, {B(01111111), DISP, Flags(Bits_RelJMPDisp)}) +INST(jnb, {B(01110011), DISP, Flags(Bits_RelJMPDisp)}) +INST(ja, {B(01110111), DISP, Flags(Bits_RelJMPDisp)}) +INST(jnp, {B(01111011), DISP, Flags(Bits_RelJMPDisp)}) +INST(jno, {B(01110001), DISP, Flags(Bits_RelJMPDisp)}) +INST(jns, {B(01111001), DISP, Flags(Bits_RelJMPDisp)}) +INST(loop, {B(11100010), DISP, Flags(Bits_RelJMPDisp)}) +INST(loopz, {B(11100001), DISP, Flags(Bits_RelJMPDisp)}) +INST(loopnz, {B(11100000), DISP, Flags(Bits_RelJMPDisp)}) +INST(jcxz, {B(11100011), DISP, Flags(Bits_RelJMPDisp)}) + +INST(int, {B(11001101), DATA}) +INST(int3, {B(11001100)}) // TODO(casey): The manual does not suggest that this intrinsic has an "int3" mnemonic, but NASM thinks so + +INST(into, {B(11001110)}) +INST(iret, {B(11001111)}) + +INST(clc, {B(11111000)}) +INST(cmc, {B(11110101)}) +INST(stc, {B(11111001)}) +INST(cld, {B(11111100)}) +INST(std, {B(11111101)}) +INST(cli, {B(11111010)}) +INST(sti, {B(11111011)}) +INST(hlt, {B(11110100)}) +INST(wait, {B(10011011)}) +INST(esc, {B(11011), XXX, MOD, YYY, RM}) +INST(lock, {B(11110000)}) +INST(segment, {B(001), SR, B(110)}) + +#undef INST +#undef INSTALT + +#undef B +#undef D +#undef S +#undef W +#undef V +#undef Z + +#undef XXX 
+#undef YYY +#undef RM +#undef MOD +#undef REG +#undef SR + +#undef ImpW +#undef ImpREG +#undef ImpMOD +#undef ImpRM +#undef ImpD +#undef ImpS + +#undef DISP +#undef ADDR +#undef DATA +#undef DATA_IF_W +#undef Flags diff --git a/src/libs/reference_decoder/sim86_lib.cpp b/src/libs/reference_decoder/sim86_lib.cpp new file mode 100644 index 0000000..6971eb2 --- /dev/null +++ b/src/libs/reference_decoder/sim86_lib.cpp @@ -0,0 +1,73 @@ +/* ======================================================================== + + (C) Copyright 2023 by Molly Rocket, Inc., All Rights Reserved. + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. + + Please see https://computerenhance.com for more information + + ======================================================================== */ + +#define assert(...) + +#include "sim86.h" + +#include "sim86_instruction.h" +#include "sim86_instruction_table.h" +#include "sim86_memory.h" +#include "sim86_decode.h" + +#include "sim86_instruction.cpp" +#include "sim86_instruction_table.cpp" +#include "sim86_memory.cpp" +#include "sim86_decode.cpp" +#include "sim86_text_table.cpp" + +extern "C" u32 Sim86_GetVersion(void) +{ + u32 Result = SIM86_VERSION; + return Result; +} + +extern "C" void Sim86_Decode8086Instruction(u32 SourceSize, u8 *Source, instruction *Dest) +{ + instruction_table Table = Get8086InstructionTable(); + + // NOTE(casey): The 8086 decoder requires the ability to read up to 15 bytes (the maximum + // allowable instruction size) + assert(Table.MaxInstructionByteCount == 15); + u8 GuardBuffer[16] = {}; + if(SourceSize < Table.MaxInstructionByteCount) + { + // NOTE(casey): I replaced the memcpy here with a manual copy to make it easier for + // people compiling on things like WebAssembly who do not want to use Emscripten. 
+ for(u32 I = 0; I < SourceSize; ++I) + { + GuardBuffer[I] = Source[I]; + } + + Source = GuardBuffer; + } + + segmented_access At = FixedMemoryPow2(4, Source); + *Dest = DecodeInstruction(Table, At); +} + +extern "C" char const *Sim86_RegisterNameFromOperand(register_access *RegAccess) +{ + char const *Result = GetRegName(*RegAccess); + return Result; +} + +extern "C" char const *Sim86_MnemonicFromOperationType(operation_type Type) +{ + char const *Result = GetMnemonic(Type); + return Result; +} + +extern "C" void Sim86_Get8086InstructionTable(instruction_table *Dest) +{ + *Dest = Get8086InstructionTable(); +}
\ No newline at end of file diff --git a/src/libs/reference_decoder/sim86_memory.cpp b/src/libs/reference_decoder/sim86_memory.cpp new file mode 100644 index 0000000..1d58ede --- /dev/null +++ b/src/libs/reference_decoder/sim86_memory.cpp @@ -0,0 +1,66 @@ +/* ======================================================================== + + (C) Copyright 2023 by Molly Rocket, Inc., All Rights Reserved. + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. + + Please see https://computerenhance.com for more information + + ======================================================================== */ + +static u32 GetHighestAddress(segmented_access SegMem) +{ + u32 Result = SegMem.Mask; + return Result; +} + +static u32 GetAbsoluteAddressOf(u32 Mask, u16 SegmentBase, u16 SegmentOffset, u16 AdditionalOffset) +{ + u32 Result = (((u32)SegmentBase << 4) + (u32)(SegmentOffset + AdditionalOffset)) & Mask; + return Result; +} + +static u32 GetAbsoluteAddressOf(segmented_access SegMem, u16 Offset) +{ + u32 Result = GetAbsoluteAddressOf(SegMem.Mask, SegMem.SegmentBase, SegMem.SegmentOffset, Offset); + return Result; +} + +static segmented_access MoveBaseBy(segmented_access Access, s32 Offset) +{ + Access.SegmentOffset += Offset; + + segmented_access Result = Access; + + Result.SegmentBase += (Result.SegmentOffset >> 4); + Result.SegmentOffset &= 0xf; + + assert(GetAbsoluteAddressOf(Result, 0) == GetAbsoluteAddressOf(Access, 0)); + + return Result; +} + +static u8 *AccessMemory(segmented_access SegMem, u16 Offset) +{ + u32 AbsAddr = GetAbsoluteAddressOf(SegMem, Offset); + u8 *Result = SegMem.Memory + AbsAddr; + return Result; +} + +static b32 IsValid(segmented_access SegMem) +{ + b32 Result = (SegMem.Mask != 0); + return Result; +} + +static segmented_access FixedMemoryPow2(u32 SizePow2, u8 *Memory) +{ + segmented_access Result = {}; + + Result.Memory = 
Memory; + Result.Mask = (1 << SizePow2) - 1; + + return Result; +} diff --git a/src/libs/reference_decoder/sim86_memory.h b/src/libs/reference_decoder/sim86_memory.h new file mode 100644 index 0000000..4e790dc --- /dev/null +++ b/src/libs/reference_decoder/sim86_memory.h @@ -0,0 +1,28 @@ +/* ======================================================================== + + (C) Copyright 2023 by Molly Rocket, Inc., All Rights Reserved. + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. + + Please see https://computerenhance.com for more information + + ======================================================================== */ + +struct segmented_access +{ + u8 *Memory; + u32 Mask; + u16 SegmentBase; + u16 SegmentOffset; +}; + +static u32 GetHighestAddress(segmented_access SegMem); +static u32 GetAbsoluteAddressOf(segmented_access SegMem, u16 Offset = 0); +static segmented_access MoveBaseBy(segmented_access Access, s32 Offset); + +static u8 *AccessMemory(segmented_access SegMem, u16 Offset = 0); + +static b32 IsValid(segmented_access SegMem); +static segmented_access FixedMemoryPow2(u32 SizePow2, u8 *Memory); diff --git a/src/libs/reference_decoder/sim86_text_table.cpp b/src/libs/reference_decoder/sim86_text_table.cpp new file mode 100644 index 0000000..e90a649 --- /dev/null +++ b/src/libs/reference_decoder/sim86_text_table.cpp @@ -0,0 +1,57 @@ +/* ======================================================================== + + (C) Copyright 2023 by Molly Rocket, Inc., All Rights Reserved. + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. 
+ + Please see https://computerenhance.com for more information + + ======================================================================== */ + +char const *OpcodeMnemonics[] = +{ + "", + +#define INST(Mnemonic, ...) #Mnemonic, +#define INSTALT(...) +#include "sim86_instruction_table.inl" +}; + +static char const *GetMnemonic(operation_type Op) +{ + char const *Result = ""; + if(Op < Op_Count) + { + Result = OpcodeMnemonics[Op]; + } + + return Result; +} + +static char const *GetRegName(register_access Reg) +{ + char const *Names[][3] = + { + {"", "", ""}, + {"al", "ah", "ax"}, + {"bl", "bh", "bx"}, + {"cl", "ch", "cx"}, + {"dl", "dh", "dx"}, + {"sp", "sp", "sp"}, + {"bp", "bp", "bp"}, + {"si", "si", "si"}, + {"di", "di", "di"}, + {"es", "es", "es"}, + {"cs", "cs", "cs"}, + {"ss", "ss", "ss"}, + {"ds", "ds", "ds"}, + {"ip", "ip", "ip"}, + {"flags", "flags", "flags"}, + {"", "", ""} + }; + + char const *Result = Names[Reg.Index % ArrayCount(Names)][(Reg.Count == 2) ? 2 : Reg.Offset&1]; + return Result; +} |
