diff options
Diffstat (limited to 'src/sim86/libs/reference_decoder/sim86_decode.cpp')
| -rw-r--r-- | src/sim86/libs/reference_decoder/sim86_decode.cpp | 303 |
1 files changed, 303 insertions, 0 deletions
diff --git a/src/sim86/libs/reference_decoder/sim86_decode.cpp b/src/sim86/libs/reference_decoder/sim86_decode.cpp new file mode 100644 index 0000000..dc084d5 --- /dev/null +++ b/src/sim86/libs/reference_decoder/sim86_decode.cpp @@ -0,0 +1,303 @@ +/* ======================================================================== + + (C) Copyright 2023 by Molly Rocket, Inc., All Rights Reserved. + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. + + Please see https://computerenhance.com for more information + + ======================================================================== */ + +struct decode_context +{ + u32 DefaultSegment; + u32 AdditionalFlags; +}; + +static instruction_operand GetRegOperand(u32 IntelRegIndex, b32 Wide) +{ + // NOTE(casey): This maps Intel's REG and RM field encodings for registers to our encoding for registers. + register_access RegTable[][2] = + { + {{Register_a, 0, 1}, {Register_a, 0, 2}}, + {{Register_c, 0, 1}, {Register_c, 0, 2}}, + {{Register_d, 0, 1}, {Register_d, 0, 2}}, + {{Register_b, 0, 1}, {Register_b, 0, 2}}, + {{Register_a, 1, 1}, {Register_sp, 0, 2}}, + {{Register_c, 1, 1}, {Register_bp, 0, 2}}, + {{Register_d, 1, 1}, {Register_si, 0, 2}}, + {{Register_b, 1, 1}, {Register_di, 0, 2}}, + }; + + instruction_operand Result = {}; + Result.Type = Operand_Register; + Result.Register = RegTable[IntelRegIndex & 0x7][(Wide != 0)]; + + return Result; +} + +// NOTE(casey): ParseDataValue is not a real function, it's basically just a macro that is used in +// TryParse. It should never be called otherwise, but that is not something you can do in C++. +// In other languages it would be a "local function". +static u32 ParseDataValue(segmented_access *Access, b32 Exists, b32 Wide, b32 SignExtended) +{ + u32 Result = {}; + + if(Exists) + { + if(Wide) + { + u8 D0 = *AccessMemory(*Access, 0); + u8 D1 = *AccessMemory(*Access, 1); + Result = (D1 << 8) | D0; + Access->SegmentOffset += 2; + } + else + { + Result = *AccessMemory(*Access); + if(SignExtended) + { + Result = (s32)*(s8 *)&Result; + } + Access->SegmentOffset += 1; + } + } + + return Result; +} + +static instruction TryDecode(decode_context *Context, instruction_encoding *Inst, segmented_access At) +{ + instruction Dest = {}; + b32 Has[Bits_Count] = {}; + u32 Bits[Bits_Count] = {}; + b32 Valid = true; + + u64 StartingAddress = GetAbsoluteAddressOf(At); + + u8 BitsPendingCount = 0; + u8 BitsPending = 0; + for(u32 BitsIndex = 0; Valid && (BitsIndex < ArrayCount(Inst->Bits)); ++BitsIndex) + { + instruction_bits TestBits = Inst->Bits[BitsIndex]; + if(TestBits.Usage == Bits_End) + { + // NOTE(casey): That's the end of the instruction format. + break; + } + + u32 ReadBits = TestBits.Value; + if(TestBits.BitCount != 0) + { + if(BitsPendingCount == 0) + { + BitsPendingCount = 8; + BitsPending = *AccessMemory(At); + ++At.SegmentOffset; + } + + // NOTE(casey): If this assert fires, it means we have an error in our table, + // since there are no 8086 instructions that have bit values straddling a + // byte boundary. + assert(TestBits.BitCount <= BitsPendingCount); + + BitsPendingCount -= TestBits.BitCount; + ReadBits = BitsPending; + ReadBits >>= BitsPendingCount; + ReadBits &= ~(0xff << TestBits.BitCount); + } + + if(TestBits.Usage == Bits_Literal) + { + // NOTE(casey): This is a "required" sequence + Valid = Valid && (ReadBits == TestBits.Value); + } + else + { + Bits[TestBits.Usage] |= (ReadBits << TestBits.Shift); + Has[TestBits.Usage] = true; + } + } + + if(Valid) + { + u32 Mod = Bits[Bits_MOD]; + u32 RM = Bits[Bits_RM]; + u32 W = Bits[Bits_W]; + b32 S = Bits[Bits_S]; + b32 D = Bits[Bits_D]; + + b32 HasDirectAddress = ((Mod == 0b00) && (RM == 0b110)); + Has[Bits_Disp] = ((Has[Bits_Disp]) || (Mod == 0b10) || (Mod == 0b01) || HasDirectAddress); + + b32 DisplacementIsW = ((Bits[Bits_DispAlwaysW]) || (Mod == 0b10) || HasDirectAddress); + b32 DataIsW = ((Bits[Bits_WMakesDataW]) && !S && W); + + Bits[Bits_Disp] |= ParseDataValue(&At, Has[Bits_Disp], DisplacementIsW, (!DisplacementIsW)); + Bits[Bits_Data] |= ParseDataValue(&At, Has[Bits_Data], DataIsW, S); + + Dest.Op = Inst->Op; + Dest.Flags = Context->AdditionalFlags; + Dest.Address = StartingAddress; + Dest.Size = GetAbsoluteAddressOf(At) - StartingAddress; + Dest.SegmentOverride = Context->DefaultSegment; + + if(W) + { + Dest.Flags |= Inst_Wide; + } + + if(Bits[Bits_Far]) + { + Dest.Flags |= Inst_Far; + } + + if(Bits[Bits_Z]) + { + Dest.Flags |= Inst_RepNE; + } + + u32 Disp = Bits[Bits_Disp]; + s16 Displacement = (s16)Disp; + + instruction_operand *RegOperand = &Dest.Operands[D ? 0 : 1]; + instruction_operand *ModOperand = &Dest.Operands[D ? 1 : 0]; + + if(Has[Bits_SR]) + { + *RegOperand = RegisterOperand(Register_es + (Bits[Bits_SR] & 0x3), 2); + } + + if(Has[Bits_REG]) + { + *RegOperand = GetRegOperand(Bits[Bits_REG], W); + } + + if(Has[Bits_MOD]) + { + if(Mod == 0b11) + { + *ModOperand = GetRegOperand(RM, W || (Bits[Bits_RMRegAlwaysW])); + } + else + { + register_mapping_8086 IntelTerm0[8] = { Register_b, Register_b, Register_bp, Register_bp, Register_si, Register_di, Register_bp, Register_b}; + register_mapping_8086 IntelTerm1[8] = {Register_si, Register_di, Register_si, Register_di}; + + u32 I = RM&0x7; + register_mapping_8086 Term0 = IntelTerm0[I]; + register_mapping_8086 Term1 = IntelTerm1[I]; + if((Mod == 0b00) && (RM == 0b110)) + { + Term0 = {}; + Term1 = {}; + } + + *ModOperand = EffectiveAddressOperand(RegisterAccess(Term0, 0, 2), RegisterAccess(Term1, 0, 2), Displacement); + } + } + + if(Has[Bits_Data] && Has[Bits_Disp] && !Has[Bits_MOD]) + { + Dest.Operands[0] = IntersegmentAddressOperand(Bits[Bits_Data], Bits[Bits_Disp]); + } + else + { + // + // NOTE(casey): Because there are some strange opcodes that do things like have an immediate as + // a _destination_ ("out", for example), I define immediates and other "additional operands" to + // go in "whatever slot was not used by the reg and mod fields". + // + + instruction_operand *LastOperand = &Dest.Operands[0]; + if(LastOperand->Type) + { + LastOperand = &Dest.Operands[1]; + } + + if(Bits[Bits_RelJMPDisp]) + { + *LastOperand = ImmediateOperand(Displacement, Immediate_RelativeJumpDisplacement); + } + else if(Has[Bits_Data]) + { + *LastOperand = ImmediateOperand(Bits[Bits_Data]); + } + else if(Has[Bits_V]) + { + if(Bits[Bits_V]) + { + *LastOperand = RegisterOperand(Register_c, 1); + } + else + { + *LastOperand = ImmediateOperand(1); + } + } + } + } + + return Dest; +} + +static instruction DecodeInstruction(instruction_table Table, segmented_access At) +{ + /* TODO(casey): Hmm. It seems like this is a very inefficient way to parse + instructions, isn't it? For every instruction, we check every entry in the + table until we find a match. Is this bad design? Or did the person who wrote + it know what they were doing, and has a plan for how it can be optimized + later? Only time will tell... :) */ + + decode_context Context = {}; + instruction Result = {}; + + u32 StartingAddress = GetAbsoluteAddressOf(At); + u32 TotalSize = 0; + while(TotalSize < Table.MaxInstructionByteCount) + { + Result = {}; + for(u32 Index = 0; Index < Table.EncodingCount; ++Index) + { + instruction_encoding Inst = Table.Encodings[Index]; + Result = TryDecode(&Context, &Inst, At); + if(Result.Op) + { + At.SegmentOffset += Result.Size; + TotalSize += Result.Size; + break; + } + } + + if(Result.Op == Op_lock) + { + Context.AdditionalFlags |= Inst_Lock; + } + else if(Result.Op == Op_rep) + { + Context.AdditionalFlags |= Inst_Rep | (Result.Flags & Inst_RepNE); + } + else if(Result.Op == Op_segment) + { + Context.AdditionalFlags |= Inst_Segment; + Context.DefaultSegment = Result.Operands[1].Register.Index; + } + else + { + break; + } + } + + if(TotalSize <= Table.MaxInstructionByteCount) + { + Result.Address = StartingAddress; + Result.Size = TotalSize; + } + else + { + Result = {}; + } + + return Result; +} |
