summary refs log tree commit diff
path: root/src/code/reference_decoder/sim86_decode.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/code/reference_decoder/sim86_decode.cpp')
-rw-r--r-- src/code/reference_decoder/sim86_decode.cpp 303
1 files changed, 303 insertions, 0 deletions
diff --git a/src/code/reference_decoder/sim86_decode.cpp b/src/code/reference_decoder/sim86_decode.cpp
new file mode 100644
index 0000000..dc084d5
--- /dev/null
+++ b/src/code/reference_decoder/sim86_decode.cpp
@@ -0,0 +1,303 @@
+/* ========================================================================
+
+ (C) Copyright 2023 by Molly Rocket, Inc., All Rights Reserved.
+
+ This software is provided 'as-is', without any express or implied
+ warranty. In no event will the authors be held liable for any damages
+ arising from the use of this software.
+
+ Please see https://computerenhance.com for more information
+
+ ======================================================================== */
+
+// State carried from prefix instructions to the instruction that follows them.
+// DecodeInstruction updates it as it consumes prefixes, and TryDecode copies it into
+// every instruction it successfully decodes.
+struct decode_context
+{
+ // Copied into the decoded instruction's SegmentOverride by TryDecode. DecodeInstruction
+ // sets it from the register operand of an Op_segment prefix.
+ u32 DefaultSegment;
+ // Inst_* flag bits accumulated by DecodeInstruction for lock/rep/segment prefixes.
+ // TryDecode uses this as the starting value of the decoded instruction's Flags.
+ u32 AdditionalFlags;
+};
+
+// Builds a register operand from an Intel REG or R/M register field.
+//   IntelRegIndex - the encoded register number; only its low three bits are used.
+//   Wide          - zero selects the first (W=0) table column, any non-zero value the second.
+// Returns an operand whose Type is Operand_Register and whose Register is the table entry.
+static instruction_operand GetRegOperand(u32 IntelRegIndex, b32 Wide)
+{
+    // NOTE: One row per Intel encoding (0-7), one column per W value. Each entry is presumably
+    // {register, byte offset within it, byte count} - confirm against register_access.
+    register_access Lookup[][2] =
+    {
+        {{Register_a, 0, 1}, {Register_a, 0, 2}},
+        {{Register_c, 0, 1}, {Register_c, 0, 2}},
+        {{Register_d, 0, 1}, {Register_d, 0, 2}},
+        {{Register_b, 0, 1}, {Register_b, 0, 2}},
+        {{Register_a, 1, 1}, {Register_sp, 0, 2}},
+        {{Register_c, 1, 1}, {Register_bp, 0, 2}},
+        {{Register_d, 1, 1}, {Register_si, 0, 2}},
+        {{Register_b, 1, 1}, {Register_di, 0, 2}},
+    };
+
+    u32 Row = IntelRegIndex & 0x7;
+    u32 Column = Wide ? 1 : 0;
+
+    instruction_operand Operand = {};
+    Operand.Type = Operand_Register;
+    Operand.Register = Lookup[Row][Column];
+
+    return Operand;
+}
+
+// NOTE(casey): ParseDataValue is not a real function, it's basically just a macro that is used in
+// TryDecode. It should never be called otherwise, but that is not something you can do in C++.
+// In other languages it would be a "local function".
+//
+// Reads an optional 8- or 16-bit value at Access and advances Access->SegmentOffset past it.
+//   Exists       - when zero nothing is read, Access is left untouched and 0 is returned.
+//   Wide         - when non-zero two bytes are read and combined low byte first (no sign extension).
+//   SignExtended - only consulted for the one-byte case: non-zero sign-extends the byte to
+//                  32 bits, zero leaves it zero-extended.
+static u32 ParseDataValue(segmented_access *Access, b32 Exists, b32 Wide, b32 SignExtended)
+{
+    u32 Result = {};
+
+    if(Exists)
+    {
+        if(Wide)
+        {
+            u8 D0 = *AccessMemory(*Access, 0);
+            u8 D1 = *AccessMemory(*Access, 1);
+            Result = (D1 << 8) | D0;
+            Access->SegmentOffset += 2;
+        }
+        else
+        {
+            u8 Byte = *AccessMemory(*Access);
+            if(SignExtended)
+            {
+                // NOTE: Sign-extend by value. Reinterpreting the first byte of the u32 through an
+                // s8 pointer only selects the low byte on little-endian hosts.
+                Result = (u32)(s32)(s8)Byte;
+            }
+            else
+            {
+                Result = Byte;
+            }
+            Access->SegmentOffset += 1;
+        }
+    }
+
+    return Result;
+}
+
+// Attempts to decode the bytes at At using the single encoding description Inst.
+//
+// Walks Inst->Bits in order until a Bits_End entry (or the end of the array), pulling a new byte
+// from memory whenever a field needs bits and none are pending. Bits_Literal fields must equal
+// their table Value or the attempt is rejected; every other field is OR'ed into Bits[] (shifted
+// by its Shift) and flagged in Has[].
+//
+// On a match the returned instruction has Op, Flags, Address, Size, SegmentOverride and Operands
+// filled in (Flags and SegmentOverride start from Context). On a mismatch the zero-initialized
+// instruction is returned; DecodeInstruction detects that by testing Result.Op.
+static instruction TryDecode(decode_context *Context, instruction_encoding *Inst, segmented_access At)
+{
+ instruction Dest = {};
+ b32 Has[Bits_Count] = {};
+ u32 Bits[Bits_Count] = {};
+ b32 Valid = true;
+
+ u64 StartingAddress = GetAbsoluteAddressOf(At);
+
+ // BitsPending holds the byte currently being consumed; BitsPendingCount is how many of its
+ // bits (counted from the low end) have not been handed out yet.
+ u8 BitsPendingCount = 0;
+ u8 BitsPending = 0;
+ for(u32 BitsIndex = 0; Valid && (BitsIndex < ArrayCount(Inst->Bits)); ++BitsIndex)
+ {
+ instruction_bits TestBits = Inst->Bits[BitsIndex];
+ if(TestBits.Usage == Bits_End)
+ {
+ // NOTE(casey): That's the end of the instruction format.
+ break;
+ }
+
+ // Fields with a BitCount of 0 consume no input; they take their value straight from the table.
+ u32 ReadBits = TestBits.Value;
+ if(TestBits.BitCount != 0)
+ {
+ if(BitsPendingCount == 0)
+ {
+ // No bits left over from the previous byte, so fetch the next one.
+ BitsPendingCount = 8;
+ BitsPending = *AccessMemory(At);
+ ++At.SegmentOffset;
+ }
+
+ // NOTE(casey): If this assert fires, it means we have an error in our table,
+ // since there are no 8086 instructions that have bit values straddling a
+ // byte boundary.
+ assert(TestBits.BitCount <= BitsPendingCount);
+
+ // Take the next BitCount bits of the pending byte, most-significant bits first:
+ // shift the unread remainder away, then mask down to BitCount bits.
+ BitsPendingCount -= TestBits.BitCount;
+ ReadBits = BitsPending;
+ ReadBits >>= BitsPendingCount;
+ ReadBits &= ~(0xff << TestBits.BitCount);
+ }
+
+ if(TestBits.Usage == Bits_Literal)
+ {
+ // NOTE(casey): This is a "required" sequence
+ Valid = Valid && (ReadBits == TestBits.Value);
+ }
+ else
+ {
+ Bits[TestBits.Usage] |= (ReadBits << TestBits.Shift);
+ Has[TestBits.Usage] = true;
+ }
+ }
+
+ if(Valid)
+ {
+ u32 Mod = Bits[Bits_MOD];
+ u32 RM = Bits[Bits_RM];
+ u32 W = Bits[Bits_W];
+ b32 S = Bits[Bits_S];
+ b32 D = Bits[Bits_D];
+
+ // MOD=00 with R/M=110 is the direct-address form: a displacement with no register terms.
+ b32 HasDirectAddress = ((Mod == 0b00) && (RM == 0b110));
+ // A displacement follows if the table asked for one, or MOD is 01/10, or it is a direct address.
+ Has[Bits_Disp] = ((Has[Bits_Disp]) || (Mod == 0b10) || (Mod == 0b01) || HasDirectAddress);
+
+ // The displacement is two bytes for DispAlwaysW, MOD=10 and direct addresses; otherwise it
+ // is a single byte, which is sign-extended (see the last argument to ParseDataValue below).
+ b32 DisplacementIsW = ((Bits[Bits_DispAlwaysW]) || (Mod == 0b10) || HasDirectAddress);
+ // Immediate data is two bytes only when the encoding sets WMakesDataW, W is set and S is clear.
+ b32 DataIsW = ((Bits[Bits_WMakesDataW]) && !S && W);
+
+ // Displacement bytes are read before data bytes; each call advances At past what it read.
+ Bits[Bits_Disp] |= ParseDataValue(&At, Has[Bits_Disp], DisplacementIsW, (!DisplacementIsW));
+ Bits[Bits_Data] |= ParseDataValue(&At, Has[Bits_Data], DataIsW, S);
+
+ Dest.Op = Inst->Op;
+ Dest.Flags = Context->AdditionalFlags;
+ Dest.Address = StartingAddress;
+ // Size is the number of bytes consumed by this encoding, measured in absolute addresses.
+ Dest.Size = GetAbsoluteAddressOf(At) - StartingAddress;
+ Dest.SegmentOverride = Context->DefaultSegment;
+
+ if(W)
+ {
+ Dest.Flags |= Inst_Wide;
+ }
+
+ if(Bits[Bits_Far])
+ {
+ Dest.Flags |= Inst_Far;
+ }
+
+ if(Bits[Bits_Z])
+ {
+ Dest.Flags |= Inst_RepNE;
+ }
+
+ // Only the low 16 bits of the accumulated displacement are kept, reinterpreted as signed.
+ u32 Disp = Bits[Bits_Disp];
+ s16 Displacement = (s16)Disp;
+
+ // D selects which operand slot the REG field fills; the MOD/RM operand takes the other one.
+ instruction_operand *RegOperand = &Dest.Operands[D ? 0 : 1];
+ instruction_operand *ModOperand = &Dest.Operands[D ? 1 : 0];
+
+ // SR and REG both write RegOperand; if an encoding supplied both, REG (written last) wins.
+ if(Has[Bits_SR])
+ {
+ *RegOperand = RegisterOperand(Register_es + (Bits[Bits_SR] & 0x3), 2);
+ }
+
+ if(Has[Bits_REG])
+ {
+ *RegOperand = GetRegOperand(Bits[Bits_REG], W);
+ }
+
+ if(Has[Bits_MOD])
+ {
+ if(Mod == 0b11)
+ {
+ // MOD=11: the R/M field names a register rather than a memory operand.
+ *ModOperand = GetRegOperand(RM, W || (Bits[Bits_RMRegAlwaysW]));
+ }
+ else
+ {
+ // Register terms of the effective address, indexed by R/M. IntelTerm1 lists only four
+ // entries, so entries 4-7 are value-initialized.
+ register_mapping_8086 IntelTerm0[8] = { Register_b, Register_b, Register_bp, Register_bp, Register_si, Register_di, Register_bp, Register_b};
+ register_mapping_8086 IntelTerm1[8] = {Register_si, Register_di, Register_si, Register_di};
+
+ u32 I = RM&0x7;
+ register_mapping_8086 Term0 = IntelTerm0[I];
+ register_mapping_8086 Term1 = IntelTerm1[I];
+ if((Mod == 0b00) && (RM == 0b110))
+ {
+ // Direct address: clear both register terms so only the displacement remains.
+ Term0 = {};
+ Term1 = {};
+ }
+
+ *ModOperand = EffectiveAddressOperand(RegisterAccess(Term0, 0, 2), RegisterAccess(Term1, 0, 2), Displacement);
+ }
+ }
+
+ // Data plus displacement with no MOD field is turned into an intersegment address operand,
+ // built from (Data, Disp) and stored in operand 0.
+ if(Has[Bits_Data] && Has[Bits_Disp] && !Has[Bits_MOD])
+ {
+ Dest.Operands[0] = IntersegmentAddressOperand(Bits[Bits_Data], Bits[Bits_Disp]);
+ }
+ else
+ {
+ //
+ // NOTE(casey): Because there are some strange opcodes that do things like have an immediate as
+ // a _destination_ ("out", for example), I define immediates and other "additional operands" to
+ // go in "whatever slot was not used by the reg and mod fields".
+ //
+
+ // Use operand 0 if its Type is still zero, otherwise operand 1.
+ instruction_operand *LastOperand = &Dest.Operands[0];
+ if(LastOperand->Type)
+ {
+ LastOperand = &Dest.Operands[1];
+ }
+
+ // At most one additional operand is written, in this priority order:
+ // relative jump displacement, immediate data, then the V field (register c or constant 1).
+ if(Bits[Bits_RelJMPDisp])
+ {
+ *LastOperand = ImmediateOperand(Displacement, Immediate_RelativeJumpDisplacement);
+ }
+ else if(Has[Bits_Data])
+ {
+ *LastOperand = ImmediateOperand(Bits[Bits_Data]);
+ }
+ else if(Has[Bits_V])
+ {
+ if(Bits[Bits_V])
+ {
+ *LastOperand = RegisterOperand(Register_c, 1);
+ }
+ else
+ {
+ *LastOperand = ImmediateOperand(1);
+ }
+ }
+ }
+ }
+
+ return Dest;
+}
+
+// Decodes one complete instruction at At, including any lock/rep/segment prefixes before it.
+//
+// Each pass tries every encoding in Table until one yields a non-zero Op, then advances past it.
+// Prefix results are folded into the decode context and decoding continues; any other result
+// (including "nothing matched") ends the loop. The returned instruction's Address and Size cover
+// the prefixes as well. If more than Table.MaxInstructionByteCount bytes were consumed, a
+// zero-initialized instruction is returned instead.
+static instruction DecodeInstruction(instruction_table Table, segmented_access At)
+{
+    /* TODO(casey): Hmm. It seems like this is a very inefficient way to parse
+       instructions, isn't it? For every instruction, we check every entry in the
+       table until we find a match. Is this bad design? Or did the person who wrote
+       it know what they were doing, and has a plan for how it can be optimized
+       later? Only time will tell... :) */
+
+    decode_context Context = {};
+    instruction Result = {};
+
+    u32 StartingAddress = GetAbsoluteAddressOf(At);
+    u32 TotalSize = 0;
+
+    b32 ExpectMore = true;
+    while(ExpectMore && (TotalSize < Table.MaxInstructionByteCount))
+    {
+        // NOTE: Linear search for the first encoding that matches at the current position.
+        Result = {};
+        for(u32 EncodingIndex = 0; EncodingIndex < Table.EncodingCount; ++EncodingIndex)
+        {
+            instruction_encoding Candidate = Table.Encodings[EncodingIndex];
+            Result = TryDecode(&Context, &Candidate, At);
+            if(Result.Op)
+            {
+                At.SegmentOffset += Result.Size;
+                TotalSize += Result.Size;
+                break;
+            }
+        }
+
+        // NOTE: Prefixes only update the context and keep the loop going; everything else stops it.
+        switch(Result.Op)
+        {
+            case Op_lock:
+            {
+                Context.AdditionalFlags |= Inst_Lock;
+            } break;
+
+            case Op_rep:
+            {
+                Context.AdditionalFlags |= Inst_Rep | (Result.Flags & Inst_RepNE);
+            } break;
+
+            case Op_segment:
+            {
+                Context.AdditionalFlags |= Inst_Segment;
+                Context.DefaultSegment = Result.Operands[1].Register.Index;
+            } break;
+
+            default:
+            {
+                ExpectMore = false;
+            } break;
+        }
+    }
+
+    if(TotalSize > Table.MaxInstructionByteCount)
+    {
+        // NOTE: Consumed more bytes than the table allows for one instruction; report failure.
+        Result = {};
+    }
+    else
+    {
+        Result.Address = StartingAddress;
+        Result.Size = TotalSize;
+    }
+
+    return Result;
+}