diff options
71 files changed, 4690 insertions, 0 deletions
diff --git a/archived/less_old_sim8086/build/error b/archived/less_old_sim8086/build/error new file mode 100644 index 0000000..36d5148 --- /dev/null +++ b/archived/less_old_sim8086/build/error @@ -0,0 +1,2 @@ +/home/aluc/zot/computerenhance/sim8086/listings/listing_0042_completionist_decode.asm:395: warning: superfluous LOCK prefix on XCHG instruction [-w+prefix-lock-xchg] +Segmentation fault (core dumped) diff --git a/archived/less_old_sim8086/build/first b/archived/less_old_sim8086/build/first new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/archived/less_old_sim8086/build/first diff --git a/archived/less_old_sim8086/build/first.asm b/archived/less_old_sim8086/build/first.asm new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/archived/less_old_sim8086/build/first.asm diff --git a/archived/less_old_sim8086/build/listing_0037_single_register_mov b/archived/less_old_sim8086/build/listing_0037_single_register_mov new file mode 100644 index 0000000..93b300d --- /dev/null +++ b/archived/less_old_sim8086/build/listing_0037_single_register_mov @@ -0,0 +1 @@ +
\ No newline at end of file diff --git a/archived/less_old_sim8086/build/listing_0038_many_register_mov b/archived/less_old_sim8086/build/listing_0038_many_register_mov new file mode 100644 index 0000000..5605230 --- /dev/null +++ b/archived/less_old_sim8086/build/listing_0038_many_register_mov @@ -0,0 +1 @@ +وډމȈÉ
\ No newline at end of file diff --git a/archived/less_old_sim8086/build/listing_0039_more_movs b/archived/less_old_sim8086/build/listing_0039_more_movs Binary files differnew file mode 100644 index 0000000..405b804 --- /dev/null +++ b/archived/less_old_sim8086/build/listing_0039_more_movs diff --git a/archived/less_old_sim8086/build/listing_0040_challenge_movs b/archived/less_old_sim8086/build/listing_0040_challenge_movs Binary files differnew file mode 100644 index 0000000..63b3ec5 --- /dev/null +++ b/archived/less_old_sim8086/build/listing_0040_challenge_movs diff --git a/archived/less_old_sim8086/build/listing_0041_add_sub_cmp_jnz b/archived/less_old_sim8086/build/listing_0041_add_sub_cmp_jnz Binary files differnew file mode 100644 index 0000000..6c71f1b --- /dev/null +++ b/archived/less_old_sim8086/build/listing_0041_add_sub_cmp_jnz diff --git a/archived/less_old_sim8086/build/listing_0041_add_sub_cmp_jnz_error b/archived/less_old_sim8086/build/listing_0041_add_sub_cmp_jnz_error new file mode 100644 index 0000000..0ad0b5f --- /dev/null +++ b/archived/less_old_sim8086/build/listing_0041_add_sub_cmp_jnz_error @@ -0,0 +1,2 @@ +Could not open file. 
+diff: ../build/listing_0041_add_sub_cmp_jnz_first: No such file or directory diff --git a/archived/less_old_sim8086/build/listing_0041_add_sub_cmp_jnz_first.asm b/archived/less_old_sim8086/build/listing_0041_add_sub_cmp_jnz_first.asm new file mode 100644 index 0000000..2168722 --- /dev/null +++ b/archived/less_old_sim8086/build/listing_0041_add_sub_cmp_jnz_first.asm @@ -0,0 +1,2 @@ +bits 16 +jnz test_label1 diff --git a/archived/less_old_sim8086/build/listing_0041_add_sub_cmp_jnz_second b/archived/less_old_sim8086/build/listing_0041_add_sub_cmp_jnz_second new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/archived/less_old_sim8086/build/listing_0041_add_sub_cmp_jnz_second diff --git a/archived/less_old_sim8086/build/listing_0041_add_sub_cmp_jnz_second.asm b/archived/less_old_sim8086/build/listing_0041_add_sub_cmp_jnz_second.asm new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/archived/less_old_sim8086/build/listing_0041_add_sub_cmp_jnz_second.asm diff --git a/archived/less_old_sim8086/build/listing_0042_completionist_decode b/archived/less_old_sim8086/build/listing_0042_completionist_decode Binary files differnew file mode 100644 index 0000000..6bf7b11 --- /dev/null +++ b/archived/less_old_sim8086/build/listing_0042_completionist_decode diff --git a/archived/less_old_sim8086/build/listing_0042_completionist_decode_error b/archived/less_old_sim8086/build/listing_0042_completionist_decode_error new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/archived/less_old_sim8086/build/listing_0042_completionist_decode_error diff --git a/archived/less_old_sim8086/build/listing_0042_completionist_decode_first b/archived/less_old_sim8086/build/listing_0042_completionist_decode_first new file mode 100644 index 0000000..b64d64a --- /dev/null +++ b/archived/less_old_sim8086/build/listing_0042_completionist_decode_first @@ -0,0 +1 @@ +
\ No newline at end of file diff --git a/archived/less_old_sim8086/build/listing_0042_completionist_decode_first.asm b/archived/less_old_sim8086/build/listing_0042_completionist_decode_first.asm new file mode 100644 index 0000000..e3947a2 --- /dev/null +++ b/archived/less_old_sim8086/build/listing_0042_completionist_decode_first.asm @@ -0,0 +1,2 @@ +bits 16 +pop word [bp + si] diff --git a/archived/less_old_sim8086/build/listing_0042_completionist_decode_second b/archived/less_old_sim8086/build/listing_0042_completionist_decode_second Binary files differnew file mode 100644 index 0000000..5407bf3 --- /dev/null +++ b/archived/less_old_sim8086/build/listing_0042_completionist_decode_second diff --git a/archived/less_old_sim8086/build/listing_0042_completionist_decode_second.asm b/archived/less_old_sim8086/build/listing_0042_completionist_decode_second.asm new file mode 100644 index 0000000..000ca41 --- /dev/null +++ b/archived/less_old_sim8086/build/listing_0042_completionist_decode_second.asm @@ -0,0 +1,3 @@ +bits 16 + +add al, [bx + si] diff --git a/archived/less_old_sim8086/build/sim8086 b/archived/less_old_sim8086/build/sim8086 Binary files differnew file mode 100755 index 0000000..4ae804a --- /dev/null +++ b/archived/less_old_sim8086/build/sim8086 diff --git a/archived/less_old_sim8086/build/test b/archived/less_old_sim8086/build/test new file mode 100644 index 0000000..866ad47 --- /dev/null +++ b/archived/less_old_sim8086/build/test @@ -0,0 +1 @@ +Q
\ No newline at end of file diff --git a/archived/less_old_sim8086/build/test.asm b/archived/less_old_sim8086/build/test.asm new file mode 100644 index 0000000..8ec3d4e --- /dev/null +++ b/archived/less_old_sim8086/build/test.asm @@ -0,0 +1,3 @@ +bits 16 + +push cx diff --git a/archived/less_old_sim8086/code/build.sh b/archived/less_old_sim8086/code/build.sh new file mode 100755 index 0000000..0c37234 --- /dev/null +++ b/archived/less_old_sim8086/code/build.sh @@ -0,0 +1,26 @@ +#!/bin/sh + +ThisDir="$(dirname "$(readlink -f "$0")")" +cd "$ThisDir" + +CompilerFlags=" +-ggdb +-DSIM8086_INTERNAL +-nostdinc++ +" + +WarningFlags=" +-Wall +-Wextra +-Wno-unused-label +-Wno-unused-variable +-Wno-unused-but-set-variable +-Wno-missing-field-initializers +-Wno-write-strings +" + +printf 'sim8086.c\n' +g++ $CompilerFlags $WarningFlags -o ../build/sim8086 sim8086.cpp + +# printf 'print_binary.c\n' +# gcc -ggdb -Wall -Wno-unused-variable -o ../build/print_binary print_binary.c
\ No newline at end of file diff --git a/archived/less_old_sim8086/code/print_binary.c b/archived/less_old_sim8086/code/print_binary.c new file mode 100644 index 0000000..1382cae --- /dev/null +++ b/archived/less_old_sim8086/code/print_binary.c @@ -0,0 +1,63 @@ +#include <stdio.h> +#include <fcntl.h> +#include <sys/stat.h> +#include <sys/mman.h> +#include <signal.h> + +#define Assert(Expr) \ + if (!(Expr)) \ + { \ + raise(SIGTRAP); \ + } + + +int +main(int ArgC, char *Args[]) +{ + if (ArgC < 2) + { + fprintf(stderr, "Missing argument.\n"); + fprintf(stderr, "Usage: %s <filename>\n", Args[0]); + } + else + { + struct stat StatBuffer = {0}; + char *Filename = 0; + int FD = -1; + int Err = -1; + size_t Filesize = 0; + char *Buffer = 0; + + Filename = Args[1]; + FD = open(Filename, O_RDONLY); + Assert(FD != -1); + Err = stat(Filename, &StatBuffer); + Assert(Err != -1); + Filesize = StatBuffer.st_size; + + if (Filesize) + { + Buffer = mmap(0, Filesize, PROT_READ, MAP_SHARED, FD, 0); + Assert(Buffer != MAP_FAILED); // mmap reports failure with MAP_FAILED ((void *)-1), never with NULL + + for (size_t At = 0; At < Filesize; At++) + { + unsigned char Byte = Buffer[At]; + int Count = 8; + while (Count--) + { + printf("%d", Byte >> 7); + Byte <<= 1; + } + printf(" "); + } + printf("\n"); + } + else + { + fprintf(stderr, "Empty file.\n"); + } + } + + return 0; +} diff --git a/archived/less_old_sim8086/code/sim8086.cpp b/archived/less_old_sim8086/code/sim8086.cpp new file mode 100644 index 0000000..be6a1f2 --- /dev/null +++ b/archived/less_old_sim8086/code/sim8086.cpp @@ -0,0 +1,855 @@ +/* + +Instruction have 6 bytes that define what type of instruction it is. + + Each instruction has zero, one or two operands. + The operands can be switched depending on the D bit. + + The MOD field indicates if a displacement follows. +Except when R/M is 110. + +The D and W bits are always in the first byte. + +Data is always required. + +Reg can happen in the first byte. + +Some expressions have a second 3bit field that stores the "sub-kind" of instruction. 
+*/ + +#include <stdint.h> +#include <stdio.h> +#include <unistd.h> +#include <fcntl.h> + +#include "sim8086.h" + +#define Assert(Expression) \ +if(!(Expression)) \ +{ \ +*(char *)0 = 0; \ +} + +#if SIM8086_INTERNAL +#define DebugLabel(Name) Name: +#else +#define DebugLabel(Name) +#endif + +#define Swap(Type, First, Second) \ +{ \ +Type Temp; \ +Temp = First; \ +First = Second; \ +Second = Temp; \ +} + +#define JE 0b01110100 +#define JL 0b01111100 +#define JLE 0b01111110 +#define JB 0b01110010 +#define JBE 0b01110110 +#define JP 0b01111010 +#define JO 0b01110000 +#define JS 0b01111000 +#define JNL 0b01111101 +#define JG 0b01111111 +#define JNB 0b01110011 +#define JA 0b01110111 +#define JNP 0b01111011 +#define JNO 0b01110001 +#define JNS 0b01111001 +#define JNZ 0b01110101 +#define LOOP 0b11100010 +#define LOOPZ 0b11100001 +#define LOOPNZ 0b11100000 +#define JCXZ 0b11100011 + +#define ADD 0b000 +#define SUB 0b101 +#define CMP 0b111 + +// Can be indexed by R/M or reg field. +// Also if the W bit is set it can be used to index and get the wide name. 
+global_variable char *RegistersNames[][2] = +{ + [0b000] = { "al", "ax" }, + [0b001] = { "cl", "cx" }, + [0b010] = { "dl", "dx" }, + [0b011] = { "bl", "bx" }, + [0b100] = { "ah", "sp" }, + [0b101] = { "ch", "bp" }, + [0b110] = { "dh", "si" }, + [0b111] = { "bh", "di" } +}; +#define ACCUMULATOR 0b000 + +global_variable char *DisplacementsNames[] = +{ + [0b000] = "bx + si", + [0b001] = "bx + di", + [0b010] = "bp + si", + [0b011] = "bp + di", + [0b100] = "si", + [0b101] = "di", + [0b110] = "bp", + [0b111] = "bx" +}; + +global_variable char *SegmentRegistersNames[] = +{ + [0b000] = "ss", + [0b001] = "cs", + [0b010] = "ds", + [0b011] = "es", +}; + +global_variable char GlobalBinaryTempString[9]; + +inline +void MemoryCopy(char *To, char *From, size_t Count) +{ + while(Count--) *To++ = *From++; +} + +inline +void CStringCopy(char *To, char *From) +{ + while((*To++ = *From++)); +} + +inline +u32 CStringLength(char *Src) +{ + u32 Result = 0; + while(Src[Result]) Result++; + return Result; +} + +char *PrefixString(char *Dest, char *Prefix) +{ + u32 PrefixLength = CStringLength(Prefix); + u32 DestLen = CStringLength(Dest); + + // Move the string over by the prefix length + for(s32 CharAt = DestLen; + CharAt >= 0; + CharAt--) + { + Dest[PrefixLength + CharAt] = Dest[CharAt]; + } + + // Copy the prefix over + MemoryCopy(Dest, Prefix, 5); + + return Dest; +} + +int ReadBytesFromFile(int File, u8 **Dest, size_t Count) +{ + int BytesRead = read(File, *Dest, Count); + *Dest += BytesRead; + Assert(BytesRead == (int)Count || BytesRead == 0); + return BytesRead; +} + +char *ByteToBinaryString(u8 Byte) +{ + char *Result = GlobalBinaryTempString; + + int Count = 8; + while(Count--) + { + *Result++ = '0' + (Byte >> 7); + Byte <<= 1; + } + *Result = 0; + + return GlobalBinaryTempString; +} + +void SetBitFromBitAt(b32 *Bit, u8 Byte, u32 BitAt) +{ + if(BitAt) + { + *Bit = ((Byte & (1 << (BitAt - 1))) != 0); + } +} + +u8 ExtractBitsFromByte(u8 Byte, u8 Bits, u8 BitAt) +{ + u8 Result = 0; + 
+ u8 ShiftValue = BitAt - 1; + u8 Mask = (Bits << ShiftValue); + Result = ((Byte & Mask) >> ShiftValue); + + return Result; +} + +int main(int ArgCount, char *Args[]) +{ + u8 Bytes[255] = {}; + instruction InstructionsBuffer[256] = {}; + instruction_table Table = {}; + Table.Instructions = InstructionsBuffer; + Table.Size = (sizeof(InstructionsBuffer)/sizeof(InstructionsBuffer[0])) - 1; + + Table.Instructions[Table.Count++] = + { + .Name = "mov", + .Description = "Register/memory to/from register", + .OperandsCount = 2, + .BytesCount = 2, + .Bytes = + { + { + .Op = 0b10001000, + .OpMask = 0b11111100, + .WBitAt = 1, + .DBitAt = 2, + }, + { + .ModBitsAt = 7, + .RegBitsAt = 4, + .RMBitsAt = 1 + } + } + }; + + Table.Instructions[Table.Count++] = + { + .Name = "mov", + .Description = "Immediate to register/memory", + .OperandsCount = 2, + .HasImmediate = true, + .BytesCount = 1, + .Bytes = + { + { + .Op = 0b10000110, + .OpMask = 0b11111110, + .WBitAt = 1, + .ModBitsAt = 7, + .RMBitsAt = 1 + } + } + }; + + Table.Instructions[Table.Count++] = + { + .Name = "mov", + .Description = "Immediate to register", + .OperandsCount = 2, + .HasImmediate = true, + .BytesCount = 1, + .Bytes = { + { + .Op = 0b10110000, + .OpMask = 0b11110000, + .WBitAt = 4, + .RegBitsAt = 1, + } + } + }; + + Table.Instructions[Table.Count++] = + { + .Name = "mov", + .Description = "Immediate to register/memory", + .OperandsCount = 2, + .HasImmediate = true, + .BytesCount = 2, + .Bytes = + { + { + .Op = 0b11000110, + .OpMask = 0b11111110, + .WBitAt = 1, + }, + { + .ModBitsAt = 7, + .RMBitsAt = 1, + } + } + }; + + Table.Instructions[Table.Count++] = + { + .Name = "mov", + .Description = "Memory to accumulator", + .OperandsCount = 2, + .HasAddress = true, + .ToAccumulator = true, + .BytesCount = 1, + .Bytes = + { + { + .Op = 0b10100000, + .OpMask = 0b11111110, + .WBitAt = 1 + } + } + }; + + // NOTE(luca): This is the same instruction as the previous one with the D bit always set to true... 
+ Table.Instructions[Table.Count++] = + { + .Name = "mov", + .Description = "Accumulator to memory", + .OperandsCount = 2, + .HasAddress = true, + .ToAccumulator = true, + .FlipOperands = true, + .BytesCount = 1, + .Bytes = + { + { + .Op = 0b10100010, + .OpMask = 0b11111110, + .WBitAt = 1 + } + } + }; + + Table.Instructions[Table.Count++] = + { + .Name = "nil", + .Description = "Reg/Memory with register to either", + .OperandsCount = 2, + .BytesCount = 2, + .Bytes = + { + { + .Op = 0b00000000, + .OpMask = 0b11000100, + .SubOpMask = 0b111 << 3, + .WBitAt = 1, + .DBitAt = 2, + }, + { + .ModBitsAt = 7, + .RegBitsAt = 4, + .RMBitsAt = 1, + } + }, + .SubInstructionsCount = 5, + .SubInstructions = + { + { 0b000 << 3, "add" }, + { 0b010 << 3, "adc" }, + { 0b101 << 3, "sub" }, + { 0b011 << 3, "sbb" }, + { 0b111 << 3, "cmp" }, + } + }; + + Table.Instructions[Table.Count++] = + { + .Name = "nil", + .Description = "Immediate to register/memory", + .OperandsCount = 2, + .HasImmediate = true, + .BytesCount = 2, + .Bytes = + { + { + .Op = 0b10000000, + .OpMask = 0b11000100, + .WBitAt = 1, + .SBitAt = 2, + }, + { + .SubOpMask = 0b00111000, + .ModBitsAt = 7, + .RMBitsAt = 1, + } + }, + .SubInstructionsCount = 5, + .SubInstructions = + { + { 0b000 << 3, "add" }, + { 0b010 << 3, "adc" }, + { 0b101 << 3, "sub" }, + { 0b011 << 3, "sbb" }, + { 0b111 << 3, "cmp" }, + } + }; + + Table.Instructions[Table.Count++] = + { + .Name = "nil", + .Description = "Immediate to accumulator", + .OperandsCount = 2, + .HasImmediate = true, + .ToAccumulator = true, + .BytesCount = 1, + .Bytes = + { + { + .Op = 0b00000100, + .OpMask = 0b11000110, + .SubOpMask = 0b111 << 3, + .WBitAt = 1, + }, + }, + .SubInstructionsCount = 5, + .SubInstructions = + { + { 0b000 << 3, "add" }, + { 0b010 << 3, "adc" }, + { 0b101 << 3, "sub" }, + { 0b011 << 3, "sbb" }, + { 0b111 << 3, "cmp" }, + } + }; + +#define ByteJump(InsName, InsDescription, InsOp) \ +{ \ +.Name = InsName, \ +.Description = InsDescription, \ 
+.OperandsCount = 1, \ +.IsByteJump = true, \ +.BytesCount = 1, \ +.Bytes = \ +{ \ +{ \ +.Op = InsOp, \ +.OpMask = 0b11111111, \ +} \ +} \ +} + Table.Instructions[Table.Count++] = ByteJump("jz", "Jump on zero", 0b01110100); + Table.Instructions[Table.Count++] = ByteJump("jl", "Jump on less", 0b01111100); + Table.Instructions[Table.Count++] = ByteJump("jle", "Jump on less or equal", 0b01111110); + Table.Instructions[Table.Count++] = ByteJump("jb", "Jump on below", 0b01110010); + Table.Instructions[Table.Count++] = ByteJump("jbe", "Jump on below or equal", 0b01110110); + Table.Instructions[Table.Count++] = ByteJump("jp", "Jump on parity", 0b01111010); + Table.Instructions[Table.Count++] = ByteJump("jo", "Jump on overflow", 0b01110000); + Table.Instructions[Table.Count++] = ByteJump("js", "Jump on sign", 0b01111000); + Table.Instructions[Table.Count++] = ByteJump("jnz", "Jump on not zero", 0b01110101); + Table.Instructions[Table.Count++] = ByteJump("jnl", "Jump on not less", 0b01111101); + Table.Instructions[Table.Count++] = ByteJump("jnle", "Jump on not less or equal", 0b01111111); + Table.Instructions[Table.Count++] = ByteJump("jnb", "Jump on not below", 0b01110011); + Table.Instructions[Table.Count++] = ByteJump("jnbe", "Jump on not below or equal", 0b01110111); + Table.Instructions[Table.Count++] = ByteJump("jnp", "Jump on not par", 0b01111011); + Table.Instructions[Table.Count++] = ByteJump("jno", "Jump on not overflow", 0b01110001); + Table.Instructions[Table.Count++] = ByteJump("jns", "Jump on not sign", 0b01111001); + Table.Instructions[Table.Count++] = ByteJump("loop", "Loop CX times", 0b11100010); + Table.Instructions[Table.Count++] = ByteJump("loopz", "Loop while zero", 0b11100001); + Table.Instructions[Table.Count++] = ByteJump("loopnz", "Loop while not zero", 0b11100000); + Table.Instructions[Table.Count++] = ByteJump("jcxz", "Jump on CX zero", 0b11100011); +#undef ByteJump + + Table.Instructions[Table.Count++] = + { + .Name = "push", + .Description = 
"Register/memory", + .OperandsCount = 1, + .BytesCount = 2, + .Bytes = + { + { + .Op = 0b11111111, + .OpMask = 0b11111111, + }, + { + .Op = 0b00110000, + .OpMask = 0b00111000, + .ModBitsAt = 7, + .RMBitsAt = 1, + } + } + }; + + Table.Instructions[Table.Count++] = + { + .Name = "push", + .Description = "Register", + .OperandsCount = 1, + .BytesCount = 1, + .Bytes = + { + { + .Op = 0b01010000, + .OpMask = 0b11111000, + .RegBitsAt = 1, + }, + } + }; + + Table.Instructions[Table.Count++] = + { + .Name = "push", + .Description = "Segment register", + .OperandsCount = 1, + .HasSegmentRegister = true, + .BytesCount = 1, + .Bytes = + { + { + .Op = 0b00000110, + .OpMask = 0b11100111, // 000sr110: bit 5 must be 0 so the extracted register index stays within SegmentRegistersNames[0..3] + .RegBitsAt = 4, + }, + } + }; + + if(ArgCount > 1) + { + int File = open(Args[1], O_RDONLY); + if(File != -1) + { + int BytesRead = 0; + + printf("bits 16\n\n"); + + int KeepReading = true; + while(KeepReading) + { + u8 *ByteAt = Bytes; + BytesRead = ReadBytesFromFile(File, &ByteAt, 1); + + if(BytesRead == 1) + { + for(u32 InstructionIndex = 0; + InstructionIndex < Table.Count; + InstructionIndex++) + { + instruction InsAt = Table.Instructions[InstructionIndex]; + + if((Bytes[0] & InsAt.Bytes[0].OpMask) == InsAt.Bytes[0].Op) + { + DebugLabel(ByteFound); + BytesRead = ReadBytesFromFile(File, &ByteAt, InsAt.BytesCount - BytesRead); + + b32 HasMode = false; + b32 HasDisplacement = false; + b32 HasRegister = false; + b32 MemoryMode = false; + b32 RegisterMode = false; + b32 EffectiveAddressMode = false; + b32 Displacement8Bit = false; + b32 Displacement16Bit = false; + b32 DBitSet = false; + b32 SBitSet = false; + b32 VBitSet = false; + b32 WBitSet = false; + u8 RegBits = 0; + u8 ModBits = 0; + u8 RMBits = 0; + s16 DisplacementValue = 0; + s16 DataValue = 0; + s16 IncrementValue = 0; // 16-bit so that a disp8 of 126/127 plus the instruction length does not wrap + + char *InstructionName = InsAt.Name; +#define OP_DEST 0 +#define OP_SOURCE 1 + char Operands[2][255] = {}; + char *SourceOperand = Operands[1]; + char *DestOperand = Operands[0]; + u32 NextOperand = OP_DEST; + + 
if(InsAt.ToAccumulator) + { + NextOperand = OP_SOURCE; + } + + for(u32 BytesIndex = 0; + BytesIndex < InsAt.BytesCount; + BytesIndex++) + { + u8 CurrentByte = Bytes[BytesIndex]; + instruction_byte CurrentInsByte = InsAt.Bytes[BytesIndex]; + + if(CurrentInsByte.OpMask) + { + Assert((CurrentByte & CurrentInsByte.OpMask) == CurrentInsByte.Op); + } + + SetBitFromBitAt(&DBitSet, CurrentByte, CurrentInsByte.DBitAt); + SetBitFromBitAt(&WBitSet, CurrentByte, CurrentInsByte.WBitAt); + SetBitFromBitAt(&SBitSet, CurrentByte, CurrentInsByte.SBitAt); + SetBitFromBitAt(&VBitSet, CurrentByte, CurrentInsByte.VBitAt); + + if(CurrentInsByte.SubOpMask) + { + Assert(InsAt.SubInstructionsCount); + + u8 SubInsBits = (CurrentByte & CurrentInsByte.SubOpMask); + + b32 FoundSubInstruction = false; + sub_instruction ScanSubIns = {}; + for(u32 SubInstructionsIndex = 0; + (SubInstructionsIndex < InsAt.SubInstructionsCount) && !FoundSubInstruction; + SubInstructionsIndex++) + { + ScanSubIns = InsAt.SubInstructions[SubInstructionsIndex]; + FoundSubInstruction = (ScanSubIns.Op == SubInsBits); + } + Assert(FoundSubInstruction); + + InstructionName = ScanSubIns.Name; + } + + if(CurrentInsByte.ModBitsAt) + { + HasMode = true; + ModBits = ExtractBitsFromByte(CurrentByte, 0b11, CurrentInsByte.ModBitsAt); + + if(ModBits == 0b00) + { + MemoryMode = true; + } + else if(ModBits == 0b01) + { + HasDisplacement = true; + Displacement8Bit = true; + } + else if(ModBits == 0b10) + { + HasDisplacement = true; + Displacement16Bit = true; + } + else if(ModBits == 0b11) + { + RegisterMode = true; + } + } + + if(CurrentInsByte.RegBitsAt) + { + RegBits = ExtractBitsFromByte(CurrentByte, 0b111, CurrentInsByte.RegBitsAt); + HasRegister = true; + } + + if(CurrentInsByte.RMBitsAt) + { + RMBits = ExtractBitsFromByte(CurrentByte, 0b111, CurrentInsByte.RMBitsAt); + + if(MemoryMode && RMBits == 0b110) + { + MemoryMode = false; + EffectiveAddressMode = true; + } + } + + } // for + + DebugLabel(Parsed); + + 
if(InsAt.HasAddress) + { + HasMode = true; + WBitSet = true; + EffectiveAddressMode = true; + } + + if(HasRegister) + { + b32 IsWide = (WBitSet || InsAt.OperandsCount == 1); + u32 TargetOperand = OP_DEST; + char *OperandName = 0; + + if(!InsAt.HasSegmentRegister) + { + OperandName = RegistersNames[RegBits][IsWide]; + } + else + { + OperandName = SegmentRegistersNames[RegBits]; + } + + b32 OperandIsDest = (DBitSet || + InsAt.HasImmediate || + (InsAt.OperandsCount == 1)); + TargetOperand = (OperandIsDest) ? OP_DEST : OP_SOURCE; + NextOperand = (OperandIsDest) ? OP_SOURCE : OP_DEST; + + CStringCopy(Operands[TargetOperand], OperandName); + } + + + if(HasMode) + { + if(MemoryMode) + { + if(InsAt.OperandsCount == 1) + { + sprintf(Operands[NextOperand++], "%s [%s]", WBitSet ? "byte" : "word", DisplacementsNames[RMBits]); + } + else + { + sprintf(Operands[NextOperand++], "[%s]", DisplacementsNames[RMBits]); + } + } + else if(RegisterMode) + { + CStringCopy(Operands[NextOperand++], RegistersNames[RMBits][WBitSet]); + } + else if(HasDisplacement) + { + if(Displacement8Bit) + { + ReadBytesFromFile(File, &ByteAt, 1); + DisplacementValue = (s8)ByteAt[-1]; + } + else if(Displacement16Bit) + { + ReadBytesFromFile(File, &ByteAt, 2); + DisplacementValue = ((s16)ByteAt[-1] << 8) | (s16)ByteAt[-2]; + } + else + { + Assert(0); + } + + if(DisplacementValue != 0) + { + if(InsAt.OperandsCount == 1) + { + b32 IsNegative = (DisplacementValue < 0); + sprintf(Operands[NextOperand++], "word [%s %c %d]", + DisplacementsNames[RMBits], + IsNegative ? '-' : '+', + IsNegative ? DisplacementValue * -1 : DisplacementValue); + } + else + { + b32 IsNegative = (DisplacementValue < 0); + sprintf(Operands[NextOperand++], "[%s %c %d]", + DisplacementsNames[RMBits], + IsNegative ? '-' : '+', + IsNegative ? DisplacementValue * -1 : DisplacementValue); + } + } + else + { + if(InsAt.OperandsCount == 1) + { + sprintf(Operands[OP_DEST], "%s [%s]", WBitSet ? 
"word" : "byte", DisplacementsNames[RMBits]); + } + else + { + sprintf(Operands[NextOperand++], "[%s]", DisplacementsNames[RMBits]); + } + } + + } + else if(EffectiveAddressMode) + { + // NOTE(luca): Data is always 16-bit displacement, read 2 bytes. + ReadBytesFromFile(File, &ByteAt, 2); + DataValue = ((s16)ByteAt[-1] << 8) | (s16)ByteAt[-2]; + + if(InsAt.HasImmediate || InsAt.OperandsCount == 1) + { + sprintf(Operands[OP_DEST], "word [%d]", DataValue); + } + else + { + sprintf(Operands[NextOperand++], "[%d]", DataValue); + } + + } + else + { + Assert(0); + } + } + + if(InsAt.HasImmediate) + { + if(WBitSet && !SBitSet) + { + ReadBytesFromFile(File, &ByteAt, 2); + DataValue = (ByteAt[-1] << 8) | ByteAt[-2]; + } + else + { + ReadBytesFromFile(File, &ByteAt, 1); + DataValue = ByteAt[-1]; + } + + if(MemoryMode || HasDisplacement) + { + sprintf(Operands[OP_SOURCE], "%s %d", WBitSet ? "word" : "byte", DataValue); + } + else + { + sprintf(Operands[OP_SOURCE], "%d", DataValue); + } + } + + if(InsAt.ToAccumulator) + { + sprintf(Operands[OP_DEST], "%s", RegistersNames[ACCUMULATOR][WBitSet]); + } + + if(InsAt.IsByteJump) + { + ReadBytesFromFile(File, &ByteAt, 1); + IncrementValue = (s8)ByteAt[-1]; + IncrementValue += InsAt.BytesCount + 1; + b32 IsNegative = (IncrementValue < 0); + sprintf(Operands[NextOperand++], "$%c%d", + IsNegative ? '-' : '+', + IsNegative ? 
IncrementValue *= -1 : IncrementValue); + } + + if(InsAt.FlipOperands) + { + Assert(InsAt.OperandsCount == 2); + Swap(char *, SourceOperand, DestOperand); + } + + if(InsAt.OperandsCount == 2) + { + printf("%s %s, %s\n", InstructionName, DestOperand, SourceOperand); + } + else if(InsAt.OperandsCount == 1) + { + printf("%s %s\n", InstructionName, DestOperand); + } + else if(InsAt.OperandsCount == 0) + { + printf("%s\n", InstructionName); + } + else + { + Assert(0); + } + + break; + } // if op + } // for ins + } // if bytesread + else if(BytesRead == -1) + { + fprintf(stderr, "Error while reading.\n"); + KeepReading = false; + } + else if(BytesRead == 0) + { + KeepReading = false; + } + else + { + fprintf(stderr, "Read too many bytes.\n"); + KeepReading = false; + } + } // while + } // if file + else + { + fprintf(stderr, "Could not open file.\n"); + } + } // if argcount + else + { + fprintf(stderr, "Missing argument.\n" + "Usage: %s <assembly>\n", Args[0]); + } + + return 0; +}
\ No newline at end of file diff --git a/archived/less_old_sim8086/code/sim8086.h b/archived/less_old_sim8086/code/sim8086.h new file mode 100644 index 0000000..136fcfe --- /dev/null +++ b/archived/less_old_sim8086/code/sim8086.h @@ -0,0 +1,70 @@ +/* date = May 13th 2025 11:06 pm */ + +#ifndef SIM8086_H +#define SIM8086_H + +typedef uint64_t u64; +typedef uint32_t u32; +typedef uint16_t u16; +typedef uint8_t u8; +typedef int64_t s64; +typedef int32_t s32; +typedef int16_t s16; +typedef int8_t s8; +typedef s32 b32; +typedef float r32; +typedef double r64; +#define false 0 +#define true 1 + +#define global_variable static +#define internal static +#define local_persist static + +struct instruction_byte +{ + // NOTE(luca): All byte positions are 1-based so that 0 can mean that you do not have to test for this byte. + u8 Op; + u8 OpMask; + u8 SubOpMask; + u8 WBitAt; + u8 SBitAt; + u8 DBitAt; + u8 VBitAt; + u8 ModBitsAt; + u8 RegBitsAt; + u8 RMBitsAt; +}; +struct sub_instruction +{ + u8 Op; + char *Name; +}; +struct instruction +{ + char *Name; + char *Description; + u32 OperandsCount; + b32 HasSegmentRegister; + b32 HasImmediate; + b32 HasAddress; + b32 IsByteJump; + b32 ToAccumulator; + b32 FlipOperands; + + u32 BytesCount; + instruction_byte Bytes[8]; + + // NOTE(luca): Sub instructions are instructions which do the same work but can have different names depending on a second opcode. 
+ u32 SubInstructionsCount; + sub_instruction SubInstructions[8]; +}; + +struct instruction_table +{ + instruction *Instructions; + u32 Count; + u32 Size; +}; + +#endif //SIM8086_H diff --git a/archived/less_old_sim8086/listings/listing_0037_single_register_mov.asm b/archived/less_old_sim8086/listings/listing_0037_single_register_mov.asm new file mode 100644 index 0000000..1b58e05 --- /dev/null +++ b/archived/less_old_sim8086/listings/listing_0037_single_register_mov.asm @@ -0,0 +1,19 @@ +; ======================================================================== +; +; (C) Copyright 2023 by Molly Rocket, Inc., All Rights Reserved. +; +; This software is provided 'as-is', without any express or implied +; warranty. In no event will the authors be held liable for any damages +; arising from the use of this software. +; +; Please see https://computerenhance.com for further information +; +; ======================================================================== + +; ======================================================================== +; LISTING 37 +; ======================================================================== + +bits 16 + +mov cx, bx diff --git a/archived/less_old_sim8086/listings/listing_0038_many_register_mov.asm b/archived/less_old_sim8086/listings/listing_0038_many_register_mov.asm new file mode 100644 index 0000000..c157b91 --- /dev/null +++ b/archived/less_old_sim8086/listings/listing_0038_many_register_mov.asm @@ -0,0 +1,29 @@ +; ======================================================================== +; +; (C) Copyright 2023 by Molly Rocket, Inc., All Rights Reserved. +; +; This software is provided 'as-is', without any express or implied +; warranty. In no event will the authors be held liable for any damages +; arising from the use of this software. 
+; +; Please see https://computerenhance.com for further information +; +; ======================================================================== + +; ======================================================================== +; LISTING 38 +; ======================================================================== + +bits 16 + +mov cx, bx +mov ch, ah +mov dx, bx +mov si, bx +mov bx, di +mov al, cl +mov ch, ch +mov bx, ax +mov bx, si +mov sp, di +mov bp, ax diff --git a/archived/less_old_sim8086/listings/listing_0039_more_movs.asm b/archived/less_old_sim8086/listings/listing_0039_more_movs.asm new file mode 100644 index 0000000..854fcb4 --- /dev/null +++ b/archived/less_old_sim8086/listings/listing_0039_more_movs.asm @@ -0,0 +1,47 @@ +; ======================================================================== +; +; (C) Copyright 2023 by Molly Rocket, Inc., All Rights Reserved. +; +; This software is provided 'as-is', without any express or implied +; warranty. In no event will the authors be held liable for any damages +; arising from the use of this software. 
+; +; Please see https://computerenhance.com for further information +; +; ======================================================================== + +; ======================================================================== +; LISTING 39 +; ======================================================================== + +bits 16 + +; Register-to-register +mov si, bx +mov dh, al + +; 8-bit immediate-to-register +mov cl, 12 +mov ch, -12 + +; 16-bit immediate-to-register +mov cx, 12 +mov cx, -12 +mov dx, 3948 +mov dx, -3948 + +; Source address calculation +mov al, [bx + si] +mov bx, [bp + di] +mov dx, [bp] + +; Source address calculation plus 8-bit displacement +mov ah, [bx + si + 4] + +; Source address calculation plus 16-bit displacement +mov al, [bx + si + 4999] + +; Dest address calculation +mov [bx + di], cx +mov [bp + si], cl +mov [bp], ch diff --git a/archived/less_old_sim8086/listings/listing_0040_challenge_movs.asm b/archived/less_old_sim8086/listings/listing_0040_challenge_movs.asm new file mode 100644 index 0000000..966e47a --- /dev/null +++ b/archived/less_old_sim8086/listings/listing_0040_challenge_movs.asm @@ -0,0 +1,38 @@ +; ======================================================================== +; +; (C) Copyright 2023 by Molly Rocket, Inc., All Rights Reserved. +; +; This software is provided 'as-is', without any express or implied +; warranty. In no event will the authors be held liable for any damages +; arising from the use of this software. 
+; +; Please see https://computerenhance.com for further information +; +; ======================================================================== + +; ======================================================================== +; LISTING 40 +; ======================================================================== + +bits 16 + +; Signed displacements +mov ax, [bx + di - 37] +mov [si - 300], cx +mov dx, [bx - 32] + +; Explicit sizes +mov [bp + di], byte 7 +mov [di + 901], word 347 + +; Direct address +mov bp, [5] +mov bx, [3458] + +; Memory-to-accumulator test +mov ax, [2555] +mov ax, [16] + +; Accumulator-to-memory test +mov [2554], ax +mov [15], ax diff --git a/archived/less_old_sim8086/listings/listing_0041_add_sub_cmp_jnz.asm b/archived/less_old_sim8086/listings/listing_0041_add_sub_cmp_jnz.asm new file mode 100644 index 0000000..6b79cf0 --- /dev/null +++ b/archived/less_old_sim8086/listings/listing_0041_add_sub_cmp_jnz.asm @@ -0,0 +1,121 @@ +; ======================================================================== +; +; (C) Copyright 2023 by Molly Rocket, Inc., All Rights Reserved. +; +; This software is provided 'as-is', without any express or implied +; warranty. In no event will the authors be held liable for any damages +; arising from the use of this software. 
+; +; Please see https://computerenhance.com for further information +; +; ======================================================================== + +; ======================================================================== +; LISTING 41 +; ======================================================================== + +bits 16 + +add bx, [bx+si] +add bx, [bp] +add si, 2 +add bp, 2 +add cx, 8 +add bx, [bp + 0] +add cx, [bx + 2] +add bh, [bp + si + 4] +add di, [bp + di + 6] +add [bx+si], bx +add [bp], bx +add [bp + 0], bx +add [bx + 2], cx +add [bp + si + 4], bh +add [bp + di + 6], di +add byte [bx], 34 +add word [bp + si + 1000], 29 +add ax, [bp] +add al, [bx + si] +add ax, bx +add al, ah +add ax, 1000 +add al, -30 +add al, 9 + +sub bx, [bx+si] +sub bx, [bp] +sub si, 2 +sub bp, 2 +sub cx, 8 +sub bx, [bp + 0] +sub cx, [bx + 2] +sub bh, [bp + si + 4] +sub di, [bp + di + 6] +sub [bx+si], bx +sub [bp], bx +sub [bp + 0], bx +sub [bx + 2], cx +sub [bp + si + 4], bh +sub [bp + di + 6], di +sub byte [bx], 34 +sub word [bx + di], 29 +sub ax, [bp] +sub al, [bx + si] +sub ax, bx +sub al, ah +sub ax, 1000 +sub al, -30 +sub al, 9 + +cmp bx, [bx+si] +cmp bx, [bp] +cmp si, 2 +cmp bp, 2 +cmp cx, 8 +cmp bx, [bp + 0] +cmp cx, [bx + 2] +cmp bh, [bp + si + 4] +cmp di, [bp + di + 6] +cmp [bx+si], bx +cmp [bp], bx +cmp [bp + 0], bx +cmp [bx + 2], cx +cmp [bp + si + 4], bh +cmp [bp + di + 6], di +cmp byte [bx], 34 +cmp word [4834], 29 +cmp ax, [bp] +cmp al, [bx + si] +cmp ax, bx +cmp al, ah +cmp ax, 1000 +cmp al, -30 +cmp al, 9 + +test_label0: +jnz test_label1 +jnz test_label0 +test_label1: +jnz test_label0 +jnz test_label1 + +label: +je label +jl label +jle label +jb label +jbe label +jp label +jo label +js label +jne label +jnl label +jg label +jnb label +ja label +jnp label +jno label +jns label +loop label +loopz label +loopnz label +jcxz label diff --git a/archived/less_old_sim8086/listings/listing_0042_completionist_decode.asm 
b/archived/less_old_sim8086/listings/listing_0042_completionist_decode.asm new file mode 100644 index 0000000..86ed278 --- /dev/null +++ b/archived/less_old_sim8086/listings/listing_0042_completionist_decode.asm @@ -0,0 +1,451 @@ +; ======================================================================== +; +; (C) Copyright 2023 by Molly Rocket, Inc., All Rights Reserved. +; +; This software is provided 'as-is', without any express or implied +; warranty. In no event will the authors be held liable for any damages +; arising from the use of this software. +; +; Please see https://computerenhance.com for further information +; +; ======================================================================== + +; ======================================================================== +; LISTING 42 +; ======================================================================== + +; +; NOTE(casey): This is not meant to be a real compliance test for 8086 +; disassemblers. It's just a reasonable selection of opcodes and patterns +; to use as a first pass in making sure a disassembler handles a large +; cross-section of the encoding. To be absolutely certain you haven't +; missed something, you would need a more exhaustive listing! 
+; + +bits 16 + +mov si, bx +mov dh, al +mov cl, 12 +mov ch, -12 +mov cx, 12 +mov cx, -12 +mov dx, 3948 +mov dx, -3948 +mov al, [bx + si] +mov bx, [bp + di] +mov dx, [bp] +mov ah, [bx + si + 4] +mov al, [bx + si + 4999] +mov [bx + di], cx +mov [bp + si], cl +mov [bp], ch +mov ax, [bx + di - 37] +mov [si - 300], cx +mov dx, [bx - 32] +mov [bp + di], byte 7 +mov [di + 901], word 347 +mov bp, [5] +mov bx, [3458] +mov ax, [2555] +mov ax, [16] +mov [2554], ax +mov [15], ax + +push word [bp + si] +push word [3000] +push word [bx + di - 30] +push cx +push ax +push dx +push cs + +pop word [bp + si] +pop word [3] +pop word [bx + di - 3000] +pop sp +pop di +pop si +pop ds + +xchg ax, [bp - 1000] +xchg [bx + 50], bp + +xchg ax, ax +xchg ax, dx +xchg ax, sp +xchg ax, si +xchg ax, di + +xchg cx, dx +xchg si, cx +xchg cl, ah + +in al, 200 +in al, dx +in ax, dx + +out 44, ax +out dx, al + +xlat +lea ax, [bx + di + 1420] +lea bx, [bp - 50] +lea sp, [bp - 1003] +lea di, [bx + si - 7] + +lds ax, [bx + di + 1420] +lds bx, [bp - 50] +lds sp, [bp - 1003] +lds di, [bx + si - 7] + +les ax, [bx + di + 1420] +les bx, [bp - 50] +les sp, [bp - 1003] +les di, [bx + si - 7] + +lahf +sahf +pushf +popf + +add cx, [bp] +add dx, [bx + si] +add [bp + di + 5000], ah +add [bx], al +add sp, 392 +add si, 5 +add ax, 1000 +add ah, 30 +add al, 9 +add cx, bx +add ch, al + +adc cx, [bp] +adc dx, [bx + si] +adc [bp + di + 5000], ah +adc [bx], al +adc sp, 392 +adc si, 5 +adc ax, 1000 +adc ah, 30 +adc al, 9 +adc cx, bx +adc ch, al + +inc ax +inc cx +inc dh +inc al +inc ah +inc sp +inc di +inc byte [bp + 1002] +inc word [bx + 39] +inc byte [bx + si + 5] +inc word [bp + di - 10044] +inc word [9349] +inc byte [bp] + +aaa +daa + +sub cx, [bp] +sub dx, [bx + si] +sub [bp + di + 5000], ah +sub [bx], al +sub sp, 392 +sub si, 5 +sub ax, 1000 +sub ah, 30 +sub al, 9 +sub cx, bx +sub ch, al + +sbb cx, [bp] +sbb dx, [bx + si] +sbb [bp + di + 5000], ah +sbb [bx], al +sbb sp, 392 +sbb si, 5 +sbb ax, 1000 +sbb ah, 30 +sbb 
al, 9 +sbb cx, bx +sbb ch, al + +dec ax +dec cx +dec dh +dec al +dec ah +dec sp +dec di +dec byte [bp + 1002] +dec word [bx + 39] +dec byte [bx + si + 5] +dec word [bp + di - 10044] +dec word [9349] +dec byte [bp] + +neg ax +neg cx +neg dh +neg al +neg ah +neg sp +neg di +neg byte [bp + 1002] +neg word [bx + 39] +neg byte [bx + si + 5] +neg word [bp + di - 10044] +neg word [9349] +neg byte [bp] + +cmp bx, cx +cmp dh, [bp + 390] +cmp [bp + 2], si +cmp bl, 20 +cmp byte [bx], 34 +cmp ax, 23909 + +aas +das + +mul al +mul cx +mul word [bp] +mul byte [bx + di + 500] + +imul ch +imul dx +imul byte [bx] +imul word [9483] + +aam + +div bl +div sp +div byte [bx + si + 2990] +div word [bp + di + 1000] + +idiv ax +idiv si +idiv byte [bp + si] +idiv word [bx + 493] + +aad +cbw +cwd + +not ah +not bl +not sp +not si +not word [bp] +not byte [bp + 9905] + +shl ah, 1 +shr ax, 1 +sar bx, 1 +rol cx, 1 +ror dh, 1 +rcl sp, 1 +rcr bp, 1 + +shl word [bp + 5], 1 +shr byte [bx + si - 199], 1 +sar byte [bx + di - 300], 1 +rol word [bp], 1 +ror word [4938], 1 +rcl byte [3], 1 +rcr word [bx], 1 + +shl ah, cl +shr ax, cl +sar bx, cl +rol cx, cl +ror dh, cl +rcl sp, cl +rcr bp, cl + +shl word [bp + 5], cl +shr word [bx + si - 199], cl +sar byte [bx + di - 300], cl +rol byte [bp], cl +ror byte [4938], cl +rcl byte [3], cl +rcr word [bx], cl + +and al, ah +and ch, cl +and bp, si +and di, sp +and al, 93 +and ax, 20392 +and [bp + si + 10], ch +and [bx + di + 1000], dx +and bx, [bp] +and cx, [4384] +and byte [bp - 39], 239 +and word [bx + si - 4332], 10328 + +test bx, cx +test dh, [bp + 390] +test [bp + 2], si +test bl, 20 +test byte [bx], 34 +test ax, 23909 + +or al, ah +or ch, cl +or bp, si +or di, sp +or al, 93 +or ax, 20392 +or [bp + si + 10], ch +or [bx + di + 1000], dx +or bx, [bp] +or cx, [4384] +or byte [bp - 39], 239 +or word [bx + si - 4332], 10328 + +xor al, ah +xor ch, cl +xor bp, si +xor di, sp +xor al, 93 +xor ax, 20392 +xor [bp + si + 10], ch +xor [bx + di + 1000], dx +xor bx, [bp] 
+xor cx, [4384] +xor byte [bp - 39], 239 +xor word [bx + si - 4332], 10328 + +rep movsb +rep cmpsb +rep scasb +rep lodsb +rep movsw +rep cmpsw +rep scasw +rep lodsw + +; NOTE(casey): Special thanks (as always!) to Mārtiņš Možeiko for figuring out why NASM +; wouldn't compile "rep stds" instructions. It was because it was a misprint in the 8086 +; manual! It was really just "rep stos", which of course is still in x64, and NASM +; assembles it just fine. +rep stosb +rep stosw + +call [39201] +call [bp - 100] +call sp +call ax + +jmp ax +jmp di +jmp [12] +jmp [4395] + +ret -7 +ret 500 +ret + +label: +je label +jl label +jle label +jb label +jbe label +jp label +jo label +js label +jne label +jnl label +jg label +jnb label +ja label +jnp label +jno label +jns label +loop label +loopz label +loopnz label +jcxz label + +int 13 +int3 + +into +iret + +clc +cmc +stc +cld +std +cli +sti +hlt +wait + +lock not byte [bp + 9905] +lock xchg [100], al + +mov al, cs:[bx + si] +mov bx, ds:[bp + di] +mov dx, es:[bp] +mov ah, ss:[bx + si + 4] + +and ss:[bp + si + 10], ch +or ds:[bx + di + 1000], dx +xor bx, es:[bp] +cmp cx, es:[4384] +test byte cs:[bp - 39], 239 +sbb word cs:[bx + si - 4332], 10328 + +lock not byte CS:[bp + 9905] + +; +; NOTE(casey): These were not in the original homework set, but have been added since, as people +; found instruction encodings that were not previously covered. Thank you to everyone who +; submitted test cases! +; + +call 123:456 +jmp 789:34 + +mov [bx+si+59],es + +jmp 2620 +call 11804 + +retf 17556 +ret 17560 +retf +ret + +call [bp+si-0x3a] +call far [bp+si-0x3a] +jmp [di] +jmp far [di] + +jmp 21862:30600 + +; +; TODO(casey): I would like to uncomment this, but as far as I can tell, NASM doesn't recognize the ESC instruction :( +; so even if I just force the assembler to output the bits here, our disasm will fail to assemble because it will (correctly!) +; print the esc instruction and NASM will error because it doesn't know what that is. 
+; +; esc 938,ax +; + +; +; TODO(casey): According to NASM, "rep movsb" is "not lockable". However the 8086 manual seems to think it is, and +; even describes what happens when you lock a rep: the lock is held for the duration of the rep operation. So... +; yeah. Not sure why this doesn't work in NASM: +; +; lock rep movsb +;
\ No newline at end of file diff --git a/archived/less_old_sim8086/misc/test_listing b/archived/less_old_sim8086/misc/test_listing new file mode 100755 index 0000000..472c0d2 --- /dev/null +++ b/archived/less_old_sim8086/misc/test_listing @@ -0,0 +1,58 @@
+#!/bin/sh
+#
+# Round-trip test for whole listings. Each listing is assembled with nasm,
+# disassembled with ../build/sim8086, and the disassembly is assembled again;
+# the listing passes when both binaries are identical.
+#
+# Usage: test_listing [--no-color] <listing[.asm]>...
+
+NoColor=
+if [ "$1" = "--no-color" ]
+then
+    NoColor=1
+    shift
+fi
+
+ThisDir="$(dirname "$(readlink -f "$0")")"
+
+# "$@" must stay quoted so that paths containing spaces are not split.
+for File in "$@"
+do
+    File="$(readlink -f "$File")"
+    [ "$File" ] || exit 1
+
+    (
+        # All build paths below are relative to this script's directory.
+        cd "$ThisDir" || exit 1
+
+        # Accept the listing name with or without its .asm extension.
+        SourceFile="${File%.asm}.asm"
+        [ -r "$SourceFile" ] || exit 1
+
+        ErrorFile="../build/error"
+        TestSource="../build/first"
+        OutFile="../build/$(basename "${SourceFile%.asm}")"
+        nasm -o "$OutFile" "$SourceFile" 2> "$ErrorFile"
+        ../build/sim8086 "$OutFile" > "$TestSource".asm 2>> "$ErrorFile"
+        nasm -o "${TestSource}" "$TestSource".asm 2>> "$ErrorFile"
+
+        RelPath="$(realpath --relative-to=. "$SourceFile")"
+        if diff -q "$TestSource" "$OutFile" > /dev/null 2>> "$ErrorFile"
+        then
+            if [ "$NoColor" ]
+            then
+                printf ' PASSED: '\''%s'\''\n' "$RelPath"
+            else
+                printf '\033[32m PASSED: '\''%s'\''\033[0m\n' "$RelPath"
+            fi
+        else
+            if [ "$NoColor" ]
+            then
+                printf ' FAILED: '\''%s'\''\n' "$RelPath"
+            else
+                printf '\033[31m FAILED: '\''%s'\''\033[0m\n' "$RelPath"
+            fi
+            sed 's/.*/ &/' "$ErrorFile"
+        fi
+    )
+done
diff --git a/archived/less_old_sim8086/misc/test_source_line_by_line b/archived/less_old_sim8086/misc/test_source_line_by_line new file mode 100755 index 0000000..dbf657c --- /dev/null +++ b/archived/less_old_sim8086/misc/test_source_line_by_line @@ -0,0 +1,92 @@
+#!/bin/sh
+#
+# Round-trip test for a single listing, one instruction at a time. Every
+# non-empty, non-comment line is assembled on its own with nasm, disassembled
+# with ../build/sim8086 and assembled again; the line passes when both
+# binaries are identical.
+#
+# Usage: test_source_line_by_line [--pause-on-fail] [--no-color] <source>...
+
+PauseOnFail=
+if [ "$1" = "--pause-on-fail" ]
+then
+    PauseOnFail=1
+    shift
+fi
+
+NoColor=
+if [ "$1" = "--no-color" ]
+then
+    NoColor=1
+    shift
+fi
+
+if [ -z "$1" ]
+then
+    >&2 printf 'Usage: %s [--pause-on-fail] [--no-color] <source>...\n' "$(basename "$0")"
+    # This is a script, not a function: 'return' is not valid here.
+    exit 1
+fi
+
+ThisDir="$(dirname "$(readlink -f "$0")")"
+
+# "$@" must stay quoted so that paths containing spaces are not split.
+for File in "$@"
+do
+    SourceFile="$(readlink -f "$File")"
+    [ "$SourceFile" ] || exit 1
+    SourceFile="${SourceFile%.asm}.asm"
+    [ -r "$SourceFile" ] || exit 1
+    >&2 printf 'File: %s\n' "$File"
+
+    (
+        cd "$ThisDir" || exit 1
+
+        Stripped="${SourceFile%.asm}"
+        OutName="../build/$(basename "$Stripped")"
+
+        # Skip blank lines, comment lines and the 'bits' directive.
+        grep -v '^$\|^;\|^bits' "$SourceFile" |
+        while read -r line
+        do
+            first="${OutName}_first"
+            second="${OutName}_second"
+            error="${OutName}_error"
+
+            printf 'bits 16\n%s\n' "$line" > "$first".asm
+
+            nasm -o "$first" "$first".asm 2> "$error"
+            # Append (>>) so the first nasm run's diagnostics are not lost.
+            ../build/sim8086 "$first" > "$second".asm 2>> "$error"
+            nasm -o "$second" "$second".asm 2>> "$error"
+
+            if diff -q "$first" "$second" > /dev/null 2>> "$error"
+            then
+                if [ "$NoColor" ]
+                then
+                    printf ' PASSED: '\''%s'\''\n' "$line"
+                else
+                    printf '\033[32m PASSED: '\''%s'\''\033[0m\n' "$line"
+                fi
+            else
+                if [ "$NoColor" ]
+                then
+                    printf ' FAILED: '\''%s'\''\n' "$SourceFile"
+                else
+                    printf '\033[31m FAILED: '\''%s'\''\033[0m\n' "$SourceFile"
+                fi
+                sed 's/.*/ &/' "$error"
+                printf ' listing> %s\n' "$line"
+                printf ' sim8086> %s\n' "$(grep -v '^$\|^;\|bits' "$second".asm)"
+
+                if [ "$PauseOnFail" ]
+                then
+                    # Stop before the cleanup below, so the intermediate files remain.
+                    exit
+                fi
+
+            fi
+            rm -f "$error" "$second" "$second".asm "$first" "$first".asm
+        done
+    )
+done
diff --git a/archived/less_old_sim8086/project.4coder b/archived/less_old_sim8086/project.4coder new file mode 100644 index 0000000..0b2c2fb --- /dev/null +++ b/archived/less_old_sim8086/project.4coder @@ -0,0 +1,43 @@ +version(2); +project_name = "sim8086"; +patterns = { +"*.c", +"*.cpp", +"*.h", +"*.m", +"*.bat", +"*.sh", +"*.4coder", +}; +blacklist_patterns = { +"_old.*", +}; +load_paths_base = { + { ".", .relative = true, .recursive = true, }, +}; +load_paths = { + .win = load_paths_base, + .linux = load_paths_base, + .mac = load_paths_base, +}; + +commands = { + .build = { .out = "*compilation*", .footer_panel = true, .save_dirty_files = true, + .linux = "./code/build.sh", + .mac = "./code/build.sh", }, + .build_release = { .out = "*compilation*", .footer_panel = false, .save_dirty_files = false, + .win
= ".\code\build.bat release", + .linux = "./code/build.sh --release", + .mac = "./code/build.sh --release", }, + .test_current = { .out = "*tests*", .footer_panel = false, .save_dirty_files = false, + .linux = "./misc/test_source_line_by_line --pause-on-fail --no-color ./listings/listing_0042_completionist_decode.asm" }, + .test_all = { .out = "*tests*", .footer_panel = false, .save_dirty_files = false, + .linux = "./misc/test_listing --no-color ./listings/listing_0037_single_register_mov.asm ./listings/listing_0038_many_register_mov.asm ./listings/listing_0039_more_movs.asm ./listings/listing_0040_challenge_movs.asm ./listings/listing_0041_add_sub_cmp_jnz.asm ./listings/listing_0042*" }, + .debug = { .linux = "gf2 ./build/sim8086", }, +}; +fkey_command = { +.F1 = "build", +.F2 = "build_release", +.F3 = "test_current", +.F4 = "test_all", +}; diff --git a/archived/old_sim8086/8086disassembler b/archived/old_sim8086/8086disassembler Binary files differnew file mode 100755 index 0000000..17185c7 --- /dev/null +++ b/archived/old_sim8086/8086disassembler diff --git a/archived/old_sim8086/8086disassembler.c b/archived/old_sim8086/8086disassembler.c new file mode 100644 index 0000000..91e1a59 --- /dev/null +++ b/archived/old_sim8086/8086disassembler.c @@ -0,0 +1,121 @@
+/*
+ 8086 asm disassembler
+ For now this assumes that instructions are two bytes long.
+
+ Only register-to-register MOV is decoded: the MOD field of the second byte
+ is ignored and the R/M field is always printed as a register.
+*/
+
+#include <stdio.h>
+#include <string.h>
+
+#define INSTRUCTION_MASK 0b11111100
+#define D_MASK (1 << 1)
+#define W_MASK 1
+#define REG_MASK 0b00111000
+#define RM_MASK 0b00000111
+#define INSTRUCTION_MOV 0b10001000
+
+void print_reg(unsigned char reg, int wide_flag);
+
+/*
+ Disassembles the file named by argv[1] to stdout, two bytes per instruction.
+ Returns 0 on success, 1 when no file was given, it cannot be opened, or it
+ ends in the middle of an instruction.
+*/
+int main(int argc, char **argv)
+{
+    FILE *file;
+    char *filename;
+    /* int, not char: fgetc() reports EOF out of band, and a char would either
+       mistake a 0xFF data byte for EOF or never compare equal to it. */
+    int first_byte, second_byte;
+    unsigned char reg, rm;
+
+    /* flags */
+    int is_reg_destination = 0;
+    int is_wide_operation = 0;
+    const char *instruction;
+    int status = 0;
+
+    if (argc < 2)
+    {
+        printf("No argument provided.\n");
+        return 1;
+    }
+
+    filename = argv[1];
+    file = fopen(filename, "rb");
+
+    if (file == NULL)
+    {
+        printf("File, %s not found.\n", filename);
+        return 1;
+    }
+
+    /* print useful header */
+    printf("; %s disassembly:\n", filename);
+    printf("bits 16\n");
+
+    while ((first_byte = fgetc(file)) != EOF)
+    {
+        /* the second byte carries the REG and R/M operand fields */
+        second_byte = fgetc(file);
+        if (second_byte == EOF)
+        {
+            fprintf(stderr, "Unexpected end of file inside an instruction.\n");
+            status = 1;
+            break;
+        }
+
+        /* the six high bits of the first byte select the instruction */
+        if ((first_byte & INSTRUCTION_MASK) == INSTRUCTION_MOV)
+            instruction = "mov";
+        else
+            instruction = "ERR";
+
+        is_reg_destination = (first_byte & D_MASK) ? 1 : 0;
+        is_wide_operation = (first_byte & W_MASK) ? 1 : 0;
+
+        reg = (second_byte & REG_MASK) >> 3;
+        rm = second_byte & RM_MASK;
+
+        /* print the decoded instruction; with D set REG is the destination,
+           otherwise R/M is */
+        printf("%s ", instruction);
+        print_reg(is_reg_destination ? reg : rm, is_wide_operation);
+        printf(", ");
+        print_reg(is_reg_destination ? rm : reg, is_wide_operation);
+        printf("\n");
+    }
+
+    fclose(file);
+
+    return status;
+}
+
+/*
+ Prints the name of the register selected by a 3-bit REG or R/M field value:
+ the 16-bit register when wide_flag is non-zero, the 8-bit one otherwise.
+ Values above 0b111 print "ER".
+*/
+void print_reg(unsigned char reg, int wide_flag)
+{
+    /* indexed by [field value][wide] */
+    static const char *names[8][2] = {
+        { "al", "ax" },
+        { "cl", "cx" },
+        { "dl", "dx" },
+        { "bl", "bx" },
+        { "ah", "sp" },
+        { "ch", "bp" },
+        { "dh", "si" },
+        { "bh", "di" },
+    };
+
+    if (reg < 8)
+        printf("%s", names[reg][wide_flag ? 1 : 0]);
+    else
+        /* unknown register */
+        printf("ER");
+}
\ No newline at end of file diff --git a/archived/old_sim8086/build.sh b/archived/old_sim8086/build.sh new file mode 100755 index 0000000..53d5835 --- /dev/null +++ b/archived/old_sim8086/build.sh @@ -0,0 +1,6 @@ +#!/bin/sh +set -ex + +gcc -ggdb -Wall 8086disassembler.c -o 8086disassembler +nasm -o listing_0037_single_register_mov listing_0037_single_register_mov.asm +nasm -o listing_0038_many_register_mov listing_0038_many_register_mov.asm diff --git a/archived/old_sim8086/listing_0037_single_register_mov b/archived/old_sim8086/listing_0037_single_register_mov new file mode 100644 index 0000000..93b300d --- /dev/null +++ b/archived/old_sim8086/listing_0037_single_register_mov @@ -0,0 +1 @@ +
\ No newline at end of file diff --git a/archived/old_sim8086/listing_0037_single_register_mov.asm b/archived/old_sim8086/listing_0037_single_register_mov.asm new file mode 100644 index 0000000..1b58e05 --- /dev/null +++ b/archived/old_sim8086/listing_0037_single_register_mov.asm @@ -0,0 +1,19 @@ +; ======================================================================== +; +; (C) Copyright 2023 by Molly Rocket, Inc., All Rights Reserved. +; +; This software is provided 'as-is', without any express or implied +; warranty. In no event will the authors be held liable for any damages +; arising from the use of this software. +; +; Please see https://computerenhance.com for further information +; +; ======================================================================== + +; ======================================================================== +; LISTING 37 +; ======================================================================== + +bits 16 + +mov cx, bx diff --git a/archived/old_sim8086/listing_0038_many_register_mov b/archived/old_sim8086/listing_0038_many_register_mov new file mode 100644 index 0000000..5605230 --- /dev/null +++ b/archived/old_sim8086/listing_0038_many_register_mov @@ -0,0 +1 @@ +وډމȈÉ
\ No newline at end of file diff --git a/archived/old_sim8086/listing_0038_many_register_mov.asm b/archived/old_sim8086/listing_0038_many_register_mov.asm new file mode 100644 index 0000000..c157b91 --- /dev/null +++ b/archived/old_sim8086/listing_0038_many_register_mov.asm @@ -0,0 +1,29 @@ +; ======================================================================== +; +; (C) Copyright 2023 by Molly Rocket, Inc., All Rights Reserved. +; +; This software is provided 'as-is', without any express or implied +; warranty. In no event will the authors be held liable for any damages +; arising from the use of this software. +; +; Please see https://computerenhance.com for further information +; +; ======================================================================== + +; ======================================================================== +; LISTING 38 +; ======================================================================== + +bits 16 + +mov cx, bx +mov ch, ah +mov dx, bx +mov si, bx +mov bx, di +mov al, cl +mov ch, ch +mov bx, ax +mov bx, si +mov sp, di +mov bp, ax diff --git a/archived/sim8086_old.c b/archived/sim8086_old.c new file mode 100644 index 0000000..fbc20dc --- /dev/null +++ b/archived/sim8086_old.c @@ -0,0 +1,394 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <fcntl.h>
+#include <stdint.h>
+#include <signal.h>
+#include <unistd.h>
+
+// Traps into the debugger when Expr is false. Compiled out without DEBUG.
+#ifdef DEBUG
+#define Assert(Expr) \
+do \
+{ \
+    if (!(Expr)) \
+    { \
+        raise(SIGTRAP); \
+    } \
+} while (0)
+#else
+#define Assert(Expr) ((void)0)
+#endif
+
+// First-byte patterns of the encodings this disassembler understands.
+#define MOV_RM_REG(Byte) (((Byte) & 0b11111100) == 0b10001000)
+#define MOV_IM_REG(Byte) (((Byte) & 0b11110000) == 0b10110000)
+#define MOV_IM_RM(Byte) (((Byte) & 0b11111110) == 0b11000110)
+#define MOV_MEM_ACC(Byte) (((Byte) & 0b11111110) == 0b10100000)
+#define MOV_ACC_MEM(Byte) (((Byte) & 0b11111110) == 0b10100010)
+#define ARITH_RM_RM(Byte) (((Byte) & 0b11000100) == 0b00000000)
+#define ARITH_IM_RM(Byte) (((Byte) & 0b11111100) == 0b10000000)
+// Bit 1 has to be clear as well: 00xxx11x are the segment push/pop, segment
+// override prefix and daa/das/aaa/aas opcodes, not accumulator arithmetic.
+#define ARITH_IM_ACC(Byte) (((Byte) & 0b11000110) == 0b00000100)
+
+// NOTE(luca): Arithmetic operations have an octal field to differentiate
+// between them. It sits in bits 3-5 of the first byte (00 ooo 0 dw), or in the
+// reg field of the second byte for the immediate to register/memory form.
+static const char *ArithmeticOperations[8] = {
+    [0b000] = "add",
+    [0b001] = "or",
+    [0b010] = "adc",
+    [0b011] = "sbb",
+    [0b100] = "and",
+    [0b101] = "sub",
+    [0b110] = "xor",
+    [0b111] = "cmp",
+};
+
+#define JE 0b01110100
+#define JL 0b01111100
+#define JLE 0b01111110
+#define JB 0b01110010
+#define JBE 0b01110110
+#define JP 0b01111010
+#define JO 0b01110000
+#define JS 0b01111000
+#define JNL 0b01111101
+#define JG 0b01111111
+#define JNB 0b01110011
+#define JA 0b01110111
+#define JNP 0b01111011
+#define JNO 0b01110001
+#define JNS 0b01111001
+#define JNZ 0b01110101
+#define LOOP 0b11100010
+#define LOOPZ 0b11100001
+#define LOOPNZ 0b11100000
+#define JCXZ 0b11100011
+
+// Mnemonics of the conditional jumps and loops, indexed by opcode byte.
+// Opcodes that are not listed stay NULL.
+static const char *JumpOperations[256] = {
+    [JE] = "je",
+    [JNZ] = "jnz",
+    [JL] = "jl",
+    [JLE] = "jle",
+    [JB] = "jb",
+    [JBE] = "jbe",
+    [JP] = "jp",
+    [JO] = "jo",
+    [JS] = "js",
+    [JNL] = "jnl",
+    [JG] = "jg",
+    [JNB] = "jnb",
+    [JA] = "ja",
+    [JNP] = "jnp",
+    [JNO] = "jno",
+    [JNS] = "jns",
+    [LOOP] = "loop",
+    [LOOPZ] = "loopz",
+    [LOOPNZ] = "loopnz",
+    [JCXZ] = "jcxz",
+};
+
+// NOTE(luca): See 4-20 in the manual
+// The following arrays are in order such that they can be indexed with the r/m or reg field.
+static const char *Registers[8][2] = {
+    [0b000] = { "al", "ax" },
+    [0b001] = { "cl", "cx" },
+    [0b010] = { "dl", "dx" },
+    [0b011] = { "bl", "bx" },
+    [0b100] = { "ah", "sp" },
+    [0b101] = { "ch", "bp" },
+    [0b110] = { "dh", "si" },
+    [0b111] = { "bh", "di" }
+};
+#define ACCUMULATOR 0b000
+static const char *Displacements[8] = {
+    [0b000] = "bx + si",
+    [0b001] = "bx + di",
+    [0b010] = "bp + si",
+    [0b011] = "bp + di",
+    [0b100] = "si",
+    [0b101] = "di",
+    [0b110] = "bp",
+    [0b111] = "bx"
+};
+
+// Read cursor over the mapped file. Truncated is set once a read runs past
+// the end, so a cut-off instruction is reported instead of read out of bounds.
+typedef struct
+{
+    const uint8_t *Bytes;
+    size_t Size;
+    size_t At;
+    int Truncated;
+} byte_stream;
+
+// Returns the next byte and advances, or 0 (and sets Truncated) at the end.
+static uint8_t
+next_byte(byte_stream *Stream)
+{
+    uint8_t Result = 0;
+    Assert(Stream->At <= Stream->Size);
+    if (Stream->At < Stream->Size)
+    {
+        Result = Stream->Bytes[Stream->At++];
+    }
+    else
+    {
+        Stream->Truncated = 1;
+    }
+    return Result;
+}
+
+// Returns the next little-endian 16 bit value (low byte first).
+static uint16_t
+next_word(byte_stream *Stream)
+{
+    uint16_t Low = next_byte(Stream);
+    uint16_t High = next_byte(Stream);
+    return (uint16_t)(Low | (High << 8));
+}
+
+// Prints the eight bits of Byte, most significant first, to stderr.
+void
+print_binary(uint8_t Byte)
+{
+    fprintf(stderr, " ");
+    int Count = 8;
+    while (Count--)
+    {
+        fprintf(stderr, "%d", Byte >> 7);
+        Byte <<= 1;
+    }
+    fprintf(stderr, "\n");
+}
+
+// Writes the register/memory operand selected by Mod and RM to Out, consuming
+// the displacement bytes that follow the mod-reg-r/m byte when there are any.
+static void
+format_rm(byte_stream *Stream, int Mod, int RM, int Wide, char *Out, size_t OutSize)
+{
+    if (Mod == 0b11)
+    {
+        // register mode
+        snprintf(Out, OutSize, "%s", Registers[RM][Wide]);
+    }
+    else if (Mod == 0b00 && RM == 0b110)
+    {
+        // exception, direct address: 16 bit displacement and no base register
+        snprintf(Out, OutSize, "[%u]", (unsigned)next_word(Stream));
+    }
+    else
+    {
+        // memory mode; displacements are signed, the 8 bit one is sign-extended
+        int Displacement = 0;
+        if (Mod == 0b01)
+        {
+            Displacement = (int8_t)next_byte(Stream);
+        }
+        else if (Mod == 0b10)
+        {
+            Displacement = (int16_t)next_word(Stream);
+        }
+
+        if (Displacement)
+        {
+            snprintf(Out, OutSize, "[%s %c %d]", Displacements[RM],
+                     (Displacement < 0) ? '-' : '+', abs(Displacement));
+        }
+        else
+        {
+            snprintf(Out, OutSize, "[%s]", Displacements[RM]);
+        }
+    }
+}
+
+// Decodes one instruction from Stream and prints it to stdout.
+// Returns 1 on success and 0 when the first byte is not a known opcode.
+static int
+decode_instruction(byte_stream *Stream)
+{
+    char RMText[32];
+    uint8_t Byte1 = next_byte(Stream);
+
+    if (MOV_RM_REG(Byte1) || ARITH_RM_RM(Byte1))
+    {
+        // register/memory to/from register
+        const char *Operation = MOV_RM_REG(Byte1) ? "mov" : ArithmeticOperations[(Byte1 >> 3) & 0b111];
+        int DSet = (Byte1 >> 1) & 1;
+        int WSet = Byte1 & 1;
+        uint8_t Byte2 = next_byte(Stream);
+        int REG = (Byte2 >> 3) & 0b111;
+
+        format_rm(Stream, Byte2 >> 6, Byte2 & 0b111, WSet, RMText, sizeof(RMText));
+        if (DSet)
+        {
+            printf("%s %s, %s\n", Operation, Registers[REG][WSet], RMText);
+        }
+        else
+        {
+            printf("%s %s, %s\n", Operation, RMText, Registers[REG][WSet]);
+        }
+    }
+    else if (MOV_IM_REG(Byte1))
+    {
+        // Immediate to register
+        int WSet = (Byte1 >> 3) & 1;
+        int REG = Byte1 & 0b111;
+        unsigned Immediate = WSet ? next_word(Stream) : next_byte(Stream);
+
+        printf("mov %s, %u\n", Registers[REG][WSet], Immediate);
+    }
+    else if (MOV_IM_RM(Byte1))
+    {
+        // Immediate to register/memory; the displacement precedes the data
+        int WSet = Byte1 & 1;
+        uint8_t Byte2 = next_byte(Stream);
+        unsigned Immediate = 0;
+
+        format_rm(Stream, Byte2 >> 6, Byte2 & 0b111, WSet, RMText, sizeof(RMText));
+        Immediate = WSet ? next_word(Stream) : next_byte(Stream);
+        printf("mov %s, %s %u\n", RMText, WSet ? "word" : "byte", Immediate);
+    }
+    else if (MOV_MEM_ACC(Byte1) || MOV_ACC_MEM(Byte1))
+    {
+        // memory to/from accumulator; the address is 16 bit whatever W says
+        int WSet = Byte1 & 1;
+        unsigned Address = next_word(Stream);
+
+        if (MOV_MEM_ACC(Byte1))
+        {
+            printf("mov %s, [%u]\n", Registers[ACCUMULATOR][WSet], Address);
+        }
+        else
+        {
+            printf("mov [%u], %s\n", Address, Registers[ACCUMULATOR][WSet]);
+        }
+    }
+    else if (ARITH_IM_RM(Byte1))
+    {
+        // Immediate to register/memory
+        int WSet = Byte1 & 1;
+        int SSet = (Byte1 >> 1) & 1;
+        uint8_t Byte2 = next_byte(Stream);
+        int MOD = Byte2 >> 6;
+        const char *Operation = ArithmeticOperations[(Byte2 >> 3) & 0b111];
+        int Immediate = 0;
+
+        format_rm(Stream, MOD, Byte2 & 0b111, WSet, RMText, sizeof(RMText));
+        if (SSet)
+        {
+            // one data byte, sign-extended to the operand size
+            Immediate = (int8_t)next_byte(Stream);
+        }
+        else
+        {
+            Immediate = WSet ? next_word(Stream) : next_byte(Stream);
+        }
+
+        if (MOD == 0b11)
+        {
+            printf("%s %s, %d\n", Operation, RMText, Immediate);
+        }
+        else
+        {
+            // a memory operand needs an explicit size
+            printf("%s %s %s, %d\n", Operation, WSet ? "word" : "byte", RMText, Immediate);
+        }
+    }
+    else if (ARITH_IM_ACC(Byte1))
+    {
+        // Immediate to accumulator
+        int WSet = Byte1 & 1;
+        unsigned Immediate = WSet ? next_word(Stream) : next_byte(Stream);
+
+        printf("%s %s, %u\n", ArithmeticOperations[(Byte1 >> 3) & 0b111],
+               Registers[ACCUMULATOR][WSet], Immediate);
+    }
+    else if (JumpOperations[Byte1])
+    {
+        // Conditional jumps and loops take a signed 8 bit offset relative to the
+        // next instruction. NASM's `$` is the start of this two byte instruction.
+        int Offset = (int8_t)next_byte(Stream);
+
+        printf("%s $%+d\n", JumpOperations[Byte1], Offset + 2);
+    }
+    else
+    {
+        fprintf(stderr, "Unrecognized Operation from byte:\n");
+        print_binary(Byte1);
+        return 0;
+    }
+
+    return 1;
+}
+
+// Disassembles the binary named by Args[1] to stdout as NASM source.
+// Returns 0 on success and 1 on any usage, file or decoding error.
+int main(int ArgC, char *Args[])
+{
+    if (ArgC < 2)
+    {
+        fprintf(stderr, "No argument provided.\n");
+        return 1;
+    }
+
+    char *Filename = Args[1];
+    int Ret = 1;
+    struct stat StatBuffer = {0};
+
+    int FD = open(Filename, O_RDONLY);
+    if (FD == -1)
+    {
+        fprintf(stderr, "Failed to open file '%s'.\n", Filename);
+        return 1;
+    }
+
+    if (fstat(FD, &StatBuffer) == -1)
+    {
+        fprintf(stderr, "Failed to get file stats.\n");
+    }
+    else if (StatBuffer.st_size <= 0)
+    {
+        fprintf(stderr, "Cannot disassemble empty file.\n");
+    }
+    else
+    {
+        size_t Filesize = (size_t)StatBuffer.st_size;
+        uint8_t *Buffer = mmap(0, Filesize, PROT_READ, MAP_FILE | MAP_SHARED, FD, 0);
+        if (Buffer == MAP_FAILED)
+        {
+            fprintf(stderr, "Failed to allocate buffer for file.\n");
+        }
+        else
+        {
+            byte_stream Stream = { Buffer, Filesize, 0, 0 };
+
+            printf("; %s:\n", Filename);
+            printf("bits 16\n");
+
+            Ret = 0;
+            while (Ret == 0 && Stream.At < Stream.Size)
+            {
+                if (!decode_instruction(&Stream))
+                {
+                    Ret = 1;
+                }
+                else if (Stream.Truncated)
+                {
+                    fprintf(stderr, "Unexpected end of file inside an instruction.\n");
+                    Ret = 1;
+                }
+            }
+
+            munmap(Buffer, Filesize);
+        }
+    }
+
+    close(FD);
+    return Ret;
+}
diff --git a/src/build/shared_library_test b/src/build/shared_library_test Binary files differnew file mode 100755 index 0000000..9a69cf1 --- /dev/null +++ b/src/build/shared_library_test diff --git a/src/build/sim86 b/src/build/sim86 Binary files differnew file mode 100755 index 0000000..c3dc369 --- /dev/null +++ b/src/build/sim86 diff --git a/src/code/build.sh b/src/code/build.sh new file mode 100755 index 0000000..eb49392 --- /dev/null +++ b/src/code/build.sh @@ -0,0 +1,40 @@ +#!/bin/sh + +ThisDir="$(dirname "$(readlink -f "$0")")" +cd "$ThisDir" + +Compiler="clang" + +CompilerFlags=" +-g +-nostdinc++ +-DSIM86_INTERNAL +" + +WarningFlags=" +-Wall +-Wextra +-Wno-unused-label +-Wno-unused-variable +-Wno-unused-function +-Wno-unused-but-set-variable +-Wno-missing-field-initializers +-Wno-write-strings +" + +Libs="./reference_decoder/sim86_lib.cpp" + +if false +then + Source="./shared_library_test.cpp" + printf '%s\n' "$Source" + $Compiler $CompilerFlags $WarningFlags \ + -o ../build/shared_library_test \ + $Libs $Source +fi + +Source="sim86.cpp" +printf '%s\n' "$Source" +$Compiler $CompilerFlags $WarningFlags \ + -o ../build/sim86 \ + $Libs $Source diff --git a/src/code/reference_decoder/sim86.h b/src/code/reference_decoder/sim86.h new file mode 100644 index 0000000..b065a0e --- /dev/null +++ b/src/code/reference_decoder/sim86.h @@ -0,0 +1,27 @@ +/* ======================================================================== + + (C) Copyright 2023 by Molly Rocket, Inc., All Rights Reserved. + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. 
+ + Please see https://computerenhance.com for more information + + ======================================================================== */ + +typedef char unsigned u8; +typedef short unsigned u16; +typedef int unsigned u32; +typedef long long unsigned u64; + +typedef char s8; +typedef short s16; +typedef int s32; +typedef long long s64; + +typedef s32 b32; + +#define ArrayCount(Array) (sizeof(Array) / sizeof((Array)[0])) + +static u32 const SIM86_VERSION = 4; diff --git a/src/code/reference_decoder/sim86_decode.cpp b/src/code/reference_decoder/sim86_decode.cpp new file mode 100644 index 0000000..dc084d5 --- /dev/null +++ b/src/code/reference_decoder/sim86_decode.cpp @@ -0,0 +1,303 @@ +/* ======================================================================== + + (C) Copyright 2023 by Molly Rocket, Inc., All Rights Reserved. + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. + + Please see https://computerenhance.com for more information + + ======================================================================== */ + +struct decode_context +{ + u32 DefaultSegment; + u32 AdditionalFlags; +}; + +static instruction_operand GetRegOperand(u32 IntelRegIndex, b32 Wide) +{ + // NOTE(casey): This maps Intel's REG and RM field encodings for registers to our encoding for registers. 
+ register_access RegTable[][2] = + { + {{Register_a, 0, 1}, {Register_a, 0, 2}}, + {{Register_c, 0, 1}, {Register_c, 0, 2}}, + {{Register_d, 0, 1}, {Register_d, 0, 2}}, + {{Register_b, 0, 1}, {Register_b, 0, 2}}, + {{Register_a, 1, 1}, {Register_sp, 0, 2}}, + {{Register_c, 1, 1}, {Register_bp, 0, 2}}, + {{Register_d, 1, 1}, {Register_si, 0, 2}}, + {{Register_b, 1, 1}, {Register_di, 0, 2}}, + }; + + instruction_operand Result = {}; + Result.Type = Operand_Register; + Result.Register = RegTable[IntelRegIndex & 0x7][(Wide != 0)]; + + return Result; +} + +// NOTE(casey): ParseDataValue is not a real function, it's basically just a macro that is used in +// TryParse. It should never be called otherwise, but that is not something you can do in C++. +// In other languages it would be a "local function". +static u32 ParseDataValue(segmented_access *Access, b32 Exists, b32 Wide, b32 SignExtended) +{ + u32 Result = {}; + + if(Exists) + { + if(Wide) + { + u8 D0 = *AccessMemory(*Access, 0); + u8 D1 = *AccessMemory(*Access, 1); + Result = (D1 << 8) | D0; + Access->SegmentOffset += 2; + } + else + { + Result = *AccessMemory(*Access); + if(SignExtended) + { + Result = (s32)*(s8 *)&Result; + } + Access->SegmentOffset += 1; + } + } + + return Result; +} + +static instruction TryDecode(decode_context *Context, instruction_encoding *Inst, segmented_access At) +{ + instruction Dest = {}; + b32 Has[Bits_Count] = {}; + u32 Bits[Bits_Count] = {}; + b32 Valid = true; + + u64 StartingAddress = GetAbsoluteAddressOf(At); + + u8 BitsPendingCount = 0; + u8 BitsPending = 0; + for(u32 BitsIndex = 0; Valid && (BitsIndex < ArrayCount(Inst->Bits)); ++BitsIndex) + { + instruction_bits TestBits = Inst->Bits[BitsIndex]; + if(TestBits.Usage == Bits_End) + { + // NOTE(casey): That's the end of the instruction format. 
+ break; + } + + u32 ReadBits = TestBits.Value; + if(TestBits.BitCount != 0) + { + if(BitsPendingCount == 0) + { + BitsPendingCount = 8; + BitsPending = *AccessMemory(At); + ++At.SegmentOffset; + } + + // NOTE(casey): If this assert fires, it means we have an error in our table, + // since there are no 8086 instructions that have bit values straddling a + // byte boundary. + assert(TestBits.BitCount <= BitsPendingCount); + + BitsPendingCount -= TestBits.BitCount; + ReadBits = BitsPending; + ReadBits >>= BitsPendingCount; + ReadBits &= ~(0xff << TestBits.BitCount); + } + + if(TestBits.Usage == Bits_Literal) + { + // NOTE(casey): This is a "required" sequence + Valid = Valid && (ReadBits == TestBits.Value); + } + else + { + Bits[TestBits.Usage] |= (ReadBits << TestBits.Shift); + Has[TestBits.Usage] = true; + } + } + + if(Valid) + { + u32 Mod = Bits[Bits_MOD]; + u32 RM = Bits[Bits_RM]; + u32 W = Bits[Bits_W]; + b32 S = Bits[Bits_S]; + b32 D = Bits[Bits_D]; + + b32 HasDirectAddress = ((Mod == 0b00) && (RM == 0b110)); + Has[Bits_Disp] = ((Has[Bits_Disp]) || (Mod == 0b10) || (Mod == 0b01) || HasDirectAddress); + + b32 DisplacementIsW = ((Bits[Bits_DispAlwaysW]) || (Mod == 0b10) || HasDirectAddress); + b32 DataIsW = ((Bits[Bits_WMakesDataW]) && !S && W); + + Bits[Bits_Disp] |= ParseDataValue(&At, Has[Bits_Disp], DisplacementIsW, (!DisplacementIsW)); + Bits[Bits_Data] |= ParseDataValue(&At, Has[Bits_Data], DataIsW, S); + + Dest.Op = Inst->Op; + Dest.Flags = Context->AdditionalFlags; + Dest.Address = StartingAddress; + Dest.Size = GetAbsoluteAddressOf(At) - StartingAddress; + Dest.SegmentOverride = Context->DefaultSegment; + + if(W) + { + Dest.Flags |= Inst_Wide; + } + + if(Bits[Bits_Far]) + { + Dest.Flags |= Inst_Far; + } + + if(Bits[Bits_Z]) + { + Dest.Flags |= Inst_RepNE; + } + + u32 Disp = Bits[Bits_Disp]; + s16 Displacement = (s16)Disp; + + instruction_operand *RegOperand = &Dest.Operands[D ? 0 : 1]; + instruction_operand *ModOperand = &Dest.Operands[D ? 
1 : 0]; + + if(Has[Bits_SR]) + { + *RegOperand = RegisterOperand(Register_es + (Bits[Bits_SR] & 0x3), 2); + } + + if(Has[Bits_REG]) + { + *RegOperand = GetRegOperand(Bits[Bits_REG], W); + } + + if(Has[Bits_MOD]) + { + if(Mod == 0b11) + { + *ModOperand = GetRegOperand(RM, W || (Bits[Bits_RMRegAlwaysW])); + } + else + { + register_mapping_8086 IntelTerm0[8] = { Register_b, Register_b, Register_bp, Register_bp, Register_si, Register_di, Register_bp, Register_b}; + register_mapping_8086 IntelTerm1[8] = {Register_si, Register_di, Register_si, Register_di}; + + u32 I = RM&0x7; + register_mapping_8086 Term0 = IntelTerm0[I]; + register_mapping_8086 Term1 = IntelTerm1[I]; + if((Mod == 0b00) && (RM == 0b110)) + { + Term0 = {}; + Term1 = {}; + } + + *ModOperand = EffectiveAddressOperand(RegisterAccess(Term0, 0, 2), RegisterAccess(Term1, 0, 2), Displacement); + } + } + + if(Has[Bits_Data] && Has[Bits_Disp] && !Has[Bits_MOD]) + { + Dest.Operands[0] = IntersegmentAddressOperand(Bits[Bits_Data], Bits[Bits_Disp]); + } + else + { + // + // NOTE(casey): Because there are some strange opcodes that do things like have an immediate as + // a _destination_ ("out", for example), I define immediates and other "additional operands" to + // go in "whatever slot was not used by the reg and mod fields". + // + + instruction_operand *LastOperand = &Dest.Operands[0]; + if(LastOperand->Type) + { + LastOperand = &Dest.Operands[1]; + } + + if(Bits[Bits_RelJMPDisp]) + { + *LastOperand = ImmediateOperand(Displacement, Immediate_RelativeJumpDisplacement); + } + else if(Has[Bits_Data]) + { + *LastOperand = ImmediateOperand(Bits[Bits_Data]); + } + else if(Has[Bits_V]) + { + if(Bits[Bits_V]) + { + *LastOperand = RegisterOperand(Register_c, 1); + } + else + { + *LastOperand = ImmediateOperand(1); + } + } + } + } + + return Dest; +} + +static instruction DecodeInstruction(instruction_table Table, segmented_access At) +{ + /* TODO(casey): Hmm. 
It seems like this is a very inefficient way to parse + instructions, isn't it? For every instruction, we check every entry in the + table until we find a match. Is this bad design? Or did the person who wrote + it know what they were doing, and has a plan for how it can be optimized + later? Only time will tell... :) */ + + decode_context Context = {}; + instruction Result = {}; + + u32 StartingAddress = GetAbsoluteAddressOf(At); + u32 TotalSize = 0; + while(TotalSize < Table.MaxInstructionByteCount) + { + Result = {}; + for(u32 Index = 0; Index < Table.EncodingCount; ++Index) + { + instruction_encoding Inst = Table.Encodings[Index]; + Result = TryDecode(&Context, &Inst, At); + if(Result.Op) + { + At.SegmentOffset += Result.Size; + TotalSize += Result.Size; + break; + } + } + + if(Result.Op == Op_lock) + { + Context.AdditionalFlags |= Inst_Lock; + } + else if(Result.Op == Op_rep) + { + Context.AdditionalFlags |= Inst_Rep | (Result.Flags & Inst_RepNE); + } + else if(Result.Op == Op_segment) + { + Context.AdditionalFlags |= Inst_Segment; + Context.DefaultSegment = Result.Operands[1].Register.Index; + } + else + { + break; + } + } + + if(TotalSize <= Table.MaxInstructionByteCount) + { + Result.Address = StartingAddress; + Result.Size = TotalSize; + } + else + { + Result = {}; + } + + return Result; +} diff --git a/src/code/reference_decoder/sim86_decode.h b/src/code/reference_decoder/sim86_decode.h new file mode 100644 index 0000000..534ce2d --- /dev/null +++ b/src/code/reference_decoder/sim86_decode.h @@ -0,0 +1,35 @@ +/* ======================================================================== + + (C) Copyright 2023 by Molly Rocket, Inc., All Rights Reserved. + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. 
+ + Please see https://computerenhance.com for more information + + ======================================================================== */ + +enum register_mapping_8086 +{ + Register_none, + + Register_a, + Register_b, + Register_c, + Register_d, + Register_sp, + Register_bp, + Register_si, + Register_di, + Register_es, + Register_cs, + Register_ss, + Register_ds, + Register_ip, + Register_flags, + + Register_count, +}; + +static instruction DecodeInstruction(instruction_table Table, segmented_access At); diff --git a/src/code/reference_decoder/sim86_instruction.cpp b/src/code/reference_decoder/sim86_instruction.cpp new file mode 100644 index 0000000..34fcf1f --- /dev/null +++ b/src/code/reference_decoder/sim86_instruction.cpp @@ -0,0 +1,84 @@ +/* ======================================================================== + + (C) Copyright 2023 by Molly Rocket, Inc., All Rights Reserved. + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. 
+ + Please see https://computerenhance.com for more information + + ======================================================================== */ + +static b32 OperandIsType(instruction Instruction, u32 Index, operand_type Type) +{ + b32 Result = (Instruction.Operands[Index].Type == Type); + return Result; +} + +static instruction_operand GetOperand(instruction Instruction, u32 Index) +{ + assert(Index < ArrayCount(Instruction.Operands)); + instruction_operand Result = Instruction.Operands[Index]; + return Result; +} + +static register_access RegisterAccess(u32 Index, u32 Offset, u32 Count) +{ + register_access Result = {}; + + Result.Index = Index; + Result.Offset = Offset; + Result.Count = Count; + + return Result; +} + +static instruction_operand IntersegmentAddressOperand(u32 Segment, s32 Displacement) +{ + instruction_operand Result = {}; + + Result.Type = Operand_Memory; + Result.Address.ExplicitSegment = Segment; + Result.Address.Displacement = Displacement; + Result.Address.Flags = Address_ExplicitSegment; + + return Result; +} + +static instruction_operand EffectiveAddressOperand(register_access Term0, register_access Term1, s32 Displacement) +{ + instruction_operand Result = {}; + + Result.Type = Operand_Memory; + Result.Address.Terms[0].Register = Term0; + Result.Address.Terms[0].Scale = 1; + Result.Address.Terms[1].Register = Term1; + Result.Address.Terms[1].Scale = 1; + Result.Address.Displacement = Displacement; + + return Result; +} + +static instruction_operand RegisterOperand(u32 Index, u32 Count) +{ + instruction_operand Result = {}; + + Result.Type = Operand_Register; + Result.Register.Index = Index; + Result.Register.Offset = 0; + Result.Register.Count = Count; + + return Result; +} + +static instruction_operand ImmediateOperand(u32 Value, u32 Flags = 0) +{ + instruction_operand Result = {}; + + Result.Type = Operand_Immediate; + Result.Immediate.Value = Value; + Result.Immediate.Flags = Flags; + + return Result; +} diff --git 
a/src/code/reference_decoder/sim86_instruction.h b/src/code/reference_decoder/sim86_instruction.h new file mode 100644 index 0000000..191635a --- /dev/null +++ b/src/code/reference_decoder/sim86_instruction.h @@ -0,0 +1,109 @@ +/* ======================================================================== + + (C) Copyright 2023 by Molly Rocket, Inc., All Rights Reserved. + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. + + Please see https://computerenhance.com for more information + + ======================================================================== */ + +typedef u32 register_index; + +// NOTE(casey): To make it easier to compile with languages which do not +// have auto-typedef'ing (like C, etc.), all types are manually typedef'd here. +typedef struct register_access register_access; +typedef struct effective_address_term effective_address_term; +typedef struct effective_address_expression effective_address_expression; +typedef struct immediate immediate; +typedef struct instruction_operand instruction_operand; +typedef struct instruction instruction; + +typedef enum operation_type : u32 +{ + Op_None, + +#define INST(Mnemonic, ...) Op_##Mnemonic, +#define INSTALT(...) 
+#include "sim86_instruction_table.inl" + + Op_Count, +} operation_type; + +enum instruction_flag +{ + Inst_Lock = 0x1, + Inst_Rep = 0x2, + Inst_Segment = 0x4, + Inst_Wide = 0x8, + Inst_Far = 0x10, + Inst_RepNE = 0x20, // NOTE(casey): For user convenience, this will be set _in addition to_ Inst_Rep for REPNE/REPNZ +}; + +struct register_access +{ + register_index Index; + u32 Offset; + u32 Count; +}; + +struct effective_address_term +{ + register_access Register; + s32 Scale; +}; + +enum effective_address_flag +{ + Address_ExplicitSegment = 0x1, +}; +struct effective_address_expression +{ + effective_address_term Terms[2]; + u32 ExplicitSegment; + s32 Displacement; + u32 Flags; +}; + +enum immediate_flag +{ + Immediate_RelativeJumpDisplacement = 0x1, +}; +struct immediate +{ + s32 Value; + u32 Flags; +}; + +typedef enum operand_type : u32 +{ + Operand_None, + Operand_Register, + Operand_Memory, + Operand_Immediate, +} operand_type; +struct instruction_operand +{ + operand_type Type; + union + { + effective_address_expression Address; + register_access Register; + immediate Immediate; + }; +}; + +struct instruction +{ + u32 Address; + u32 Size; + + operation_type Op; + u32 Flags; + + instruction_operand Operands[2]; + + register_index SegmentOverride; +}; diff --git a/src/code/reference_decoder/sim86_instruction_table.cpp b/src/code/reference_decoder/sim86_instruction_table.cpp new file mode 100644 index 0000000..be3ec82 --- /dev/null +++ b/src/code/reference_decoder/sim86_instruction_table.cpp @@ -0,0 +1,27 @@ +/* ======================================================================== + + (C) Copyright 2023 by Molly Rocket, Inc., All Rights Reserved. + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. 
+ + Please see https://computerenhance.com for more information + + ======================================================================== */ + +static instruction_encoding InstructionTable8086[] = +{ +#include "sim86_instruction_table.inl" +}; + +static instruction_table Get8086InstructionTable() +{ + instruction_table Result = {}; + + Result.EncodingCount = ArrayCount(InstructionTable8086); + Result.Encodings = InstructionTable8086; + Result.MaxInstructionByteCount = 15; // NOTE(casey): This is the "Intel-specified" maximum length of an instruction, including prefixes + + return Result; +} diff --git a/src/code/reference_decoder/sim86_instruction_table.h b/src/code/reference_decoder/sim86_instruction_table.h new file mode 100644 index 0000000..ed8aea8 --- /dev/null +++ b/src/code/reference_decoder/sim86_instruction_table.h @@ -0,0 +1,60 @@ +/* ======================================================================== + + (C) Copyright 2023 by Molly Rocket, Inc., All Rights Reserved. + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. 
+ + Please see https://computerenhance.com for more information + + ======================================================================== */ + +enum instruction_bits_usage : u8 +{ + Bits_End, // NOTE(casey): The 0 value, indicating the end of the instruction encoding array + + Bits_Literal, // NOTE(casey): These are opcode bits that identify instructions + + // NOTE(casey): These bits correspond directly to the 8086 instruction manual + Bits_D, + Bits_S, + Bits_W, + Bits_V, + Bits_Z, + Bits_MOD, + Bits_REG, + Bits_RM, + Bits_SR, + Bits_Disp, + Bits_Data, + + Bits_DispAlwaysW, // NOTE(casey): Tag for instructions where the displacement is always 16 bits + Bits_WMakesDataW, // NOTE(casey): Tag for instructions where SW=01 makes the data field become 16 bits + Bits_RMRegAlwaysW, // NOTE(casey): Tag for instructions where the register encoded in RM is always 16-bit width + Bits_RelJMPDisp, // NOTE(casey): Tag for instructions that require address adjustment to go through NASM properly + Bits_Far, // NOTE(casey): Tag for instructions that require a "far" keyword in their ASM to select the right opcode + + Bits_Count, +}; + +struct instruction_bits +{ + instruction_bits_usage Usage; + u8 BitCount; + u8 Shift; + u8 Value; +}; + +struct instruction_encoding +{ + operation_type Op; + instruction_bits Bits[16]; +}; + +struct instruction_table +{ + instruction_encoding *Encodings; + u32 EncodingCount; + u32 MaxInstructionByteCount; +}; diff --git a/src/code/reference_decoder/sim86_instruction_table.inl b/src/code/reference_decoder/sim86_instruction_table.inl new file mode 100644 index 0000000..576c0f6 --- /dev/null +++ b/src/code/reference_decoder/sim86_instruction_table.inl @@ -0,0 +1,250 @@ +/* ======================================================================== + + (C) Copyright 2023 by Molly Rocket, Inc., All Rights Reserved. + + This software is provided 'as-is', without any express or implied + warranty. 
In no event will the authors be held liable for any damages + arising from the use of this software. + + Please see https://computerenhance.com for more information + + ======================================================================== */ + +/* + NOTE(casey): This instruction table is a direct translation of table 4-12 in the Intel 8086 manual. + The macros are designed to allow direct transcription, without changing the order or manner + of specification in the table in any way. Additional "implicit" versions of the macros are provided + so that hard-coded fields can be supplied uniformly. + + The table is also designed to allow you to include it multiple times to "pull out" other things + from the table, such as opcode mnemonics as strings or enums, etc. +*/ + +#ifndef INST +#define INST(Mnemonic, ...) {Op_##Mnemonic, __VA_ARGS__}, +#endif + +#ifndef INSTALT +#define INSTALT INST +#endif + +#define B(Bits) {Bits_Literal, sizeof(#Bits)-1, 0, 0b##Bits} +#define D {Bits_D, 1} +#define S {Bits_S, 1} +#define W {Bits_W, 1} +#define V {Bits_V, 1} +#define Z {Bits_Z, 1} + +#define XXX {Bits_Data, 3, 0} +#define YYY {Bits_Data, 3, 3} +#define RM {Bits_RM, 3} +#define MOD {Bits_MOD, 2} +#define REG {Bits_REG, 3} +#define SR {Bits_SR, 2} + +#define ImpW(Value) {Bits_W, 0, 0, Value} +#define ImpREG(Value) {Bits_REG, 0, 0, Value} +#define ImpMOD(Value) {Bits_MOD, 0, 0, Value} +#define ImpRM(Value) {Bits_RM, 0, 0, Value} +#define ImpD(Value) {Bits_D, 0, 0, Value} +#define ImpS(Value) {Bits_S, 0, 0, Value} + +#define DISP {Bits_Disp, 0, 0, 0} +#define ADDR {Bits_Disp, 0, 0, 0}, {Bits_DispAlwaysW, 0, 0, 1} +#define DATA {Bits_Data, 0, 0, 0} +#define DATA_IF_W {Bits_WMakesDataW, 0, 0, 1} +#define Flags(F) {F, 0, 0, 1} + +INST(mov, {B(100010), D, W, MOD, REG, RM}) +INSTALT(mov, {B(1100011), W, MOD, B(000), RM, DATA, DATA_IF_W, ImpD(0)}) +INSTALT(mov, {B(1011), W, REG, DATA, DATA_IF_W, ImpD(1)}) +INSTALT(mov, {B(1010000), W, ADDR, ImpREG(0), ImpMOD(0), ImpRM(0b110), 
ImpD(1)}) +INSTALT(mov, {B(1010001), W, ADDR, ImpREG(0), ImpMOD(0), ImpRM(0b110), ImpD(0)}) +INSTALT(mov, {B(100011), D, B(0), MOD, B(0), SR, RM, ImpW(1)}) // NOTE(casey): This collapses 2 entries in the 8086 table by adding an explicit D bit + +INST(push, {B(11111111), MOD, B(110), RM, ImpW(1), ImpD(1)}) +INSTALT(push, {B(01010), REG, ImpW(1), ImpD(1)}) +INSTALT(push, {B(000), SR, B(110), ImpW(1), ImpD(1)}) + +INST(pop, {B(10001111), MOD, B(000), RM, ImpW(1), ImpD(1)}) +INSTALT(pop, {B(01011), REG, ImpW(1), ImpD(1)}) +INSTALT(pop, {B(000), SR, B(111), ImpW(1), ImpD(1)}) + +INST(xchg, {B(1000011), W, MOD, REG, RM, ImpD(1)}) +INSTALT(xchg, {B(10010), REG, ImpMOD(0b11), ImpW(1), ImpRM(0)}) + +INST(in, {B(1110010), W, DATA, ImpREG(0), ImpD(1)}) +INSTALT(in, {B(1110110), W, ImpREG(0), ImpD(1), ImpMOD(0b11), ImpRM(2), Flags(Bits_RMRegAlwaysW)}) +INST(out, {B(1110011), W, DATA, ImpREG(0), ImpD(0)}) +INSTALT(out, {B(1110111), W, ImpREG(0), ImpD(0), ImpMOD(0b11), ImpRM(2), Flags(Bits_RMRegAlwaysW)}) + +INST(xlat, {B(11010111)}) +INST(lea, {B(10001101), MOD, REG, RM, ImpD(1), ImpW(1)}) +INST(lds, {B(11000101), MOD, REG, RM, ImpD(1), ImpW(1)}) +INST(les, {B(11000100), MOD, REG, RM, ImpD(1), ImpW(1)}) +INST(lahf, {B(10011111)}) +INST(sahf, {B(10011110)}) +INST(pushf, {B(10011100)}) +INST(popf, {B(10011101)}) + +INST(add, {B(000000), D, W, MOD, REG, RM}) +INSTALT(add, {B(100000), S, W, MOD, B(000), RM, DATA, DATA_IF_W}) +INSTALT(add, {B(0000010), W, DATA, DATA_IF_W, ImpREG(0), ImpD(1)}) + +INST(adc, {B(000100), D, W, MOD, REG, RM}) +INSTALT(adc, {B(100000), S, W, MOD, B(010), RM, DATA, DATA_IF_W}) +INSTALT(adc, {B(0001010), W, DATA, DATA_IF_W, ImpREG(0), ImpD(1)}) + +INST(inc, {B(1111111), W, MOD, B(000), RM, ImpD(1)}) +INSTALT(inc, {B(01000), REG, ImpW(1), ImpD(1)}) + +INST(aaa, {B(00110111)}) +INST(daa, {B(00100111)}) + +INST(sub, {B(001010), D, W, MOD, REG, RM}) +INSTALT(sub, {B(100000), S, W, MOD, B(101), RM, DATA, DATA_IF_W}) +INSTALT(sub, {B(0010110), W, DATA, DATA_IF_W, 
ImpREG(0), ImpD(1)}) + +INST(sbb, {B(000110), D, W, MOD, REG, RM}) +INSTALT(sbb, {B(100000), S, W, MOD, B(011), RM, DATA, DATA_IF_W}) +INSTALT(sbb, {B(0001110), W, DATA, DATA_IF_W, ImpREG(0), ImpD(1)}) + +INST(dec, {B(1111111), W, MOD, B(001), RM, ImpD(1)}) +INSTALT(dec, {B(01001), REG, ImpW(1), ImpD(1)}) + +INST(neg, {B(1111011), W, MOD, B(011), RM}) + +INST(cmp, {B(001110), D, W, MOD, REG, RM}) +INSTALT(cmp, {B(100000), S, W, MOD, B(111), RM, DATA, DATA_IF_W}) +INSTALT(cmp, {B(0011110), W, DATA, DATA_IF_W, ImpREG(0), ImpD(1)}) // NOTE(casey): The manual table suggests this data is only 8-bit, but wouldn't it be 16 as well? + +INST(aas, {B(00111111)}) +INST(das, {B(00101111)}) +INST(mul, {B(1111011), W, MOD, B(100), RM, ImpS(0)}) +INST(imul, {B(1111011), W, MOD, B(101), RM, ImpS(1)}) +INST(aam, {B(11010100), B(00001010)}) // NOTE(casey): The manual says this has a DISP... but how could it? What for?? +INST(div, {B(1111011), W, MOD, B(110), RM, ImpS(0)}) +INST(idiv, {B(1111011), W, MOD, B(111), RM, ImpS(1)}) +INST(aad, {B(11010101), B(00001010)}) +INST(cbw, {B(10011000)}) +INST(cwd, {B(10011001)}) + +INST(not, {B(1111011), W, MOD, B(010), RM}) +INST(shl, {B(110100), V, W, MOD, B(100), RM}) +INST(shr, {B(110100), V, W, MOD, B(101), RM}) +INST(sar, {B(110100), V, W, MOD, B(111), RM}) +INST(rol, {B(110100), V, W, MOD, B(000), RM}) +INST(ror, {B(110100), V, W, MOD, B(001), RM}) +INST(rcl, {B(110100), V, W, MOD, B(010), RM}) +INST(rcr, {B(110100), V, W, MOD, B(011), RM}) + +INST(and, {B(001000), D, W, MOD, REG, RM}) +INSTALT(and, {B(1000000), W, MOD, B(100), RM, DATA, DATA_IF_W}) +INSTALT(and, {B(0010010), W, DATA, DATA_IF_W, ImpREG(0), ImpD(1)}) + +INST(test, {B(1000010), W, MOD, REG, RM}) // NOTE(casey): The manual suggests there is a D flag here, but it doesn't appear to be true (it would conflict with xchg if it did) +INSTALT(test, {B(1111011), W, MOD, B(000), RM, DATA, DATA_IF_W}) +INSTALT(test, {B(1010100), W, DATA, DATA_IF_W, ImpREG(0), ImpD(1)}) // NOTE(casey): 
The manual table suggests this data is only 8-bit, but it seems like it could be 16 too? + +INST(or, {B(000010), D, W, MOD, REG, RM}) +INSTALT(or, {B(1000000), W, MOD, B(001), RM, DATA, DATA_IF_W}) +INSTALT(or, {B(0000110), W, DATA, DATA_IF_W, ImpREG(0), ImpD(1)}) + +INST(xor, {B(001100), D, W, MOD, REG, RM}) +INSTALT(xor, {B(1000000), W, MOD, B(110), RM, DATA, DATA_IF_W}) // NOTE(casey): The manual has conflicting information about this encoding, but I believe this is the correct binary pattern. +INSTALT(xor, {B(0011010), W, DATA, DATA_IF_W, ImpREG(0), ImpD(1)}) + +INST(rep, {B(1111001), Z}) +INST(movs, {B(1010010), W}) +INST(cmps, {B(1010011), W}) +INST(scas, {B(1010111), W}) +INST(lods, {B(1010110), W}) +INST(stos, {B(1010101), W}) + +INST(call, {B(11101000), ADDR, Flags(Bits_RelJMPDisp)}) +INSTALT(call, {B(11111111), MOD, B(010), RM, ImpW(1)}) +INSTALT(call, {B(10011010), ADDR, DATA, DATA_IF_W, ImpW(1), Flags(Bits_Far)}) +INSTALT(call, {B(11111111), MOD, B(011), RM, ImpW(1), Flags(Bits_Far)}) + +INST(jmp, {B(11101001), ADDR, Flags(Bits_RelJMPDisp)}) +INSTALT(jmp, {B(11101011), DISP, Flags(Bits_RelJMPDisp)}) +INSTALT(jmp, {B(11111111), MOD, B(100), RM, ImpW(1)}) +INSTALT(jmp, {B(11101010), ADDR, DATA, DATA_IF_W, ImpW(1), Flags(Bits_Far)}) +INSTALT(jmp, {B(11111111), MOD, B(101), RM, ImpW(1), Flags(Bits_Far)}) + +// NOTE(casey): The actual Intel manual does not distinguish mnemonics RET and RETF, +// but NASM needs this to reassemble properly, so we do. 
+INST(ret, {B(11000011)}) +INSTALT(ret, {B(11000010), DATA, DATA_IF_W, ImpW(1)}) +INST(retf, {B(11001011), Flags(Bits_Far)}) +INSTALT(retf, {B(11001010), DATA, DATA_IF_W, ImpW(1), Flags(Bits_Far)}) + +INST(je, {B(01110100), DISP, Flags(Bits_RelJMPDisp)}) +INST(jl, {B(01111100), DISP, Flags(Bits_RelJMPDisp)}) +INST(jle, {B(01111110), DISP, Flags(Bits_RelJMPDisp)}) +INST(jb, {B(01110010), DISP, Flags(Bits_RelJMPDisp)}) +INST(jbe, {B(01110110), DISP, Flags(Bits_RelJMPDisp)}) +INST(jp, {B(01111010), DISP, Flags(Bits_RelJMPDisp)}) +INST(jo, {B(01110000), DISP, Flags(Bits_RelJMPDisp)}) +INST(js, {B(01111000), DISP, Flags(Bits_RelJMPDisp)}) +INST(jne, {B(01110101), DISP, Flags(Bits_RelJMPDisp)}) +INST(jnl, {B(01111101), DISP, Flags(Bits_RelJMPDisp)}) +INST(jg, {B(01111111), DISP, Flags(Bits_RelJMPDisp)}) +INST(jnb, {B(01110011), DISP, Flags(Bits_RelJMPDisp)}) +INST(ja, {B(01110111), DISP, Flags(Bits_RelJMPDisp)}) +INST(jnp, {B(01111011), DISP, Flags(Bits_RelJMPDisp)}) +INST(jno, {B(01110001), DISP, Flags(Bits_RelJMPDisp)}) +INST(jns, {B(01111001), DISP, Flags(Bits_RelJMPDisp)}) +INST(loop, {B(11100010), DISP, Flags(Bits_RelJMPDisp)}) +INST(loopz, {B(11100001), DISP, Flags(Bits_RelJMPDisp)}) +INST(loopnz, {B(11100000), DISP, Flags(Bits_RelJMPDisp)}) +INST(jcxz, {B(11100011), DISP, Flags(Bits_RelJMPDisp)}) + +INST(int, {B(11001101), DATA}) +INST(int3, {B(11001100)}) // TODO(casey): The manual does not suggest that this intrinsic has an "int3" mnemonic, but NASM thinks so + +INST(into, {B(11001110)}) +INST(iret, {B(11001111)}) + +INST(clc, {B(11111000)}) +INST(cmc, {B(11110101)}) +INST(stc, {B(11111001)}) +INST(cld, {B(11111100)}) +INST(std, {B(11111101)}) +INST(cli, {B(11111010)}) +INST(sti, {B(11111011)}) +INST(hlt, {B(11110100)}) +INST(wait, {B(10011011)}) +INST(esc, {B(11011), XXX, MOD, YYY, RM}) +INST(lock, {B(11110000)}) +INST(segment, {B(001), SR, B(110)}) + +#undef INST +#undef INSTALT + +#undef B +#undef D +#undef S +#undef W +#undef V +#undef Z + +#undef XXX 
+#undef YYY +#undef RM +#undef MOD +#undef REG +#undef SR + +#undef ImpW +#undef ImpREG +#undef ImpMOD +#undef ImpRM +#undef ImpD +#undef ImpS + +#undef DISP +#undef ADDR +#undef DATA +#undef DATA_IF_W +#undef Flags diff --git a/src/code/reference_decoder/sim86_lib.cpp b/src/code/reference_decoder/sim86_lib.cpp new file mode 100644 index 0000000..6971eb2 --- /dev/null +++ b/src/code/reference_decoder/sim86_lib.cpp @@ -0,0 +1,73 @@ +/* ======================================================================== + + (C) Copyright 2023 by Molly Rocket, Inc., All Rights Reserved. + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. + + Please see https://computerenhance.com for more information + + ======================================================================== */ + +#define assert(...) + +#include "sim86.h" + +#include "sim86_instruction.h" +#include "sim86_instruction_table.h" +#include "sim86_memory.h" +#include "sim86_decode.h" + +#include "sim86_instruction.cpp" +#include "sim86_instruction_table.cpp" +#include "sim86_memory.cpp" +#include "sim86_decode.cpp" +#include "sim86_text_table.cpp" + +extern "C" u32 Sim86_GetVersion(void) +{ + u32 Result = SIM86_VERSION; + return Result; +} + +extern "C" void Sim86_Decode8086Instruction(u32 SourceSize, u8 *Source, instruction *Dest) +{ + instruction_table Table = Get8086InstructionTable(); + + // NOTE(casey): The 8086 decoder requires the ability to read up to 15 bytes (the maximum + // allowable instruction size) + assert(Table.MaxInstructionByteCount == 15); + u8 GuardBuffer[16] = {}; + if(SourceSize < Table.MaxInstructionByteCount) + { + // NOTE(casey): I replaced the memcpy here with a manual copy to make it easier for + // people compiling on things like WebAssembly who do not want to use Emscripten. 
+ for(u32 I = 0; I < SourceSize; ++I) + { + GuardBuffer[I] = Source[I]; + } + + Source = GuardBuffer; + } + + segmented_access At = FixedMemoryPow2(4, Source); + *Dest = DecodeInstruction(Table, At); +} + +extern "C" char const *Sim86_RegisterNameFromOperand(register_access *RegAccess) +{ + char const *Result = GetRegName(*RegAccess); + return Result; +} + +extern "C" char const *Sim86_MnemonicFromOperationType(operation_type Type) +{ + char const *Result = GetMnemonic(Type); + return Result; +} + +extern "C" void Sim86_Get8086InstructionTable(instruction_table *Dest) +{ + *Dest = Get8086InstructionTable(); +}
\ No newline at end of file diff --git a/src/code/reference_decoder/sim86_memory.cpp b/src/code/reference_decoder/sim86_memory.cpp new file mode 100644 index 0000000..1d58ede --- /dev/null +++ b/src/code/reference_decoder/sim86_memory.cpp @@ -0,0 +1,66 @@ +/* ======================================================================== + + (C) Copyright 2023 by Molly Rocket, Inc., All Rights Reserved. + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. + + Please see https://computerenhance.com for more information + + ======================================================================== */ + +static u32 GetHighestAddress(segmented_access SegMem) +{ + u32 Result = SegMem.Mask; + return Result; +} + +static u32 GetAbsoluteAddressOf(u32 Mask, u16 SegmentBase, u16 SegmentOffset, u16 AdditionalOffset) +{ + u32 Result = (((u32)SegmentBase << 4) + (u32)(SegmentOffset + AdditionalOffset)) & Mask; + return Result; +} + +static u32 GetAbsoluteAddressOf(segmented_access SegMem, u16 Offset) +{ + u32 Result = GetAbsoluteAddressOf(SegMem.Mask, SegMem.SegmentBase, SegMem.SegmentOffset, Offset); + return Result; +} + +static segmented_access MoveBaseBy(segmented_access Access, s32 Offset) +{ + Access.SegmentOffset += Offset; + + segmented_access Result = Access; + + Result.SegmentBase += (Result.SegmentOffset >> 4); + Result.SegmentOffset &= 0xf; + + assert(GetAbsoluteAddressOf(Result, 0) == GetAbsoluteAddressOf(Access, 0)); + + return Result; +} + +static u8 *AccessMemory(segmented_access SegMem, u16 Offset) +{ + u32 AbsAddr = GetAbsoluteAddressOf(SegMem, Offset); + u8 *Result = SegMem.Memory + AbsAddr; + return Result; +} + +static b32 IsValid(segmented_access SegMem) +{ + b32 Result = (SegMem.Mask != 0); + return Result; +} + +static segmented_access FixedMemoryPow2(u32 SizePow2, u8 *Memory) +{ + segmented_access Result = {}; + + Result.Memory = 
Memory; + Result.Mask = (1 << SizePow2) - 1; + + return Result; +} diff --git a/src/code/reference_decoder/sim86_memory.h b/src/code/reference_decoder/sim86_memory.h new file mode 100644 index 0000000..4e790dc --- /dev/null +++ b/src/code/reference_decoder/sim86_memory.h @@ -0,0 +1,28 @@ +/* ======================================================================== + + (C) Copyright 2023 by Molly Rocket, Inc., All Rights Reserved. + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. + + Please see https://computerenhance.com for more information + + ======================================================================== */ + +struct segmented_access +{ + u8 *Memory; + u32 Mask; + u16 SegmentBase; + u16 SegmentOffset; +}; + +static u32 GetHighestAddress(segmented_access SegMem); +static u32 GetAbsoluteAddressOf(segmented_access SegMem, u16 Offset = 0); +static segmented_access MoveBaseBy(segmented_access Access, s32 Offset); + +static u8 *AccessMemory(segmented_access SegMem, u16 Offset = 0); + +static b32 IsValid(segmented_access SegMem); +static segmented_access FixedMemoryPow2(u32 SizePow2, u8 *Memory); diff --git a/src/code/reference_decoder/sim86_text_table.cpp b/src/code/reference_decoder/sim86_text_table.cpp new file mode 100644 index 0000000..e90a649 --- /dev/null +++ b/src/code/reference_decoder/sim86_text_table.cpp @@ -0,0 +1,57 @@ +/* ======================================================================== + + (C) Copyright 2023 by Molly Rocket, Inc., All Rights Reserved. + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. 
+ + Please see https://computerenhance.com for more information + + ======================================================================== */ + +char const *OpcodeMnemonics[] = +{ + "", + +#define INST(Mnemonic, ...) #Mnemonic, +#define INSTALT(...) +#include "sim86_instruction_table.inl" +}; + +static char const *GetMnemonic(operation_type Op) +{ + char const *Result = ""; + if(Op < Op_Count) + { + Result = OpcodeMnemonics[Op]; + } + + return Result; +} + +static char const *GetRegName(register_access Reg) +{ + char const *Names[][3] = + { + {"", "", ""}, + {"al", "ah", "ax"}, + {"bl", "bh", "bx"}, + {"cl", "ch", "cx"}, + {"dl", "dh", "dx"}, + {"sp", "sp", "sp"}, + {"bp", "bp", "bp"}, + {"si", "si", "si"}, + {"di", "di", "di"}, + {"es", "es", "es"}, + {"cs", "cs", "cs"}, + {"ss", "ss", "ss"}, + {"ds", "ds", "ds"}, + {"ip", "ip", "ip"}, + {"flags", "flags", "flags"}, + {"", "", ""} + }; + + char const *Result = Names[Reg.Index % ArrayCount(Names)][(Reg.Count == 2) ? 2 : Reg.Offset&1]; + return Result; +} diff --git a/src/code/shared_library_test.cpp b/src/code/shared_library_test.cpp new file mode 100644 index 0000000..aef4fa1 --- /dev/null +++ b/src/code/shared_library_test.cpp @@ -0,0 +1,69 @@ +/* ======================================================================== + + (C) Copyright 2023 by Molly Rocket, Inc., All Rights Reserved. + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. 
+ + Please see https://computerenhance.com for more information + + ======================================================================== */ + +#include <stdio.h> + +#include "sim86_shared.h" + +unsigned char ExampleDisassembly[247] = +{ + 0x03, 0x18, 0x03, 0x5E, 0x00, 0x83, 0xC6, 0x02, 0x83, 0xC5, 0x02, 0x83, 0xC1, 0x08, 0x03, 0x5E, + 0x00, 0x03, 0x4F, 0x02, 0x02, 0x7A, 0x04, 0x03, 0x7B, 0x06, 0x01, 0x18, 0x01, 0x5E, 0x00, 0x01, + 0x5E, 0x00, 0x01, 0x4F, 0x02, 0x00, 0x7A, 0x04, 0x01, 0x7B, 0x06, 0x80, 0x07, 0x22, 0x83, 0x82, + 0xE8, 0x03, 0x1D, 0x03, 0x46, 0x00, 0x02, 0x00, 0x01, 0xD8, 0x00, 0xE0, 0x05, 0xE8, 0x03, 0x04, + 0xE2, 0x04, 0x09, 0x2B, 0x18, 0x2B, 0x5E, 0x00, 0x83, 0xEE, 0x02, 0x83, 0xED, 0x02, 0x83, 0xE9, + 0x08, 0x2B, 0x5E, 0x00, 0x2B, 0x4F, 0x02, 0x2A, 0x7A, 0x04, 0x2B, 0x7B, 0x06, 0x29, 0x18, 0x29, + 0x5E, 0x00, 0x29, 0x5E, 0x00, 0x29, 0x4F, 0x02, 0x28, 0x7A, 0x04, 0x29, 0x7B, 0x06, 0x80, 0x2F, + 0x22, 0x83, 0x29, 0x1D, 0x2B, 0x46, 0x00, 0x2A, 0x00, 0x29, 0xD8, 0x28, 0xE0, 0x2D, 0xE8, 0x03, + 0x2C, 0xE2, 0x2C, 0x09, 0x3B, 0x18, 0x3B, 0x5E, 0x00, 0x83, 0xFE, 0x02, 0x83, 0xFD, 0x02, 0x83, + 0xF9, 0x08, 0x3B, 0x5E, 0x00, 0x3B, 0x4F, 0x02, 0x3A, 0x7A, 0x04, 0x3B, 0x7B, 0x06, 0x39, 0x18, + 0x39, 0x5E, 0x00, 0x39, 0x5E, 0x00, 0x39, 0x4F, 0x02, 0x38, 0x7A, 0x04, 0x39, 0x7B, 0x06, 0x80, + 0x3F, 0x22, 0x83, 0x3E, 0xE2, 0x12, 0x1D, 0x3B, 0x46, 0x00, 0x3A, 0x00, 0x39, 0xD8, 0x38, 0xE0, + 0x3D, 0xE8, 0x03, 0x3C, 0xE2, 0x3C, 0x09, 0x75, 0x02, 0x75, 0xFC, 0x75, 0xFA, 0x75, 0xFC, 0x74, + 0xFE, 0x7C, 0xFC, 0x7E, 0xFA, 0x72, 0xF8, 0x76, 0xF6, 0x7A, 0xF4, 0x70, 0xF2, 0x78, 0xF0, 0x75, + 0xEE, 0x7D, 0xEC, 0x7F, 0xEA, 0x73, 0xE8, 0x77, 0xE6, 0x7B, 0xE4, 0x71, 0xE2, 0x79, 0xE0, 0xE2, + 0xDE, 0xE1, 0xDC, 0xE0, 0xDA, 0xE3, 0xD8 +}; + +int main(void) +{ + u32 Version = Sim86_GetVersion(); + printf("Sim86 Version: %u (expected %u)\n", Version, SIM86_VERSION); + if(Version != SIM86_VERSION) + { + printf("ERROR: Header file version doesn't match DLL.\n"); + return -1; + } 
+ + instruction_table Table; + Sim86_Get8086InstructionTable(&Table); + printf("8086 Instruction Instruction Encoding Count: %u\n", Table.EncodingCount); + + u32 Offset = 0; + while(Offset < sizeof(ExampleDisassembly)) + { + instruction Decoded; + Sim86_Decode8086Instruction(sizeof(ExampleDisassembly) - Offset, ExampleDisassembly + Offset, &Decoded); + if(Decoded.Op) + { + Offset += Decoded.Size; + printf("Size:%u Op:%s Flags:0x%x\n", Decoded.Size, Sim86_MnemonicFromOperationType(Decoded.Op), Decoded.Flags); + } + else + { + printf("Unrecognized instruction\n"); + break; + } + } + + return 0; +} diff --git a/src/code/sim86.cpp b/src/code/sim86.cpp new file mode 100644 index 0000000..e7cc2da --- /dev/null +++ b/src/code/sim86.cpp @@ -0,0 +1,194 @@ +/* ======================================================================== + + (C) Copyright 2023 by Molly Rocket, Inc., All Rights Reserved. + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. 
+ + Please see https://computerenhance.com for more information + + ======================================================================== */ + +#include <stdio.h> +#include <string.h> + +#include "sim86.h" +#include "sim86_shared.h" + +internal void +Run8086(psize DisassemblySize, u8 *Disassembly) +{ + s32 Registers[Register_count] = {}; + + u32 Offset = 0; + while(Offset < DisassemblySize) + { + instruction Decoded; + Sim86_Decode8086Instruction(DisassemblySize - Offset, Disassembly + Offset, &Decoded); + if(Decoded.Op) + { + Offset += Decoded.Size; + +#if SIM86_INTERNAL + printf("Size:%u Op:%s Flags:0x%x\n", Decoded.Size, Sim86_MnemonicFromOperationType(Decoded.Op), Decoded.Flags); +#endif + if(0) {} + else if(Decoded.Op == Op_mov) + { + s32 *Destination = 0; + s32 *Source = 0; + + if(0) {} + else if(Decoded.Operands[0].Type == Operand_Register) + { + Destination = &Registers[Decoded.Operands[0].Register.Index]; + } + else if(Decoded.Operands[0].Type == Operand_Memory) + { + Assert(0 && "mov to memory not implemented yet."); + } + else if(Decoded.Operands[0].Type == Operand_Immediate) + { + Assert(0 && "Cannot move to immediate value."); + } + else + { + Assert(0); + } + + if(0) {} + else if(Decoded.Operands[1].Type == Operand_Register) + { + Source = &Registers[Decoded.Operands[1].Register.Index]; + } + else if(Decoded.Operands[1].Type == Operand_Immediate) + { + Source = &Decoded.Operands[1].Immediate.Value; + } + else if(Decoded.Operands[1].Type == Operand_Memory) + { + Assert(0 && "mov from memory not implemented yet."); + } + else + { + Assert(0); + } + + *Destination = *Source; + } + else + { + Assert(0 && "Op not implemented yet."); + } + + } + else + { + printf("Unrecognized instruction\n"); + break; + } + } + + printf("Final registers:\n"); + for(u32 RegisterIndex = Register_a; + RegisterIndex < Register_di + 1; + RegisterIndex++) + { + register_access Register = {}; + Register.Index = RegisterIndex; + Register.Offset = 0; + Register.Count = 2; + 
printf(" %s: 0x%04x (%d)\n", + Sim86_RegisterNameFromOperand(&Register), + Registers[RegisterIndex], + Registers[RegisterIndex]); + } +} + +void PrintUsage(char *ExePath) +{ + fprintf(stderr, "usage: %s [-exec] <assembly>\n", ExePath); +} + +int main(int ArgCount, char *Args[]) +{ + u32 Version = Sim86_GetVersion(); + +#if SIM86_INTERNAL + printf("Sim86 Version: %u (expected %u)\n", Version, SIM86_VERSION); +#endif + + if(Version != SIM86_VERSION) + { + printf("ERROR: Header file version doesn't match DLL.\n"); + return -1; + } + + instruction_table Table; + Sim86_Get8086InstructionTable(&Table); + +#if SIM86_INTERNAL + printf("8086 Instruction Instruction Encoding Count: %u\n", Table.EncodingCount); +#endif + + // print (default) + // -exec (also execute the disassembled instructinos) + + b32 Execute = false; + char *FileName = 0; + if(ArgCount == 2) + { + FileName = Args[1]; + Execute = false; + // Print disassembly from assembly + } + else if(ArgCount == 3) + { + char *Command = Args[1]; + FileName = Args[2]; + if(!strcmp(Command, "-exec")) + { + Execute = true; + } + else + { + fprintf(stderr, "ERROR: Unknown command %s.\n", Command); + PrintUsage(Args[0]); + } + } + else + { + PrintUsage(Args[0]); + } + + u8 Memory[1024] = {}; + if(FileName) + { + FILE *File = fopen(FileName, "rb"); + if(File) + { + psize Result = fread(Memory, 1, sizeof(Memory), File); + fclose(File); + + if(Execute) + { + printf("--- %s execution ---\n", FileName); + + Run8086(Result, Memory); + } + else + { + fprintf(stderr, "ERROR: Disassembling not implemented yet.\n"); + } + + } + else + { + fprintf(stderr, "ERROR: Unable to open %s.\n", FileName); + PrintUsage(Args[0]); + } + } + + return 0; +} diff --git a/src/code/sim86.h b/src/code/sim86.h new file mode 100644 index 0000000..2a3427e --- /dev/null +++ b/src/code/sim86.h @@ -0,0 +1,39 @@ +/* date = August 13th 2025 1:51 pm */ + +#ifndef SIM86_H +#define SIM86_H + +#define internal static +#define global_variable static +#define 
local_persist static + +typedef size_t psize; + +#define Assert(Expression) if(!(Expression)) { __asm__ volatile("int3"); } +#define ArrayCount(Array) (sizeof(Array) / sizeof((Array)[0])) + +//~ Stolen from the decoder. +enum register_mapping_8086 +{ + Register_none, + + Register_a, + Register_b, + Register_c, + Register_d, + Register_sp, + Register_bp, + Register_si, + Register_di, + Register_es, + Register_cs, + Register_ss, + Register_ds, + Register_ip, + Register_flags, + + Register_count, +}; + + +#endif //SIM86_H diff --git a/src/code/sim86_shared.h b/src/code/sim86_shared.h new file mode 100644 index 0000000..7a54beb --- /dev/null +++ b/src/code/sim86_shared.h @@ -0,0 +1,285 @@ +typedef char unsigned u8; +typedef short unsigned u16; +typedef int unsigned u32; +typedef long long unsigned u64; + +typedef char s8; +typedef short s16; +typedef int s32; +typedef long long s64; + +typedef s32 b32; + +static u32 const SIM86_VERSION = 4; +typedef u32 register_index; + +typedef struct register_access register_access; +typedef struct effective_address_term effective_address_term; +typedef struct effective_address_expression effective_address_expression; +typedef struct immediate immediate; +typedef struct instruction_operand instruction_operand; +typedef struct instruction instruction; + +typedef enum operation_type : u32 +{ + Op_None, + + Op_mov, + + Op_push, + + Op_pop, + + Op_xchg, + + Op_in, + + Op_out, + + Op_xlat, + Op_lea, + Op_lds, + Op_les, + Op_lahf, + Op_sahf, + Op_pushf, + Op_popf, + + Op_add, + + Op_adc, + + Op_inc, + + Op_aaa, + Op_daa, + + Op_sub, + + Op_sbb, + + Op_dec, + + Op_neg, + + Op_cmp, + + Op_aas, + Op_das, + Op_mul, + Op_imul, + Op_aam, + Op_div, + Op_idiv, + Op_aad, + Op_cbw, + Op_cwd, + + Op_not, + Op_shl, + Op_shr, + Op_sar, + Op_rol, + Op_ror, + Op_rcl, + Op_rcr, + + Op_and, + + Op_test, + + Op_or, + + Op_xor, + + Op_rep, + Op_movs, + Op_cmps, + Op_scas, + Op_lods, + Op_stos, + + Op_call, + + Op_jmp, + + Op_ret, + + Op_retf, + + Op_je, + 
Op_jl, + Op_jle, + Op_jb, + Op_jbe, + Op_jp, + Op_jo, + Op_js, + Op_jne, + Op_jnl, + Op_jg, + Op_jnb, + Op_ja, + Op_jnp, + Op_jno, + Op_jns, + Op_loop, + Op_loopz, + Op_loopnz, + Op_jcxz, + + Op_int, + Op_int3, + + Op_into, + Op_iret, + + Op_clc, + Op_cmc, + Op_stc, + Op_cld, + Op_std, + Op_cli, + Op_sti, + Op_hlt, + Op_wait, + Op_esc, + Op_lock, + Op_segment, + + Op_Count, +} operation_type; + +enum instruction_flag +{ + Inst_Lock = 0x1, + Inst_Rep = 0x2, + Inst_Segment = 0x4, + Inst_Wide = 0x8, + Inst_Far = 0x10, + Inst_RepNE = 0x20, +}; + +struct register_access +{ + register_index Index; + u32 Offset; + u32 Count; +}; + +struct effective_address_term +{ + register_access Register; + s32 Scale; +}; + +enum effective_address_flag +{ + Address_ExplicitSegment = 0x1, +}; +struct effective_address_expression +{ + effective_address_term Terms[2]; + u32 ExplicitSegment; + s32 Displacement; + u32 Flags; +}; + +enum immediate_flag +{ + Immediate_RelativeJumpDisplacement = 0x1, +}; +struct immediate +{ + s32 Value; + u32 Flags; +}; + +typedef enum operand_type : u32 +{ + Operand_None, + Operand_Register, + Operand_Memory, + Operand_Immediate, +} operand_type; +struct instruction_operand +{ + operand_type Type; + union { + effective_address_expression Address; + register_access Register; + immediate Immediate; + }; +}; + +struct instruction +{ + u32 Address; + u32 Size; + + operation_type Op; + u32 Flags; + + instruction_operand Operands[2]; + + register_index SegmentOverride; +}; +enum instruction_bits_usage : u8 +{ + Bits_End, + + Bits_Literal, + + Bits_D, + Bits_S, + Bits_W, + Bits_V, + Bits_Z, + Bits_MOD, + Bits_REG, + Bits_RM, + Bits_SR, + Bits_Disp, + Bits_Data, + + Bits_DispAlwaysW, + Bits_WMakesDataW, + Bits_RMRegAlwaysW, + Bits_RelJMPDisp, + Bits_Far, + + Bits_Count, +}; + +struct instruction_bits +{ + instruction_bits_usage Usage; + u8 BitCount; + u8 Shift; + u8 Value; +}; + +struct instruction_encoding +{ + operation_type Op; + instruction_bits Bits[16]; +}; + 
+struct instruction_table +{ + instruction_encoding *Encodings; + u32 EncodingCount; + u32 MaxInstructionByteCount; +}; +#ifdef __cplusplus +extern "C" +{ +#endif + u32 Sim86_GetVersion(void); + void Sim86_Decode8086Instruction(u32 SourceSize, u8 *Source, instruction *Dest); + char const *Sim86_RegisterNameFromOperand(register_access *RegAccess); + char const *Sim86_MnemonicFromOperationType(operation_type Type); + void Sim86_Get8086InstructionTable(instruction_table *Dest); +#ifdef __cplusplus +} +#endif diff --git a/src/data/listing_0043_immediate_movs b/src/data/listing_0043_immediate_movs Binary files differnew file mode 100644 index 0000000..a965538 --- /dev/null +++ b/src/data/listing_0043_immediate_movs diff --git a/src/data/listing_0043_immediate_movs.asm b/src/data/listing_0043_immediate_movs.asm new file mode 100644 index 0000000..475afaf --- /dev/null +++ b/src/data/listing_0043_immediate_movs.asm @@ -0,0 +1,27 @@ +; ======================================================================== +; +; (C) Copyright 2023 by Molly Rocket, Inc., All Rights Reserved. +; +; This software is provided 'as-is', without any express or implied +; warranty. In no event will the authors be held liable for any damages +; arising from the use of this software. 
+; +; Please see https://computerenhance.com for further information +; +; ======================================================================== + +; ======================================================================== +; LISTING 43 +; ======================================================================== + +bits 16 + +mov ax, 1 +mov bx, 2 +mov cx, 3 +mov dx, 4 + +mov sp, 5 +mov bp, 6 +mov si, 7 +mov di, 8 diff --git a/src/data/listing_0043_immediate_movs.txt b/src/data/listing_0043_immediate_movs.txt new file mode 100644 index 0000000..fb36287 --- /dev/null +++ b/src/data/listing_0043_immediate_movs.txt @@ -0,0 +1,20 @@ +--- test\listing_0043_immediate_movs execution --- +mov ax, 1 ; ax:0x0->0x1 +mov bx, 2 ; bx:0x0->0x2 +mov cx, 3 ; cx:0x0->0x3 +mov dx, 4 ; dx:0x0->0x4 +mov sp, 5 ; sp:0x0->0x5 +mov bp, 6 ; bp:0x0->0x6 +mov si, 7 ; si:0x0->0x7 +mov di, 8 ; di:0x0->0x8 + +Final registers: + ax: 0x0001 (1) + bx: 0x0002 (2) + cx: 0x0003 (3) + dx: 0x0004 (4) + sp: 0x0005 (5) + bp: 0x0006 (6) + si: 0x0007 (7) + di: 0x0008 (8) + diff --git a/src/data/listing_0044_register_movs b/src/data/listing_0044_register_movs Binary files differnew file mode 100644 index 0000000..346ff45 --- /dev/null +++ b/src/data/listing_0044_register_movs diff --git a/src/data/listing_0044_register_movs.asm b/src/data/listing_0044_register_movs.asm new file mode 100644 index 0000000..58988fe --- /dev/null +++ b/src/data/listing_0044_register_movs.asm @@ -0,0 +1,32 @@ +; ======================================================================== +; +; (C) Copyright 2023 by Molly Rocket, Inc., All Rights Reserved. +; +; This software is provided 'as-is', without any express or implied +; warranty. In no event will the authors be held liable for any damages +; arising from the use of this software. 
+; +; Please see https://computerenhance.com for further information +; +; ======================================================================== + +; ======================================================================== +; LISTING 44 +; ======================================================================== + +bits 16 + +mov ax, 1 +mov bx, 2 +mov cx, 3 +mov dx, 4 + +mov sp, ax +mov bp, bx +mov si, cx +mov di, dx + +mov dx, sp +mov cx, bp +mov bx, si +mov ax, di diff --git a/src/data/listing_0044_register_movs.txt b/src/data/listing_0044_register_movs.txt new file mode 100644 index 0000000..e46c56c --- /dev/null +++ b/src/data/listing_0044_register_movs.txt @@ -0,0 +1,24 @@ +--- test\listing_0044_register_movs execution --- +mov ax, 1 ; ax:0x0->0x1 +mov bx, 2 ; bx:0x0->0x2 +mov cx, 3 ; cx:0x0->0x3 +mov dx, 4 ; dx:0x0->0x4 +mov sp, ax ; sp:0x0->0x1 +mov bp, bx ; bp:0x0->0x2 +mov si, cx ; si:0x0->0x3 +mov di, dx ; di:0x0->0x4 +mov dx, sp ; dx:0x4->0x1 +mov cx, bp ; cx:0x3->0x2 +mov bx, si ; bx:0x2->0x3 +mov ax, di ; ax:0x1->0x4 + +Final registers: + ax: 0x0004 (4) + bx: 0x0003 (3) + cx: 0x0002 (2) + dx: 0x0001 (1) + sp: 0x0001 (1) + bp: 0x0002 (2) + si: 0x0003 (3) + di: 0x0004 (4) + diff --git a/src/data/listing_0045_challenge_register_movs b/src/data/listing_0045_challenge_register_movs new file mode 100644 index 0000000..dd781b2 --- /dev/null +++ b/src/data/listing_0045_challenge_register_movs @@ -0,0 +1 @@ +""DDffЎێ3Uw܈ЎێԌƉ
\ No newline at end of file diff --git a/src/data/listing_0045_challenge_register_movs.asm b/src/data/listing_0045_challenge_register_movs.asm new file mode 100644 index 0000000..9e25fda --- /dev/null +++ b/src/data/listing_0045_challenge_register_movs.asm @@ -0,0 +1,43 @@ +; ======================================================================== +; +; (C) Copyright 2023 by Molly Rocket, Inc., All Rights Reserved. +; +; This software is provided 'as-is', without any express or implied +; warranty. In no event will the authors be held liable for any damages +; arising from the use of this software. +; +; Please see https://computerenhance.com for further information +; +; ======================================================================== + +; ======================================================================== +; LISTING 45 +; ======================================================================== + +bits 16 + +mov ax, 0x2222 +mov bx, 0x4444 +mov cx, 0x6666 +mov dx, 0x8888 + +mov ss, ax +mov ds, bx +mov es, cx + +mov al, 0x11 +mov bh, 0x33 +mov cl, 0x55 +mov dh, 0x77 + +mov ah, bl +mov cl, dh + +mov ss, ax +mov ds, bx +mov es, cx + +mov sp, ss +mov bp, ds +mov si, es +mov di, dx diff --git a/src/data/listing_0045_challenge_register_movs.txt b/src/data/listing_0045_challenge_register_movs.txt new file mode 100644 index 0000000..66c8c7b --- /dev/null +++ b/src/data/listing_0045_challenge_register_movs.txt @@ -0,0 +1,35 @@ +--- test\listing_0045_challenge_register_movs execution --- +mov ax, 8738 ; ax:0x0->0x2222 +mov bx, 17476 ; bx:0x0->0x4444 +mov cx, 26214 ; cx:0x0->0x6666 +mov dx, 34952 ; dx:0x0->0x8888 +mov ss, ax ; ss:0x0->0x2222 +mov ds, bx ; ds:0x0->0x4444 +mov es, cx ; es:0x0->0x6666 +mov al, 17 ; ax:0x2222->0x2211 +mov bh, 51 ; bx:0x4444->0x3344 +mov cl, 85 ; cx:0x6666->0x6655 +mov dh, 119 ; dx:0x8888->0x7788 +mov ah, bl ; ax:0x2211->0x4411 +mov cl, dh ; cx:0x6655->0x6677 +mov ss, ax ; ss:0x2222->0x4411 +mov ds, bx ; ds:0x4444->0x3344 +mov es, cx 
; es:0x6666->0x6677 +mov sp, ss ; sp:0x0->0x4411 +mov bp, ds ; bp:0x0->0x3344 +mov si, es ; si:0x0->0x6677 +mov di, dx ; di:0x0->0x7788 + +Final registers: + ax: 0x4411 (17425) + bx: 0x3344 (13124) + cx: 0x6677 (26231) + dx: 0x7788 (30600) + sp: 0x4411 (17425) + bp: 0x3344 (13124) + si: 0x6677 (26231) + di: 0x7788 (30600) + es: 0x6677 (26231) + ss: 0x4411 (17425) + ds: 0x3344 (13124) + diff --git a/src/project.4coder b/src/project.4coder new file mode 100644 index 0000000..0b2c2fb --- /dev/null +++ b/src/project.4coder @@ -0,0 +1,43 @@ +version(2); +project_name = "sim8086"; +patterns = { +"*.c", +"*.cpp", +"*.h", +"*.m", +"*.bat", +"*.sh", +"*.4coder", +}; +blacklist_patterns = { +"_old.*", +}; +load_paths_base = { + { ".", .relative = true, .recursive = true, }, +}; +load_paths = { + .win = load_paths_base, + .linux = load_paths_base, + .mac = load_paths_base, +}; + +commands = { + .build = { .out = "*compilation*", .footer_panel = true, .save_dirty_files = true, + .linux = "./code/build.sh", + .mac = "./code/build.sh", }, + .build_release = { .out = "*compilation*", .footer_panel = false, .save_dirty_files = false, + .win = ".\code\build.bat release", + .linux = "./code/build.sh --release", + .mac = "./code/build.sh --release", }, + .test_current = { .out = "*tests*", .footer_panel = false, .save_dirty_files = false, + .linux = "./misc/test_source_line_by_line --pause-on-fail --no-color ./listings/listing_0042_completionist_decode.asm" }, + .test_all = { .out = "*tests*", .footer_panel = false, .save_dirty_files = false, + .linux = "./misc/test_listing --no-color ./listings/listing_0037_single_register_mov.asm ./listings/listing_0038_many_register_mov.asm ./listings/listing_0039_more_movs.asm ./listings/listing_0040_challenge_movs.asm ./listings/listing_0041_add_sub_cmp_jnz.asm ./listings/listing_0042*" }, + .debug = { .linux = "gf2 ./build/sim8086", }, +}; +fkey_command = { +.F1 = "build", +.F2 = "build_release", +.F3 = "test_current", +.F4 = "test_all", +}; |
