2 files changed, 215 insertions, 0 deletions
diff --git a/utf8toASCII.c b/utf8toASCII.c
new file mode 100644
index 0000000..988a69b
--- /dev/null
+++ b/utf8toASCII.c
@@ -0,0 +1,163 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdio.h>
+#include <locale.h>
+#include <assert.h>
+#include <wchar.h>
+#include <stdint.h>
+
+typedef uint8_t u8;
+typedef uint16_t u16;
+typedef uint32_t u32;
+typedef uint64_t u64;
+typedef int8_t s8;
+typedef int16_t s16;
+typedef int32_t s32;
+typedef int64_t s64;
+
+static size_t
+UTF8Compress(size_t InSize, wchar_t* In, size_t OutSize, u8* OutBase)
+{
+    wchar_t* InEnd = (wchar_t*)((u8*)In + InSize);
+    u8* Out = OutBase;
+
+#define MAX_LITERAL_COUNT 255
+    u8 ASCIILiterals[MAX_LITERAL_COUNT] = {0};
+    u8 ASCIILiteralsCount = 0;
+    wchar_t UTF8Literals[MAX_LITERAL_COUNT] = {0};
+    u8 UTF8LiteralsCount = 0;
+
+    while (In < InEnd)
+    {
+        wchar_t CurrentChar = In[0];
+
+        // Check consecutive ascii characters
+        while(CurrentChar == (u8)CurrentChar &&
+              ASCIILiteralsCount < MAX_LITERAL_COUNT)
+        {
+            ASCIILiterals[ASCIILiteralsCount++] = (u8)CurrentChar;
+            CurrentChar = *++In;
+        }
+
+        while(CurrentChar != (u8)CurrentChar &&
+              UTF8LiteralsCount < MAX_LITERAL_COUNT)
+        {
+            UTF8Literals[UTF8LiteralsCount++] = CurrentChar;
+            CurrentChar = *++In;
+        }
+
+        // Encode ASCII/UTF8 pair
+        *Out++ = ASCIILiteralsCount;
+        for (u8 ch = 0;
+             ch < ASCIILiteralsCount;
+             ch++)
+        {
+            *Out = ASCIILiterals[ch];
+            Out += sizeof(ASCIILiterals[ch]);
+        }
+        ASCIILiteralsCount = 0;
+
+        *Out++ = UTF8LiteralsCount;
+        for (u8 ch = 0;
+             ch < UTF8LiteralsCount;
+             ch++)
+        {
+            *(wchar_t*)Out = UTF8Literals[ch];
+            Out += sizeof(UTF8Literals[ch]);
+        }
+        UTF8LiteralsCount = 0;
+
+    }
+#undef MAX_LITERAL_COUNT
+    assert(In == InEnd);
+
+    return Out - OutBase;
+}
+
+static void
+PrintCompressedUTF8(u8* In, size_t InSize)
+{
+    u8* InEnd = In + InSize;
+
+    while (In < InEnd)
+    {
+        u8 ASCIICount = *In++;
+        wprintf(L"%dA(\"", ASCIICount);
+        while(ASCIICount--)
+        {
+            wprintf(L"%c", *In);
+            In += sizeof(u8);
+        }
+        wprintf(L"\") ");
+
+        u8 UTF8Count = *In++;
+        wprintf(L"%dU(\"", UTF8Count);
+        while(UTF8Count--)
+        {
+            wprintf(L"%lc", *(wchar_t*)In);
+            In += sizeof(wchar_t);
+        }
+        wprintf(L"\") ");
+    }
+    wprintf(L"\n");
+
+    assert(In == InEnd);
+}
+
+static void
+UTF8Decompress(size_t InSize, u8* In, size_t OutSize, wchar_t* Out)
+{
+    u8* InEnd = In + InSize;
+
+    while (In < InEnd)
+    {
+        u8 ASCIICount = *In++;
+        while(ASCIICount--)
+        {
+            *Out++ = *In++;
+        }
+
+        u8 UTF8Count = *In++;
+        while(UTF8Count--)
+        {
+            *Out++ = *(wchar_t*)In;
+            In += sizeof(wchar_t);
+        }
+    }
+    assert(In == InEnd);
+}
+
+// Size is the size of the UTF8 string in bytes. "aaa" would be 12.
+size_t
+UTF8GetMaximumCompressedSize(size_t Size)
+{
+    // The largest would be if there was only one unicode point in which case we store 0 for ascii 1
+    // for unicode and the raw codepoint. 1 + 1 + 4 * CodepointNum
+    return Size + 2;
+}
+
+int
+main(int Argc, char* Argv[]) {
+    assert(setlocale(LC_ALL, "") != 0);
+
+    wchar_t* InBuf = L"text│tt│";
+    size_t InSize = wcslen(InBuf) * 4;
+
+    size_t OutSize = UTF8GetMaximumCompressedSize(InSize);
+    u8 OutBuf[OutSize];
+
+    size_t CompressedSize = UTF8Compress(InSize, InBuf, OutSize, OutBuf);
+
+    fwprintf(stderr, L"Raw string: \"%ls\"\n", InBuf);
+    fwprintf(stderr, L"Compressed %lu bytes -> %lu bytes.\n", InSize, CompressedSize);
+
+    size_t DecompressedSize = InSize;
+    wchar_t *DecompressedBuffer = malloc(DecompressedSize);
+
+    UTF8Decompress(CompressedSize, OutBuf, DecompressedSize, DecompressedBuffer);
+    fwprintf(stderr, L"Decompressed: \"%ls\"\n", DecompressedBuffer);
+
+    PrintCompressedUTF8(OutBuf, CompressedSize);
+
+    return 0;
+}
diff --git a/wrap.c b/wrap.c
new file mode 100644
index 0000000..57506b8
--- /dev/null
+++ b/wrap.c
@@ -0,0 +1,52 @@
+// 1. Search backwards for whitespace
+// - found?
+//   y) wrap
+//   n) break at limit
+// - end?
+//   y) terminate
+//   n) goto 1. with offset += limit
+void
+wrap(u8* Text, u32 Len, u32 XLimit, u32 YLimit)
+{
+    u32 SearchingOffset = XLimit;
+    u32 X = SearchingOffset;
+    u32 Y = 0;
+    u8 t;
+    u32 PrevX = 0;
+
+    while (X < Len)
+    {
+        // Search for whitespace to break on
+        while (1)
+        {
+            if (is_whitespace(Text[X])) break;
+
+            X--;
+
+            // if we got back to the previous position break on Text[SearchingOffset]
+            if (X == PrevX)
+            {
+                X = XLimit;
+                break;
+            }
+        }
+
+        // break
+        t = Text[X];
+        *(Text + X) = '\0';
+        tb_printf(0, Y, 0, 0, "%s", Text + PrevX);
+        Text[X] = t;
+        Y++;
+        if (Y >= YLimit) break;
+
+        // consume leading whitespace
+        while (is_whitespace(Text[X])) X++;
+
+        PrevX = X;
+        X += XLimit;
+    }
+
+    tb_printf(0, Y, 0, 0, "%s", Text + PrevX);
+
+    return;
+}