From 087025fdd48aedb14f89bb7b9d713de107c28a57 Mon Sep 17 00:00:00 2001
From: Trysdyn Black <trysdyn@voidfox.com>
Date: Mon, 7 Oct 2024 03:10:01 -0700
Subject: [PATCH] Code cleanup and version bump

- Tidy parse_MONSTERS: use a single loop for all the sub-sections that
share the same logic instead of repeating that logic for each one.
- Add docstrings and type hints to everything
- Use pathlib for opening the log file
- Use `.values()` instead of `.items()` where I only want values
- Bump to 0.3
---
 main.py | 119 +++++++++++++++++++++++++++++++++-----------------------
 1 file changed, 71 insertions(+), 48 deletions(-)

diff --git a/main.py b/main.py
index b472ec9..18891a3 100644
--- a/main.py
+++ b/main.py
@@ -1,13 +1,21 @@
 #!/usr/bin/env python3
 
-__version__ = "0.2"
+"""Parse BCEX (or BCCE) logs into json objects."""
+
+__version__ = "0.3"
 __author__ = "Trysdyn Black"
 
 import json
 import sys
+from pathlib import Path
 
 
-def parse_MONSTERS(data):
+def parse_MONSTERS(data: str) -> dict[str, dict]:  # noqa: C901, PLR0912
+    """
+    Parse the MONSTERS section.
+
+    This contains data on monsters including stat sheets, loot, and weaknesses.
+    """
     result = {}
     for m_text in data.split("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"):
         info = {}
@@ -36,16 +44,6 @@ def parse_MONSTERS(data):
                     weak_text = "WEAK: "
                 info["nullifies"] = null_text.split(": ")[1].split(", ")
                 info["weak"] = weak_text.split(": ")[1].split(", ")
-            elif line.startswith("IMMUNE:"):
-                if len(line) >= 9:
-                    info["immune"] = line.split(": ")[1].split(", ")
-                else:
-                    info["immune"] = []
-            elif line.startswith("AUTO:"):
-                if len(line) >= 7:
-                    info["auto"] = line.split(": ")[1].split(", ")
-                else:
-                    info["auto"] = []
             # Specials are name=>desc as k:v
             # I *think* you can only have one special...
             elif line.startswith("SPECIAL"):
@@ -56,25 +54,13 @@ def parse_MONSTERS(data):
                     info["special"] = {special_name: special_desc}
                 else:
                     info["special"] = {}
-            elif line.startswith("SKILLS:"):
-                if len(line) >= 9:
-                    info["skills"] = line.split(": ")[1].split(", ")
-                info["skills"] = []
-            elif line.startswith("STEAL:"):
-                if len(line) >= 8:
-                    info["steal"] = line.split(": ")[1].split(", ")
-                else:
-                    info["steal"] = []
-            elif line.startswith("DROPS:"):
-                if len(line) >= 8:
-                    info["drops"] = line.split(": ")[1].split(", ")
-                else:
-                    info["drops"] = []
-            elif line.startswith("LOCATION:"):
-                if len(line) >= 11:
-                    info["location"] = line.split(": ", 1)[1]
-                else:
-                    info["location"] = None
+            # Everything else is a simple k: v list where v is comma-delimited
+            else:
+                for k in ["immune", "auto", "skills", "steal", "drops", "location"]:
+                    str_match = f"{k.upper()}:"
+                    if line.startswith(str_match):
+                        info[k] = line.split(": ")[1].split(", ") if line.upper().strip() != str_match else []
+                        break
 
         if name != "NULL":
             result[name] = info
@@ -82,8 +68,13 @@ def parse_MONSTERS(data):
     return result
 
 
-def parse_REMONSTERATE(data):
-    # BCCE only. Remapping info if you use BCCE to also remonsterate
+def parse_REMONSTERATE(data: str) -> dict[str, dict]:
+    """
+    Parse the BCCE-only REMONSTERATE section.
+
+    This contains a mapping of monster sprites: what they were and what they
+    turned into post-alteration.
+    """
     result = {}
     for line in data.split("\n"):
         if not line or line.startswith("-----"):
@@ -97,7 +88,17 @@ def parse_REMONSTERATE(data):
     return result
 
 
-def parse_CHARACTERS(data):
+def parse_CHARACTERS(data: str) -> dict[str, dict]:  # noqa: C901
+    """
+    Parse the CHARACTERS section.
+
+    This differs based on BCEX vs BCCE. In both flavors it contains basic data
+    like name, spells, location and special abilities. In BCEX it includes
+    stats as well. In BCCE stats is its own section.
+
+    Regardless of flavor, core logic will snap stats back into this section
+    later.
+    """
     replacements = {"Looks like": "looks", "World of Ruin location": "wor_location", "Notable equipment": "equipment"}
 
     result = {}
@@ -149,9 +150,14 @@ def parse_CHARACTERS(data):
     return result
 
 
-def parse_STATS(data):
-    # BCCE Version Only
-    # BCCE Splits stats into its own section that we need to parse, return, then snap together
+def parse_STATS(data: str) -> dict[str, dict]:
+    """
+    Parse the BCCE-only STATS section.
+
+    BCCE splits character stats into its own section. We use largely the same
+    logic as CHARACTERS here to parse it, then return it as its own dict for
+    merging back into the CHARACTERS blob later.
+    """
     result = {}
 
     # This is pretty identical to CHARACTERS
@@ -179,7 +185,13 @@ def parse_STATS(data):
     return result
 
 
-def parse_COMMANDS(data):
+def parse_COMMANDS(data: str) -> dict[str, dict]:
+    """
+    Parse the COMMANDS section.
+
+    This contains information on special commands, expanding contracted command
+    names into more detailed explanations like GranSaw = Grand Train + Chainsaw.
+    """
     commands = {}
 
     # We split by ------ which divides the command name from its data
@@ -213,25 +225,36 @@ def parse_COMMANDS(data):
     return commands
 
 
-def parse_SEED(data):
-    # This is a fake section injected by the file loader. It contains only the seed code
-    is_BCCE = True if data.startswith("CE") else False
+def parse_SEED(data: str) -> dict[str, bool | str]:
+    """
+    Parse the injected SEED section.
 
+    This is a fake section injected by the loader code. It contains nothing
+    but the seed code and we derive from this if the randomizer is BCCE or
+    BCEX, and normalize the seed code to a standard format by undoing the
+    changes BCCE makes to it.
+    """
     # Normalize seed codes to BCEX format, removing spaces and replacing pipes with dots
     seed = data.replace("|", ".").replace(" ", "")
 
-    return {"is_bcce": is_BCCE, "seed": seed}
+    return {"is_bcce": data.startswith("CE"), "seed": seed}
 
 
-def parse_SECRET_ITEMS(data):
-    # BCCE Only
+def parse_SECRET_ITEMS(data: str) -> list[str]:
+    """
+    Parse the BCCE-only SECRET ITEMS section.
+
+    I'm unsure what this is for. It's a series of strings with no real obvious
+    significance, so we just return it as a list.
+    """
     # I have no idea what this is lol, dump it to a list for now
     return [line for line in data.split("\n") if not line.startswith("---")]
 
 
-def load(filename):
+def load(filename: str) -> dict[str, str]:
+    """Load file and tokenize into sections."""
     # Load our file, tokenize by section header (starting with ====)
-    with open(filename) as infile:
+    with Path(filename).open(encoding="utf-8") as infile:
         tok_data = infile.read().split("============================================================\n")
 
     sections = {}
@@ -270,7 +293,7 @@ if __name__ == "__main__":
     # Command name => Textual desc of command
     # Certain flags don't shuffle commands like this so we have to check
     if "COMMANDS" in data:
-        for character, c_data in data["CHARACTERS"].items():
+        for c_data in data["CHARACTERS"].values():
             new_commands = {}
 
             for command in c_data["commands"]:
@@ -282,7 +305,7 @@ if __name__ == "__main__":
     # Worse, it keys on slot name, not randomized character name
     if "STATS" in data:
         for slot, stats in data["STATS"].items():
-            for c_name, c_data in data["CHARACTERS"].items():
+            for c_data in data["CHARACTERS"].values():
                 if c_data["originally"].lower() == slot.lower():
                     c_data["stats"] = stats
         del data["STATS"]