Refactor entire script into a class

- Shove all the logic in a class
- Allow for `cleanup_SECTION` functions; one that returns true will delete the calling section in post-run
- Get rid of that awful awful `globals()` call, which was the main motivation of this
- Document the new methods

This should result in a horrifying diff that claims 98% of the file has changed but no actual change in logic or output.
2024-10-11 17:45:31 -07:00 · 2024-10-11 17:45:31 -07:00 · 14ec718890
commit 14ec718890
parent eb924c14c2
1 changed files with 332 additions and 287 deletions
--- a/main.py
+++ b/main.py
@ -2,7 +2,7 @@
 """Parse BCEX (or BCCE) logs into json objects."""
-__version__ = "0.4.0"
+__version__ = "0.4.1"
 __author__ = "Trysdyn Black"
 import json
@ -10,321 +10,366 @@ import sys
 from pathlib import Path
-def parse_MONSTERS(data: str) -> dict[str, dict]:  # noqa: C901, PLR0912
+class Parser:
-    """
+    """BCEX/BCCE spoiler logfile parser."""
    Parse the MONSTERS section.
-    This contains data on monsters including stat sheets, loot, and weaknesses.
+    def __init__(self, filename: str) -> None:
-    """
+        """Initialize parser with filename."""
-    result = {}
+        self.filename = filename
-    for m_text in data.split("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"):
+        self.config_sections = {}
-        info = {}
+        self.data_sections = {}
-        info["stats"] = {}
+
-        info["spells"] = {}
+    @staticmethod
-        name = "NULL"
+    def parse_MONSTERS(data: str) -> dict[str, dict]:  # noqa: C901, PLR0912
-        for line in m_text.split("\n"):
+        """
-            # Name and level
+        Parse the MONSTERS section.
-            if "(Level " in line:
+
-                name = line.split(" (")[0]
+        This contains data on monsters including stat sheets, loot, and weaknesses.
-                info["stats"]["level"] = int(line.split("(Level ")[1][:-1])
+        """
-            # Stat chart rows
+        result = {}
-            elif line.startswith("|"):
+        for m_text in data.split("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"):
-                for stat in line[1:-1].split("|"):
+            info = {}
-                    if ":" in stat:
+            info["stats"] = {}
-                        stat_name, stat_value = stat.split(":")
+            info["spells"] = {}
-                        stat_name = stat_name.replace(".", "").strip().lower()
+            name = "NULL"
-                        info["stats"][stat_name] = int(stat_value)
+            for line in m_text.split("\n"):
-            # Nullifies AND weaks, split by a ;
+                # Name and level
-            elif line.startswith("NULLIFY:"):
+                if "(Level " in line:
-                # If no weaknesses, WEAK section just doesn't appear, fudge it
+                    name = line.split(" (")[0]
-                if "WEAK:" in line:
+                    info["stats"]["level"] = int(line.split("(Level ")[1][:-1])
-                    null_text, weak_text = line.split(";")
+                # Stat chart rows
                elif line.startswith("|"):
                    for stat in line[1:-1].split("|"):
                        if ":" in stat:
                            stat_name, stat_value = stat.split(":")
                            stat_name = stat_name.replace(".", "").strip().lower()
                            info["stats"][stat_name] = int(stat_value)
                # Nullifies AND weaks, split by a ;
                elif line.startswith("NULLIFY:"):
                    # If no weaknesses, WEAK section just doesn't appear, fudge it
                    if "WEAK:" in line:
                        null_text, weak_text = line.split(";")
                    else:
                        null_text = line
                        weak_text = "WEAK: "
                    info["nullifies"] = null_text.split(": ")[1].split(", ")
                    info["weak"] = weak_text.split(": ")[1].split(", ")
                # Specials are name=>desc as k:v
                # I *think* you can only have one special...
                elif line.startswith("SPECIAL"):
                    content = line.split(" ", 1)[1]
                    if len(content) > 1:
                        special_name = content.split('"')[1]
                        special_desc = content.split(": ")[1]
                        info["special"] = {special_name: special_desc}
                    else:
                        info["special"] = {}
                # Everything else is a simple k: v list where v is comma-delimited
                else:
-                    null_text = line
+                    for k in ["immune", "auto", "skills", "steal", "drops", "location"]:
-                    weak_text = "WEAK: "
+                        str_match = f"{k.upper()}:"
-                info["nullifies"] = null_text.split(": ")[1].split(", ")
+                        if line.startswith(str_match):
-                info["weak"] = weak_text.split(": ")[1].split(", ")
+                            info[k] = line.split(": ")[1].split(", ") if line.upper().strip() != str_match else []
-            # Specials are name=>desc as k:v
+                            break
-            # I *think* you can only have one special...
+
-            elif line.startswith("SPECIAL"):
+            if name != "NULL":
-                content = line.split(" ", 1)[1]
+                result[name] = info
-                if len(content) > 1:
+
-                    special_name = content.split('"')[1]
+        return result
-                    special_desc = content.split(": ")[1]
+
-                    info["special"] = {special_name: special_desc}
+    @staticmethod
    def parse_REMONSTERATE(data: str) -> dict[str, dict]:
        """
        Parse the BCCE-only REMONSTERATE section.

        Each usable line is read as ``name (original) -> sprite``. Blank lines
        and lines starting with ``-----`` are ignored. The result maps every
        name to a dict holding "originally" (text between the first "(" and
        the following ")") and "sprite" (text after "->" with leading and
        trailing periods removed).
        """
        sprites = {}
        for entry in data.split("\n"):
            if entry == "" or entry.startswith("-----"):
                continue

            # Split once on the first "(": left is the name, right starts with
            # the original monster name.
            around_paren = entry.split("(", 1)
            monster = around_paren[0].strip()
            was = around_paren[1].split(")")[0].strip()

            new_sprite = entry.split("->")[1].strip().strip(".")
            sprites[monster] = {"originally": was, "sprite": new_sprite}
        return sprites
    @staticmethod
    def parse_CHARACTERS(data: str) -> dict[str, dict]:  # noqa: C901
        """
        Parse the CHARACTERS section.

        This differs based on BCEX vs BCCE. In both flavors it contains basic data
        like name, spells, location and special abilities. In BCEX it includes
        stats as well. In BCCE stats is its own section.

        Regardless of flavor, core logic will snap stats back into this section
        later.

        Blocks are separated by blank lines; the first and last blocks are
        discarded. Returns a dict keyed by character name. Every value carries
        "stats", "spells" and "natural_magic", plus whichever other fields the
        block contained. A block with no name line is skipped rather than stored
        under a placeholder key.
        """
        # NOTE: "Notable equipment" can never be looked up here because any line
        # starting with "Notable" is consumed by its own branch below.
        replacements = {
            "Looks like": "looks",
            "World of Ruin location": "wor_location",
            "Notable equipment": "equipment",
        }

        result = {}

        for c_data in data.split("\n\n")[1:-1]:
            info = {"stats": {}, "spells": {}, "natural_magic": False}
            name = "NULL"

            for line in c_data.split("\n"):
                # Name: two leading digits, then the name from the fifth character on
                if line[0:2].isdigit():
                    name = line[4:]

                # Stat chart rows: BCEX Version only
                elif line.startswith("|"):
                    for stat in line[1:-1].split("|"):
                        if ":" in stat:
                            stat_name, stat_value = stat.split(":")
                            stat_name = stat_name.replace(".", "").strip().lower()
                            info["stats"][stat_name] = int(stat_value)

                # Spell learnset rows
                elif line.startswith("  LV"):
                    spell_level, spell_name = line.split("-", 1)
                    info["spells"][spell_name.strip()] = int(spell_level.strip().split(" ")[1])

                # Command list
                # Commands: is just a CSV list of things like "fight" "magic" etc. However if command
                # randomization is on, these will be exotic things like "KitMerton" that need additional
                # info provided from the COMMANDS section later.
                # As such, we begin with a dehydrated hash of command_name=None, then the COMMANDS section
                # will provide hydration data for the values.
                elif line.startswith("Commands:"):
                    info["commands"] = {command.strip(): None for command in line.split(":")[1].split(",")}

                elif line.startswith("Notable"):
                    info["equipment"] = [eq.strip() for eq in line.split(":")[1].split(",")]

                # Special bare strings
                elif line.startswith("Has natural"):
                    info["natural_magic"] = True

                # Everything else: normal k=v colon strings
                elif ":" in line:
                    field, value = line.split(":", 1)
                    field = replacements.get(field, field).lower()
                    info[field] = value.strip()

            # Commit once per block, and only when a name was actually found.
            # This matches parse_MONSTERS / parse_STATS and keeps a bogus "NULL"
            # character (which has no "originally" field) out of the result.
            if name != "NULL":
                result[name] = info

        return result
    @staticmethod
    def parse_STATS(data: str) -> dict[str, dict]:
        """
        Parse the BCCE-only STATS section.
        BCCE splits character stats into its own section. We use largely the same
        logic as CHARACTERS here to parse it, then return it as its own dict for
        merging back into the CHARACTERS blob later.
        """
        result = {}
        # This is pretty identical to CHARACTERS
        # Each character has a blank line between them
        # Most everything else is k : v
        for c_text in data.split("\n\n"):
            name = "NULL"
            c_data = {}
            for line in c_text.split("\n"):
                # Character name
                if line[0:2].isdigit():
                    name = line[4:]
                # Should be nothing, but let's be safe
                elif ":" not in line:
                    pass
                # A stat we can just save k : v
                else:
-                    info["special"] = {}
+                    stat, value = line.split(":")
-            # Everything else is a simple k: v list where v is comma-delimited
+                    c_data[stat] = int(value)
            else:
                for k in ["immune", "auto", "skills", "steal", "drops", "location"]:
                    str_match = f"{k.upper()}:"
                    if line.startswith(str_match):
                        info[k] = line.split(": ")[1].split(", ") if line.upper().strip() != str_match else []
                        break
-        if name != "NULL":
+            if name != "NULL":
-            result[name] = info
+                result[name] = c_data
-    return result
+        return result
    @staticmethod
    def parse_COMMANDS(data: str) -> dict[str, dict]:
        """
        Parse the COMMANDS section.
-def parse_REMONSTERATE(data: str) -> dict[str, dict]:
+        This contains information on special commands, expanding contracted command
-    """
+        names into more detailed explanations like GranSaw = Grand Train + Chainsaw.
-    Parse the BCCE-only REMONSTERATE section.
+        """
        commands = {}
-    This contains a mapping of monster sprites: what they were and what they
+        # We split by ------ which divides the command name from its data
-    turned into post-alteration.
+        # As a result we have to pull the last line from each block and remember
-    """
+        # it as the name of the command in the next block. Blorf :)
-    result = {}
+        next_command_name = None
-    for line in data.split("\n"):
+        for c_data in data.split("\n-------\n"):
-        if not line or line.startswith("-----"):
+            c_data_lines = [c_data_line.strip() for c_data_line in c_data.split("\n")]
-            continue
+            if "" in c_data_lines:
-        name = line.split("(")[0].strip()
+                c_data_lines.remove("")
-        originally = line.split("(", 1)[1].split(")")[0].strip()
+            if next_command_name:
-        sprite = line.split("->")[1].strip().strip(".")
+                command_string = "; ".join(c_data_lines[:-1])
        result[name] = {"originally": originally, "sprite": sprite}
-    return result
+                # Clip trailing junk from inconsistent spoiler log generation
                # as well as the join above
                if command_string.endswith("; "):
                    command_string = command_string[:-2]
                if command_string.endswith("."):
                    command_string = command_string[:-1]
                # Clean up a couple of clumsy string cases from the join above
                command_string = command_string.replace(".; ", ": ")
                command_string = command_string.replace("  ", " ")
                command_string = command_string.replace(":;", ":")
-def parse_CHARACTERS(data: str) -> dict[str, dict]:  # noqa: C901
+                # Commit the command to the dict
-    """
+                commands[next_command_name] = command_string
    Parse the CHARACTERS section.
-    This differs based on BCEX vs BCCE. In both flavors it contains basic data
+            next_command_name = c_data_lines[-1].lower()
    like name, spells, location and special abilities. In BCEX it includes
    stats as well. In BCCE stats is its own section.
-    Regardless of flavor, core logic will snap stats back into this section
+        return commands
    later.
    """
    replacements = {"Looks like": "looks", "World of Ruin location": "wor_location", "Notable equipment": "equipment"}
-    result = {}
+    @staticmethod
    def parse_SEED(data: str) -> dict[str, bool | str]:
        """
        Parse the injected SEED section.
-    for c_data in data.split("\n\n")[1:-1]:
+        This is a fake section injected by the loader code. It contains nothing
-        info = {"stats": {}, "spells": {}, "natural_magic": False}
+        but the seed code and we derive from this if the randomizer is BCCE or
-        name = "NULL"
+        BCEX, and try to pluck out other data.
-        for line in c_data.split("\n"):
+        We can't do much because the format is really hard to reverse.
-            # Name
+        """
-            if line[0:2].isdigit():
+        version, mode, flags, seed_num = data.split("|") if "|" in data else data.split(".")
-                name = line[4:]
+        return {
            "version": version,
            "flags": flags,
            "seed_num": seed_num,
            "mode": mode,
            "is_bcce": data.startswith("CE"),
            "seed": data,
        }
-            # Stat chart rows: BCEX Version only
+    @staticmethod
-            elif line.startswith("|"):
+    def parse_SECRET_ITEMS(data: str) -> list[str]:
-                for stat in line[1:-1].split("|"):
+        """
-                    if ":" in stat:
+        Parse the BCCE-only SECRET ITEMS section.
                        stat_name, stat_value = stat.split(":")
                        stat_name = stat_name.replace(".", "").strip().lower()
                        info["stats"][stat_name] = int(stat_value)
-            # Spell learnset rows
+        I'm unsure what this is for. It's a series of strings with no real obvious
-            elif line.startswith("  LV"):
+        significance, so we just return it as a list.
-                spell_level, spell_name = line.split("-", 1)
+        """
-                info["spells"][spell_name.strip()] = int(spell_level.strip().split(" ")[1])
+        # I have no idea what this is lol, dump it to a list for now
        return [line for line in data.split("\n") if not line.startswith("---")]
-            # Command list
+    @staticmethod
-            # Commands: is just a CSV list of things like "fight" "magic" etc. However if command
+    def cleanup_STATS(data: dict) -> bool:
-            # randomization is on, these will be exotic things like "KitMerton" that need additional
+        """
-            # info provided from the COMMANDS section later.
+        Fold BCCE-only STATS section back into CHARACTERS data.
            # As such, we begin with a dehydrated hash of command_name=None, then the COMMANDS section
            # will provide hydration data for the values.
            elif line.startswith("Commands:"):
                info["commands"] = {command.strip(): None for command in line.split(":")[1].split(",")}
-            elif line.startswith("Notable"):
+        This returns BCCE logs back to how they were laid out in BCEX: where stat blocks
-                info["equipment"] = [eq.strip() for eq in line.split(":")[1].split(",")]
+        were simply part of the CHARACTERS data.
-            # Special bare strings
+        The BCCE STATS section keys on character slot (Terra, Locke, etc) and not the
-            elif line.startswith("Has natural"):
+        new randomized character name, so some hunting has to happen here.
-                info["natural_magic"] = True
+        """
-
+        for slot, stats in data.get("STATS", {}).items():
-            # Everything else: normal k=v colon strings
+            for c_data in data.get("CHARACTERS", {}).values():
            elif ":" in line:
                field, value = line.split(":", 1)
                if field in replacements:
                    field = replacements[field]
                field = field.lower()
                info[field] = value.strip()
            result[name] = info
    return result
 def parse_STATS(data: str) -> dict[str, dict]:
    """
    Parse the BCCE-only STATS section.

    Blocks are separated by blank lines. Within a block, a line whose first
    two characters are digits names the entry (everything from the fifth
    character on); any other line containing a colon is stored as
    ``key: int(value)`` with the key kept exactly as written. Lines without a
    colon are ignored, and blocks that never name an entry are dropped.

    The result is returned as its own dict so it can be merged back into the
    CHARACTERS data later.
    """
    by_slot = {}

    for block in data.split("\n\n"):
        slot = "NULL"
        sheet = {}

        for row in block.split("\n"):
            # Entry name line
            if row[:2].isdigit():
                slot = row[4:]
                continue

            # Plain "k : v" stat line; anything without a colon is skipped
            if ":" in row:
                key, raw_value = row.split(":")
                sheet[key] = int(raw_value)

        if slot == "NULL":
            continue
        by_slot[slot] = sheet

    return by_slot
 def parse_COMMANDS(data: str) -> dict[str, dict]:
    """
    Parse the COMMANDS section.

    The section expands contracted command names into fuller explanations,
    e.g. GranSaw = Grand Train + Chainsaw. The returned dict maps each
    lowercased command name to its cleaned-up description string.
    """
    described = {}

    # The text is cut on the "-------" rule that sits between a command's name
    # and its details, so each chunk ends with the NAME of the next command.
    # We carry that name forward and attach the following chunk's details to it.
    pending = None

    for chunk in data.split("\n-------\n"):
        rows = [row.strip() for row in chunk.split("\n")]
        # Drop the first blank row, if there is one
        if "" in rows:
            rows.remove("")

        if pending:
            # All rows but the last (which is the next command's name) form
            # the description; trim the trailing "; " and "." the join and the
            # inconsistent log formatting can leave behind.
            text = "; ".join(rows[:-1]).removesuffix("; ").removesuffix(".")

            # Tidy up the clumsy sequences the join can produce
            for clumsy, tidy in ((".; ", ": "), ("  ", " "), (":;", ":")):
                text = text.replace(clumsy, tidy)

            described[pending] = text

        pending = rows[-1].lower()

    return described
 def parse_SEED(data: str) -> dict[str, bool | str]:
    """
    Parse the injected SEED section.
    This is a fake section injected by the loader code. It contains nothing
    but the seed code and we derive from this if the randomizer is BCCE or
    BCEX, and try to pluck out other data.
    We can't do much because the format is really hard to reverse.
    """
    version, mode, flags, seed_num = data.split("|") if "|" in data else data.split(".")
    return {
        "version": version,
        "flags": flags,
        "seed_num": seed_num,
        "mode": mode,
        "is_bcce": data.startswith("CE"),
        "seed": data,
    }
 def parse_SECRET_ITEMS(data: str) -> list[str]:
    """
    Parse the BCCE-only SECRET ITEMS section.

    The meaning of these strings is unclear, so the section is returned as a
    plain list of its lines, leaving out any line that starts with "---".
    Blank lines are kept.
    """
    items = []
    for entry in data.split("\n"):
        # Rule lines are decoration, not content
        if entry.startswith("---"):
            continue
        items.append(entry)
    return items
 def load(filename: str) -> dict[str, str]:
    """
    Load file and tokenize into sections.

    The file is read as UTF-8 and cut on the long "====" rule lines. The text
    before the first rule supplies the "SEED" entry: its first line with the
    leading 12 characters removed. Every later chunk becomes one entry whose
    key is the chunk's first line minus its leading 5 characters and whose
    value is the rest of the chunk.
    """
    text = Path(filename).read_text(encoding="utf-8")
    preamble, *named_chunks = text.split("============================================================\n")

    # The top chunk needs special handling and contains only the seed code
    sections = {"SEED": preamble.split("\n", 1)[0][12:]}

    # Everything else is stored raw under its section name for now
    for chunk in named_chunks:
        title_line, body = chunk.split("\n", 1)
        sections[title_line[5:]] = body

    return sections
 if __name__ == "__main__":
    sections = load(sys.argv[1])
    data = {}
    # This mess tries to run a function named parse_SECTION for each section,
    # and just continues to the next section if one doesn't exist.
    for k, v in sections.items():
        try:
            section_func = f"parse_{k.replace(' ', '_')}"
            data[k] = globals()[section_func](v)
        except KeyError:
            continue
    # Hydrate each character's command list with descriptions of the commands
    # This uses the COMMANDS section, but if one doesn't exist just repeat the command
    # name because it should be simple things like "fight" and "magic"
    command_info = data.get("COMMANDS", {})
    for c_data in data["CHARACTERS"].values():
        for command in c_data["commands"]:
            c_data["commands"][command] = command_info.get(command, command)
    # If we have a STATS block, snap it into CHARACTER data
    # BCCE broke this out into its own section
    # Worse, it keys on slot name, not randomized character name
    if "STATS" in data:
        for slot, stats in data["STATS"].items():
            for c_data in data["CHARACTERS"].values():
                if c_data["originally"].lower() == slot.lower():
                    c_data["stats"] = stats
-        del data["STATS"]
+        return True
-    # If we ran BCCE Remonsterate, fold sprite data into monster block
+    @staticmethod
-    if "REMONSTERATE" in data:
+    def cleanup_COMMANDS(data: dict) -> bool:
-        for name, info in data["REMONSTERATE"].items():
+        """Fold COMMANDS expanded descriptions into CHARACTERS command data."""
-            for m_name, m_info in data["MONSTERS"].items():
+        # If our COMMANDS section is missing or somehow missing a given command, we just
        # repeat the command's name as its description. This should only be simple things
        # like "fight" and "magic" unless something goes wrong.
        command_info = data.get("COMMANDS", {})
        for c_data in data.get("CHARACTERS", {}).values():
            for command in c_data.get("commands", {}):
                c_data["commands"][command] = command_info.get(command, command)
        return False
    @staticmethod
    def cleanup_REMONSTERATE(data: dict) -> bool:
        """Fold REMONSTERATE section into MONSTERS section data."""
        for name, info in data.get("REMONSTERATE", {}).items():
            for m_name, m_info in data.get("MONSTERS", {}).items():
                if name == m_name:
                    m_info["originally"] = info["originally"]
                    m_info["sprite"] = info["sprite"]
-        del data["REMONSTERATE"]
+        return True
    def get_sections(self, data: str) -> dict[str, str]:
        """
        Split logfile text and return a dict of sections for parsing.

        The text is cut on the long "====" rule lines. The part before the
        first rule supplies the "SEED" entry: its first line with the leading
        12 characters removed. Each later chunk becomes one entry keyed by its
        first line minus the leading 5 characters, with the remainder of the
        chunk as the value.

        The resulting dict is also stored on ``self.config_sections``.
        """
        preamble, *named_chunks = data.split("============================================================\n")

        # The top chunk needs special handling and contains only the seed code
        found = {"SEED": preamble.split("\n", 1)[0][12:]}

        # Everything else is stored raw under its section name for now
        for chunk in named_chunks:
            title_line, body = chunk.split("\n", 1)
            found[title_line[5:]] = body

        self.config_sections = found
        return found
    def parse(self) -> dict:
        """
        Fully parse the logfile and return the full data object.

        Steps:
          1. Read ``self.filename`` as UTF-8 and split it with ``get_sections``.
          2. For every section, call ``parse_<SECTION>`` (spaces in the section
             name become underscores) if this object has such an attribute;
             sections without a parser are left out of the result.
          3. For every parsed section, call ``cleanup_<SECTION>`` with the whole
             result dict if one exists. Sections whose cleanup returns a true
             value are removed once all cleanups have run.

        The final dict is also stored on ``self.data_sections``.
        """
        raw_text = Path(self.filename).read_text(encoding="utf-8")

        parsed = {}
        for title, body in self.get_sections(raw_text).items():
            handler = getattr(self, "parse_" + title.replace(" ", "_"), None)
            if handler is not None:
                parsed[title] = handler(body)

        # Cleanups need every section parsed first, so they run as a second
        # pass. Deletions are deferred so the dict is not resized mid-iteration.
        doomed = set()
        for title in parsed:
            finisher = getattr(self, "cleanup_" + title.replace(" ", "_"), None)
            if finisher is not None and finisher(parsed):
                doomed.add(title)

        for title in doomed:
            parsed.pop(title, None)

        self.data_sections = parsed
        return parsed
 if __name__ == "__main__":
    # Script entry point: the first command-line argument is the logfile path.
    # The parsed result is written to stdout as a single JSON document.
    logfile = sys.argv[1]
    print(json.dumps(Parser(logfile).parse()))