bcexparse/main.py

#!/usr/bin/env python3

"""Parse BCEX (or BCCE) logs into json objects."""

__version__ = "0.5.1"
__author__ = "Trysdyn Black"

import json
import sys
from pathlib import Path


class Parser:  # noqa: PLR0904
    """
    BCEX/BCCE spoiler logfile parser.

    Sections missing support:
    - TREASURE CHESTS
    - JUNCTIONS
    """

    def __init__(self, filename: str) -> None:
        """Initialize parser with filename."""
        self.filename = filename
        self.config_sections = {}
        self.data_sections = {}

    @staticmethod
    def parse_MONSTERS(data: str) -> dict[str, dict]:  # noqa: C901, PLR0912
        """
        Parse the MONSTERS section.

        This contains data on monsters including stat sheets, loot, and weaknesses.
        """
        result = {}
        for m_text in data.split("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"):
            info = {}
            info["stats"] = {}
            info["spells"] = {}
            name = "NULL"
            for line in m_text.split("\n"):
                # Name and level
                if "(Level " in line:
                    name = line.split(" (")[0]
                    info["stats"]["level"] = int(line.split("(Level ")[1][:-1])
                    info["name"] = name
                # Stat chart rows
                elif line.startswith("|"):
                    for stat in line[1:-1].split("|"):
                        if ":" in stat:
                            stat_name, stat_value = stat.split(":")
                            stat_name = stat_name.replace(".", "").strip().lower()
                            info["stats"][stat_name] = int(stat_value)
                # Nullifies AND weaks, split by a ;
                elif line.startswith("NULLIFY:"):
                    # If no weaknesses, WEAK section just doesn't appear, fudge it
                    if "WEAK:" in line:
                        null_text, weak_text = line.split(";")
                    else:
                        null_text = line
                        weak_text = "WEAK: "
                    info["nullifies"] = null_text.split(": ")[1].split(", ")
                    info["weak"] = weak_text.split(": ")[1].split(", ")

                    # Due to split oddness we can populate a blank string into weakness
                    # Delete it if we did.
                    if info["weak"] == [""]:
                        del info["weak"]
                # Specials are name=>desc as k:v
                # I *think* you can only have one special...
                elif line.startswith("SPECIAL"):
                    content = line.split(" ", 1)[1]
                    if len(content) > 1:
                        # BCEX 5.0 changes the skill name to be single quoted instead of double
                        special_name = content.split('"')[1] if '"' in content else content.split("'")[1]
                        special_desc = content.split(": ")[1]
                        info["special"] = {special_name: special_desc}
                    else:
                        info["special"] = {}
                # Morph results, with a percent chance in each one
                elif line.startswith("MORPH"):
                    _, chance, items = line.split(" ", 2)
                    # BCEX 5.0 has a bug where % can be doubled sometimes
                    chance = int(chance[1:-3].replace("%", ""))
                    items = items.split(", ")
                    if "morph" not in info:
                        info["morph"] = {"percent_chance": chance, "items": items}
                # Everything else is a simple k: v list where v is comma-delimited
                else:
                    for k in ["immune", "auto", "skills", "steal", "drops", "location"]:
                        str_match = f"{k.upper()}:"
                        if line.startswith(str_match):
                            info[k] = line.split(": ")[1].split(", ") if line.upper().strip() != str_match else []
                            break

            if name != "NULL":
                result[name] = info

        return result

    @staticmethod
    def parse_REMONSTERATE(data: str) -> dict[str, dict]:
        """
        Parse the BCCE-only REMONSTERATE section.

        This contains a mapping of monster sprites: what they were and what they
        turned into post-alteration.
        """
        result = {}
        for line in data.split("\n"):
            if not line or line.startswith("-----"):
                continue
            name = line.split("(")[0].strip()
            originally = line.split("(", 1)[1].split(")")[0].strip()
            sprite = line.split("->")[1].strip().strip(".")
            result[name] = {"originally": originally, "sprite": sprite}

        return result

    @staticmethod
    def parse_CHARACTERS(data: str) -> dict[str, dict]:  # noqa: C901
        """
        Parse the CHARACTERS section.

        This differs based on BCEX vs BCCE. In both flavors it contains basic data
        like name, spells, location and special abilities. In BCEX it includes
        stats as well. In BCCE stats is its own section.

        Regardless of flavor, core logic will snap stats back into this section
        later.
        """
        replacements = {
            "Looks like": "looks",
            "World of Ruin location": "wor_location",
            "Notable equipment": "equipment",
        }

        result = {}

        for c_data in data.split("\n\n")[1:-1]:
            info = {"stats": {}, "spells": {}, "natural_magic": False}
            name = "NULL"

            for line in c_data.split("\n"):
                # Name
                if line[0:2].isdigit():
                    name = line[4:]
                    info["name"] = name

                # Stat chart rows: BCEX Version only
                elif line.startswith("|"):
                    for stat in line[1:-1].split("|"):
                        if ":" in stat:
                            stat_name, stat_value = stat.split(":")
                            stat_name = stat_name.replace(".", "").strip().lower()
                            info["stats"][stat_name] = int(stat_value)

                # Spell learnset rows
                elif line.startswith("  LV"):
                    spell_level, spell_name = line.split("-", 1)
                    info["spells"][spell_name.strip()] = int(spell_level.strip().split(" ")[1])

                # Command list
                # Commands: is just a CSV list of things like "fight" "magic" etc. However if command
                # randomization is on, these will be exotic things like "KitMerton" that need additional
                # info provided from the COMMANDS section later.
                # As such, we begin with a dehydrated hash of command_name=None, then the COMMANDS section
                # will provide hydration data for the values.
                elif line.startswith("Commands:"):
                    info["commands"] = {command.strip(): None for command in line.split(":")[1].split(",")}

                elif line.startswith("Notable"):
                    info["equipment"] = [eq.strip() for eq in line.split(":")[1].split(",")]

                # Special bare strings
                elif line.startswith("Has natural"):
                    info["natural_magic"] = True

                # Everything else: normal k=v colon strings
                elif ":" in line:
                    field, value = line.split(":", 1)
                    if field in replacements:
                        field = replacements[field]
                    field = field.lower()
                    info[field] = value.strip()

                result[name] = info

        return result

    @staticmethod
    def parse_STATS(data: str) -> dict[str, dict]:
        """
        Parse the BCCE-only STATS section.

        BCCE splits character stats into its own section. We use largely the same
        logic as CHARACTERS here to parse it, then return it as its own dict for
        merging back into the CHARACTERS blob later.
        """
        result = {}

        # This is pretty identical to CHARACTERS
        # Each character has a blank line between them
        # Most everything else is k : v
        for c_text in data.split("\n\n"):
            name = "NULL"
            c_data = {}

            for line in c_text.split("\n"):
                # Character name
                if line[0:2].isdigit():
                    name = line[4:]
                # Should be nothing, but let's be safe
                elif ":" not in line:
                    pass
                # A stat we can just save k : v
                else:
                    stat, value = line.split(":")
                    c_data[stat] = int(value)

            if name != "NULL":
                result[name] = c_data

        return result

    @staticmethod
    def parse_COMMANDS(data: str) -> dict[str, dict]:
        """
        Parse the COMMANDS section.

        This contains information on special commands, expanding contracted command
        names into more detailed explanations like GranSaw = Grand Train + Chainsaw.
        """
        commands = {}

        # We split by ------ which divides the command name from its data
        # As a result we have to pull the last line from each block and remember
        # it as the name of the command in the next block. Blorf :)
        next_command_name = None
        for c_data in data.split("\n-------\n"):
            c_data_lines = [c_data_line.strip() for c_data_line in c_data.split("\n")]
            if "" in c_data_lines:
                c_data_lines.remove("")
            if next_command_name:
                command_string = "; ".join(c_data_lines[:-1])

                # Clip trailing junk from inconsistent spoiler log generation
                # as well as the join above
                if command_string.endswith("; "):
                    command_string = command_string[:-2]
                if command_string.endswith("."):
                    command_string = command_string[:-1]

                # Clean up a couple of clumsy string cases from the join above
                command_string = command_string.replace(".; ", ": ")
                command_string = command_string.replace("  ", " ")
                command_string = command_string.replace(":;", ":")

                # Commit the command to the dict
                commands[next_command_name] = command_string

            next_command_name = c_data_lines[-1].lower()

        return commands

    @staticmethod
    def parse_SEED(data: str) -> dict[str, bool | str]:
        """
        Parse the injected SEED section.

        This is a fake section injected by the loader code. It contains nothing
        but the seed code and we derive from this if the randomizer is BCCE or
        BCEX, and try to pluck out other data.

        We can't do much because the format is really hard to reverse.
        """
        version, mode, flags, seed_num = data.split("|") if "|" in data else data.split(".")
        return {
            "version": version,
            "flags": flags,
            "seed_num": seed_num,
            "mode": mode,
            "is_bcce": data.startswith("CE"),
            "seed": data,
        }

    @staticmethod
    def parse_SECRET_ITEMS(data: str) -> list[str]:
        """
        Parse the BCCE-only SECRET ITEMS section.

        I'm unsure what this is for. It's a series of strings with no real obvious
        significance, so we just return it as a list.
        """
        # I have no idea what this is lol, dump it to a list for now
        return [line for line in data.split("\n") if not line.startswith("---")]

    @staticmethod
    def parse_MUSIC(data: str) -> dict[str, dict]:
        """
        Parse the MUSIC section.

        This differs significantly between BCEX and BCCE: Numbers and data are split
        with a period in BCCE and a colon in BCEX. BCEX puts arranger on the same line
        as composer, split with --, BCEX lacks the Jukebox Title section entirely.
        """
        music_sections = data.split("\n\n")

        replacements = {}

        for section in music_sections[1:]:
            if not section.strip():
                continue

            # BCEX and BCCE divide numbers from data differently
            if ":" in section[:6]:
                _, info = section.split(":", 1)
            else:
                _, info = section.split(".", 1)

            # The name of the song being replaced preceeds a ->
            old_name, info = info.split("->", 1)
            old_name = old_name.strip()

            replacements[old_name] = {}

            # Info is, mostly, one item per line, so let's go by line
            tok_info = info.split("\n")

            for k in ("name", "title", "composer", "arranger", "jukebox_title"):
                # Not every song has all the data
                if not len(tok_info):
                    break

                line = tok_info.pop(0).strip()

                # BCCE puts arranger on its own line. BCEX puts it on the same line as
                # composed, split by "--". So we have to handle both
                if "-- Arranged by " in line:
                    line, arranger = line.split("-- Arranged by ", 1)
                    replacements[old_name]["arranger"] = arranger

                # Jukebox Title is in prens with extra stuff to chomp
                if "Jukebox title" in line:
                    line = line[16:-1]

                # These strings are fluff but should only appear in compose/arranger lines
                # so it's safe to just blindly chomp them
                line = line.split("Composed by ", 1)[-1].strip()
                line = line.replace("Ripped and/or arranged by ", "")

                replacements[old_name][k] = line

        return replacements

    @staticmethod
    def parse_AESTHETICS(data: str) -> dict[str, str]:
        """
        Parse the BCCE-only AESTHETICS section.

        This is just a k=v list we split up and strip.
        """
        replacements = {}

        for line in data.split("\n"):
            if ":" not in line:
                continue

            old, new = line.split(":")
            old = old.strip()
            new = new.strip()

            replacements[old] = new

        return replacements

    @staticmethod
    def parse_MAGITEK(data: str) -> dict[str, list]:
        """
        Parse the BCCE-only MAGITEK section.

        This section contains info on what Terra and the guards have in their magitek skill
        lists at the start of the game. Unfortunately like the STATS block, this keys on slot
        name and not randomized name, but at least it's super simple.
        """
        result = {"terra": [], "others": []}

        mode = None
        for line in data.split("\n"):
            if line.startswith("Terra Magitek"):
                mode = "terra"
            elif line.startswith("Other Actor"):
                mode = "others"
            elif line.strip() and mode:
                result[mode].append(line.strip())

        return result

    @staticmethod
    def parse_DANCES(data: str) -> dict[str, dict]:
        """
        Parse the BCCE-only DANCES section.

        This section's a list of dances with their effects chances. Each dance seems to
        always have four effects, spaced at specific locations in the string, so we chomp
        by hardcoded locations. Brittle.
        """
        result = {}

        dance = None
        for line in data.split("\n"):
            # Formatting line
            if "-----" in line:
                continue
            # Result list
            if line.startswith(" "):
                # This is more brittle than I'd like but the logs space results out by
                # character position and some dance results have spaces so it's hard to
                # split properly.
                for i in range(2, 57, 18):
                    chance, effect = line[i : i + 18].split(" ", 1)
                    result[dance][effect.strip()] = chance.strip()
            # New dance section
            elif line.strip():
                dance = line.strip()
                if dance not in result:
                    result[dance] = {"name": dance}

        return result

    @staticmethod
    def parse_ESPERS(data: str) -> dict[str, dict]:
        """
        Parse the ESPERS section.

        This section lists espers, what they teach, their bonuses, and locations. We
        assume any line with a : in it is either BONUS or LOCATION and just k=v it. Any
        line after a blank line is an esper name, and anything else is a spell learn option.
        """
        result = {}

        esper = None
        next_esper = False
        for line in data.split("\n"):
            # Formatting line
            if "-----" in line:
                continue
            # Blank lines divide esper sections
            if not line.strip():
                next_esper = True
            # The first line in a new section is the esper name
            elif next_esper:
                esper = line.strip()
                if esper not in result:
                    result[esper] = {"learnset": {}, "name": esper}
                next_esper = False
            # Any line with ":" is a k=v we should just shove into the dict
            elif ": " in line:
                k, v = line.split(": ")
                result[esper][k.lower()] = v.strip()
            # Everything else should be spell learnset
            else:
                spell, mult = line.split(" x")
                result[esper]["learnset"][spell.strip()] = f"x{mult}"

        return result

    @staticmethod
    def parse_ITEM_MAGIC(data: str) -> dict[str, dict]:
        """
        Parse the ITEM MAGIC section.

        This section is actually three distinct subsections. Breakable items and procs
        we can just store as k=v in sub-dicts. The spell-teaching items section we treat
        like the esper learnset and store spell_name = learn multiplier.
        """
        result = {}

        section = None
        for line in data.split("\n"):
            # Formatting lines
            if "-----" in line or not line.strip():
                continue
            # Anything with ":" is a k=v to insert
            if ":" in line:
                k, v = line.split(": ")
                # Spell-teaching needs special logic to get the {spell: multiplier} format
                if section == "spell-teaching":
                    spell, mult = v.split(" x")
                    result[section][k.strip()] = {spell.strip(): f"x{mult}"}
                else:
                    result[section][k.strip()] = v.strip()
            # Anything else should be a section header. Use the first word unless it's "ITEM"
            else:
                tok_line = line.split()
                section = tok_line[1].lower() if "ITEM" in tok_line[0] else tok_line[0].lower()
                result[section] = {}

        return result

    @staticmethod
    def parse_COLOSSEUM(data: str) -> dict[str, dict]:
        """
        Parse the COLOSSEUM section.

        Each line is one item offered, which has a resulting item, a monster level, and a
        a monster name. This results in basic string splitting giving us all the data we
        need.
        """
        result = {}

        for line in data.split("\n"):
            if "-----" in line or not line.strip():
                continue

            item, tok_line = line.split("->")
            new_item, tok_line = tok_line.split(":  LV ")
            level, name = tok_line.split(" ", 1)

            result[item.strip()] = {
                "becomes": new_item.strip(),
                "battle": {"monster": name.strip(), "level": int(level.strip())},
            }

        return result

    @staticmethod
    def parse_ITEM_EFFECTS(data: str) -> dict[str, dict]:  # noqa: C901, PLR0912
        """
        Parse the BCCE-only ITEM EFFECTS section.

        This is a weird chimera section with multiple sub-sections. Most of these
        sub-sections can just be k:v or k:v, v, v parsed. Elemental properties is
        a special indented list and special features can contain command changers
        so we need a lot of special parsing.
        """
        result = {}
        mode = None
        item = None

        for line in data.split("\n"):
            if "-----" in line or not line.strip():
                continue

            if line == line.upper():
                mode = line.lower().strip().replace(" ", "_")
                result[mode] = {}
            # Everything here is k:v splitable except the elemental properties
            # section which is k:v but with a return and indent and multiple values.
            elif mode == "elemental_properties":
                if line.startswith(" "):
                    tok_line = line.split()
                    operator = "+" if "Gained" in tok_line else "-" if "Lost" in tok_line else ""
                    element = tok_line[-1]
                    effect = tok_line[2].strip(":")

                    if item not in result[mode]:
                        result[mode][item] = []

                    result[mode][item].append(f"{operator}{element} {effect}")
                else:
                    item = line.split(":")[0]
                    result[mode][item] = []
            else:
                item, effect = line.split(":")
                item = item.strip()
                effect = effect.strip()

                for effect_token in effect.split(", "):
                    # This is a command change. This can appear in command_changers *or* features
                    # In either case the result goes in command changers for consistency
                    if "->" in effect_token:
                        old, new = effect_token.split("->")
                        # We don't have a guarantee COMMAND CHANGERS is in yet...
                        if "command_changers" not in result:
                            result["command_changers"] = {}
                        if item not in result["command_changers"]:
                            result["command_changers"][item] = {}

                        result["command_changers"][item][old.strip()] = new.strip()
                    # Everything else should hopefully just be a basic effect list
                    else:
                        if item not in result[mode]:
                            result[mode][item] = []
                        result[mode][item].append(effect_token.strip())

        return result

    @staticmethod
    def parse_SHOPS(data: str) -> dict[str, dict]:
        """
        Parse SHOPS section.

        For now this is just a dict of shop name => {item => cost}. I would like to split
        this up better so you have, for example narshe["wob"]["after_kefka"]["weapons"] but
        that's probably more lifting than is necessary. Most people will just be searching
        for buyable items period.
        """
        result = {}
        shop = None

        for line in data.split("\n"):
            if "-----" in line or not line.strip():
                continue

            if line == line.upper():
                shop = line.strip()
                result[shop] = {"stock": {}, "female_discount": False}
            elif line.startswith("Discounts for female characters"):
                result[shop]["female_discount"] = True
            else:
                tok_line = line.split()
                item = " ".join(tok_line[:-1])
                cost = tok_line[-1]
                result[shop]["stock"][item.strip()] = int(cost.strip())

        return result

    @staticmethod
    def cleanup_STATS(data: dict) -> bool:
        """
        Fold BCCE-only STATS section back into CHARACTERS data.

        This returns BCCE logs back to how they were laid out in BCEX: where stat blocks
        were simply part of the CHARACTERS data.

        The BCCE STATS section keys on character slot (Terra, Locke, etc) and not the
        new randomized character name, so some hunting has to happen here.
        """
        for slot, stats in data.get("STATS", {}).items():
            for c_data in data.get("CHARACTERS", {}).values():
                if c_data["originally"].lower() == slot.lower():
                    c_data["stats"] = stats

        return True

    @staticmethod
    def cleanup_COMMANDS(data: dict) -> bool:
        """Fold COMMANDS expanded descriptions into CHARACTERS command data."""
        # If our COMMANDS section is missing or somehow missing a given command, we just
        # repeat the command's name as its description. This should only be simple things
        # like "fight" and "magic" unless something goes wrong.
        command_info = data.get("COMMANDS", {})
        for c_data in data.get("CHARACTERS", {}).values():
            for command in c_data.get("commands", {}):
                c_data["commands"][command] = command_info.get(command, command)

        return False

    @staticmethod
    def cleanup_REMONSTERATE(data: dict) -> bool:
        """Fold REMONSTERATE section into MONSTERS section data."""
        for name, info in data.get("REMONSTERATE", {}).items():
            for m_name, m_info in data.get("MONSTERS", {}).items():
                if name == m_name:
                    m_info["originally"] = info["originally"]
                    m_info["sprite"] = info["sprite"]

        return True

    def get_sections(self, data: str) -> dict[str, str]:
        """Split logfile text and return a dict of sections for parsing."""
        tok_data = data.split("============================================================\n")

        sections = {}

        top_section = True
        for s in tok_data:
            # The top section needs special handling and contains only seed code
            if top_section:
                sections["SEED"] = s.split("\n", 1)[0][12:]
                top_section = False
                continue

            # Everything else we just dump into named sections for now
            section_header, section_data = s.split("\n", 1)
            sections[section_header[5:]] = section_data

        self.config_sections = sections
        return sections

    def parse(self) -> dict:
        """Fully parse the logfile and return the full data object."""
        # Get individual sections to work on
        with Path(self.filename).open(encoding="utf-8") as infile:
            sections = self.get_sections(infile.read())

        data = {}

        # For each section attempt to run a parser function for it
        for k, v in sections.items():
            section_func = f"parse_{k.replace(' ', '_')}"
            if hasattr(self, section_func):
                data[k] = getattr(self, section_func)(v)

        # Do post-parse cleanup. We need all sections parsed to do these
        # Any cleanup function that returns true has its respective section deleted
        section_dels = set()
        for k in data:
            section_func = f"cleanup_{k.replace(' ', '_')}"
            if hasattr(self, section_func) and getattr(self, section_func)(data):
                section_dels.add(k)

        # Any section cleanup that returns true means delete that section
        for k in section_dels:
            if k in data:
                del data[k]

        self.data_sections = data
        return data


if __name__ == "__main__":
    # Parse the logfile named by the first command-line argument and write the
    # resulting object to stdout as a single JSON document.
    parsed = Parser(sys.argv[1]).parse()
    print(json.dumps(parsed))