bcexparse/main.py

#!/usr/bin/env python3

__version__ = "0.1"
__author__ = "Trysdyn Black"

import json
import sys


def parse_CHARACTERS(data):
    """Parse the CHARACTERS section text into a dict keyed by character name.

    The section is split into blank-line-delimited blocks; the first and last
    blocks are ignored. Each remaining block yields one record with at least:

    * ``stats``: dict of lowercase stat name -> int (from ``|``-delimited rows)
    * ``spells``: dict of spell name -> level learned (from ``  LV`` rows)
    * ``natural_magic``: True when a ``Has natural...`` line is present

    plus ``commands`` / ``equipment`` lists and any other ``key: value`` lines
    stored under the lowercased (and possibly renamed) key.

    A block is only added to the result once its ``NN. Name`` line is seen, so
    blocks without a name line no longer produce a placeholder ``"NULL"`` entry.

    Args:
        data: Raw text of the CHARACTERS section.

    Returns:
        dict mapping character name to its parsed record.
    """
    replacements = {"Looks like": "looks", "World of Ruin location": "wor_location", "Notable equipment": "equipment"}

    result = {}

    for c_data in data.split("\n\n")[1:-1]:
        info = {"stats": {}, "spells": {}, "natural_magic": False}

        for line in c_data.split("\n"):
            # Name: two leading digits, name text starts at column 4.
            # Register the record here; ``info`` is shared by reference, so
            # fields parsed from the other lines of the block still land in it.
            if line[0:2].isdigit():
                result[line[4:]] = info

            # Stat chart rows
            # BCEX Version Only
            elif line.startswith("|"):
                for stat in line[1:-1].split("|"):
                    if ":" in stat:
                        stat_name, stat_value = stat.split(":")
                        stat_name = stat_name.replace(".", "").strip().lower()
                        info["stats"][stat_name] = int(stat_value)

            # Spell learnset rows
            elif line.startswith("  LV"):
                spell_level, spell_name = line.split("-", 1)
                info["spells"][spell_name.strip()] = int(spell_level.strip().split(" ")[1])

            # Special k=v strings with comma-delimited lists
            elif line.startswith("Commands:"):
                info["commands"] = [command.strip() for command in line.split(":")[1].split(",")]

            elif line.startswith("Notable"):
                info["equipment"] = [eq.strip() for eq in line.split(":")[1].split(",")]

            # Special bare strings
            elif line.startswith("Has natural"):
                info["natural_magic"] = True

            # Everything else: normal k=v colon strings
            elif ":" in line:
                field, value = line.split(":", 1)
                field = replacements.get(field, field).lower()
                info[field] = value.strip()

    return result


def parse_STATS(data):
    """Parse the standalone STATS section into a dict of slot name -> stats.

    BCCE Version Only: BCCE splits stats into its own section, which is parsed
    here and merged into the character data by the caller.

    Each blank-line-delimited block contributes one entry. A line starting
    with two digits supplies the entry name (text from column 4 onward); every
    other line containing a colon is read as ``stat: integer``. Blocks that
    never supply a name are dropped.

    Args:
        data: Raw text of the STATS section.

    Returns:
        dict mapping name to a dict of stat label -> int.
    """
    parsed = {}

    for block in data.split("\n\n"):
        # "NULL" marks a block for which no name line has been seen
        slot = "NULL"
        values = {}

        for row in block.split("\n"):
            if row[:2].isdigit():
                slot = row[4:]
                continue
            if ":" in row:
                label, raw_value = row.split(":")
                values[label] = int(raw_value)

        if slot == "NULL":
            continue
        parsed[slot] = values

    return parsed


def parse_COMMANDS(data):
    """Parse the COMMANDS section into a dict of command name -> description.

    The text is split on ``-------`` divider lines. Because the divider sits
    between a command's name and its description, the last line of each block
    is the (lowercased) name of the command described by the *next* block, and
    the remaining lines of a block are joined with ``"; "`` to form the
    description of the command named by the previous block.

    Blocks that end up with no lines (empty input, or a leading, trailing or
    doubled divider) no longer raise IndexError; they simply name no command.

    Args:
        data: Raw text of the COMMANDS section.

    Returns:
        dict mapping lowercase command name to its cleaned-up description.
    """
    commands = {}

    # We split by ------- which divides the command name from its data
    # As a result we have to pull the last line from each block and remember
    # it as the name of the command in the next block. Blorf :)
    next_command_name = None
    for c_data in data.split("\n-------\n"):
        c_data_lines = [c_data_line.strip() for c_data_line in c_data.split("\n")]
        # list.remove drops only the first blank line; later blanks stay in
        # the list and are dealt with by the string clean-up below.
        if "" in c_data_lines:
            c_data_lines.remove("")
        if next_command_name:
            command_string = "; ".join(c_data_lines[:-1])

            # Clip trailing junk from inconsistent spoiler log generation
            # as well as the join above
            if command_string.endswith("; "):
                command_string = command_string[:-2]
            if command_string.endswith("."):
                command_string = command_string[:-1]

            # Clean up a couple of clumsy string cases from the join above
            command_string = command_string.replace(".; ", ": ")
            command_string = command_string.replace("  ", " ")
            command_string = command_string.replace(":;", ":")

            # Commit the command to the dict
            commands[next_command_name] = command_string

        # An empty block has no last line, so nothing names the next block
        next_command_name = c_data_lines[-1].lower() if c_data_lines else None

    return commands


def load(filename):
    """Read a spoiler log and return its sections as a dict of name -> raw text.

    The file is tokenized on the ``====`` divider line. The chunk before the
    first divider contributes only the seed code, stored under ``"SEED"``
    (its first line with the leading 12 characters removed). Every later
    chunk is stored under its first line with the leading 5 characters
    removed, with the rest of the chunk as the value.

    Args:
        filename: Path of the spoiler log to read.

    Returns:
        dict mapping section name to that section's unparsed text.
    """
    with open(filename, "r") as infile:
        raw_text = infile.read()

    chunks = raw_text.split("============================================================\n")

    # The top chunk needs special handling and contains only the seed code
    seed_line = chunks[0].split("\n", 1)[0]
    sections = {"SEED": seed_line[12:]}

    # Everything else is dumped into named sections for later parsing
    for chunk in chunks[1:]:
        header, body = chunk.split("\n", 1)
        sections[header[5:]] = body

    return sections


if __name__ == "__main__":
    # Script entry point: parse the spoiler log named on the command line and
    # print the combined result to stdout as a single JSON document.
    if len(sys.argv) < 2:
        sys.exit(f"usage: {sys.argv[0]} SPOILER_LOG")

    sections = load(sys.argv[1])

    data = {}

    # Run the module-level function named parse_SECTION for each section and
    # skip sections that have no such function. The lookup is done separately
    # from the call so a KeyError raised inside a parser is not mistaken for
    # "no parser exists" and silently dropped.
    for section_name, section_text in sections.items():
        parser = globals().get(f"parse_{section_name}")
        if parser is None:
            continue
        data[section_name] = parser(section_text)

    # Missing sections are treated as empty rather than crashing
    characters = data.get("CHARACTERS", {})
    command_descriptions = data.get("COMMANDS", {})

    # Subkey CHARACTERS commands with COMMANDS data
    # This turns lists of commands each character has into hashes where
    # Command name => Textual desc of command (or the name itself when the
    # command has no description)
    for c_data in characters.values():
        c_data["commands"] = {
            command: command_descriptions.get(command, command)
            for command in c_data.get("commands", [])
        }

    # If we have a STATS block, snap it into CHARACTER data
    # BCCE broke this out into its own section
    # Worse, it keys on slot name, not randomized character name
    if "STATS" in data:
        # Index once by lowercased slot name; a later duplicate slot wins,
        # matching what a slot-by-slot scan would leave behind.
        stats_by_slot = {slot.lower(): stats for slot, stats in data["STATS"].items()}
        for c_data in characters.values():
            originally = c_data.get("originally")
            if originally is not None and originally.lower() in stats_by_slot:
                c_data["stats"] = stats_by_slot[originally.lower()]
        del data["STATS"]

    # Barf this pile of trash out
    print(json.dumps(data))