diff --git a/main.py b/main.py index f933698..b60011c 100644 --- a/main.py +++ b/main.py @@ -2,7 +2,7 @@ """Parse BCEX (or BCCE) logs into json objects.""" -__version__ = "0.4.0" +__version__ = "0.4.1" __author__ = "Trysdyn Black" import json @@ -10,321 +10,366 @@ import sys from pathlib import Path -def parse_MONSTERS(data: str) -> dict[str, dict]: # noqa: C901, PLR0912 - """ - Parse the MONSTERS section. +class Parser: + """BCEX/BCCE spoiler logfile parser.""" - This contains data on monsters including stat sheets, loot, and weaknesses. - """ - result = {} - for m_text in data.split("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"): - info = {} - info["stats"] = {} - info["spells"] = {} - name = "NULL" - for line in m_text.split("\n"): - # Name and level - if "(Level " in line: - name = line.split(" (")[0] - info["stats"]["level"] = int(line.split("(Level ")[1][:-1]) - # Stat chart rows - elif line.startswith("|"): - for stat in line[1:-1].split("|"): - if ":" in stat: - stat_name, stat_value = stat.split(":") - stat_name = stat_name.replace(".", "").strip().lower() - info["stats"][stat_name] = int(stat_value) - # Nullifies AND weaks, split by a ; - elif line.startswith("NULLIFY:"): - # If no weaknesses, WEAK section just doesn't appear, fudge it - if "WEAK:" in line: - null_text, weak_text = line.split(";") + def __init__(self, filename: str) -> None: + """Initialize parser with filename.""" + self.filename = filename + self.config_sections = {} + self.data_sections = {} + + @staticmethod + def parse_MONSTERS(data: str) -> dict[str, dict]: # noqa: C901, PLR0912 + """ + Parse the MONSTERS section. + + This contains data on monsters including stat sheets, loot, and weaknesses. + """ + result = {} + for m_text in data.split("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"): + info = {} + info["stats"] = {} + info["spells"] = {} + name = "NULL" + for line in m_text.split("\n"): + # Name and level + if "(Level " in line: + name = line.split(" (")[0] + info["stats"]["level"] = int(line.split("(Level ")[1][:-1]) + # Stat chart rows + elif line.startswith("|"): + for stat in line[1:-1].split("|"): + if ":" in stat: + stat_name, stat_value = stat.split(":") + stat_name = stat_name.replace(".", "").strip().lower() + info["stats"][stat_name] = int(stat_value) + # Nullifies AND weaks, split by a ; + elif line.startswith("NULLIFY:"): + # If no weaknesses, WEAK section just doesn't appear, fudge it + if "WEAK:" in line: + null_text, weak_text = line.split(";") + else: + null_text = line + weak_text = "WEAK: " + info["nullifies"] = null_text.split(": ")[1].split(", ") + info["weak"] = weak_text.split(": ")[1].split(", ") + # Specials are name=>desc as k:v + # I *think* you can only have one special... + elif line.startswith("SPECIAL"): + content = line.split(" ", 1)[1] + if len(content) > 1: + special_name = content.split('"')[1] + special_desc = content.split(": ")[1] + info["special"] = {special_name: special_desc} + else: + info["special"] = {} + # Everything else is a simple k: v list where v is comma-delimited else: - null_text = line - weak_text = "WEAK: " - info["nullifies"] = null_text.split(": ")[1].split(", ") - info["weak"] = weak_text.split(": ")[1].split(", ") - # Specials are name=>desc as k:v - # I *think* you can only have one special... - elif line.startswith("SPECIAL"): - content = line.split(" ", 1)[1] - if len(content) > 1: - special_name = content.split('"')[1] - special_desc = content.split(": ")[1] - info["special"] = {special_name: special_desc} + for k in ["immune", "auto", "skills", "steal", "drops", "location"]: + str_match = f"{k.upper()}:" + if line.startswith(str_match): + info[k] = line.split(": ")[1].split(", ") if line.upper().strip() != str_match else [] + break + + if name != "NULL": + result[name] = info + + return result + + @staticmethod + def parse_REMONSTERATE(data: str) -> dict[str, dict]: + """ + Parse the BCCE-only REMONSTERATE section. + + This contains a mapping of monster sprites: what they were and what they + turned into post-alteration. + """ + result = {} + for line in data.split("\n"): + if not line or line.startswith("-----"): + continue + name = line.split("(")[0].strip() + originally = line.split("(", 1)[1].split(")")[0].strip() + sprite = line.split("->")[1].strip().strip(".") + result[name] = {"originally": originally, "sprite": sprite} + + return result + + @staticmethod + def parse_CHARACTERS(data: str) -> dict[str, dict]: # noqa: C901 + """ + Parse the CHARACTERS section. + + This differs based on BCEX vs BCCE. In both flavors it contains basic data + like name, spells, location and special abilities. In BCEX it includes + stats as well. In BCCE stats is its own section. + + Regardless of flavor, core logic will snap stats back into this section + later. + """ + replacements = { + "Looks like": "looks", + "World of Ruin location": "wor_location", + "Notable equipment": "equipment", + } + + result = {} + + for c_data in data.split("\n\n")[1:-1]: + info = {"stats": {}, "spells": {}, "natural_magic": False} + name = "NULL" + + for line in c_data.split("\n"): + # Name + if line[0:2].isdigit(): + name = line[4:] + + # Stat chart rows: BCEX Version only + elif line.startswith("|"): + for stat in line[1:-1].split("|"): + if ":" in stat: + stat_name, stat_value = stat.split(":") + stat_name = stat_name.replace(".", "").strip().lower() + info["stats"][stat_name] = int(stat_value) + + # Spell learnset rows + elif line.startswith(" LV"): + spell_level, spell_name = line.split("-", 1) + info["spells"][spell_name.strip()] = int(spell_level.strip().split(" ")[1]) + + # Command list + # Commands: is just a CSV list of things like "fight" "magic" etc. However if command + # randomization is on, these will be exotic things like "KitMerton" that need additional + # info provided from the COMMANDS section later. + # As such, we begin with a dehydrated hash of command_name=None, then the COMMANDS section + # will provide hydration data for the values. + elif line.startswith("Commands:"): + info["commands"] = {command.strip(): None for command in line.split(":")[1].split(",")} + + elif line.startswith("Notable"): + info["equipment"] = [eq.strip() for eq in line.split(":")[1].split(",")] + + # Special bare strings + elif line.startswith("Has natural"): + info["natural_magic"] = True + + # Everything else: normal k=v colon strings + elif ":" in line: + field, value = line.split(":", 1) + if field in replacements: + field = replacements[field] + field = field.lower() + info[field] = value.strip() + + result[name] = info + + return result + + @staticmethod + def parse_STATS(data: str) -> dict[str, dict]: + """ + Parse the BCCE-only STATS section. + + BCCE splits character stats into its own section. We use largely the same + logic as CHARACTERS here to parse it, then return it as its own dict for + merging back into the CHARACTERS blob later. + """ + result = {} + + # This is pretty identical to CHARACTERS + # Each character has a blank line between them + # Most everything else is k : v + for c_text in data.split("\n\n"): + name = "NULL" + c_data = {} + + for line in c_text.split("\n"): + # Character name + if line[0:2].isdigit(): + name = line[4:] + # Should be nothing, but let's be safe + elif ":" not in line: + pass + # A stat we can just save k : v else: - info["special"] = {} - # Everything else is a simple k: v list where v is comma-delimited - else: - for k in ["immune", "auto", "skills", "steal", "drops", "location"]: - str_match = f"{k.upper()}:" - if line.startswith(str_match): - info[k] = line.split(": ")[1].split(", ") if line.upper().strip() != str_match else [] - break + stat, value = line.split(":") + c_data[stat] = int(value) - if name != "NULL": - result[name] = info + if name != "NULL": + result[name] = c_data - return result + return result + @staticmethod + def parse_COMMANDS(data: str) -> dict[str, dict]: + """ + Parse the COMMANDS section. -def parse_REMONSTERATE(data: str) -> dict[str, dict]: - """ - Parse the BCCE-only REMONSTERATE section. + This contains information on special commands, expanding contracted command + names into more detailed explanations like GranSaw = Grand Train + Chainsaw. + """ + commands = {} - This contains a mapping of monster sprites: what they were and what they - turned into post-alteration. - """ - result = {} - for line in data.split("\n"): - if not line or line.startswith("-----"): - continue - name = line.split("(")[0].strip() - originally = line.split("(", 1)[1].split(")")[0].strip() - sprite = line.split("->")[1].strip().strip(".") - result[name] = {"originally": originally, "sprite": sprite} + # We split by ------ which divides the command name from its data + # As a result we have to pull the last line from each block and remember + # it as the name of the command in the next block. Blorf :) + next_command_name = None + for c_data in data.split("\n-------\n"): + c_data_lines = [c_data_line.strip() for c_data_line in c_data.split("\n")] + if "" in c_data_lines: + c_data_lines.remove("") + if next_command_name: + command_string = "; ".join(c_data_lines[:-1]) - return result + # Clip trailing junk from inconsistent spoiler log generation + # as well as the join above + if command_string.endswith("; "): + command_string = command_string[:-2] + if command_string.endswith("."): + command_string = command_string[:-1] + # Clean up a couple of clumsy string cases from the join above + command_string = command_string.replace(".; ", ": ") + command_string = command_string.replace(" ", " ") + command_string = command_string.replace(":;", ":") -def parse_CHARACTERS(data: str) -> dict[str, dict]: # noqa: C901 - """ - Parse the CHARACTERS section. + # Commit the command to the dict + commands[next_command_name] = command_string - This differs based on BCEX vs BCCE. In both flavors it contains basic data - like name, spells, location and special abilities. In BCEX it includes - stats as well. In BCCE stats is its own section. + next_command_name = c_data_lines[-1].lower() - Regardless of flavor, core logic will snap stats back into this section - later. - """ - replacements = {"Looks like": "looks", "World of Ruin location": "wor_location", "Notable equipment": "equipment"} + return commands - result = {} + @staticmethod + def parse_SEED(data: str) -> dict[str, bool | str]: + """ + Parse the injected SEED section. - for c_data in data.split("\n\n")[1:-1]: - info = {"stats": {}, "spells": {}, "natural_magic": False} - name = "NULL" + This is a fake section injected by the loader code. It contains nothing + but the seed code and we derive from this if the randomizer is BCCE or + BCEX, and try to pluck out other data. - for line in c_data.split("\n"): - # Name - if line[0:2].isdigit(): - name = line[4:] + We can't do much because the format is really hard to reverse. + """ + version, mode, flags, seed_num = data.split("|") if "|" in data else data.split(".") + return { + "version": version, + "flags": flags, + "seed_num": seed_num, + "mode": mode, + "is_bcce": data.startswith("CE"), + "seed": data, + } - # Stat chart rows: BCEX Version only - elif line.startswith("|"): - for stat in line[1:-1].split("|"): - if ":" in stat: - stat_name, stat_value = stat.split(":") - stat_name = stat_name.replace(".", "").strip().lower() - info["stats"][stat_name] = int(stat_value) + @staticmethod + def parse_SECRET_ITEMS(data: str) -> list[str]: + """ + Parse the BCCE-only SECRET ITEMS section. - # Spell learnset rows - elif line.startswith(" LV"): - spell_level, spell_name = line.split("-", 1) - info["spells"][spell_name.strip()] = int(spell_level.strip().split(" ")[1]) + I'm unsure what this is for. It's a series of strings with no real obvious + significance, so we just return it as a list. + """ + # I have no idea what this is lol, dump it to a list for now + return [line for line in data.split("\n") if not line.startswith("---")] - # Command list - # Commands: is just a CSV list of things like "fight" "magic" etc. However if command - # randomization is on, these will be exotic things like "KitMerton" that need additional - # info provided from the COMMANDS section later. - # As such, we begin with a dehydrated hash of command_name=None, then the COMMANDS section - # will provide hydration data for the values. - elif line.startswith("Commands:"): - info["commands"] = {command.strip(): None for command in line.split(":")[1].split(",")} + @staticmethod + def cleanup_STATS(data: dict) -> bool: + """ + Fold BCCE-only STATS section back into CHARACTERS data. - elif line.startswith("Notable"): - info["equipment"] = [eq.strip() for eq in line.split(":")[1].split(",")] + This returns BCCE logs back to how they were laid out in BCEX: where stat blocks + were simply part of the CHARACTERS data. - # Special bare strings - elif line.startswith("Has natural"): - info["natural_magic"] = True - - # Everything else: normal k=v colon strings - elif ":" in line: - field, value = line.split(":", 1) - if field in replacements: - field = replacements[field] - field = field.lower() - info[field] = value.strip() - - result[name] = info - - return result - - -def parse_STATS(data: str) -> dict[str, dict]: - """ - Parse the BCCE-only STATS section. - - BCCE splits character stats into its own section. We use largely the same - logic as CHARACTERS here to parse it, then return it as its own dict for - merging back into the CHARACTERS blob later. - """ - result = {} - - # This is pretty identical to CHARACTERS - # Each character has a blank line between them - # Most everything else is k : v - for c_text in data.split("\n\n"): - name = "NULL" - c_data = {} - - for line in c_text.split("\n"): - # Character name - if line[0:2].isdigit(): - name = line[4:] - # Should be nothing, but let's be safe - elif ":" not in line: - pass - # A stat we can just save k : v - else: - stat, value = line.split(":") - c_data[stat] = int(value) - - if name != "NULL": - result[name] = c_data - - return result - - -def parse_COMMANDS(data: str) -> dict[str, dict]: - """ - Parse the COMMANDS section. - - This contains information on special commands, expanding contracted command - names into more detailed explanations like GranSaw = Grand Train + Chainsaw. - """ - commands = {} - - # We split by ------ which divides the command name from its data - # As a result we have to pull the last line from each block and remember - # it as the name of the command in the next block. Blorf :) - next_command_name = None - for c_data in data.split("\n-------\n"): - c_data_lines = [c_data_line.strip() for c_data_line in c_data.split("\n")] - if "" in c_data_lines: - c_data_lines.remove("") - if next_command_name: - command_string = "; ".join(c_data_lines[:-1]) - - # Clip trailing junk from inconsistent spoiler log generation - # as well as the join above - if command_string.endswith("; "): - command_string = command_string[:-2] - if command_string.endswith("."): - command_string = command_string[:-1] - - # Clean up a couple of clumsy string cases from the join above - command_string = command_string.replace(".; ", ": ") - command_string = command_string.replace(" ", " ") - command_string = command_string.replace(":;", ":") - - # Commit the command to the dict - commands[next_command_name] = command_string - - next_command_name = c_data_lines[-1].lower() - - return commands - - -def parse_SEED(data: str) -> dict[str, bool | str]: - """ - Parse the injected SEED section. - - This is a fake section injected by the loader code. It contains nothing - but the seed code and we derive from this if the randomizer is BCCE or - BCEX, and try to pluck out other data. - - We can't do much because the format is really hard to reverse. - """ - version, mode, flags, seed_num = data.split("|") if "|" in data else data.split(".") - return { - "version": version, - "flags": flags, - "seed_num": seed_num, - "mode": mode, - "is_bcce": data.startswith("CE"), - "seed": data, - } - - -def parse_SECRET_ITEMS(data: str) -> list[str]: - """ - Parse the BCCE-only SECRET ITEMS section. - - I'm unsure what this is for. It's a series of strings with no real obvious - significance, so we just return it as a list. - """ - # I have no idea what this is lol, dump it to a list for now - return [line for line in data.split("\n") if not line.startswith("---")] - - -def load(filename: str) -> dict[str, str]: - """Load file and tokenize into sections.""" - # Load our file, tokenize by section header (starting with ====) - with Path(filename).open(encoding="utf-8") as infile: - tok_data = infile.read().split("============================================================\n") - - sections = {} - - top_section = True - for s in tok_data: - # The top section needs special handling and contains only seed code - if top_section: - sections["SEED"] = s.split("\n", 1)[0][12:] - top_section = False - continue - - # Everything else we just dump into named sections for now - section_header, section_data = s.split("\n", 1) - sections[section_header[5:]] = section_data - - return sections - - -if __name__ == "__main__": - sections = load(sys.argv[1]) - - data = {} - - # This mess tries to run a function named parse_SECTION for each section, - # and just continues to the next section if one doesn't exist. - for k, v in sections.items(): - try: - section_func = f"parse_{k.replace(' ', '_')}" - data[k] = globals()[section_func](v) - except KeyError: - continue - - # Hydrate each character's command list with descriptions of the commands - # This uses the COMMANDS section, but if one doesn't exist just repeat the command - # name because it should be simple things like "fight" and "magic" - command_info = data.get("COMMANDS", {}) - for c_data in data["CHARACTERS"].values(): - for command in c_data["commands"]: - c_data["commands"][command] = command_info.get(command, command) - - # If we have a STATS block, snap it into CHARACTER data - # BCCE broke this out into its own section - # Worse, it keys on slot name, not randomized character name - if "STATS" in data: - for slot, stats in data["STATS"].items(): - for c_data in data["CHARACTERS"].values(): + The BCCE STATS section keys on character slot (Terra, Locke, etc) and not the + new randomized character name, so some hunting has to happen here. + """ + for slot, stats in data.get("STATS", {}).items(): + for c_data in data.get("CHARACTERS", {}).values(): if c_data["originally"].lower() == slot.lower(): c_data["stats"] = stats - del data["STATS"] + return True - # If we ran BCCE Remonsterate, fold sprite data into monster block - if "REMONSTERATE" in data: - for name, info in data["REMONSTERATE"].items(): - for m_name, m_info in data["MONSTERS"].items(): + @staticmethod + def cleanup_COMMANDS(data: dict) -> bool: + """Fold COMMANDS expanded descriptions into CHARACTERS command data.""" + # If our COMMANDS section is missing or somehow missing a given command, we just + # repeat the command's name as its description. This should only be simple things + # like "fight" and "magic" unless something goes wrong. + command_info = data.get("COMMANDS", {}) + for c_data in data.get("CHARACTERS", {}).values(): + for command in c_data.get("commands", {}): + c_data["commands"][command] = command_info.get(command, command) + + return False + + @staticmethod + def cleanup_REMONSTERATE(data: dict) -> bool: + """Fold REMONSTERATE section into MONSTERS section data.""" + for name, info in data.get("REMONSTERATE", {}).items(): + for m_name, m_info in data.get("MONSTERS", {}).items(): if name == m_name: m_info["originally"] = info["originally"] m_info["sprite"] = info["sprite"] - del data["REMONSTERATE"] + return True + + def get_sections(self, data: str) -> dict[str, str]: + """Split logfile text and return a dict of sections for parsing.""" + tok_data = data.split("============================================================\n") + + sections = {} + + top_section = True + for s in tok_data: + # The top section needs special handling and contains only seed code + if top_section: + sections["SEED"] = s.split("\n", 1)[0][12:] + top_section = False + continue + + # Everything else we just dump into named sections for now + section_header, section_data = s.split("\n", 1) + sections[section_header[5:]] = section_data + + self.config_sections = sections + return sections + + def parse(self) -> dict: + """Fully parse the logfile and return the full data object.""" + # Get individual sections to work on + with Path(self.filename).open(encoding="utf-8") as infile: + sections = self.get_sections(infile.read()) + + data = {} + + # For each section attempt to run a parser function for it + for k, v in sections.items(): + section_func = f"parse_{k.replace(' ', '_')}" + if hasattr(self, section_func): + data[k] = getattr(self, section_func)(v) + + # Do post-parse cleanup. We need all sections parsed to do these + # Any cleanup function that returns true has its respective section deleted + section_dels = set() + for k in data: + section_func = f"cleanup_{k.replace(' ', '_')}" + if hasattr(self, section_func) and getattr(self, section_func)(data): + section_dels.add(k) + + # Any section cleanup that returns true means delete that section + for k in section_dels: + if k in data: + del data[k] + + self.data_sections = data + return data + + +if __name__ == "__main__": + p = Parser(sys.argv[1]) + data = p.parse() # Barf this pile of trash out print(json.dumps(data))