bcexparse/main.py

467 lines
17 KiB
Python

#!/usr/bin/env python3
"""Parse BCEX (or BCCE) logs into json objects."""
__version__ = "0.4.1"
__author__ = "Trysdyn Black"
import json
import sys
from pathlib import Path
class Parser:
"""
BCEX/BCCE spoiler logfile parser.
Sections missing support:
- MAGITEK
- DANCES
- ESPERS
- ITEM MAGIC
- ITEM EFFECTS
- COLOSSEUM
- SHOPS
- TREASURE CHESTS
- JUNCTIONS
"""
def __init__(self, filename: str) -> None:
"""Initialize parser with filename."""
self.filename = filename
self.config_sections = {}
self.data_sections = {}
@staticmethod
def parse_MONSTERS(data: str) -> dict[str, dict]: # noqa: C901, PLR0912
"""
Parse the MONSTERS section.
This contains data on monsters including stat sheets, loot, and weaknesses.
"""
result = {}
for m_text in data.split("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"):
info = {}
info["stats"] = {}
info["spells"] = {}
name = "NULL"
for line in m_text.split("\n"):
# Name and level
if "(Level " in line:
name = line.split(" (")[0]
info["stats"]["level"] = int(line.split("(Level ")[1][:-1])
# Stat chart rows
elif line.startswith("|"):
for stat in line[1:-1].split("|"):
if ":" in stat:
stat_name, stat_value = stat.split(":")
stat_name = stat_name.replace(".", "").strip().lower()
info["stats"][stat_name] = int(stat_value)
# Nullifies AND weaks, split by a ;
elif line.startswith("NULLIFY:"):
# If no weaknesses, WEAK section just doesn't appear, fudge it
if "WEAK:" in line:
null_text, weak_text = line.split(";")
else:
null_text = line
weak_text = "WEAK: "
info["nullifies"] = null_text.split(": ")[1].split(", ")
info["weak"] = weak_text.split(": ")[1].split(", ")
# Specials are name=>desc as k:v
# I *think* you can only have one special...
elif line.startswith("SPECIAL"):
content = line.split(" ", 1)[1]
if len(content) > 1:
special_name = content.split('"')[1]
special_desc = content.split(": ")[1]
info["special"] = {special_name: special_desc}
else:
info["special"] = {}
# Everything else is a simple k: v list where v is comma-delimited
else:
for k in ["immune", "auto", "skills", "steal", "drops", "location"]:
str_match = f"{k.upper()}:"
if line.startswith(str_match):
info[k] = line.split(": ")[1].split(", ") if line.upper().strip() != str_match else []
break
if name != "NULL":
result[name] = info
return result
@staticmethod
def parse_REMONSTERATE(data: str) -> dict[str, dict]:
"""
Parse the BCCE-only REMONSTERATE section.
This contains a mapping of monster sprites: what they were and what they
turned into post-alteration.
"""
result = {}
for line in data.split("\n"):
if not line or line.startswith("-----"):
continue
name = line.split("(")[0].strip()
originally = line.split("(", 1)[1].split(")")[0].strip()
sprite = line.split("->")[1].strip().strip(".")
result[name] = {"originally": originally, "sprite": sprite}
return result
@staticmethod
def parse_CHARACTERS(data: str) -> dict[str, dict]: # noqa: C901
"""
Parse the CHARACTERS section.
This differs based on BCEX vs BCCE. In both flavors it contains basic data
like name, spells, location and special abilities. In BCEX it includes
stats as well. In BCCE stats is its own section.
Regardless of flavor, core logic will snap stats back into this section
later.
"""
replacements = {
"Looks like": "looks",
"World of Ruin location": "wor_location",
"Notable equipment": "equipment",
}
result = {}
for c_data in data.split("\n\n")[1:-1]:
info = {"stats": {}, "spells": {}, "natural_magic": False}
name = "NULL"
for line in c_data.split("\n"):
# Name
if line[0:2].isdigit():
name = line[4:]
# Stat chart rows: BCEX Version only
elif line.startswith("|"):
for stat in line[1:-1].split("|"):
if ":" in stat:
stat_name, stat_value = stat.split(":")
stat_name = stat_name.replace(".", "").strip().lower()
info["stats"][stat_name] = int(stat_value)
# Spell learnset rows
elif line.startswith(" LV"):
spell_level, spell_name = line.split("-", 1)
info["spells"][spell_name.strip()] = int(spell_level.strip().split(" ")[1])
# Command list
# Commands: is just a CSV list of things like "fight" "magic" etc. However if command
# randomization is on, these will be exotic things like "KitMerton" that need additional
# info provided from the COMMANDS section later.
# As such, we begin with a dehydrated hash of command_name=None, then the COMMANDS section
# will provide hydration data for the values.
elif line.startswith("Commands:"):
info["commands"] = {command.strip(): None for command in line.split(":")[1].split(",")}
elif line.startswith("Notable"):
info["equipment"] = [eq.strip() for eq in line.split(":")[1].split(",")]
# Special bare strings
elif line.startswith("Has natural"):
info["natural_magic"] = True
# Everything else: normal k=v colon strings
elif ":" in line:
field, value = line.split(":", 1)
if field in replacements:
field = replacements[field]
field = field.lower()
info[field] = value.strip()
result[name] = info
return result
@staticmethod
def parse_STATS(data: str) -> dict[str, dict]:
"""
Parse the BCCE-only STATS section.
BCCE splits character stats into its own section. We use largely the same
logic as CHARACTERS here to parse it, then return it as its own dict for
merging back into the CHARACTERS blob later.
"""
result = {}
# This is pretty identical to CHARACTERS
# Each character has a blank line between them
# Most everything else is k : v
for c_text in data.split("\n\n"):
name = "NULL"
c_data = {}
for line in c_text.split("\n"):
# Character name
if line[0:2].isdigit():
name = line[4:]
# Should be nothing, but let's be safe
elif ":" not in line:
pass
# A stat we can just save k : v
else:
stat, value = line.split(":")
c_data[stat] = int(value)
if name != "NULL":
result[name] = c_data
return result
@staticmethod
def parse_COMMANDS(data: str) -> dict[str, dict]:
"""
Parse the COMMANDS section.
This contains information on special commands, expanding contracted command
names into more detailed explanations like GranSaw = Grand Train + Chainsaw.
"""
commands = {}
# We split by ------ which divides the command name from its data
# As a result we have to pull the last line from each block and remember
# it as the name of the command in the next block. Blorf :)
next_command_name = None
for c_data in data.split("\n-------\n"):
c_data_lines = [c_data_line.strip() for c_data_line in c_data.split("\n")]
if "" in c_data_lines:
c_data_lines.remove("")
if next_command_name:
command_string = "; ".join(c_data_lines[:-1])
# Clip trailing junk from inconsistent spoiler log generation
# as well as the join above
if command_string.endswith("; "):
command_string = command_string[:-2]
if command_string.endswith("."):
command_string = command_string[:-1]
# Clean up a couple of clumsy string cases from the join above
command_string = command_string.replace(".; ", ": ")
command_string = command_string.replace(" ", " ")
command_string = command_string.replace(":;", ":")
# Commit the command to the dict
commands[next_command_name] = command_string
next_command_name = c_data_lines[-1].lower()
return commands
@staticmethod
def parse_SEED(data: str) -> dict[str, bool | str]:
"""
Parse the injected SEED section.
This is a fake section injected by the loader code. It contains nothing
but the seed code and we derive from this if the randomizer is BCCE or
BCEX, and try to pluck out other data.
We can't do much because the format is really hard to reverse.
"""
version, mode, flags, seed_num = data.split("|") if "|" in data else data.split(".")
return {
"version": version,
"flags": flags,
"seed_num": seed_num,
"mode": mode,
"is_bcce": data.startswith("CE"),
"seed": data,
}
@staticmethod
def parse_SECRET_ITEMS(data: str) -> list[str]:
"""
Parse the BCCE-only SECRET ITEMS section.
I'm unsure what this is for. It's a series of strings with no real obvious
significance, so we just return it as a list.
"""
# I have no idea what this is lol, dump it to a list for now
return [line for line in data.split("\n") if not line.startswith("---")]
@staticmethod
def parse_MUSIC(data: str) -> dict[str, dict]:
"""
Parse the MUSIC section.
This differs significantly between BCEX and BCCE: Numbers and data are split
with a period in BCCE and a colon in BCEX. BCEX puts arranger on the same line
as composer, split with --, BCEX lacks the Jukebox Title section entirely.
"""
music_sections = data.split("\n\n")
replacements = {}
for section in music_sections[1:]:
if not section.strip():
continue
# BCEX and BCCE divide numbers from data differently
if ":" in section[:6]:
_, info = section.split(":", 1)
else:
_, info = section.split(".", 1)
# The name of the song being replaced preceeds a ->
old_name, info = info.split("->", 1)
old_name = old_name.strip()
replacements[old_name] = {}
# Info is, mostly, one item per line, so let's go by line
tok_info = info.split("\n")
for k in ("name", "title", "composer", "arranger", "jukebox_title"):
# Not every song has all the data
if not len(tok_info):
break
line = tok_info.pop(0).strip()
# BCCE puts arranger on its own line. BCEX puts it on the same line as
# composed, split by "--". So we have to handle both
if "-- Arranged by " in line:
line, arranger = line.split("-- Arranged by ", 1)
replacements[old_name]["arranger"] = arranger
# Jukebox Title is in prens with extra stuff to chomp
if "Jukebox title" in line:
line = line[16:-1]
# These strings are fluff but should only appear in compose/arranger lines
# so it's safe to just blindly chomp them
line = line.split("Composed by ", 1)[-1].strip()
line = line.replace("Ripped and/or arranged by ", "")
replacements[old_name][k] = line
return replacements
@staticmethod
def parse_AESTHETICS(data: str) -> dict[str, str]:
"""
Parse the BCCE-only AESTHETICS section.
This is just a k=v list we split up and strip.
"""
replacements = {}
for line in data.split("\n"):
if ":" not in line:
continue
old, new = line.split(":")
old = old.strip()
new = new.strip()
replacements[old] = new
return replacements
@staticmethod
def cleanup_STATS(data: dict) -> bool:
"""
Fold BCCE-only STATS section back into CHARACTERS data.
This returns BCCE logs back to how they were laid out in BCEX: where stat blocks
were simply part of the CHARACTERS data.
The BCCE STATS section keys on character slot (Terra, Locke, etc) and not the
new randomized character name, so some hunting has to happen here.
"""
for slot, stats in data.get("STATS", {}).items():
for c_data in data.get("CHARACTERS", {}).values():
if c_data["originally"].lower() == slot.lower():
c_data["stats"] = stats
return True
@staticmethod
def cleanup_COMMANDS(data: dict) -> bool:
"""Fold COMMANDS expanded descriptions into CHARACTERS command data."""
# If our COMMANDS section is missing or somehow missing a given command, we just
# repeat the command's name as its description. This should only be simple things
# like "fight" and "magic" unless something goes wrong.
command_info = data.get("COMMANDS", {})
for c_data in data.get("CHARACTERS", {}).values():
for command in c_data.get("commands", {}):
c_data["commands"][command] = command_info.get(command, command)
return False
@staticmethod
def cleanup_REMONSTERATE(data: dict) -> bool:
"""Fold REMONSTERATE section into MONSTERS section data."""
for name, info in data.get("REMONSTERATE", {}).items():
for m_name, m_info in data.get("MONSTERS", {}).items():
if name == m_name:
m_info["originally"] = info["originally"]
m_info["sprite"] = info["sprite"]
return True
def get_sections(self, data: str) -> dict[str, str]:
"""Split logfile text and return a dict of sections for parsing."""
tok_data = data.split("============================================================\n")
sections = {}
top_section = True
for s in tok_data:
# The top section needs special handling and contains only seed code
if top_section:
sections["SEED"] = s.split("\n", 1)[0][12:]
top_section = False
continue
# Everything else we just dump into named sections for now
section_header, section_data = s.split("\n", 1)
sections[section_header[5:]] = section_data
self.config_sections = sections
return sections
def parse(self) -> dict:
"""Fully parse the logfile and return the full data object."""
# Get individual sections to work on
with Path(self.filename).open(encoding="utf-8") as infile:
sections = self.get_sections(infile.read())
data = {}
# For each section attempt to run a parser function for it
for k, v in sections.items():
section_func = f"parse_{k.replace(' ', '_')}"
if hasattr(self, section_func):
data[k] = getattr(self, section_func)(v)
# Do post-parse cleanup. We need all sections parsed to do these
# Any cleanup function that returns true has its respective section deleted
section_dels = set()
for k in data:
section_func = f"cleanup_{k.replace(' ', '_')}"
if hasattr(self, section_func) and getattr(self, section_func)(data):
section_dels.add(k)
# Any section cleanup that returns true means delete that section
for k in section_dels:
if k in data:
del data[k]
self.data_sections = data
return data
if __name__ == "__main__":
    # Without a logfile argument, print a usage hint instead of an IndexError traceback
    if len(sys.argv) < 2:  # noqa: PLR2004
        sys.exit(f"usage: {sys.argv[0]} LOGFILE")

    p = Parser(sys.argv[1])
    data = p.parse()

    # Emit the parsed log as a single JSON document on stdout
    print(json.dumps(data))