Converting statparser.php to python

I converted statparser.php to Python.

The result differs slightly from the statparser.php output: some keys differ, and some duplicated or (to me) unclear values are not decoded. In addition to each individual statistic such as "buildings_built", "infantry_killed" etc., I save their sums as "total_buildings_built", "total_infantry_killed" etc. and then aggregate these into "total_built", "total_killed" etc.

I also noticed that the "L" suffix (as in "BLL", "UNL" etc.) actually means "left" rather than "lost". You can verify this yourself.

The code was written and tested under Python 3.9.5, but should work with many other versions.

Minimal usage example:

import os.path

import parsing, utils

# Decode the binary dump into a flat {tag: value} dict.
dump_path = os.path.join("path", "to", "stats.dmp")
raw_stats = parsing.parse_stats(dump_path)

# Group the values per player and give them readable names; keep the
# tags that are not understood in separate "<section>_raw" entries.
stats = parsing.prettify_stats(raw_stats, save_raw=True)

# Store the result as human readable JSON.
json_path = os.path.join("path", "to", "stats.json")
utils.write_dict_to_json(stats, json_path)

parsing.py:

import base64
from collections import defaultdict
from struct import unpack
from typing import Dict, Union

import mappings, utils

_DecodedBlock = Union[bytes, bool, int, str, Dict[str, int]]

# `struct` format (big-endian) for every block type holding a single scalar.
_SCALAR_FORMATS: Dict[int, str] = {
    1: ">c",  # Single byte, decoded as a `bytes` object of length 1.
    2: ">?",  # Single boolean.
    3: ">h",  # Single short.
    4: ">H",  # Single unsigned short.
    5: ">l",  # Single long.
    6: ">L",  # Single unsigned long.
}


def _decode_block(binary_blob: bytes, type_: int, tag: str) -> _DecodedBlock:
    """
    Decode a binary blob given its type.

    Args:
        binary_blob: Block payload, without block header and padding.
        type_: Block type code taken from the block header.
        tag: Block tag. Only used for type 20, where its first three
            characters decide whether the payload is a table of counters
            and its first two characters select the counter names.

    Returns:
        The decoded value: a scalar for types 1-6, a string for type 7 and,
        for type 20, either a `{name: count}` dict with the non-zero
        counters or the base64-encoded payload.

    Raises:
        ValueError: If the type is unknown or a table of counters has
            a length which is not a multiple of 4.
        struct.error: If a scalar payload does not have the size its type
            requires.
    """
    scalar_format = _SCALAR_FORMATS.get(type_)
    if scalar_format is not None:
        # NOTE: type 1 yields `bytes`, which `json` cannot serialize.
        return unpack(scalar_format, binary_blob)[0]
    if type_ == 7:
        # Multiple b"\x00"-terminated chars.
        return utils.bytes_to_ascii(binary_blob.rstrip(b"\x00"))
    if type_ == 20:
        # Custom type and length.
        if tag[:3] not in mappings.HUMAN_READABLE_COUNTABLES:
            # Raw bytes.
            # Bytes are not json serializable, so encode them as base64.
            return base64.b64encode(binary_blob).decode("ascii")
        # Multiple unsigned longs.
        if len(binary_blob) % 4:
            raise ValueError(
                "Length of binary blocks should be multiple of 4, but "
                f"{len(binary_blob)} received."
            )
        counts = unpack(f">{len(binary_blob) // 4}L", binary_blob)
        # A block may hold more counters than there are known names (or
        # there may be no name list for the prefix at all); such counters
        # get a placeholder name instead of aborting the whole parse.
        names = mappings.COUNTABLE_TYPES.get(tag[:2], ())
        return {
            (names[j] if j < len(names) else f"UNKNOWN_{j}"): count
            for j, count in enumerate(counts)
            if count > 0
        }
    raise ValueError(f"Unknown binary blob type: {type_}.")

def parse_stats(filepath: str) -> Dict[str, _DecodedBlock]:
    """
    Read a `"stats.dmp"` file and decode all of its blocks.

    The file consists of a 4-byte file header followed by blocks. Every
    block has an 8-byte header (4-byte tag, 2-byte type, 2-byte payload
    length, big-endian), the payload itself and padding up to the next
    multiple of 4 bytes.

    Args:
        filepath: Path to a `"stats.dmp"` file.

    Returns:
        Decoded values keyed by block tag.

    Raises:
        ValueError: If the file ends in the middle of a header or payload.
    """
    utils.assert_file_exists(filepath, ".dmp")
    parsed = {}
    with open(filepath, "rb") as dump:
        # The file header is only checked for presence, not interpreted.
        if len(dump.read(4)) < 4:
            raise ValueError("Cannot read file header, file too short.")
        header = dump.read(8)
        # An empty read means a clean end of file.
        while header:
            if len(header) < 8:
                raise ValueError("Cannot read block header, file too short.")
            raw_tag, kind, size = unpack(">4sHH", header)
            tag = utils.bytes_to_ascii(raw_tag)
            payload = dump.read(size) if size else b""
            if len(payload) < size:
                raise ValueError("Cannot read block data, file too short.")
            # Payloads are padded to a multiple of 4 bytes.
            padding = -size % 4
            if padding:
                dump.read(padding)
            parsed[tag] = _decode_block(payload, kind, tag)
            header = dump.read(8)
    return parsed

# Tags whose value is stored unchanged under a readable name.
_PLAIN_TAGS: Dict[str, str] = {
    # Player-specific tags (the trailing player digit is already stripped).
    "RSG": "quit",
    "DED": "defeated",
    "SPC": "spectator",
    # Both tags end up in the same field; the one seen last wins.
    "LCN": "disconnected",
    "CON": "disconnected",
    "NAM": "name",
    "CRD": "credits_left",
    # Common tags.
    "DURA": "duration",
    "AFPS": "fps",
    "FINI": "finished",
    "TIME": "timestamp",
    "SCEN": "map",
    "UNIT": "starting_units",
    "CRED": "starting_credits",
    "PLRS": "human_players",
    "AIPL": "ai_players",
    "VERS": "game_version",
}

# Tags whose value is converted to a boolean and stored under a readable name.
_BOOLEAN_TAGS: Dict[str, str] = {
    "SUPR": "superweapons",
    "CRAT": "crates",
    "SHRT": "short_game",
}

# Suffixes of the per-kind "total_*" counters that are summed per player.
_TOTAL_SUFFIXES = ("built", "killed", "left", "captured", "found")


def prettify_stats(
    raw_stats: Dict[str, _DecodedBlock], save_raw: bool
) -> Dict[str, Dict[str, _DecodedBlock]]:
    """
    Prettify the parsed statistics from a `"stats.dmp"` file.

    Tags ending with a digit from 0 to 7 are treated as player-specific and
    are grouped under the player's name (the value of the `"NAM<digit>"`
    tag, or `"player_<digit>"` if that tag is missing). All other tags are
    grouped under `"common"`. For every table of counters its sum is stored
    as `"total_<name>"`, and for every player these sums are aggregated
    further into `"total_built"`, `"total_killed"` etc.

    Args:
        raw_stats: Parsed statistics (output of `parse_stats` function).
        save_raw: Whether to save not prettified values too. They are stored
            in separate `"<section>_raw"` sections under their tags.

    Returns:
        Prettified statistics.
    """
    stats = defaultdict(dict)
    # Sections that belong to players. Tracked explicitly, because a section
    # cannot be told apart from "common"/"*_raw" by its name alone: player
    # names are arbitrary strings.
    player_keys = set()
    for raw_tag, value in raw_stats.items():
        if raw_tag[-1] in "01234567":
            # Player-specific data.
            slot = raw_tag[-1]
            key = raw_stats.get(f"NAM{slot}", f"player_{slot}")
            player_keys.add(key)
            tag = raw_tag[:-1]
        else:
            # Common data.
            key = "common"
            tag = raw_tag
        # Prettify data.
        if tag == "CMP":
            # Bit field: one boolean per known completion status.
            for code, status in mappings.COMPLETION_CODES.items():
                stats[key][status] = bool(value & code)
        elif tag == "CTY":
            stats[key]["side"] = mappings.SIDES[value]
        elif tag == "BAMR":
            # Bit field with two game options.
            stats[key]["mcv_repacks"] = bool(value & 1)
            stats[key]["build_off_ally_conyards"] = bool(value & 2)
        elif tag in _PLAIN_TAGS:
            stats[key][_PLAIN_TAGS[tag]] = value
        elif tag in _BOOLEAN_TAGS:
            stats[key][_BOOLEAN_TAGS[tag]] = bool(value)
        elif tag in mappings.HUMAN_READABLE_COUNTABLES:
            human_readable_tag = mappings.HUMAN_READABLE_COUNTABLES[tag]
            stats[key][f"total_{human_readable_tag}"] = sum(value.values(), 0)
            stats[key][human_readable_tag] = value
        elif save_raw:
            stats[f"{key}_raw"][tag] = value
    for key, player_stats in stats.items():
        if key not in player_keys:
            continue
        for suffix in _TOTAL_SUFFIXES:
            # The sum is fully computed before the new key is inserted, so
            # the dict is not modified while it is being iterated.
            player_stats[f"total_{suffix}"] = sum(
                (
                    count
                    for name, count in player_stats.items()
                    if name.startswith("total") and name.endswith(suffix)
                ),
                0,
            )
    return stats

utils.py (common-purpose helpers):

import json
import os.path
import re
from typing import Dict, Iterable, Optional, Union

def assert_file_exists(
    path: str, ext: Optional[Union[str, Iterable[str]]] = None
) -> None:
    """
    Check if a file exists and optionally check its extension.

    Args:
        path: Path to file to check.
        ext: Optional file extension/extensions to check, with or without
            the leading dot. If `None` or an empty iterable, do not check
            extension.

    Raises:
        FileNotFoundError: If file does not exist.
        TypeError: If `ext` has a wrong type.
        ValueError: If file has a wrong extension.
    """
    if not os.path.isfile(path):
        raise FileNotFoundError(
            f'Given path "{path}" does not exist or is not a file.'
        )
    if ext is None:
        return
    # Materialize `ext` once: it may be a one-shot iterator (e.g. a
    # generator), which would otherwise be exhausted by the type check below
    # and make the extension check silently pass.
    candidates = (ext,) if isinstance(ext, str) else tuple(ext)
    if any(not isinstance(x, str) for x in candidates):
        raise TypeError(
            f"`ext` argument should be a string, an iterable with strings or "
            f"`None`, but value {ext} of type {type(ext)} received."
        )
    exts = [x if x.startswith(".") else f".{x}" for x in candidates]
    if not exts:
        return
    if os.path.splitext(path)[1] not in exts:
        raise ValueError(
            f'Given file "{path}" should have one of the following extensions: "'
            + '", "'.join(exts)
            + '".'
        )

def bytes_to_ascii(binary_blob: bytes) -> str:
    """
    Turn bytes into a printable ASCII string.

    Every byte outside the printable ASCII range (0x20-0x7E) is replaced
    with "?".

    Args:
        binary_blob: Input bytes.

    Returns:
        Decoded ASCII string.
    """
    placeholder = ord("?")
    printable = bytes(
        byte if 0x20 <= byte <= 0x7E else placeholder for byte in binary_blob
    )
    return printable.decode("utf-8")

def write_dict_to_json(data: Dict, output_file: str) -> None:
    """
    Save a dict as file in human readable JSON format.

    The output is indented by 4 spaces and its keys are sorted. An existing
    file is only overwritten if `data` can be serialized.

    Args:
        data: Dict to save.
        output_file: Output file.

    Raises:
        TypeError: If `data` contains a value which is not JSON
            serializable (e.g. `bytes`).
    """
    # Serialize before opening the file: opening in "w" mode truncates it, so
    # a failure in the middle of serialization would otherwise destroy
    # an existing file and leave a broken one behind.
    serialized = json.dumps(data, indent=4, sort_keys=True)
    # The output is pure ASCII (`ensure_ascii` is on by default); the explicit
    # encoding only makes the result independent of the locale.
    with open(output_file, "w", encoding="utf-8") as file:
        file.write(serialized)

mappings.py (constants only):

from typing import Dict, List

# Side index -> human readable side name. `prettify_stats` in parsing.py
# looks up the value of the player-specific "CTY" tag here to fill the
# "side" field, so an index missing from this table raises `KeyError` there.
SIDES: Dict[int, str] = {
    0: "America",
    1: "Korea",
    2: "France",
    3: "Germany",
    4: "Great Britain",
    5: "Libya",
    6: "Iraq",
    7: "Cuba",
    8: "Russia",
    9: "Yuri",
}

# Bit mask -> status name. `prettify_stats` in parsing.py tests every mask
# against the value of the player-specific "CMP" tag (`value & code`) and
# stores one boolean per status name, so several statuses can be set at once.
COMPLETION_CODES: Dict[int, str] = {
    2: "disconnected",
    8: "no_completion",
    16: "quit",
    64: "draw",
    256: "won",
    512: "defeated",
}

# Three-letter tag prefix -> readable name of a table of per-type counters.
# A type 20 block is decoded as such a table only if the first three letters
# of its tag are listed here (see `_decode_block` in parsing.py). The first
# two letters of the prefix select the list of type names in
# `COUNTABLE_TYPES`. The readable names end with "built", "killed",
# "captured", "left" or "found"; `prettify_stats` relies on these suffixes
# when it aggregates the per-player totals.
HUMAN_READABLE_COUNTABLES: Dict[str, str] = {
    "UNB": "units_built",
    "INB": "infantry_built",
    "PLB": "planes_built",
    "VSB": "ships_built",
    "BLB": "buildings_built",
    "UNK": "units_killed",
    "INK": "infantry_killed",
    "PLK": "planes_killed",
    "VSK": "ships_killed",
    "BLK": "buildings_killed",
    "BLC": "buildings_captured",
    "UNL": "units_left",
    "INL": "infantry_left",
    "PLL": "planes_left",
    "BLL": "buildings_left",
    "VSL": "ships_left",
    "CRA": "crates_found",
}

# Two-letter tag prefix -> ordered list of object type names. The order is
# significant: `_decode_block` in parsing.py names the j-th counter of
# a block with the j-th entry of the list selected by the first two letters
# of the block tag. There is no "VS" entry, although
# `HUMAN_READABLE_COUNTABLES` lists the "VSB", "VSK" and "VSL" tags.
# NOTE(review): the inline comments on "GADEPT" ("War factory"), "GAWEAP"
# ("Service depot") and "AMRADR" ("Power plant") do not match what the
# identifiers suggest; confirm whether they are observations about real
# data or mislabelled.
COUNTABLE_TYPES: Dict[str, List[str]] = {
    "CR": [
        "Armor",
        "Firepower",
        "HealBase",
        "Money",
        "Reveal",
        "Speed",
        "Veteran",
        "Unit",
        "Invulnerability",
        "IonStorm",
        "Gas",
        "Tiberium",
        "Pod",
        "Cloak",
        "Darkness",
        "Explosion",
        "ICBM",
        "Napalm",
        "Squad",
    ],
    "IN": [
        "E1",
        "E2",
        "SHK",
        "ENGINEER",
        "JUMPJET",
        "GHOST",
        "YURI",
        "IVAN",
        "DESO",
        "DOG",
        "CIV1",
        "CIV2",
        "CIV3",
        "CTECH",
        "WEEDGUY",
        "CLEG",
        "SPY",
        "CCOMAND",
        "PTROOP",
        "CIVAN",
        "YURIPR",
        "SNIPE",
        "COW",
        "ALL",
        "TANY",
        "FLAKT",
        "TERROR",
        "SENGINEER",
        "ADOG",
        "VLADIMIR",
        "PENTGEN",
        "PRES",
        "SSRV",
        "CIVA",
        "CIVB",
        "CIVC",
        "CIVBBP",
        "CIVBFM",
        "CIVBF",
        "CIVBTM",
        "CIVSFM",
        "CIVSF",
        "CIVSTM",
        "POLARB",
        "JOSH",
        "YENGINEER",
        "GGI",
        "INIT",
        "BORIS",
        "BRUTE",
        "VIRUS",
        "CLNT",
        "ARND",
        "STLN",
        "CAML",
        "EINS",
        "MUMY",
        "RMNV",
        "LUNR",
        "DNOA",
        "DNOB",
        "SLAV",
        "WWLF",
        "YDOG",
        "YADOG",
    ],
    "UN": [
        "AMCV",
        "HARV",
        "APOC",
        "HTNK",
        "SAPC",
        "CAR",
        "BUS",
        "WINI",
        "PICK",
        "MTNK",
        "HORV",
        "TRUCKA",
        "TRUCKB",
        "CARRIER",
        "V3",
        "ZEP",
        "DRON",
        "HTK",
        "DEST",
        "SUB",
        "AEGIS",
        "LCRF",
        "DRED",
        "SHAD",
        "SQD",
        "DLPH",
        "SMCV",
        "TNKD",
        "HOWI",
        "TTNK",
        "HIND",
        "LTNK",
        "CMON",
        "CMIN",
        "SREF",
        "XCOMET",
        "HYD",
        "MGTK",
        "FV",
        "DeathDummy",
        "VLAD",
        "DTRUCK",
        "PROPA",
        "CONA",
        "COP",
        "EUROC",
        "LIMO",
        "STANG",
        "SUVB",
        "SUVW",
        "TAXI",
        "PTRUCK",
        "CRUISE",
        "TUG",
        "CDEST",
        "YHVR",
        "PCV",
        "SMIN",
        "SMON",
        "YCAB",
        "YTNK",
        "BFRT",
        "TELE",
        "CAOS",
        "DDBX",
        "BCAB",
        "BSUB",
        "SCHP",
        "JEEP",
        "MIND",
        "DISK",
        "UTNK",
        "ROBO",
        "YDUM",
        "SCHD",
        "DOLY",
        "CBLC",
        "FTRK",
        "AMBU",
        "CIVP",
        "VISC_LRG",
        "VISC_SML",
    ],
    "PL": [
        "APACHE",
        "ORCA",
        "HORNET",
        "V3ROCKET",
        "ASW",
        "DMISL",
        "PDPLANE",
        "BEAG",
        "CARGOPLANE",
        "BPLN",
        "SPYP",
        "CMISL",
    ],
    "BL": [
        "GAPOWR",
        "GAREFN",  # Ore Refinery
        "GACNST",
        "GAPILE",  # Barracks
        "GASAND",
        "GADEPT",  # War factory
        "GATECH",
        "GAWEAP",  # Service depot
        "CALAB",
        "NAPOWR",
        "NATECH",
        "NAHAND",
        "GAWALL",
        "NARADR",
        "NAWEAP",
        "NAREFN",
        "NAWALL",
        "CAHSE07",
        "NAPSIS",
        "CASYDN01",
        "NALASR",
        "NASAM",
        "CASYDN02",
        "GAYARD",
        "NAIRON",
        "NACNST",
        "NADEPT",
        "GACSPH",
        "GADUMY",
        "GAWEAT",
        "CABHUT",
        "GALITE",
        "REDLAMP",
        "GRENLAMP",
        "BLUELAMP",
        "YELWLAMP",
        "PURPLAMP",
        "INORANLAMP",
        "INGRNLMP",
        "INREDLMP",
        "INBLULMP",
        "CITY01",
        "CITY02",
        "CITY03",
        "CITY04",
        "CITY05",
        "CITY06",
        "CAHOSP",
        "INGALITE",
        "INYELWLAMP",
        "INPURPLAMP",
        "NEGLAMP",
        "NEGRED",
        "TESLA",
        "NAMISL",
        "ATESLA",
        "CAMACH",
        "TSTLAMP",
        "CASYDN03",
        "AMMOCRAT",
        "GAGREEN",
        "NAYARD",
        "GASPYSAT",
        "GAGAP",
        "GTGCAN",
        "NANRCT",
        "GAPILL",
        "NAFLAK",
        "CAOUTP",
        "CATHOSP",
        "CAAIRP",
        "CAOILD",
        "NACLON",
        "GAOREP",
        "CACITY01",
        "CACITY02",
        "CACITY03",
        "CACITY04",
        "CANEWY01",
        "CANEWY04",
        "CANEWY05",
        "CASWST01",
        "CATECH01",
        "CATEXS01",
        "CATEXS02",
        "CAWASH01",
        "CAFARM01",
        "CAFARM02",
        "CALIT01E",
        "CALIT01N",
        "CALIT01S",
        "CALIT01W",
        "CAMISC01",
        "CAMISC02",
        "CAMISC03",
        "CAMISC04",
        "CAPOL01E",
        "CAPOL01N",
        "CAPOL01S",
        "CAPOL01W",
        "CASIN01E",
        "CASIN01N",
        "CASIN01S",
        "CASIN01W",
        "CAPARS01",
        "GAAIRC",
        "CAFRMA",
        "CAFRMB",
        "CAWASH05",
        "CAWASH04",
        "CAWASH03",
        "CAWASH07",
        "CAWASH11",
        "CAWSH12",
        "CAWASH14",
        "CAWASH09",
        "CAWASH10",
        "CAWASH13",
        "CAARMY01",
        "CAUSFGL",
        "CAWASH08",
        "CALIT03E",
        "CALIT03N",
        "CALIT03S",
        "CALIT03W",
        "CALIT02L",
        "CALIT02R",
        "CAHSE01",
        "CAWT01",
        "CATS01",
        "CABARN02",
        "CAWA2A",
        "CAWA2B",
        "CAWA2C",
        "CAWA2D",
        "AMRADR",  # Power plant
        "CAPRS03",
        "CAGARD01",
        "CARUS01",
        "CAMIAM01",
        "CATRAN01",
        "CAMIAM02",
        "CANWY05",
        "MAYAN",
        "CAEUR1",
        "CAEUR2",
        "CAEUR04",
        "CAMEX01",
        "CARUS02A",
        "CARUS02B",
        "CARUS02C",
        "CARUS02D",
        "CARUS02E",
        "CARUS02F",
        "CANEWY06",
        "CANEWY07",
        "CANEWY08",
        "CAPARS02",
        "CAPARS08",
        "CAPARS09",
        "CARUS03",
        "CANEWY10",
        "CANEWY11",
        "CANEWY12",
        "CANEWY13",
        "CANEWY14",
        "CANEWY15",
        "CANEWY16",
        "CANEWY17",
        "CANEWY18",
        "CAPARS04",
        "CAPARS05",
        "CAPARS06",
        "CAPARS07",
        "CAWASH15",
        "CAPARS10",
        "CAPARS13",
        "CAPARS14",
        "CAGAS01",
        "CAPARS11",
        "CAPARS12",
        "CAFARM06",
        "CAMIAM04",
        "NAPSYB",
        "NAPSYA",
        "CAIND01",
        "CACOLO01",
        "CANWY09",
        "CANWY22",
        "CANWY23",
        "CANWY24",
        "CANWY25",
        "CANWY26",
        "CATEXS03",
        "CATEXS04",
        "CATEXS05",
        "CARUS02G",
        "CACHIG04",
        "CAMIAM03",
        "CARUS07",
        "CATEXS06",
        "CATEXS07",
        "CATEXS08",
        "CACHIG01",
        "CACHIG02",
        "CACHIG03",
        "CAWASH16",
        "CAWASH17",
        "CACHIG05",
        "CAWASH19",
        "CARUS08",
        "CARUS09",
        "CARUS10",
        "CARUS11",
        "CANEWY20",
        "CANEWY21",
        "CARUS04",
        "CARUS05",
        "CARUS06",
        "CAMSC01",
        "CAMSC02",
        "CAMSC03",
        "CAMSC04",
        "CAMSC05",
        "CAMSC06",
        "CAMSC07",
        "CAWASH18",
        "CAEURO05",
        "CAPARK01",
        "CAPARK02",
        "CAPARK03",
        "CAHSE02",
        "CAHSE03",
        "CAHSE04",
        "CASTRT01",
        "CASTRT02",
        "CASTL01",
        "CASTL02",
        "CASTL03",
        "CASTL04",
        "CAHSE05",
        "CAHSE06",
        "CAMIAM05",
        "CAMIAM06",
        "CAMIAM07",
        "CAFNCB",
        "CAFNCW",
        "CAMEX02",
        "CAMEX03",
        "CAMEX04",
        "CAMEX05",
        "CACHIG06",
        "CAMSC08",
        "CAMSC09",
        "CAARMY02",
        "CAARMY03",
        "CAARMY04",
        "TEMMORLAMP",
        "TEMDAYLAMP",
        "TEMDUSLAMP",
        "TEMNITLAMP",
        "SNOMORLAMP",
        "SNODAYLAMP",
        "SNODUSLAMP",
        "SNONITLAMP",
        "CAKRMW",
        "CARUFGL",
        "CAFRFGL",
        "CATRAN02",
        "CACUFGL",
        "CASKFGL",
        "CALBFGL",
        "CAMIAM08",
        "CAMISC05",
        "CAMISC06",
        "CASTL05A",
        "CASTL05B",
        "CASTL05C",
        "CASTL05D",
        "CASTL05E",
        "CASTL05F",
        "CASTL05G",
        "CASTL05H",
        "CAMSC10",
        "CAGEFGL",
        "CAUKFGL",
        "CAWASH06",
        "CAMSC11",
        "CAMSC12",
        "CAMSC13",
        "CAPOFGL",
        "CAMSC12A",
        "CAMOV01",
        "CAMOV02",
        "CABUNK01",
        "CABUNK02",
        "CAFNCP",
        "CASTRT03",
        "CASTRT04",
        "CASTRT05",
        "YACNST",
        "YAPOWR",
        "YABRCK",
        "YAWEAP",
        "YAYARD",
        "YADEPT",
        "YATECH",
        "GAFWLL",
        "YAGGUN",
        "YAPSYT",
        "NAINDP",
        "YAGRND",
        "YAGNTC",
        "CASLAB",
        "CATIME",
        "YAPPET",
        "CALOND04",
        "CALOND05",
        "CALOND06",
        "CAMOON01",
        "CATRAN03",
        "CAEAST01",
        "CAEGYP01",
        "CAEGYP02",
        "CAEGYP03",
        "CALA01",
        "CALA02",
        "CALA03",
        "CALA04",
        "CALA05",
        "CALOND01",
        "CALOND02",
        "CALOND03",
        "CAMORR01",
        "CAMORR02",
        "CAMORR03",
        "CASANF01",
        "CASANF02",
        "CASANF03",
        "CASANF04",
        "CASANF05",
        "CASEAT01",
        "NATBNK",
        "GAGATE_A",
        "CASANF09",
        "CASANF10",
        "CASANF11",
        "CASANF12",
        "CASANF13",
        "CASANF14",
        "CASANF06",
        "CASANF07",
        "CASANF08",
        "CASEAT02",
        "YACOMD",
        "YAPPPT",
        "GAROBO",
        "YAREFN",
        "YAROCK",
        "NABNKR",
        "CASANF15",
        "CASANF16",
        "CASANF17",
        "CASANF18",
        "CASIN03E",
        "CASIN03S",
        "CAURB01",
        "CAURB02",
        "CAURB03",
        "CAPOWR",
        "CALA07",
        "CAEGYP06",
        "CALA08",
        "CAEAST02",
        "CABARR01",
        "CABARR02",
        "CAMORR04",
        "CAMORR05",
        "CALA09",
        "CAEGYP04",
        "CAEGYP05",
        "CALA06",
        "CAMORR06",
        "CAMORR07",
        "CAMORR08",
        "CAMORR09",
        "CAMORR10",
        "CATIME01",
        "CATIME02",
        "CALA10",
        "CALA11",
        "CALA12",
        "CALA13",
        "CAPARK04",
        "CAPARK05",
        "CAPARK06",
        "CALA14",
        "CALA15",
        "CABUNK03",
        "CABUNK04",
        "CALUNR01",
        "CALUNR02",
    ],
}

angadsingh / yrstats

Converting statparser.php to python #1