The result differs slightly from the statparser.php output: some keys differ, and some duplicated or (to me) unclear values are not decoded. In addition to each individual statistic such as "buildings_build", "infantry_killed", etc., I save their sums as "total_buildings_build", "total_infantry_killed", etc., and then aggregate these into "total_built", "total_killed", etc.
I also noticed that the "L" suffix (like "BLL", "UNL" etc.) actually means not "lost", but rather "left". You can validate this too.
The code was written and tested with Python 3.9.5, but should work with many other versions.
import base64
from collections import defaultdict
from struct import unpack
from typing import Dict, Union
import mappings, utils
# Every value a single block of a "stats.dmp" file can decode to.
_DecodedBlock = Union[bytes, bool, int, str, Dict[str, int]]

# Big-endian `struct` formats of the fixed-size scalar block types.
_SCALAR_FORMATS = {
    1: ">c",  # Single byte (returned as a length-1 `bytes` object).
    2: ">?",  # Single boolean.
    3: ">h",  # Single short.
    4: ">H",  # Single unsigned short.
    5: ">l",  # Single long.
    6: ">L",  # Single unsigned long.
}


def _decode_block(binary_blob: bytes, type_: int, tag: str) -> _DecodedBlock:
    """
    Decode a binary blob given its type.

    Args:
        binary_blob: Block payload, without the block header and padding.
        type_: Numeric block type taken from the block header.
        tag: Block tag. Only used for blocks of type 20, where its prefix
            selects how the payload is interpreted.

    Returns:
        The decoded value:

        * types 1-6: a single scalar unpacked as big-endian (see
          `_SCALAR_FORMATS`);
        * type 7: the payload without trailing zero bytes, as a string;
        * type 20, countable tag: a dict mapping item names to their non-zero
          counts;
        * type 20, any other tag: the payload encoded as a base64 string.

    Raises:
        ValueError: If `type_` is unknown, or if the payload of a countable
            block is not a whole number of 4-byte values.
        struct.error: If the payload size does not match a scalar type.
    """
    scalar_format = _SCALAR_FORMATS.get(type_)
    if scalar_format is not None:
        return unpack(scalar_format, binary_blob)[0]

    if type_ == 7:
        # Multiple b"\x00"-terminated chars.
        return utils.bytes_to_ascii(binary_blob.rstrip(b"\x00"))

    if type_ == 20:
        # Custom type and length.
        if tag[:3] not in mappings.HUMAN_READABLE_COUNTABLES:
            # Raw bytes.
            # Bytes are not json serializable, so encode them as base64.
            return base64.b64encode(binary_blob).decode("ascii")

        # Multiple unsigned longs.
        if len(binary_blob) % 4:
            raise ValueError(
                "Length of binary blocks should be multiple of 4, but "
                f"{len(binary_blob)} received."
            )
        counts = unpack(f">{len(binary_blob) // 4}L", binary_blob)
        # Item names are looked up by position; zero counts are dropped.
        return {
            mappings.COUNTABLE_TYPES[tag[:2]][j]: count
            for j, count in enumerate(counts)
            if count > 0
        }

    raise ValueError(f"Unknown binary blob type: {type_}.")
def parse_stats(filepath: str) -> Dict[str, _DecodedBlock]:
    """
    Read a `"stats.dmp"` file and decode every block it contains.

    The file is a 4-byte header followed by blocks. Each block is an 8-byte
    header (4-byte tag, 2-byte type, 2-byte payload length, big-endian), then
    the payload, padded to a multiple of 4 bytes.

    Args:
        filepath: Path to a `"stats.dmp"` file.

    Returns:
        Mapping from block tag to its decoded value. If a tag occurs more
        than once, the last occurrence wins.

    Raises:
        ValueError: If the file ends in the middle of the file header, a
            block header or a block payload.
    """
    utils.assert_file_exists(filepath, ".dmp")

    file_header_size = 4
    block_header_size = 8
    decoded = {}

    with open(filepath, "rb") as dump:
        # The file header content is not used, only its presence is checked.
        if len(dump.read(file_header_size)) < file_header_size:
            raise ValueError("Cannot read file header, file too short.")

        # An empty read means a clean end of file.
        while block_header := dump.read(block_header_size):
            if len(block_header) < block_header_size:
                raise ValueError("Cannot read block header, file too short.")
            raw_tag, block_type, payload_size = unpack(">4sHH", block_header)
            block_tag = utils.bytes_to_ascii(raw_tag)

            payload = dump.read(payload_size) if payload_size > 0 else b""
            if len(payload) < payload_size:
                raise ValueError("Cannot read block data, file too short.")

            # Number of bytes needed to reach the next multiple of 4.
            padding_size = -payload_size % 4
            if padding_size:
                dump.read(padding_size)

            decoded[block_tag] = _decode_block(payload, block_type, block_tag)

    return decoded
# Tags whose value is stored unchanged under a human-readable name.
_RENAMED_TAGS = {
    # Player-specific tags.
    "RSG": "quit",
    "DED": "defeated",
    "SPC": "spectator",
    "LCN": "disconnected",
    "CON": "disconnected",
    "NAM": "name",
    "CRD": "credits_left",
    # Common tags.
    "DURA": "duration",
    "AFPS": "fps",
    "FINI": "finished",
    "TIME": "timestamp",
    "SCEN": "map",
    "UNIT": "starting_units",
    "CRED": "starting_credits",
    "PLRS": "human_players",
    "AIPL": "ai_players",
    "VERS": "game_version",
}

# Tags whose value is converted to `bool` before being stored.
_BOOLEAN_TAGS = {
    "SUPR": "superweapons",
    "CRAT": "crates",
    "SHRT": "short_game",
}

# Suffixes of the per-category totals that are summed into grand totals.
_TOTAL_SUFFIXES = ("built", "killed", "left", "captured", "found")


def prettify_stats(
    raw_stats: Dict[str, _DecodedBlock], save_raw: bool
) -> Dict[str, Dict[str, _DecodedBlock]]:
    """
    Prettify the parsed statistics from a `"stats.dmp"` file.

    Tags ending with a digit from 0 to 7 are player-specific and are grouped
    under the name of that player (the value of the `NAM<digit>` tag). All
    other tags are grouped under the `"common"` key.

    For every countable tag both the per-item counts and their sum
    (`total_<name>`) are stored. For every player the `total_<name>` values
    are then summed by suffix into `total_built`, `total_killed`,
    `total_left`, `total_captured` and `total_found`.

    Args:
        raw_stats: Parsed statistics (output of `parse_stats` function).
        save_raw: Whether to save not prettified values too. They are stored
            under the `"<group>_raw"` key.

    Returns:
        Prettified statistics.

    Raises:
        KeyError: If a player-specific tag has no matching `NAM<digit>` tag.
    """
    stats = defaultdict(dict)
    # Player groups are tracked explicitly. Guessing them from the group name
    # would skip players whose name starts with "common" or ends with "raw".
    player_keys = set()

    for full_tag, value in raw_stats.items():
        if full_tag[-1] in "01234567":
            # Player-specific data.
            key = raw_stats[f"NAM{full_tag[-1]}"]
            tag = full_tag[:-1]
            player_keys.add(key)
        else:
            # Common data.
            key = "common"
            tag = full_tag

        # Prettify data.
        if tag in _RENAMED_TAGS:
            stats[key][_RENAMED_TAGS[tag]] = value
        elif tag in _BOOLEAN_TAGS:
            stats[key][_BOOLEAN_TAGS[tag]] = bool(value)
        elif tag == "CMP":
            # Bit mask: one boolean per known completion code.
            for code, status in mappings.COMPLETION_CODES.items():
                stats[key][status] = bool(value & code)
        elif tag == "CTY":
            stats[key]["side"] = mappings.SIDES[value]
        elif tag == "BAMR":
            # Bit mask: two options packed into a single value.
            stats[key]["mcv_repacks"] = bool(value & 1)
            stats[key]["build_off_ally_conyards"] = bool(value & 2)
        elif tag in mappings.HUMAN_READABLE_COUNTABLES:
            human_readable_tag = mappings.HUMAN_READABLE_COUNTABLES[tag]
            stats[key][f"total_{human_readable_tag}"] = sum(value.values(), 0)
            stats[key][human_readable_tag] = value
        elif save_raw:
            stats[f"{key}_raw"][tag] = value

    for key in player_keys:
        player_stats = stats[key]
        for suffix in _TOTAL_SUFFIXES:
            # The sum is fully computed before the new key is inserted.
            player_stats[f"total_{suffix}"] = sum(
                (
                    amount
                    for name, amount in player_stats.items()
                    if name.startswith("total") and name.endswith(suffix)
                ),
                0,
            )
    return stats
utils.py (common-purpose helpers):
import json
import os.path
import re
from typing import Dict, Iterable, Optional, Union
def assert_file_exists(
    path: str, ext: Optional[Union[str, Iterable[str]]] = None
) -> None:
    """
    Check if a file exists and optionally check its extension.

    Args:
        path: Path to file to check.
        ext: Optional file extension/extensions to check, with or without the
            leading dot. If `None` or empty, do not check extension.

    Raises:
        FileNotFoundError: If file does not exist.
        TypeError: If `ext` is not a string, an iterable of strings or `None`.
        ValueError: If file has a wrong extension.
    """
    if not os.path.isfile(path):
        raise FileNotFoundError(
            f'Given path "{path}" does not exist or is not a file.'
        )
    if ext is None:
        return

    # Materialize the extensions once. A one-shot iterable (e.g. a generator)
    # would otherwise be exhausted by the type check below, and the extension
    # check would then be skipped silently.
    candidates = (ext,) if isinstance(ext, str) else tuple(ext)
    if not all(isinstance(x, str) for x in candidates):
        raise TypeError(
            "`ext` argument should be a string, an iterable with strings or "
            f"`None`, but value {ext} of type {type(ext)} received."
        )

    exts = [x if x.startswith(".") else f".{x}" for x in candidates]
    if not exts:
        return
    if os.path.splitext(path)[1] not in exts:
        raise ValueError(
            f'Given file "{path}" should have one of the following extensions: "'
            + '", "'.join(exts)
            + '".'
        )
def bytes_to_ascii(binary_blob: bytes) -> str:
    """
    Turn bytes into a printable ASCII string.

    Every byte outside the printable ASCII range (0x20-0x7E) is replaced
    with "?".

    Args:
        binary_blob: Input bytes.

    Returns:
        Decoded ASCII string.
    """
    placeholder = ord("?")
    printable = bytes(
        byte if 0x20 <= byte <= 0x7E else placeholder for byte in binary_blob
    )
    # Only printable ASCII is left at this point, so decoding cannot fail.
    return printable.decode("ascii")
def write_dict_to_json(data: Dict, output_file: str) -> None:
    """
    Save a dict as file in human readable JSON format.

    The output is indented with 4 spaces and its keys are sorted.

    Args:
        data: Dict to save.
        output_file: Output file. It is overwritten if it already exists.
    """
    # Explicit encoding: do not depend on the platform default.
    with open(output_file, "w", encoding="utf-8") as file:
        json.dump(data, file, indent=4, sort_keys=True)
I converted `statparser.php` to Python.
The result differs slightly from the `statparser.php` output: some keys differ, and some duplicated or (to me) unclear values are not decoded. In addition to each individual statistic such as "buildings_build", "infantry_killed", etc., I save their sums as "total_buildings_build", "total_infantry_killed", etc., and then aggregate these into "total_built", "total_killed", etc.
I also noticed that the "L" suffix (like "BLL", "UNL" etc.) actually means not "lost", but rather "left". You can validate this too.
The code was written and tested with Python 3.9.5, but should work with many other versions.
Minimal usage example:
`parsing.py`:
`utils.py` (common-purpose helpers):
`mappings.py` (constants only):