Open ckulesa opened 1 year ago
Peter provides code to update the Keynote proto definitions and mappings. However, Apple seems to have made the approach he used to dump the mappings unworkable between Keynote 13.1 and 13.2. I was able to get updated mappings and protobuf definitions by temporarily disabling SIP (https://developer.apple.com/documentation/security/disabling_and_enabling_system_integrity_protection). Here is the code I used to get new mappings while SIP was disabled.
Changed extract_mapping.py:
"""
Launch Keynote (or technically Pages, or any other iWork app), set a
breakpoint at the first reasonable method after everything is loaded,
then dump the contents of TSPRegistry sharedRegistry to a JSON file.
To avoid iCloud entitlement problems temporarily disable SIP before this code is run.
Nastiest hack. Please don't use this.
Copyright 2020 Peter Sobot (psobot.com).
"""
import sys
sys.path.append("/Library/Developer/CommandLineTools/Library/PrivateFrameworks/LLDB.framework/Resources/Python")
import os
import sys
import json
import lldb
def _parse_registry_description(description):
    """Turn the textual dump of ``[TSPRegistry sharedRegistry]`` into a dict.

    The dump is expected to contain one ``{ ... }`` section whose non-empty
    lines look like ``<id> -> <words ...> <ClassName>``. Lines whose right-hand
    side mentions ``null`` are skipped.

    Returns a dict mapping the integer id to the last word of the right-hand
    side, ordered by id.

    Raises ValueError if the description is missing or has no ``{ ... }`` body.
    """
    if not description or "{" not in description:
        raise ValueError(
            "Could not read TSPRegistry description: " + repr(description)
        )
    body = description.split("{")[1].split("}")[0]
    pairs = [line.strip().split(" -> ") for line in body.split("\n") if line.strip()]
    return dict(
        sorted((int(a), b.split(" ")[-1]) for a, b in pairs if "null" not in b)
    )


# The executable to inspect is always the last command-line argument.
exe = sys.argv[-1]

debugger = lldb.SBDebugger.Create()
debugger.SetAsync(False)
target = debugger.CreateTargetWithFileAndArch(exe, None)
target.BreakpointCreateByName("_sendFinishLaunchingNotification")
target.BreakpointCreateByName("_handleAEOpenEvent:")

process = target.LaunchSimple(None, None, os.getcwd())
if not process:
    raise ValueError("Failed to launch process: " + exe)

try:
    thread = None
    while process.GetState() == lldb.eStateStopped:
        thread = process.GetThreadAtIndex(0)
        stop_reason = thread.GetStopReason()
        if stop_reason == lldb.eStopReasonBreakpoint:
            selected_frame = str(thread.GetSelectedFrame())
            if any(x in selected_frame for x in ("CKContainer", "CloudKit")):
                # Skip the code in CKContainer, avoiding a crash due to missing entitlements:
                thread.ReturnFromFrame(
                    thread.GetSelectedFrame(),
                    lldb.SBValue().CreateValueFromExpression("0", ""),
                )
                process.Continue()
            else:
                # Stopped at one of our own breakpoints: ready to dump the registry.
                break
        elif stop_reason == lldb.eStopReasonException:
            sys.stderr.write(repr(thread) + "\n")
            raise NotImplementedError(
                f"LLDB caught exception, {__file__} needs to be updated to handle."
            )
        else:
            # Any other stop reason used to leave the process stopped and this
            # loop spinning forever; resume so the loop always makes progress.
            sys.stderr.write(f"Resuming after unhandled stop reason {stop_reason}\n")
            process.Continue()

    if process.GetState() != lldb.eStateStopped:
        raise ValueError("LLDB was unable to stop process! " + str(process))
    if not thread:
        # Previously this case printed nothing and produced an empty mapping file.
        raise ValueError("Could not get thread to print out registry!")
    frame = thread.GetFrameAtIndex(0)
    if not frame:
        raise ValueError("Could not get frame to print out registry!")

    registry = frame.EvaluateExpression("[TSPRegistry sharedRegistry]").description
    print(json.dumps(_parse_registry_description(registry), indent=2))
finally:
    # Always tear the launched app down, whether or not the dump succeeded.
    process.Kill()
Changed generate_mapping.py:
import json
import glob
# Python source appended verbatim to the generated mapping.py. It is a plain
# (non-f) string, so the f-string inside it is emitted as-is and evaluated
# only when the generated module runs. It must stay validly indented because
# it is written to disk unchanged.
RUNTIME_CODE = """

def compute_maps():
    name_class_map = {}
    for file in PROTO_FILES:
        for message_name in file.DESCRIPTOR.message_types_by_name:
            message_type = getattr(file, message_name)
            name_class_map[message_type.DESCRIPTOR.full_name] = message_type
    # These two are hard-coded because the above code does not support names with two periods
    name_class_map['TST.GroupByArchive.GroupNodeArchive']=TSTArchives.GroupByArchive.GroupNodeArchive
    name_class_map['TST.GroupByArchive.AggregatorArchive']=TSTArchives.GroupByArchive.AggregatorArchive

    id_name_map = {}
    for k, v in list(TSPRegistryMapping.items()):
        if v in name_class_map:
            id_name_map[int(k)] = name_class_map[v]
        else:
            print(f"Eeek, {v} from class_name_map not found")

    return name_class_map, id_name_map


NAME_CLASS_MAP, ID_NAME_MAP = compute_maps()
"""


def main():
    """Generate ``mapping.py`` in the current directory.

    Reads ``mapping.json`` (the dumped registry mapping) from the current
    directory and the names of the ``*.proto`` files in ``../protos``, then
    writes a module that imports every generated ``*_pb2`` module, lists them
    in ``PROTO_FILES``, embeds the mapping as ``TSPRegistryMapping`` and ends
    with ``RUNTIME_CODE``.

    Raises whatever ``open``/``json.load`` raise if ``mapping.json`` is
    missing or malformed; in that case ``mapping.py`` is left untouched.
    """
    # Imported locally: this script's module-level imports do not include os.
    import os

    output_filename = "mapping.py"
    mapping_filename = "mapping.json"

    # Load the input first so a missing or broken mapping.json cannot leave a
    # truncated mapping.py behind.
    with open(mapping_filename) as mapping_file:
        registry_mapping = json.load(mapping_file)

    proto_files = sorted(
        os.path.basename(path)
        for path in glob.glob(os.path.join("..", "protos", "*.proto"))
    )
    # Every name matched "*.proto", so dropping the extension yields the module name.
    module_names = [os.path.splitext(proto_file)[0] for proto_file in proto_files]

    with open(output_filename, "w") as f:
        f.write(f"# Generated code! Edit {__file__} instead.\n")
        f.write("\n")
        f.write("from __future__ import absolute_import\n")
        f.write("\n")
        for module_name in module_names:
            f.write(f"from .generated import {module_name}_pb2 as {module_name}\n")
        f.write("\n\n")
        f.write("PROTO_FILES = [\n")
        for module_name in module_names:
            f.write(f"\t{module_name},\n")
        f.write("]\n")
        f.write("\n")
        f.write(f"TSPRegistryMapping = {repr(registry_mapping)}\n")
        f.write(RUNTIME_CODE)


if __name__ == "__main__":
    main()
Changed Makefile:
# Location of the LLDB Python bindings shipped with the Command Line Tools,
# and the interpreter used to run the LLDB-driven extraction script.
LLDB_PYTHON_PATH := /Library/Developer/CommandLineTools/Library/PrivateFrameworks/LLDB.framework/Resources/Python
LLDB_PYTHON := python3.9
# Left disabled: nothing in this Makefile uses IDENTITY.
#IDENTITY := $(shell security find-identity -p codesigning | head -n 1 | python3 -c 'import sys; print(sys.stdin.read().split("\"")[1])')

# "all" and "clean" are commands, not files; keep them working even if a file
# with the same name exists.
.PHONY: all clean

# mapping.json is written through a shell redirect, so a failed extraction
# would otherwise leave a partial file that looks up to date.
.DELETE_ON_ERROR:

all: mapping.py proto

# Dump the registry mapping from a copy of Keynote, then discard the copy.
mapping.json: Keynote.unsigned.app/Contents/MacOS/Keynote ./extract_mapping.py
	PYTHONPATH=$(LLDB_PYTHON_PATH) xcrun $(LLDB_PYTHON) ./extract_mapping.py Keynote.unsigned.app/Contents/MacOS/Keynote > $@
	rm -rf Keynote.unsigned.app

# Note that if any of the incoming Protobuf definitions contain periods,
# protoc will put them into their own Python packages. This is not desirable
# for import rules in Python, so we replace non-final period characters with
# underscores.
proto: /Applications/Keynote.app
	python3 protodump.py /Applications/Keynote.app ./proto/
	python3 ./rename_proto_files.py proto
	cp ./proto/*.proto ../protos/

# Work on a private copy of the installed application.
Keynote.unsigned.app/Contents/MacOS/Keynote: /Applications/Keynote.app
	cp -r /Applications/Keynote.app ./Keynote.unsigned.app

# Generate the Python mapping module and install it into the package.
mapping.py: mapping.json
	python3 generate_mapping.py
	cp mapping.py ../keynote_parser/mapping.py

clean:
	rm -rf Keynote.unsigned.app
	rm -rf mapping.json
	rm -rf mapping.py
	rm -rf proto
Changed keynote_parser/__init__.py:
"""Unpack and repack Apple Keyote files."""
__author__ = "Peter Sobot"
import keynote_parser.macos_app_version
__major_version__ = 1
__patch_version__ = 0
__supported_keynote_version__ = keynote_parser.macos_app_version.MacOSAppVersion(
"13.2", "7037.0.101", "1A98"
)
__version_tuple__ = (
__major_version__,
__supported_keynote_version__.major,
__supported_keynote_version__.minor,
__patch_version__,
)
__version__ = ".".join([str(x) for x in __version_tuple__])
__email__ = "github@petersobot.com"
__description__ = 'A tool for manipulating Apple Keynote presentation files.'
__url__ = "https://github.com/psobot/keynote-parser"
__new_issue_url__ = "https://github.com/psobot/keynote-parser/issues/new"
__command_line_invocation__ = False
Only unpacks Data (images and whatnot) and Index. In "Index", the only file is "AnnotationAuthorStorage.iwa.yaml". No other files are unpacked.