psobot / keynote-parser

A packer/unpacker for Apple Keynote presentation files.
156 stars 17 forks source link

Please add support for Keynote 13.2 (7038.0.87, 1A101s) #55

Open ckulesa opened 1 year ago

ckulesa commented 1 year ago

Only unpacks Data (images and whatnot) and Index. In "Index", the only file is "AnnotationAuthorStorage.iwa.yaml". No other files are unpacked.

tlw98683 commented 9 months ago

Peter provides code to update the Keynote proto definitions and mappings. However, Apple seems to have made the approach he used to dump the mappings unworkable between Keynote 13.1 and 13.2. I was able to get updated mappings and protobuf definitions by temporarily disabling SIP (https://developer.apple.com/documentation/security/disabling_and_enabling_system_integrity_protection). Here is the code I used to get new mappings while SIP was disabled.

Changed extract_mapping.py:

"""
Launch Keynote (or technically Pages, or any other iWork app), set a
breakpoint at the first reasonable method after everything is loaded,
then dump the contents of TSPRegistry sharedRegistry to a JSON file.

To avoid iCloud entitlement problems temporarily disable SIP before this code is run.

Nastiest hack. Please don't use this.
Copyright 2020 Peter Sobot (psobot.com).
"""

import sys
sys.path.append("/Library/Developer/CommandLineTools/Library/PrivateFrameworks/LLDB.framework/Resources/Python")

import os
import sys
import json
import lldb

# The executable to inspect is the last command-line argument.
exe = sys.argv[-1]
debugger = lldb.SBDebugger.Create()
# Synchronous mode: launch/continue calls return only once the process has
# stopped again (or exited), so the state checks below are meaningful.
debugger.SetAsync(False)
target = debugger.CreateTargetWithFileAndArch(exe, None)
target.BreakpointCreateByName("_sendFinishLaunchingNotification")
target.BreakpointCreateByName("_handleAEOpenEvent:")

process = target.LaunchSimple(None, None, os.getcwd())

if not process:
    raise ValueError("Failed to launch process: " + exe)
try:
    # Keep resuming the process until it stops at a breakpoint that is not
    # inside CloudKit code; that stop is where the registry gets dumped.
    while process.GetState() == lldb.eStateStopped:
        thread = process.GetThreadAtIndex(0)
        stop_reason = thread.GetStopReason()
        if stop_reason == lldb.eStopReasonBreakpoint:
            frame_description = str(thread.GetSelectedFrame())
            if any(x in frame_description for x in ("CKContainer", "CloudKit")):
                # Skip the code in CKContainer, avoiding a crash due to missing entitlements:
                thread.ReturnFromFrame(thread.GetSelectedFrame(), lldb.SBValue().CreateValueFromExpression("0", ""))
                process.Continue()
            else:
                break
        elif stop_reason == lldb.eStopReasonException:
            sys.stderr.write(repr(thread) + "\n")
            raise NotImplementedError(f"LLDB caught exception, {__file__} needs to be updated to handle.")
        else:
            # Any other stop reason used to leave the process stopped and this
            # loop spinning forever. Resume instead, and fail loudly if the
            # process cannot be resumed.
            error = process.Continue()
            if error.Fail():
                raise ValueError("LLDB was unable to continue process! " + str(error))

    if process.GetState() != lldb.eStateStopped:
        raise ValueError("LLDB was unable to stop process! " + str(process))
    if not thread:
        # Previously this case printed nothing, leaving an empty output file.
        raise ValueError("Could not get thread to print out registry!")
    frame = thread.GetFrameAtIndex(0)
    if not frame:
        raise ValueError("Could not get frame to print out registry!")

    registry = frame.EvaluateExpression('[TSPRegistry sharedRegistry]').description
    if not registry:
        raise ValueError("Could not evaluate [TSPRegistry sharedRegistry]!")

    # The description holds one "<id> -> <value>" entry per line between the
    # first "{" and the following "}".
    split = [
        x.strip().split(" -> ")
        for x in registry.split("{")[1].split("}")[0].split("\n")
        if x.strip()
    ]
    # Keep the last space-separated token of each value, drop entries whose
    # value mentions 'null', and emit the result ordered by numeric ID.
    print(
        json.dumps(
            dict(
                sorted(
                    (int(a), b.split(" ")[-1]) for a, b in split if 'null' not in b
                )
            ),
            indent=2,
        )
    )
finally:
    process.Kill()

Changed generate_mapping.py:

import json
import glob

# Python source that main() appends verbatim to the end of the generated
# mapping.py. This is a plain (non-f) string, so the braces of the embedded
# f-string are written out unchanged. The embedded code refers to PROTO_FILES
# and TSPRegistryMapping, both of which main() writes earlier in the same
# generated file.
# NOTE(review): it also refers to TSTArchives, which is only in scope in the
# generated module if a TSTArchives.proto exists in ../protos -- confirm.
RUNTIME_CODE = """

def compute_maps():
    name_class_map = {}
    for file in PROTO_FILES:
        for message_name in file.DESCRIPTOR.message_types_by_name:
            message_type = getattr(file, message_name)
            name_class_map[message_type.DESCRIPTOR.full_name] = message_type
    # These two are hard-coded because the above code does not support names with two periods
    name_class_map['TST.GroupByArchive.GroupNodeArchive']=TSTArchives.GroupByArchive.GroupNodeArchive
    name_class_map['TST.GroupByArchive.AggregatorArchive']=TSTArchives.GroupByArchive.AggregatorArchive

    id_name_map = {}
    for k, v in list(TSPRegistryMapping.items()):
        if v in name_class_map:
            id_name_map[int(k)] = name_class_map[v]
        else:
            print(f"Eeek, {v} from class_name_map not found")

    return name_class_map, id_name_map

NAME_CLASS_MAP, ID_NAME_MAP = compute_maps()
"""

def main():
    """Generate ``mapping.py`` in the current working directory.

    Reads ``mapping.json`` from the current directory and the names of the
    ``*.proto`` files in ``../protos``, then writes a module consisting of:

    * one ``from .generated import <name>_pb2 as <name>`` line per proto file,
    * a ``PROTO_FILES`` list naming those imported modules,
    * ``TSPRegistryMapping``, the ``repr`` of the parsed JSON mapping,
    * the contents of ``RUNTIME_CODE``.

    Raises:
        OSError: if ``mapping.json`` cannot be read or ``mapping.py`` cannot
            be written.
        json.JSONDecodeError: if ``mapping.json`` is not valid JSON.
    """
    # Function-scope import: this script's import block only brings in
    # ``json`` and ``glob``, so the ``os.path`` calls below used to raise
    # NameError.
    import os

    output_filename = "mapping.py"
    mapping_filename = "mapping.json"

    # Gather every input before opening the output for writing, so that a
    # missing or malformed mapping.json does not leave a truncated mapping.py.
    with open(mapping_filename, encoding="utf-8") as mapping_file:
        registry_mapping = json.load(mapping_file)

    # Sort on the file names (suffix included) and only then derive the
    # module names, which keeps the emitted order identical to the file order.
    proto_files = sorted(
        os.path.basename(path) for path in glob.glob(os.path.join("..", "protos", "*.proto"))
    )
    module_names = [proto_file.replace('.proto', '') for proto_file in proto_files]

    with open(output_filename, "w", encoding="utf-8") as f:
        f.write(f"# Generated code! Edit {__file__} instead.\n")
        f.write("\n")

        f.write("from __future__ import absolute_import\n")
        f.write("\n")

        for module_name in module_names:
            f.write(f"from .generated import {module_name}_pb2 as {module_name}\n")

        f.write("\n\n")

        f.write("PROTO_FILES = [\n")
        for module_name in module_names:
            f.write(f"\t{module_name},\n")
        f.write("]\n")
        f.write("\n")

        f.write(f"TSPRegistryMapping = {repr(registry_mapping)}\n")

        f.write(RUNTIME_CODE)


if __name__ == "__main__":
    main()

Changed Makefile:


# Directory added to PYTHONPATH when running extract_mapping.py, which
# imports the `lldb` module.
LLDB_PYTHON_PATH := /Library/Developer/CommandLineTools/Library/PrivateFrameworks/LLDB.framework/Resources/Python
# Interpreter (run through xcrun) used for extract_mapping.py.
LLDB_PYTHON := python3.9
# Code-signing identity lookup; disabled, and not referenced by any rule below.
#IDENTITY := $(shell security find-identity -p codesigning | head -n 1 | python3 -c 'import sys; print(sys.stdin.read().split("\"")[1])')

# `all` and `clean` name actions rather than files, so a stray file with one
# of these names must not make the target look up to date.
.PHONY: all clean

all: mapping.py proto

# Run extract_mapping.py against the copied app and capture its JSON output,
# then delete the copy.
mapping.json: Keynote.unsigned.app/Contents/MacOS/Keynote ./extract_mapping.py
	PYTHONPATH=${LLDB_PYTHON_PATH} xcrun $(LLDB_PYTHON) ./extract_mapping.py Keynote.unsigned.app/Contents/MacOS/Keynote > $@
	rm -rf Keynote.unsigned.app

# Dump the .proto definitions from the installed app and copy them to ../protos.
proto: /Applications/Keynote.app
	python3 protodump.py /Applications/Keynote.app ./proto/
	# Note that if any of the incoming Protobuf definitions contain periods,
	# protoc will put them into their own Python packages. This is not desirable
	# for import rules in Python, so we replace non-final period characters with
	# underscores.
	python3 ./rename_proto_files.py proto
	cp ./proto/*.proto ../protos/

# Working copy of the installed app, used as the debug target above.
Keynote.unsigned.app/Contents/MacOS/Keynote: /Applications/Keynote.app
	cp -r /Applications/Keynote.app ./Keynote.unsigned.app

# Generate mapping.py from mapping.json and install it into the package.
mapping.py: mapping.json
	python3 generate_mapping.py
	cp mapping.py ../keynote_parser/mapping.py

clean:
	rm -rf Keynote.unsigned.app
	rm -rf mapping.json
	rm -rf mapping.py
	rm -rf proto

Changed keynote_parser/__init__.py:

"""Unpack and repack Apple Keyote files."""
__author__ = "Peter Sobot"

import keynote_parser.macos_app_version

__major_version__ = 1
__patch_version__ = 0
__supported_keynote_version__ = keynote_parser.macos_app_version.MacOSAppVersion(
    "13.2", "7037.0.101", "1A98"
)
__version_tuple__ = (
    __major_version__,
    __supported_keynote_version__.major,
    __supported_keynote_version__.minor,
    __patch_version__,
)
__version__ = ".".join([str(x) for x in __version_tuple__])

__email__ = "github@petersobot.com"
__description__ = 'A tool for manipulating Apple Keynote presentation files.'
__url__ = "https://github.com/psobot/keynote-parser"
__new_issue_url__ = "https://github.com/psobot/keynote-parser/issues/new"
__command_line_invocation__ = False