pspdev / psp-ghidra-scripts

Scripts for use with the Ghidra Reverse Engineering Suite
Apache License 2.0
34 stars 6 forks source link

Add more NIDs #17

Closed efonte closed 5 months ago

efonte commented 8 months ago

I have combined the NIDs from the Spenon-dev/PSPLibDoc repository.

I have kept the original xml just in case you want to keep it.

efonte commented 5 months ago

@John-K this is the script I used to generate an XML file with the combined NIDs. I then merged that XML with the original ppsspp_niddb.xml file using VS Code (with the "Delete Duplicate Lines" and "Sort Lines Ascending" commands).

from collections import namedtuple
from pathlib import Path

from lxml import etree as ET

# Immutable (nid, name) record; hashable, so entries can be de-duplicated
# with set() and ordered with sorted().
NIDEntry = namedtuple("NIDEntry", "nid name")

def load_psp_libdoc(xml_file):
    """Collect every function NID/name pair from one PSPLibDoc XML file.

    Functions are located under the element path
    PRXFILES/PRXFILE/LIBRARIES/LIBRARY/FUNCTIONS/FUNCTION relative to the
    document root.  Each NID is upper-cased and stripped of a leading "0X";
    the name is taken verbatim from the NAME element.

    Args:
        xml_file: Source passed straight to ``ET.parse``.

    Returns:
        A list of ``NIDEntry`` tuples in document order (duplicates kept).
    """
    document = ET.parse(xml_file).getroot()

    # One combined path visits the same FUNCTION elements, in the same order,
    # as walking PRXFILE -> LIBRARY -> FUNCTION level by level.
    function_nodes = document.findall(
        "PRXFILES/PRXFILE/LIBRARIES/LIBRARY/FUNCTIONS/FUNCTION"
    )

    return [
        NIDEntry(
            nid=node.find("NID").text.upper().removeprefix("0X"),
            name=node.find("NAME").text,
        )
        for node in function_nodes
    ]

if __name__ == "__main__":
    # Imported here so this script block brings its own dependency into scope.
    from xml.sax.saxutils import escape

    # Gather entries from every PSPLibDoc_*.xml found anywhere under ./PSPLibDoc.
    nid_entries = []
    for xml_path in Path("PSPLibDoc").glob("**/PSPLibDoc_*.xml"):
        print(xml_path)
        nid_entries.extend(load_psp_libdoc(xml_path))

    # An XML declaration without an explicit encoding means UTF-8, so write
    # UTF-8 explicitly instead of relying on the platform's locale encoding.
    with open("niddb_combined.xml", "w", encoding="utf-8") as f:
        f.write('<?xml version="1.0" ?>\n<FUNCTIONS>\n')
        # set() drops exact (nid, name) duplicates; sorting by (nid, name)
        # makes the output order deterministic.
        for entry in sorted(set(nid_entries), key=lambda n: (n.nid, n.name)):
            # Escape &, < and > so unusual names cannot produce malformed XML.
            f.write(
                f"<FUNCTION><NID>0x{escape(entry.nid)}</NID>"
                f"<NAME>{escape(entry.name)}</NAME></FUNCTION>\n"
            )
        f.write("</FUNCTIONS>")

For the verified XML:

import hashlib
import xml.etree.ElementTree as ET

def get_nid_for_string(string):
    """Return the NID for *string* as eight upper-case hex digits.

    The NID is built from the first four bytes of the SHA-1 digest of the
    UTF-8 encoded string, with those four bytes taken in reverse order.
    """
    digest_prefix = hashlib.sha1(bytes(string, "utf-8")).digest()[:4]
    # Reading the prefix as a little-endian integer and printing it as
    # zero-padded hex is the same as hex-dumping the four bytes reversed.
    return f"{int.from_bytes(digest_prefix, 'little'):08X}"

def process_file(input_path, output_path):
    """Write only the hash-verified FUNCTION entries of one NID XML to another.

    An entry is kept when its NID, compared case-insensitively and ignoring
    an optional leading "0x", equals the value ``get_nid_for_string`` computes
    from its NAME.  Entries with a missing or empty NID or NAME cannot be
    verified and are left out.

    Args:
        input_path: XML file whose root element has FUNCTION children, each
            holding NID and NAME child elements.
        output_path: File to create or overwrite with the verified entries,
            written as UTF-8.
    """
    # Function-scope import so the block brings its own dependency into scope.
    from xml.sax.saxutils import escape

    root = ET.parse(input_path).getroot()

    # An XML declaration without an explicit encoding means UTF-8.
    with open(output_path, "w", encoding="utf-8") as outfile:
        outfile.write('<?xml version="1.0" ?>\n<FUNCTIONS>\n')
        for function_elem in root.findall("FUNCTION"):
            nid = function_elem.findtext("NID")
            name = function_elem.findtext("NAME")
            if not nid or not name:
                # Nothing to hash or nothing to compare against.
                continue

            # Normalise so "0xabcd1234", "0XABCD1234" and "ABCD1234" all
            # compare equal to the upper-case hash string.
            normalized_nid = nid.strip().upper().removeprefix("0X")
            if normalized_nid == get_nid_for_string(name):
                # The parser has already unescaped the text, so escape it
                # again to keep the output well-formed.
                outfile.write(
                    f"<FUNCTION><NID>{escape(nid)}</NID>"
                    f"<NAME>{escape(name)}</NAME></FUNCTION>\n"
                )
        outfile.write("</FUNCTIONS>")

# Read the combined NID database and write the hash-checked subset next to it.
input_file_path, output_file_path = (
    "niddb_combined.xml",
    "niddb_combined_and_verified.xml",
)
process_file(input_path=input_file_path, output_path=output_file_path)
John-K commented 5 months ago

Thanks @efonte!