gsuberland / UMP_Format

A description of the UMP data format used by YouTube
27 stars 3 forks source link

Decoding attempt #1

Open anton-novak opened 4 months ago

anton-novak commented 4 months ago

Hi, thanks for laying down your findings. I am trying to decode a response from YT Music and I'm getting results that deviate from them when I get past the second UMP part of the response.

I detect the header part with type 20, and I've checked via the protobufs decoder that it has an ID of the media item I am looking at. Then I detect the media part with type 21 but it has a really short payload length, and after this part I detect parts with unknown types that go beyond 128.

If you have any advice to give, I'll be happy to follow it and give it one more go. response.txt

I am not really familiar with bit & byte manipulation, here's my attempt in Node.js:

import fs from "fs";

const data = fs.readFileSync("response", "utf8");

readUmpResponseParts(data);

function readUmpResponseParts(responseData) {
    // ignore the first 35 characters
    // data:application/vnd.yt-ump;base64,
    if (isValidBase64(responseData.slice(35))) {
        console.log("Valid base64");
    } else {
        throw new Error("Invalid base64");
    }
    const buffer = Buffer.from(responseData.slice(35), "base64");

    let bytePosition = 0;
    while (bytePosition < buffer.length) {
        bytePosition += readPart(buffer.slice(bytePosition));
    }
}

function readPart(buffer) {
    const firstByte = buffer[0];
    const partVarIntSize = getVarIntSize(firstByte);
    console.log("Part varint size:", partVarIntSize);

    let bytePosition = 0;
    let varInts = [];
    let partType;
    let partPayloadLength = Infinity;

    while(bytePosition < buffer.length && varInts.length < partPayloadLength + 2) {
        const varIntBytes = buffer.slice(bytePosition, bytePosition + partVarIntSize);
        if (bytePosition === 0) varIntBytes[0] = getRemainderOfFirstByte(varIntBytes[0]);
        const varInt = combineBytes(varIntBytes, partVarIntSize);
        varInts.push(varInt);
        bytePosition += partVarIntSize;
        if (varInts.length === 1) {
            partType = varInts[0];
            console.log("Part type:", varInts[0]);
        }
        if (varInts.length === 2) {
            partPayloadLength = varInts[1];
            console.log("Part payload length:", varInts[1]);
        }
    }

    return bytePosition;
}

function getRemainderOfFirstByte(b) {
    const size = getVarIntSize(b);
    switch (size) {
        case 1: return b & 0b01111111;
        case 2: return b & 0b00111111;
        case 3: return b & 0b00011111;
        case 4: return b & 0b00001111;
        case 5: return 0; // For 5-byte integers, the initial bits are ignored in value calculation.
        default: throw new Error("Invalid size");
    }
}

function getVarIntSize(b) {
    let size = 0;
    for (let shift = 1; shift <= 5; shift++) {
        if ((b & (128 >> (shift - 1))) === 0) {
            size = shift;
            break;
        }
    }
    // Ensure size is within expected bounds
    if (size < 1 || size > 5) throw new Error("Invalid size");
    return size;
}

function isValidBase64(str) {
    const base64Pattern = /^(?:[A-Za-z0-9+\/]{4})*?(?:[A-Za-z0-9+\/]{2}==|[A-Za-z0-9+\/]{3}=)?$/;
    return base64Pattern.test(str);
}

function combineBytes(bytes, size) {
    let value = 0;

    for (let i = 0; i < size; i++) {
        value |= bytes[i] << (8 * (size - 1 - i));
    }

    return value;
}

and this is the output:

Valid base64
Part varint size: 1
Part type: 20
Part payload length: 49
Part varint size: 1
Part type: 21
Part payload length: 205
Part varint size: 1
Part type: 2
Part payload length: 231
Part varint size: 2
Part type: 256
Part payload length: 2
Part varint size: 2
Part type: 8192
Part payload length: 2
Part varint size: 2
Part type: 15872
Part payload length: 2
Part varint size: 1
Part type: 61
Part payload length: 0
Part varint size: 1
Part type: 0
Part payload length: 2
Part varint size: 1
Part type: 0
Part payload length: 2
Part varint size: 1
Part type: 0
Part payload length: 2
Part varint size: 1
Part type: 0
Part payload length: 3
Part varint size: 1
Part type: 2
Part payload length: 155
Part varint size: 3
Part type: 786432
Part payload length: 229376
davidzeng0 commented 1 month ago

There is updated information available at my PR. https://github.com/gsuberland/UMP_Format/pull/2