JayFoxRox / top2ogg

A tool to convert Topheavy video files (*.top) to *.ogg video files.
GNU General Public License v2.0
0 stars 0 forks source link

Audio doesn't work #1

Open JayFoxRox opened 8 years ago

JayFoxRox commented 8 years ago

I don't know how vorbis stuff works

james-wallace-ghub commented 7 years ago

Is this still an issue? In some cases the Vorbis format needs a bit of fine-tuning after ripping. May I suggest this thread, if you haven't already seen it? The tools there seem to do the job, and aside from the TOPH container, the video files appear to be identical. https://www.hcs64.com/mboard/forum.php?showthread=37817

JayFoxRox commented 7 years ago

Here is a crazily modified / hacked version of vid1_2ogg for *.top files. I'm not sure about the license with this. I took a lot of shortcuts and it's just for testing. The audio seems to be mostly fine but it sounds kind of low-quality (which might just be due to their recording quality).

#define __STDC_CONSTANT_MACROS
#include <iostream>
#include <cstring>
#include "stdint.h"
#include "errors.h"
#include "VID1.h"
#include "Bit_stream.h"

using namespace std;

/// Seven-byte prefix written at the start of a generated Vorbis header
/// packet: one packet-type byte followed by the six characters "vorbis".
class Vorbis_packet_header
{
    // Packet type byte that is emitted ahead of the tag characters.
    uint8_t type;

    // The six tag characters; stored without a terminating NUL.
    static const char vorbis_str[6];

public:
    explicit Vorbis_packet_header(uint8_t t) : type(t) {}

    /// Emits the type byte and then every tag character, each as an
    /// 8-bit field, and returns the stream for chaining.
    friend Bit_oggstream& operator << (Bit_oggstream& bstream, const Vorbis_packet_header& vph) {
        Bit_uint<8> type_byte(vph.type);
        bstream << type_byte;

        const char* const tag_end = vorbis_str + sizeof(vorbis_str);
        for ( const char* cursor = vorbis_str; cursor != tag_end; ++cursor )
        {
            Bit_uint<8> tag_byte(*cursor);
            bstream << tag_byte;
        }

        return bstream;
    }
};

const char Vorbis_packet_header::vorbis_str[6] = { 'v', 'o', 'r', 'b', 'i', 's' };

/**
 * Opens the input file and walks its top-level chunk list, recording where
 * the pieces needed for Ogg generation live.
 *
 * Chunk tags are compared exactly as stored in the file; they read like
 * byte-reversed names ("HDUA", "DUAV", "MARF"), while the error messages use
 * the un-reversed spelling ("AUDH", "VAUD", ...).
 *
 * Recognised top-level chunks:
 *  - "TOPH": payload offset/size are stored in _vid1_offset/_vid1_size.
 *  - 0x89 0xBF 0x63 0xAF: a container whose sub-chunks start 12 bytes in.
 *      - "HDIV": logs the video dimensions.
 *      - "HDUA": sample rate, channel count, sample count and the location
 *        of the Vorbis identification and setup packets; parsing of the
 *        container stops after this sub-chunk.
 *  - "MARF": the offset of the first one is stored in _first_fram_offset.
 *  - four zero bytes within the last 0x20 bytes of the file: treated as a
 *    trailer; _file_size is cut back to the trailer's offset.
 *
 * Diagnostic information about every chunk is written to std::cout.
 *
 * @param name  path of the file to open (binary mode).
 * @throws File_open_error  if the file cannot be opened.
 * @throws Parse_error_str  on truncated headers, unknown chunk types,
 *                          unexpected field values or zero-sized chunks.
 */
VID1_Vorbis::VID1_Vorbis(const string& name) :
    _file_name(name),
    _infile(name.c_str(), ios::binary), _file_size(-1),
    _vid1_offset(-1), _vid1_size(-1),
    _head_offset(-1), _head_size(-1),
    _first_fram_offset(-1),
    _sample_rate(0), _channels(0), _sample_count(0),
    _info_packet_offset(-1), _info_packet_size(-1),
    _setup_packet_offset(-1), _setup_packet_size(-1)
{
    if (!_infile) throw File_open_error(name);

    _infile.seekg(0, ios::end);
    _file_size = _infile.tellg();

    // read chunks
    long chunk_offset = 0;
    while (chunk_offset < _file_size)
    {
        _infile.seekg(chunk_offset, ios::beg);

        if (chunk_offset + 8 > _file_size) throw Parse_error_str("chunk header truncated");

        char chunk_type[4];
        _infile.read(chunk_type, 4);
        uint32_t chunk_size = read_32_le(_infile);
        uint32_t payload_size = chunk_size - 8;
        long payload_offset = chunk_offset + 8;

        // The tag is exactly four bytes with no terminating NUL, so it must
        // be written with an explicit length instead of as a C string.
        std::cout.write(chunk_type, 4) << std::endl;
        std::cout << "Offset " << chunk_offset << std::endl;
        std::cout << "Size " << payload_size << std::endl;

        if (!memcmp(chunk_type,"TOPH",4))
        {
            _vid1_offset = payload_offset;
            _vid1_size = payload_size;
        }
        else if (!memcmp(chunk_type,"\x89\xBF\x63\xAF",4))
        {
            // Seems to be a nested one
            std::cout << "That weird one " << chunk_offset << std::endl;

            // check pad
            if (0 != read_32_le(_infile))
            {
                throw Parse_error_str("expected 0 padding in weird one");
            }

            // Sub-chunks follow the 4-byte tag, 4-byte size and 4-byte pad.
            long sub_offset = chunk_offset + 12;

            while (sub_offset < _file_size)
            {
                _infile.seekg(sub_offset, ios::beg);

                std::cout << "sub @ " << _infile.tellg() << " / " << sub_offset << std::endl;

                char sub_type[4] = {0, 0, 0, 0};
                _infile.read(sub_type, 4);
                std::cout.write(sub_type, 4) << std::endl;

                uint32_t sub_size;

                if (!memcmp(sub_type,"HDIV",4))
                {
                    // Note: these are the offset/size of the enclosing
                    // top-level chunk, not of the HDIV sub-chunk itself.
                    _vid1_offset = payload_offset;
                    _vid1_size = payload_size;

                    sub_size = read_32_le(_infile); // chunk size?!
                    read_32_le(_infile); // ???
                    uint16_t width = read_16_le(_infile);
                    uint16_t height = read_16_le(_infile);
                    std::cout << "Video is " << width << " x " << height << std::endl;

                    // A zero size would never advance sub_offset.
                    if (0 == sub_size)
                    {
                        throw Parse_error_str("zero-sized sub-chunk");
                    }
                }
                else if (!memcmp(sub_type,"HDUA",4))
                {
                    // Sub-chunk size; read to advance the stream. A former
                    // check against _head_size - 12 ("AUDH size mismatch")
                    // is disabled because _head_size is never filled in here.
                    read_32_le(_infile);

                    // check pad
                    if (0 != read_32_le(_infile))
                    {
                        throw Parse_error_str("expected 0 padding in AUDH");
                    }

                    // check VAUD (Vorbis audio?)
                    char vaud_tag[4] = {0, 0, 0, 0};
                    _infile.read(vaud_tag, 4);
                    if (memcmp(vaud_tag,"DUAV",4))
                    {
                        throw Parse_error_str("expected VAUD");
                    }

                    _sample_rate = read_32_le(_infile);
                    _channels = read_8(_infile);
                    if (1 != read_8(_infile) || 0x20 != read_16_le(_infile))  // ? and padding?
                    {
                        throw Parse_error_str("HEAD crap");
                    }

                    // Four unknown 32-bit fields. A disabled check once
                    // compared two of them against 0x22800 and 0x416C
                    // ("bitrate crap").
                    read_32_le(_infile);
                    read_32_le(_infile);
                    read_32_le(_infile); // ?
                    read_32_le(_infile); // ?

                    // Unknown field; a disabled check once required 0xFA.
                    read_32_le(_infile); // ? always zero?

                    _sample_count = read_32_le(_infile); // sample count?

                    // get identification packet info
                    {
                        // The packet is prefixed by a 4-bit field and a
                        // variable-width size field constructed from that
                        // value + 1 (presumably its width in bits).
                        Bit_stream ss(_infile);
                        Bit_uint<4> size_bits;
                        ss >> size_bits;
                        Bit_uintv size(size_bits+1);
                        ss >> size;

                        _info_packet_size = size;
                        _info_packet_offset = _infile.tellg();

                        // Evaluation order matters: each read_8 consumes the
                        // next byte of the expected "\x01vorbis" prefix.
                        if ( 0x1e != size ||
                             1  != read_8(_infile) ||
                            'v' != read_8(_infile) ||
                            'o' != read_8(_infile) ||
                            'r' != read_8(_infile) ||
                            'b' != read_8(_infile) ||
                            'i' != read_8(_infile) ||
                            's' != read_8(_infile) )
                        {
                            throw Parse_error_str("bad identification packet");
                        }
                    }

                    // get setup packet info
                    {
                        // The setup packet's size prefix starts right after
                        // the identification packet.
                        _infile.seekg(_info_packet_offset + _info_packet_size);
                        Bit_stream ss(_infile);
                        Bit_uint<4> size_bits;
                        ss >> size_bits;
                        Bit_uintv size(size_bits+1);
                        ss >> size;

                        _setup_packet_size = size;
                        _setup_packet_offset = _infile.tellg();

                        if ( 5  != read_8(_infile) ||
                            'v' != read_8(_infile) ||
                            'o' != read_8(_infile) ||
                            'r' != read_8(_infile) ||
                            'b' != read_8(_infile) ||
                            'i' != read_8(_infile) ||
                            's' != read_8(_infile) )
                        {
                            throw Parse_error_str("bad setup packet");
                        }
                    }

                    //FIXME: Is this correct? Or do we need a \x00\x00\x00\x00-chunk to leave the nest?
                    break;
                }
                else
                {
                    throw Parse_error_str("unknown sub-chunk type");
                }

                sub_offset = sub_offset + sub_size;
            }
        }
        else if (!memcmp(chunk_type,"MARF",4))
        {
            if (-1 == _first_fram_offset)
            {
                _first_fram_offset = chunk_offset;
            }
        }
        else if (!memcmp(chunk_type,"\x00\x00\x00\x00",4) && _file_size - chunk_offset <= 0x20)
        {
            // Zero tag close to the end of the file: stop scanning here.
            _file_size = chunk_offset;
        }
        else
        {
            throw Parse_error_str("unknown chunk type");
        }

        // A zero-sized chunk would leave chunk_offset unchanged and the scan
        // would never terminate. The trailer case above is exempt because it
        // has already moved _file_size down to chunk_offset.
        if (0 == chunk_size && chunk_offset < _file_size)
        {
            throw Parse_error_str("zero-sized chunk");
        }

        chunk_offset = chunk_offset + chunk_size;
    }
}

/**
 * Writes the three Vorbis header packets to the Ogg bit stream.
 *
 * The identification packet (type 1) and the setup packet (type 5) are
 * copied byte-for-byte from the input file, using the offsets and sizes
 * recorded by the constructor. The comment packet (type 3) in between is
 * synthesised here and carries only a vendor string. The identification
 * packet gets a page of its own; the comment and setup packets share the
 * following page.
 *
 * @param os  destination Ogg bit stream.
 * @throws Parse_error_str  if a copied packet does not start with the
 *                          expected type byte.
 */
void VID1_Vorbis::generate_ogg_header(Bit_oggstream& os)
{
    // --- identification packet: verbatim copy ---
    {
        _infile.seekg(_info_packet_offset);

        Bit_uint<8> type_byte(_infile.get());
        if (1 != type_byte)
        {
            throw Parse_error_str("wrong type for information packet");
        }
        os << type_byte;

        // The type byte above counts as the first byte of the packet.
        unsigned int copied = 1;
        while (copied < _info_packet_size)
        {
            Bit_uint<8> payload_byte(_infile.get());
            os << payload_byte;
            copied++;
        }

        // identification packet on its own page
        os.end_packet();
        os.end_page();
    }

    // --- comment packet: generated ---
    {
        os << Vorbis_packet_header(3);

        static const char vendor[] = "converted by top2ogg " VERSION;

        Bit_uint<32> vendor_size(strlen(vendor));
        os << vendor_size;

        // Emit every character up to (not including) the terminating NUL.
        for (const char* cursor = vendor; *cursor != '\0'; ++cursor)
        {
            Bit_uint<8> vendor_byte(*cursor);
            os << vendor_byte;
        }

        // no user comments
        Bit_uint<32> user_comment_count(0);
        os << user_comment_count;

        Bit_uint<1> framing(1);
        os << framing;

        os.end_packet();
    }

    // --- setup packet: verbatim copy ---
    {
        _infile.seekg(_setup_packet_offset);
        Bit_stream ss(_infile);

        // The type byte is pulled through the bit stream; the remaining
        // bytes are taken straight from the file.
        Bit_uint<8> type_byte;
        ss >> type_byte;

        if (5 != type_byte)
        {
            throw Parse_error_str("wrong type for setup packet");
        }
        os << type_byte;

        unsigned int copied = 1;
        while (copied < _setup_packet_size)
        {
            Bit_uint<8> payload_byte(_infile.get());
            os << payload_byte;
            copied++;
        }

        os.end_packet();
    }

    // end of header pages
    os.end_page();
}

/**
 * Writes a complete Ogg stream to the given output file: first the header
 * pages (via generate_ogg_header), then one Ogg page per "MARF" frame chunk
 * containing the audio packets found in that frame.
 *
 * Layout of a frame as parsed here: "MARF" tag, 32-bit size, 0x18 bytes of
 * zero padding, a "DDIV" sub-chunk (skipped), then a "DDUA" sub-chunk that
 * holds a 32-bit payload size, an unknown 16-bit value, a 16-bit granule
 * increment and a sequence of size-prefixed packets.
 *
 * Diagnostic output is written to std::cout.
 *
 * @param of  output file stream receiving the Ogg data.
 * @throws Parse_error_str  on malformed frames, on a frame running past the
 *                          end of the data, or when the accumulated granule
 *                          count differs from _sample_count.
 */
void VID1_Vorbis::generate_ogg(ofstream& of)
{
    Bit_oggstream os(of);

    generate_ogg_header(os);

    // Audio pages
    {
        long offset = _first_fram_offset;
        long granule = 0;

        bool first_page = true;

        while (offset < _file_size)
        {
            // Every frame after the first one starts a new page.
            if (first_page)
            {
                first_page = false;
            }
            else
            {
                os.end_page();
            }

            // Start of the current frame; kept as long so the next-frame
            // computation below is not truncated to 32 bits.
            const long frame_offset = offset;

            _infile.seekg(frame_offset);

            char chunk_type[4];
            _infile.read(chunk_type, 4);

            if (memcmp(chunk_type,"MARF",4))
            {
                // NOTE(review): a non-zero unknown tag silently ends the
                // stream while an all-zero tag raises an error. This looks
                // inverted but is kept as-is; confirm the intended behaviour.
                if (memcmp(chunk_type,"\0\0\0",4))
                {
                    break;
                }

                throw Parse_error_str("missing FRAM");
            }

            const uint32_t chunk_size = read_32_le(_infile);

            // A zero size would never advance to the next frame.
            if (0 == chunk_size)
            {
                throw Parse_error_str("zero-sized FRAM");
            }

            // check padding
            for (int i = 0; i < 0x20-8; i+=4)
            {
                if (0 != read_32_le(_infile))
                {
                    throw Parse_error_str("nonzero padding in FRAM");
                }
            }

            // Video data sub-chunk. The tag is four bytes without a NUL
            // terminator, so it is printed with an explicit length.
            _infile.read(chunk_type, 4);
            std::cout.write(chunk_type, 4) << std::endl;
            if (memcmp(chunk_type, "DDIV", 4))
            {
                throw Parse_error_str("missing VIDD");
            }
            const uint32_t vidd_size = read_32_le(_infile);
            //FIXME: Process VIDD

            // Skip VIDD: its size includes the 8 header bytes just read.
            _infile.seekg(static_cast<long>(_infile.tellg()) + vidd_size - 8);
            std::cout << _infile.tellg() << std::endl;

            // Audio data sub-chunk.
            _infile.read(chunk_type, 4);
            std::cout.write(chunk_type, 4) << std::endl;
            if (memcmp(chunk_type, "DDUA", 4))
            {
                // A frame without audio data ends the conversion instead of
                // raising "missing AUDD".
                break;
            }

            // AUDD chunk size; read to advance the stream. A former check
            // against chunk_size - 0x20 ("FRAM/AUDD size mismatch") is
            // disabled.
            read_32_le(_infile);

            if (0 != read_32_le(_infile))
            {
                throw Parse_error_str("nonzero padding in AUDD");
            }

            const uint32_t payload_size = read_32_le(_infile);

            if (payload_size > chunk_size - 0x30)
            {
                throw Parse_error_str("doesn't seem like payload fits in FRAM");
            }

            read_16_le(_infile); // ???

            long packet_offset = _infile.tellg();

            // The payload is measured from just after the payload_size
            // field, i.e. two bytes before packet_offset.
            const long payload_end = (packet_offset - 0x32) + 0x30 + payload_size;

            std::cout << "Reading granule from " << packet_offset << std::endl;
            granule += read_16_le(_infile);

            packet_offset += 2;

            while (packet_offset < payload_end)
            {
                os.set_granule(granule);

                // Each packet is prefixed by a 4-bit field and a
                // variable-width size field constructed from that value + 1
                // (presumably its width in bits).
                Bit_stream is(_infile);
                Bit_uint<4> size_bits;
                is >> size_bits;
                Bit_uintv packet_size(size_bits+1);
                is >> packet_size;

                // Size prefix rounded up to whole bytes.
                int header_bytes = (4+size_bits+1+7)/8;

                // Special case: a prefix byte of exactly 0x80 is treated as
                // a one-byte packet.
                if (0 == size_bits) {
                    _infile.seekg(packet_offset);
                    if (0x80 == _infile.get()) {
                        packet_size = 1;
                    }
                }

                _infile.seekg(packet_offset + header_bytes);
                for (unsigned int i = 0; i < packet_size; i++) {
                    Bit_uint<8> c(_infile.get());
                    os << c;
                }

                packet_offset += header_bytes + packet_size;

                // Empty packets are consumed but not emitted.
                if (packet_size != 0) {
                    os.end_packet();
                }
            }

            // A former check that packet_offset lands exactly on
            // payload_end ("packets didn't match up with AUDD payload
            // size") is disabled.

            offset = frame_offset + chunk_size;
        }

        if (offset > _file_size) throw Parse_error_str("page truncated");

        std::cout << "Granule: " << granule << std::endl;
        std::cout << "Samples: " << _sample_count << std::endl;
        if (granule != _sample_count) throw Parse_error_str("miscounted samples");
    }
}

So at least I could copy some of that audio code. However, I'm still lost about the video, see issue #2

james-wallace-ghub commented 7 years ago

I'll try to take a look. There is an easier way for you, I guess: there's a PS2 version of this game that uses .AIF files, which contain .PSS videos that are basically MPEG-2. That doesn't help me though, so I'll try to get this running (I don't have Clang, just GCC, so I need to get the tools sorted).