tribusonz-2 / rb_wave

Wave library for Ruby
MIT License
0 stars 0 forks source link

The RIFF reader-writer implementation #1

Closed tribusonz-2 closed 4 weeks ago

tribusonz-2 commented 1 month ago

To implement the RIFF reader-writer, I modified Dr. Naofumi Aoki's wave functions.
I expected the function routines to simply be replaced by API calls, but they have ended up as quite different things:

/*
 * Decode the first two bytes of the Ruby string `bytes` as an unsigned
 * 16-bit little-endian integer (byte 0 is the least significant).
 * The caller must supply a string holding at least two bytes.
 */
static uint16_t
doublebyte2u16le(VALUE bytes)
{
    const unsigned char *p = (const unsigned char *)StringValuePtr(bytes);
    const int low = p[0];
    const int high = p[1] << 8;

    return (uint16_t)(low | high);
}

/*
 * Decode the first four bytes of the Ruby string `bytes` as an unsigned
 * 32-bit little-endian integer (byte 0 is the least significant).
 * The caller must supply a string holding at least four bytes.
 */
static uint32_t
quadbyte2u32le(VALUE bytes)
{
    const unsigned char *ptr = (const unsigned char *)StringValuePtr(bytes);

    /* Widen each byte to uint32_t before shifting: shifting a promoted
     * (signed) int left by 24 is undefined when the byte is >= 0x80. */
    return (uint32_t)ptr[0]
         | (uint32_t)ptr[1] << 8
         | (uint32_t)ptr[2] << 16
         | (uint32_t)ptr[3] << 24;
}

/*
 * Decode one unsigned 8-bit PCM sample.
 *
 * buf: points at the sample byte (offset binary, 128 is silence).
 * s:   receives the sample scaled by 1/128, i.e. in [-1.0, 127/128].
 */
void
pcm_read_8bit(unsigned char buf[], double s[])
{
    const double centered = buf[0] - 128.0;

    s[0] = centered / (double)0x80;
}

/*
 * Decode one signed 16-bit little-endian PCM sample.
 *
 * buf: points at the two sample bytes, least significant first.
 * s:   receives the sample scaled by 1/32768, i.e. in [-1.0, 1.0).
 */
void
pcm_read_16bit(unsigned char buf[], double s[])
{
    const int low = buf[0];
    const int high = buf[1] << 8;
    const int16_t sample = (int16_t)(low | high);

    s[0] = sample / (double)0x8000;
}

/*
 * Decode one signed 24-bit little-endian PCM sample.
 *
 * buf: points at the three sample bytes, least significant first.
 * s:   receives the sample scaled by 1/8388608, i.e. in [-1.0, 1.0).
 */
void
pcm_read_24bit(unsigned char buf[], double s[])
{
    int sample = buf[0] | (buf[1] << 8) | (buf[2] << 16);

    /* Bit 23 is the sign bit: fold the upper half down to negative values. */
    if (sample >= 0x800000)
        sample -= 0x1000000;

    s[0] = sample / (double)0x800000;
}

/*
 * Decode one signed 32-bit little-endian PCM sample.
 *
 * buf: points at the four sample bytes, least significant first.
 * s:   receives the sample scaled by 1/2147483648, i.e. in [-1.0, 1.0).
 */
void
pcm_read_32bit(unsigned char buf[], double s[])
{
    /* Assemble in unsigned arithmetic: shifting a promoted (signed) int
     * left by 24 is undefined when the top byte is >= 0x80. */
    const uint32_t raw = (uint32_t)buf[0]
                       | (uint32_t)buf[1] << 8
                       | (uint32_t)buf[2] << 16
                       | (uint32_t)buf[3] << 24;
    int64_t sample = (int64_t)raw;

    /* Explicit two's-complement sign extension, independent of how the
     * implementation converts out-of-range values to int32_t. */
    if (raw & UINT32_C(0x80000000))
        sample -= INT64_C(0x100000000);

    *s = (double)sample / 2147483648.0;
}

/*
 * Raise rb_eWaveSemanticError reporting that the header field named
 * `memb` must not be zero. `memb` is inserted into the message via "%s".
 */
static inline void
must_not_be_zero_error(const char *memb)
{
    rb_raise(rb_eWaveSemanticError, "'%s' must not be zero", memb);
}

/*
 * Read exactly `len` bytes from `io` into the Ruby string `buf` using
 * IO#read(len, buf). Raises EOFError when fewer than `len` bytes were
 * delivered, so callers can index buf[0..len-1] safely.
 */
static void
wave_read_exact_bytes(VALUE io, long len, VALUE buf)
{
    rb_funcall(io, rb_intern("read"), 2, LONG2FIX(len), buf);
    if (RSTRING_LEN(buf) != len)
        rb_raise(rb_eEOFError, "unexpected end of file");
}

/*
 * Read a canonical linear-PCM RIFF/WAVE file and return an Array holding
 * one PCM object (rb_pcm_new) per channel.
 *
 * The file must consist of the "RIFF"/"WAVE" header immediately followed
 * by a "fmt " chunk and then a "data" chunk; 8, 16, 24 and 32 bits per
 * sample are accepted.
 *
 * file_name: NUL-terminated path of the file to open in "rb" mode.
 *
 * Raises rb_eWaveSemanticError for malformed or unsupported headers and
 * EOFError when the file is shorter than its headers announce.
 *
 * NOTE(review): the File object is closed only on the success path; when
 * an exception is raised it stays open until it is garbage collected.
 */
static inline VALUE
wave_read_linear_pcm(char *file_name)
{
    const uint32_t BUFFER_SIZE = 0x1000;
    VALUE io = rb_funcall(
        rb_cFile,
        rb_intern("open"),
        2,
        rb_str_new_cstr(file_name),
        rb_str_new_cstr("rb"));
    VALUE io_buf = rb_str_new(0,0);

    uint32_t riff_chunk_size;
    uint32_t fmt_chunk_size;
    uint16_t wave_format_type;
    uint16_t channel;
    uint32_t samples_per_sec;
    uint32_t bytes_per_sec;
    uint16_t block_size;
    uint16_t bits_per_sample;
    uint32_t data_chunk_size;

    VALUE pcm_ary;
    double **mat;
    long length;
    void (*func)(unsigned char *, double *);
    long idx;
    uint32_t buffer_size;
    uint32_t remaining;
    uint16_t bytes_per_sample;
    uint16_t samples_per_block = 1;

    // RIFF chunk
    wave_read_exact_bytes(io, 4, io_buf);
    if (!RTEST(rb_str_equal(io_buf, rb_str_new_cstr("RIFF"))))
        rb_raise(rb_eWaveSemanticError, "unknown RIFF chunk ID: %"PRIsVALUE"", io_buf);

    wave_read_exact_bytes(io, 4, io_buf);
    riff_chunk_size = quadbyte2u32le(io_buf);
    (void)riff_chunk_size; // read only to advance past the field

    wave_read_exact_bytes(io, 4, io_buf);
    if (!RTEST(rb_str_equal(io_buf, rb_str_new_cstr("WAVE"))))
        rb_raise(rb_eWaveSemanticError, "unknown file format type: %"PRIsVALUE"", io_buf);

    // format chunk
    wave_read_exact_bytes(io, 4, io_buf);
    if (!RTEST(rb_str_equal(io_buf, rb_str_new_cstr("fmt "))))
        rb_raise(rb_eWaveSemanticError, "no format chunk");

    wave_read_exact_bytes(io, 4, io_buf);
    fmt_chunk_size = quadbyte2u32le(io_buf);
    (void)fmt_chunk_size; // read only to advance past the field

    wave_read_exact_bytes(io, 2, io_buf);
    wave_format_type = doublebyte2u16le(io_buf);
    if (wave_format_type != 1)
        rb_raise(rb_eWaveSemanticError, "not a linear PCM");

    wave_read_exact_bytes(io, 2, io_buf);
    channel = doublebyte2u16le(io_buf);
    if (!channel)
        must_not_be_zero_error("channel");

    wave_read_exact_bytes(io, 4, io_buf);
    samples_per_sec = quadbyte2u32le(io_buf);
    if (!samples_per_sec)
        must_not_be_zero_error("samples_per_sec");

    wave_read_exact_bytes(io, 4, io_buf);
    bytes_per_sec = quadbyte2u32le(io_buf);
    if (!bytes_per_sec)
        must_not_be_zero_error("bytes_per_sec");

    wave_read_exact_bytes(io, 2, io_buf);
    block_size = doublebyte2u16le(io_buf);
    if (!block_size)
        must_not_be_zero_error("block_size");

    wave_read_exact_bytes(io, 2, io_buf);
    bits_per_sample = doublebyte2u16le(io_buf);
    if (!bits_per_sample)
        must_not_be_zero_error("bits_per_sample");

    if ((bits_per_sample / 8 * channel) != block_size)
        rb_raise(rb_eWaveSemanticError, "'block_size' mismatch");

    if ((samples_per_sec * block_size) != bytes_per_sec)
        rb_raise(rb_eWaveSemanticError, "'bytes_per_sec' mismatch");

    // data chunk
    wave_read_exact_bytes(io, 4, io_buf);
    if (!RTEST(rb_str_equal(io_buf, rb_str_new_cstr("data"))))
        rb_raise(rb_eWaveSemanticError, "no data chunk");

    wave_read_exact_bytes(io, 4, io_buf);
    data_chunk_size = quadbyte2u32le(io_buf);

    if ((data_chunk_size % block_size) != 0)
        rb_raise(rb_eWaveSemanticError, "'data_chunk_size' is not a multiple of 'block_size'");

    switch (bits_per_sample) {
    case 8:  func = pcm_read_8bit;  break;
    case 16: func = pcm_read_16bit; break;
    case 24: func = pcm_read_24bit; break;
    case 32: func = pcm_read_32bit; break;
    default: rb_raise(rb_eWaveSemanticError,
        "unrecognized (or unsupported) bits per sample: %d (for wave format type: %d)",
        bits_per_sample, wave_format_type);
        break;
    }

    // One output PCM object per channel, each holding `length` samples.
    length = data_chunk_size / block_size;
    pcm_ary = rb_ary_new2(channel);
    for (long i = 0; i < channel; i++)
    {
        rb_ary_store(pcm_ary, i, rb_pcm_new(length, samples_per_sec));
    }

    // Line up the sample storage of every channel so the decode loop can
    // write into it directly.
    mat = ALLOCA_N(double*, channel);
    for (long i = 0; i < channel; i++)
    {
        VALUE obj = rb_ary_entry(pcm_ary, i);
        mat[i] = WaveformDataPtr(obj);
    }

    // block_size == bits_per_sample / 8 * channel was verified above, so
    // this division is exact.
    bytes_per_sample = block_size / channel;

    // Read whole blocks only. A block larger than BUFFER_SIZE would round
    // the buffer down to zero and stall the loop, so read one block at a
    // time in that case.
    buffer_size = BUFFER_SIZE / block_size * block_size;
    if (buffer_size == 0)
        buffer_size = block_size;

    idx = 0;
    remaining = data_chunk_size;
    while (remaining > 0)
    {
        // data_chunk_size is a multiple of block_size, hence so is `chunk`.
        uint32_t chunk = remaining < buffer_size ? remaining : buffer_size;

        wave_read_exact_bytes(io, (long)chunk, io_buf);
        unsigned char *buf = (unsigned char *)StringValuePtr(io_buf);
        for (uint32_t n = 0; n < chunk; n += block_size)
        {
            for (long i = 0; i < channel; i++)
            {
                double *s = mat[i];
                func(buf + n + i * bytes_per_sample, s + idx);
            }
            idx += samples_per_block;
        }
        remaining -= chunk;
    }
    rb_str_resize(io_buf, 0);
    rb_io_close(io);
    return pcm_ary;
}

/*
 * Ruby-callable wrapper: reads the linear-PCM WAVE file named by `fname`
 * and returns the result of wave_read_linear_pcm. The receiver is unused.
 *
 * StringValueCStr (rather than StringValuePtr) is used because the path
 * is handed on as a C string: it guarantees NUL termination and raises
 * ArgumentError when the Ruby string contains an embedded NUL byte,
 * instead of silently opening a truncated path.
 */
static VALUE
test_wave_read_linear_pcm(VALUE unused_obj, VALUE fname)
{
    return wave_read_linear_pcm(StringValueCStr(fname));
}

To achieve high-speed processing, the sample vectors are extracted from the Ruby objects and treated as a matrix. The matrix is simply an array of pointers, each pointing to the beginning of one channel's vector.

    mat = ALLOCA_N(double*, channel);
    for (long i = 0; i < channel; i++)
    {
        VALUE obj = rb_ary_entry(pcm_ary, i);
        mat[i] = WaveformDataPtr(obj); // <- HERE
    }

Decoding/encoding is done by callback functions while the buffer is filled chunk by chunk. It is currently implemented as nested loops; there may be a better way.

    {
        if ((1. * data_ptr + buffer_size) > data_chunk_size)
            buffer_size = data_chunk_size - data_ptr;
        rb_funcall(io, readpartial, 2, INT2FIX(buffer_size), io_buf);
        unsigned char *buf = (unsigned char *)StringValuePtr(io_buf);
        for (long n = 0; n < buffer_size; n += block_size)
        {
            for (long i = 0; i < channel; i++)
            {
                double *s = mat[i];
                func(buf+(i*block_size/channel+n), s+idx);
            }
            idx += samples_per_block;
        }
    }

I tried reading an 80 MiB 24-bit stereo PCM file. In my environment, I obtained the following benchmark results:

require './wave'
require 'benchmark'

Benchmark.bm do |x|
   x.report { pcms = Wave::RIFF.read_linear_pcm('sample.wav') }
end
#=>       user     system      total        real
#=>   0.054870   0.122014   0.176884 (  0.183259)

Matz would say, "I came up with this design," and van Rossum would probably say, "I can't think of any other design."

tribusonz-2 commented 1 month ago

4-bit IMA-ADPCM is also supported. The IMA-ADPCM coder has a state variable called 'index'. Notably, this is an independent variable for each channel.
Reading does not require this variable, but writing does.

In the implementation it would look like this. The callback function is as follows:

/*
 * Sketch of the 4-bit IMA-ADPCM decode callback (body not implemented).
 *
 * Compared with the linear-PCM callbacks it takes one extra argument,
 * `index`: the coder's 'index' variable, which per the accompanying
 * discussion is kept independently for each channel.
 */
static inline void
ima_adpcm_read_4bit(unsigned char buf[], double s[], int *index)
{
    // Processing ... (placeholder: decoding logic is not written yet)
}

Define the indexes array and allocate memory according to the number of channels:

int *indexes;

// :

indexes = ALLOCA_N(int, channel);

// :

for (long i = 0; i < channel; i++)
{
    VALUE obj = rb_ary_entry(pcm_ary, i);
    mat[i] = WaveformDataPtr(obj);
    indexes[i] = 0;
}

// :

for (long i = 0; i < channel; i++)
{
    double *s = mat[i];
    func(buf+(i*block_size/channel+n), s+idx, indexes+i);
}
idx += samples_per_block;
tribusonz-2 commented 4 weeks ago

Below is the write method.
This is also an improved version of Dr. Aoki's wave function. The digitizer works well with Dr. Aoki's routine, but using a normalizer seems to be faster.

/*
 * Write the whole content of the Ruby string `buf` to `io` through
 * rb_io_bufwrite. Raises IOError ("write failure") when rb_io_bufwrite
 * reports -1.
 */
static void
io_writepartial(VALUE io, VALUE buf)
{
    unsigned char *bytes = (unsigned char *)StringValuePtr(buf);
    long nbytes = RSTRING_LEN(buf);

    if (rb_io_bufwrite(io, bytes, nbytes) != -1)
        return;

    rb_raise(rb_eIOError, "write failure");
}

// #define imax(a, b) ((a) > (b) ? (a) : (b))
// #define imin(a, b) ((a) < (b) ? (a) : (b))
// #define iclip(x, min, max) return imin(imax(min, x), max)

/*
 * Append the low `sz` bytes of `value` to the Ruby string `str` in
 * little-endian order and return the result of rb_str_buf_cat.
 *
 * sz: number of bytes to emit; values above 4 are clamped to 4 because
 *     `value` carries only 32 bits.
 *
 * The bytes are produced with plain shifts. The previous implementation
 * passed 8 as the `wordsize` argument of rb_integer_pack, which is a size
 * in bytes, so it wrote sz * 8 bytes into a 4-byte buffer.
 */
static VALUE
rb_str_cat_uintle(VALUE str, uint32_t value, size_t sz)
{
    unsigned char s[sizeof(uint32_t)];

    if (sz > sizeof s)
        sz = sizeof s;

    for (size_t i = 0; i < sz; i++)
        s[i] = (unsigned char)((value >> (8 * i)) & 0xFFu);

    return rb_str_buf_cat(str, (const char *)s, (long)sz);
}

/*
 * Create a new Ruby string of exactly `len` bytes, all set to zero.
 */
static VALUE
rb_str_buf_z_new(long len)
{
    VALUE bin = rb_str_buf_new(len);

    rb_str_resize(bin, len);
    /* Fetch the data pointer only after resizing: rb_str_resize may move
     * the string's storage, which would leave an earlier pointer stale. */
    MEMZERO(RSTRING_PTR(bin), char, len);

    return bin;
}

/*
 * Resize the Ruby string `bin` to `len` bytes and return it.
 *
 * - Same length or growing: the whole string is zero-filled.
 * - Shrinking: the string is truncated and the kept bytes are untouched.
 */
static VALUE
rb_str_buf_z_resize(VALUE bin, long len)
{
    const long old_len = RSTRING_LEN(bin);

    if (old_len > len)
    {
        rb_str_resize(bin, len);
        return bin;
    }

    if (old_len < len)
        rb_str_resize(bin, len);

    /* Fetch the data pointer only after any resize: growing the string may
     * reallocate its storage, which would leave an earlier pointer stale. */
    MEMZERO(RSTRING_PTR(bin), char, len);

    return bin;
}

/*
 * Write the channels in `pcm_ary` to `file_name` as a canonical
 * linear-PCM RIFF/WAVE file (RIFF header, 16-byte "fmt " chunk, "data"
 * chunk, plus one pad byte when the data size is odd).
 *
 * pcm_ary:   Array of PCM objects, one per channel; all channels must
 *            share the same sampling frequency and length.
 * bits:      bits per sample: 8, 16, 24 or 32.
 * file_name: NUL-terminated path, opened in "wb" mode.
 *
 * Returns Qtrue. // TODO: must return the number of bytes written
 *
 * Raises ArgumentError (not PCM objects / no channel), RangeError (a
 * value does not fit its header field), RuntimeError (channels differ in
 * frequency or length), rb_eWaveSemanticError (unsupported bit depth) and
 * IOError (write failure, via io_writepartial).
 *
 * All arguments are validated before the file is opened, so an invalid
 * request no longer creates or truncates the target file.
 *
 * NOTE(review): if a write fails, the File object stays open until it is
 * garbage collected.
 */
static inline VALUE
wave_write_linear_pcm(VALUE pcm_ary, int16_t bits, char *file_name)
{
    const uint32_t BUFFER_SIZE = 0x1000;
    const uint32_t fmt_chunk_size = 16;
    const uint16_t wave_format_type = 1;
    const uint16_t samples_per_block = 1;

    VALUE io;
    VALUE io_buf;

    uint32_t riff_chunk_size;
    uint16_t channels;
    uint32_t samples_per_sec = 0;
    uint32_t bytes_per_sec;
    uint16_t block_size;
    uint16_t bits_per_sample;
    uint16_t bytes_per_sample;
    uint32_t data_chunk_size;

    double **mat;
    long length = 0;
    void (*func)(unsigned char *, double *);
    long idx;
    uint32_t buffer_size;
    uint32_t remaining;

    if (!ary_all_pcm_p(pcm_ary))
        rb_raise(rb_eArgError, "not a %"PRIsVALUE"", rb_cWavePCM);

    if (RARRAY_LEN(pcm_ary) > UINT16_MAX)
        rb_raise(rb_eRangeError, "too many PCM classes");
    channels = (uint16_t)RARRAY_LEN(pcm_ary);
    // Without any channel block_size would be 0 and every division by it
    // below would be a division by zero.
    if (channels == 0)
        rb_raise(rb_eArgError, "no PCM channel given");

    // Collect the sample storage of every channel. The first channel
    // defines the frequency and length; the others must match it.
    mat = ALLOCA_N(double*, channels);
    for (long i = 0; i < channels; i++)
    {
        VALUE obj = rb_ary_entry(pcm_ary, i);
        mat[i] = WaveformDataPtr(obj);
        if (i == 0)
        {
            samples_per_sec = rb_pcm_fs(obj);
            length = rb_pcm_len(obj);
            continue;
        }
        if (samples_per_sec != rb_pcm_fs(obj))
            rb_raise(rb_eRuntimeError,
            "Exporting each channels's different sampling frequency is not supported yet");
        if (length != rb_pcm_len(obj))
            rb_raise(rb_eRuntimeError,
            "Exporting each channels's different length is not supported yet");
    }

    bits_per_sample = (uint16_t)bits;
    switch (bits_per_sample) {
    case 8:  func = pcm_write_8bit;  break;
    case 16: func = pcm_write_16bit; break;
    case 24: func = pcm_write_24bit; break;
    case 32: func = pcm_write_32bit; break;
    default: rb_raise(rb_eWaveSemanticError,
        "unrecognized (or unsupported) bits per sample: %d (for wave format type: %d)",
        bits_per_sample, wave_format_type);
        break;
    }

    // Derived header fields. Each one is checked against the width of the
    // field it is stored in, instead of being truncated silently.
    bytes_per_sample = bits_per_sample / 8;
    if ((uint32_t)bytes_per_sample * channels > UINT16_MAX)
        rb_raise(rb_eRangeError, "block size does not fit in 16 bits");
    block_size = (uint16_t)(bytes_per_sample * channels);

    if ((uint64_t)samples_per_sec * block_size > UINT32_MAX)
        rb_raise(rb_eRangeError, "bytes per second does not fit in 32 bits");
    bytes_per_sec = samples_per_sec * block_size;

    // riff_chunk_size = 36 + data size (+ 1 pad byte) must fit in 32 bits.
    if (length < 0 || (uint64_t)length > (UINT32_MAX - 37u) / block_size)
        rb_raise(rb_eRangeError, "PCM data is too large for a RIFF file");
    data_chunk_size = (uint32_t)length * block_size;

    riff_chunk_size = 36 + data_chunk_size;
    if (riff_chunk_size % 2 == 1)  riff_chunk_size++;

    io = rb_funcall(
        rb_cFile,
        rb_intern("open"),
        2,
        rb_str_new_cstr(file_name),
        rb_str_new_cstr("wb"));

    // RIFF header, format chunk and data chunk header (44 bytes in total).
    io_buf = rb_str_new(0, 0);
    rb_str_buf_cat(io_buf, "RIFF", 4);
    rb_str_cat_uintle(io_buf, riff_chunk_size, 4);
    rb_str_buf_cat(io_buf, "WAVE", 4);
    rb_str_buf_cat(io_buf, "fmt ", 4);
    rb_str_cat_uintle(io_buf, fmt_chunk_size, 4);
    rb_str_cat_uintle(io_buf, wave_format_type, 2);
    rb_str_cat_uintle(io_buf, channels, 2);
    rb_str_cat_uintle(io_buf, samples_per_sec, 4);
    rb_str_cat_uintle(io_buf, bytes_per_sec, 4);
    rb_str_cat_uintle(io_buf, block_size, 2);
    rb_str_cat_uintle(io_buf, bits_per_sample, 2);
    rb_str_buf_cat(io_buf, "data", 4);
    rb_str_cat_uintle(io_buf, data_chunk_size, 4);
    io_writepartial(io, io_buf);

    // Encode whole blocks only. A block larger than BUFFER_SIZE would
    // round the buffer down to zero, so fall back to one block per write.
    buffer_size = BUFFER_SIZE / block_size * block_size;
    if (buffer_size == 0)
        buffer_size = block_size;

    io_buf = rb_str_new(0, 0);
    idx = 0;
    remaining = data_chunk_size;
    while (remaining > 0)
    {
        // data_chunk_size is a multiple of block_size, hence so is `chunk`.
        uint32_t chunk = remaining < buffer_size ? remaining : buffer_size;

        if (remaining == data_chunk_size)
            io_buf = rb_str_buf_z_new(chunk);
        else if (RSTRING_LEN(io_buf) != (long)chunk)
            rb_str_buf_z_resize(io_buf, chunk);

        unsigned char *ptr = (unsigned char *)RSTRING_PTR(io_buf);
        for (uint32_t n = 0; n < chunk; n += block_size)
        {
            for (long i = 0; i < channels; i++)
            {
                double *s = mat[i];
                func(ptr + n + i * bytes_per_sample, s + idx);
            }
            idx += samples_per_block;
        }
        io_writepartial(io, io_buf);
        remaining -= chunk;
    }
    // RIFF chunks are word-aligned: pad an odd-sized data chunk.
    if (data_chunk_size % 2 == 1)
        io_writepartial(io, rb_str_buf_z_new(1));

    rb_str_resize(io_buf, 0);
    rb_io_close(io);

    return Qtrue; // TODO: must return the number of bytes written
}

/*
 * Ruby-callable wrapper: writes `pcm_ary` to the file named by `fname`
 * with `bits` bits per sample and returns the result of
 * wave_write_linear_pcm. The receiver is unused.
 *
 * StringValueCStr (rather than StringValuePtr) is used because the path
 * is handed on as a C string: it guarantees NUL termination and raises
 * ArgumentError when the Ruby string contains an embedded NUL byte,
 * instead of silently writing to a truncated path.
 */
static VALUE
test_wave_write_linear_pcm(VALUE unused_obj, VALUE fname, VALUE pcm_ary, VALUE bits)
{
    return wave_write_linear_pcm(pcm_ary, NUM2INT(bits), StringValueCStr(fname));
}

I thought a Mutex might be needed for this. However, it seems there is no need to set up routines that complicated at the ABI level.
This routine achieves sufficient write speed. The benchmark results in my environment are below.

pcms = Wave::RIFF.read_linear_pcm('sample.wav')

pcms.each{|ch| p ch.length}
#=> 13632192
#=> 13632192

Benchmark.bm do |x|
  x.report do
    Wave::RIFF.write_linear_pcm("test8bit.wav", pcms, 8)
  end
  x.report do
    Wave::RIFF.write_linear_pcm("test16bit.wav", pcms, 16)
  end
  x.report do
    Wave::RIFF.write_linear_pcm("test24bit.wav", pcms, 24)
  end
  x.report do
    Wave::RIFF.write_linear_pcm("test32bit.wav", pcms, 32)
  end
end
#=>        user     system      total        real
#=>    0.174198   0.029389   0.203587 (  0.203614)
#=>    0.180408   0.047551   0.227959 (  0.228773)
#=>    0.150851   0.094450   0.245301 (  0.245575)
#=>    0.114868   0.180066   0.294934 (  0.295109)