Zero latency biquad may access unaligned memory

If using an input expression that reads from aligned memory, you will get an unaligned memory read on this line: https://github.com/kfrlib/kfr/blob/master/include/kfr/dsp/biquad.hpp#L196 (depending on the value of Filters)

Original code:

    template <size_t width>
    KFR_INTRIN vec<T, width> operator()(cinput_t cinput, size_t index, vec_t<T, width> t) const
    {
        index += filters - 1;
        vec<T, width> out;
        if (index + width <= block_end)
        {
            const vec<T, width> in = this->argument_first(cinput, index, t);

            CMT_LOOP_UNROLL
            for (size_t i = 0; i < width; i++)
            {
                state.out = process(bq, state, insertleft(in[i], state.out));
                out[i]    = state.out[filters - 1];
            }
     //...

Suggested fix:

    template <size_t width>
    KFR_INTRIN vec<T, width> operator()(cinput_t cinput, size_t index, vec_t<T, width> t) const
    {
        index += filters - 1;
        vec<T, width> out;
        if (index + width <= block_end)
        {
            CMT_LOOP_UNROLL
            for (size_t i = 0; i < width; i++)
            {
        const vec<T, 1> in = this->argument_first(cinput, index + i, vec_t<T, 1>());

                state.out = process(bq, state, insertleft(in[0], state.out));
                out[i]    = state.out[filters - 1];
            }
     //...

kfrlib / kfr

Zero latency biquad may access unaligned memory #9