majianjia / nnom

A higher-level Neural Network library for microcontrollers.
Apache License 2.0
955 stars 248 forks source link

About RNN noise running on STM32 MCU #227

Open kronee0516 opened 1 day ago

kronee0516 commented 1 day ago

Hi,

i tried to use nnom rnnoise on STM32F411CEU6(black pill) with i2s data from WM8978G the cmsis nn and dsp come from this repository i almost copied your code to run. but the output data are all zero

what do you think is the problem?

i just printed out the first ten data to compare their different but seems nothing change the output


Ldata : 1943 1948 1954 1972 1975 1991 2006 2039 2028 2047 
filter : 0 0 0 0 0 0 0 0 0 0 

Ldata : 1441 1442 1433 1438 1409 1410 1418 1428 1435 1428 
filter : 0 0 0 0 0 0 0 0 0 0 

Ldata : 784 766 801 754 751 743 752 738 779 767 
filter : 0 0 0 0 0 0 0 0 0 0 

Ldata : 26 28 41 44 24 51 26 19 11 -41 
filter : 0 0 0 0 0 0 0 0 0 0 

Ldata : 16643 16572 15805 17007 16381 16276 15223 13976 12746 13059 
filter : 0 0 0 0 0 0 0 0 0 0 

Ldata : -7642 -7674 -7686 -7531 -7387 -7434 -7568 -7522 -7628 -7340 
filter : 0 0 0 0 0 0 0 0 0 0 

Ldata : -4021 -4014 -4016 -4015 -3992 -3974 -3980 -3941 -3980 -3955 
filter : 0 0 0 0 0 0 0 0 0 0 

Ldata : 1753 1745 1765 1770 1812 1803 1811 1828 1833 1860 
filter : 0 0 0 0 0 0 0 0 0 0 

Ldata : 1848 1839 1835 1833 1841 1829 1831 1812 1832 1783 
filter : 0 0 0 0 0 0 0 0 0 0 

Ldata : 940 937 908 924 935 901 903 899 906 882 
filter : 0 0 0 0 0 0 0 0 0 0 

Ldata : 2519 2905 2606 2419 2735 3029 2885 2632 2413 2883 
filter : 0 0 0 0 0 0 0 0 0 0 

Ldata : 10583 10455 10186 10511 11308 12384 11733 11188 10938 11217 
filter : 0 0 0 0 0 0 0 0 0 0 
kronee0516 commented 1 day ago
void processData(void){

    memcpy(LData, &LData[AUDIO_FRAME_LEN/2], AUDIO_FRAME_LEN/2*sizeof(int16_t));

    for(uint16_t n = 0;n<AUDIO_FRAME_LEN/2;n++){
            //RData[n]=*(inBufPtr+n*2);

            LData[n+AUDIO_FRAME_LEN/2]=*(inBufPtr+n*2+1);

    }

    printf("Ldata : ");
    for(uint8_t n = 0;n<10;n++){
                //RData[n]=*(inBufPtr+n*2);
                printf("%d ",LData[n+AUDIO_FRAME_LEN/2]);

        }
    printf("\r\n");
    mfcc_compute(mfcc, LData, mfcc_feature);
    for(uint32_t i=0; i< NUM_FEATURES; i++)
    {
        mfcc_feature_diff[i] = mfcc_feature[i] - mfcc_feature_prev[i];
        mfcc_feature_diff1[i] = mfcc_feature_diff[i] - mfcc_feature_diff_prev[i];
    }
    memcpy(mfcc_feature_prev, mfcc_feature, NUM_FEATURES * sizeof(float));
    memcpy(mfcc_feature_diff_prev, mfcc_feature_diff, NUM_FEATURES * sizeof(float));

    // combine MFCC with derivatives for the NN features
    memcpy(nn_features, mfcc_feature, NUM_FEATURES*sizeof(float));
    memcpy(&nn_features[NUM_FEATURES], mfcc_feature_diff, 10*sizeof(float));
    memcpy(&nn_features[NUM_FEATURES+10], mfcc_feature_diff1, 10*sizeof(float));

    // quantise them using the same scale as training data (in keras), by 2^n.
    quantize_data(nn_features, nn_features_q7, NUM_FEATURES+20, 3);

    // run the mode with the new input
    memcpy(nnom_input_data, nn_features_q7, sizeof(nnom_input_data));
    model_run(model);
    for(int i=0; i< NUM_FEATURES; i++){
                band_gains[i] = (float)(nnom_output_data[i]) / 127.f;
    }
    // one more step, limit the change of gians, to smooth the speech, per RNNoise paper
    for(int i=0; i< NUM_FEATURES; i++){
        band_gains[i] = _MAX(band_gains_prev[i]*0.8f, band_gains[i]);
    }
    memcpy(band_gains_prev, band_gains, NUM_FEATURES *sizeof(float));

    // update filter coefficient to applied dynamic gains to each frequency band
    set_gains((float*)coeff_b, (float*)b_, band_gains, NUM_FILTER, NUM_ORDER);

    // convert 16bit to float for equalizer
    for (int i = 0; i < AUDIO_FRAME_LEN/2; i++){
        audio_buffer[i] = LData[i+AUDIO_FRAME_LEN / 2] / 32768.f;
    }
    // finally, we apply the equalizer to this audio frame to denoise
    equalizer(audio_buffer, &audio_buffer[AUDIO_FRAME_LEN / 2], AUDIO_FRAME_LEN/2, (float*)b_,(float*)coeff_a, NUM_FILTER, NUM_ORDER);

    for (int i = 0; i < AUDIO_FRAME_LEN / 2; i++){
        *(outBufPtr+i*2) = (int16_t)audio_buffer[i + AUDIO_FRAME_LEN / 2] * 32768.f *0.6f; // 0.7 is the filter band overlapping factor
    }

            printf("filter : ");
                for(uint8_t n = 0;n<10;n++){
                            printf("%d ",(int16_t)audio_buffer[n + AUDIO_FRAME_LEN / 2] * 32768.f *0.6f);

                    }
                printf("\r\n");

    dataReadyflag=0;

}

i was testing with 512 double buffer(256 x 2) but since it is not quite identical to your parameter in arm_main.c , i changed to 1024( 512 x 2) to get one side of data(each side 512 len in total) only for process