introlab / odas

ODAS: Open embeddeD Audition System
MIT License
788 stars 251 forks source link

DIY 4 channel array hopsize for 64byte usb packet hopsize? #147

Closed benbiles closed 5 years ago

benbiles commented 5 years ago
Hi, my DIY STM32-based PDM mic array has a maximum USB packet size of 64 bytes, at 16-bit 48 kHz per channel.

I tried the most obvious hop sizes (8, 16, 32, 64, 128, 256) with varied results.

definition
hopSize | uint | Number of samples acquired on each channel at each frame

Can I calculate the proper hop size from this info?

$ arecord -D "front:CARD=Mod,DEV=0" --dump-hw-params 

Recording WAVE 'stdin' : Unsigned 8 bit, Rate 8000 Hz, Mono
HW Params of device "front:CARD=Mod,DEV=0":
--------------------
ACCESS:  MMAP_INTERLEAVED RW_INTERLEAVED
FORMAT:  S16_LE
SUBFORMAT:  STD
SAMPLE_BITS: 16
FRAME_BITS: 64
CHANNELS: 4
RATE: 48000
PERIOD_TIME: [1000 1365334)
PERIOD_SIZE: [48 65536]
PERIOD_BYTES: [384 524288]
PERIODS: [2 1024]
BUFFER_TIME: [2000 2730667)
BUFFER_SIZE: [96 131072]
BUFFER_BYTES: [768 1048576]
TICK_TIME: ALL
--------------------
arecord: set_params:1233: Sample format non available
Available formats:
- S16_LE

ODAS reports the correct direction of the audio source, although audio separation does not work in the recordings if I play two music sources on opposite sides of the array (equal distance / volume).
ODAS seems to crash with a buffer overrun when the hop size is less than 256.

I tried mode_sep = "dgss"; for better audio separation.

# Configuration file

version = "2.1";

# Raw

raw: 
{

    fS = 48000;
    hopSize = 256;
    nBits = 16;
    nChannels = 4; 

    # Input with raw signal from microphones
    interface: {
        type = "soundcard";
        card = 2;
        device = 0;
    }

}

# Mapping

mapping:
{

    map: (1, 2, 3, 4);

}

# General

general:
{

    epsilon = 1E-20;

    size: 
    {
        hopSize = 128;
        frameSize = 256; 
    };

    samplerate:
    {
        mu = 16000;
        sigma2 = 0.01;
    };

    speedofsound:
    {
        mu = 343.0;
        sigma2 = 25.0;
    };

    mics = (

        # bbbox 4chan mic array spaced 60mm apart

        # Microphone 1
        { 
            mu = ( -0.0200, +0.0000, +0.0000 ); 
            sigma2 = ( +0.000, +0.000, +0.000, +0.000, +0.000, +0.000, +0.000, +0.000, +0.000 );
            direction = ( +0.000, +0.000, +1.000 );
            angle = ( 80.0, 90.0 );
        },

        # Microphone 2
        { 
            mu = ( +0.0000, +0.0200, +0.0000 ); 
            sigma2 = ( +0.000, +0.000, +0.000, +0.000, +0.000, +0.000, +0.000, +0.000, +0.000 );
            direction = ( +0.000, +0.000, +1.000 );
            angle = ( 80.0, 90.0 );
        },

        # Microphone 3
        { 
            mu = ( +0.0200, +0.0000, +0.0000 ); 
            sigma2 = ( +0.000, +0.000, +0.000, +0.000, +0.000, +0.000, +0.000, +0.000, +0.000 );
            direction = ( +0.000, +0.000, +1.000 );
            angle = ( 80.0, 90.0 );
        },

        # Microphone 4
        { 
            mu = ( +0.0000, -0.0200, +0.0000 ); 
            sigma2 = ( +0.000, +0.000, +0.000, +0.000, +0.000, +0.000, +0.000, +0.000, +0.000 );
            direction = ( +0.000, +0.000, +1.000 );
            angle = ( 80.0, 90.0 );
        }

    );

    # Spatial filters to include only a range of direction if required
    # (may be useful to remove false detections from the floor, or
    # limit the space search to a restricted region)
    spatialfilters = (

        {

            direction = ( +0.000, +0.000, +1.000 );
            angle = (80.0, 90.0);

        }

    );  

    nThetas = 181;
    gainMin = 0.25;

};

# Stationary noise estimation

sne:
{

    b = 3;
    alphaS = 0.1;
    L = 150;
    delta = 3.0;
    alphaD = 0.1;

}

# Sound Source Localization

ssl:
{

    nPots = 4;
    nMatches = 10;
    probMin = 0.5;
    nRefinedLevels = 1;
    interpRate = 4;

    # Number of scans: level is the resolution of the sphere
    # and delta is the size of the maximum sliding window
    # (delta = -1 means the size is automatically computed)
    scans = (
        { level = 2; delta = -1; },
        { level = 4; delta = -1; }
    );

    # Output to export potential sources
    potential: {

        # format = "undefined";
        format = "json";

        interface: {
            # type = "blackhole";
            type = "socket"; ip = "127.0.0.1"; port = 9001;
        };
    };

};

# Sound Source Tracking

sst:
{  

    # Mode is either "kalman" or "particle"

    mode = "kalman";

    # Add is either "static" or "dynamic"

    add = "dynamic";

    # Parameters used by both the Kalman and particle filter

    active = (
        { weight = 1.0; mu = 0.3; sigma2 = 0.0025 }
    );

    inactive = (
        { weight = 1.0; mu = 0.15; sigma2 = 0.0025 }
    );

    sigmaR2_prob = 0.0025;
    sigmaR2_active = 0.0225;
    sigmaR2_target = 0.0025;
    Pfalse = 0.1;
    Pnew = 0.1;
    Ptrack = 0.8;

    theta_new = 0.9;
    N_prob = 5;
    theta_prob = 0.8;
    N_inactive = ( 150, 200, 250, 250 );
    theta_inactive = 0.9;

    # Parameters used by the Kalman filter only

    kalman: {

        sigmaQ = 0.001;

    };

    # Parameters used by the particle filter only

    particle: {

        nParticles = 1000;
        st_alpha = 2.0;
        st_beta = 0.04;
        st_ratio = 0.5;
        ve_alpha = 0.05;
        ve_beta = 0.2;
        ve_ratio = 0.3;
        ac_alpha = 0.5;
        ac_beta = 0.2;
        ac_ratio = 0.2;
        Nmin = 0.7;

    };

    target: ();

    # Output to export tracked sources
    tracked: {

        # format = "undefined";
        format = "json";

        interface: {
            # type = "blackhole";
            type = "socket"; ip = "127.0.0.1"; port = 9000;
        };

    };

}

sss:
{

    # Mode is either "dds", "dgss" or "dmvdr"

    mode_sep = "dgss";
    mode_pf = "ms";

    gain_sep = 1.0;
    gain_pf = 10.0;

    dds: {

    };

    dgss: {

        mu = 0.01;
        lambda = 0.5;

    };

    dmvdr: {

    };

    ms: {

        alphaPmin = 0.07;
        eta = 0.5;
        alphaZ = 0.8;        
        thetaWin = 0.3;
        alphaWin = 0.3;
        maxAbsenceProb = 0.9;
        Gmin = 0.01;
        winSizeLocal = 3;
        winSizeGlobal = 23;
        winSizeFrame = 256;

    };

    ss: {

        Gmin = 0.01;
        Gmid = 0.9;
        Gslope = 10.0;

    }

separated: {
  fS = 48000;
  hopSize = 512;
  nBits = 16;        

  interface: {
 type = "socket";
    ip = "127.0.0.1";
    port = 10000;
  }        
};

postfiltered: {
  fS = 48000;
  hopSize = 512;
  nBits = 16;        

  interface: {

type = "socket";
    ip = "127.0.0.1";
    port = 10010;

 }  

};

}

classify:
{

    frameSize = 1024;
    winSize = 3;
    tauMin = 32;
    tauMax = 200;
    deltaTauMax = 7;
    alpha = 0.3;
    gamma = 0.05;
    phiMin = 0.15;
    r0 = 0.2; 

    category: {

        format = "undefined";

        interface: {
            type = "blackhole";
        }

    }

}
benbiles commented 5 years ago

OK, this seems to have fixed the problem: hopSize = 512;