Closed tigaworx closed 6 years ago
Normally, there is an audio file created for each source. I experienced empty files when recording really short bursts of sound. It’s currently a limitation of the system because of the way the tracking and separating modules operate in ODAS.
Do you think it could be your situation?
Yes, thank you! Short bursts of sound must have been my problem. One of the sources was counting and there was a short pause after each number. When we tested again with fluent text it worked fine.
@tigaworx are you using the ReSpeaker mic array v1.0 or the v2.0? If it is the v2.0, would you mind sending your config file? I can't seem to record correctly. Thanks.
@yesheysamang I am using the USB 8 Sounds mic array. Maybe you could try changing the parameters fS and nBits in the raw block? If it is of any help: here is my config file for the USB 8 Sounds mic array: `` version = "2.1";
raw: {
fS = 44100;
hopSize = 512;
nBits = 32;
nChannels = 8;
# Input with raw signal from microphones
interface: {
type = "soundcard";
card = 1;
device = 0;
}
}
mapping: {
map: (1, 2, 3, 4, 5, 6, 7, 8);
}
general: {
epsilon = 1E-20;
size:
{
hopSize = 128;
frameSize = 256;
};
samplerate:
{
mu = 16000;
sigma2 = 0.01;
};
speedofsound:
{
mu = 343.0;
sigma2 = 25.0;
};
mics = (
# Microphone 1
{
mu = ( +0.000, +0.000, +0.000 );
sigma2 = ( +0.000, +0.000, +0.000, +0.000, +0.000, +0.000, +0.000, +0.000, +0.000 );
direction = ( +0.000, +1.000, +0.000 );
angle = ( 80.0, 100.0 );
},
# Microphone 2
{
mu = ( +0.030, +0.000, +0.000 );
sigma2 = ( +0.000, +0.000, +0.000, +0.000, +0.000, +0.000, +0.000, +0.000, +0.000 );
direction = ( +0.000, +1.000, +0.000 );
angle = ( 80.0, 100.0 );
},
# Microphone 3
{
mu = ( +0.060, +0.000, +0.000 );
sigma2 = ( +0.000, +0.000, +0.000, +0.000, +0.000, +0.000, +0.000, +0.000, +0.000 );
direction = ( +0.000, +1.000, +0.000 );
angle = ( 80.0, 100.0 );
},
# Microphone 4
{
mu = ( +0.090, +0.000, +0.000 );
sigma2 = ( +0.000, +0.000, +0.000, +0.000, +0.000, +0.000, +0.000, +0.000, +0.000 );
direction = ( +0.000, +1.000, +0.000 );
angle = ( 80.0, 100.0 );
},
# Microphone 5
{
mu = ( +0.120, +0.000, +0.000 );
sigma2 = ( +0.000, +0.000, +0.000, +0.000, +0.000, +0.000, +0.000, +0.000, +0.000 );
direction = ( +0.000, +1.000, +0.000 );
angle = ( 80.0, 100.0 );
},
# Microphone 6
{
mu = ( +0.150, +0.000, +0.000 );
sigma2 = ( +0.000, +0.000, +0.000, +0.000, +0.000, +0.000, +0.000, +0.000, +0.000 );
direction = ( +0.000, +1.000, +0.000 );
angle = ( 80.0, 100.0 );
},
# Microphone 7
{
mu = ( +0.180, +0.000, +0.000 );
sigma2 = ( +0.000, +0.000, +0.000, +0.000, +0.000, +0.000, +0.000, +0.000, +0.000 );
direction = ( +0.000, +1.000, +0.000 );
angle = ( 80.0, 100.0 );
},
# Microphone 8
{
mu = ( +0.210, +0.000, +0.000 );
sigma2 = ( +0.000, +0.000, +0.000, +0.000, +0.000, +0.000, +0.000, +0.000, +0.000 );
direction = ( +0.000, +1.000, +0.000 );
angle = ( 80.0, 100.0 );
}
);
# Spatial filter to include only a range of direction if required
# (may be useful to remove false detections from the floor)
spatialfilter: {
direction = ( +0.000, +0.000, +1.000 );
angle = (80.0, 100.0);
};
nThetas = 181;
gainMin = 0.25;
};
sne: {
b = 3;
alphaS = 0.1;
L = 150;
delta = 3.0;
alphaD = 0.1;
}
ssl: {
nPots = 4;
nMatches = 10;
probMin = 0.5;
nRefinedLevels = 1;
interpRate = 4;
# Number of scans: level is the resolution of the sphere
# and delta is the size of the maximum sliding window
# (delta = -1 means the size is automatically computed)
scans = (
{ level = 2; delta = -1; },
{ level = 4; delta = -1; }
);
# Output to export potential sources
potential: {
format = "json";
interface: {
type = "socket";
ip = "127.0.0.1";
port = 9001;
};
};
};
sst: {
# Mode is either "kalman" or "particle"
mode = "kalman";
# Add is either "static" or "dynamic"
add = "dynamic";
# Parameters used by both the Kalman and particle filter
active = (
{ weight = 1.0; mu = 0.4; sigma2 = 0.0025 }
);
inactive = (
{ weight = 1.0; mu = 0.25; sigma2 = 0.0025 }
);
sigmaR2_prob = 0.0025;
sigmaR2_active = 0.0225;
sigmaR2_target = 0.0025;
Pfalse = 0.1;
Pnew = 0.1;
Ptrack = 0.8;
theta_new = 0.9;
N_prob = 5;
theta_prob = 0.8;
N_inactive = ( 250, 250, 250, 250 );
theta_inactive = 0.9;
# Parameters used by the Kalman filter only
kalman: {
sigmaQ = 0.001;
};
# Parameters used by the particle filter only
particle: {
nParticles = 1000;
st_alpha = 2.0;
st_beta = 0.04;
st_ratio = 0.5;
ve_alpha = 0.05;
ve_beta = 0.2;
ve_ratio = 0.3;
ac_alpha = 0.5;
ac_beta = 0.2;
ac_ratio = 0.2;
Nmin = 0.7;
};
target: ();
# Output to export tracked sources
tracked: {
format = "json";
interface: {
type = "socket";
ip = "127.0.0.1";
port = 9000;
};
};
}
sss: {
# Mode is either "dds", "dgss" or "dmvdr"
mode_sep = "dds";
mode_pf = "ss";
gain_sep = 1.0;
gain_pf = 10.0;
dds: {
};
dgss: {
mu = 0.01;
lambda = 0.5;
};
dmvdr: {
};
ms: {
alphaPmin = 0.07;
eta = 0.5;
alphaZ = 0.8;
thetaWin = 0.3;
alphaWin = 0.3;
maxAbsenceProb = 0.9;
Gmin = 0.01;
winSizeLocal = 3;
winSizeGlobal = 23;
winSizeFrame = 256;
};
ss: {
Gmin = 0.01;
Gmid = 0.5;
Gslope = 10.0;
}
separated: {
fS = 16000;
hopSize = 128;
nBits = 16;
interface: {
type = "socket";
ip = "127.0.0.1";
port = 10000;
};
};
postfiltered: {
fS = 16000;
hopSize = 128;
nBits = 16;
gain = 10.0;
interface: {
type = "socket";
ip = "127.0.0.1";
port = 10010;
};
};
};
classify: {
frameSize = 4096;
winSize = 3;
tauMin = 88;
tauMax = 551;
deltaTauMax = 20;
alpha = 0.3;
gamma = 0.05;
phiMin = 0.5;
r0 = 0.2;
category: {
format = "undefined";
interface: {
type = "blackhole";
}
}
}
Hi, instead of a real issue I merely have a question. I hope that is okay. (Please tell me if there is a better way to ask questions.) My question: If I have two sound sources, can I get separated wav files for both sources? One wav per source? When I tested this, I indeed got two wavs of separated sources, but one of them was empty. It seems like the algorithm only records the loudest source properly...