function X1_est = MMSN(Y1,Y2,A21,nf,g)
%MMSN - Implements the MMSN (Minimum Mean Square Noise) algorithm for
%feature enhancement in speech recognition on smartphones with
%dual-microphone.
%
% For every frequency channel k the enhanced feature is the linear
% combination
%       X1_est(k,:) = w1(k) * Y1(k,:) + w2(k) * Y2(k,:)
% whose weights minimise the output noise power w' * Phi * w subject to
% the constraint w1 + A21(k) * w2 = 1. Phi is the 2x2 zero-lag noise
% correlation matrix of the two channels, estimated from the first and
% last nf frames of the utterance.
%
% Inputs:
%   Y1      Power spectrogram of the 1st channel (main mic) of the noisy
%           utterance. Number of rows is the number of frequency channels
%           and number of columns is the number of time frames.
%   Y2      Power spectrogram of the 2nd channel (secondary mic) of the
%           noisy utterance. Number of rows is the number of frequency
%           channels and number of columns is the number of time frames.
%   A21     Vector whose length is the number of rows in Y1 and Y2 with
%           factors that relate clean speech power values in the first and
%           second channels. Row and column vectors are both accepted.
%   nf      The number of first and last frames in an utterance that are
%           considered only noise. By default (argument omitted or empty),
%           nf = 20. If the utterance has fewer than nf frames, all of its
%           frames are used.
%   g       Thresholding factor. By default (argument omitted or empty),
%           g = 0.05.
%
% Outputs:
%   X1_est  Enhanced power spectrogram of the 1st channel, same size as
%           Y1, bounded from below by g * Y1.
%
% Notes:
%   * Only base MATLAB/Octave functions are used (no toolbox required).
%   * If, for some frequency channel, the noise statistics do not allow
%     the weights to be determined (for instance when the noise-only
%     frames are all zero), that channel of Y1 is passed through
%     unchanged (w1 = 1, w2 = 0), which still satisfies the constraint
%     above.
%
% Ref:
%   Iván López-Espejo, Ángel M. Gómez, José A. González and Antonio M. Peinado
%   "Feature Enhancement for Robust Speech Recognition on Smartphones with Dual-Microphone"
%   Proc. EUSIPCO, Lisbon (Portugal), September 2014
% Copyright (C) Iván López-Espejo 2014
% Version: MMSN.m, v1.0 2014/07/19
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% This program is free software; you can redistribute it and/or modify
% it under the terms of the GNU General Public License as published by
% the Free Software Foundation; either version 2 of the License, or
% (at your option) any later version.
%
% This program is distributed in the hope that it will be useful,
% but WITHOUT ANY WARRANTY; without even the implied warranty of
% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
% GNU General Public License for more details.
%
% You can obtain a copy of the GNU General Public License from
% http://www.gnu.org/copyleft/gpl.html or by writing to
% Free Software Foundation, Inc.,675 Mass Ave, Cambridge, MA 02139, USA.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% Thresholding factor (by default).
if nargin < 5 || isempty(g)
    g = 0.05;
end

% The first and last 20 frames in each utterance are only noise (by
% default).
if nargin < 4 || isempty(nf)
    nf = 20;
end

[K,T] = size(Y1);

% Never request more noise-only frames than the utterance contains;
% otherwise the indexing below would run out of range.
nf = min(nf,T);

% Columns regarded as noise only: the nf leading and the nf trailing
% frames. When 2*nf > T the two ranges overlap and the shared frames are
% simply counted twice.
noiseIdx = [1:nf, T-nf+1:T];
N1 = Y1(:,noiseIdx);
N2 = Y2(:,noiseIdx);

% Zero-lag noise (cross-)correlations, one value per frequency channel.
% These are the entries of Phi = [phi11 phi12; phi12 phi22].
phi11 = sum(N1 .* N1, 2);
phi12 = sum(N1 .* N2, 2);
phi22 = sum(N2 .* N2, 2);

% One relative factor per frequency channel, as a column vector.
a = reshape(A21(1:K), K, 1);

% Weight calculation.
% With P = inv(Phi) = [phi22 -phi12; -phi12 phi11] / det(Phi), the weights
%   w1 = (P11 + a*P12) / C,   w2 = (P21 + a*P22) / C,
%   C  =  P11 + a*(P12 + P21) + a^2*P22
% do not depend on det(Phi), so they are evaluated in closed form without
% forming the inverse. D equals sum((N2 - a*N1).^2) and is therefore
% non-negative up to rounding.
D  = phi22 - 2 * a .* phi12 + (a.^2) .* phi11;
w1 = (phi22 - a .* phi12) ./ D;
w2 = (a .* phi11 - phi12) ./ D;

% Channels where D vanishes (relative to the magnitude of its terms) or is
% NaN have undetermined weights: fall back to passing Y1 through.
undetermined = ~(D > eps * (phi22 + (a.^2) .* phi11));
w1(undetermined) = 1;
w2(undetermined) = 0;

% Feature enhancement (bsxfun keeps compatibility with releases that lack
% implicit expansion).
X1_est = bsxfun(@times,w1,Y1) + bsxfun(@times,w2,Y2);

% Estimation is bounded.
X1_est = max(X1_est,g * Y1);