% Copyright (c) 2005, International Computer Science Institute % % All rights reserved. % % Redistribution and use in source and binary forms, with or without % modification, are permitted provided that the following conditions % are met: % % Redistributions of source code must retain the above copyright notice, % this list of conditions and the following disclaimer. % % Redistributions in binary form must reproduce the above copyright % notice, this list of conditions and the following disclaimer in the % documentation and/or other materials provided with the distribution. % % Neither the name of ICSI nor the names of its contributors may be % used to endorse or promote products derived from this software % without specific prior written permission. % % THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS % "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT % LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR % A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT % OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, % SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED % TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR % PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF % LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING % NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS % SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. function output=cs(alData, fs, filLengthSec, tauMaxSec) %Multi-channel weighted correlation shaping. %June 2005, Marc Ferras, International Computer Science Institute %Number of channels nChannels=length(alData); %Length of input waveforms lData=length(alData{1}); %LPC analysis active? lpc=1; %LPC analysis frame length and shift (in seconds). lpcFrameLengthSec=30e-3; frameShiftSec=10e-3; %LPC order lpcOrder=12; %Maximum time considered for correlation shaping (in seconds) csLengthSec=tauMaxSec; %Length of don't care region in the autocorrelation function (in seconds) minTauSec=18.7e-3; expWCutOffSec=csLengthSec-18.7e-3; %Required training iterations trainingIterations=2000 %Learning rate alpha=2.5e-4; s=sprintf('Initializing...'); disp(s); %Frame length and shift in samples minTau=max(round(minTauSec*fs),1); lpcFrameLength=floor(lpcFrameLengthSec*fs); csLength=max(floor(csLengthSec*fs),lpcFrameLength); filLength=floor(filLengthSec*fs); frameShift=floor(frameShiftSec*fs); %Windowing function for LPC analysis window=hanning(lpcFrameLength); %Overlap-add normalizing factor (empirical) olaScale=0.887; %Initialize LPC analysis lpcRes1=zeros(lData,nChannels); %Perform LPC analysis if necessary if lpc==1 %LP residual extraction s=sprintf('LP residual extraction...'); disp(s); %Frame boundaries iniFrameSample=1; endFrameSample=iniFrameSample+lpcFrameLength-1; while endFrameSample<=lData %For each input channel... for ch=1:nChannels %Window current frame yWindow=window.*alData{ch}(iniFrameSample:endFrameSample); %Get the optimal predicor a=arburg(yWindow,lpcOrder); %Apply the predictor r=filter(a,1,yWindow); %Resynthesize the prediction residual using OLA lpcRes1(iniFrameSample:endFrameSample,ch)=lpcRes1(iniFrameSample:endFrameSample,ch)+olaScale.*r; end %Update frame boundaries for next iteration iniFrameSample=iniFrameSample+frameShift; endFrameSample=endFrameSample+frameShift; end else %No LP residual extraction s=sprintf('Skipping LP residual extraction...'); disp(s); %Copy the input waves for ch=1:nChannels lpcRes1(:,ch)=alData{ch}; end end lpcRes=lpcRes1; %Get current time stamp tic; %Generate the exponential weighting function %Time vector len=csLength; n=0:len-1; n=n'; %Update cut-off to sample units expWCutOff=round(expWCutOffSec*fs); %Exponential function (after 3*tau it is considered to reach 0). %In tau samples it will reach 0.05 expW=exp(-(3.*n)./expWCutOff); %Invert it to weight further lags higher w=expW(end:-1:1); %Include don't care region in the weighting sequence w(1:minTau)=zeros(minTau,1); w(1)=1; %Correlation shaping initializations %Rename input waves x=lpcRes; %Update input length lx=length(x); %Auxiliary correlation length (to avoid transients in the autocorrelation %functions after filtering) csLengthAux=csLength+filLength %Compute all pairs of cross-correlation functions corrXX={}; for ch1=1:nChannels for ch2=1:nChannels [corrXX{ch1,ch2},lagsXX]=xcorr(x(:,ch1),x(:,ch2),csLengthAux); end end %Update lags vectors lagsYX=lagsXX; lagsYY=lagsXX; %Frame number nFrame=0; %Boundaries of correlation functions in lag space minCorrLag=-csLength; maxCorrLag=csLength; %Offset index to lag 0 in the autocorrelation vectors corrOffset=csLength+1; %Half the correlation length csLength2=floor(csLength/2); %Initialization of multi-channel shaping filters to delta functions g=zeros(nChannels,filLength); if nChannels == 1 g(1)=1; else g(:,1)=1; end %Initialize multi-channel gradient grad=zeros(nChannels,filLength); %Initialize auxiliary and true cross-correlation vectors tmpCorrYX=zeros(length(corrXX{1,1}),nChannels); corrYX=zeros(2*csLength+1,nChannels); for t=1:trainingIterations %Show iteration number every 100 iterations if mod(t,100)==0 s=sprintf('Iteration %d.',t); disp(s); end %Compute output-input cross-correlation functions by filtering the %input autocorrelation functions. for ch=1:nChannels tmpCorrYX(:,ch)=filter(g(1,:),1,corrXX{1,ch}); for ch2=2:nChannels corr=filter(g(ch2,:),1,corrXX{ch2,ch}); tmpCorrYX(:,ch)=tmpCorrYX(:,ch)+corr; end end %Trim transients after filtering operation. for ch=1:nChannels corrYX(:,ch)=tmpCorrYX(csLengthAux+1-csLength:csLengthAux+1+csLength,ch); end %Compute output autocorrelation function by filtering the %cross-correlation functions. tmp=xcorr(g(1,:),tmpCorrYX(:,1)); corrYY=tmp(1:length(corrXX{1,1}))'; for ch=2:nChannels tmp=xcorr(g(ch,:),tmpCorrYX(:,ch)); tmp=tmp(1:length(corrXX{1,1}))'; corrYY=corrYY+tmp; end %Trim transients after filtering operation. corrYY=corrYY(csLengthAux+1-csLength:csLengthAux+1+csLength); %Compute the gradient for each channel for ch=1:nChannels grad(ch,:)=csgrad(corrYY,corrYX(:,ch),csLength,filLength,minTau,w); end %Compute gradient energy ngrad=0; for ch=1:nChannels ngrad=ngrad+sum(grad(ch,:).*grad(ch,:)); end ngrad=sqrt(ngrad); %Normalize gradient if ngrad~=0 grad=grad./ngrad; end %Update shaping filter g=g-alpha.*grad; %Compute filter energy ng=0; for ch=1:nChannels ng=ng+sum(g(ch,:).*g(ch,:)); end ng=sqrt(ng); %Normalize filter energy if ng~=0 g=g./ng; end end %Apply converged filter onto the input speech signal (assuming that lpc and %the shaping filter order can be swapped) output=zeros(lData,1); for ch=1:nChannels output=output+filter(g(ch,:),1,alData{ch}); end %Print processing time s=sprintf('Processing time: %f s.', toc); disp(s); return;