%code for autocorrelation-based pitch analysis in auditory models

clc;close all;clear;
%some parameters for the auditory model
fLow = 50; % the lowest characteristic frequency of the filter bank
fHigh = 4000; % and the highest
fCut = 1000; % cut-off frequency of the lowpass filter
%load a speech file, the example contains 27 ms of /a/ vowel
[sample,fs] = audioread('kaksi.wav',[5900 7200]); % CA2015: load your audio file in whole length in here
sampleLen = length(sample);

%create of a gammatone filter bank using a command from the auditory
%modeling toolbox (http://amtoolbox.sourceforge.net)

[b,a] = gammatone(erbspacebw(fLow,fHigh),fs,'complex');
%processing the signal through the filter bank
filterOut = real(ufilterbankz(b,a,sample));


%emulation of the neural transduction with half-wave rectification and
%lowpass filtering of the filter bank output
rectified = filterOut.*(filterOut>0);
%a first-order IIR filter is used as the lowpass filter
beta = exp(-fCut*2*pi/fs);
outSig = filter(1-beta,[1 -beta],rectified);

for freqInd=1:size(outSig,2) % autocorrelation for each band
    auCorr(:,freqInd)= xcorr(outSig(:,freqInd),'coeff')';
end

%plotting of the figures

auCorr=auCorr((size(auCorr,1)+1)/2:end,:); 

lags=[[0:(size(auCorr,1)-1)]/fs*1000];

fcs = erbspacebw(fLow,fHigh);

h=figure; 

%plot the input signal

g(1) = subplot('position',[0.13 0.6438 0.3326 0.3012]);hold on;

plot((1:sampleLen)./fs*1000,sample,'k');

xlabel('Time [ms]');title('a) speech signal');

set(gca,'xlim',[0 sampleLen/fs*1000]);

%plot the filter bank outputs

g(2) = subplot('position',[0.5803 0.6438 0.3326 0.3012]);hold on;

for freqInd=size(outSig,2):-1:1

plot((1:sampleLen)./fs*1000,(freqInd-1)/30+filterOut(:,freqInd),'k');

end

set(gca,'YTick',(0:4:(size(outSig,2)-1))/30);

set(gca,'YTickLabel',round(fcs(1:4:end)));

axis([0 30 0.2 0.9])

xlabel('Time [ms]');ylabel('Characteristic frequency [Hz]');

title('b) filter bank outputs');

%plot the cochleograms

g(3) = subplot('position',[0.13 0.115 0.333 0.3812]); hold on;

for freqInd=size(outSig,2):-1:1

plot((1:sampleLen)./fs*1000,(freqInd-1)/2+outSig(:,freqInd)/max(outSig(:,freqInd)),'k');

end

set(gca,'YTick',(0:4:(size(outSig,2)-1))/2);

set(gca,'YTickLabel',round(fcs(1:4:end)));

xlabel('Time [ms]');ylabel('Characteristic frequency [Hz]');

title('c) normalized cochleograms');

axis([0 30 0 13.5])

%plot the normalized autocorrelation functions

g(4) = subplot('position',[0.5803 0.1150 0.3826 0.3012]);hold on;

for freqInd=size(outSig,2):-1:1

plot(lags,(freqInd-1)*0.12+auCorr(:,freqInd),'k');

end

axis([0 30 0 4])

set(gca,'YTick',(0:4:(size(outSig,2)-1))*0.12+1);

set(gca,'YTickLabel',round(fcs(1:4:end)));

set(gca,'xTick',floor(1./[250 100 50]*1000));

ylabel('Characteristic frequency [Hz]');

xlabel('Time lag [ms]');

text(15,3.8,'d) autocorrelation functions','HorizontalAlignment','center');

%plot the sum autocorrelation

g(5) = subplot('position',[0.5803 0.427 0.3826 0.07]);

plot(lags,sum(auCorr'),'k');

set(gca,'xaxisLocation','top')

set(gca,'YTickLabel',[]);

axis([0 30 0 25])

set(gca,'xTick',floor(1./[250 100 50]*1000));

set(gca,'xTickLabel',{'250 Hz','100 Hz','50 Hz'});

title('Sum autocorrelation');

print -deps2 speechpitch_ver3

%break

set(h,'Units','centimeters');

set(h,'Position',[(50-15.5)/2 (50-13)/2 15.5 13]);

set(h,'Color',[1 1 1]);

set(h,'defaultaxesfontsize',9);set(h,'defaulttextfontsize',9);

set(h,'DefaultAxesFontName','Times');

print -deps2 speechpitch_ver3