%code for autocorrelation-based pitch analysis in auditory models clc;close all;clear; %some parameters for the auditory model fLow = 50; % the lowest characteristic frequency of the filter bank fHigh = 4000; % and the highest fCut = 1000; % cut-off frequency of the lowpass filter %load a speech file, the example contains 27 ms of /a/ vowel [sample,fs] = audioread('kaksi.wav',[5900 7200]); % CA2015: load your audio file in whole length in here sampleLen = length(sample); %create of a gammatone filter bank using a command from the auditory %modeling toolbox (http://amtoolbox.sourceforge.net) [b,a] = gammatone(erbspacebw(fLow,fHigh),fs,'complex'); %processing the signal through the filter bank filterOut = real(ufilterbankz(b,a,sample)); %emulation of the neural transduction with half-wave rectification and %lowpass filtering of the filter bank output rectified = filterOut.*(filterOut>0); %a first-order IIR filter is used as the lowpass filter beta = exp(-fCut*2*pi/fs); outSig = filter(1-beta,[1 -beta],rectified); for freqInd=1:size(outSig,2) % autocorrelation for each band auCorr(:,freqInd)= xcorr(outSig(:,freqInd),'coeff')'; end %plotting of the figures auCorr=auCorr((size(auCorr,1)+1)/2:end,:); lags=[[0:(size(auCorr,1)-1)]/fs*1000]; fcs = erbspacebw(fLow,fHigh); h=figure; %plot the input signal g(1) = subplot('position',[0.13 0.6438 0.3326 0.3012]);hold on; plot((1:sampleLen)./fs*1000,sample,'k'); xlabel('Time [ms]');title('a) speech signal'); set(gca,'xlim',[0 sampleLen/fs*1000]); %plot the filter bank outputs g(2) = subplot('position',[0.5803 0.6438 0.3326 0.3012]);hold on; for freqInd=size(outSig,2):-1:1 plot((1:sampleLen)./fs*1000,(freqInd-1)/30+filterOut(:,freqInd),'k'); end set(gca,'YTick',(0:4:(size(outSig,2)-1))/30); set(gca,'YTickLabel',round(fcs(1:4:end))); axis([0 30 0.2 0.9]) xlabel('Time [ms]');ylabel('Characteristic frequency [Hz]'); title('b) filter bank outputs'); %plot the cochleograms g(3) = subplot('position',[0.13 0.115 0.333 0.3812]); hold on; for freqInd=size(outSig,2):-1:1 plot((1:sampleLen)./fs*1000,(freqInd-1)/2+outSig(:,freqInd)/max(outSig(:,freqInd)),'k'); end set(gca,'YTick',(0:4:(size(outSig,2)-1))/2); set(gca,'YTickLabel',round(fcs(1:4:end))); xlabel('Time [ms]');ylabel('Characteristic frequency [Hz]'); title('c) normalized cochleograms'); axis([0 30 0 13.5]) %plot the normalized autocorrelation functions g(4) = subplot('position',[0.5803 0.1150 0.3826 0.3012]);hold on; for freqInd=size(outSig,2):-1:1 plot(lags,(freqInd-1)*0.12+auCorr(:,freqInd),'k'); end axis([0 30 0 4]) set(gca,'YTick',(0:4:(size(outSig,2)-1))*0.12+1); set(gca,'YTickLabel',round(fcs(1:4:end))); set(gca,'xTick',floor(1./[250 100 50]*1000)); ylabel('Characteristic frequency [Hz]'); xlabel('Time lag [ms]'); text(15,3.8,'d) autocorrelation functions','HorizontalAlignment','center'); %plot the sum autocorrelation g(5) = subplot('position',[0.5803 0.427 0.3826 0.07]); plot(lags,sum(auCorr'),'k'); set(gca,'xaxisLocation','top') set(gca,'YTickLabel',[]); axis([0 30 0 25]) set(gca,'xTick',floor(1./[250 100 50]*1000)); set(gca,'xTickLabel',{'250 Hz','100 Hz','50 Hz'}); title('Sum autocorrelation'); print -deps2 speechpitch_ver3 %break set(h,'Units','centimeters'); set(h,'Position',[(50-15.5)/2 (50-13)/2 15.5 13]); set(h,'Color',[1 1 1]); set(h,'defaultaxesfontsize',9);set(h,'defaulttextfontsize',9); set(h,'DefaultAxesFontName','Times'); print -deps2 speechpitch_ver3