1 模型
本文介绍一个说话人识别系统:采用能够反映人对语音感知特性的Mel频率倒谱系数(MFCC)作为特征参数,并采用矢量量化(VQ)技术以避免时间规整问题.MFCC主要模拟人耳的听觉过程,与其它参数相比,它对语音波形的变化不敏感,更加稳定,因此系统取得了很好的识别结果.实验表明,系统训练和识别的计算量和存储量都比较低.
2 部分代码
function varargout = Main(varargin)
% MAIN M-file for Main.fig
% MAIN, by itself, creates a new MAIN or raises the existing
% singleton*.
%
% H = MAIN returns the handle to a new MAIN or the handle to
% the existing singleton*.
%
% MAIN('CALLBACK',hObject,eventData,handles,...) calls the local
% function named CALLBACK in MAIN.M with the given input arguments.
%
% MAIN('Property','Value',...) creates a new MAIN or raises the
% existing singleton*. Starting from the left, property value pairs are
% applied to the GUI before Main_OpeningFcn gets called. An
% unrecognized property name or invalid value makes property application
% stop. All inputs are passed to Main_OpeningFcn via varargin.
%
% *See GUI Options on GUIDE's Tools menu. Choose "GUI allows only one
% instance to run (singleton)".
%
% See also: GUIDE, GUIDATA, GUIHANDLES
% Edit the above text to modify the response to help Main
% Last Modified by GUIDE v2.5 11-Aug-2016 00:35:18
% Begin initialization code - DO NOT EDIT
% gui_State describes this GUI to gui_mainfcn: its name (this file's name),
% the singleton flag, and the opening/output function handles. The layout
% function and callback are left empty here.
gui_Singleton = 1;
gui_State = struct('gui_Name', mfilename, ...
'gui_Singleton', gui_Singleton, ...
'gui_OpeningFcn', @Main_OpeningFcn, ...
'gui_OutputFcn', @Main_OutputFcn, ...
'gui_LayoutFcn', [] , ...
'gui_Callback', []);
% When the first argument is a character string it is converted to a
% function handle and stored as the callback to dispatch.
if nargin && ischar(varargin{1})
gui_State.gui_Callback = str2func(varargin{1});
end
% Forward everything to gui_mainfcn; its outputs are only collected when the
% caller asked for at least one output argument.
if nargout
[varargout{1:nargout}] = gui_mainfcn(gui_State, varargin{:});
else
gui_mainfcn(gui_State, varargin{:});
end
% End initialization code - DO NOT EDIT
% --- Executes just before Main is made visible.
function Main_OpeningFcn(hObject, eventdata, handles, varargin)
% MAIN_OPENINGFCN Initialise the GUI just before it becomes visible.
%   This function has no output args, see OutputFcn.
%
%   hObject    handle to figure
%   eventdata  reserved - to be defined in a future version of MATLAB
%   handles    structure with handles and user data (see GUIDATA)
%   varargin   command line arguments to Main (see VARARGIN)
%
%   Stores the figure handle as the default command-line output, then shows
%   a per-label tabulation of the stored training labels in
%   handles.totalrecords and the number of training rows in
%   handles.resultText. If the training files cannot be read, a warning is
%   issued and the GUI opens with an empty summary and a count of zero
%   instead of failing to start.

% Choose default command line output for Main
handles.output = hObject;
% Update handles structure
guidata(hObject, handles);
% UIWAIT makes Main wait for user response (see UIRESUME)
% uiwait(handles.figure1);

% Load into structs rather than letting LOAD create variables implicitly.
% Both files are read before either result is used so the two values are
% always consistent with each other.
TrainingSet = [];
TrainingLable = [];
try
    setData = load('TrainingSet');
    lableData = load('TrainingLable');
    TrainingSet = setData.TrainingSet;
    TrainingLable = lableData.TrainingLable;
catch loadErr
    warning('Main:trainingDataUnavailable', ...
        'Could not load training data: %s', loadErr.message);
end

totalSampl = size(TrainingSet, 1);

if isempty(TrainingLable)
    labelSummary = '';
else
    labelSummary = num2str(tabulate(TrainingLable));
end
set(handles.totalrecords, 'String', labelSummary);

% sprintf is used instead of strcat: strcat drops the trailing blank of a
% char input, which made the label read 'Total Samples:N'.
set(handles.resultText, 'String', sprintf('Total Samples: %d', totalSampl));
% --- Outputs from this function are returned to the command line.
function varargout = Main_OutputFcn(hObject, eventdata, handles)
% MAIN_OUTPUTFCN Return the GUI's outputs to the command line.
%   varargout  cell array for returning output args (see VARARGOUT)
%   hObject    handle to figure
%   eventdata  reserved - to be defined in a future version of MATLAB
%   handles    structure with handles and user data (see GUIDATA)

% The single output is whatever was stored in handles.output.
varargout = {handles.output};
% --- Executes on button press in trainBtn.
function trainBtn_Callback(hObject, eventdata, handles)
% TRAINBTN_CALLBACK Record an utterance, pre-emphasise it and save it to RAW.wav.
%   Executes on button press in trainBtn.
%
%   hObject    handle to trainBtn (see GCBO)
%   eventdata  reserved - to be defined in a future version of MATLAB
%   handles    structure with handles and user data (see GUIDATA)
%
%   Steps: record two seconds of mono audio at 8 kHz, apply a pre-emphasis
%   filter, write the filtered signal to RAW.wav (16 bit), play it back,
%   then wait for the user to confirm in the Command Window. No features
%   are extracted and no training data is written here; the final message
%   directs the user to "Train with Audio" for that.
%
%   The removed functions wavrecord / wavwrite / wavplay are replaced by
%   audiorecorder / audiowrite / audioplayer. The frame-blocking and
%   windowing that used to follow the playback has been dropped because its
%   result was never used.

clc;

Fs = 8000;            % sampling frequency (Hz)
durationSec = 2;      % recording length (s)
preEmphasis = 0.97;   % pre-emphasis filter coefficient

set(handles.statusText, 'String', 'Start Speaking...');
pause(0.001);         % give the status text a chance to repaint

% Blocking capture of durationSec seconds, 16 bit, one channel.
recorder = audiorecorder(Fs, 16, 1);
recordblocking(recorder, durationSec);
myRecording = getaudiodata(recorder);

set(handles.statusText, 'String', 'Saving....');
pause(0.001);

% Pre-emphasis (first-order high-pass): y(n) = x(n) - preEmphasis*x(n-1)
Filtered_output = filter([1, -preEmphasis], 1, myRecording);

% wavwrite(..., 'RAW') appended the .wav extension itself; audiowrite needs
% the extension spelled out to select the file format.
audiowrite('RAW.wav', Filtered_output, Fs, 'BitsPerSample', 16);

% Blocking playback, as wavplay did by default.
player = audioplayer(Filtered_output, Fs);
playblocking(player);

set(handles.statusText, 'String', 'Input Saved in .wav file format');
pause(0.001);

% Wait for the user to press Enter in the Command Window; the typed text is
% not used.
input('Press any key ', 's');
disp('Select saved input through "Train with Audio" for Feature Extraction');
% --- Executes on button press in testBtn.
function testBtn_Callback(hObject, eventdata, handles)
% TESTBTN_CALLBACK Record an utterance, extract features and classify the language.
%   Executes on button press in testBtn.
%
%   hObject    handle to testBtn (see GCBO)
%   eventdata  reserved - to be defined in a future version of MATLAB
%   handles    structure with handles and user data (see GUIDATA)
%
%   Steps: record two seconds of mono audio at 8 kHz, pre-emphasise, split
%   into overlapping frames, apply a Hamming window, compute liftered
%   cepstral coefficients through a mel filterbank, average them, and pass
%   the result to multisvm together with the stored training data. The
%   predicted class (1, 2 or 3) is shown in handles.outputText as
%   'English', 'Marathi' or 'Hindi'; any other result leaves '--'.
%   Intermediate signals are plotted on axes1..axes4.
%
%   Fixes relative to the previous version:
%     * audiorecorder was called with the old wavrecord arguments and no
%       recording was ever made; the recorder is now constructed as
%       audiorecorder(Fs, 16, 1) and recorded with recordblocking.
%     * A duplicated tail block that referenced the undefined name DCT (and
%       raised an error after every classification) has been removed.
%     * sound is given the sampling rate.
%     * The label file is loaded as 'TrainingLable', matching the variable
%       name used, instead of 'Traininglable'.
%     * The classifier result is no longer echoed to the Command Window.
%
%   Relies on trifbank and multisvm, which are not defined in this function.

clc;

Fs = 8000;            % sampling frequency (Hz)
durationSec = 2;      % recording length (s)
preEmphasis = 0.97;   % pre-emphasis filter coefficient

%% (0) Recording
set(handles.statusText, 'String', 'Start Speaking...');
pause(0.01);          % give the status text a chance to repaint

recorder = audiorecorder(Fs, 16, 1);
recordblocking(recorder, durationSec);
myRecording = getaudiodata(recorder);

set(handles.outputText, 'String', '--');
set(handles.statusText, 'String', 'Stop Speaking');
pause(0.001);

axes(handles.axes1);
plot(myRecording);
grid on;
xlabel('Samples');
ylabel('Magnitude(db)');
set(handles.statusText, 'String', 'Done with Recording...');
pause(0.001);

%% (1) Pre-emphasis and frame blocking
% Pre-emphasis (first-order high-pass): y(n) = x(n) - preEmphasis*x(n-1)
Filtered_output = filter([1, -preEmphasis], 1, myRecording);
sound(Filtered_output, Fs);

axes(handles.axes2);
plot(Filtered_output);
grid on;
xlabel('Samples');
ylabel('Magnitude(db)');

len = length(Filtered_output);
Frame_size = Fs*32/1000;                   % 32 ms -> 256 samples at 8 kHz
Frame_overlap = Fs*16/1000;                % 16 ms -> 128 samples at 8 kHz
Frame_step = Frame_size - Frame_overlap;   % hop between frame starts
numFrames = floor(len/Frame_step);

if numFrames < 1
    % Nothing usable was captured; leave the result as '--'.
    set(handles.statusText, 'String', 'No audio captured');
    return;
end

% Zero-pad so that the last frame can be read in full.
paddedSignal = zeros(numFrames*Frame_step + Frame_size, 1);
paddedSignal(1:len) = Filtered_output(:);

% Frame i holds samples i*Frame_step + (1:Frame_size), one frame per row.
% NOTE(review): this offset skips the first Frame_step samples and lets the
% last frame fall entirely into the zero padding. It is kept as it was
% because the stored training features were presumably produced with the
% same framing - confirm before changing it.
frameIdx = bsxfun(@plus, (1:numFrames).' * Frame_step, 1:Frame_size);
fdata = reshape(paddedSignal(frameIdx), numFrames, Frame_size);

%% (2) Windowing
nbFrames = size(fdata, 1);
nbSamples = size(fdata, 2);
w = hamming(nbSamples);
afterWindow = bsxfun(@times, fdata, w.');

axes(handles.axes3);
plot(afterWindow);
grid on;
xlabel('Samples');
ylabel('Magnitude(db)');

%% (3) Cepstral features
R = [ 300 3700 ];     % frequency range to consider (Hz)
M = 20;               % number of filterbank channels
N = 13;               % number of cepstral coefficients
L = 22;               % liftering parameter

% NOTE(review): the FFT length is derived from the number of frames and the
% FFT runs along dimension 1, i.e. across frames, because frames are stored
% as rows here. This looks unintended, but the output size must match the
% columns of TrainingSet, so the computation is left numerically unchanged -
% verify against the code that produced the training set.
nfft = 2^nextpow2( nbFrames );   % length of FFT analysis
K = nfft/2+1;                    % length of the unique part of the FFT

% Forward and backward mel frequency warping (natural logarithm).
hz2mel = @( hz )( 1127*log(1+hz/700) );      % Hertz to mel
mel2hz = @( mel )( 700*exp(mel/1127)-700 );  % mel to Hertz
% Type III DCT matrix routine
dctm = @( N, M )( sqrt(2.0/M) * cos( repmat([0:N-1].',1,M).* repmat(pi*([1:M]-0.5)/M,N,1) ) );
% Cepstral lifter routine
ceplifter = @( N, L )( 1+0.5*L*sin(pi*[0:N-1]/L) );

MAG = abs( fft(afterWindow, nfft, 1) );
% Triangular filterbank, uniformly spaced on the mel scale; H is M x K
H = trifbank( M, K, R, Fs, hz2mel, mel2hz );
% Filterbank applied to the unique part of the magnitude spectrum
FBE = H * MAG(1:K,:);
% Log filterbank energies -> cepstral coefficients through the DCT
CC = dctm( N, M ) * log( FBE );
% Cepstral liftering
CC = diag( ceplifter( N, L ) ) * CC;

% Column-wise mean of CC is the feature vector handed to the classifier.
meanMFCC = mean(CC);

axes(handles.axes4);
plot(meanMFCC);
grid on;
xlabel('Samples');
ylabel('Magnitude(db)');
set(handles.statusText, 'String', 'Done');

%% (4) Classification
clc;
setData = load('TrainingSet');
lableData = load('TrainingLable');
classes = multisvm(setData.TrainingSet, lableData.TrainingLable', meanMFCC);

languageNames = {'English', 'Marathi', 'Hindi'};   % class 1, 2, 3
set(handles.outputText, 'String', '--');
if isscalar(classes) && any(classes == 1:numel(languageNames))
    set(handles.outputText, 'String', languageNames{classes});
end
function edit1_Callback(hObject, eventdata, handles)
% EDIT1_CALLBACK Callback stub for edit1; the body contains no statements.
% hObject handle to edit1 (see GCBO)
% eventdata reserved - to be defined in a future version of MATLAB
% handles structure with handles and user data (see GUIDATA)
% Hints: get(hObject,'String') returns contents of edit1 as text
% str2double(get(hObject,'String')) returns contents of edit1 as a double
% --- Executes during object creation, after setting all properties.
function edit1_CreateFcn(hObject, eventdata, handles)
% EDIT1_CREATEFCN Executes during creation of edit1, after all properties are set.
%   hObject    handle to edit1 (see GCBO)
%   eventdata  reserved - to be defined in a future version of MATLAB
%   handles    empty - handles not created until after all CreateFcns called
%
%   Hint: edit controls usually have a white background on Windows.
%   See ISPC and COMPUTER.

% On a PC, switch the background to white when the control still carries
% the default uicontrol background colour.
if ispc
    currentColor = get(hObject, 'BackgroundColor');
    defaultColor = get(0, 'defaultUicontrolBackgroundColor');
    if isequal(currentColor, defaultColor)
        set(hObject, 'BackgroundColor', 'white');
    end
end
% --------------------------------------------------------------------
function uipanel1_ButtonDownFcn(hObject, eventdata, handles)
% UIPANEL1_BUTTONDOWNFCN Callback stub for uipanel1; the body contains no statements.
% hObject handle to uipanel1 (see GCBO)
% eventdata reserved - to be defined in a future version of MATLAB
% handles structure with handles and user data (see GUIDATA)
% --- Executes on mouse press over figure background.
function figure1_ButtonDownFcn(hObject, eventdata, handles)
% FIGURE1_BUTTONDOWNFCN Callback stub for figure1; the body contains no statements.
% hObject handle to figure1 (see GCBO)
% eventdata reserved - to be defined in a future version of MATLAB
% handles structure with handles and user data (see GUIDATA)
% --- If Enable == 'on', executes on mouse press in 5 pixel border.
% --- Otherwise, executes on mouse press in 5 pixel border or over trainWithFilebtn.
function trainWithFilebtn_ButtonDownFcn(hObject, eventdata, handles)
% TRAINWITHFILEBTN_BUTTONDOWNFCN Callback stub for trainWithFilebtn; the body contains no statements.
% hObject handle to trainWithFilebtn (see GCBO)
% eventdata reserved - to be defined in a future version of MATLAB
% handles structure with handles and user data (see GUIDATA)
% --- Executes when figure1 is resized.
function figure1_ResizeFcn(hObject, eventdata, handles)
% FIGURE1_RESIZEFCN Callback stub for figure1 resizing; the body contains no statements.
% hObject handle to figure1 (see GCBO)
% eventdata reserved - to be defined in a future version of MATLAB
% handles structure with handles and user data (see GUIDATA)
% --- Executes on key press with focus on testWithAudioBtn and none of its controls.
function testWithAudioBtn_KeyPressFcn(hObject, eventdata, handles)
% TESTWITHAUDIOBTN_KEYPRESSFCN Callback stub for testWithAudioBtn; the body contains no statements.
% hObject handle to testWithAudioBtn (see GCBO)
% eventdata structure with the following fields (see UICONTROL)
%   Key: name of the key that was pressed, in lower case
%   Character: character interpretation of the key(s) that was pressed
%   Modifier: name(s) of the modifier key(s) (i.e., control, shift) pressed
% handles structure with handles and user data (see GUIDATA)
3 仿真结果
4 参考文献
[1] 王伟, 邓辉文. 基于MFCC参数和VQ的说话人识别系统[C]. 第四届全国信息获取与处理学术会议.