/* zchphp - 2008-8-23 20:46:00 */
/* 利用HMM的孤立字(词)语音识别系统 (isolated-character/word speech recognition system using HMMs) */
/** ***********************************************************************/
/* 文件名称:recog.cpp(主程序) */
/****************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include "cmhmm.h"
int BR_ Disp[MAX_ ARCS]; /* Displayment to save multiplies (branch_ prob) * /
int ME_ Disp[MAX_ ARCS]; /* Displayment to save mnltiplies (mean) * /
int CO_Disp[MAX_ ARCS]; /* Displayment to save multiplies (covariance) * /
int Train _ Disp[ MAX _ ARCS + MAX _ STATES]; / * really only need max(ma,ms) * /
im Num _ Dimensions;
int Num_ Train_ Sets; /* Size of dimension of training data */
int Total _ Frames;
int Longest _ Train; /* the length of the longest training set */
TRAINING Train[MAX-_ TRAIN_ SIZE];
HMM Hmm[ MAX_ REF];
main(argc,argv)
int argc;
char * argv[];
{
int i, num_ ref, result;
int cnt[MAX _ REF];
string hmm_ file;
char * in file =NULL, *ref_list = NULL;
char ch_ r[MAX_REF][8];
FILE * fp;
/****************
*命令行参数设置*
***************/
if( argc < 5 )
usage(argv[ 0 ] );
for(i = 1; i < argc; i+ + ){
if(argv[i][0] ! = '-'){
fprinff(stderr, "Illegal switch %s \ n", argv[i]);
usage(amv[0] );
}
switch (argv[i]) [1]);
}
case 'h':
ref_ list = argv[+ + i];
break;
case 'i':
in_ file = argv[+ + i];
break;
default:
fprimf(stderr, "Illegal switch %s \ n',argv[i]);
usage(argv[0]);
break;
}
}
/****************************
*读取参考模型的数量,即词表的大小*
****************************/
if((fp = fopen(ref_ list, "r")) = = NULL) {
fprinff(stderr, "%s: can't open %s to read. \ n', ref_list);
exit(0);
}
fscanf( fp, '%d', &num_ref);
/************
*读取模型的参数*
***********/
for(i = 0; i< num_ ref; i+ +){
cnt[i] = 0;
fscanf(fp, "%s%s", hmm_ file, ch_r[i]);
read_ hmm(hmm_ file, i);
cal_ invdet(i);
}
fclose(fp) ;
pfintf("Category size: % d \ n', hum_ref);
/***************
*读取待识别语音的数据*
*****************/
if((Num_Train_ Sets = read_ training(in_ file)) > MAX_TRAIN_ SIZE){
fprintf(stderr, '%s: too many test patterns in %s. \ n", argv[0], in_ file);
exit(0);
}
/**********
*识别*
*********/
for(i = 0;i< Num_Train_Sets; ++i){
result = dp(Train + i, num ref);
+ + cnt[result];
# if TRACE
printf("[%3d] %3d: %s \ n", i + 1,Train[i].length,ch_r[result]);
fflush (stdout);
# endif
free(Train[ i]. data);
}
printf("# # # 识别结果# # #");
for(i = 0;i < num_ref;i + +)
printf("# # # /%4s/ = %6.2f %% \n", ch_r[i], cnt[i] * 100.0/Num_Train_Sets);
}
/*****************************************************************/
/* Function: usage                                               */
/* Purpose : print the command-line synopsis on stderr and       */
/*           terminate the program                               */
/* Input   : obj - string printed in the program-name position   */
/*                 of the synopsis                               */
/* Returns : never returns; exits with status 1 because it is    */
/*           only reached on a command-line error                */
/*****************************************************************/
int usage(char *obj)
{
    fprintf(stderr, "Usage: %s -h HMM-list -i input-file \n", obj);
    exit(1);
}
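/* 文件名称:(CMHMM的输入输出函数) -- file: CMHMM input/output functions (read_hmm, read_training); the file name itself is missing from the listing */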
#include "cmhmm.h"
#include <string.h>
/* Offset tables: the first three are filled by read_hmm(), the last by read_training(). */
extern int BR_Disp[];
extern int ME_Disp[];
extern int CO_Disp[];
extern int Train_Disp[];

/* Totals written by read_training(). */
extern int Total_Frames;

/* Feature dimensionality (read from the model file) and longest pattern length. */
extern int Num_Dimensions;
extern int Longest_Train;

/* Input patterns and reference models. */
extern TRAINING Train[];
extern HMM Hmm[];
void read_hmm(char *file,int rnum)
{
FILE *fp;
int temp,i,j,k,num,mat_num,from,to,bdisp,mdisp,cdisp;
float trans_prob;
if((fp=fopen(file,"r"))==NULL)
exit(0);
if(fscanf(fp,"%d",&Num_Dimensions)==EOF)
exit(0);
if(fscanf(fp,"%d",&temp)==EOF)
exit(0);
Hmm[rnum].num_omatrix=temp;
if(fscanf(fp,"%d",&temp)==EOF)
exit(0);
Hmm[rnum].num_mixture=temp;
for(i=0;i<Hmm[rnum].num_omatrix;++i)
{
temp=i*Hmm[rnum].num_mixture;
BR_Disp[i]=temp;
temp*=Num_Dimensions;
ME_Disp[i]=temp;
temp*=Num_Dimensions;
CO_Disp[i]=temp;
}
temp=FLOATSIZE*Hmm[rnum].num_omatrix*Hmm[rnum].num_mixture;
if((Hmm[rnum].branch_prob=(float *)malloc(temp))==NULL)
exit(0);
temp*=Num_Dimensions;
if((Hmm[rnum].mean=(float *)malloc(temp))==NULL)
exit(0);
temp*=Num_Dimensions;
if((Hmm[rnum].covariance=(float *)malloc(temp))==NULL)
exit(0);
for(i=0;i<Hmm[rnum].num_omatrix;i++)
{
bdisp=BR_Disp[i];
for(j=0;j<Hmm[rnum].num_mixture;j++)
{
if(fscanf(fp,"%f",Hmm[rnum].branch_prob+bdisp+j)==EOF)
exit(0);
}
for(temp=0,j=0;j<Hmm[rnum].num_mixture;++j,temp+=Num_Dimensions)
{
mdisp=ME_Disp[i]+temp;
cdisp=CO_Disp[i]+temp*Num_Dimensions;
for(k=0;k<Num_Dimensions;k++)
{
if(fscanf(fp,"%f",Hmm[rnum].mean+mdisp+k)==EOF)
exit(0);
}
for(k=0;k<Num_Dimensions*Num_Dimensions;k++)
{
if(fscanf(fp,"%f",Hmm[rnum].covariance+cdisp+k)==EOF)
exit(0);
}
}
}
if(fscanf(fp,"%d",&temp)==EOF)exit(0);
Hmm[rnum].num_states=temp;
if((Hmm[rnum].states=(STATE *)malloc(sizeof(STATE)*Hmm[rnum].num_states))==NULL)exit(0);
for(i=0;i<Hmm[rnum].num_states;i++)
{
Hmm[rnum].states[i].label=i;
Hmm[rnum].states[i].num_from=Hmm[rnum].states[i].num_to=0;
Hmm[rnum].states[i].is_initial=Hmm[rnum].states[i].is_final=FALSE;
}
if(fscanf(fp,"%d",&temp)==EOF)exit(0);
Hmm[rnum].num_initial=temp;
for(i=0;i<Hmm[rnum].num_initial;i++)
{
if(fscanf(fp,"%d",&num)==EOF)exit(0);
Hmm[rnum].states[num].is_initial=TRUE;
}
if(fscanf(fp,"%d",&temp)==EOF)exit(0);
Hmm[rnum].num_final=temp;
for(i=0;i<Hmm[rnum].num_final;i++)
{
if(fscanf(fp,"%d",&num)==EOF)exit(0);
Hmm[rnum].states[num].is_final=TRUE;
}
if(fscanf(fp,"%d",&temp)==EOF)exit(0);
Hmm[rnum].num_arcs=temp;
if((Hmm[rnum].transitions=(TRANSITION*)malloc(sizeof(TRANSITION)*\
Hmm[rnum].num_arcs))==NULL)exit(0);
for(i=0;i<Hmm[rnum].num_arcs;i++)
{
if(fscanf(fp,"%d%d%e%d",&from,&to,&trans_prob,&mat_num)==EOF)exit(0);
if((mat_num>=Hmm[rnum].num_omatrix)||((mat_num<0)&&(mat_num!=NULL_TRANSITION)))exit(0);
if((from>Hmm[rnum].num_states-1)||(from<0)||(to>Hmm[rnum].num_states-1)||(to<0))
{
fprintf(stderr,"read_hmm:Illegal from(%d)-to(%d)\n",from,to);
exit(0);
}
if(trans_prob<=0.0)exit(0);
Hmm[rnum].transitions[i].trans_prob=log(trans_prob);
Hmm[rnum].transitions[i].origin=from;
Hmm[rnum].transitions[i].destination=to;
Hmm[rnum].transitions[i].out_prob_index=mat_num;
Hmm[rnum].states[from].trans_from[Hmm[rnum].states[from].num_from++]=i;
Hmm[rnum].states[to].trans_to[Hmm[rnum].states[to].num_to++]=i;
}
if(fscanf(fp,"%*d")!=EOF)exit(0);
fclose(fp);
}
/*
 * Load every pattern file named in the list file `f_list` into Train[].
 *
 * Each pattern file is binary: a sequence of records of Num_Dimensions + 1
 * floats.  The first float of every record is skipped and the remaining
 * Num_Dimensions values are stored multiplied by 100.0.  The file is read
 * twice: once to count the records, once to copy them.
 *
 * Side effects: allocates Train[n].data and sets Train[n].length for each
 * pattern; sets Total_Frames and Longest_Train; fills Train_Disp with
 * multiples of Longest_Train.  Reading stops with a warning once
 * MAX_TRAIN_SIZE patterns have been loaded.
 *
 * f_list - path of the text file listing the pattern files
 * Returns the number of patterns loaded.  Errors print a message and
 * terminate the program with status 1.
 *
 * NOTE(review): the capacity of the project type `string` is not visible
 * here, so file names are still read with an unbounded "%s".
 */
int read_training(char *f_list)
{
    FILE *fp, *fs;
    int cnt, i, j, k, length, total_length = 0, max = 0;
    int frame_items;
    string f_name;
    float buff[100 + 1];    /* one record: skipped leading value + up to 100 features */

    if (Num_Dimensions > 100) {
        fprintf(stderr, "read_data:too many dimensions (%d)\n", Num_Dimensions);
        exit(1);
    }
    frame_items = Num_Dimensions + 1;

    if ((fs = fopen(f_list, "r")) == NULL) {
        fprintf(stderr, "read_data:can't open %s to read\n", f_list);
        exit(1);
    }
    for (cnt = 0; fscanf(fs, "%s", f_name) != EOF; ++cnt) {
        if (cnt == MAX_TRAIN_SIZE) {
            printf("Warning in read_data:number of data sets(%s) is forced into %d.\n", f_list,
                   MAX_TRAIN_SIZE);
            fflush(stdout);
            break;
        }
        if ((fp = fopen(f_name, "rb")) == NULL) {
            fprintf(stderr, "read_data:can't open %s to read\n", f_name);
            exit(1);
        }

        /* pass 1: count the complete records */
        for (length = 0; (int)fread(buff, FLOATSIZE, frame_items, fp) == frame_items; ++length)
            ;
        if (length == 0) {
            /* an empty pattern cannot be scored and would index frame -1 later */
            fprintf(stderr, "read_data:%s contains no complete frame\n", f_name);
            exit(1);
        }
        if ((Train[cnt].data = (float *)malloc((size_t)FLOATSIZE * Num_Dimensions * length)) == NULL) {
            fprintf(stderr, "read_data:not enough space for %s\n", f_name);
            exit(1);
        }

        /* pass 2: copy the features, dropping element 0 of every record */
        rewind(fp);
        for (i = j = 0; (int)fread(buff, FLOATSIZE, frame_items, fp) == frame_items; ++i)
            for (k = 1; k < frame_items;)
                Train[cnt].data[j++] = 100.0 * buff[k++];
        fclose(fp);

        /* both passes must have seen the same number of records */
        if (length != i) {
            fprintf(stderr, "read_data:size missmatch(%d!=%d)\n", length, i);
            exit(1);
        }
        Train[cnt].length = length;
        total_length += length;
        if (length > max)
            max = length;
    }
    fclose(fs);

    Total_Frames = total_length;
    Longest_Train = max;
    /* row offsets for buffers laid out as rows of Longest_Train entries */
    for (i = j = 0; j < MAX_ARCS + MAX_STATES; ++j, i += Longest_Train)
        Train_Disp[j] = i;
    printf("Training data list:%s[%d data sets]\n", f_list, cnt);
    return cnt;
}
/* 文件名称:cmhmmdp.cpp (file name: cmhmmdp.cpp) */
#include"cmhmm.h"
/* Offset tables defined in the main program file. */
extern int BR_Disp[], ME_Disp[], CO_Disp[], Train_Disp[];
extern int Num_Dimensions, Num_Train_Sets, Longest_Train;
extern TRAINING Train[];
extern HMM Hmm[];
/* Per-model inverse covariances and normalisation terms, indexed by model number. */
extern double *Inv_Covariance[], *Det_Covariance[];
/* Feature data of the pattern being scored: set by dp(), read by cal_outprob(). */
float *data;
/***************************************************************************/
/*函数名称:dp */
/*函数功能:计算任一特征向量序列输入到各个HMM时的输出频率,并返回有最大输出概率的*/
/* HMM的序列 */
/*输入参数:train -输入的特征响亮序列;num-ref —词表的大小 */
/*返回值:返回有最大输出概率的HMM的序号。 */
/***************************************************************************/
int dp(TRAINING *train,int num_ref)
{
char *stop, *cptr;
int i,j,k,l,m,n,t;
int length,index,x;
int out,lx,nhat;
int Outprob_Disp[MAX_STATES];
double *outprob_buff,oprob;
double out_prob,banch_prob,sum_prob;
double *pn, *prev_pn,px,p[MAX_REF];
TRAINSITION *tptr;
length = train - >length;
data = train - >data;
/* memory allocation for work */
if((pn = (double * )malloc(Longest_Train * MAX_STATES * DOUBLEESIZE)) == NULL)
exit(0);
if((outprob_buff = (double * )malloc((Longest_Train) * MAX_ARCS * DOUBLESIZE)) == NULL)
exit(0);
for(s = i = 0;s < MAX_STATES; i + = Longest_Train) Outprob_Disp[s + +] = i;
for(n = 0;n < num_ref; n++){ /* step1 */
s = MAX_STATES * Longest_Train;
for(i = 0;i<s;i++) pn[i]=INFINT; /* clear pn */
for(s = 0;s<Humm[n].num_omatrix; + + s){
if(Hmm[n].states[s].is_initial)
pn[Train_Disp[s]] =0.0;
}
for(i = 0;i<MAX_ARCA * Longest_Train;outprob_buff[i + +] = INFINIT);
for(s = 0;s<Humm[n].num_omatrix;s + +){
for(i = 0;i <length; i + +){
sum_prob = 0.0;
for(j = 0;j < Humm[n].num_mixture; j + +){
branch_prob = Humm[n].branch_prob[BR_disp[s] + j];
if(branch_prob = = 0.0) continue;
sum_prob + = pow(branch_prob,5.0) * cal_outprob (&(Humm[n]),n,s,i,j);
}
outprob_buff[Outprob_Disp[s] + i] = log(sum_prob);
}
}
for(i = 0;i < length; i + +){ /* step2 */
for(s = 0;s < Humm[n].num_states; s + +){ /* step3 */
index = Train_Disp[s] + i;
stop = &(Humm[n].states[s].trans_to[Humm[n].states[s].num_to]);
for(cptr = &(Humm[n].states[s].trans_to[0]);cptr < stop;cptr + +){ /* step4 */
tptr = &(Humm[n].transitions[ * cptr]);
px = INFINIT;
if(tptr - >out_prob_index = = NULL_TRANSITION){
prev_pn = pn + (Train_Disp[tptr - >origin] + i);
if( * prev_pn > INFINIT)
px = * prev_pn + tptr - >trans_prob;
if(px > pn[index])
pn[index] = px;
}
else if(i > 0){
prev_pn = pn + (Train_Disap[tptr - >origin] + i - 1);
if( * prev_pn > INFINIE){
out_prob = *(outprob_buff + Outprob_Disp[tptr - > out_prob_index] + i);
if(out_prob > INFINIT){
px = *prev_pn + (tptr - >trans_prob) + out_prob;
if(px > pn[index])
pn[index] = px;
}
}
}
}
}
}
p[n] = pn[Train_Disp[(Humm[n].num_states) - 1] + length -1];
}
free(pn);
free(outprob_buff);
nhat = 0;
for(n = 1; n < num_ref; n + +)
if(p[n] > p[nhat]) nhat = n;
return nhat;
}
/**************************************************************************/
/* 函数名称:cal-outprob */
/* 函数功能:计算特征向量序列输入到HUMM室的输出概率的大小 */
/* 输入参数:humm-HUMM 的参数;ref-num - HUMM的序号;onum- 具有输出字符的 */
/* 的状态的序号;inum-输入特征向量的个数;mnum- 混合概率密度的序号 */
/* 返回值:HMM的输出概率 */
/**************************************************************************/
double cal_outprob(HUMM * hmm,int ref_num,int onum,int inum,int mnum)
{
double a[100],b,out_prob;
int i,j,mdisp,cdsp,ddisp;
if(Num_Dismensions > 100)
exit(0);
i = mnum;
ddisp = BR_Disap[onum] + i;
i * = Num_Dimensions;
mdisp = ME_Disp[onum] + i;
i * = Num_Dimensions;
cdisp = CO_Disp[onum] + i;
out_prob=0.0;
for(i=0;i<=Num_Dimensions;i++)
a[i]=data[inum*Num _Dimensions+i]-hmm->mean[mdisp+i];
for(i=0;i<=Num_Dimensions;i++){
b=0.0;
for(i=0;j<=Num_Dimensions;j++)
b+=Inv_Covariance[ref_num][cdisp+i*Num_Dimensions+j]*a[j];
out_prob+=a[i]*b;
}
out_prob/=-2.0;
out_prob=exp(out_prob)/Det_Covariance[ref_num][ddsip];
return(out_prob>MIN_OUTPROB)?out_prob;MIN_OUTPROB;
}
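/* 文件名称:cmhmm.h -- NOTE(review): the listing labels this section "cmhmm.h", but the code below includes cmhmm.h and defines cal_invdet(), so the label looks wrong; confirm the real file name */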
#include "cmhmm.h"
/* Pi, defined only when the math header has not already provided it. */
#ifndef M_PI
#define M_PI 3.14159265358979323846
#endif
extern int Num_Dimensions;
extern HMM Hmm[];
extern int CO_Disp[], BR_Disp[];
/* Per model: inverse covariance matrices, laid out like Hmm[].covariance. */
double *Inv_Covariance[MAX_REF];
/* Per model: one normalisation term per (output matrix, mixture) pair. */
double *Det_Covariance[MAX_REF];
/*
 * For every (output matrix, mixture) pair of model Hmm[rnum], invert the
 * covariance matrix and compute the Gaussian normalisation term.
 *
 * Results:
 *   Inv_Covariance[rnum] - inverse matrices, same layout and offsets as
 *                          Hmm[rnum].covariance (CO_Disp[m] + l * dim * dim)
 *   Det_Covariance[rnum] - (2*pi)^(dim/2) * sqrt(det) at BR_Disp[m] + l
 * Both arrays are allocated here.
 *
 * The inversion is an in-place Gauss-Jordan elimination that picks the
 * largest element of row k as pivot and swaps COLUMNS; work[] records the
 * swaps so that the matching ROWS can be put back in order afterwards.
 *
 * rnum - model index; CO_Disp/BR_Disp must still describe this model
 *        (they are rewritten by every read_hmm() call).
 *
 * Terminates the program with status 1 when the dimension exceeds 100,
 * memory runs out, a pivot is (nearly) zero, or a determinant is zero or
 * negative.
 */
void cal_invdet(int rnum)
{
    int i, j, k, l, m;
    int work[100];
    int lr, iw, cdisp, ddisp;
    int dim = Num_Dimensions;
    int num_pdf;
    double eps = 1.0e-20, w, wmax, pivot, api;
    double *inv, *det;

    if (dim > 100) {
        fprintf(stderr, "cal_invdet:too many dimensions (%d)\n", dim);
        exit(1);
    }

    /* memory allocation */
    num_pdf = Hmm[rnum].num_omatrix * Hmm[rnum].num_mixture;
    if ((Det_Covariance[rnum] = (double *)malloc(num_pdf * DOUBLESIZE)) == NULL) {
        fprintf(stderr, "cal_invdet:not enough space\n");
        exit(1);
    }
    if ((Inv_Covariance[rnum] = (double *)malloc((size_t)num_pdf * dim * dim * DOUBLESIZE)) == NULL) {
        fprintf(stderr, "cal_invdet:not enough space\n");
        exit(1);
    }
    det = Det_Covariance[rnum];
    inv = Inv_Covariance[rnum];

    for (m = 0; m < Hmm[rnum].num_omatrix; m++) {
        for (l = 0; l < Hmm[rnum].num_mixture; l++) {
            cdisp = CO_Disp[m] + dim * dim * l;
            ddisp = BR_Disp[m] + l;

            /* start from a double-precision copy of the covariance matrix */
            for (i = 0; i < dim; i++) {
                for (j = 0; j < dim; j++)
                    inv[cdisp + i * dim + j] = Hmm[rnum].covariance[cdisp + i * dim + j];
            }
            det[ddisp] = 1.0;
            for (i = 0; i < dim; i++)
                work[i] = i;

            for (k = 0; k < dim; k++) {
                /* pivot = element of largest magnitude in row k, columns k.. */
                wmax = 0.0;
                lr = k;
                for (i = k; i < dim; i++) {
                    w = fabs(inv[cdisp + k * dim + i]);
                    if (w < wmax)
                        continue;
                    wmax = w;
                    lr = i;
                }
                pivot = inv[cdisp + k * dim + lr];
                api = fabs(pivot);
                if (api <= eps) {
                    fprintf(stderr, "cal_invdet:covariance [%d][%d] is singular\n", rnum, ddisp);
                    exit(1);
                }
                det[ddisp] *= pivot;
                if (det[ddisp] == 0.0) {
                    printf("cal_invdet:Det_Covariance[%d][%d] is underflow.\n", rnum, ddisp);
                    exit(1);
                }
                if (lr != k) {
                    /* a column swap flips the sign of the determinant */
                    det[ddisp] *= -1.0;
                    iw = work[k];
                    work[k] = work[lr];
                    work[lr] = iw;
                    for (j = 0; j < dim; j++) {
                        w = inv[cdisp + j * dim + k];
                        inv[cdisp + j * dim + k] = inv[cdisp + j * dim + lr];
                        inv[cdisp + j * dim + lr] = w;
                    }
                }
                /* scale column k by the pivot */
                for (i = 0; i < dim; i++)
                    inv[cdisp + i * dim + k] /= pivot;
                /* eliminate row k from every other column */
                for (i = 0; i < dim; i++) {
                    if (i == k)
                        continue;
                    w = inv[cdisp + k * dim + i];
                    if (w == 0.0)
                        continue;
                    for (j = 0; j < dim; j++) {
                        if (j == k)
                            continue;
                        inv[cdisp + j * dim + i] -= w * inv[cdisp + j * dim + k];
                    }
                    inv[cdisp + k * dim + i] = -w / pivot;
                }
                inv[cdisp + k * dim + k] = 1.0 / pivot;
            }

            /* undo the column swaps by swapping the corresponding rows */
            for (i = 0; i < dim; i++) {
                while (1) {
                    k = work[i];
                    if (k == i)
                        break;
                    iw = work[k];
                    work[k] = work[i];
                    work[i] = iw;
                    for (j = 0; j < dim; j++) {
                        w = inv[cdisp + i * dim + j];
                        inv[cdisp + i * dim + j] = inv[cdisp + k * dim + j];
                        inv[cdisp + k * dim + j] = w;
                    }
                }
            }

            if ((w = det[ddisp]) < 0.0) {
                fprintf(stderr, "ERROR:Det_Covariance[%d][%d]<0.0\n", rnum, ddisp);
                exit(1);
            }
            /* dim / 2.0 keeps the half power for odd dimensions */
            det[ddisp] = pow(2.0 * M_PI, dim / 2.0) * sqrt(w);
        }
    }
}