sumom****@users*****
sumom****@users*****
2013年 12月 5日 (木) 12:43:14 JST
Index: julius4/libsent/src/wav2mfcc/wav2mfcc-buffer.c diff -u julius4/libsent/src/wav2mfcc/wav2mfcc-buffer.c:1.6 julius4/libsent/src/wav2mfcc/wav2mfcc-buffer.c:1.7 --- julius4/libsent/src/wav2mfcc/wav2mfcc-buffer.c:1.6 Thu Jul 28 16:07:48 2011 +++ julius4/libsent/src/wav2mfcc/wav2mfcc-buffer.c Thu Dec 5 12:43:14 2013 @@ -24,7 +24,7 @@ * @author Akinobu LEE * @date Thu Feb 17 17:43:35 2005 * - * $Revision: 1.6 $ + * $Revision: 1.7 $ * */ @@ -54,7 +54,7 @@ * @return the number of processed frames. */ int -Wav2MFCC(SP16 *wave, float **mfcc, Value *para, int nSamples, MFCCWork *w) +Wav2MFCC(SP16 *wave, float **mfcc, Value *para, int nSamples, MFCCWork *w, CMNWork *c) { int i, k, t; int end = 0, start = 1; @@ -94,8 +94,8 @@ if (para->acc) Accel(mfcc, frame_num, para); /* Cepstrum Mean and/or Variance Normalization */ - if (para->cmn && ! para->cvn) CMN(mfcc, frame_num, para->mfcc_dim + (para->c0 ? 1 : 0)); - else if (para->cmn || para->cvn) MVN(mfcc, frame_num, para); + if (para->cmn && ! para->cvn) CMN(mfcc, frame_num, para->mfcc_dim + (para->c0 ? 1 : 0), c); + else if (para->cmn || para->cvn) MVN(mfcc, frame_num, para, c); return(frame_num); } @@ -216,26 +216,34 @@ * @param frame_num [in] number of frames * @param dim [in] total dimension of MFCC vectors */ -void CMN(float **mfcc, int frame_num, int dim) +void CMN(float **mfcc, int frame_num, int dim, CMNWork *c) { int i, t; float *mfcc_ave, *sum; - mfcc_ave = (float *)mycalloc(dim, sizeof(float)); - sum = (float *)mycalloc(dim, sizeof(float)); - - for(i = 0; i < dim; i++){ - sum[i] = 0.0; - for(t = 0; t < frame_num; t++) - sum[i] += mfcc[t][i]; - mfcc_ave[i] = sum[i] / frame_num; - } - for(t = 0; t < frame_num; t++){ - for(i = 0; i < dim; i++) - mfcc[t][i] = mfcc[t][i] - mfcc_ave[i]; + if (c != NULL && c->cmean_init_set) { + /* has initial param, use it permanently */ + for(t = 0; t < frame_num; t++){ + for(i = 0; i < dim; i++) + mfcc[t][i] -= c->cmean_init[i]; + } + } else { + /* compute from current input */ + mfcc_ave = (float *)mycalloc(dim, sizeof(float)); + sum = (float *)mycalloc(dim, sizeof(float)); + for(i = 0; i < dim; i++){ + sum[i] = 0.0; + for(t = 0; t < frame_num; t++) + sum[i] += mfcc[t][i]; + mfcc_ave[i] = sum[i] / frame_num; + } + for(t = 0; t < frame_num; t++){ + for(i = 0; i < dim; i++) + mfcc[t][i] = mfcc[t][i] - mfcc_ave[i]; + } + free(sum); + free(mfcc_ave); } - free(sum); - free(mfcc_ave); } /** @@ -245,7 +253,7 @@ * @param frame_num [in] number of frames * @param para [in] configuration parameters */ -void MVN(float **mfcc, int frame_num, Value *para) +void MVN(float **mfcc, int frame_num, Value *para, CMNWork *c) { int i, t; float *mfcc_mean, *mfcc_sd; @@ -254,6 +262,21 @@ basedim = para->mfcc_dim + (para->c0 ? 1 : 0); + if (c != NULL && c->cmean_init_set) { + /* has initial param, use it permanently */ + for(t = 0; t < frame_num; t++){ + if (para->cmn) { + /* mean normalization (base MFCC only) */ + for(i = 0; i < basedim; i++) mfcc[t][i] -= c->cmean_init[i]; + } + if (para->cvn) { + /* variance normalization (full MFCC) */ + for(i = 0; i < para->veclen; i++) mfcc[t][i] /= sqrt(c->cvar_init[i]); + } + } + return; + } + mfcc_mean = (float *)mycalloc(para->veclen, sizeof(float)); if (para->cvn) mfcc_sd = (float *)mycalloc(para->veclen, sizeof(float)); Index: julius4/libsent/src/wav2mfcc/wav2mfcc-pipe.c diff -u julius4/libsent/src/wav2mfcc/wav2mfcc-pipe.c:1.9 julius4/libsent/src/wav2mfcc/wav2mfcc-pipe.c:1.10 --- julius4/libsent/src/wav2mfcc/wav2mfcc-pipe.c:1.9 Fri Jun 21 02:14:27 2013 +++ julius4/libsent/src/wav2mfcc/wav2mfcc-pipe.c Thu Dec 5 12:43:14 2013 @@ -20,7 +20,7 @@ * @author Akinobu LEE * @date Thu Feb 17 18:12:30 2005 * - * $Revision: 1.9 $ + * $Revision: 1.10 $ * */ /* @@ -267,11 +267,19 @@ } c->now.mfcc_sum = (float *)mymalloc(sizeof(float) * c->veclen); if (c->var) c->now.mfcc_var = (float *)mymalloc(sizeof(float) * c->veclen); + if (c->var) c->all.mfcc_var = (float *)mymalloc(sizeof(float) * c->veclen); c->cmean_init = (float *)mymalloc(sizeof(float) * c->veclen); if (c->var) c->cvar_init = (float *)mymalloc(sizeof(float) * c->veclen); c->cmean_init_set = FALSE; + c->loaded_from_file = FALSE; + + if (c->var) { + for(i = 0; i < c->veclen; i++) c->all.mfcc_var[i] = 0.0; + } + c->all.framenum = 0; + return c; } @@ -291,6 +299,7 @@ if (c->var) { free(c->cvar_init); free(c->now.mfcc_var); + free(c->all.mfcc_var); } for(i=0;i<c->clist_max;i++) { if (c->var) free(c->clist[i].mfcc_var); @@ -349,10 +358,8 @@ mfcc[d] -= x; } if (c->var) { - /* variance normalization */ - x = c->now.mfcc_var[d] + c->cweight * c->cvar_init[d]; - y = (double)c->now.framenum + c->cweight; - mfcc[d] /= sqrt(x / y); + /* variance normalization (static) */ + mfcc[d] /= sqrt(c->cvar_init[d]); } } } else { @@ -417,32 +424,31 @@ /* compute cepstral mean from now and previous sums up to CPMAX frames */ for(d=0;d<c->veclen;d++) c->cmean_init[d] = c->now.mfcc_sum[d]; - if (c->var) { - for(d=0;d<c->veclen;d++) c->cvar_init[d] = c->now.mfcc_var[d]; - } frames = c->now.framenum; for(i=0;i<c->clist_num;i++) { for(d=0;d<c->veclen;d++) c->cmean_init[d] += c->clist[i].mfcc_sum[d]; - if (c->var) { - for(d=0;d<c->veclen;d++) c->cvar_init[d] += c->clist[i].mfcc_var[d]; - } frames += c->clist[i].framenum; if (frames >= CPMAX) break; } for(d=0;d<c->veclen;d++) c->cmean_init[d] /= (float) frames; - if (c->var) { - for(d=0;d<c->veclen;d++) c->cvar_init[d] /= (float) frames; - } c->cmean_init_set = TRUE; + /* also compute all and update cvar_init */ + if (c->loaded_from_file == FALSE && c->var) { + for(d = 0; d < c->veclen; d++) { + c->all.mfcc_var[d] = (c->all.mfcc_var[d] * c->all.framenum + c->now.mfcc_var[d]) / (c->all.framenum + c->now.framenum); + } + c->all.framenum += c->now.framenum; + for(d=0;d<c->veclen;d++) c->cvar_init[d] = c->all.mfcc_var[d]; + } + /* expand clist if neccessary */ if (c->clist_num == c->clist_max && frames < CPMAX) { c->clist_max += CPSTEP; c->clist = (CMEAN *)myrealloc(c->clist, sizeof(CMEAN) * c->clist_max); for(i=c->clist_num;i<c->clist_max;i++) { c->clist[i].mfcc_sum = (float *)mymalloc(sizeof(float)*c->veclen); - if (c->var) c->clist[i].mfcc_var = (float *)mymalloc(sizeof(float)*c->veclen); c->clist[i].framenum = 0; } } @@ -455,7 +461,6 @@ if (c->var) c->clist[0].mfcc_var = tmp2; /* copy now to clist[0] */ memcpy(c->clist[0].mfcc_sum, c->now.mfcc_sum, sizeof(float) * c->veclen); - if (c->var) memcpy(c->clist[0].mfcc_var, c->now.mfcc_var, sizeof(float) * c->veclen); c->clist[0].framenum = c->now.framenum; if (c->clist_num < c->clist_max) c->clist_num++; @@ -485,33 +490,10 @@ } /** - * Write binary with byte swap (assume data is Big Endian) - * - * @param buf [in] data buffer - * @param unitbyte [in] size of unit in bytes - * @param unitnum [in] number of units to write - * @param fd [in] file descriptor - * - * @return TRUE if required number of units are fully written, FALSE if failed. - */ -static boolean -mywrite(void *buf, size_t unitbyte, size_t unitnum, int fd) -{ -#ifndef WORDS_BIGENDIAN - swap_bytes(buf, unitbyte, unitnum); -#endif - if (write(fd, buf, unitbyte * unitnum) < unitbyte * unitnum) { - return(FALSE); - } -#ifndef WORDS_BIGENDIAN - swap_bytes(buf, unitbyte, unitnum); -#endif - return(TRUE); -} - -/** * Load CMN parameter from file. If the number of MFCC dimension in the * file does not match the specified one, an error will occur. + * + * Format can be either HTK ascii format or binary format (made by Julius older than ver.4.2.3) * * @param c [i/o] CMN calculation work area * @param filename [in] file name @@ -523,37 +505,126 @@ { FILE *fp; int veclen; + char ch[5]; + char *buf; - jlog("Stat: wav2mfcc-pipe: reading initial CMN from file \"%s\"\n", filename); + jlog("Stat: wav2mfcc-pipe: reading initial cepstral mean/variance from file \"%s\"\n", filename); if ((fp = fopen_readfile(filename)) == NULL) { - jlog("Error: wav2mfcc-pipe: failed to open\n"); + jlog("Error: wav2mfcc-pipe: failed to open %s\n", filename); return(FALSE); } - /* read header */ - if (myread(&veclen, sizeof(int), 1, fp) == FALSE) { - jlog("Error: wav2mfcc-pipe: failed to read header\n"); - fclose_readfile(fp); - return(FALSE); - } - /* check length */ - if (veclen != c->veclen) { - jlog("Error: wav2mfcc-pipe: cepstral dimension mismatch\n"); - jlog("Error: wav2mfcc-pipe: process = %d, file = %d\n", c->veclen, veclen); - fclose_readfile(fp); - return(FALSE); - } - /* read body */ - if (myread(c->cmean_init, sizeof(float), c->veclen, fp) == FALSE) { - jlog("Error: wav2mfcc-pipe: failed to read mean for CMN\n"); + + /* detect file format */ + if (myread(&ch, sizeof(char), 5, fp) == FALSE) { + jlog("Error: wav2mfcc-pipe: failed to read CMN/CVN file\n"); fclose_readfile(fp); return(FALSE); } - if (c->var) { - if (myread(c->cvar_init, sizeof(float), c->veclen, fp) == FALSE) { - jlog("Error: wav2mfcc-pipe: failed to read variance for CVN\n"); + + myfrewind(fp); + if (ch[0] == '<' && + (ch[1] == 'C' || ch[1] == 'c') && + (ch[2] == 'E' || ch[2] == 'e') && + (ch[3] == 'P' || ch[3] == 'p') && + (ch[4] == 'S' || ch[4] == 's') ) { + /* ascii HTK format (>=4.3) */ + char *p; + int mode; + int d, dv, len; + + jlog("Stat: wav2mfcc-pipe: reading HTK-format cepstral vectors\n"); + buf = (char *)mymalloc(MAXLINELEN); + mode = 0; + while(getl(buf, MAXLINELEN, fp) != NULL) { + for (p = mystrtok_quote(buf, "<> \t\r\n"); p; p = mystrtok_quote(NULL, "<> \t\r\n")) { + switch(mode){ + case 0: + if (strmatch(p, "MEAN")) { + mode = 1; + } else if (strmatch(p, "VARIANCE")) { + mode = 3; + } + break; + case 1: + len = atof(p); + if (len != c->veclen) { + jlog("Error: wav2mfcc-pipe: cepstral dimension mismatch\n"); + jlog("Error: wav2mfcc-pipe: process = %d, file = %d\n", c->veclen, len); + free(buf); fclose_readfile(fp); + return(FALSE); + } + d = 0; + mode = 2; + break; + case 2: + if (strmatch(p, "VARIANCE")) { + mode = 3; + } else { + if (d >= len) { + jlog("Error: wav2mfcc-pipe: corrupted data\n"); + free(buf); fclose_readfile(fp); + return(FALSE); + } + c->cmean_init[d++] = atof(p); + } + break; + case 3: + len = atof(p); + if (len != c->veclen) { + jlog("Error: wav2mfcc-pipe: cepstral dimension mismatch\n"); + jlog("Error: wav2mfcc-pipe: process = %d, file = %d\n", c->veclen, len); + free(buf); fclose_readfile(fp); + return(FALSE); + } + dv = 0; + mode = 4; + break; + case 4: + if (dv >= len) { + jlog("Error: wav2mfcc-pipe: corrupted data\n"); + free(buf); fclose_readfile(fp); + return(FALSE); + } + c->cvar_init[dv++] = atof(p); + break; + } + } + } + free(buf); + if (d != len || (mode >= 3 && dv != len)) { + jlog("Error: wav2mfcc-pipe: corrupted data\n"); + fclose_readfile(fp); + return(FALSE); + } + } else { + /* binary (<4.3) */ + jlog("Stat: wav2mfcc-pipe: reading binary-format cepstral vectors\n"); + /* read header */ + if (myread(&veclen, sizeof(int), 1, fp) == FALSE) { + jlog("Error: wav2mfcc-pipe: failed to read header\n"); fclose_readfile(fp); return(FALSE); } + /* check length */ + if (veclen != c->veclen) { + jlog("Error: wav2mfcc-pipe: cepstral dimension mismatch\n"); + jlog("Error: wav2mfcc-pipe: process = %d, file = %d\n", c->veclen, veclen); + fclose_readfile(fp); + return(FALSE); + } + /* read body */ + if (myread(c->cmean_init, sizeof(float), c->veclen, fp) == FALSE) { + jlog("Error: wav2mfcc-pipe: failed to read mean for CMN\n"); + fclose_readfile(fp); + return(FALSE); + } + if (c->var) { + if (myread(c->cvar_init, sizeof(float), c->veclen, fp) == FALSE) { + jlog("Error: wav2mfcc-pipe: failed to read variance for CVN\n"); + fclose_readfile(fp); + return(FALSE); + } + } } if (fclose_readfile(fp) == -1) { @@ -562,7 +633,8 @@ } c->cmean_init_set = TRUE; - jlog("Stat: wav2mfcc-pipe: read CMN parameter\n"); + c->loaded_from_file = TRUE; + jlog("Stat: wav2mfcc-pipe: finished reading CMN/CVN parameter\n"); return(TRUE); } @@ -578,41 +650,35 @@ boolean CMN_save_to_file(CMNWork *c, char *filename) { - int fd; + FILE *fp; + int d; - jlog("Stat: wav2mfcc-pipe: writing current cepstral data to file \"%s\"\n", filename); + /* save in HTK ascii format */ - if ((fd = open(filename, O_CREAT | O_RDWR -#ifdef O_BINARY - | O_BINARY -#endif - , 0644)) == -1) { + /* open file for writing */ + if ((fp = fopen_writefile(filename)) == NULL) { jlog("Error: wav2mfcc-pipe: failed to open \"%s\" to write current cepstral data\n", filename); return(FALSE); } - /* write header */ - if (mywrite(&(c->veclen), sizeof(int), 1, fd) == FALSE) { - jlog("Error: wav2mfcc-pipe: cannot write header to \"%s\" as current cepstral data\n", filename); - close(fd); - return(FALSE); - } - /* write body */ - if (mywrite(c->cmean_init, sizeof(float), c->veclen, fd) == FALSE) { - jlog("Error: wav2mfcc-pipe: cannot write mean to \"%s\" as current cepstral data\n", filename); - close(fd); - return(FALSE); + + fprintf(fp, "<CEPSNORM> <>\n"); + fprintf(fp, "<MEAN> %d\n", c->veclen); + for(d=0;d<c->veclen;d++) { + fprintf(fp, " %e", c->cmean_init[d]); } if (c->var) { - if (mywrite(c->cvar_init, sizeof(float), c->veclen, fd) == FALSE) { - jlog("Error: wav2mfcc-pipe: cannot write variance to \"%s\" as current cepstrum\n", filename); - close(fd); - return(FALSE); + fprintf(fp, "\n<VARIANCE> %d\n", c->veclen); + for(d=0;d<c->veclen;d++) { + fprintf(fp, " %e", c->cvar_init[d]); } } + fprintf(fp, "\n"); - close(fd); + fclose_writefile(fp); - jlog("Stat: wav2mfcc-pipe: current cepstral data written to \"%s\"\n", filename); + jlog("Stat: wav2mfcc-pipe: cepstral mean"); + if (c->var) jlog(" and variance"); + jlog(" written to \"%s\"\n", filename); return(TRUE); }