V_READHTK read an HTK parameter file [D,FP,DT,TC,T]=V_READHTK(FILE) Input: FILE = name of HTK file Outputs: D = data: column vector for waveforms, one row per frame for other types FP = frame period in seconds DT = data type (also includes Voicebox code for generating data) 0 WAVEFORM Acoustic waveform 1 LPC Linear prediction coefficients 2 LPREFC LPC Reflection coefficients: -v_lpcar2rf([1 LPC]);LPREFC(1)=[]; 3 LPCEPSTRA LPC Cepstral coefficients 4 LPDELCEP LPC cepstral+delta coefficients (obsolete) 5 IREFC LPC Reflection coefficients (16 bit fixed point) 6 MFCC Mel frequency cepstral coefficients 7 FBANK Log Filter bank energies 8 MELSPEC linear Mel-scaled spectrum 9 USER User defined features 10 DISCRETE Vector quantised codebook 11 PLP Perceptual Linear prediction 12 ANON TC = full type code = DT plus (optionally) one or more of the following modifiers 64 _E Includes energy terms 128 _N Suppress absolute energy 256 _D Include delta coefs 512 _A Include acceleration coefs 1024 _C Compressed 2048 _Z Zero mean static coefs 4096 _K CRC checksum (not implemented yet) 8192 _0 Include 0'th cepstral coef 16384 _V Attach VQ index 32768 _T Attach delta-delta-delta index T = text version of type code e.g. LPC_C_K
function [d,fp,dt,tc,t]=v_readhtk(file)
%V_READHTK  read an HTK parameter file [D,FP,DT,TC,T]=V_READHTK(FILE)
%
% Input:
%    FILE = name of HTK file
% Outputs:
%       D = data: column vector for waveforms, one row per frame for other types
%      FP = frame period in seconds
%      DT = data type (also includes Voicebox code for generating data)
%             0  WAVEFORM     Acoustic waveform
%             1  LPC          Linear prediction coefficients
%             2  LPREFC       LPC Reflection coefficients:  -v_lpcar2rf([1 LPC]);LPREFC(1)=[];
%             3  LPCEPSTRA    LPC Cepstral coefficients
%             4  LPDELCEP     LPC cepstral+delta coefficients (obsolete)
%             5  IREFC        LPC Reflection coefficients (16 bit fixed point)
%             6  MFCC         Mel frequency cepstral coefficients
%             7  FBANK        Log Filter bank energies
%             8  MELSPEC      linear Mel-scaled spectrum
%             9  USER         User defined features
%            10  DISCRETE     Vector quantised codebook
%            11  PLP          Perceptual Linear prediction
%            12  ANON
%      TC = full type code = DT plus (optionally) one or more of the following modifiers
%               64  _E  Includes energy terms
%              128  _N  Suppress absolute energy
%              256  _D  Include delta coefs
%              512  _A  Include acceleration coefs
%             1024  _C  Compressed
%             2048  _Z  Zero mean static coefs
%             4096  _K  CRC checksum (not implemented yet)
%             8192  _0  Include 0'th cepstral coef
%            16384  _V  Attach VQ index
%            32768  _T  Attach delta-delta-delta index
%       T = text version of type code e.g. LPC_C_K
%
% An error is raised if FILE cannot be opened or is too short to hold
% the 12-byte header. The file is always closed before returning, even
% when an error occurs while reading.

%   Thanks to Dan Ellis (ee.columbia.edu) for sorting out decompression.
%   Thanks to Stuart Anderson (whispersys.com) for making it work on 64 bit machines.

%      Copyright (C) Mike Brookes 2005
%      Version: $Id: v_readhtk.m 10865 2018-09-21 17:22:45Z dmb $
%
%   VOICEBOX is a MATLAB toolbox for speech processing.
%   Home page: http://www.ee.ic.ac.uk/hp/staff/dmb/voicebox/voicebox.html
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%   This program is free software; you can redistribute it and/or modify
%   it under the terms of the GNU General Public License as published by
%   the Free Software Foundation; either version 2 of the License, or
%   (at your option) any later version.
%
%   This program is distributed in the hope that it will be useful,
%   but WITHOUT ANY WARRANTY; without even the implied warranty of
%   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
%   GNU General Public License for more details.
%
%   You can obtain a copy of the GNU General Public License from
%   http://www.gnu.org/copyleft/gpl.html or by writing to
%   Free Software Foundation, Inc.,675 Mass Ave, Cambridge, MA 02139, USA.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

fid=fopen(file,'r','b');                    % 'b' selects big-endian byte ordering
if fid < 0
    error('Cannot read from file %s',file);
end
closer=onCleanup(@() fclose(fid));          % closes the file on every exit path, including errors

% ---- 12-byte header: int32, int32, int16, int16 ----
nf=fread(fid,1,'int32');                    % number of frames
fp=fread(fid,1,'int32')*1.E-7;              % frame interval (converted to seconds)
by=fread(fid,1,'int16');                    % bytes per frame
tc=fread(fid,1,'int16');                    % type code (see comments above for interpretation)
if isempty(nf) || isempty(fp) || isempty(by) || isempty(tc)
    % fread returns [] at end of file; fail clearly rather than with a
    % dimension-mismatch error further down
    error('File %s is too short to contain an HTK header',file);
end
tc=tc+65536*(tc<0);                         % reinterpret the signed 16-bit value as unsigned

% ---- split the type code into base type and modifier flags ----
cc='ENDACZK0VT';                            % list of suffix codes, lowest modifier bit first
nhb=length(cc);                             % number of suffix codes
ndt=6;                                      % number of bits for base type
hb=floor(tc*pow2(-(ndt+nhb):-ndt));         % tc shifted right by ndt+nhb, ..., ndt bits
hd=hb(nhb+1:-1:2)-2*hb(nhb:-1:1);           % extract bits from type code: hd(k) is the flag for cc(k)
dt=tc-pow2(hb(end),ndt);                    % low six bits of tc represent data type

% hd(7)=1 CRC check
% hd(5)=1 compressed data
if dt==5        % hack to fix error in IREFC files which are sometimes stored as compressed LPREFC
    fseek(fid,0,'eof');
    flen=ftell(fid);                        % find length of file
    fseek(fid,12,'bof');                    % return to the first byte after the header
    if flen>14+by*nf    % if file is too long (including possible CRCC) then assume compression constants exist
        dt=2;                               % change type to LPREFC
        hd(5)=1;                            % set compressed flag
        nf=nf+4;                            % frame count doesn't include compression constants in this case
    end
end

% ---- read the data ----
if any(dt==[0,5,10])                        % 16 bit data for waveforms, IREFC and DISCRETE
    d=fread(fid,[by/2,nf],'int16').';
    if dt==5
        d=d/32767;                          % scale IREFC
    end
else
    if hd(5)                                % compressed data - first read scales
        nf=nf-4;                            % frame count includes compression constants
        ncol=by/2;                          % compressed values occupy 2 bytes each
        scales=fread(fid,ncol,'float');
        biases=fread(fid,ncol,'float');
        % decompress: value = (stored + bias) / scale, applied per column
        d=((fread(fid,[ncol,nf],'int16')+repmat(biases,1,nf)).*repmat(1./scales,1,nf)).';
    else                                    % uncompressed data
        d=fread(fid,[by/4,nf],'float').';
    end
end

% ---- optional text version of the type code ----
if nargout > 4
    ns=sum(hd);                             % number of suffixes
    kinds={'WAVEFORM' 'LPC' 'LPREFC' 'LPCEPSTRA' 'LPDELCEP' 'IREFC' 'MFCC' 'FBANK' 'MELSPEC' 'USER' 'DISCRETE' 'PLP' 'ANON' '???'};
    % base type name followed by '_X' for each set modifier; unknown base types map to '???'
    t=[kinds{min(dt+1,length(kinds))} reshape(['_'*ones(1,ns);cc(hd>0)],1,2*ns)];
end