Home > voicebox > readhtk.m

readhtk

PURPOSE ^

READHTK read an HTK parameter file [D,FP,DT,TC,T]=(FILE)

SYNOPSIS ^

function [d,fp,dt,tc,t]=readhtk(file)

DESCRIPTION ^

READHTK  read an HTK parameter file [D,FP,DT,TC,T]=(FILE)

 Input:
    FILE = name of HTK file
 Outputs:
       D = data: column vector for waveforms, one row per frame for other types
      FP = frame period in seconds
      DT = data type (also includes Voicebox code for generating data)
             0  WAVEFORM     Acoustic waveform
             1  LPC          Linear prediction coefficients
             2  LPREFC       LPC Reflection coefficients:  -lpcar2rf([1 LPC]);LPREFC(1)=[];
             3  LPCEPSTRA    LPC Cepstral coefficients
             4  LPDELCEP     LPC cepstral+delta coefficients (obsolete)
             5  IREFC        LPC Reflection coefficients (16 bit fixed point)
             6  MFCC         Mel frequency cepstral coefficients
             7  FBANK        Log Filter bank energies
             8  MELSPEC      linear Mel-scaled spectrum
             9  USER         User defined features
            10  DISCRETE     Vector quantised codebook
            11  PLP          Perceptual Linear prediction
            12  ANON
      TC = full type code = DT plus (optionally) one or more of the following modifiers
               64  _E  Includes energy terms
              128  _N  Suppress absolute energy
              256  _D  Include delta coefs
              512  _A  Include acceleration coefs
             1024  _C  Compressed
             2048  _Z  Zero mean static coefs
             4096  _K  CRC checksum (not implemented yet)
             8192  _0  Include 0'th cepstral coef
            16384  _V  Attach VQ index
            32768  _T  Attach delta-delta-delta index
       T = text version of type code e.g. LPC_C_K

CROSS-REFERENCE INFORMATION ^

This function calls: This function is called by:

SOURCE CODE ^

function [d,fp,dt,tc,t]=readhtk(file)
%READHTK  read an HTK parameter file [D,FP,DT,TC,T]=READHTK(FILE)
%
% Input:
%    FILE = name of HTK file
% Outputs:
%       D = data: column vector for waveforms, one row per frame for other types
%      FP = frame period in seconds
%      DT = data type (also includes Voicebox code for generating data)
%             0  WAVEFORM     Acoustic waveform
%             1  LPC          Linear prediction coefficients
%             2  LPREFC       LPC Reflection coefficients:  -lpcar2rf([1 LPC]);LPREFC(1)=[];
%             3  LPCEPSTRA    LPC Cepstral coefficients
%             4  LPDELCEP     LPC cepstral+delta coefficients (obsolete)
%             5  IREFC        LPC Reflection coefficients (16 bit fixed point)
%             6  MFCC         Mel frequency cepstral coefficients
%             7  FBANK        Log Filter bank energies
%             8  MELSPEC      linear Mel-scaled spectrum
%             9  USER         User defined features
%            10  DISCRETE     Vector quantised codebook
%            11  PLP          Perceptual Linear prediction
%            12  ANON
%      TC = full type code = DT plus (optionally) one or more of the following modifiers
%               64  _E  Includes energy terms
%              128  _N  Suppress absolute energy
%              256  _D  Include delta coefs
%              512  _A  Include acceleration coefs
%             1024  _C  Compressed
%             2048  _Z  Zero mean static coefs
%             4096  _K  CRC checksum (not implemented yet)
%             8192  _0  Include 0'th cepstral coef
%            16384  _V  Attach VQ index
%            32768  _T  Attach delta-delta-delta index
%       T = text version of type code e.g. LPC_C_K
%
% Errors:
%    An error is raised if FILE cannot be opened or if its 12-byte header
%    (frame count, frame period, bytes per frame, type code) cannot be read.
%    The file is closed before the function returns, including on error.

%   Thanks to Dan Ellis (ee.columbia.edu) for sorting out decompression.
%   Thanks to Stuart Anderson (whispersys.com) for making it work on 64 bit machines.

%      Copyright (C) Mike Brookes 2005
%      Version: $Id: readhtk.m 713 2011-10-16 14:45:43Z dmb $
%
%   VOICEBOX is a MATLAB toolbox for speech processing.
%   Home page: http://www.ee.ic.ac.uk/hp/staff/dmb/voicebox/voicebox.html
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%   This program is free software; you can redistribute it and/or modify
%   it under the terms of the GNU General Public License as published by
%   the Free Software Foundation; either version 2 of the License, or
%   (at your option) any later version.
%
%   This program is distributed in the hope that it will be useful,
%   but WITHOUT ANY WARRANTY; without even the implied warranty of
%   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
%   GNU General Public License for more details.
%
%   You can obtain a copy of the GNU General Public License from
%   http://www.gnu.org/copyleft/gpl.html or by writing to
%   Free Software Foundation, Inc.,675 Mass Ave, Cambridge, MA 02139, USA.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

fid=fopen(file,'r','b');            % open for reading with big-endian byte ordering
if fid < 0
    error('Cannot read from file %s',file);
end
% Close the file automatically when this function exits, whether it returns
% normally or raises an error part-way through reading.
closefile=onCleanup(@() fclose(fid)); %#ok<NASGU>

% ---- 12-byte header ----
nf=fread(fid,1,'int32');            % number of frames
fp=fread(fid,1,'int32')*1.E-7;      % frame interval (stored in 100 ns units, converted to seconds)
by=fread(fid,1,'int16');            % bytes per frame
tc=fread(fid,1,'int16');            % type code (see comments above for interpretation)
if isempty(nf) || isempty(fp) || isempty(by) || isempty(tc)
    % fread returns [] when it hits end-of-file, i.e. the header is truncated
    error('Cannot read HTK header from file %s',file);
end
tc=tc+65536*(tc<0);                 % reinterpret the signed 16-bit value as unsigned

% ---- split type code into base type and modifier flags ----
cc='ENDACZK0VT';                    % list of suffix codes, in order of increasing bit value (64, 128, ...)
nhb=length(cc);                     % number of suffix codes
ndt=6;                              % number of bits for base type
hb=floor(tc*pow2(-(ndt+nhb):-ndt)); % tc shifted right by 16,15,...,6 bits
hd=hb(nhb+1:-1:2)-2*hb(nhb:-1:1);   % extract bits from type code: hd(k)=1 if suffix cc(k) is present
dt=tc-pow2(hb(end),ndt);            % low six bits of tc represent data type

% hd(7)=1 CRC check
% hd(5)=1 compressed data
if dt==5    % hack to fix error in IREFC files which are sometimes stored as compressed LPREFC
    fseek(fid,0,'eof');
    flen=ftell(fid);        % find length of file
    fseek(fid,12,'bof');    % return to the first byte after the header
    if flen>14+by*nf        % if file is too long (including possible CRCC) then assume compression constants exist
        dt=2;               % change type to LPREFC
        hd(5)=1;            % set compressed flag
        nf=nf+4;            % frame count doesn't include compression constants in this case
    end
end

% ---- data ----
if any(dt==[0,5,10])        % 16 bit data for waveforms, IREFC and DISCRETE
    d=fread(fid,[by/2,nf],'int16').';
    if dt==5
        d=d/32767;                      % scale IREFC
    end
elseif hd(5)                            % compressed data - first read scales
    nf=nf-4;                            % frame count includes compression constants
    ncol=by/2;                          % each compressed value occupies 2 bytes
    scales=fread(fid,ncol,'float');
    biases=fread(fid,ncol,'float');
    raw=fread(fid,[ncol,nf],'int16');
    % decompress each coefficient: value = (stored + bias) / scale
    d=((raw+repmat(biases,1,nf)).*repmat(1./scales,1,nf)).';
else                                    % uncompressed data: 4-byte floats
    d=fread(fid,[by/4,nf],'float').';
end

% ---- text version of the type code (only built when requested) ----
if nargout > 4
    ns=sum(hd);                 % number of suffixes
    kinds={'WAVEFORM' 'LPC' 'LPREFC' 'LPCEPSTRA' 'LPDELCEP' 'IREFC' 'MFCC' 'FBANK' 'MELSPEC' 'USER' 'DISCRETE' 'PLP' 'ANON' '???'};
    % interleave '_' with each active suffix letter; unknown base types map to '???'
    t=[kinds{min(dt+1,length(kinds))} reshape([repmat('_',1,ns);cc(hd>0)],1,2*ns)];
end

Generated on Fri 22-Sep-2017 19:37:38 by m2html © 2003