Home > voicebox > readsph.m

readsph

PURPOSE ^

READSPH Read a SPHERE/TIMIT format sound file [Y,FS,WRD,PHN,FFX]=READSPH(FILENAME,MODE,NMAX,NSKIP)

SYNOPSIS ^

function [y,fs,wrd,phn,ffx]=readsph(filename,mode,nmax,nskip)

DESCRIPTION ^

READSPH  Read a SPHERE/TIMIT format sound file [Y,FS,WRD,PHN,FFX]=READSPH(FILENAME,MODE,NMAX,NSKIP)

 Input Parameters:

    FILENAME gives the name of the file (with optional .SPH extension) or alternatively
                 can be the FFX output from a previous call to READSPH having the 'f' mode option
    MODE        specifies the following (*=default):

    Scaling: 's'    Auto scale to make data peak = +-1 (use with caution if reading in chunks)
             'r'    Raw unscaled data (integer values)
             'p' *    Scaled to make +-1 equal full scale
             'o'    Scale to bin centre rather than bin edge (e.g. 127 rather than 127.5 for 8 bit values)
                     (can be combined with n+p,r,s modes)
             'n'    Scale to negative peak rather than positive peak (e.g. 128.5 rather than 127.5 for 8 bit values)
                     (can be combined with o+p,r,s modes)
   Format    'l'    Little endian data (Intel,DEC) (overrides indication in file)
             'b'    Big endian data (non Intel/DEC) (overrides indication in file)

   File I/O: 'f'    Do not close file on exit
             'd'    Look in data directory: voicebox('dir_data')
             'w'    Also read the annotation file *.wrd if present (as in TIMIT)
             't'    Also read the phonetic transcription file *.phn if present (as in TIMIT)
                    Each line of the annotation and transcription files is of the form: m n token
                    where m and n are start and end times in samples and token is a word or phoneme text descriptor
                    The corresponding cell arrays WRD and PHN contain two elements per row: {[m n]/fs 'token'}
                    These outputs are only present if the corresponding 'w' and 't' options are selected

    NMAX     maximum number of samples to read (or -1 for unlimited [default])
    NSKIP    number of samples to skip from start of file
               (or -1 to continue from previous read when FFX is given instead of FILENAME [default])

 Output Parameters:

    Y          data matrix of dimension (samples,channels)
    FS         sample frequency in Hz
    WRD{*,2}   cell array with word annotations: WRD(*,:)={[t_start t_end],'text'} where times are in seconds
              only present if 'w' option is given
    PHN{*,2}   cell array with phoneme annotations: PHN(*,:)={[t_start t_end],'phoneme'} where times are in seconds
              only present if 't' option is present
    FFX        Cell array containing

     {1}     filename
     {2}     header information
        {1}  first header field name
        {2}  first header field value
     {3}     format string (e.g. NIST_1A)
     {4}(1)  file id
        (2)  current position in file
        (3)  dataoff    byte offset in file to start of data
        (4)  order  byte order (l or b)
        (5)  nsamp    number of samples
        (6)  number of channels
        (7)  nbytes    bytes per data value
        (8)  bits    number of bits of precision
        (9)  fs    sample frequency
         (10) min value
        (11) max value
        (12) coding: 0=PCM,1=uLAW + 0=no compression,10=shorten,20=wavpack,30=shortpack
        (13) file not yet decompressed
     {5}     temporary filename

   If no output parameters are specified, header information will be printed.
   To decode shorten-encoded files, the program shorten.exe must be in the same directory as this m-file

  Usage Examples:

 (a) Draw an annotated spectrogram of a TIMIT file
           filename='....TIMIT/TEST/DR1/FAKS0/SA1.WAV';
           [s,fs,wrd,phn]=readsph(filename,'wt');
           spgrambw(s,fs,'Jwcpta',[],[],[],[],wrd);

CROSS-REFERENCE INFORMATION ^

This function calls: This function is called by:

SOURCE CODE ^

function [y,fs,wrd,phn,ffx]=readsph(filename,mode,nmax,nskip)
%READSPH  Read a SPHERE/TIMIT format sound file [Y,FS,WRD,PHN,FFX]=READSPH(FILENAME,MODE,NMAX,NSKIP)
%
% Input Parameters:
%
%    FILENAME gives the name of the file (with optional .SPH extension) or alternatively
%                 can be the FFX output from a previous call to READSPH having the 'f' mode option
%    MODE        specifies the following (*=default):
%
%    Scaling: 's'    Auto scale to make data peak = +-1 (use with caution if reading in chunks)
%             'r'    Raw unscaled data (integer values)
%             'p' *    Scaled to make +-1 equal full scale
%             'o'    Scale to bin centre rather than bin edge (e.g. 127 rather than 127.5 for 8 bit values)
%                     (can be combined with n+p,r,s modes)
%             'n'    Scale to negative peak rather than positive peak (e.g. 128.5 rather than 127.5 for 8 bit values)
%                     (can be combined with o+p,r,s modes)
%   Format    'l'    Little endian data (Intel,DEC) (overrides indication in file)
%             'b'    Big endian data (non Intel/DEC) (overrides indication in file)
%
%   File I/O: 'f'    Do not close file on exit
%             'd'    Look in data directory: voicebox('dir_data')
%             'w'    Also read the annotation file *.wrd if present (as in TIMIT)
%             't'    Also read the phonetic transcription file *.phn if present (as in TIMIT)
%                    Each line of the annotation and transcription files is of the form: m n token
%                    where m and n are start and end times in samples and token is a word or phoneme text descriptor
%                    The corresponding cell arrays WRD and PHN contain two elements per row: {[m n]/fs 'token'}
%                    These outputs are only present if the corresponding 'w' and 't' options are selected
%
%    NMAX     maximum number of samples to read (or -1 for unlimited [default])
%    NSKIP    number of samples to skip from start of file
%               (or -1 to continue from previous read when FFX is given instead of FILENAME [default])
%
% Output Parameters:
%
%    Y          data matrix of dimension (samples,channels)
%    FS         sample frequency in Hz
%    WRD{*,2}   cell array with word annotations: WRD(*,:)={[t_start t_end],'text'} where times are in seconds
%              only present if 'w' option is given
%    PHN{*,2}   cell array with phoneme annotations: PHN(*,:)={[t_start t_end],'phoneme'} where times are in seconds
%              only present if 't' option is present
%    FFX        Cell array containing
%
%     {1}     filename
%     {2}     header information
%        {1}  first header field name
%        {2}  first header field value
%     {3}     format string (e.g. NIST_1A)
%     {4}(1)  file id
%        (2)  current position in file
%        (3)  dataoff    byte offset in file to start of data
%        (4)  order  byte order (l or b)
%        (5)  nsamp    number of samples
%        (6)  number of channels
%        (7)  nbytes    bytes per data value
%        (8)  bits    number of bits of precision
%        (9)  fs    sample frequency
%        (10) min value
%        (11) max value
%        (12) coding: 0=PCM,1=uLAW + 0=no compression,10=shorten,20=wavpack,30=shortpack
%        (13) file not yet decompressed (listed for reference; the code below only stores elements 1 to 12)
%     {5}     temporary filename
%
%   Output slot conventions when annotations are not all requested:
%     neither 'w' nor 't' : WRD and PHN both hold a copy of FFX
%     'w' only            : WRD holds the word annotations, PHN holds a copy of FFX
%     't' only            : the phoneme annotations are returned in WRD (3rd output), PHN holds a copy of FFX
%
%   If no output parameters are specified, header information will be printed.
%   To decode shorten-encoded files, the program shorten.exe must be in the same directory as this m-file
%
%  Usage Examples:
%
% (a) Draw an annotated spectrogram of a TIMIT file
%           filename='....TIMIT/TEST/DR1/FAKS0/SA1.WAV';
%           [s,fs,wrd,phn]=readsph(filename,'wt');
%           spgrambw(s,fs,'Jwcpta',[],[],[],[],wrd);

%       Copyright (C) Mike Brookes 1998
%      Version: $Id: readsph.m 713 2011-10-16 14:45:43Z dmb $
%
%   VOICEBOX is a MATLAB toolbox for speech processing.
%   Home page: http://www.ee.ic.ac.uk/hp/staff/dmb/voicebox/voicebox.html
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%   This program is free software; you can redistribute it and/or modify
%   it under the terms of the GNU General Public License as published by
%   the Free Software Foundation; either version 2 of the License, or
%   (at your option) any later version.
%
%   This program is distributed in the hope that it will be useful,
%   but WITHOUT ANY WARRANTY; without even the implied warranty of
%   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
%   GNU General Public License for more details.
%
%   You can obtain a copy of the GNU General Public License from
%   http://www.gnu.org/copyleft/gpl.html or by writing to
%   Free Software Foundation, Inc.,675 Mass Ave, Cambridge, MA 02139, USA.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

persistent BYTEORDER
% header field names whose values are copied, in this order, into info(5:11)
codes={'sample_count'; 'channel_count';  'sample_n_bytes';'sample_sig_bits'; 'sample_rate'; 'sample_min'; 'sample_max'};
codings={'pcm'; 'ulaw'};
compressions={',embedded-shorten-';',embedded-wavpack-'; ',embedded-shortpack-'};
if isempty(BYTEORDER), BYTEORDER='l'; end
if nargin<1, error('Usage: [y,fs,wrd,phn,ffx]=READSPH(filename,mode,nmax,nskip)'); end
if nargin<2, mode='p';
else mode = [mode(:).' 'p'];             % appending 'p' makes it the default scaling if none was given
end
k=find((mode>='p') & (mode<='s'));
mno=all(mode~='o');                      % scale to input limits not output limits
sc=mode(k(1));                           % first scaling letter wins
if any(mode=='l'), BYTEORDER='l';
elseif any(mode=='b'), BYTEORDER='b';
end
if nargout
    ffx=cell(5,1);
    fid=-1;
    if ischar(filename)
        fname=filename;
        if any(mode=='d')
            fname=fullfile(voicebox('dir_data'),fname);
        end
        fid=fopen(fname,'rb',BYTEORDER);
        if fid == -1                     % retry with the optional .sph extension
            fn=[fname,'.sph'];
            fid=fopen(fn,'rb',BYTEORDER);
            if fid ~= -1, fname=fn; end
        end
        if fid == -1
            error('Can''t open %s for input',fname);
        end
        ffx{1}=fname;
    elseif iscell(filename)
        ffx=filename;                    % FFX from a previous call
        fname=ffx{1};
    else
        fid=filename;                    % caller supplied an open file identifier
        fname=fopen(fid);                % recover its name for reopening / annotation files
    end
    if ~ischar(fname), fname=''; end

    if isempty(ffx{4})
        % ---- parse the SPHERE header ----
        fseek(fid,0,-1);
        str=char(fread(fid,16)');
        if length(str)<16 || str(8) ~= 10 || str(16) ~= 10, fclose(fid); error('File does not begin with a SPHERE header'); end
        ffx{3}=str(1:7);
        hlen=str2double(str(9:15));      % header length = byte offset of the sample data
        hdr=cell(0,2);
        while 1
            str=fgetl(fid);
            if ~ischar(str)              % fgetl returns -1 at end of file
                fclose(fid);
                error('SPHERE header has no end_head line');
            end
            if isempty(str) || str(1)==';', continue; end    % skip blank lines and comments
            [tok,str]=strtok(str);
            if strcmp(tok,'end_head'), break; end
            hdr(end+1,1)={tok}; %#ok<AGROW>
            [tok,str]=strtok(str);
            if isempty(tok) || tok(1) ~= '-', fclose(fid); error('Missing ''-'' in SPHERE header'); end
            if length(tok)>1 && tok(2)=='s'
                % string field "-sN value": keep N characters following the separating blank
                slen=str2double(tok(3:end));                 % str2double avoids eval of file contents
                sval=str(2:end);
                if ~isnan(slen), sval=sval(1:min(slen,length(sval))); end
                hdr(end,2)={sval};
            elseif length(tok)>1 && tok(2)=='i'
                hdr(end,2)={sscanf(str,'%d',1)};
            else
                hdr(end,2)={sscanf(str,'%f',1)};
            end
        end
        i=find(strcmp(hdr(:,1),'sample_byte_format'),1);
        if ~isempty(i) && ~isempty(hdr{i,2})
            bord=char('b'+('l'-'b')*(hdr{i,2}(1)=='0'));     % leading '0' (e.g. "01") selects little endian
            % follow the header unless the caller forced a byte order with 'l' or 'b'
            if bord ~= BYTEORDER && all(mode~='b') && all(mode~='l')
                BYTEORDER=bord;
                fclose(fid);
                fid=fopen(fname,'rb',BYTEORDER);
                if fid == -1, error('Can''t open %s for input',fname); end
            end
        end
        i=find(strcmp(hdr(:,1),'sample_coding'),1);
        icode=0;                % initialize to PCM coding
        if ~isempty(i)
            icode=-1;                   % unknown code
            scode=hdr{i,2};
            nscode=length(scode);
            for j=1:length(codings)
                lenj=length(codings{j});
                if strcmp(scode(1:min(nscode,lenj)),codings{j})
                    if nscode>lenj
                        for k=1:length(compressions)
                            lenk=length(compressions{k});
                            if strcmp(scode(lenj+1:min(lenj+lenk,nscode)),compressions{k})
                                icode=10*k+j-1;
                                break;
                            end
                        end
                    else
                        icode=j-1;
                    end
                    break;
                end
            end
        end

        % defaults: 1 channel, 2 bytes, 16 bits, fs=1; min>max flags "range unknown"
        info=[fid; 0; hlen; double(BYTEORDER); 0; 1; 2; 16; 1 ; 1; -1; icode];
        for j=1:7
            i=find(strcmp(hdr(:,1),codes{j}),1);
            if ~isempty(i) && isnumeric(hdr{i,2}) && isscalar(hdr{i,2})
                info(j+4)=hdr{i,2};
            end
        end
        if ~info(5)                      % no sample count in header: derive it from the file length
            fseek(fid,0,1);
            info(5)=floor((ftell(fid)-info(3))/(info(6)*info(7)));
        end
        ffx{2}=hdr;
        ffx{4}=info;
    end
    info=ffx{4};
    if nargin<4, nskip=info(2);
    elseif nskip<0, nskip=info(2);       % continue from the previous read position
    end

    ksamples=info(5)-nskip;
    if nargin>2
        if nmax>=0
            ksamples=min(nmax,ksamples);
        end
    end

    if ksamples>0
        fid=info(1);
        % info(12) holds the coding; it is used rather than a local so that calls passing FFX work too
        if info(12)>=10 && isempty(ffx{5})
            fclose(fid);
            dirt=voicebox('dir_temp');
            [fnp,fnn,fne]=fileparts(fname); %#ok<ASGLU>
            filetemp=fullfile(dirt,[fnn fne]);
            cmdtemp=fullfile(dirt,'shorten.bat');               % batch file needed to convert to short filenames
            cmdfid=fopen(cmdtemp,'wt');
            fprintf(cmdfid,'@"%s" -x -a %%1 "%%~s2" "%%~s3"\n',voicebox('shorten'));
            fclose(cmdfid);
            if exist(filetemp,'file')                          % need to explicitly delete old file since shorten makes read-only
                doscom=['del /f "' filetemp '"'];
                if dos(doscom) % run the program
                    error('Error running DOS command: %s',doscom);
                end
            end
            if floor(info(12)/10)==1               % shorten
                doscom=['"' cmdtemp '" ' num2str(info(3)) ' "' fname '" "' filetemp '"'];
                if dos(doscom) % run the program
                    error('Error running DOS command: %s',doscom);
                end
            else
                error('unknown compression format');
            end
            ffx{5}=filetemp;
            fid=fopen(filetemp,'r',BYTEORDER);
            if fid<0, error('Cannot open decompressed file %s',filetemp); end
            info(1)=fid;                            % update fid
        end
        info(2)=nskip+ksamples;
        pk=pow2(0.5,8*info(7))*(1+(mno/2-all(mode~='n'))/pow2(0.5,info(8)));  % use modes o and n to determine effective peak
        fseek(fid,info(3)+info(6)*info(7)*nskip,-1);
        nsamples=info(6)*ksamples;
        if info(7)<3
            if info(7)<2
                y=fread(fid,nsamples,'uchar');
                if info(12)==1
                    y=pcmu2lin(y);
                    pk=2.005649;
                else
                    y=y-128;
                end
            else
                y=fread(fid,nsamples,'short');
            end
        else
            if info(7)<4
                % 3-byte samples: combine the bytes then subtract 2^24 when the top bit of byte 3 is set
                y=fread(fid,3*nsamples,'uchar');
                y=reshape(y,3,nsamples);
                y=[1 256 65536]*y-pow2(fix(pow2(y(3,:),-7)),24);
            else
                y=fread(fid,nsamples,'long');
            end
        end
        if sc ~= 'r'
            if sc=='s'
                if info(10)>info(11)     % range not known yet: take it from this chunk
                    info(10)=min(y);
                    info(11)=max(y);
                end
                sf=1/max(max(abs(info(10:11))),1);
            else sf=1/pk;
            end
            y=sf*y;
        end
        if info(6)>1, y = reshape(y,info(6),ksamples).'; end
    else
        y=[];
    end

    if all(mode~='f')
        % info(1) is used because no local fid exists when FFX was passed and nothing was read
        if info(1)>=0, fclose(info(1)); end
        info(1)=-1;
        if ~isempty(ffx{5})
            doscom=['del /f "' ffx{5} '"'];         % quoted so that paths containing spaces work
            if dos(doscom) % run the program
                error('Error running DOS command: %s',doscom);
            end
            ffx{5}=[];
        end
    end
    ffx{4}=info;
    fs=info(9);
    wrd=ffx;        % copy ffx into the other arguments in case 'w' and/or 't' are not specified
    phn=ffx;
    if any(mode=='w') || any(mode=='t')
        [apath,aname]=fileparts(fname);             % annotation files share the sound file's base name
        abase=fullfile(apath,aname);
        if any(mode=='w')
            wrd=read_annotations([abase '.wrd'],fs);
        end
        if any(mode=='t')
            ph=read_annotations([abase '.phn'],fs);
            if any(mode=='w')
                phn=ph;             % copy into 4th argument
            else
                wrd=ph;             % copy into 3rd argument
            end
        end
    end
else
    % no outputs requested: print a summary. All five outputs are requested from the
    % recursive call because the 3rd output is only FFX when neither 'w' nor 't' is set.
    [y1,fs,wrd,phn,ffx]=readsph(filename,mode,0); %#ok<ASGLU>
    info=ffx{4};
    if ~isempty(ffx{1}), fprintf(1,'Filename: %s\n',ffx{1}); end
    fprintf(1,'Sphere file type: %s\n',ffx{3});
    fprintf(1,'Duration = %ss: %d channel * %d samples @ %sHz\n',sprintsi(info(5)/info(9)),info(6),info(5),sprintsi(info(9)));
end

function ann=read_annotations(annfile,fs)
%READ_ANNOTATIONS  Read a TIMIT-style annotation file whose lines are "m n token"
%  Returns a cell array with one row {[m n]/fs, 'token'} per valid line.
%  The result is an empty cell array if the file cannot be opened.
ann=cell(0,0);
fida=fopen(annfile,'r');
if fida>0
    while 1
        tline = fgetl(fida); % read an input line
        if ~ischar(tline)
            break
        end
        [wtim, ntim, ee, nix] = sscanf(tline,'%d%d',2); %#ok<ASGLU>
        if ntim==2                                   % ignore lines without two leading integers
            ann{end+1,1}=wtim(:)'/fs; %#ok<AGROW>
            ann{end,2}=strtrim(tline(nix:end));
        end
    end
    fclose(fida);
end
0355 
0356 
0357 
0358

Generated on Thu 02-Feb-2012 09:15:04 by m2html © 2003