Home > voicebox > readsfs.m

readsfs

PURPOSE ^

readsfs Read a .SFS format sound file [Y,FS,HD,FFX]=READSFS(FF,TY,SUB,MODE,NMAX,NSKIP,XPATH)

SYNOPSIS ^

function [y,fs,hd,ffx]=readsfs(ff,ty,sub,mode,nmax,nskip,xpath)

DESCRIPTION ^

readsfs  Read a .SFS format sound file [Y,FS,HD,FFX]=READSFS(FF,TY,SUB,MODE,NMAX,NSKIP,XPATH)

 The SFS (Speech Filing System) is a package mostly written by Mark Huckvale
 and is available for UNIX and PC systems from http://www.phon.ucl.ac.uk/resource/sfs/

 Input Parameters:

  FF gives the name of the file or alternatively
                 can be the ffx output from a previous call to readsfs
  TY gives the type of data item required e.g.:
              0 Main header, 1 Speech data, 2 Laryngograph, 5 Annotation
  SUB specifies the instance of type TY in the file: 0 for first (default), -1 for last,
      a positive number to select the item with that subtype, or else
      it can specify the start of the processing history field as a string (e.g. 'hqtx')
  MODE        string containing any combination of the following option letters
              (none of them is enabled by default):

           File I/O:    'f'  Do not close file on exit
                        'd'  Look in data directory: voicebox('dir_data')
           Int Format:  'i'  Force integer data to be at least 16 bits
                             (some sfs files have a header error which falsely indicates 8-bit data)
           Create item: 'c'  Create item if necessary

  NMAX     maximum number of samples to read (or -1 for unlimited [default])
  NSKIP    number of samples or frames to skip from start of file
               (or -1 to continue from previous read when FFX is given instead of a filename [default])
  XPATH    (used with 'c' option) gives the full name of the program needed to generate the data or
           the directory containing it.

 Output Parameters:

  Y        data matrix or cell matrix whose format depends on TY:
        TY=0: empty
        TY=5: cell array {nf,3} = {position length annotation}
        TY=1,2: column vector containing data
        TY=11: data array with one row per frame
  FS       sample frequency in Hz
  HD     cell matrix whose format depends on TY:
        TY=0: cell{14,1}
              {1} row vector
                {1}(1) = serial_date (see DATENUM() for format)
                {1}(2) = file_number
                {1}(3) = machine_type
              {2} = File type (= 'UC2')
              {3} = username of creator
              {4} = site of creator
              {5} = source
              {6} = database
              {7} = speaker name
              {8} = session code
              {9} = session date (as a string)
             {10} = name of token
             {11} = token repetition code
             {12} = recording conditions
             {13} = archiving details
             {14} = general comments
        TY>0: cell{4,1}
              {1} = (1,14) array:
                 {1}(1)  = processdate (see DATENUM() for format)
                 {1}(2)  = datatype: 1=speech, 2=lx, 3=tx cycle lengths, 4=fx freq
                                     5=annotations, 6=phonetic, 7=synthesiser, 8=words
                                     9=grey-level, 10=voicing, 11=energy, 12=formants
                                     13=energy, 14=lpc, 15=markov, 16=acoustic, 17=?,
                                     18=geometry, 19=aerodynamics, 20=articulatory
                                     21=source, 22=physiological, 23=rational filter
                                     24=poles/zeros, 25=glottal flow, 26=excitation model
                                     27=nose, 28=calibration, 29=area
                 {1}(3)  = subtype
                 {1}(4)  = floating: 1=float, 0=int, -1=structure+
                 {1}(5)  = datasize in bytes
                 {1}(6)  = framesize in units of datasize
                 {1}(7)  = numframes
                 {1}(8)  = length in bytes of data
                 {1}(9)  = frameduration=1/sample_rate
                 {1}(10) = datapresent: 0=deleted, 1=present, 2=link
                 {1}(11) = time offset
                 {1}(12) = windowsize
                 {1}(13) = overlap
                 {1}(14) = lxsync
              {2} = processing history
              {3} = parameter field
              {4} = comment

  FFX     cell array containing:
              {1} = filename
              {2} = (1,4) = [fid byte_order item_row values_read]
              {3} = (nitem,5) = one row per item [type subtype length position byteorder]
              {4} = {nitem,3} = cell: one row per item {processing parameters comment} text strings

CROSS-REFERENCE INFORMATION ^

This function calls: readsfs (recursively, when printing a summary), sprintsi, voicebox, zerotrim.

This function is called by: (none listed)

SOURCE CODE ^

function [y,fs,hd,ffx]=readsfs(ff,ty,sub,mode,nmax,nskip,xpath)
%READSFS  Read a .SFS format sound file [Y,FS,HD,FFX]=READSFS(FF,TY,SUB,MODE,NMAX,NSKIP,XPATH)
%
% The SFS (Speech Filing System) is a package mostly written by Mark Huckvale
% and is available for UNIX and PC systems from http://www.phon.ucl.ac.uk/resource/sfs/
%
% If called with no output arguments and TY omitted or <=0, a summary of the
% file (database, speaker, date, token and one line per item) is printed.
%
% Input Parameters:
%
%  FF       gives the name of the file or alternatively
%           can be the ffx output from a previous call to readsfs
%  TY       gives the type of data item required e.g.:
%              0 Main header, 1 Speech data, 2 Laryngograph, 5 Annotation
%           (default 0; a negative value selects the type of the first entry)
%  SUB      specifies the instance of type TY in the file:
%              0       first item of type TY (default)
%             -1       last item of type TY
%             >0       the item of type TY whose subtype equals SUB
%             string   the first item of type TY whose processing history
%                      starts with SUB (case-insensitive), e.g. 'hqtx'
%  MODE     string containing any combination of the following letters:
%
%           File I/O:    'f'  Do not close file on exit
%                        'd'  Look in data directory: voicebox('dir_data')
%           Int Format:  'i'  Force integer data to be at least 16 bits
%                             (some sfs files have a header error which falsely indicates 8-bit data)
%           Create item: 'c'  Create item if necessary (only when SUB is a string)
%
%  NMAX     maximum number of samples to read (or -1 for unlimited [default])
%  NSKIP    number of samples or frames to skip from start of file
%               (or -1 to continue from previous read when FFX is given instead of a filename [default])
%  XPATH    (used with 'c' option) gives the full name of the program needed to generate the data or
%           the directory containing it. Default: voicebox('sfsbin').
%
% Output Parameters:
%
%  Y        data matrix or cell matrix whose format depends on TY:
%        TY=0: empty
%        TY=5: cell array {nf,3} = {position length annotation}
%        TY=1,2: column vector containing data
%        TY=11: data array with one row per frame
%  FS       sample frequency in Hz (0 if the item has no frame duration)
%  HD       cell matrix whose format depends on TY:
%        TY=0: cell{14,1}
%              {1} row vector
%                {1}(1) = serial_date (see DATENUM() for format)
%                {1}(2) = file_number
%                {1}(3) = machine_type
%              {2} = File type (= 'UC2')
%              {3} = username of creator
%              {4} = site of creator
%              {5} = source
%              {6} = database
%              {7} = speaker name
%              {8} = session code
%              {9} = session date (as a string)
%             {10} = name of token
%             {11} = token repetition code
%             {12} = recording conditions
%             {13} = archiving details
%             {14} = general comments
%        TY>0: cell{4,1}
%              {1} = (1,14) array:
%                 {1}(1)  = processdate (see DATENUM() for format)
%                 {1}(2)  = datatype: 1=speech, 2=lx, 3=tx cycle lengths, 4=fx freq
%                                     5=annotations, 6=phonetic, 7=synthesiser, 8=words
%                                     9=grey-level, 10=voicing, 11=energy, 12=formants
%                                     13=energy, 14=lpc, 15=markov, 16=acoustic, 17=?,
%                                     18=geometry, 19=aerodynamics, 20=articulatory
%                                     21=source, 22=physiological, 23=rational filter
%                                     24=poles/zeros, 25=glottal flow, 26=excitation model
%                                     27=nose, 28=calibration, 29=area
%                 {1}(3)  = subtype
%                 {1}(4)  = floating: 1=float, 0=int, -1=structure+
%                 {1}(5)  = datasize in bytes
%                 {1}(6)  = framesize in units of datasize
%                 {1}(7)  = numframes
%                 {1}(8)  = length in bytes of data
%                 {1}(9)  = frameduration=1/sample_rate
%                 {1}(10) = datapresent: 0=deleted, 1=present, 2=link
%                 {1}(11) = time offset
%                 {1}(12) = windowsize
%                 {1}(13) = overlap
%                 {1}(14) = lxsync
%              {2} = processing history
%              {3} = parameter field
%              {4} = comment
%
%  FFX     cell array containing:
%              {1} = filename
%              {2} = (1,4) = [fid byte_order item_row values_read]
%                            fid is -1 when the file has been closed
%              {3} = (nitem,5) = one row per item [type subtype length position byteorder]
%              {4} = {nitem,3} = cell: one row per item {processing parameters comment} text strings
%

% Features yet to be implemented:
%
%   (1) If no output parameters are specified and TY>0, header information will be printed.
%   (2) following link items
%   (3) MODE options:
%                Scaling: 's'    Auto scale to make data peak = +-1
%                         'r'    Raw unscaled data (integer values)
%                         'q'    Scaled to make 0dBm0 be unity mean square
%                         'p' *    Scaled to make +-1 equal full scale
%                Errors   'r'    Return if file is non-existent


%       Copyright (C) Mike Brookes 1998
%      Version: $Id: readsfs.m 713 2011-10-16 14:45:43Z dmb $
%
%   VOICEBOX is a MATLAB toolbox for speech processing.
%   Home page: http://www.ee.ic.ac.uk/hp/staff/dmb/voicebox/voicebox.html
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%   This program is free software; you can redistribute it and/or modify
%   it under the terms of the GNU General Public License as published by
%   the Free Software Foundation; either version 2 of the License, or
%   (at your option) any later version.
%
%   This program is distributed in the hope that it will be useful,
%   but WITHOUT ANY WARRANTY; without even the implied warranty of
%   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
%   GNU General Public License for more details.
%
%   You can obtain a copy of the GNU General Public License from
%   http://www.gnu.org/copyleft/gpl.html or by writing to
%   Free Software Foundation, Inc.,675 Mass Ave, Cambridge, MA 02139, USA.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

if nargin<7
    xpath=voicebox('sfsbin');       % path for sfs programs
end
EXESUF=voicebox('sfssuffix');       % suffix for executables (O/S dependent)
if nargin<4
    mode='p';
else
    mode = [mode(:).' 'p'];         % 'p' is appended as the default for the (unimplemented) scaling options
end

if nargout==0
    % No outputs requested: print a one-line summary of every item in the file.
    % (Nothing is done when a specific item type TY>0 is requested.)
    if nargin<2 || ty<=0
        [yy,fs,hd,ffx]=readsfs(ff,0,0,mode);
        fprintf(1,'File: %s\n',ffx{1});
        fprintf(1,'Database: %s, Speaker: %s, Date: %s, Token: %s\n',hd{6},hd{7},hd{9},hd{10});
        lst=ffx{3};
        for it=2:size(lst,1)
            % nmax=0 reads only the item header, not its data
            [yy,fs,hd,ffx]=readsfs(ffx,lst(it,1),lst(it,2),mode,0);
            nf=hd{1}(7);
            fd=hd{1}(9);
            fprintf(1,'%3d.%02d %ss @ %sHz (%d frames): %s\n',lst(it,1),lst(it,2),sprintsi(nf*fd),sprintsi(1/fd),nf,ffx{4}{it,1});
        end
    end
else
    it=[];
    xfid=[];                  % xfid will be non-empty second time around
    ownfid=0;                 % becomes 1 once this call has opened the file itself
    while isempty(it)         % may go round this loop twice
        if ischar(ff)         % If ff is a string we must read file
            % Only prepend the data directory on the first pass: on the second
            % pass ff has been reloaded from ffx{1}, which already contains it.
            if any(mode=='d') && isempty(xfid)
                ff=fullfile(voicebox('dir_data'),ff);
            end
            fid=fopen(ff,'rb','b');
            if fid == -1
                error(sprintf('Can''t open %s for input',ff));
            end
            ownfid=1;

            % The file starts with a 512-byte main header whose first three
            % bytes must be 'UC2'; byte 512 is the machine type.
            [t,n]=fread(fid,512,'uchar');
            t=t.';
            if n<512 || ~isequal(char(t(1:3)),'UC2')
                sfs_fail(fid,ownfid,sprintf('%s is not an SFS file type UC2',ff));
            end
            itemlist = [0 1 0 0 t(512)];

            % Scan the chain of 512-byte item headers, skipping over each item's data.
            proglist={};
            for i=2:200
                pos = ftell(fid);
                [t,n]=fread(fid,512,'uchar');
                if (n < 512)
                    break;
                end
                % byte weights for assembling a 32-bit value:
                % most-significant byte first when the machine type (byte 512) is 0,
                % least-significant byte first otherwise
                mm=pow2(1,8*([0 1 2 3]+(t(512)==0)*[3 1 -1 -3]));
                itemlist(i,:)=[mm*[t(389:392) t(393:396) t(413:416)] pos t(512)];
                if itemlist(i,1)>29
                    sfs_fail(fid,ownfid,sprintf('%d is not a valid SFS item type',itemlist(i,1)));
                end
                proglist{i,1}=char(zerotrim(t(1:256)'));
                proglist{i,2}=char(zerotrim(t(257:384)'));
                proglist{i,3}=char(zerotrim(t(437:456)'));
                fseek(fid,itemlist(i,3),'cof');
            end
            ffx={ff; [fid 0 0 0]; itemlist; proglist};
        else
            % ff is the FFX cell array from a previous call
            ffx=ff;
            ff=ffx{1};
            fid=ffx{2}(1);
            if fid<0
                % the file was closed by the previous call: reopen it with the
                % byte order that was in use and remember the new handle
                fid=fopen(ffx{1},'rb',sfs_byteorder(ffx{2}(2)));
                if fid == -1
                    error(sprintf('Can''t open %s for input',ff));
                end
                ffx{2}(1)=fid;
                ownfid=1;
            end
        end

        % now try to find the requested item

        list=ffx{3};
        if nargin<2
            ty=0;
        end
        if nargin<3
            sub=0;
        end
        if ty<0
            ty=list(1,1);
        end
        if ischar(sub)
            lsub=length(sub);
            proglist=ffx{4};

            % scan backwards so that "it" ends up as the first matching item
            for itt=size(proglist,1):-1:2
                if list(itt,1)==ty && length(proglist{itt,1})>=lsub
                    if strcmpi(sub,proglist{itt,1}(1:lsub))
                        it=itt;
                    end
                end
            end
            if isempty(it)
                if any(mode=='c') && isempty(xfid)      % try to create item if we haven't tried before
                    xfid=-1;
                    if nargin>=7
                        xname=xpath;                    % xpath may be the program itself ...
                        xfid=fopen(xname);
                    end
                    if xfid<0
                        xname=fullfile(xpath,[sub EXESUF]);   % ... or the directory containing it
                        xfid=fopen(xname);
                    end
                    if xfid<0
                        sfs_fail(fid,ownfid,sprintf('Cannot find executable program %s',sub));
                    else
                        fclose(xfid);
                        fclose(fid); % close this file so the external program can update it
                        % NOTE(review): this command line is Windows-specific and the
                        % file name is not quoted, so names containing spaces may fail.
                        doscom=['cmd /c "' xname '" ' ffx{1}];
                        %fprintf(1,'Executing: %s\n',doscom);
                        if dos(doscom) % run the program
                            error(sprintf('Error running DOS command: %s',doscom));
                        end
                        ff=ffx{1};          % force reread of header information
                    end
                else
                    sfs_fail(fid,ownfid,sprintf('Cannot find item %d.%s in file: %s',ty,sub,ff));
                end
            end
        else % numeric subitem specification
            if sub>0
                it = min(find(list(:,1)==ty & list(:,2)==sub));
            elseif sub==0
                it = min(find(list(:,1)==ty));
            else
                it = max(find(list(:,1)==ty));
            end
            if isempty(it)
                sfs_fail(fid,ownfid,sprintf('Cannot find item %d.%d in file: %s',ty,sub,ff));
            end
        end
    end % loop up to two times while (isempty(it))
    lit=list(it,:);
    itemname=sfs_itemname(ty,sub);  % e.g. '1.02' style label used in error messages
    if ffx{2}(3)~=it
        % a different item from last time: restart the running read position
        ffx{2}(3)=it;
        ffx{2}(4)=0;
    end

    % read the selected item with the correct byte order

    if lit(5)~=ffx{2}(2)
        fclose(fid);
        fid=fopen(ffx{1},'rb',sfs_byteorder(lit(5)));
        ffx{2}(1:2)=[fid lit(5)];
        if fid == -1
            error(sprintf('Can''t open %s for input',ff));
        end
        ownfid=1;
    end
    fseek(fid,lit(4),'bof');


    y=[];
    fs=0;
    if ~lit(1)                              % read main header
        mb=fread(fid,512,'uchar').';
        if nargout>2
            % first and last byte of each of the 13 text fields
            mc=[1 4; 9 28; 29 32; 37 56; 57 76; 77 96; 97 116; 117 136; 137 296; 297 304; 305 312; 313 332; 333 412];
            hd=cell(14,1);
            hd{1}=[pow2(1,8*([2 3 0 1]+(mb(512)==0)*[1 -1 1 -1]))*[mb(5:8); mb(33:36)].' mb(512)];
            hd{1}(1)=hd{1}(1)/86400+719529;  % convert seconds since 1-Jan-1970 to a DATENUM value
            for i=1:13
                hd{i+1}=char(zerotrim(mb(mc(i,1):mc(i,2))));
            end
        end
    else

        % read the item header (512 bytes in total)

        hd=cell(4,1);
        hdr=zeros(1,14);
        readerr=sprintf('Error reading item %s in file: %s',itemname,ff);

        [str,n]=fread(fid,256,'uchar');
        if (n<256)
            sfs_fail(fid,ownfid,readerr);
        end
        hd{2}=char(zerotrim(str'));

        [str,n]=fread(fid,128,'uchar');
        if (n<128)
            sfs_fail(fid,ownfid,readerr);
        end
        hd{3}=char(zerotrim(str'));

        % The integer header fields occupy 4 bytes each, so they are read as
        % 'int32'; the size of 'long' depends on the platform.
        [tmp,n]=fread(fid,8,'int32');
        if (n<8)
            sfs_fail(fid,ownfid,readerr);
        end
        hdr(1:8)=tmp;
        [tmp,n]=fread(fid,1,'double');
        if (n<1)
            sfs_fail(fid,ownfid,readerr);
        end
        hdr(9)=tmp;
        if hdr(9)
            fs=1/hdr(9);
        end
        [tmp,n]=fread(fid,1,'int32');
        if (n<1)
            sfs_fail(fid,ownfid,readerr);
        end
        hdr(10)=tmp;
        [tmp,n]=fread(fid,1,'double');
        if (n<1)
            sfs_fail(fid,ownfid,readerr);
        end
        hdr(11)=tmp;

        [str,n]=fread(fid,20,'uchar');
        if (n<20)
            sfs_fail(fid,ownfid,readerr);
        end
        hd{4}=char(zerotrim(str'));

        [tmp,n]=fread(fid,3,'int32');
        if (n<3)
            sfs_fail(fid,ownfid,readerr);
        end
        hdr(12:14)=tmp;
        fseek(fid,44,'cof');             % skip the rest of the 512-byte item header
        hd{1}=hdr;
        hd{1}(1)=hd{1}(1)/86400+719529;  % convert seconds since 1-Jan-1970 to a DATENUM value

        % now read the actual data

        if nargin<6 || nskip<0
            nskip=ffx{2}(4);             % continue from where the previous read stopped
        end

        ksamples=hdr(7)-nskip;
        if nargin>4 && nmax>=0
            ksamples=min(nmax,ksamples);
        end

        if ksamples>0
            ffx{2}(4)=nskip+ksamples;
            fsz=hdr(6);
            if(hdr(10)==1)        % data present
                if(hdr(4)>=0)        % non-structured
                    ds=hdr(5);
                    eb=ds;           % bytes per value actually read from the file
                    if(hdr(4)>0)
                        if(ds==4)
                            fmt='float';
                        elseif (ds==8)
                            fmt='double';
                        else
                            sfs_fail(fid,ownfid,'error in sfs file');
                        end
                    else
                        if(ds==1 && all(mode~='i'))
                            fmt='uchar';
                        elseif(ds<=2)
                            % 16-bit data (also used when 'i' overrides an 8-bit header)
                            fmt='int16';
                            fsz=ceil(fsz*ds/2);
                            eb=2;
                        elseif(ds==4)
                            fmt='int32';
                        else
                            sfs_fail(fid,ownfid,'error in sfs file');
                        end
                    end
                    % skip nskip frames of fsz values, each eb bytes long
                    fseek(fid,lit(4)+512+nskip*fsz*eb,'bof');
                    nd=fsz*ksamples;
                    [y,n]=fread(fid,nd,fmt);
                    if (n<nd)
                        sfs_fail(fid,ownfid,readerr);
                    end
                    y = reshape(y,fsz,ksamples)';     % one row per frame
                else
                    if (hdr(2)==5)
                        % Annotations: each record is a length byte LF followed by
                        % LF more bytes; the file is already positioned at the data.
                        y = cell(ksamples,3);
                        for ifr=1:nskip
                            lf=fread(fid,1,'uchar');
                            if isempty(lf)
                                sfs_fail(fid,ownfid,readerr);
                            end
                            fseek(fid,lf,'cof');
                        end

                        for ifr=1:ksamples
                            lf=fread(fid,1,'uchar');
                            tdat=fread(fid,2,'int32');      % position and length
                            if isempty(lf) || numel(tdat)<2
                                sfs_fail(fid,ownfid,readerr);
                            end
                            y(ifr,:)={tdat(1) tdat(2) char(fread(fid,lf-9,'uchar').')};
                            fread(fid,1,'uchar');           % discard the byte following the text (presumably a terminator)
                        end
                    else
                        sfs_fail(fid,ownfid,sprintf('Cannot convert item %s in file: %s',itemname,ff));
                    end
                end
            end
        end
    end
    if all(mode~='f')
        fclose(fid);
        ffx{2}(1)=-1;
    end
end

%-------------------------------------------------------------------------
function sfs_fail(fid,ownfid,msg)
%SFS_FAIL  Raise error MSG, first closing FID if this call to readsfs opened it.
% A handle supplied by the caller (via FFX with mode 'f') is left open so
% that the caller's FFX remains valid.
if ownfid && fid>=0
    fclose(fid);
end
error(msg);

%-------------------------------------------------------------------------
function s=sfs_itemname(ty,sub)
%SFS_ITEMNAME  Format an item as 'type.sub' where SUB may be a number or a string.
if ischar(sub)
    s=sprintf('%d.%s',ty,sub);
else
    s=sprintf('%d.%d',ty,sub);
end

%-------------------------------------------------------------------------
function b=sfs_byteorder(mach)
%SFS_BYTEORDER  fopen machine format: 'b' for machine type 0, 'l' otherwise.
if mach~=0
    b='l';
else
    b='b';
end

Generated on Thu 02-Feb-2012 09:15:04 by m2html © 2003