Home > voicebox > gaussmixp.m

gaussmixp

PURPOSE ^

GAUSSMIXP calculate probability densities from a Gaussian mixture model

SYNOPSIS ^

function [lp,rp,kh,kp]=gaussmixp(y,m,v,w,a,b)

DESCRIPTION ^

GAUSSMIXP calculate probability densities from a Gaussian mixture model

 Inputs: n data values, k mixtures, p parameters, q data vector size

   Y(n,q) = input data
   M(k,p) = mixture means for x(p)
   V(k,p) or V(p,p,k) variances (diagonal or full)
   W(k,1) = weights
   A(q,p), B(q) = transformation: y=x*a'+b' (where y and x are row vectors)
            if A is omitted, it is assumed to be the first q rows of the
            identity matrix. B defaults to zero.
   Note that most commonly, q=p and A and B are omitted entirely.

 Outputs

  LP(n,1) = log probability of each data point
  RP(n,k) = relative probability of each mixture
  KH(n,1) = highest probability mixture
  KP(n,1) = relative probability of highest probability mixture

CROSS-REFERENCE INFORMATION ^

This function calls: This function is called by:

SOURCE CODE ^

0001 function [lp,rp,kh,kp]=gaussmixp(y,m,v,w,a,b)
0002 %GAUSSMIXP calculate probability densities from a Gaussian mixture model
0003 %
0004 % Inputs: n data values, k mixtures, p parameters, q data vector size
0005 %
0006 %   Y(n,q) = input data
0007 %   M(k,p) = mixture means for x(p)
0008 %   V(k,p) or V(p,p,k) variances (diagonal or full)
0009 %   W(k,1) = weights
0010 %   A(q,p), B(q) = transformation: y=x*a'+b' (where y and x are row vectors)
0011 %            if A is omitted, it is assumed to be the first q rows of the
0012 %            identity matrix. B defaults to zero.
0013 %   Note that most commonly, q=p and A and B are omitted entirely.
0014 %
0015 % Outputs
0016 %
0017 %  LP(n,1) = log probability of each data point
0018 %  RP(n,k) = relative probability of each mixture
0019 %  KH(n,1) = highest probability mixture
0020 %  KP(n,1) = relative probability of highest probability mixture
0021 
0022 %      Copyright (C) Mike Brookes 2000-2009
0023 %      Version: $Id: gaussmixp.m 713 2011-10-16 14:45:43Z dmb $
0024 %
0025 %   VOICEBOX is a MATLAB toolbox for speech processing.
0026 %   Home page: http://www.ee.ic.ac.uk/hp/staff/dmb/voicebox/voicebox.html
0027 %
0028 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
0029 %   This program is free software; you can redistribute it and/or modify
0030 %   it under the terms of the GNU General Public License as published by
0031 %   the Free Software Foundation; either version 2 of the License, or
0032 %   (at your option) any later version.
0033 %
0034 %   This program is distributed in the hope that it will be useful,
0035 %   but WITHOUT ANY WARRANTY; without even the implied warranty of
0036 %   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
0037 %   GNU General Public License for more details.
0038 %
0039 %   You can obtain a copy of the GNU General Public License from
0040 %   http://www.gnu.org/copyleft/gpl.html or by writing to
0041 %   Free Software Foundation, Inc.,675 Mass Ave, Cambridge, MA 02139, USA.
0042 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
0043 [n,q]=size(y);
0044 [k,p]=size(m);
0045 
0046 if nargin<4
0047     w=repmat(1/k,k,1);
0048     if nargin<3
0049         v=ones(k,p);
0050     end
0051 end
0052 fv=ndims(v)>2 || size(v,1)>k; % full covariance matrix is requested
0053 if nargin>4         % need to modify distribution means
0054     if nargin>5
0055         m=m*a'+repmat(b',k,1);
0056     else
0057         m=m*a';
0058     end
0059     v1=v;
0060     v=zeros(q,q,k);
0061     if fv
0062         for ik=1:k
0063             v(:,:,ik)=a*v1(:,:,ik)*a';
0064         end
0065     else
0066         for ik=1:k
0067             v(:,:,ik)=(a.*repmat(v1(ik,:),q,1))*a';
0068         end
0069         fv=1;
0070     end
0071 elseif q<p     % need to select coefficient subset
0072     m=m(:,1:q);
0073     if fv
0074         v=v(1:q,1:q,:);
0075     else
0076         v=v(:,1:q);
0077     end
0078 end
0079 
0080 memsize=voicebox('memsize');    % set memory size to use
0081 
0082 lp=zeros(n,1);
0083 rp=zeros(n,k);
0084 wk=ones(k,1);
0085 
0086 if ~fv          % diagonal covariance
0087     %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
0088     % Diagonal Covariance matrices  %
0089     %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
0090 
0091     % If data size is large then do calculations in chunks
0092 
0093     nb=min(n,max(1,floor(memsize/(8*q*k))));    % chunk size for testing data points
0094     nl=ceil(n/nb);                  % number of chunks
0095     jx0=n-(nl-1)*nb;                % size of first chunk
0096     im=repmat((1:k)',nb,1);
0097     wnb=ones(1,nb);
0098     wnj=ones(1,jx0);
0099 
0100     vi=-0.5*v.^(-1);                % data-independent scale factor in exponent
0101     lvm=log(w)-0.5*sum(log(v),2);   % log of external scale factor (excluding -0.5*q*log(2pi) term)
0102 
0103     % first do partial chunk
0104 
0105     jx=jx0;
0106     ii=1:jx;
0107     kk=repmat(ii,k,1);
0108     km=repmat(1:k,1,jx);
0109     py=reshape(sum((y(kk(:),:)-m(km(:),:)).^2.*vi(km(:),:),2),k,jx)+lvm(:,wnj);
0110     mx=max(py,[],1);                % find normalizing factor for each data point to prevent underflow when using exp()
0111     px=exp(py-mx(wk,:));            % find normalized probability of each mixture for each datapoint
0112     ps=sum(px,1);                   % total normalized likelihood of each data point
0113     rp(ii,:)=(px./ps(wk,:))';                % relative mixture probabilities for each data point (columns sum to 1)
0114     lp(ii)=log(ps)+mx;
0115 
0116     for il=2:nl
0117         ix=jx+1;
0118         jx=jx+nb;                    % increment upper limit
0119         ii=ix:jx;
0120         kk=repmat(ii,k,1);
0121         py=reshape(sum((y(kk(:),:)-m(im,:)).^2.*vi(im,:),2),k,nb)+lvm(:,wnb);
0122         mx=max(py,[],1);                % find normalizing factor for each data point to prevent underflow when using exp()
0123         px=exp(py-mx(wk,:));            % find normalized probability of each mixture for each datapoint
0124         ps=sum(px,1);                   % total normalized likelihood of each data point
0125         rp(ii,:)=(px./ps(wk,:))';                % relative mixture probabilities for each data point (columns sum to 1)
0126         lp(ii)=log(ps)+mx;
0127     end
0128 else
0129     %%%%%%%%%%%%%%%%%%%%%%%%%%%%%
0130     % Full Covariance matrices  %
0131     %%%%%%%%%%%%%%%%%%%%%%%%%%%%%
0132     pl=q*(q+1)/2;
0133     lix=1:q^2;
0134     cix=repmat(1:q,q,1);
0135     rix=cix';
0136     lix(cix>rix)=[];                                        % index of lower triangular elements
0137     lixi=zeros(q,q);
0138     lixi(lix)=1:pl;
0139     lixi=lixi';
0140     lixi(lix)=1:pl;                                        % reverse index to build full matrices
0141     v=reshape(v,q^2,k);
0142     v=v(lix,:)';                                            % lower triangular in rows
0143 
0144     % If data size is large then do calculations in chunks
0145 
0146     nb=min(n,max(1,floor(memsize/(24*q*k))));    % chunk size for testing data points
0147     nl=ceil(n/nb);                  % number of chunks
0148     jx0=n-(nl-1)*nb;                % size of first chunk
0149     wnb=ones(1,nb);
0150     wnj=ones(1,jx0);
0151 
0152     vi=zeros(q*k,q);                    % stack of k inverse cov matrices each size q*q
0153     vim=zeros(q*k,1);                   % stack of k vectors of the form inv(v)*m
0154     mtk=vim;                             % stack of k vectors of the form m
0155     lvm=zeros(k,1);
0156     wpk=repmat((1:q)',k,1);
0157 
0158     for ik=1:k
0159 
0160         % these lines added for debugging only
0161         %             vk=reshape(v(k,lixi),q,q);
0162         %             condk(ik)=cond(vk);
0163         %%%%%%%%%%%%%%%%%%%%
0164         [uvk,dvk]=eig(reshape(v(ik,lixi),q,q));      % convert lower triangular to full and find eigenvalues
0165         dvk=diag(dvk);
0166         vik=-0.5*uvk*diag(dvk.^(-1))*uvk';   % calculate inverse
0167         vi((ik-1)*q+(1:q),:)=vik;           % vi contains all mixture inverses stacked on top of each other
0168         vim((ik-1)*q+(1:q))=vik*m(ik,:)';   % vim contains vi*m for all mixtures stacked on top of each other
0169         mtk((ik-1)*q+(1:q))=m(ik,:)';       % mtk contains all mixture means stacked on top of each other
0170         lvm(ik)=log(w(ik))-0.5*sum(log(dvk));       % vm contains the weighted sqrt of det(vi) for each mixture
0171     end
0172     %
0173     %         % first do partial chunk
0174     %
0175     jx=jx0;
0176     ii=1:jx;
0177     xii=y(ii,:).';
0178     py=reshape(sum(reshape((vi*xii-vim(:,wnj)).*(xii(wpk,:)-mtk(:,wnj)),q,jx*k),1),k,jx)+lvm(:,wnj);
0179     mx=max(py,[],1);                % find normalizing factor for each data point to prevent underflow when using exp()
0180     px=exp(py-mx(wk,:));  % find normalized probability of each mixture for each datapoint
0181     ps=sum(px,1);                   % total normalized likelihood of each data point
0182     rp(ii,:)=(px./ps(wk,:))';                % relative mixture probabilities for each data point (columns sum to 1)
0183     lp(ii)=log(ps)+mx;
0184 
0185     for il=2:nl
0186         ix=jx+1;
0187         jx=jx+nb;        % increment upper limit
0188         ii=ix:jx;
0189         xii=y(ii,:).';
0190         py=reshape(sum(reshape((vi*xii-vim(:,wnb)).*(xii(wpk,:)-mtk(:,wnb)),q,nb*k),1),k,nb)+lvm(:,wnb);
0191         mx=max(py,[],1);                % find normalizing factor for each data point to prevent underflow when using exp()
0192         px=exp(py-mx(wk,:));  % find normalized probability of each mixture for each datapoint
0193         ps=sum(px,1);                   % total normalized likelihood of each data point
0194         rp(ii,:)=(px./ps(wk,:))';                % relative mixture probabilities for each data point (columns sum to 1)
0195         lp(ii)=log(ps)+mx;
0196     end
0197 end
0198 lp=lp-0.5*q*log(2*pi);
0199 if nargout >2
0200     [kp,kh]=max(rp,[],2);
0201 end

Generated on Thu 02-Feb-2012 09:15:04 by m2html © 2003