% load Fisher Iris dataset

load fisheriris

% project it down to 2 dimensions for the sake of visualization

[~,data] = pca(meas,'NumComponents',2);

mn = min(data); mx = max(data);

D = size(data,2); % data dimension

% inital kmeans step used to initialize EM

K = 3; % number of mixtures/clusters

cInd = kmeans(data, K, 'EmptyAction','singleton');

% fit a GMM model

gmm = fitgmdist(data, K, 'Options',statset('MaxIter',1000), ...

'CovType','full', 'SharedCov',false, 'Regularize',0.01, 'Start',cInd);

% means, covariances, and mixing-weights

mu = gmm.mu;

sigma = gmm.Sigma;

p = gmm.PComponents;

% cluster and posterior probablity of each instance

% note that: [~,clustIdx] = max(p,[],2)

[clustInd,~,p] = cluster(gmm, data);

tabulate(clustInd)

% plot data, clustering of the entire domain, and the GMM contours

clrLite = [1 0.6 0.6 ; 0.6 1 0.6 ; 0.6 0.6 1];

clrDark = [0.7 0 0 ; 0 0.7 0 ; 0 0 0.7];

[X,Y] = meshgrid(linspace(mn(1),mx(1),50), linspace(mn(2),mx(2),50));

C = cluster(gmm, [X(:) Y(:)]);

image(X(:), Y(:), reshape(C,size(X))), hold on

gscatter(data(:,1), data(:,2), species, clrDark)

h = ezcontour(@(x,y)pdf(gmm,[x y]), [mn(1) mx(1) mn(2) mx(2)]);

set(h, 'LineColor','k', 'LineStyle',':')

hold off, axis xy, colormap(clrLite)

title('2D data and fitted GMM'), xlabel('PC1'), ylabel('PC2')