-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmanova2.m
222 lines (201 loc) · 6.92 KB
/
manova2.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
function [d, p, stats] = manova2(x,group,alpha)
%function based on manova1 present in matlab
%MANOVA1 One-way multivariate analysis of variance (MANOVA).
% D = MANOVA1(X,GROUP,ALPHA) performs a one-way MANOVA for comparing
% the mean vectors of two or more groups of multivariate data.
%
% X is a matrix with each row representing a multivariate
% observation, and each column representing a variable.
%
% GROUP is a categorical variable, numeric vector, string array, or cell
% array of strings with the same number of rows as X. X values are in
% the same group if they correspond to the same value of GROUP.
%
% ALPHA is the scalar significance level and is 0.05 by default.
%
% D is an estimate of the dimension of the group means. It is the
% smallest dimension such that a test of the hypothesis that the
% means lie on a space of that dimension is not rejected. If D=0
% for example, we cannot reject the hypothesis that the means are
% the same. If D=1, we reject the hypothesis that the means are the
% same but we cannot reject the hypothesis that they lie on a line.
%
% [D,P] = MANOVA1(...) returns P, a vector of p-values for testing
% the null hypothesis that the mean vectors of the groups lie on
% various dimensions. P(1) is the p-value for a test of dimension
% 0, p(2) for dimension 1, etc.
%
% [D,P,STATS] = MANOVA1(...) returns a STATS structure with the
% following fields:
% W within-group sum of squares and cross-products
% B between-group sum of squares and cross-products
% T total sum of squares and cross-products
% dfW degrees of freedom for W
% dfB degrees of freedom for B
% dfT degrees of freedom for T
% lambda value of Wilk's lambda (the test statistic)
% chisq transformation of lambda to a chi-square distribution
% chisqdf degrees of freedom for chisq
% eigenval eigenvalues of (W^-1)*B
% eigenvec eigenvectors of (W^-1)*B; these are the coefficients
% for canonical variables, and they are scaled
% so the within-group variance of C is 1
% canon canonical variables, equal to XC*eigenvec, where XC is
% X with columns centered by subtracting their means
% mdist Mahalanobis distance from each point to its group mean
% gmdist Mahalanobis distances between each pair of group means
%
% The canonical variables C have the property that C(:,1) is the
% linear combination of the X columns that has the maximum
% separation between groups, C(:,2) has the maximum separation
% subject to it being orthogonal to C(:,1), and so on.
%
% See also ANOVA1, GPLOTMATRIX, GSCATTER.
% References:
% Krzanowski, W.J. (1988), "Principles of Multivariate Analysis,"
% Oxford University Press, Oxford.
% Manly, B.F.J. (1990), "Multivariate Statistical Methods:
% A Primer," Wiley, London
% Mardia, K.V., J.T. Kent, and J.M. Bibby (1979), "Multivariate
% Analysis," Academic Press, London.
% Morrison, D.F. (1976), "Multivariate Statistical Methods,"
% McGraw-Hill, New York.
% Copyright 1993-2014 The MathWorks, Inc.
narginchk(2,3)
if (nargin < 3)
alpha = 0.05;
end
if (length(alpha) > 1)
return
error(message('stats:manova1:BadAlphaScalar'));
end
if ((alpha <= 0) | (alpha >= 1))
return
error(message('stats:manova1:BadAlphaValue'))
end
% Convert group to cell array from character array, make it a column
if (ischar(group)), group = cellstr(group); end
if (size(group, 1) == 1), group = group'; end
% Make sure inputs have the correct size
n = size(x,1);
if (size(group,1) ~= n)
error(message('stats:manova1:InputSizeMismatch'));
end
% Remove missing X columns first in case this eliminates a group
nonan = (sum(isnan(x), 2) == 0);
x = x(nonan,:);
group = group(nonan,:);
wasnan = ~nonan;
% Convert group to indices 1,...,g and separate names
[groupnum, gnames] = grp2idx(group);
ngroups = length(gnames);
% Remove NaN values again
nonan = ~isnan(groupnum);
if (~all(nonan))
groupnum = groupnum(nonan);
x = x(nonan,:);
wasnan(~wasnan) = ~nonan;
end
[n,m] = size(x);
realgroups = ismember(1:ngroups,groupnum);
g = sum(realgroups);
% Start by computing the Total sum of squares matrix
xm = mean(x);
x = x - xm(ones(n,1),:); % done with the original x
T = x'*x;
% Now compute the Within sum of squares matrix
W = zeros(size(T));
for j=1:ngroups
r = find(groupnum == j);
nr = length(r);
if (nr > 1)
z = x(r,:);
xm = mean(z);
z = z - xm(ones(nr,1),:);
W = W + z'*z;
end
end
% The Between sum of squares matrix is the difference
B = T - W;
% Compute the eigenvalues and eigenvectors of (W^-1)B
% We would like to do something like:
% [v,ed] = eig(B,W);
% but in order to insure the condition v'*W*v=I we need:
[R,p] = chol(W);
if (p > 0)
d = [];
p = [];
stats = 0;
return
%error(message('stats:manova1:SingularSumSquares2'));
end
S = R' \ B / R;
S = (S+S')/2; % remove asymmetry caused by roundoff
[vv,ed] = eig(S);
v = R\vv;
[e,ei] = sort(diag(ed)); % put in descending order
% Compute Barlett's statistic for each dimension
if (min(e) <= -1) % should not happen
d = [];
p = [];
stats = 0;
return
%error(message('stats:manova1:SingularSumSquares'));
end
dims = 0:(min(g-1, m)-1); % testable dimensions
lambda = flipud(1 ./ cumprod(e+1));
lambda = lambda(1+dims);
chistat = -(n-1-(g+m)/2) .* log(lambda);
chisqdf = ((m - dims) .* (g - 1 - dims))';
pp = chi2pval(chistat, chisqdf);
% Pick off the first acceptable dimension
d = dims(pp>alpha);
if (length(d) > 0)
d = d(1);
else
d = max(dims) + 1;
end
% If required, create extra outputs
if (nargout > 1), p = pp; end
if (nargout > 2)
stats.W = W;
stats.B = B;
stats.T = T;
stats.dfW = n-g;
stats.dfB = g-1;
stats.dfT = n-1;
stats.lambda = lambda;
stats.chisq = chistat;
stats.chisqdf = chisqdf;
stats.eigenval = flipud(e); % order is now increasing
% Need to re-scale eigenvectors so the within-group variance is 1
v = v(:,flipud(ei)); % in order of increasing e
vs = diag((v' * W * v))' ./ (n-g);
vs(vs<=0) = 1;
v = v ./ repmat(sqrt(vs), size(v,1), 1);
% Flip sign so that the average element is positive
j = (sum(v) < 0);
v(:,j) = -v(:,j);
stats.eigenvec = v;
canon = x*v;
if (any(wasnan))
tmp(~wasnan,:) = canon;
tmp(wasnan,:) = NaN;
stats.canon = tmp;
else
stats.canon = canon;
end
% Compute Mahalanobis distances from points to group means
gmean = nan(ngroups,size(canon,2));
gmean(realgroups,:) = grpstats(canon, groupnum);
mdist = sum((canon - gmean(groupnum,:)).^2, 2);
if (any(wasnan))
stats.mdist(~wasnan) = mdist;
stats.mdist(wasnan) = NaN;
else
stats.mdist = mdist;
end
% Compute Mahalanobis distances between group means
stats.gmdist = squareform(pdist(gmean)).^2;
stats.gnames = gnames;
end