Error using zeros, Maximum variable size allowed by the program is exceeded.
1 次查看(过去 30 天)
显示 更早的评论
Error using zeros Maximum variable size allowed by the program is exceeded.
Error in knnclassify>distfun (line 216) D = zeros(n,size(Test,1));
Code:
function class = knnclassify(sample, TRAIN, group, K, distance,tieBreaker)
%KNNCLASSIFY K-Nearest Neighbor Classifier.
% CLASS = KNNCLASSIFY(SAMPLE,TRAINING,GROUP) classifies each row of the
% data in SAMPLE into one of the groups in TRAINING using the nearest
% neighbour method. SAMPLE and TRAINING must be matrices with the same
% number of columns. GROUP is a grouping variable for TRAINING. Its
% unique values define groups, and each element defines the group to
% which the corresponding row of TRAINING belongs. GROUP can be a
% numeric vector, a string array, or a cell array of strings. TRAINING
% and GROUP must have the same number of rows. CLASSIFY treats NaNs or
% empty strings in GROUP as missing values, and ignores the corresponding
% rows of TRAINING. CLASS indicates which group each row of SAMPLE has
% been assigned to, and is of the same type as GROUP. % % CLASS = KNNCLASSIFY(SAMPLE,TRAINING,GROUP,K) allows you to specify K,
% the number of nearest neighbors used in the classification. The default
% is 1. % % CLASS = KNNCLASSIFY(SAMPLE,TRAINING,GROUP,K,DISTANCE) allows you to
% select the distance metric. Choices are:
% {'sqEuclidean'} - Squared Euclidean distance
% 'cityblock' - Sum of absolute differences, a.k.a. L1
% 'cosine' - One minus the cosine of the included angle
% between points (treated as vectors)
% 'correlation' - One minus the sample correlation between
% points (treated as sequences of values)
% 'Hamming' - Percentage of bits that differ (only
% suitable for binary data) % % CLASS = KNNCLASSIFY(SAMPLE,TRAINING,GROUP,K,DISTANCE,TIEBREAK) allows
% you to specify the method used to break a tie in the number of nearest
% neighbors. Options are 'random', which selects a random tiebreaker, and
% 'nearest' which uses the nearest neighbor amongst the tied groups to
% break the tie.
% % Examples: % % % training data: two normal components
% training = [mvnrnd([ 1 1], eye(2), 100); ...
% mvnrnd([-1 -1], 2*eye(2), 100)];
% group = [repmat(1,100,1); repmat(2,100,1)];
% gscatter(training(:,1),training(:,2),group);hold on; % % % % some random sample data
% sample = unifrnd(-5, 5, 100, 2);
% % classify the sample using the nearest neighbor classification
% c = knnclassify(sample, training, group); % % gscatter(sample(:,1),sample(:,2),c,'mc'); hold on;
% c3 = knnclassify(sample, training, group, 3);
% gscatter(sample(:,1),sample(:,2),c3,'mc','o');
% Copyright 1993-2002 The MathWorks, Inc.
% $Revision: $ $Date: 2002/04/11 14:24:07 $
% References:
% [1]
if nargin < 3
error('Requires at least three arguments.');
end
% grp2idx sorts a numeric grouping var ascending, and a string grouping
% var by order of first occurrence
[gindex,groups] = grp2idx(group);
nans = find(isnan(gindex));
if length(nans) > 0
TRAIN(nans,:) = [];
gindex(nans) = [];
end
ngroups = length(groups);
gsize = hist(gindex,1:ngroups);
[n,d] = size(TRAIN);
if size(gindex,1) ~= n
error('The length of GROUP must equal the number of rows in TRAINING.');
elseif size(sample,2) ~= d
error('SAMPLE and TRAINING must have the same number of columns.');
end
m = size(sample,1);
if nargin < 4
K = 1;
elseif ~isnumeric(K)
error('K must be numeric.');
end
if nargin < 5 isempty(distance)
distance = 'sqeuclidean';
end
if ischar(distance)
distNames = {'sqeuclidean','cityblock','cosine','correlation','hamming'};
i = strmatch(lower(distance), distNames);
if length(i) > 1
error('stats:knn:AmbiguousDistance', ...
'Ambiguous ''distance'' parameter value: %s.', distance);
elseif isempty(i)
error('stats:knn:UnknownDistance', ...
'Unknown ''distance'' parameter value: %s.', distance);
end
distance = distNames{i};
else
error('stats:knn:InvalidDistance', ...
'The ''distance'' parameter value must be a string.');
end
if nargin < 6
tieBreaker = 'random';
elseif ischar(tieBreaker)
% if K == 1
% warning('Tie breaker is not necessary when K is 1.');
% else
tieNames = {'random','nearest','farthest'};
i = strmatch(lower(tieBreaker), tieNames);
if length(i) > 1
error('stats:knn:AmbiguousDistance', ...
'Ambiguous ''distance'' parameter value: %s.', distance);
elseif isempty(i)
error('stats:knn:UnknownDistance', ...
'Unknown ''distance'' parameter value: %s.', distance);
end
tieBreaker = tieNames{i};
% end
else
error('stats:knn:InvalidDistance', ...
'The ''distance'' parameter value must be a string.');
end
% Calculate the distances from all points in the training set to all points
% in the test set.
dists = distfun(sample,TRAIN,distance);
% find the K nearest
if K >1
[dSorted,dIndex] = sort(dists,2);
dIndex = dIndex(:,1:K);
classes = gindex(dIndex);
% special case when we have one input -- this gets turned into a
% column vector, so we have to turn it back into a row vector.
if size(classes,2) == 1
classes = classes';
end
% count the occurences of the classes
counts = zeros(m,ngroups);
for outer = 1:m
for inner = 1:K
counts(outer,classes(outer,inner)) = counts(outer,classes
(outer,inner)) + 1;
end
end
[L,class] = max(counts,[],2);
% we need to check case where L <= K/2 for possible ties
checkRows = find(L<=(K/2));
for i = 1:numel(checkRows)
ties = counts(checkRows(i),:) == L(checkRows(i));
numTies = sum(ties);
if numTies > 1
choice = find(ties);
switch tieBreaker
case 'random'
% random tie break
tb = randsample(numTies,1);
class(checkRows(i)) = choice(tb);
case 'nearest'
% find the use the closest element of the equal groups
% to break the tie
for inner = 1:K
if ismember(classes(checkRows(i),inner),find(ties))
class(checkRows(i)) = classes(checkRows(i),inner);
break
end
end
case 'farthest'
% find the use the closest element of the equal groups
% to break the tie
for inner = K:-1:1
if ismember(classes(checkRows(i),inner),find(ties))
class(checkRows(i)) = classes(checkRows(i),inner);
break
end
end
end
end
end
else
% Need to deal with a tie
[dSorted,dIndex] = min(dists,[],2);
class = gindex(dIndex);
end
% Convert back to original grouping variable
if isnumeric(group)
groups = str2num(char(groups));
class = groups(class);
elseif ischar(group)
groups = char(groups);
class = groups(class,:);
else %if iscellstr(group)
class = groups(class);
end
function D = distfun(Train, Test, dist)
%DISTFUN Calculate distances from training points to test points.
[n,p] = size(Train);
D = zeros(n,size(Test,1));
numTest = size(Test,1);
switch dist
case 'sqeuclidean'
for i = 1:numTest
D(:,i) = sum((Train - Test(repmat(i,n,1),:)).^2, 2);
end
case 'cityblock'
for i = 1:numTest
D(:,i) = sum(abs(Train - Test(repmat(i,n,1),:)), 2);
end
case {'cosine','correlation'}
% Normalized both the training and test data.
normTrain = sqrt(sum(Train.^2, 2));
normTest = sqrt(sum(Test.^2, 2));
normData = sqrt(sum([Train;Test].^2, 2));
Train = Train ./ normTrain(:,ones(1,size(Train,2)));
if any(normData < eps) % small relative to unit-length data points
error('stats:knn:ZeroTestentroid', ...
'Zero cluster centroid created at iteration %d.',iter);
end
% This can be done without a loop, but the loop saves memory allocations
for i = 1:numTest
D(:,i) = 1 - (Train * Test(i,:)') ./ normTest(i);
end
case 'hamming'
if ~all(ismember(X(:),[0 1]))
error('Non-binary data cannot be clustered using Hamming distance.');
end
for i = 1:numTest
D(:,i) = sum(abs(Train - Test(repmat(i,n,1),:)), 2) / p;
end
end
0 个评论
采纳的回答
the cyclist
2013-1-25
The easiest way to debug this probably to type
>> dbstop if error
before running your code. Then, execute your code, and it will halt when it encounters the error, and enter debug mode. You can then investigate your variables, to see what's going on.
In your case, given the error message, I am guessing that either n or size(Test,1) is much larger than you expect. You should be able to trace back from there.
5 个评论
Image Analyst
2013-1-25
What is repmat(i,n,1)? Is it a 1 by 6 array? I still don't think that would work, because even if it were, you're still giving just 2 dimensions to Test instead of 7. For example Test([1 22 43 4 5 6], 2) really means Test(1,2), Test(22,2), Test(43,2) and so on. It's not specifying each of the 7 dimensions. It's specifying a collection of indexes to use on the first dimension, 2 for the second dimension, and not specifying indexes for dimensions 3-7 at all.
更多回答(0 个)
另请参阅
类别
在 Help Center 和 File Exchange 中查找有关 Hypothesis Tests 的更多信息
Community Treasure Hunt
Find the treasures in MATLAB Central and discover how the community can help you!
Start Hunting!