dlarray/dlgradient Value to differentiate is non-scalar. It must be a traced real dlarray scalar.

17 次查看(过去 30 天)
Hello, I am working on auto differentiation. But it came up with a error shown as title.
data = randn (3, 5000, 100);
numChannels=size(data,1);
numObservations=size(data,3);
XTrain = data(:,:,1:floor(0.9*numObservations));
XTest = data(:,:,floor(0.9*numObservations)+1:end);
numHiddenUnits=100;
numLatentChannels=1;
layersE = [
sequenceInputLayer(numChannels,Normalization="zscore")
lstmLayer(numHiddenUnits,'OutputMode','sequence')
fullyConnectedLayer(2*numLatentChannels)
samplingLayerSeq
];
Unrecognized function or variable 'samplingLayerSeq'.
layersD = [
sequenceInputLayer(numLatentChannels,Normalization="zscore")
lstmLayer(numHiddenUnits,'OutputMode','sequence')
fullyConnectedLayer(numChannels)
];
netE = dlnetwork(layersE);
netD = dlnetwork(layersD);
numEpochs = 150;
miniBatchSize = 20;
learnRate = 1e-2;
dsTrain = arrayDatastore(XTrain,IterationDimension=3);
numOutputs = 1;
mbq = minibatchqueue(dsTrain,numOutputs, ...
MiniBatchSize = miniBatchSize, ...
MiniBatchFcn=@preprocessMiniBatch, ...
MiniBatchFormat="CBT", ...
PartialMiniBatch="discard");
trailingAvgE = [];
trailingAvgSqE = [];
trailingAvgD = [];
trailingAvgSqD = [];
numObservationsTrain = size(XTrain,3);
numIterationsPerEpoch = ceil(numObservationsTrain / miniBatchSize);
numIterations = numEpochs * numIterationsPerEpoch;
monitor = trainingProgressMonitor( ...
Metrics="Loss", ...
Info="Epoch", ...
XLabel="Iteration");
epoch = 0;
iteration = 0;
% Loop over epochs.
while epoch < numEpochs && ~monitor.Stop
epoch = epoch + 1;
% Shuffle data.
shuffle(mbq);
% Loop over mini-batches.
while hasdata(mbq) && ~monitor.Stop
iteration = iteration + 1;
% Read mini-batch of data.
X = next(mbq);
% X = dlarray(X,'CBT');
% Evaluate loss and gradients.
[loss,gradientsE,gradientsD] = dlfeval(@modelLoss,netE,netD,X);
% Update learnable parameters.
[netE,trailingAvgE,trailingAvgSqE] = adamupdate(netE, ...
gradientsE,trailingAvgE,trailingAvgSqE,iteration,learnRate);
[netD, trailingAvgD, trailingAvgSqD] = adamupdate(netD, ...
gradientsD,trailingAvgD,trailingAvgSqD,iteration,learnRate);
end
end
%% model loss
function [loss,gradientsE,gradientsD] = modelLoss(netE,netD,X)
% Forward through encoder.
[Z,mu,logSigmaSq] = forward(netE,X);
% Forward through decoder.
Y = forward(netD,Z);
% Calculate loss and gradients.
loss = elboLoss(Y,X,mu,logSigmaSq);
[gradientsE,gradientsD] = dlgradient(loss,netE.Learnables,netD.Learnables);
end
%% elboloss
function loss = elboLoss(Y,T,mu,logSigmaSq)
% Reconstruction loss.
reconstructionLoss = mse(Y,T);
% KL divergence.
KL = -0.5 * sum(1 + logSigmaSq - mu.^2 - exp(logSigmaSq),1);
KL = mean(KL);
% Combined loss.
loss = reconstructionLoss + KL;
end
%% preprocess minibatch
function X = preprocessMiniBatch(dataX)
% Concatenate.
X = cat(3,dataX{:});
end
%% class
classdef samplingLayerSeq < nnet.layer.Layer
methods
function layer = samplingLayerSeq(args)
% layer = samplingLayer creates a sampling layer for VAEs.
%
% layer = samplingLayer(Name=name) also specifies the layer
% name.
% Parse input arguments.
arguments
args.Name = "";
end
% Layer properties.
layer.Name = args.Name;
layer.Type = "Sampling";
layer.Description = "Mean and log-variance sampling";
layer.OutputNames = ["out" "mean" "log-variance"];
end
function [Z,mu,logSigmaSq] = predict(~,X)
% [Z,mu,logSigmaSq] = predict(~,Z) Forwards input data through
% the layer at prediction and training time and output the
% result.
%
% Inputs:
% X - Concatenated input data where X(1:K,:) and
% X(K+1:end,:) correspond to the mean and
% log-variances, respectively, and K is the number
% of latent channels.
% Outputs:
% Z - Sampled output
% mu - Mean vector.
% logSigmaSq - Log-variance vector
% Data dimensions.
numLatentChannels = size(X,1)/2;
miniBatchSize = size(X,2);
% Split statistics.
mu = X(1:numLatentChannels,:,:);
logSigmaSq = X(numLatentChannels+1:end,:,:);
sz = size(mu);
epsilon =randn(sz);
% Sample output.
% epsilon = randn(numLatentChannels,miniBatchSize,"like",X);
sigma = exp(.5 * logSigmaSq);
Z = epsilon .* sigma + mu;
% Z = dlarray(Z,'CBT');
end
end
end
Illegal use of reserved keyword "classdef".

回答(1 个)

Ben
Ben 2024-1-5
Your loss in modelLoss has a non-scalar T dimension since the model outputs sequences. You need to compute a scalar loss to use dlgradient. Standard approaches might be to take a sum or mean over the T dimension, but more intricate losses are common too.

类别

Help CenterFile Exchange 中查找有关 Custom Training Loops 的更多信息

标签

Community Treasure Hunt

Find the treasures in MATLAB Central and discover how the community can help you!

Start Hunting!

Translated by