代码一:% 输入输出归一化
[XTrain,input_str] = mapminmax(p) ;
[YTrain,output_str] = mapminmax(q) ;
layers = [ ...
sequenceInputLayer(numFeatures)
bilstmLayer(numHiddenUnits,'OutputMode','sequence')
% lstmLayer(numHiddenUnits)
fullyConnectedLayer(300)
% lstmLayer(numHiddenUnits)
dropoutLayer(0.5)%会使训练结果出现波动
fullyConnectedLayer(numResponses)
];
net = dlnetwork(layers)
% net = gpuArray(net);
% net = dlupdate(@gpuArray,net);
numEpochs = 150;
miniBatchSize = 100;
ds=arrayDatastore([XTrain;YTrain]);
numObservationsTrain = size(YTrain,2);
numIterationsPerEpoch = floor(numObservationsTrain / miniBatchSize);
%adam优化器
averageGrad = [];
averageSqGrad = [];
numIterations = numEpochs * numIterationsPerEpoch;
iteration = 0;
epoch = 0;
mbq = minibatchqueue(ds,...
MiniBatchSize=miniBatchSize,...
MiniBatchFormat=["CBT"]);
monitor = trainingProgressMonitor( ...
Metrics="Loss", ...
Info=["Epoch" "LearnRate"], ...
XLabel="Iteration");
gradThreshold = 1.0;
while epoch < numEpochs && ~monitor.Stop
epoch = epoch + 1;
reset(mbq);
% idx = 1:1:numel(YTrain);
idx = randperm(size(YTrain,2));
XTrain = XTrain(:,idx);%XTrain = XTrain(:,idx);
YTrain = YTrain(:,idx);
k = 0;
% while k < numIterationsPerEpoch && ~monitor.Stop
for i = 1:numel(net.Layers)
if isa(net.Layers(i), 'nnet.internal.cnn.layer.learnable.LearnableParameter')
grad = net.Layers(i).dLdW; % 获取参数的梯度
gradNorm = norm(grad);
if gradNorm > gradThreshold
grad = grad * (gradThreshold / gradNorm); % 对梯度进行修剪
end
net.Layers(i).dLdW = grad; % 更新修剪后的梯度
end
end
while hasdata(mbq) && ~monitor.Stop
k = k + 1;
iteration = iteration + 1;
XY=next(mbq);
X=XY(1:13,:);
Y=XY(14:16,:);
X = dlarray(X, 'CBT');
Y = dlarray(Y, 'CBT');
[loss, gradients] = dlfeval(@modelLoss2, net, X, Y);
[net, averageGrad, averageSqGrad] = adamupdate(net, gradients, averageGrad, averageSqGrad, iteration);%学习率的自适应调整
recordMetrics(monitor, iteration, Loss=loss);
updateInfo(monitor,Epoch=epoch + " of " + numEpochs);
monitor.Progress = 100 * iteration / numIterations;
end
end
代码二:[XTrain,input_str] = mapminmax(p) ;
[YTrain,output_str] = mapminmax(q) ;
layers = [ ...
sequenceInputLayer(numFeatures)
bilstmLayer(numHiddenUnits,'OutputMode','sequence')
fullyConnectedLayer(300)
dropoutLayer(0.5)
fullyConnectedLayer(numResponses)
regressionLayer];
maxEpochs = 150;
miniBatchSize = 200;
options = trainingOptions('adam', ...
'MaxEpochs',maxEpochs, ...
'MiniBatchSize',miniBatchSize, ...
'InitialLearnRate',0.005, ...
'GradientThreshold',1, ...
'Shuffle','never', ...
'Plots','training-progress',...
'Verbose',false);
net = trainNetwork(XTrain,YTrain,layers,options);
我的XTrain\YTrain都一致,不知为何测试集的回归情况,代码一误差很大,代码二误差很小。我的XTrain是13*14000;YTrain是3*14000。