function [gradients,loss]= ModelD(k,M,Validate_train,params,NN_layer)
%MODELD Attention-style loss and parameter gradients for one random sample.
%
%   [gradients,loss] = ModelD(k,M,Validate_train,params,NN_layer)
%
%   Inputs
%     k              - number of rows built in the window matrix.
%     M              - number of leading (shuffled) target rows kept; also
%                      the upper bound of the randomly drawn sample index.
%     Validate_train - data array; it is flattened to 2-D, its rows are
%                      shuffled, columns 1:3 are used as inputs and columns
%                      4:end (plus an appended zero column) as targets.
%                      The first three columns must hold at least
%                      NN_layer+3 elements.
%     params         - struct with dlarray fields params.attention.weight,
%                      params.attention.output_weight and
%                      params.attention.bias.
%     NN_layer       - number of elements per window, i.e. the number of
%                      attention scores that are computed.
%
%   Outputs
%     gradients - dlgradient(loss,params).
%     loss      - mse between the attention-weighted window and the window.
%
%   NOTE: dlgradient requires this function to be evaluated through dlfeval
%   (see the Deep Learning Toolbox documentation).

% Unformatted copies of the learnable parameters.
attnWeight = stripdims(squeeze(params.attention.weight));
outWeight = stripdims(squeeze(params.attention.output_weight));
attnBias = stripdims(squeeze(params.attention.bias));

% Collapse any trailing dimensions into columns, then shuffle the rows.
Validate_train = Validate_train(:,:);
nRows = size(Validate_train,1);
shuffled = Validate_train(randperm(nRows),:);
featureData = shuffled(:,1:3);
targetData = [shuffled(:,4:end), zeros(nRows,1)];

% Build the k-by-NN_layer window matrix. Each pass stores the first
% NN_layer elements (linear, column-major order) and then overwrites them
% with the elements three positions further on. Reading both ranges before
% writing is equivalent to the element-by-element form because every index
% is read before it is written within a pass.
% NOTE(review): only the first NN_layer entries are shifted, so entries
% NN_layer+1..NN_layer+3 never change between passes -- confirm this is
% intended rather than a true sliding window.
windowIdx = 1:NN_layer;
Validate_data_x = zeros(k,NN_layer,'like',featureData);
for row = 1:k
    Validate_data_x(row,:) = featureData(windowIdx);
    featureData(windowIdx) = featureData(windowIdx+3);
end

y_in = targetData(1:M,:);

% The drawn index addresses both y_in (M rows) and Validate_data_x (k rows),
% so it is limited to the rows available in both. For M <= k this is the
% same draw as randi([1,M]).
Index = randi([1,min(k,M)],1,1);
X_in = Validate_data_x(Index,:);
yRow = y_in(Index,:);
nTargets = numel(yRow);

% One score per window element. The target row is read cyclically, which
% matches reading the first row of a horizontally tiled copy of yRow
% without building the tile or limiting NN_layer to a fixed tile count.
% The loop is kept element-wise because the parameter shapes are not
% visible here; a non-scalar term still raises an assignment error.
for i = 1:NN_layer
    ht = yRow(mod(i-1,nTargets)+1);
    score(i) = tanh(attnWeight(i).*X_in(i) + outWeight*ht + attnBias(i)); %#ok<AGROW>
end

% Column of scores labelled channel-by-batch so softmax normalises over
% the NN_layer scores.
score = dlarray(score','CB');
a = softmax(score);

% Attention-weighted window as a 1-by-NN_layer row labelled 'CB'.
Vt = dlarray(reshape(stripdims(a),1,[]).*X_in,'CB');

loss = mse(Vt,X_in);
gradients = dlgradient(loss,params);
end