function agents = createDDPGAgents(N)
% createDDPGAgents builds two identical DDPG agents for a 2-state plant
% with an N-dimensional continuous action space.
obsInfo = rlNumericSpec([2 1],'LowerLimit',-100*ones(2,1),'UpperLimit',100*ones(2,1));
actInfo = rlNumericSpec([N 1],'LowerLimit',-100*ones(N,1),'UpperLimit',100*ones(N,1));
% Critic network: separate observation and action input paths joined by a
% common path that outputs a scalar Q-value (hidden-layer sizes are assumed).
obsPath = featureInputLayer(prod(obsInfo.Dimension), Name="obsInLyr");
actPath = featureInputLayer(prod(actInfo.Dimension), Name="actInLyr");
commonPath = [
    concatenationLayer(1, 2, Name="concat")
    fullyConnectedLayer(64)
    reluLayer
    fullyConnectedLayer(1)
    ];
criticNet = layerGraph(obsPath);
criticNet = addLayers(criticNet, actPath);
criticNet = addLayers(criticNet, commonPath);
criticNet = connectLayers(criticNet, "obsInLyr", "concat/in1");
criticNet = connectLayers(criticNet, "actInLyr", "concat/in2");
criticNet = dlnetwork(criticNet);
critic = rlQValueFunction(criticNet, obsInfo, actInfo, ...
    ObservationInputNames="obsInLyr", ...
    ActionInputNames="actInLyr");
% Sanity-check the critic with a random observation and action
getValue(critic, {rand(obsInfo.Dimension)}, {rand(actInfo.Dimension)})
% Actor network maps observations to actions (hidden-layer size is assumed)
actorNet = [
    featureInputLayer(prod(obsInfo.Dimension))
    fullyConnectedLayer(64)
    reluLayer
    fullyConnectedLayer(prod(actInfo.Dimension))];
actorNet = dlnetwork(actorNet);
actor = rlContinuousDeterministicActor(actorNet, obsInfo, actInfo);
agentOptions = rlDDPGAgentOptions( ...
    'DiscountFactor', 0.98, ...
    'MiniBatchSize', 128, ...
    'TargetSmoothFactor', 1e-3, ...
    'ExperienceBufferLength', 1e6);

% Ornstein-Uhlenbeck exploration noise; set before the agents are created so
% that the options take effect.
agentOptions.NoiseOptions.MeanAttractionConstant = 0.1;
agentOptions.NoiseOptions.StandardDeviation = 0.3;
agentOptions.NoiseOptions.StandardDeviationDecayRate = 8e-4;
agentOptions.NoiseOptions

agent1 = rlDDPGAgent(actor, critic, agentOptions);
agent2 = rlDDPGAgent(actor, critic, agentOptions);
agents = [agent1, agent2];
end
function [nextObs, reward, isDone, loggedSignals] = myStepFunction(action, loggedSignals, S)
% Advance the discrete-time plant one step using the first action element.
nextObs = S.A1d*loggedSignals.State + S.B1d*action(1);
loggedSignals.State = nextObs;
% Quadratic penalty on state deviation and control effort.
reward = -1*(1.01*nextObs(1)^2 + 1.01*nextObs(2)^2 + action(1)^2);
if abs(loggedSignals.State(1)) <= 0.05 && abs(loggedSignals.State(2)) <= 0.05
    reward = reward + 10;   % bonus near the origin (value assumed; not given in the original)
end
% End the episode once the state has settled close to the origin.
isDone = abs(loggedSignals.State(1)) <= 0.02 && abs(loggedSignals.State(2)) <= 0.02;
end
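
% The environment also needs a reset function. myResetFunction is referenced
% below but not shown in the original; a minimal sketch, assuming pos1 holds
% the initial 2x1 state vector:
function [initialObs, loggedSignals] = myResetFunction(pos1)
% Reset the logged state to the supplied initial condition.
loggedSignals.State = pos1;
initialObs = loggedSignals.State;
end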
% Environment specifications, function-handle environment, and training.
obsInfo1 = rlNumericSpec([2 1],'LowerLimit',-100*ones(2,1),'UpperLimit',100*ones(2,1));
actInfo1 = rlNumericSpec([S.N 1],'LowerLimit',-100*ones(S.N,1),'UpperLimit',100*ones(S.N,1));
stepFn1 = @(action, loggedSignals) myStepFunction(action, loggedSignals, S);
resetFn1 = @() myResetFunction(pos1);
env = rlFunctionEnv(obsInfo1, actInfo1, stepFn1, resetFn1);

agents = createDDPGAgents(S.N);
trainOpts = rlTrainingOptions( ...
    MaxStepsPerEpisode=1000, ...
    StopTrainingCriteria="AverageReward", ...
    StopTrainingValue=480, ...
    Plots="training-progress");
train(agents, env, trainOpts);
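
% A quick post-training check could look like the following sketch, reusing
% env and the first trained agent with the standard sim workflow:
simOpts = rlSimulationOptions(MaxSteps=1000);
experience = sim(env, agents(1), simOpts);
totalReward = sum(experience.Reward.Data);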