% Create the environment interface from the Simulink model and agent block
env = rlSimulinkEnv(mdl,agentblk,obsInfo,actInfo);

nI = obsInfo.Dimension(1);      % number of observations (network inputs)
nL = 24;                        % neurons per hidden layer
nO = numel(actInfo.Elements);   % number of discrete actions (network outputs)
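
% Critic network: maps the observed state to a Q-value for each discrete action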
dnn = [
    featureInputLayer(nI,'Name','state','Normalization','none')
    fullyConnectedLayer(nL,'Name','fc1')
    reluLayer('Name','relu1')
    fullyConnectedLayer(nL,'Name','fc2')
    reluLayer('Name','relu2')
    fullyConnectedLayer(nO,'Name','output')];
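
% Wrap the network in a Q-value critic representation with its learning options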
criticOpts = rlRepresentationOptions('LearnRate',0.00025,'GradientThreshold',1);
critic = rlQValueRepresentation(dnn,obsInfo,actInfo,'Observation',{'state'},criticOpts);
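
% DQN agent hyperparameters: vanilla DQN (no double DQN) with a target network
% updated periodically every 4 steps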
agentOpts = rlDQNAgentOptions(...
    'UseDoubleDQN',false, ...
    'TargetUpdateMethod','periodic', ...
    'TargetUpdateFrequency',4, ...
    'ExperienceBufferLength',1000, ...
    'DiscountFactor',0.99, ...
    'MiniBatchSize',32);
% Epsilon-greedy exploration: start fully random and decay towards EpsilonMin
agentOpts.EpsilonGreedyExploration.Epsilon = 1;
agentOpts.EpsilonGreedyExploration.EpsilonMin = 0.2;
agentOpts.EpsilonGreedyExploration.EpsilonDecay = 0.0050;
agentObj = rlDQNAgent(critic,agentOpts);
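
% Optional sanity check (a sketch; assumes obsInfo describes a single
% column-vector observation channel): query the untrained agent for an action
act = getAction(agentObj,{rand(nI,1)});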
maxepisodes = 10000;
maxsteps = ceil(T/Ts);   % T (episode duration) and Ts (sample time) come from the model setup
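
% Train for at most maxepisodes episodes; stop early once an episode reward of 0 is reached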
trainingOpts = rlTrainingOptions('MaxEpisodes',maxepisodes,...
    'MaxStepsPerEpisode',maxsteps,...
    'Verbose',false,...
    'Plots','training-progress',...
    'StopTrainingCriteria','EpisodeReward',...
    'StopTrainingValue', 0);
trainingStats = train(agentObj,env,trainingOpts);
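
% Optional follow-up (a sketch using standard Reinforcement Learning Toolbox
% calls; the file name is illustrative): simulate the trained agent and save it
simOpts = rlSimulationOptions('MaxSteps',maxsteps);
experience = sim(env,agentObj,simOpts);
save('trainedDQNAgent.mat','agentObj');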