% Discrete action set: 41 evenly spaced fractions in [-0.5, 0.5]. Elsewhere in
% the script the selected fraction is multiplied by P_ess_max to obtain a_kW.
actions = linspace(-0.5, 0.5, 41);
numActions = numel(actions);

% Q table indexed as (SOC bin, price bin, time step, action), zero-initialized.
Q = zeros([numSOCs, numPrices, T, numActions]);
% 24-entry tariff profile: 140.5 by default, 109.8 for entries 8-10 and 18-21,
% and 87.3 for entries 22-24. The profile is then repeated once so that
% price_real holds 48 entries.
price_real = repmat(140.5, 1, 24);
price_real([8:10, 18:21]) = 109.8;
price_real(22:24) = 87.3;
price_real = repmat(price_real, 1, 2);
% Read the consumption spreadsheet and convert the table to a numeric array.
load_real = readtable('48_consumption_6.1.xlsx');
load_real = table2array(load_real);

% Read the PV generation spreadsheet the same way.
pv_real = readtable("PV_gen.xlsx");
pv_real = table2array(pv_real);

% From here on load_real holds the net load (consumption minus PV generation).
% NOTE(review): assumes both arrays have the same shape; if one were a row and
% the other a column the subtraction would expand to a matrix - confirm
% against the spreadsheets.
load_real = load_real - pv_real;
% Entry p of the critical-load SOC profile is p_crt_val scaled by (24 - p).
% NOTE(review): the loop (or other code) that supplies p is not visible in
% this excerpt - presumably p runs over 1:24; confirm.
p_crt(p) = p_crt_val*(24-p);
% Repeat the critical-load profile once, then perturb every entry with
% zero-mean Gaussian noise of standard deviation 0.03.
% The noise array is sized from the duplicated profile instead of a
% hard-coded 1x48, so the line keeps working if the profile length changes;
% for a 1x48 profile the random draws are the same as before.
% NOTE(review): no rng seed is set in the visible code, so the perturbation
% differs between runs - confirm that is intended.
p_crt = [p_crt, p_crt];
p_crt = p_crt + 0.03*randn(size(p_crt));
% Tariff scaled by its maximum, so the highest price maps to 1.
price_norm = price_real ./ max(price_real);

% Map a value x to a bin index: floor(x * numPrices) + 1, clamped to the range
% 1..numPrices.
% NOTE(review): the script also passes SOC through this handle, while the
% first dimension of Q is sized by numSOCs - confirm numSOCs == numPrices.
discretizeState = @(x) min(numPrices, max(1, 1 + floor(numPrices * x)));
% Per-episode saving; starts as NaN so that only episodes which record a
% saving are counted by the later ~isnan(...) completion count.
saving_history = NaN([1, numEpisodes]);

% Fraction of episodes so far that recorded a saving.
completion_rate = zeros([1, numEpisodes]);

% Per-time-step grid power for the current episode, without and with the
% ESS action applied.
[grid_before_ep, grid_after_ep] = deal(zeros(1, T));
% State index triple for time step t: SOC bin, normalized-price bin, and the
% time step itself.
% NOTE(review): discretizeState bins with numPrices, but the first dimension
% of Q is numSOCs - confirm the two counts are equal.
s_idx = [discretizeState(SOC), discretizeState(price_norm(t)), t];
% Two action choices appear back to back: a uniformly random action index and
% the index of the largest Q value for the current state. As shown, the second
% assignment overwrites the first.
% NOTE(review): the branch selecting between them (presumably an
% epsilon-greedy test) is not visible in this excerpt - confirm.
a_idx = randi(numActions);
[~, a_idx] = max(Q(s_idx(1), s_idx(2), s_idx(3), :));
% Scale the selected fractional action by P_ess_max to get the ESS power.
a_kW = actions(a_idx) * P_ess_max;
% Candidate next SOC. Two formulas appear: one multiplies the SOC change
% (a_kW / ESS_cap) by eff_cha, the other divides it by eff_dch. As shown, the
% second assignment overwrites the first.
% NOTE(review): the branch choosing between them (presumably on the sign of
% a_kW) is not visible in this excerpt - confirm.
SOC_next = SOC + (a_kW / ESS_cap) * eff_cha;
SOC_next = SOC + (a_kW / ESS_cap) / eff_dch;
% Limit handling: each branch pins SOC_next to the violated bound (SOC_max,
% SOC_min, or the critical-load level p_crt(t)) and zeroes the action.
% NOTE(review): the opening `if` of this chain (presumably a test against
% SOC_max) and its closing `end` are not visible in this excerpt.
% NOTE(review): SOC_next is moved to the bound while a_kW is set to 0, so the
% SOC can change even though the recorded power is zero - confirm intended.
SOC_next = SOC_max; a_kW = 0;
elseif SOC_next < SOC_min
SOC_next = SOC_min; a_kW = 0;
elseif SOC_next < p_crt(t)
SOC_next = p_crt(t); a_kW = 0;
% Grid power at step t without the ESS (net load only) and with the ESS
% action added to the net load.
grid_before_ep(t) = load_real(t);
grid_after_ep(t) = load_real(t) + a_kW;
% Append this step's [state indices, action index] as a new row of the
% episode memory.
episode_memory(end+1,:) = [s_idx, a_idx];
% Episode-end update. Runs only when the episode finished (done_flag) and one
% memory row was recorded for each of the T time steps.
% The row count is read with size(...,1): length() returns the largest
% dimension, which for this 4-column memory is 4 while fewer than 4 rows
% exist, so it is not a reliable row count. size(...,1) also matches the
% bound used by the inner loop.
% NOTE(review): the closing `end` keywords for this `if` and the inner `for`
% are not visible in the lines this block covers.
if done_flag && size(episode_memory,1) == T
    % Episode cost of the net load alone vs. the net load plus ESS power,
    % both priced with price_real; their difference is the episode saving.
    cost_before_ep = sum(grid_before_ep .* price_real);
    cost_after_ep = sum(grid_after_ep .* price_real);
    saving_ep = cost_before_ep - cost_after_ep;
    saving_history(ep) = saving_ep;
    % Move every (state, action) entry visited in this episode toward the
    % episode's total saving, using step size alpha.
    for step = 1:size(episode_memory,1)
        s_idx = episode_memory(step,1:3);
        a_idx = episode_memory(step,4);
        Q(s_idx(1), s_idx(2), s_idx(3), a_idx) = ...
            Q(s_idx(1), s_idx(2), s_idx(3), a_idx) + ...
            alpha * (saving_ep - Q(s_idx(1), s_idx(2), s_idx(3), a_idx));
% Shrink the exploration rate by the decay factor.
epsilon = epsilon_decay * epsilon;

% Share of the first ep episodes that recorded a (non-NaN) saving.
completion_rate(ep) = nnz(~isnan(saving_history)) / ep;

% Progress line: episode number, completion flag, episode saving, epsilon.
fprintf("Episode %d: 완주=%d, 절감액=%.2f원, ε=%.3f\n", ep, done_flag, saving_history(ep), epsilon);
% Closes an enclosing construct whose opening line is not visible in this
% excerpt.
end
Episode 10000: 완주=1, 절감액=589706.25원, ε=0.050
Episode 20000: 완주=1, 절감액=811117.50원, ε=0.050
Episode 30000: 완주=1, 절감액=668235.00원, ε=0.050
Episode 40000: 완주=1, 절감액=623130.00원, ε=0.050
Episode 50000: 완주=1, 절감액=598522.50원, ε=0.050
Episode 60000: 완주=1, 절감액=674752.50원, ε=0.050
% Per-time-step grid power for the final evaluation run, without and with
% the ESS action applied; both start as 1-by-T zero rows.
[grid_power_before, grid_power_after] = deal(zeros(1, T));
% Evaluation step for time index t. Grid power without the ESS is the net
% load.
grid_power_before(t) = load_real(t);
% State index triple: SOC bin, normalized-price bin, time step.
% NOTE(review): discretizeState bins with numPrices, but the first dimension
% of Q is numSOCs - confirm the two counts are equal.
s_idx = [discretizeState(SOC), discretizeState(price_norm(t)), t];
% Greedy action: index of the largest Q value for this state (no random
% choice appears in this evaluation step).
[~, a_idx] = max(Q(s_idx(1), s_idx(2), s_idx(3), :));
% Scale the selected fractional action by P_ess_max to get the ESS power.
a_kW = actions(a_idx) * P_ess_max;
% Candidate next SOC. Two formulas appear: one multiplies the SOC change
% (a_kW / ESS_cap) by eff_cha, the other divides it by eff_dch. As shown, the
% second assignment overwrites the first.
% NOTE(review): the branch choosing between them (presumably on the sign of
% a_kW) is not visible in this excerpt - confirm.
SOC_next = SOC + (a_kW / ESS_cap) * eff_cha;
SOC_next = SOC + (a_kW / ESS_cap) / eff_dch;
% Limit handling: each branch pins SOC_next to the violated bound (SOC_max,
% SOC_min, or the critical-load level p_crt(t)) and zeroes the action.
% NOTE(review): the opening `if` of this chain (presumably a test against
% SOC_max) and its closing `end` are not visible in this excerpt.
% NOTE(review): SOC_next is moved to the bound while a_kW is set to 0, so the
% SOC can change even though the recorded power is zero - confirm intended.
SOC_next = SOC_max; a_kW = 0;
elseif SOC_next < SOC_min
SOC_next = SOC_min; a_kW = 0;
elseif SOC_next < p_crt(t)
SOC_next = p_crt(t); a_kW = 0;
% Grid power with the ESS: net load plus the (possibly zeroed) ESS power.
grid_power_after(t) = load_real(t) + a_kW;
% Total cost of the evaluation run without and with the ESS: per-step grid
% power weighted by price_real and summed.
cost_before = sum(price_real .* grid_power_before);
cost_after = sum(price_real .* grid_power_after);

% Saving achieved by the ESS schedule.
saving = cost_before - cost_after;

% Report the cost without the ESS, to three decimals.
fprintf('최종 ESS 미사용 전 전기비용: %.3f 원\n', cost_before);
최종 ESS 미사용 전 전기비용: 6278802.431 원
% Report the total electricity cost with the ESS in use (cost_after), to
% three decimals.
fprintf('최종 ESS 사용 후 전기비용: %.3f 원\n', cost_after);
최종 ESS 사용 후 전기비용: 5594228.681 원
% Report the final saving and the saving as a percentage of cost_before.
fprintf('최종 절감 금액: %.3f 원 (절감률 %.2f%%)\n', saving, saving/cost_before*100);
최종 절감 금액: 684573.750 원 (절감률 10.90%)
% NOTE(review): no figure/subplot calls are visible in this excerpt; as
% written, all four plots target the current axes - confirm against the full
% script.

% Learning curve: recorded saving per episode.
plot(saving_history);
title('Learning Curve');
xlabel('Episode');
ylabel('Total Reward');
yticks(-4e5:1e5:9e5);
grid on;

% SOC trajectory and critical-load level, both scaled to percent.
plot(100*SOC_traj, 'LineWidth', 1);
hold on;
plot(100*p_crt, 'r', 'LineWidth', 1);
title('SOC Trajectory');
ylabel('SOC(%)');
ylim([-5 105]);
legend('SOC', 'Critical Load');
grid on;

% Action taken at each step, drawn as a staircase with x markers.
stairs(act_traj, '-x');
title('Action Trajectory (kW)');
grid on;

% Tariff profile as a staircase.
stairs(price_real);
title('Price');
xlabel('Time');
ylabel('Price');