Feed Forward Back Propagation Help Needed
Wondering if any neural network experts out there feel like helping a novice teacher of machines.
Below is the feed-forward back-propagation script I wrote. It can take in any number of inputs and seems to produce good results. The issue I'm having is with my gradient descent calculation. After reading through quite a bit of literature on the topic, from textbooks to academic papers to reasonably credible-looking tutorials, everything I see says that the gradient descent update is w = w - eta*dCdW (or however you personally represent your cost function).
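For reference, the form I keep seeing written out, with the same half-sum-of-squares cost I compute below, is roughly:

$$ C = \tfrac{1}{2}\sum_j (y_j - a_j)^2, \qquad w \leftarrow w - \eta\,\frac{\partial C}{\partial w} $$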
As you can see below, I have w = w + eta*dCdW, and that is because the textbook form (subtracting) gives me the exact opposite of my desired output, which doesn't exactly fill me with confidence. For example, running the current code with the desired output set to [1;0], I get roughly [0.9937;0.0028], but when I subtract instead, I get roughly [4e-5;1].
If there's a glaring error, throw something at me. However, if the answer is more involved, I would very much appreciate an explanation, and don't skimp on the math. Thank you all very much for your time!
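For completeness, these are the back-propagation equations I believe the code below implements (so if my delta is already wrong, that would explain a lot), with a^{(0)} = x:

$$ \delta^{(L)} = (y - a^{(L)}) \odot a^{(L)} \odot (1 - a^{(L)}), \qquad \delta^{(l)} = \big(W^{(l+1)\,T}\,\delta^{(l+1)}\big) \odot a^{(l)} \odot (1 - a^{(l)}), \qquad \frac{\partial C}{\partial W^{(l)}} = \delta^{(l)}\,a^{(l-1)\,T} $$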
clc
clear
% Network Inputs
tol = 1e-10; % set the tolerance
counter = 1; % initialize the counter
inputWeight = [2 2 2]; % layer sizes: [input, hidden, ..., output]
w = cell(1,numel(inputWeight)-1); % one weight matrix per pair of adjacent layers
for i = 2:numel(inputWeight)
w{i-1} = 0.25*rand([inputWeight(i),inputWeight(i-1)]); % small random initial weights
end
x = rand(inputWeight(1),1); % input layer
y = zeros(inputWeight(end),1);
y(1:2:end) = 1; % target output
m = 1;
eta = 0.5; % learning rate
stopNum = 25000; % stopping index
cellSize = cell(numel(inputWeight)-1,1); % template cell: one entry per layer after the input
a = cellSize; % layer activations
a{1} = 1./(1+exp(-w{1}*x)); % forward pass: logistic sigmoid at every layer
for j = 2:numel(inputWeight)-1
a{j} = 1./(1+exp(-w{j}*a{j-1}));
end
a{end} = 1./(1+exp(-w{end}*a{end-1}));
storeOutput = []; % history of network outputs
storeOutput = [storeOutput a{end}'];
storeCostFn = []; % history of the cost
costFn = 0.5*(sum((y-a{end}).^2)); % cost function -> half the sum of squared errors
storeCostFn = [storeCostFn;costFn];
aPrime = cellSize; % sigmoid derivative a.*(1-a) for each layer
for k = 1:numel(inputWeight)-1
aPrime{k} = a{k}.*(1-a{k});
end
delta = cellSize; % error terms, stored output layer first
dCdW = cellSize; % cost gradients, stored output layer first
wFlip = flip(w); % work backwards, so flip everything to output-first order
aFlip = flipud(a);
aPrimeFlip = flipud(aPrime);
delta{1} = (y-aFlip{1}).*aPrimeFlip{1}; % output-layer error
dCdW{1} = delta{1}*aFlip{2}'; % gradient for the last weight matrix
for m = 2:numel(inputWeight)-1
delta{m} = (wFlip{m-1}'*delta{m-1}).*aPrimeFlip{m}; % propagate the error back through the weights
if m+1 < numel(inputWeight)
dCdW{m} = delta{m}*aFlip{m+1}'; % gradient for a hidden-layer weight matrix
else
dCdW{m} = delta{m}*x'; % first weight matrix uses the input directly
end
end
dCdW = flipud(dCdW); % restore forward (input-to-output) ordering
for n = 1:length(w)
w{n} = w{n} + eta.*dCdW{n}; % the update in question: + converges, - does not
end
while costFn > tol % repeat the forward/backward pass above until the cost is below tol or stopNum is reached
a = cellSize;
a{1} = 1./(1+exp(-w{1}*x));
for j = 2:numel(inputWeight)-1
a{j} = 1./(1+exp(-w{j}*a{j-1}));
end
a{end} = 1./(1+exp(-w{end}*a{end-1}));
storeOutput = [storeOutput a{end}'];
costFn = 0.5*(sum((y-a{end}).^2)); % cost function -> half the sum of squared errors
storeCostFn = [storeCostFn;costFn];
aPrime = cellSize;
for k = 1:numel(inputWeight)-1
aPrime{k} = a{k}.*(1-a{k});
end
delta = cellSize;
dCdW = cellSize;
wFlip = flip(w);
aFlip = flipud(a);
aPrimeFlip = flipud(aPrime);
delta{1} = (y-aFlip{1}).*aPrimeFlip{1};
dCdW{1} = delta{1}*aFlip{2}';
for m = 2:numel(inputWeight)-1
delta{m} = (wFlip{m-1}'*delta{m-1}).*aPrimeFlip{m};
if m+1 < numel(inputWeight)
dCdW{m} = delta{m}*aFlip{m+1}';
else
dCdW{m} = delta{m}*x';
end
end
dCdW = flipud(dCdW);
for n = 1:length(w)
w{n} = w{n} + eta.*dCdW{n};
end
counter = counter + 1;
if counter > stopNum
break
end
end
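Not part of the algorithm, but in case it helps to see what I'm looking at, after the run I just check the cost history and the final output against the target, roughly like this:

figure
semilogy(storeCostFn) % cost at each iteration, log scale
xlabel('iteration'), ylabel('cost')
disp([a{end} y]) % final output next to the target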