Deep Neural Network with AlexNet training but Objective is not Converging?

Question

h612 2017-5-13

0
链接

此问题的直接链接

https://ww2.mathworks.cn/matlabcentral/answers/340119-deep-neural-network-with-alexnet-training-but-objective-is-not-converging

编辑： h612 2017-5-13

obj.png

Hello, I'm using the MatConvNet DagNN wrapper with an AlexNet architecture. The last few layers of my architecture are:

% ---- Empty DagNN container; layers are attached to it further down -------
net = dagnn.DagNN();

% ---- Image database: the MAT-file keeps it in a variable called 'imdb' ---
imdb_32 = getfield(load('imdb_all_32_pd_norm.mat'), 'imdb');

% ---- Options forwarded to cnn_train_dag ----------------------------------
opts.train.batchSize     = 100;
opts.train.numEpochs     = 100;
opts.train.continue      = true;
opts.train.gpus          = [];
% Single constant learning rate. Per-epoch schedules that were tried before:
%   [0.1 * ones(1,30), 0.01*ones(1,30), 0.001*ones(1,30)]
%   0.002
%   [2e-1*ones(1, 10),  2e-2*ones(1, 5)]
opts.train.learningRate  = 0.2;
opts.train.momentum      = 0.9;
% expDir is not assigned in this excerpt; it must already exist in the
% workspace when this line runs.
opts.train.expDir        = expDir;
opts.train.numSubBatches = 1;

% ---- Options forwarded to getBatch ---------------------------------------
% Hard-wired to 0; the alternative is numel(opts.train.gpus) > 0.
bopts.useGpu = 0;
%% NET
% AlexNet-style DAG: five convolutional stages, two 4096-wide 1x1 "fully
% connected" convolutions, and a classifier with one output channel per
% class, followed by softmax, the training objective and an error monitor.
%
% Where a filter's third dimension is smaller than the number of incoming
% channels (conv2, conv4, conv5), MatConvNet's convolution treats the
% layer as grouped (see vl_nnconv, "filter groups").

% --- Stage 1: conv -> relu -> lrn -> max-pool ------------------------------
net.addLayer('conv1', dagnn.Conv('size', [11 11 3 96], 'hasBias', true, 'stride', [4, 4], 'pad', [20 20 20 20]), {'input'}, {'conv1'},  {'conv1f'  'conv1b'});
net.addLayer('relu1', dagnn.ReLU(), {'conv1'}, {'relu1'}, {});
net.addLayer('lrn1', dagnn.LRN('param', [5 1 2.0000e-05 0.7500]), {'relu1'}, {'lrn1'}, {});
net.addLayer('pool1', dagnn.Pooling('method', 'max', 'poolSize', [3, 3], 'stride', [2 2], 'pad', [0 0 0 0]), {'lrn1'}, {'pool1'}, {});

% --- Stage 2: conv -> relu -> lrn -> max-pool -> dropout -------------------
net.addLayer('conv2', dagnn.Conv('size', [5 5 48 256], 'hasBias', true, 'stride', [1, 1], 'pad', [2 2 2 2]), {'pool1'}, {'conv2'},  {'conv2f'  'conv2b'});
net.addLayer('relu2', dagnn.ReLU(), {'conv2'}, {'relu2'}, {});
net.addLayer('lrn2', dagnn.LRN('param', [5 1 2.0000e-05 0.7500]), {'relu2'}, {'lrn2'}, {});
net.addLayer('pool2', dagnn.Pooling('method', 'max', 'poolSize', [3, 3], 'stride', [2 2], 'pad', [0 0 0 0]), {'lrn2'}, {'pool2'}, {});
net.addLayer('drop2',dagnn.DropOut('rate',0.7),{'pool2'},{'drop2'});

% --- Stages 3-5: three 3x3 convolutions, then max-pool and dropout ---------
net.addLayer('conv3', dagnn.Conv('size', [3 3 256 384], 'hasBias', true, 'stride', [1, 1], 'pad', [1 1 1 1]), {'drop2'}, {'conv3'},  {'conv3f'  'conv3b'});
net.addLayer('relu3', dagnn.ReLU(), {'conv3'}, {'relu3'}, {});
net.addLayer('conv4', dagnn.Conv('size', [3 3 192 384], 'hasBias', true, 'stride', [1, 1], 'pad', [1 1 1 1]), {'relu3'}, {'conv4'},  {'conv4f'  'conv4b'});
net.addLayer('relu4', dagnn.ReLU(), {'conv4'}, {'relu4'}, {});
net.addLayer('conv5', dagnn.Conv('size', [3 3 192 256], 'hasBias', true, 'stride', [1, 1], 'pad', [1 1 1 1]), {'relu4'}, {'conv5'},  {'conv5f'  'conv5b'});
net.addLayer('relu5', dagnn.ReLU(), {'conv5'}, {'relu5'}, {});
net.addLayer('pool5', dagnn.Pooling('method', 'max', 'poolSize', [3 3], 'stride', [2 2], 'pad', [0 0 0 0]), {'relu5'}, {'pool5'}, {});
net.addLayer('drop5',dagnn.DropOut('rate',0.5),{'pool5'},{'drop5'});

% --- "Fully connected" layers expressed as 1x1 convolutions ----------------
% NOTE(review): a 1x1 kernel only behaves as a fully connected layer when
% 'drop5' is 1x1 spatially -- confirm for the actual input size.
net.addLayer('fc6', dagnn.Conv('size', [1 1 256 4096], 'hasBias', true, 'stride', [1, 1], 'pad', [0 0 0 0]), {'drop5'}, {'fc6'},  {'conv6f'  'conv6b'});
net.addLayer('relu6', dagnn.ReLU(), {'fc6'}, {'relu6'}, {});
net.addLayer('fc7', dagnn.Conv('size', [1 1 4096 4096], 'hasBias', true, 'stride', [1, 1], 'pad', [0 0 0 0]), {'relu6'}, {'fc7'},  {'conv7f'  'conv7b'});
net.addLayer('relu7', dagnn.ReLU(), {'fc7'}, {'relu7'}, {});

% --- Classifier ------------------------------------------------------------
% Largest label value in the database, used as the number of classes.
% Assumes labels are class indices 1..C -- TODO confirm against the imdb.
classLabels=max(unique(imdb_32.images.labels));
% FIX: the classifier must emit one score per class. With a single output
% channel the softmax below normalises over one value and is identically
% 1, so 'prob' is constant and no gradient reaches the network.
net.addLayer('classifier', dagnn.Conv('size', [1 1 4096 classLabels], 'hasBias', true, 'stride', [1, 1], 'pad', [0 0 0 0]), {'relu7'}, {'prediction'},  {'conv8f'  'conv8b'});
net.addLayer('prob', dagnn.SoftMax(), {'prediction'}, {'prob'}, {});

% --- Objective and monitoring ----------------------------------------------
% NOTE(review): dagnn.L2Loss is not defined in this excerpt. Confirm what
% target format it expects: 'label' is also fed to the 'classerror' loss
% below, which takes class indices, and an L2 distance between class
% probabilities and raw indices would not be a meaningful objective. The
% built-in alternative for classification is
% dagnn.Loss('loss', 'softmaxlog') applied to 'prediction'.
net.addLayer('l2_loss', dagnn.L2Loss(), {'prob', 'label'}, {'objective'});
net.addLayer('error', dagnn.Loss('loss', 'classerror'), {'prob','label'}, 'error') ;
% Data-augmentation and normalisation settings stored in the network's
% metadata. They are only recorded here; whatever consumes net.meta is
% outside this excerpt.

% Colour deviation is all zeros, so the brightness jitter derived from it
% below is zero as well.
opts.colorDeviation = zeros(3) ;

% jitterFlip was previously assigned twice with the same value; the
% redundant second assignment has been removed.
net.meta.augmentation.jitterFlip = true ;
net.meta.augmentation.jitterLocation = true ;
net.meta.augmentation.jitterBrightness = double(0.1 * opts.colorDeviation) ;
net.meta.augmentation.jitterAspect = [3/4, 4/3] ;
net.meta.augmentation.jitterScale  = [0.4, 1.1] ;
net.meta.augmentation.jitterSaturation = 0.4 ;
net.meta.augmentation.jitterContrast = 0.4 ;
% Alternative aspect-ratio range that was tried:
% net.meta.augmentation.jitterAspect = [2/3, 3/2] ;

% Mean image taken from the image database.
net.meta.normalization.averageImage=imdb_32.images.data_mean;
% Initialise the network parameters (initNet_He is defined outside this
% excerpt), then run training.
initNet_He(net);

% Images whose set flag equals 2 are passed as the validation subset.
valIdx  = find(imdb_32.images.set == 2);
% Batch loader: binds the batch options so the trainer only supplies the
% image database and the batch indices.
batchFn = @(i,b) getBatch(bopts,i,b);

info = cnn_train_dag(net, imdb_32, batchFn, opts.train, 'val', valIdx) ;