1. CNN
Solutions are from kloudkl/stanford_dl_ex-solutions,
https://github.com/kloudkl/stanford_dl_ex-solutions
1.1 cnnTrain.m
Main CNN training entry point:
function cnnTrain(options)
%% Convolution Neural Network Exercise
% CNNTRAIN  Train a single-layer convolutional network with SGD.
%
%   cnnTrain()         runs with the settings hard-coded below.
%   cnnTrain(options)  forwards the struct OPTIONS to minFuncSGD. Note that
%                      the fields epochs, minibatch, alpha and momentum are
%                      always overwritten in STEP 3.
%
% Instructions
% ------------
%
% This file contains code that helps you get started in building a
% single-layer convolutional neural network. In this exercise, you will
% only need to modify cnnCost.m and minFuncSGD.m. You will not need to
% modify this file.
%%======================================================================
%% STEP 0: Initialize Parameters and Load Data
% Here we initialize some parameters used for the exercise.

% NOTE: nargin < 1 is true when the function is called without an options
% argument. The default is stored in a local variable that is not read
% again anywhere in this function.
% NOTE(review): if minFuncSGD expects options.learning_rate_schedule, this
% should probably be assigned into options instead -- confirm against
% minFuncSGD before changing it.
if nargin < 1
    learning_rate_schedule = 'half_per_epoch';
end

USE_GPU = 0;

% NOTE: the second assignment wins, so the SVHN branch is the one that
% runs. Comment out one of the two lines to choose the dataset.
dataset = 'mnist';
dataset = 'svhn';

% Configuration
imageDim = 28;
numClasses = 10; % Number of classes (MNIST images fall into 10 classes)
% NOTE: filterDim is the side length of each square convolution filter
% (cnnInitParams allocates filterDim x filterDim weights per filter).
filterDim = 9; % Filter size for conv layer
% NOTE: numFilters is how many separate filters are learned; each filter
% has its own weights and its own bias.
numFilters = 20; % Number of filters for conv layer
% NOTE: poolDim is the side length of the square pooling region. The
% convolved map has side imageDim-filterDim+1 and cnnInitParams divides
% that by poolDim, which is why poolDim must divide it exactly.
poolDim = 2; % Pooling dimension, (should divide imageDim-filterDim+1)

%% Load images and labels
switch dataset
    case 'mnist'
        % Load MNIST Train
        addpath ../common/;
        images = loadMNISTImages('../common/train-images-idx3-ubyte');
        images = reshape(images,imageDim,imageDim,[]);
        labels = loadMNISTLabels('../common/train-labels-idx1-ubyte');
        labels(labels==0) = 10; % Remap 0 to 10
        % mean normalization
        % NOTE: images is imageDim x imageDim x numImages after the reshape
        % (28 x 28 x 60000 for the full MNIST training set), so the mean
        % over dimension 3 is a 28 x 28 per-pixel mean image.
        data_mean = mean(images, 3);
        % NOTE: per-pixel standard deviation over the images, also 28 x 28.
        data_std = std(images, 0, 3);
        % NOTE: replacing zeros by 1 would make a later division by
        % data_std safe, but no such division happens in this function;
        % data_std is not used after this line.
        data_std(data_std == 0) = 1;
        % Subtract the mean image from every image; bsxfun expands
        % data_mean along the third dimension.
        images = bsxfun(@minus, images, data_mean);
    case 'svhn'
        % Load SVHN Train
        % NOTE(review): load() without an output creates variables straight
        % from the file; presumably it provides images and labels (the test
        % file below is read through fields of those names) -- confirm.
        % NOTE(review): 'svhh' in the file name looks like a typo for
        % 'svhn'; verify against the file on disk before renaming.
        load('../common/svhh_train_32_32_zeromean_mergeChannel');
        imageDim = 32;
end

% Sampling the data
numImages = size(images, 3);
% numImages = 60000;
% numImages = 10000;
% NOTE: inside an index expression, end is the size of that dimension, so
% 1:min(numImages, end) selects the first numImages items without running
% past the data. With numImages = size(images, 3) this keeps everything;
% the cap matters when one of the overrides above is enabled.
images = images(:, :, 1:min(numImages, end));
labels = labels(1:min(numImages, end), :);

% Initialize Parameters
% Build the unrolled parameter vector for the network.
theta = cnnInitParams(imageDim,filterDim,numFilters,poolDim,numClasses);

% % Transfer to GPU
if USE_GPU
    device = gpuDevice(1);
    device.reset();
    images = gpuArray(images);
    labels = gpuArray(labels);
    theta = gpuArray(theta);
end

%%======================================================================
%% STEP 1: Implement convNet Objective
% Implement the function cnnCost.m.

%%======================================================================
%% STEP 2: Gradient Check
% Use the file computeNumericalGradient.m to check the gradient
% calculation for your cnnCost.m function. You may need to add the
% appropriate path or copy the file to this directory.
DEBUG = false; % set this to true to check gradient
% DEBUG = true;
if DEBUG
    % To speed up gradient checking, we will use a reduced network and
    % a debugging data set
    db_numFilters = 2;
    db_filterDim = 9;
    % Pooling sizes chosen so they divide imageDim-db_filterDim+1
    % (20 for 28-pixel images, 24 for 32-pixel images).
    switch dataset
        case 'mnist'
            db_poolDim = 5;
        case 'svhn'
            db_poolDim = 6;
    end
    numDebugImages = 11; % better to be different from the numClasses
    db_images = images(:,:,1:numDebugImages);
    db_labels = labels(1:numDebugImages);
    db_theta = cnnInitParams(imageDim,db_filterDim,db_numFilters,...
        db_poolDim,numClasses);
    [cost grad] = cnnCost(db_theta,db_images,db_labels,numClasses,...
        db_filterDim,db_numFilters,db_poolDim);
    % Check gradients
    numGrad = computeNumericalGradient( @(x) cnnCost(x,db_images,...
        db_labels,numClasses,db_filterDim,...
        db_numFilters,db_poolDim), db_theta);
    % Use this to visually compare the gradients side by side.
    % Only entries whose relative error exceeds 1e-4 are printed.
    num = numel(grad);
    for n = 1:num
        ratio = abs(grad(n) - numGrad(n)) / (abs(grad(n)) + 1e-6);
        if ratio > 1e-4
            fprintf('%d %10f %10f %10f\n', n, grad(n), numGrad(n), ratio);
        end
    end
    % Should be small. In our implementation, these values are usually
    % less than 1e-9.
    % NOTE: the missing semicolon is deliberate so the value is printed.
    % The variable name shadows the built-in diff inside this function.
    diff = norm(numGrad-grad)/norm(numGrad+grad)
    assert(diff < 1e-9,...
        'Difference too large. Check your gradient computation again');
    % Gradient checking is a stand-alone mode: stop before training.
    return;
end

%%======================================================================
%% Load test data (needed before STEP 3 because it is passed to minFuncSGD)
% Test the performance of the trained model using the MNIST test set. Your
% accuracy should be above 97% after 3 epochs of training
switch dataset
    case 'mnist'
        % Load mnist test
        testImages = loadMNISTImages('../common/t10k-images-idx3-ubyte');
        testImages = reshape(testImages,imageDim,imageDim,[]);
        testLabels = loadMNISTLabels('../common/t10k-labels-idx1-ubyte');
        testLabels(testLabels==0) = 10; % Remap 0 to 10
        % Normalize with the mean image computed from the training set.
        testImages = bsxfun(@minus, testImages, data_mean);
    case 'svhn'
        % % Load SVHN test
        % NOTE(review): 'svhh' in the file name looks like a typo for
        % 'svhn'; verify against the file on disk before renaming.
        test = load('../common/svhh_test_32_32_zeromean_mergeChannel');
        testImages = test.images;
        testLabels = test.labels;
end

% % Transfer to GPU
if USE_GPU
    testImages = gpuArray(testImages);
    testLabels = gpuArray(testLabels);
end

%% STEP 3: Learn Parameters
% Implement minFuncSGD.m, then train the model.
% These assignments overwrite any values passed in by the caller (and
% create the options struct when cnnTrain is called without arguments).
options.epochs = 3;
options.minibatch = 256;
options.alpha = 1e-1;
options.momentum = .95;
% The anonymous function fixes the network hyper-parameters so the
% optimizer only has to supply (theta, data, labels).
opttheta = minFuncSGD(@(x,y,z) cnnCost(x, y, z, numClasses, filterDim, ...
    numFilters, poolDim), theta, images, labels, options, testImages, ...
    testLabels, numClasses, filterDim, numFilters, poolDim);

%%======================================================================
%% STEP 4: Test
% [~, cost, preds]=cnnCost(opttheta, testImages, testLabels, numClasses, ...
% filterDim, numFilters, poolDim, true);
%
% acc = 100 * sum(preds==testLabels) / length(preds);
%
% % Accuracy should be around 97.4% after 3 epochs
% fprintf('Accuracy is %f\n',acc);
1.2 cnnInitParams.m
function theta = cnnInitParams(imageDim,filterDim,numFilters,...
    poolDim,numClasses)
% CNNINITPARAMS  Random initial parameters for the CNN, as one vector.
%
% Initialize parameters for a single layer convolutional neural
% network followed by a softmax layer.
%
% Parameters:
%  imageDim   - height/width of image
%  filterDim  - dimension of convolutional filter
%  numFilters - number of convolutional filters
%  poolDim    - dimension of pooling area
%  numClasses - number of classes to predict
%
% Returns:
%  theta - unrolled parameter vector with initialized weights, laid out as
%          [Wc(:) ; Wd(:) ; bc(:) ; bd(:)], where
%            Wc is filterDim x filterDim x numFilters (conv weights),
%            Wd is numClasses x hiddenSize          (softmax weights),
%            bc is numFilters x 1                   (conv biases, zero),
%            bd is numClasses x 1                   (softmax biases, zero).
%
% Errors:
%  Fails an assertion if filterDim >= imageDim or if poolDim does not
%  divide imageDim - filterDim + 1.

%% Initialize parameters randomly based on layer sizes.
assert(filterDim < imageDim,'filterDim must be less that imageDim');

% NOTE: outDim is the side length of the convolved feature map, not a
% number of filters. With imageDim = 28 and filterDim = 9 it is 20, so
% each filter yields a 20 x 20 map (400 activations per filter).
outDim = imageDim - filterDim + 1; % dimension of convolved image

% assume outDim is multiple of poolDim
% NOTE: the assert guarantees that outDim/poolDim below is an integer.
% The size computation corresponds to tiling the map with poolDim x poolDim
% regions; the pooling itself is done outside this function.
assert(mod(outDim, poolDim)==0,...
    'poolDim must divide imageDim - filterDim + 1');

% NOTE: numFilters is the number of distinct filters applied to an image;
% each slice Wc(:,:,k) holds the weights of one filter. Weights are drawn
% from a normal distribution scaled by 0.1.
Wc = 1e-1*randn(filterDim,filterDim,numFilters);

% From the convolved map to the pooled map: side length after pooling.
outDim = outDim/poolDim;
% One pooled outDim x outDim map per filter, flattened into one vector.
hiddenSize = outDim^2*numFilters;

% we'll choose weights uniformly from the interval [-r, r]
r = sqrt(6) / sqrt(numClasses+hiddenSize+1);
Wd = rand(numClasses, hiddenSize) * 2 * r - r;

bc = zeros(numFilters, 1);
bd = zeros(numClasses, 1);

% Convert weights and bias gradients to the vector form.
% This step will "unroll" (flatten and concatenate together) all
% your parameters into a vector, which can then be used with minFunc.
theta = [Wc(:) ; Wd(:) ; bc(:) ; bd(:)];
end