Tuesday, April 26, 2016

[Deep Learning] Notes for stanford_dl_ex

1. CNN

Solutions are from kloudkl/stanford_dl_ex-solutions,
https://github.com/kloudkl/stanford_dl_ex-solutions

1.1 cnnTrain.m

The main CNN training script:


function cnnTrain(options)
%% Convolution Neural Network Exercise
%
%  Trains a single-layer convolutional neural network followed by a
%  softmax classifier on MNIST or SVHN, then evaluates on the test set.
%
%  Instructions
%  ------------
%
%  This file contains code that helps you get started in building a
%  single-layer convolutional neural network. In this exercise, you will
%  only need to modify cnnCost.m and cnnminFuncSGD.m. You will not need to
%  modify this file.

%%======================================================================
%% STEP 0: Initialize Parameters and Load Data
%  Here we initialize some parameters used for the exercise.

% Default schedule when no options struct is passed.
% NOTE(review): learning_rate_schedule is never read later in this file,
% and the options fields are overwritten unconditionally before
% minFuncSGD is called -- confirm whether the input argument is honored.
if nargin < 1
    learning_rate_schedule = 'half_per_epoch';
end

USE_GPU = 0;
dataset = 'mnist';
dataset = 'svhn';   % overrides the previous line; the last assignment wins

% Configuration
imageDim = 28;
numClasses = 10;  % Number of classes (MNIST images fall into 10 classes)
filterDim = 9;    % Filter size for conv layer (each filter is filterDim x filterDim)
numFilters = 20;   % Number of filters for conv layer (= number of feature maps)
poolDim = 2;      % Pooling dimension, (should divide imageDim-filterDim+1)

%% Load images and labels
switch dataset
    case 'mnist'
        % Load MNIST Train
        addpath ../common/;
        images = loadMNISTImages('../common/train-images-idx3-ubyte');
        images = reshape(images,imageDim,imageDim,[]);
        labels = loadMNISTLabels('../common/train-labels-idx1-ubyte');
        labels(labels==0) = 10; % Remap 0 to 10
        
        % mean normalization
        % images is imageDim x imageDim x numImages, so data_mean and
        % data_std are imageDim x imageDim (per-pixel, over all images)
        data_mean = mean(images, 3);
        data_std = std(images, 0, 3);
        % guard against division by zero for constant pixels
        % (data_std is not used below; kept for parity with the source)
        data_std(data_std == 0) = 1;
        % subtract the per-pixel mean from every image
        images = bsxfun(@minus, images, data_mean);
    case 'svhn'
        % Load SVHN Train
        % NOTE(review): presumably this .mat file provides images/labels
        % already zero-mean -- confirm against the preprocessing script.
        load('../common/svhh_train_32_32_zeromean_mergeChannel');
        imageDim = 32;
end

% Sampling the data (optionally cap the number of training images by
% uncommenting one of the lines below)
numImages = size(images, 3);
% numImages = 60000;
% numImages = 10000;
% 1:min(numImages, end) keeps at most numImages images without
% indexing past the end of the array
images = images(:, :, 1:min(numImages, end));
labels = labels(1:min(numImages, end), :);

% Initialize Parameters for the conv + pool + softmax network
theta = cnnInitParams(imageDim,filterDim,numFilters,poolDim,numClasses);

% % Transfer to GPU
if USE_GPU
    device = gpuDevice(1);
    device.reset();
    images = gpuArray(images);
    labels = gpuArray(labels);
    theta = gpuArray(theta);
end

%%======================================================================
%% STEP 1: Implement convNet Objective
%  Implement the function cnnCost.m.

%%======================================================================
%% STEP 2: Gradient Check
%  Use the file computeNumericalGradient.m to check the gradient
%  calculation for your cnnCost.m function.  You may need to add the
%  appropriate path or copy the file to this directory.

DEBUG = false;  % set this to true to check gradient
% DEBUG = true;
if DEBUG
    % To speed up gradient checking, we will use a reduced network and
    % a debugging data set
    db_numFilters = 2;
    db_filterDim = 9;
    switch dataset
        case 'mnist'
            db_poolDim = 5;
        case 'svhn'
            db_poolDim = 6;
    end
    numDebugImages = 11; % better to be different from the numClasses
    db_images = images(:,:,1:numDebugImages);
    db_labels = labels(1:numDebugImages);
    db_theta = cnnInitParams(imageDim,db_filterDim,db_numFilters,...
        db_poolDim,numClasses);
    
    [cost grad] = cnnCost(db_theta,db_images,db_labels,numClasses,...
        db_filterDim,db_numFilters,db_poolDim);
    
    
    % Check gradients
    numGrad = computeNumericalGradient( @(x) cnnCost(x,db_images,...
        db_labels,numClasses,db_filterDim,...
        db_numFilters,db_poolDim), db_theta);
    
    % Use this to visually compare the gradients side by side
    num = numel(grad);
    for n = 1:num
        ratio = abs(grad(n) - numGrad(n)) / (abs(grad(n)) + 1e-6);
        if ratio > 1e-4
            fprintf('%d %10f %10f %10f\n', n, grad(n), numGrad(n), ratio);
        end
    end
    % Should be small. In our implementation, these values are usually
    % less than 1e-9.
    diff = norm(numGrad-grad)/norm(numGrad+grad)
    assert(diff < 1e-9,...
        'Difference too large. Check your gradient computation again');
    return;
end

%%======================================================================
%% STEP 4: Test
%  Test the performance of the trained model using the MNIST test set. Your
%  accuracy should be above 97% after 3 epochs of training
%  NOTE(review): the test set is loaded here, before STEP 3 (training),
%  and this header duplicates the "STEP 4: Test" header further below.

switch dataset
    case 'mnist'
        % Load mnist test
        testImages = loadMNISTImages('../common/t10k-images-idx3-ubyte');
        testImages = reshape(testImages,imageDim,imageDim,[]);
        testLabels = loadMNISTLabels('../common/t10k-labels-idx1-ubyte');
        testLabels(testLabels==0) = 10; % Remap 0 to 10
        
        % normalize the test set with the TRAINING-set mean
        testImages = bsxfun(@minus, testImages, data_mean);
    case 'svhn'
        % % Load SVHN test
        test = load('../common/svhh_test_32_32_zeromean_mergeChannel');
        testImages = test.images;
        testLabels = test.labels;
end

% % Transfer to GPU
if USE_GPU
    testImages = gpuArray(testImages);
    testLabels = gpuArray(testLabels);
end

%% STEP 3: Learn Parameters
%  Implement minFuncSGD.m, then train the model.

% NOTE(review): these assignments overwrite any fields supplied through
% the options input argument.
options.epochs = 3;
options.minibatch = 256;
options.alpha = 1e-1;
options.momentum = .95;

opttheta = minFuncSGD(@(x,y,z) cnnCost(x, y, z, numClasses, filterDim, ...
    numFilters, poolDim), theta, images, labels, options, testImages, ...
    testLabels, numClasses, filterDim, numFilters, poolDim);

%%======================================================================
%% STEP 4: Test

% [~, cost, preds]=cnnCost(opttheta, testImages, testLabels, numClasses, ...
%     filterDim, numFilters, poolDim, true);
% 
% acc = 100 * sum(preds==testLabels) / length(preds);
% 
% % Accuracy should be around 97.4% after 3 epochs
% fprintf('Accuracy is %f\n',acc);

1.2 cnnInitParams.m
function theta = cnnInitParams(imageDim,filterDim,numFilters,...
                                poolDim,numClasses)
% Initialize parameters for a single layer convolutional neural
% network followed by a softmax layer.
%                            
% Parameters:
%  imageDim   -  height/width of image
%  filterDim  -  dimension of convolutional filter                            
%  numFilters -  number of convolutional filters
%  poolDim    -  dimension of pooling area
%  numClasses -  number of classes to predict
%
%
% Returns:
%  theta      -  unrolled parameter vector with initialized weights,
%                layout [Wc(:); Wd(:); bc(:); bd(:)]

%% Initialize parameters randomly based on layer sizes.
assert(filterDim < imageDim,'filterDim must be less than imageDim');

% Spatial size of one convolved feature map ('valid' convolution):
% e.g. imageDim = 28, filterDim = 9 gives a 20 x 20 map per filter.
outDim = imageDim - filterDim + 1; % dimension of convolved image

% assume outDim is multiple of poolDim
% (i.e. pooling regions tile the feature map exactly, no overlap)
assert(mod(outDim, poolDim)==0,...
       'poolDim must divide imageDim - filterDim + 1');

% Conv weights: one filterDim x filterDim kernel per filter,
% drawn from a small-variance Gaussian.
Wc = 1e-1*randn(filterDim,filterDim,numFilters);

% Feature-map size after non-overlapping poolDim x poolDim pooling.
outDim = outDim/poolDim;
hiddenSize = outDim^2*numFilters;

% we'll choose weights uniformly from the interval [-r, r]
% (Xavier/Glorot-style scaling based on fan-in + fan-out)
r  = sqrt(6) / sqrt(numClasses+hiddenSize+1);
Wd = rand(numClasses, hiddenSize) * 2 * r - r;

% Biases start at zero: conv biases bc, softmax biases bd.
bc = zeros(numFilters, 1);
bd = zeros(numClasses, 1);

% Convert weights and bias gradients to the vector form.
% This step will "unroll" (flatten and concatenate together) all 
% your parameters into a vector, which can then be used with minFunc. 
theta = [Wc(:) ; Wd(:) ; bc(:) ; bd(:)];

end

No comments:

Post a Comment