Simple multi-layer perceptron

This is an example of a simple multi-layer perceptron (MLP). It classifies handwritten digits, and we train it on the MNIST dataset.

Load the necessary packages.

using Flux, Statistics
using Flux.Data: DataLoader
using Flux: onehotbatch, onecold, logitcrossentropy, throttle, @epochs
using Base.Iterators: repeated
using Parameters: @with_kw
using CUDAapi
using MLDatasets

if has_cuda()               # check whether a CUDA-capable GPU is available
    @info "CUDA is on"
    import CUDA             # load the GPU back end
    CUDA.allowscalar(false) # disallow slow scalar indexing on the GPU
end
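
The gpu and cpu helpers from Flux move data between host and device; when no functional GPU is present, gpu simply returns its argument unchanged. A tiny illustration, separate from the model itself:

x = rand(Float32, 3, 2)
xg = gpu(x)     # a CuArray when a GPU is available, otherwise x unchanged
cpu(xg) ≈ x     # true: cpu brings the data back to host memory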


Set the hyperparameters for the model: the learning rate, the batch size, the number of epochs, and the device function that moves the model and data to the GPU when one is available.

@with_kw mutable struct Args
    η::Float64 = 3e-4       # learning rate
    batchsize::Int = 1024   # batch size
    epochs::Int = 10        # number of epochs
    device::Function = gpu  # device to run on: gpu if available, otherwise a no-op
end
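
Thanks to the @with_kw macro from Parameters.jl, an Args object can be created with all defaults or with individual fields overridden by keyword. For example:

args = Args()                          # use all defaults
args = Args(batchsize=256, epochs=5)   # override selected hyperparameters
args.η                                 # 0.0003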


The getdata function loads the MNIST dataset from MLDatasets, flattens each 28×28 image into a vector of 784 pixels, one-hot encodes the labels, and wraps the data in DataLoaders that deliver mini-batches (shuffled for the training set).

function getdata(args)
    # Loading Dataset
    xtrain, ytrain = MLDatasets.MNIST.traindata(Float32)
    xtest, ytest = MLDatasets.MNIST.testdata(Float32)

    # Reshape Data in order to flatten each image into a linear array
    xtrain = Flux.flatten(xtrain)
    xtest = Flux.flatten(xtest)

    # One-hot-encode the labels
    ytrain, ytest = onehotbatch(ytrain, 0:9), onehotbatch(ytest, 0:9)

    # Batching
    train_data = DataLoader(xtrain, ytrain, batchsize=args.batchsize, shuffle=true)
    test_data = DataLoader(xtest, ytest, batchsize=args.batchsize)

    return train_data, test_data
end
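
As a quick check of the shapes (the second dimension is the batch size, 1024 by default; the last batch may be smaller), the first training batch looks as follows:

train_data, test_data = getdata(Args())
x, y = first(train_data)
size(x)   # (784, 1024): flattened pixels by batch size
size(y)   # (10, 1024): one-hot label rows by batch size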


This function defines the MLP. The first Dense layer takes the flattened 28×28 image (784 inputs) and maps it to 32 hidden units with the relu activation function; the second layer maps those 32 units to the 10 output classes.

function build_model(; imgsize=(28,28,1), nclasses=10)
    return Chain(
        Dense(prod(imgsize), 32, relu),
        Dense(32, nclasses))
end
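
The model maps a batch of flattened images (one image per column) to a column of 10 raw scores, or logits, per image. A small sketch with a dummy batch:

m = build_model()
x = rand(Float32, 28*28, 16)   # dummy batch of 16 flattened images
size(m(x))                     # (10, 16): one logit per class, per image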


Define the loss function. loss_all averages the logit cross-entropy loss over all the batches of a data loader.

function loss_all(dataloader, model)
    l = 0f0
    for (x,y) in dataloader
        l += logitcrossentropy(model(x), y)
    end
    l/length(dataloader)
end
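
logitcrossentropy takes the raw logits and is equivalent to, but more numerically stable than, applying softmax followed by crossentropy. A quick sanity check (the values here are purely illustrative):

ŷ = randn(Float32, 10, 4)            # raw logits for 4 samples
y = onehotbatch([0, 3, 7, 9], 0:9)   # one-hot targets
logitcrossentropy(ŷ, y) ≈ Flux.crossentropy(softmax(ŷ), y)   # true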


We use accuracy as a validation metric: for every batch, onecold converts both the model output and the one-hot labels back to class indices, and we count how often they agree.

function accuracy(data_loader, model)
    acc = 0
    for (x,y) in data_loader
        acc += sum(onecold(cpu(model(x))) .== onecold(cpu(y))) / size(x,2)
    end
    acc/length(data_loader)
end
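
onecold is the inverse of onehotbatch: for each column it returns the label with the largest value, which is why it can be applied to the one-hot targets and to the raw model output alike. For example:

labels = [2, 0, 9]
onecold(onehotbatch(labels, 0:9), 0:9) == labels   # true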


Define the train function. It loads the data, builds the model, moves both onto the chosen device, and optimises the logit cross-entropy loss with the ADAM optimiser, reporting the training loss through a callback.

function train(; kws...)
    # Initializing Model parameters 
    args = Args(; kws...)

    # Load Data
    train_data,test_data = getdata(args)

    # Construct model
    m = build_model()
    train_data = args.device.(train_data)
    test_data = args.device.(test_data)
    m = args.device(m)
    loss(x,y) = logitcrossentropy(m(x), y)

    ## Training
    evalcb = () -> @show(loss_all(train_data, m))
    opt = ADAM(args.η)

    @epochs args.epochs Flux.train!(loss, params(m), train_data, opt, cb = evalcb)

    @show accuracy(train_data, m)

    @show accuracy(test_data, m)
end


Finally, we change to the directory of the script and train the MLP.

cd(@__DIR__)
train()
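
Because the keyword arguments of train are forwarded to Args, the hyperparameters can also be overridden at the call site, for example:

train(η=1e-3, epochs=5, batchsize=256)   # larger learning rate, fewer epochs, smaller batches
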

– Adarsh Kumar, Mike J Innes, Andrew Dinhobl, Jerry Ling, natema