Simple multi-layer perceptron
This example builds a simple multi-layer perceptron (MLP) that classifies handwritten digits, trained on the MNIST dataset.
Load the necessary packages.
using Flux, Statistics
using Flux.Data: DataLoader
using Flux: onehotbatch, onecold, logitcrossentropy, @epochs
using Parameters: @with_kw
using CUDAapi
using MLDatasets
if has_cuda()
    @info "CUDA is on"
    import CUDA
    CUDA.allowscalar(false)  # disallow slow scalar indexing on the GPU
end
Set the learning rate, batch size, number of epochs, and the device (GPU, if available) as parameters for the model.
@with_kw mutable struct Args
    η::Float64 = 3e-4       # learning rate
    batchsize::Int = 1024   # batch size
    epochs::Int = 10        # number of epochs
    device::Function = gpu  # move model/data to the GPU, if available
end
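Since @with_kw generates a keyword constructor, an Args instance can be created with the defaults or with any field overridden. A minimal sketch (the override values below are hypothetical, just for illustration):
args = Args()                           # all defaults: η = 3e-4, batchsize = 1024, ...
args = Args(η = 1e-3, batchsize = 256)  # hypothetical overrides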
Load the MNIST dataset from MLDatasets.
function getdata(args)
    # Loading Dataset
    xtrain, ytrain = MLDatasets.MNIST.traindata(Float32)
    xtest, ytest = MLDatasets.MNIST.testdata(Float32)

    # Reshape Data in order to flatten each image into a linear array
    xtrain = Flux.flatten(xtrain)
    xtest = Flux.flatten(xtest)

    # One-hot-encode the labels
    ytrain, ytest = onehotbatch(ytrain, 0:9), onehotbatch(ytest, 0:9)

    # Batching
    train_data = DataLoader(xtrain, ytrain, batchsize=args.batchsize, shuffle=true)
    test_data = DataLoader(xtest, ytest, batchsize=args.batchsize)

    return train_data, test_data
end
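To sanity-check the loader, we can peek at the first batch; a quick sketch, assuming the default batch size of 1024:
train_data, test_data = getdata(Args())
x, y = first(train_data)
size(x)  # (784, 1024): flattened 28×28 pixels × batch size
size(y)  # (10, 1024): one-hot labels × batch size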
This function defines the MLP. The first layer maps the flattened 28×28 input image (784 values) to 32 hidden units with the relu activation function, and the second layer maps those 32 units to the 10 output classes.
function build_model(; imgsize=(28,28,1), nclasses=10)
    return Chain(
        Dense(prod(imgsize), 32, relu),
        Dense(32, nclasses))
end
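A quick way to check the architecture is to run a dummy batch through it; a minimal sketch with made-up inputs:
m = build_model()
x = rand(Float32, 784, 5)  # a dummy batch of five flattened "images"
size(m(x))                 # (10, 5): one column of raw logits per sample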
Define the loss function: logitcrossentropy, averaged over all batches of a data loader.
function loss_all(dataloader, model)
    l = 0f0
    for (x, y) in dataloader
        l += logitcrossentropy(model(x), y)
    end
    return l / length(dataloader)
end
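Note that logitcrossentropy applies the softmax internally, which is numerically more stable than calling softmax on the model output and then crossentropy. A minimal sketch of the equivalence, with hypothetical logits and labels:
ŷ = randn(Float32, 10, 4)           # raw logits for four samples
y = onehotbatch([3, 1, 4, 1], 0:9)  # hypothetical labels
logitcrossentropy(ŷ, y) ≈ Flux.crossentropy(softmax(ŷ), y)  # true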
We use an accuracy function as the validation metric: the fraction of samples whose predicted class (via onecold) matches the true label, averaged over batches.
function accuracy(data_loader, model)
    acc = 0
    for (x, y) in data_loader
        acc += sum(onecold(cpu(model(x))) .== onecold(cpu(y))) / size(x, 2)
    end
    return acc / length(data_loader)
end
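Here onecold is the inverse of onehotbatch: it returns the label with the highest score, so comparing predictions and labels element-wise counts correct classifications. A tiny sketch:
onecold([0.1f0, 0.7f0, 0.2f0], 0:2)  # returns 1, the label whose score is highest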
Define the train function with the ADAM optimiser.
function train(; kws...)
    # Initializing Model parameters
    args = Args(; kws...)

    # Load Data
    train_data, test_data = getdata(args)

    # Construct model
    m = build_model()

    # Move the data and the model to the device (broadcasting over the
    # loader collects it into an array of batches on the device)
    train_data = args.device.(train_data)
    test_data = args.device.(test_data)
    m = args.device(m)
    loss(x, y) = logitcrossentropy(m(x), y)

    ## Training
    evalcb = () -> @show(loss_all(train_data, m))
    opt = ADAM(args.η)

    @epochs args.epochs Flux.train!(loss, params(m), train_data, opt, cb = evalcb)

    @show accuracy(train_data, m)
    @show accuracy(test_data, m)
end
Finally, we train the MLP.
cd(@__DIR__)
train()
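Since train forwards its keyword arguments to Args, hyperparameters can also be overridden at the call site; the values below are hypothetical:
train(η = 1e-3, epochs = 5)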