I'm making a basic feedforward neural network to solve the XOR gate problem.
Standard settings: input layer + hidden layer + output layer, the constant learning rate of 0.01 and the number of epochs is 500.
Sigmoid activation all the way. Stochastic/Gradient descent for backpropagation.
the hidden layer has 2 neurons. The input and output data:
I can post full code here, but I am certain the problem is from biases I just don't know why?
import Surge
// XOR TABLE DATA
let inputDataAsArray: [[Double]] = [[0.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 1.0]]
let outputDataAsArray: [[Double]] = [[0.0], [1.0], [1.0], [0.0]]
let inputData: Matrix<Double> = Matrix<Double>(inputDataAsArray)
let outputData: Matrix<Double> = Matrix<Double>(outputDataAsArray)
var inputData_samples : Array<Matrix<Double>> = Array()
var outputData_samples : Array<Matrix<Double>> = Array()
for i in 0..<inputDataAsArray.count{
inputData_samples.append(Matrix<Double>([inputDataAsArray[i]]))
outputData_samples.append(Matrix<Double>([outputDataAsArray[i]]))
}
let size = inputData.rows
let neurons = 2 // NUMBER OF NEURONS IN HIDDEN LAYER
var weights0 : Matrix<Double> = random(rows: inputData.columns, columns: neurons)
var biases0 : Matrix<Double> = Matrix<Double>(rows: 1, columns: neurons, repeatedValue: 0.0)
var weights1 : Matrix<Double> = random(rows: neurons, columns: outputData.columns)
var biases1 : Matrix<Double> = Matrix<Double>(rows: 1, columns: outputData.columns, repeatedValue: 0.0)
print("Running...")
let alpha = 0.01
let loops = size * 500
var sampleIndex = 0
for i in 0..<loops{
// FORWARD PROPAGATION
// LAYER 1
sampleIndex = i % size
let j : Int = .random(in: ClosedRange<Int>(uncheckedBounds: (lower: 0, upper: size - 1)))
let a0 = inputData_samples[j]
let output = outputData_samples[j]
let z1: Matrix<Double> = a0 * weights0 + biases0
let a1: Matrix<Double> = sigmoidMatrix(x: z1)
// LAYER 2
let z2 : Matrix<Double> = a1 * weights1 + biases1
let a2 : Matrix<Double> = sigmoidMatrix(x: z2)
// let cost = cross_entropy(size: size, a: a2, y: output)
// BACKPROPAGATION
// LAYER 2
var dz2 : Matrix<Double> = subtractMatrix(x: a2, y: output)
let dw2 : Matrix<Double> = divideMatrix(x: transpose(a1) * dz2 , y: size)
let db2 : Matrix<Double> = divideMatrix(x: dz2, y: size)
// LAYER 1
dz2 = dz2 * transpose(weights1)
let dz1 : Matrix<Double> = sub(y: 1.0, x: a0)
* transpose(a0) * dz2 // multiply(x: part1, y: sub(y: 1.0, x: part2))
let dw1 : Matrix<Double> = divideMatrix(x: transpose(a0) * dz1 , y: size)
let db1 : Matrix<Double> = divideMatrix(x: dz1, y: size)
weights0 = subtractMatrix(x: weights0, y: mul(alpha, x: dw1))
biases0 = subtractMatrix(x: biases0, y: mul(alpha, x: db1))
weights1 = subtractMatrix(x: weights1, y: mul(alpha, x: dw2))
biases1 = subtractMatrix(x: biases1, y: mul(alpha, x: db2))
}
for sample in inputData_samples{
let z1: Matrix<Double> = sample * weights0 + biases0
let a1: Matrix<Double> = sigmoidMatrix(x: z1)
let z2 : Matrix<Double> = a1 * weights1 + biases1
let a2 : Matrix<Double> = sigmoidMatrix(x: z2)
print(a2.description)
}