#!/usr/bin/python3
# vim: set fileencoding=utf-8 :
import logging

import numpy as np

logger = logging.getLogger(__name__)


class Network:
    """ a simple neural network implementation """

    def __init__(self, layers, activations, seed=None):
        assert len(activations) == len(layers) - 1
        self.activations = []
        self.biases = []
        self.costs = []
        self.derivatives = []
        self.epochs = 0
        self.layers = layers
        self.weights = []

        # look up the activation functions and their derivatives on this class
        for name in activations:
            activation = getattr(self, f'activation_{name}')
            derivative = getattr(self, f'derivative_{name}')
            self.activations.append(activation)
            self.derivatives.append(derivative)

        # initialize weights and biases
        previous = None
        if seed is not None:
            # seed the random number generator to get reproducible results
            np.random.seed(seed)

        for i, layer in enumerate(layers):
            if i == 0:
                previous = layer
                continue

            # initialize with random parameters in [-1, 1) (to break symmetry)
            self.biases.append(2 * np.random.random((layer, 1)) - 1)
            self.weights.append(2 * np.random.random((layer, previous)) - 1)
            previous = layer

    def train(self, x_ik, y_ik, epochs=10000, learning_rate=0.1):
        for epoch in range(epochs):
            self.epochs += 1

            # propagate all data and obtain the results of the NN
            # for the whole dataset (usually done in batches)
            z_ik, a_ik = self.forward_propagation(x_ik, train=True)

            # calculate the gradients via back propagation
            partial_w, partial_b = self.back_propagation(y_ik, z_ik, a_ik)

            # modify weights and biases by the calculated gradients
            self.weights = [w - learning_rate * dw for w, dw in zip(self.weights, partial_w)]
            self.biases = [b - learning_rate * db for b, db in zip(self.biases, partial_b)]

    def back_propagation(self, y_ik, z_ik, a_ik):
        """ calculate gradients of the weights and biases """
        # calculate batch size and shape target data
        y_ik = np.array(y_ik)
        batch_size = y_ik.size // self.layers[-1]
        y_ik = y_ik.reshape((self.layers[-1], batch_size))

        # save derivatives for each layer
        dw = []  # dC/dW
        db = []  # dC/dB
        d_ik = list(None for _ in range(len(self.layers) - 1))  # prefill

        # please refer to the course to understand the maths
        # the selected variable names and indices are consistent with the course
        # all variables are lists of matrices; in matrix multiplication the
        # indices change: a_ij * b_jk = c_ik (and b_jk * a_ij != c_ik)
        # the order of multiplication matters here, as well as the "order" of the indices
        # - if the formula says: a_ij * b_kj = c_ki
        # - you need to transpose a_ij and calculate: b_kj * (a_ij).T

        # last layer: error of the output times the derivative of its activation
        d_ik[-1] = (a_ik[-1] - y_ik) * self.derivatives[-1](z_ik[-1])

        for l in reversed(range(1, len(d_ik))):
            # back propagation: push the error one layer towards the input
            d_ik[l - 1] = self.weights[l].T.dot(d_ik[l]) * self.derivatives[l - 1](z_ik[l - 1])

        # calculate gradients, averaged over the batch
        for layer, delta in enumerate(d_ik, 1):
            dw.append(np.dot(delta, a_ik[layer - 1].T) / float(batch_size))
            db.append(np.dot(delta, np.ones((batch_size, 1))) / float(batch_size))

        # return the derivatives with respect to the weight matrices and biases
        return dw, db

    def forward_propagation(self, x_ik, train=False):
        """ calculate the results of the neural network """
        # calculate batch size and shape input data
        a = np.array(x_ik)
        batch_size = a.size // self.layers[0]
        if a.shape != (self.layers[0], batch_size):
            # logger.warning(
            #     "x_ik needs to be reshaped from %s to (%s, %s)",
            #     a.shape,
            #     self.layers[0],
            #     batch_size,
            # )
            a = a.reshape((self.layers[0], batch_size))

        # save pre-activations and activations for each layer
        z_ik = []
        a_ik = [a]

        # do forward propagation
        for b, w, phi in zip(self.biases, self.weights, self.activations):
            z = np.dot(w, a) + b
            z_ik.append(z)
            a = phi(z)
            a_ik.append(a)

        if train:
            return z_ik, a_ik

        # a is transposed to change its shape:
        # each row now represents the solution for the corresponding input sample
        return a.T

    @staticmethod
    def activation_sigmoid(x):
        """ Sigmoid activation function """
        return 1 / (1 + np.exp(-x))

    @staticmethod
    def activation_linear(x):
        """ Linear activation function """
        return x

    @staticmethod
    def activation_relu(x):
        """ ReLU activation function """
        return np.where(x >= 0, x, 0)

    @staticmethod
    def activation_tanh(x):
        """ Tanh activation function """
        return np.tanh(x)

    @staticmethod
    def activation_leakyrelu(x):
        """ Leaky ReLU activation function """
        return np.where(x >= 0, x, x * 0.01)

    @classmethod
    def derivative_sigmoid(cls, x):
        """ first derivative of the sigmoid activation function """
        return cls.activation_sigmoid(x) * (1 - cls.activation_sigmoid(x))

    @staticmethod
    def derivative_linear(x):
        """ first derivative of the linear activation function """
        return np.ones(x.shape)

    @staticmethod
    def derivative_relu(x):
        """ first derivative of the ReLU activation function """
        return np.where(x > 0, 1, 0)

    @staticmethod
    def derivative_tanh(x):
        """ first derivative of the Tanh activation function """
        return 1 - np.tanh(x)**2

    @staticmethod
    def derivative_leakyrelu(x):
        """ first derivative of the leaky ReLU activation function """
        # slope 0.01 for negative inputs, matching activation_leakyrelu
        return np.where(x > 0, 1, 0.01)


if __name__ == '__main__':
    nn = Network([1, 2, 1], activations=["sigmoid", "linear"], seed=1)
    x = [0.4, 0.5, 0.7, 0.9]
    y = [0.2, 0.6, 0.5, 0.2]
    for n in range(11):
        nn.train(x, y, epochs=1, learning_rate=1.0)
        print(f'{nn.epochs:d}')
    print(nn.weights)
    print(nn.biases)
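
    # A minimal usage sketch, not part of the original demo: after training,
    # forward_propagation() called without train=True returns the network
    # outputs with one row per input sample, so they can be compared against
    # the targets y. Variable names below (predictions, x_n, y_n, p_n) are
    # illustrative additions, not names from the original script.
    predictions = nn.forward_propagation(x)
    for x_n, y_n, p_n in zip(x, y, predictions):
        print(f'input={x_n:.2f} target={y_n:.2f} prediction={p_n[0]:.4f}')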