// Cost returns total cost of input training items for cross-entropy func (network NN) Cost(inputs []TrainItem) float64 { cost := 0.0 for _, input := range inputs { output := network.FeedForward(input.Values) y, err := matrices.OneHotMatrix(1, input.Distinct, 0, int(input.Label)) if err != nil { panic(err) } first, err := y.Apply(matrices.Negate).Mult(output.Apply(math.Log2)) if err != nil { panic(err) } second, err := y.Apply(matrices.OneMinus).Mult(output.Apply(matrices.OneMinus).Apply(math.Log2)) if err != nil { panic(err) } together, err := first.Sub(second) if err != nil { panic(err) } cost += together.Sum() } return cost / float64(len(inputs)) }
func (network NN) backprop(item TrainItem) ([]matrices.Matrix, []matrices.Matrix) { nablaW := make([]matrices.Matrix, len(network.weights)) nablaB := make([]matrices.Matrix, len(network.biases)) for i, m := range network.weights { nablaW[i] = matrices.InitMatrix(m.Rows(), m.Cols()) } for i, m := range network.biases { nablaB[i] = matrices.InitMatrix(m.Rows(), m.Cols()) } activation := item.Values activations := make([]matrices.Matrix, len(network.weights)+1) activations[0] = activation zs := make([]matrices.Matrix, len(network.weights)) for i := range network.weights { weights := network.weights[i] biases := network.biases[i] multiplied, err := activation.Dot(weights) if err != nil { panic(err) } z, err := multiplied.Add(biases) if err != nil { panic(err) } zs[i] = z activation = z.Sigmoid() activations[i+1] = activation } y, err := matrices.OneHotMatrix(1, item.Distinct, 0, int(item.Label)) if err != nil { panic(err) } // old code with MSE // costDerivative, err := activations[len(activations) - 1].Sub(y) // if err != nil { // panic(err) // } // delta, err := costDerivative.Mult(zs[len(zs) - 1].SigmoidPrime()) // if err != nil { // panic(err) // } // new code with cross-entropy delta, err := activations[len(activations)-1].Sub(y) if err != nil { panic(err) } nablaB[len(nablaB)-1] = delta nablaW[len(nablaW)-1], err = activations[len(activations)-2].Transpose().Dot(delta) if err != nil { panic(err) } for l := 2; l < len(network.layers); l++ { z := zs[len(zs)-l] sp := z.SigmoidPrime() dotted, err := delta.Dot(network.weights[len(network.weights)-l+1].Transpose()) if err != nil { panic(err) } delta, err = dotted.Mult(sp) if err != nil { panic(err) } nablaB[len(nablaB)-l] = delta nablaW[len(nablaW)-l], err = activations[len(activations)-l-1].Transpose().Dot(delta) if err != nil { panic(err) } } return nablaW, nablaB }