func main() {
	rand.Seed(time.Now().UnixNano())

	// Output network: maps the concatenation of the forward and
	// backward GRU states at each timestep to log-probabilities
	// over two classes.
	outNet := neuralnet.Network{
		&neuralnet.DenseLayer{
			InputCount:  StateSize * 2,
			OutputCount: 10,
		},
		&neuralnet.Sigmoid{},
		&neuralnet.DenseLayer{
			InputCount:  10,
			OutputCount: 2,
		},
		&neuralnet.LogSoftmaxLayer{},
	}
	outNet.Randomize()

	// Bidirectional RNN: one GRU reads the sequence forward,
	// another reads it backward, and outNet combines the two.
	bd := &rnn.Bidirectional{
		Forward:  &rnn.BlockSeqFunc{B: rnn.NewGRU(2, StateSize)},
		Backward: &rnn.BlockSeqFunc{B: rnn.NewGRU(2, StateSize)},
		Output:   &rnn.NetworkSeqFunc{Network: outNet},
	}

	var samples []seqtoseq.Sample
	var sampleSet sgd.SliceSampleSet
	for i := 0; i < TrainingSize; i++ {
		samples = append(samples, generateSequence())
		sampleSet = append(sampleSet, samples[i])
	}

	g := &sgd.RMSProp{
		Gradienter: &seqtoseq.Gradienter{
			SeqFunc:  bd,
			Learner:  bd,
			CostFunc: neuralnet.DotCost{},
		},
	}

	var epoch int
	sgd.SGDInteractive(g, sampleSet, StepSize, BatchSize, func() bool {
		fmt.Printf("%d epochs: cost=%f\n", epoch, totalCost(bd, sampleSet))
		epoch++
		return true
	})

	// Evaluate on freshly generated sequences. The network emits
	// log-probabilities, so exp(actual[0]) is compared against the
	// expected 0/1 label at each timestep.
	var testingCorrect, testingTotal int
	for j := 0; j < TestingSize; j++ {
		sample := generateSequence()
		inRes := seqfunc.ConstResult([][]linalg.Vector{sample.Inputs})
		output := bd.ApplySeqs(inRes).OutputSeqs()[0]
		for i, expected := range sample.Outputs {
			actual := output[i]
			if math.Abs(expected[0]-math.Exp(actual[0])) < 0.1 {
				testingCorrect++
			}
			testingTotal++
		}
	}
	fmt.Printf("Got %d/%d (%.2f%%)\n", testingCorrect, testingTotal,
		100*float64(testingCorrect)/float64(testingTotal))
}
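The main function above references several identifiers defined elsewhere. Here is a minimal sketch of what they could look like, assuming a toy task with 2-dimensional one-hot inputs where each timestep's label simply echoes its input. The constant values, generateSequence, and the labeling rule are illustrative assumptions, not the original definitions, and the same packages as the surrounding snippets are assumed to be imported.

const (
	StateSize    = 20    // per-direction GRU state size (assumed)
	TrainingSize = 300   // training sequences to generate (assumed)
	TestingSize  = 100   // testing sequences to generate (assumed)
	StepSize     = 0.005 // SGD step size (assumed)
	BatchSize    = 10    // mini-batch size (assumed)
)

// generateSequence is hypothetical: it produces a random-length
// sequence of 2-dimensional one-hot inputs, with each timestep
// labeled by a one-hot class vector (here just echoing the input).
func generateSequence() seqtoseq.Sample {
	var sample seqtoseq.Sample
	length := rand.Intn(10) + 5
	for i := 0; i < length; i++ {
		in := make(linalg.Vector, 2)
		in[rand.Intn(2)] = 1
		sample.Inputs = append(sample.Inputs, in)
		sample.Outputs = append(sample.Outputs, in.Copy())
	}
	return sample
}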
// runBatch computes cost gradients for a batch of samples and
// accumulates them into grad.
func (g *Gradienter) runBatch(grad autofunc.Gradient, set sgd.SampleSet) {
	seqs := sampleSetSlice(set)
	var seqIns [][]linalg.Vector
	for _, s := range seqs {
		seqIns = append(seqIns, s.Inputs)
	}
	output := g.SeqFunc.ApplySeqs(seqfunc.ConstResult(seqIns))

	// Upstream gradient: the cost derivative with respect to each
	// timestep's output, for every sequence in the batch.
	upstream := make([][]linalg.Vector, len(seqIns))
	for i, outSeq := range output.OutputSeqs() {
		us := make([]linalg.Vector, len(outSeq))
		expectedSeq := seqs[i].Outputs
		for j, actual := range outSeq {
			expected := expectedSeq[j]
			us[j] = costFuncDeriv(g.CostFunc, expected, actual)
		}
		upstream[i] = us
	}
	output.PropagateGradient(upstream, grad)
}
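runBatch depends on two helpers that are not shown above: costFuncDeriv and sampleSetSlice. Here is one plausible implementation of each, assuming costFuncDeriv returns the derivative of the cost with respect to a single timestep's actual output and sampleSetSlice extracts the concrete Samples from a generic sample set. These are sketches of the unlisted helpers, not necessarily their original implementations.

// costFuncDeriv computes d(cost)/d(actual) for one timestep by
// treating the actual output as a variable and back-propagating
// through the cost function.
func costFuncDeriv(c neuralnet.CostFunc, expected, actual linalg.Vector) linalg.Vector {
	variable := &autofunc.Variable{Vector: actual}
	deriv := make(linalg.Vector, len(actual))
	cost := c.Cost(expected, variable)
	grad := autofunc.Gradient{variable: deriv}
	cost.PropagateGradient(linalg.Vector{1}, grad)
	return deriv
}

// sampleSetSlice converts a generic sgd.SampleSet into a slice of
// concrete Samples, mirroring the type assertion in TotalCostSeqFunc.
func sampleSetSlice(s sgd.SampleSet) []Sample {
	res := make([]Sample, s.Len())
	for i := range res {
		res[i] = s.GetSample(i).(Sample)
	}
	return res
}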
// sgdOnSequences performs one pass of sign-based gradient descent:
// the gradient is accumulated over all samples, reduced to its
// per-component sign, and applied with a fixed step size.
func sgdOnSequences(f *rnn.Bidirectional, s []seqtoseq.Sample) {
	gradient := autofunc.NewGradient(f.Parameters())
	for _, x := range s {
		inRes := seqfunc.ConstResult([][]linalg.Vector{x.Inputs})
		output := f.ApplySeqs(inRes)

		// For DotCost with log-softmax outputs, the upstream
		// gradient is simply the negated expected vector.
		upstreamGrad := make([]linalg.Vector, len(x.Outputs))
		for i, o := range x.Outputs {
			upstreamGrad[i] = o.Copy().Scale(-1)
		}
		output.PropagateGradient([][]linalg.Vector{upstreamGrad}, gradient)
	}

	// Replace each gradient component with its sign; zero
	// components stay at zero rather than being pushed to -1.
	for _, vec := range gradient {
		for i, x := range vec {
			if x > 0 {
				vec[i] = 1
			} else if x < 0 {
				vec[i] = -1
			}
		}
	}
	gradient.AddToVars(-StepSize)
}
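A hypothetical usage of sgdOnSequences, assuming the bd network and samples slice built in main above:

// Run several passes of sign-based gradient descent; each call
// processes every training sample once and applies one update.
for epoch := 0; epoch < 100; epoch++ {
	sgdOnSequences(bd, samples)
}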
// TotalCostSeqFunc runs a seqfunc.RFunc on a set of
// Samples and evaluates the total output cost.
//
// The batchSize specifies how many samples to run in
// batches while computing the cost.
func TotalCostSeqFunc(f seqfunc.RFunc, batchSize int, s sgd.SampleSet,
	c neuralnet.CostFunc) float64 {
	var totalCost float64
	for i := 0; i < s.Len(); i += batchSize {
		var inSeqs [][]linalg.Vector
		var outSeqs [][]linalg.Vector
		for j := i; j < i+batchSize && j < s.Len(); j++ {
			seq := s.GetSample(j).(Sample)
			inSeqs = append(inSeqs, seq.Inputs)
			outSeqs = append(outSeqs, seq.Outputs)
		}
		output := f.ApplySeqs(seqfunc.ConstResult(inSeqs))
		for j, actualSeq := range output.OutputSeqs() {
			expectedSeq := outSeqs[j]
			for k, actual := range actualSeq {
				expected := expectedSeq[k]
				actualVar := &autofunc.Variable{Vector: actual}
				totalCost += c.Cost(expected, actualVar).Output()[0]
			}
		}
	}
	return totalCost
}
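The totalCost helper called from main's status callback is not shown; it could plausibly be a thin wrapper around TotalCostSeqFunc. Reusing the training batch size and the DotCost criterion here is an assumption.

// totalCost is a hypothetical wrapper matching the call in main:
// it measures the network's total cost over the sample set.
func totalCost(f seqfunc.RFunc, set sgd.SampleSet) float64 {
	return seqtoseq.TotalCostSeqFunc(f, BatchSize, set, neuralnet.DotCost{})
}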