// runBatch computes the gradient (and the r-gradient when rgrad is
// non-nil) for one batch of samples by packing the whole batch into a
// single input vector and back-propagating the batched cost.
func (b *BatchRGradienter) runBatch(rv autofunc.RVector, rgrad autofunc.RGradient,
	grad autofunc.Gradient, s sgd.SampleSet) {
	if s.Len() == 0 {
		return
	}

	sampleCount := s.Len()
	firstSample := s.GetSample(0).(VectorSample)
	inputSize := len(firstSample.Input)
	outputSize := len(firstSample.Output)

	// Pack sample i's input at inVec[i*inputSize:] and its expected
	// output at outVec[i*outputSize:].
	inVec := make(linalg.Vector, sampleCount*inputSize)
	outVec := make(linalg.Vector, sampleCount*outputSize)
	for i := 0; i < s.Len(); i++ {
		sample := s.GetSample(i)
		vs := sample.(VectorSample)
		copy(inVec[i*inputSize:], vs.Input)
		copy(outVec[i*outputSize:], vs.Output)
	}

	inVar := &autofunc.Variable{Vector: inVec}
	if rgrad != nil {
		rVar := autofunc.NewRVariable(inVar, rv)
		result := b.Learner.BatchR(rv, rVar, sampleCount)
		cost := b.CostFunc.CostR(rv, outVec, result)
		cost.PropagateRGradient(linalg.Vector{1}, linalg.Vector{0}, rgrad, grad)
	} else {
		result := b.Learner.Batch(inVar, sampleCount)
		cost := b.CostFunc.Cost(outVec, result)
		cost.PropagateGradient(linalg.Vector{1}, grad)
	}
}
// RGradient computes the gradient and r-gradient of the total cost over
// s, accumulating them one sample at a time into cached vectors.
func (b *SingleRGradienter) RGradient(rv autofunc.RVector,
	s sgd.SampleSet) (autofunc.Gradient, autofunc.RGradient) {
	if b.gradCache == nil {
		b.gradCache = autofunc.NewGradient(b.Learner.Parameters())
	} else {
		b.gradCache.Zero()
	}
	if b.rgradCache == nil {
		b.rgradCache = autofunc.NewRGradient(b.Learner.Parameters())
	} else {
		b.rgradCache.Zero()
	}

	for i := 0; i < s.Len(); i++ {
		sample := s.GetSample(i)
		vs := sample.(VectorSample)
		output := vs.Output
		inVar := &autofunc.Variable{Vector: vs.Input}
		rVar := autofunc.NewRVariable(inVar, rv)
		result := b.Learner.ApplyR(rv, rVar)
		cost := b.CostFunc.CostR(rv, output, result)
		cost.PropagateRGradient(linalg.Vector{1}, linalg.Vector{0},
			b.rgradCache, b.gradCache)
	}

	return b.gradCache, b.rgradCache
}
// TotalCost returns the total cost of a layer on a sample set, as
// measured by the cost function c.
//
// The elements of s must be VectorSamples.
func TotalCost(c CostFunc, layer autofunc.Func, s sgd.SampleSet) float64 {
	var totalCost float64
	for i := 0; i < s.Len(); i++ {
		sample := s.GetSample(i)
		vs := sample.(VectorSample)
		inVar := &autofunc.Variable{Vector: vs.Input}
		result := layer.Apply(inVar)
		costOut := c.Cost(vs.Output, result)
		totalCost += costOut.Output()[0]
	}
	return totalCost
}
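// The helper below is a minimal sketch, not part of the original API,
// showing one way TotalCost might be used: reporting an average
// per-sample cost (e.g. on a held-out validation set between epochs).
// The name meanCost is made up for illustration.
func meanCost(c CostFunc, layer autofunc.Func, s sgd.SampleSet) float64 {
	if s.Len() == 0 {
		return 0
	}
	return TotalCost(c, layer, s) / float64(s.Len())
}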
func (g *GradHelper) subBatches(s sgd.SampleSet) <-chan sgd.SampleSet {
	batchSize := g.batchSize()
	res := make(chan sgd.SampleSet, s.Len()/batchSize+1)
	for i := 0; i < s.Len(); i += batchSize {
		bs := batchSize
		if bs > s.Len()-i {
			bs = s.Len() - i
		}
		res <- s.Subset(i, i+bs)
	}
	close(res)
	return res
}
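// A minimal sketch, not part of the original code, of how the buffered
// channel returned by subBatches could be drained by a fixed pool of
// worker goroutines; drainBatches and the process callback are
// hypothetical names, and the sketch assumes the "sync" package is
// imported.
func drainBatches(batches <-chan sgd.SampleSet, workers int, process func(sgd.SampleSet)) {
	var wg sync.WaitGroup
	for i := 0; i < workers; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			// Each worker pulls batches until the channel is closed.
			for batch := range batches {
				process(batch)
			}
		}()
	}
	wg.Wait()
}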
// countCorrect returns the number of samples in s that the network
// classifies correctly, where the predicted class is the index of the
// largest output component.
func countCorrect(n neuralnet.Network, s sgd.SampleSet) int {
	var count int
	for i := 0; i < s.Len(); i++ {
		sample := s.GetSample(i).(neuralnet.VectorSample)
		output := n.Apply(&autofunc.Variable{Vector: sample.Input}).Output()

		// Find the index of the maximum output.
		var maxIdx int
		var maxVal float64
		for j, x := range output {
			if j == 0 || x > maxVal {
				maxIdx = j
				maxVal = x
			}
		}

		if sample.Output[maxIdx] == 1 {
			count++
		}
	}
	return count
}
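// A small companion sketch, not in the original source, that turns
// countCorrect into an accuracy fraction for logging; the accuracy name
// is made up for illustration.
func accuracy(n neuralnet.Network, s sgd.SampleSet) float64 {
	if s.Len() == 0 {
		return 0
	}
	return float64(countCorrect(n, s)) / float64(s.Len())
}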
// Gradient computes the gradient of the total cost over s, accumulating
// it one sample at a time into a cached gradient.
func (b *SingleRGradienter) Gradient(s sgd.SampleSet) autofunc.Gradient {
	if b.gradCache == nil {
		b.gradCache = autofunc.NewGradient(b.Learner.Parameters())
	} else {
		b.gradCache.Zero()
	}
	for i := 0; i < s.Len(); i++ {
		sample := s.GetSample(i)
		vs := sample.(VectorSample)
		output := vs.Output
		inVar := &autofunc.Variable{Vector: vs.Input}
		result := b.Learner.Apply(inVar)
		cost := b.CostFunc.Cost(output, result)
		cost.PropagateGradient(linalg.Vector{1}, b.gradCache)
	}
	return b.gradCache
}
// batch computes the gradient (and r-gradient when rv is non-nil) for a
// sample set, running sub-batches concurrently when the set is large
// enough and more than one goroutine is allowed.
func (g *GradHelper) batch(rv autofunc.RVector, s sgd.SampleSet) (grad autofunc.Gradient,
	rgrad autofunc.RGradient) {
	g.gradCache.variables = g.Learner.Parameters()

	// The results from the previous call are no longer needed, so their
	// storage can be returned to the cache.
	if g.lastGradResult != nil {
		g.gradCache.Free(g.lastGradResult)
	}
	if g.lastRGradResult != nil {
		g.gradCache.FreeR(g.lastRGradResult)
	}

	batchSize := g.batchSize()
	maxGos := g.goroutineCount()
	if s.Len() < batchSize || maxGos < 2 {
		grad, rgrad = g.runSync(rv, s)
	} else {
		grad, rgrad = g.runAsync(rv, s)
	}

	g.lastGradResult = grad
	g.lastRGradResult = rgrad
	return
}
// sampleSetSlice converts a sample set into a slice of Samples.
func sampleSetSlice(s sgd.SampleSet) []Sample {
	res := make([]Sample, s.Len())
	for i := 0; i < s.Len(); i++ {
		res[i] = s.GetSample(i).(Sample)
	}
	return res
}
// TotalCostBlock runs an rnn.Block on a set of Samples and evaluates
// the total output cost.
//
// The batchSize specifies how many samples to run in batches while
// computing the cost.
func TotalCostBlock(b rnn.Block, batchSize int, s sgd.SampleSet, c neuralnet.CostFunc) float64 {
	runner := &rnn.Runner{Block: b}
	var cost float64
	for i := 0; i < s.Len(); i += batchSize {
		var inSeqs, outSeqs [][]linalg.Vector
		for j := i; j < i+batchSize && j < s.Len(); j++ {
			seq := s.GetSample(j).(Sample)
			inSeqs = append(inSeqs, seq.Inputs)
			outSeqs = append(outSeqs, seq.Outputs)
		}
		output := runner.RunAll(inSeqs)
		for j, outSeq := range outSeqs {
			for t, actual := range output[j] {
				expected := outSeq[t]
				actualVar := &autofunc.Variable{Vector: actual}
				cost += c.Cost(expected, actualVar).Output()[0]
			}
		}
	}
	return cost
}
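// A minimal sketch, assumed rather than taken from the original source,
// that normalizes TotalCostBlock by the total number of timesteps so
// that costs stay comparable across sample sets with different sequence
// lengths; meanCostBlock is a made-up name.
func meanCostBlock(b rnn.Block, batchSize int, s sgd.SampleSet, c neuralnet.CostFunc) float64 {
	var timesteps int
	for i := 0; i < s.Len(); i++ {
		timesteps += len(s.GetSample(i).(Sample).Outputs)
	}
	if timesteps == 0 {
		return 0
	}
	return TotalCostBlock(b, batchSize, s, c) / float64(timesteps)
}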
// TotalCostSeqFunc runs a seqfunc.RFunc on a set of Samples and
// evaluates the total output cost.
//
// The batchSize specifies how many samples to run in batches while
// computing the cost.
func TotalCostSeqFunc(f seqfunc.RFunc, batchSize int, s sgd.SampleSet, c neuralnet.CostFunc) float64 {
	var totalCost float64
	for i := 0; i < s.Len(); i += batchSize {
		var inSeqs, outSeqs [][]linalg.Vector
		for j := i; j < i+batchSize && j < s.Len(); j++ {
			seq := s.GetSample(j).(Sample)
			inSeqs = append(inSeqs, seq.Inputs)
			outSeqs = append(outSeqs, seq.Outputs)
		}
		output := f.ApplySeqs(seqfunc.ConstResult(inSeqs))
		for j, actualSeq := range output.OutputSeqs() {
			expectedSeq := outSeqs[j]
			for k, actual := range actualSeq {
				expected := expectedSeq[k]
				actualVar := &autofunc.Variable{Vector: actual}
				totalCost += c.Cost(expected, actualVar).Output()[0]
			}
		}
	}
	return totalCost
}
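// A brief usage sketch, not from the original code: hold out the tail
// of a sample set and report its cost with TotalCostSeqFunc. The 1/10
// split and the batch size of 16 are arbitrary assumptions, and
// validationCost is a made-up name.
func validationCost(f seqfunc.RFunc, c neuralnet.CostFunc, s sgd.SampleSet) float64 {
	holdout := s.Len() / 10
	if holdout == 0 {
		return 0
	}
	validation := s.Subset(s.Len()-holdout, s.Len())
	return TotalCostSeqFunc(f, 16, validation, c)
}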
// createNetwork builds the bidirectional RNN used for training. It
// computes per-feature statistics over the sample set so that the input
// network can rescale features to roughly zero mean and unit variance.
func createNetwork(samples sgd.SampleSet) *rnn.Bidirectional {
	// Accumulate the negated mean of every input feature.
	means := make(linalg.Vector, FeatureCount)
	var count float64
	for i := 0; i < samples.Len(); i++ {
		inputSeq := samples.GetSample(i).(ctc.Sample).Input
		for _, vec := range inputSeq {
			means.Add(vec)
			count++
		}
	}
	means.Scale(-1 / count)

	// Compute the reciprocal standard deviation of every feature.
	stddevs := make(linalg.Vector, FeatureCount)
	for i := 0; i < samples.Len(); i++ {
		inputSeq := samples.GetSample(i).(ctc.Sample).Input
		for _, vec := range inputSeq {
			for j, v := range vec {
				stddevs[j] += math.Pow(v+means[j], 2)
			}
		}
	}
	stddevs.Scale(1 / count)
	for i, x := range stddevs {
		stddevs[i] = 1 / math.Sqrt(x)
	}

	outputNet := neuralnet.Network{
		&neuralnet.DropoutLayer{
			KeepProbability: HiddenDropout,
			Training:        false,
		},
		&neuralnet.DenseLayer{
			InputCount:  HiddenSize * 2,
			OutputCount: OutHiddenSize,
		},
		&neuralnet.HyperbolicTangent{},
		&neuralnet.DenseLayer{
			InputCount:  OutHiddenSize,
			OutputCount: len(cubewhisper.Labels) + 1,
		},
		&neuralnet.LogSoftmaxLayer{},
	}
	outputNet.Randomize()

	inputNet := neuralnet.Network{
		&neuralnet.VecRescaleLayer{
			Biases: means,
			Scales: stddevs,
		},
		&neuralnet.GaussNoiseLayer{
			Stddev:   InputNoise,
			Training: false,
		},
	}
	netBlock := rnn.NewNetworkBlock(inputNet, 0)
	forwardBlock := rnn.StackedBlock{
		netBlock,
		rnn.NewGRU(FeatureCount, HiddenSize),
	}
	backwardBlock := rnn.StackedBlock{
		netBlock,
		rnn.NewGRU(FeatureCount, HiddenSize),
	}

	// Re-initialize every other parameter vector of each block with
	// Gaussian noise.
	for _, block := range []rnn.StackedBlock{forwardBlock, backwardBlock} {
		for i, param := range block.Parameters() {
			if i%2 == 0 {
				for j := range param.Vector {
					param.Vector[j] = rand.NormFloat64() * WeightStddev
				}
			}
		}
	}

	return &rnn.Bidirectional{
		Forward:  &rnn.BlockSeqFunc{Block: forwardBlock},
		Backward: &rnn.BlockSeqFunc{Block: backwardBlock},
		Output:   &rnn.NetworkSeqFunc{Network: outputNet},
	}
}
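// A minimal usage sketch, not taken from the original file: running the
// bidirectional network on a single input sequence. It assumes that
// *rnn.Bidirectional satisfies the seqfunc interface (ApplySeqs), as the
// seqfunc-based cost helpers above expect; applyToSequence is a made-up
// name.
func applyToSequence(net *rnn.Bidirectional, seq []linalg.Vector) []linalg.Vector {
	out := net.ApplySeqs(seqfunc.ConstResult([][]linalg.Vector{seq}))
	return out.OutputSeqs()[0]
}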