// runBatch packs a sample set into a single batched input variable, applies the learner to
// the whole batch at once, and back-propagates the cost into grad (and into rgrad when it
// is non-nil).
func (b *BatchRGradienter) runBatch(rv autofunc.RVector, rgrad autofunc.RGradient,
	grad autofunc.Gradient, s sgd.SampleSet) {
	if s.Len() == 0 {
		return
	}

	sampleCount := s.Len()
	firstSample := s.GetSample(0).(VectorSample)
	inputSize := len(firstSample.Input)
	outputSize := len(firstSample.Output)

	// Concatenate every sample's input and expected output into flat batch vectors.
	inVec := make(linalg.Vector, sampleCount*inputSize)
	outVec := make(linalg.Vector, sampleCount*outputSize)
	for i := 0; i < s.Len(); i++ {
		sample := s.GetSample(i)
		vs := sample.(VectorSample)
		copy(inVec[i*inputSize:], vs.Input)
		copy(outVec[i*outputSize:], vs.Output)
	}

	inVar := &autofunc.Variable{inVec}
	if rgrad != nil {
		rVar := autofunc.NewRVariable(inVar, rv)
		result := b.Learner.BatchR(rv, rVar, sampleCount)
		cost := b.CostFunc.CostR(rv, outVec, result)
		// The cost is a single scalar, so the upstream gradient is {1} and the
		// upstream r-gradient is {0}.
		cost.PropagateRGradient(linalg.Vector{1}, linalg.Vector{0}, rgrad, grad)
	} else {
		result := b.Learner.Batch(inVar, sampleCount)
		cost := b.CostFunc.Cost(outVec, result)
		cost.PropagateGradient(linalg.Vector{1}, grad)
	}
}

// RGradient computes the gradient and r-gradient of the cost over every sample in s,
// one sample at a time, accumulating into cached gradient maps that are reused (and
// zeroed) between calls.
func (b *SingleRGradienter) RGradient(rv autofunc.RVector,
	s sgd.SampleSet) (autofunc.Gradient, autofunc.RGradient) {
	if b.gradCache == nil {
		b.gradCache = autofunc.NewGradient(b.Learner.Parameters())
	} else {
		b.gradCache.Zero()
	}
	if b.rgradCache == nil {
		b.rgradCache = autofunc.NewRGradient(b.Learner.Parameters())
	} else {
		b.rgradCache.Zero()
	}

	for i := 0; i < s.Len(); i++ {
		sample := s.GetSample(i)
		vs := sample.(VectorSample)
		output := vs.Output
		inVar := &autofunc.Variable{vs.Input}
		rVar := autofunc.NewRVariable(inVar, rv)
		result := b.Learner.ApplyR(rv, rVar)
		cost := b.CostFunc.CostR(rv, output, result)
		// The cost is a scalar: upstream gradient {1}, upstream r-gradient {0}.
		cost.PropagateRGradient(linalg.Vector{1}, linalg.Vector{0},
			b.rgradCache, b.gradCache)
	}

	return b.gradCache, b.rgradCache
}

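// The sketch below is not part of the original source; it shows one way a caller might
// drive SingleRGradienter.RGradient. It builds an RVector that assigns a random direction
// to every learner parameter and then asks the gradienter for the gradient / r-gradient
// pair. The helper name and the use of math/rand are assumptions; only the RGradient call
// and the Learner.Parameters() accessor are taken from the code above.
func randomDirectionRGradient(g *SingleRGradienter,
	s sgd.SampleSet) (autofunc.Gradient, autofunc.RGradient) {
	rv := autofunc.RVector{}
	for _, param := range g.Learner.Parameters() {
		dir := make(linalg.Vector, len(param.Vector))
		for i := range dir {
			dir[i] = rand.NormFloat64()
		}
		rv[param] = dir
	}
	return g.RGradient(rv, s)
}
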
// manualNetworkSeq runs the step function f over each input sequence by hand, packing the
// current input together with the previous state at every timestep, so tests can compare
// its outputs (and r-outputs) against the block-based implementation.
func manualNetworkSeq(rv autofunc.RVector, f autofunc.RFunc, start *autofunc.Variable,
	ins [][]*autofunc.Variable, stateSize int) (out, outR [][]linalg.Vector) {
	out = make([][]linalg.Vector, len(ins))
	outR = make([][]linalg.Vector, len(ins))
	for seqIdx, inSeq := range ins {
		var state autofunc.RResult = autofunc.NewRVariable(start, rv)
		for _, in := range inSeq {
			inR := rv[in]
			packedIn := append(linalg.Vector{}, in.Output()...)
			packedIn = append(packedIn, state.Output()...)
			packedInR := append(linalg.Vector{}, inR...)
			packedInR = append(packedInR, state.ROutput()...)
			stepOut := f.ApplyR(rv, &autofunc.RVariable{
				Variable:   &autofunc.Variable{Vector: packedIn},
				ROutputVec: packedInR,
			})
			// The step output is the visible output followed by the new state.
			outSize := len(stepOut.Output()) - stateSize
			out[seqIdx] = append(out[seqIdx], stepOut.Output()[:outSize])
			outR[seqIdx] = append(outR[seqIdx], stepOut.ROutput()[:outSize])
			state = &autofunc.RVariable{
				Variable:   &autofunc.Variable{Vector: stepOut.Output()[outSize:]},
				ROutputVec: stepOut.ROutput()[outSize:],
			}
		}
	}
	return
}

func TestMaxPoolingBatchR(t *testing.T) {
	layer := &MaxPoolingLayer{
		XSpan:       5,
		YSpan:       4,
		InputWidth:  17,
		InputHeight: 19,
		InputDepth:  3,
	}
	n := 3
	batchInput := make(linalg.Vector, n*layer.InputWidth*layer.InputHeight*layer.InputDepth)
	for i := range batchInput {
		batchInput[i] = rand.NormFloat64()
	}
	batchRes := &autofunc.Variable{Vector: batchInput}
	rVec := autofunc.RVector{
		batchRes: make(linalg.Vector, len(batchInput)),
	}
	for i := range rVec[batchRes] {
		rVec[batchRes][i] = rand.NormFloat64()
	}
	testRBatcher(t, rVec, layer, autofunc.NewRVariable(batchRes, rVec), n,
		[]*autofunc.Variable{batchRes})
}

func TestConvLayerBatchR(t *testing.T) {
	layer := &ConvLayer{
		FilterCount:  3,
		FilterWidth:  2,
		FilterHeight: 4,
		Stride:       2,
		InputHeight:  17,
		InputWidth:   19,
		InputDepth:   5,
	}
	layer.Randomize()
	n := 3
	batchInput := make(linalg.Vector, n*layer.InputWidth*layer.InputHeight*layer.InputDepth)
	for i := range batchInput {
		batchInput[i] = rand.NormFloat64()
	}
	batchRes := &autofunc.Variable{Vector: batchInput}
	params := []*autofunc.Variable{batchRes, layer.Biases, layer.FilterVar}
	rVec := autofunc.RVector{}
	for _, param := range params {
		vec := make(linalg.Vector, len(param.Vector))
		for i := range vec {
			vec[i] = rand.NormFloat64()
		}
		rVec[param] = vec
	}
	testRBatcher(t, rVec, layer, autofunc.NewRVariable(batchRes, rVec), n, params)
}

// ApplyR is the r-operator analogue of Apply. During training it multiplies the input by a
// freshly sampled dropout mask; at evaluation time it scales the input by the keep
// probability instead.
func (d *DropoutLayer) ApplyR(v autofunc.RVector, in autofunc.RResult) autofunc.RResult {
	if d.Training {
		mask := d.dropoutMask(len(in.Output()))
		maskVar := autofunc.NewRVariable(mask, v)
		return autofunc.MulR(in, maskVar)
	}
	return autofunc.ScaleR(in, d.KeepProbability)
}

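// A minimal usage sketch, not from the original source, of exercising DropoutLayer.ApplyR
// directly: wrap an input variable in an RVariable whose RVector supplies its directional
// derivative, then apply the layer. The field names (KeepProbability, Training) are taken
// from the method above; the function name and concrete values are arbitrary.
func exampleDropoutApplyR() (eval, train autofunc.RResult) {
	layer := &DropoutLayer{KeepProbability: 0.5, Training: false}
	in := &autofunc.Variable{Vector: linalg.Vector{1, 2, 3}}
	rv := autofunc.RVector{in: linalg.Vector{0.1, 0.2, 0.3}}

	// With Training == false the result is simply the input scaled by KeepProbability.
	eval = layer.ApplyR(rv, autofunc.NewRVariable(in, rv))

	// With Training == true a fresh random mask is sampled on every call.
	layer.Training = true
	train = layer.ApplyR(rv, autofunc.NewRVariable(in, rv))
	return
}
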
// StartRState is like StartState but with an RState.
func (l *LSTM) StartRState(rv autofunc.RVector) RState {
	rVar := autofunc.NewRVariable(l.initState, rv)
	return lstmRState{
		Internal:  l.initState.Vector[:len(l.initState.Vector)/2],
		InternalR: rVar.ROutputVec[:len(l.initState.Vector)/2],
		Output:    l.initState.Vector[len(l.initState.Vector)/2:],
		OutputR:   rVar.ROutputVec[len(l.initState.Vector)/2:],
	}
}

// CostR is like Cost but with r-operators: it evaluates the wrapped cost and adds Penalty
// times the squared norm of each regularized variable.
func (r *RegularizingCost) CostR(v autofunc.RVector, a linalg.Vector,
	x autofunc.RResult) autofunc.RResult {
	regFunc := autofunc.SquaredNorm{}
	cost := r.CostFunc.CostR(v, a, x)
	for _, variable := range r.Variables {
		norm := regFunc.ApplyR(v, autofunc.NewRVariable(variable, v))
		cost = autofunc.AddR(cost, autofunc.ScaleR(norm, r.Penalty))
	}
	return cost
}

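// A hedged construction sketch, not in the original source: RegularizingCost wraps another
// cost function and L2-penalizes the listed variables, so the CostR above evaluates
// cost(a, x) + Penalty * sum_i ||w_i||^2. The field names come from the method above; the
// wrapped MeanSquaredCost, the penalty value, and the helper name are arbitrary choices.
func exampleRegularizingCost(weights []*autofunc.Variable) *RegularizingCost {
	return &RegularizingCost{
		Variables: weights,
		Penalty:   1e-3,
		CostFunc:  MeanSquaredCost{},
	}
}
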
// CostR computes the sigmoid cross-entropy cost
// -sum(x*log(sigmoid(a)) + (1-x)*log(1-sigmoid(a))), using the identity
// log(1-sigmoid(a)) = log(sigmoid(-a)).
func (_ SigmoidCECost) CostR(v autofunc.RVector, x linalg.Vector,
	a autofunc.RResult) autofunc.RResult {
	logsig := autofunc.LogSigmoid{}
	log := logsig.ApplyR(v, a)
	invLog := logsig.ApplyR(v, autofunc.ScaleR(a, -1))

	xVar := autofunc.NewRVariable(&autofunc.Variable{x}, v)
	oneMinusX := autofunc.AddScalerR(autofunc.ScaleR(xVar, -1), 1)

	sums := autofunc.AddR(autofunc.MulR(xVar, log), autofunc.MulR(oneMinusX, invLog))
	return autofunc.ScaleR(autofunc.SumAllR(sums), -1)
}

func (b *BlockChecker) testNilUpstreamR(t *testing.T) {
	t.Run("Nil Upstream R", func(t *testing.T) {
		out := b.B.ApplyBlockR(b.RV, []rnn.RState{b.B.StartRState(b.RV)},
			[]autofunc.RResult{autofunc.NewRVariable(b.Input[0][0], b.RV)})

		g1 := autofunc.NewGradient(b.Vars)
		rg1 := autofunc.NewRGradient(b.Vars)
		initLen1 := len(g1)
		c := len(out.PropagateRGradient(nil, nil, nil, rg1, g1))
		if c != 1 {
			t.Errorf("expected %d downstream states, got %d", 1, c)
		}

		g2 := autofunc.NewGradient(b.Vars)
		rg2 := autofunc.NewRGradient(b.Vars)
		initLen2 := len(g2)
		zeroUpstream := make([]linalg.Vector, len(out.Outputs()))
		for i, x := range out.Outputs() {
			zeroUpstream[i] = make(linalg.Vector, len(x))
		}
		nilStateUpstream := make([]rnn.RStateGrad, len(out.RStates()))
		c = len(out.PropagateRGradient(zeroUpstream, zeroUpstream, nilStateUpstream, rg2, g2))
		if c != 1 {
			t.Errorf("expected %d downstream states, got %d", 1, c)
		}

		if len(g1) != initLen1 {
			t.Errorf("all nil gradient length changed from %d to %d", initLen1, len(g1))
		}
		if len(rg1) != initLen1 {
			t.Errorf("all nil r-gradient length changed from %d to %d", initLen1, len(rg1))
		}
		if len(g2) != initLen2 {
			t.Errorf("non-nil gradient length changed from %d to %d", initLen2, len(g2))
		}
		if len(rg2) != initLen2 {
			t.Errorf("non-nil r-gradient length changed from %d to %d", initLen2, len(rg2))
		}

		for i, variable := range b.Vars {
			val1 := g1[variable]
			val2 := g2[variable]
			if !b.vecsEqual(val1, val2) {
				t.Errorf("gradients for var %d don't match: %v and %v", i, val1, val2)
			}
			val1 = rg1[variable]
			val2 = rg2[variable]
			if !b.vecsEqual(val1, val2) {
				t.Errorf("r-gradients for var %d don't match: %v and %v", i, val1, val2)
			}
		}
	})
}

// StartRState is like StartState but with an RState.
func (b *BatcherBlock) StartRState(rv autofunc.RVector) RState {
	if b.Start != nil {
		rVar := autofunc.NewRVariable(b.Start, rv)
		return VecRState{
			State:  rVar.Output(),
			RState: rVar.ROutput(),
		}
	}
	zero := make(linalg.Vector, b.StateSize)
	return VecRState{
		State:  zero,
		RState: zero,
	}
}

// CostR computes the cross-entropy cost -sum(x*log(a) + (1-x)*log(1-a)) with r-operators,
// treating the expected vector x as a constant (empty RVector).
func (_ CrossEntropyCost) CostR(v autofunc.RVector, x linalg.Vector,
	a autofunc.RResult) autofunc.RResult {
	return autofunc.PoolR(a, func(a autofunc.RResult) autofunc.RResult {
		xVar := autofunc.NewRVariable(&autofunc.Variable{x}, autofunc.RVector{})
		logA := autofunc.Log{}.ApplyR(v, a)
		oneMinusA := autofunc.AddScalerR(autofunc.ScaleR(a, -1), 1)
		oneMinusX := autofunc.AddScalerR(autofunc.ScaleR(xVar, -1), 1)
		log1A := autofunc.Log{}.ApplyR(v, oneMinusA)
		errorVec := autofunc.AddR(autofunc.MulR(xVar, logA),
			autofunc.MulR(oneMinusX, log1A))
		return autofunc.ScaleR(autofunc.SumAllR(errorVec), -1)
	})
}

// BatchR applies the gate to a batch of n packed inputs with r-operators. When a peephole
// weight vector is present, each sample's chunk ends with the peephole input, which is
// multiplied element-wise by the peephole weights and added to the dense layer's output
// before the activation.
func (l *lstmGate) BatchR(rv autofunc.RVector, in autofunc.RResult, n int) autofunc.RResult {
	if l.Peephole == nil {
		return l.Activation.ApplyR(rv, l.Dense.BatchR(rv, in, n))
	}
	return autofunc.PoolR(in, func(in autofunc.RResult) autofunc.RResult {
		vecSize := len(in.Output()) / n
		var weightedInputs []autofunc.RResult
		var peepholed []autofunc.RResult
		peephole := autofunc.NewRVariable(l.Peephole, rv)
		for i := 0; i < n; i++ {
			start := vecSize * i
			weightedEnd := start + vecSize - len(l.Peephole.Vector)
			weightedInputs = append(weightedInputs, autofunc.SliceR(in, start, weightedEnd))
			peepholeMe := autofunc.SliceR(in, weightedEnd, (i+1)*vecSize)
			peepholed = append(peepholed, autofunc.MulR(peephole, peepholeMe))
		}
		weighted := l.Dense.BatchR(rv, autofunc.ConcatR(weightedInputs...), n)
		joinedPeep := autofunc.ConcatR(peepholed...)
		return l.Activation.ApplyR(rv, autofunc.AddR(joinedPeep, weighted))
	})
}

// CostR computes the squared Euclidean distance ||x - a||^2 between the actual output x and
// the expected vector a, with r-operators.
func (_ MeanSquaredCost) CostR(v autofunc.RVector, a linalg.Vector,
	x autofunc.RResult) autofunc.RResult {
	aVar := &autofunc.Variable{a.Copy().Scale(-1)}
	aVarR := autofunc.NewRVariable(aVar, v)
	return autofunc.SquaredNorm{}.ApplyR(v, autofunc.AddR(aVarR, x))
}

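// A small usage sketch, not from the original source: evaluate MeanSquaredCost's r-operator
// against a two-dimensional prediction. The RVector maps the prediction variable to the
// direction along which ROutput reports the cost's derivative; the function name and all
// concrete numbers here are arbitrary.
func exampleMeanSquaredCostR() autofunc.RResult {
	expected := linalg.Vector{1, 0}
	actual := &autofunc.Variable{Vector: linalg.Vector{0.5, -1}}
	rv := autofunc.RVector{actual: linalg.Vector{1, 0}}
	// Output() holds ||actual - expected||^2; ROutput() holds its directional derivative.
	return MeanSquaredCost{}.CostR(rv, expected, autofunc.NewRVariable(actual, rv))
}
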
// CostR computes the negative dot product of the expected vector x and the actual output a,
// with r-operators.
func (_ DotCost) CostR(v autofunc.RVector, x linalg.Vector,
	a autofunc.RResult) autofunc.RResult {
	xVar := autofunc.NewRVariable(&autofunc.Variable{x}, v)
	return autofunc.ScaleR(autofunc.SumAllR(autofunc.MulR(xVar, a)), -1)
}

// StartRState is like StartState but with an RState.
func (g *GRU) StartRState(rv autofunc.RVector) RState {
	resVar := autofunc.NewRVariable(g.initState, rv)
	return VecRState{State: resVar.Output(), RState: resVar.ROutput()}
}