// testRBatcher checks that a batchFuncR's BatchR matches the reference
// autofunc.RFuncBatcher, both in its outputs and in the gradients and
// r-gradients it propagates.
func testRBatcher(t *testing.T, rv autofunc.RVector, b batchFuncR, in autofunc.RResult,
    n int, params []*autofunc.Variable) {
    funcRBatcher := autofunc.RFuncBatcher{F: b}

    t.Run("Forward", func(t *testing.T) {
        expected := funcRBatcher.BatchR(rv, in, n)
        actual := b.BatchR(rv, in, n)
        diff := actual.Output().Copy().Scale(-1).Add(expected.Output()).MaxAbs()
        if diff > 1e-5 {
            t.Errorf("expected output %v but got %v", expected.Output(), actual.Output())
        }
        diff = actual.ROutput().Copy().Scale(-1).Add(expected.ROutput()).MaxAbs()
        if diff > 1e-5 {
            t.Errorf("expected r-output %v but got %v", expected.ROutput(), actual.ROutput())
        }
    })

    t.Run("Backward", func(t *testing.T) {
        expectedOut := funcRBatcher.BatchR(rv, in, n)
        actualOut := b.BatchR(rv, in, n)

        expected := autofunc.NewGradient(params)
        actual := autofunc.NewGradient(params)
        expectedR := autofunc.NewRGradient(params)
        actualR := autofunc.NewRGradient(params)

        outGrad := make(linalg.Vector, len(expectedOut.Output()))
        outGradR := make(linalg.Vector, len(expectedOut.Output()))
        for i := range outGrad {
            outGrad[i] = rand.NormFloat64()
            outGradR[i] = rand.NormFloat64()
        }

        expectedOut.PropagateRGradient(outGrad.Copy(), outGradR.Copy(), expectedR, expected)
        actualOut.PropagateRGradient(outGrad, outGradR, actualR, actual)

        for i, variable := range params {
            actualVec := actual[variable]
            expectedVec := expected[variable]
            diff := actualVec.Copy().Scale(-1).Add(expectedVec).MaxAbs()
            if diff > 1e-5 {
                t.Errorf("variable %d (grad): expected %v got %v", i, expectedVec, actualVec)
            }
            actualVec = actualR[variable]
            expectedVec = expectedR[variable]
            diff = actualVec.Copy().Scale(-1).Add(expectedVec).MaxAbs()
            if diff > 1e-5 {
                t.Errorf("variable %d (rgrad): expected %v got %v", i, expectedVec, actualVec)
            }
        }
    })
}
// testNilUpstreamR verifies that ApplyBlockR treats a nil upstream
// gradient exactly like an explicit all-zero upstream gradient.
func (b *BlockChecker) testNilUpstreamR(t *testing.T) {
    t.Run("Nil Upstream R", func(t *testing.T) {
        out := b.B.ApplyBlockR(b.RV, []rnn.RState{b.B.StartRState(b.RV)},
            []autofunc.RResult{autofunc.NewRVariable(b.Input[0][0], b.RV)})

        g1 := autofunc.NewGradient(b.Vars)
        rg1 := autofunc.NewRGradient(b.Vars)
        initLen1 := len(g1)
        c := len(out.PropagateRGradient(nil, nil, nil, rg1, g1))
        if c != 1 {
            t.Errorf("expected %d downstream states, got %d", 1, c)
        }

        g2 := autofunc.NewGradient(b.Vars)
        rg2 := autofunc.NewRGradient(b.Vars)
        initLen2 := len(g2)
        zeroUpstream := make([]linalg.Vector, len(out.Outputs()))
        for i, x := range out.Outputs() {
            zeroUpstream[i] = make(linalg.Vector, len(x))
        }
        nilStateUpstream := make([]rnn.RStateGrad, len(out.RStates()))
        c = len(out.PropagateRGradient(zeroUpstream, zeroUpstream, nilStateUpstream, rg2, g2))
        if c != 1 {
            t.Errorf("expected %d downstream states, got %d", 1, c)
        }

        if len(g1) != initLen1 {
            t.Errorf("all nil gradient length changed from %d to %d", initLen1, len(g1))
        }
        if len(rg1) != initLen1 {
            t.Errorf("all nil r-gradient length changed from %d to %d", initLen1, len(rg1))
        }
        if len(g2) != initLen2 {
            t.Errorf("non-nil gradient length changed from %d to %d", initLen2, len(g2))
        }
        if len(rg2) != initLen2 {
            t.Errorf("non-nil r-gradient length changed from %d to %d", initLen2, len(rg2))
        }

        for i, variable := range b.Vars {
            val1 := g1[variable]
            val2 := g2[variable]
            if !b.vecsEqual(val1, val2) {
                t.Errorf("gradients for var %d don't match: %v and %v", i, val1, val2)
            }
            val1 = rg1[variable]
            val2 = rg2[variable]
            if !b.vecsEqual(val1, val2) {
                t.Errorf("r-gradients for var %d don't match: %v and %v", i, val1, val2)
            }
        }
    })
}
// RGradient accumulates the cost gradient and r-gradient over every
// sample in s, reusing cached gradient maps between calls.
func (b *SingleRGradienter) RGradient(rv autofunc.RVector, s sgd.SampleSet) (autofunc.Gradient, autofunc.RGradient) {
    if b.gradCache == nil {
        b.gradCache = autofunc.NewGradient(b.Learner.Parameters())
    } else {
        b.gradCache.Zero()
    }
    if b.rgradCache == nil {
        b.rgradCache = autofunc.NewRGradient(b.Learner.Parameters())
    } else {
        b.rgradCache.Zero()
    }

    for i := 0; i < s.Len(); i++ {
        sample := s.GetSample(i)
        vs := sample.(VectorSample)
        output := vs.Output
        inVar := &autofunc.Variable{Vector: vs.Input}
        rVar := autofunc.NewRVariable(inVar, rv)
        result := b.Learner.ApplyR(rv, rVar)
        cost := b.CostFunc.CostR(rv, output, result)
        cost.PropagateRGradient(linalg.Vector{1}, linalg.Vector{0},
            b.rgradCache, b.gradCache)
    }

    return b.gradCache, b.rgradCache
}
func benchmarkConvLayer(b *testing.B, layer *ConvLayer) {
    b.Run("Forward", func(b *testing.B) {
        benchmarkConvLayerForward(b, layer)
    })
    b.Run("Backward", func(b *testing.B) {
        benchmarkConvLayerBackward(b, layer)
    })
    b.Run("Parallel", func(b *testing.B) {
        parallelism := runtime.GOMAXPROCS(0)
        inputs := make(chan *autofunc.Variable, parallelism)
        upstreams := make(chan linalg.Vector, parallelism)
        grads := make(chan autofunc.Gradient, parallelism)
        for i := 0; i < parallelism; i++ {
            testInput := NewTensor3(layer.InputWidth, layer.InputHeight, layer.InputDepth)
            for j := range testInput.Data {
                testInput.Data[j] = rand.NormFloat64()
            }
            inputVar := &autofunc.Variable{Vector: testInput.Data}
            inputs <- inputVar

            upstream := make(linalg.Vector, len(layer.Apply(inputVar).Output()))
            for j := range upstream {
                upstream[j] = rand.NormFloat64()
            }
            upstreams <- upstream

            grads <- autofunc.NewGradient(layer.Parameters())
        }
        b.ResetTimer()
        b.RunParallel(func(pb *testing.PB) {
            benchmarkConvLayerParallel(pb, layer, <-inputs, <-upstreams, <-grads)
        })
    })
}
func BenchmarkDenseLayerBackProp(b *testing.B) {
    net := Network{
        &DenseLayer{InputCount: 1000, OutputCount: 2000},
        &Sigmoid{},
        &DenseLayer{InputCount: 2000, OutputCount: 512},
        &Sigmoid{},
        &DenseLayer{InputCount: 512, OutputCount: 10},
        &Sigmoid{},
    }
    rand.Seed(123)
    net.Randomize()

    inVec := &autofunc.Variable{Vector: make(linalg.Vector, 1000)}
    for i := range inVec.Vector {
        inVec.Vector[i] = rand.Float64()*2 - 1
    }
    downstream := make(linalg.Vector, 10)
    for i := range downstream {
        downstream[i] = 1
    }
    grad := autofunc.NewGradient(net.Parameters())

    b.ResetTimer()
    for i := 0; i < b.N; i++ {
        net.Apply(inVec).PropagateGradient(downstream, grad)
    }
}
func testBatchRGradienter(t *testing.T, batchSize int, b *BatchRGradienter) {
    rand.Seed(batchRGradienterSeed)
    net := Network{
        &DenseLayer{
            InputCount:  10,
            OutputCount: 30,
        },
        &Sigmoid{},
        &DenseLayer{
            InputCount:  30,
            OutputCount: 3,
        },
        &Sigmoid{},
    }
    net.Randomize()
    b.Learner = net.BatchLearner()

    inputs := make([]linalg.Vector, batchSize)
    outputs := make([]linalg.Vector, batchSize)
    for i := range inputs {
        inputVec := make(linalg.Vector, 10)
        outputVec := make(linalg.Vector, 3)
        for j := range inputVec {
            inputVec[j] = rand.NormFloat64()
        }
        for j := range outputVec {
            outputVec[j] = rand.Float64()
        }
        inputs[i] = inputVec
        outputs[i] = outputVec
    }
    samples := VectorSampleSet(inputs, outputs)

    rVector := autofunc.RVector(autofunc.NewGradient(net.Parameters()))
    for _, vec := range rVector {
        for i := range vec {
            vec[i] = rand.NormFloat64()
        }
    }

    single := SingleRGradienter{Learner: net, CostFunc: b.CostFunc}

    expectedGrad := single.Gradient(samples)
    actualGrad := b.Gradient(samples)
    if !vecMapsEqual(expectedGrad, actualGrad) {
        t.Error("bad gradient from Gradient()")
    }

    expectedGrad, expectedRGrad := single.RGradient(rVector, samples)
    actualGrad, actualRGrad := b.RGradient(rVector, samples)
    if !vecMapsEqual(expectedGrad, actualGrad) {
        t.Error("bad gradient from RGradient()")
    }
    if !vecMapsEqual(expectedRGrad, actualRGrad) {
        t.Error("bad r-gradient from RGradient()")
    }
}
// DreamCmd loads a serialized network, runs gradient descent on a
// randomly initialized input image against a dot-product cost with
// desired output {0, 1}, and writes the resulting image to a PNG file.
func DreamCmd(netPath, imgPath string) {
    networkData, err := ioutil.ReadFile(netPath)
    if err != nil {
        fmt.Fprintln(os.Stderr, "Error reading network:", err)
        os.Exit(1)
    }
    network, err := neuralnet.DeserializeNetwork(networkData)
    if err != nil {
        fmt.Fprintln(os.Stderr, "Error deserializing network:", err)
        os.Exit(1)
    }

    convIn := network[1].(*neuralnet.ConvLayer)
    inputImage := &autofunc.Variable{
        Vector: make(linalg.Vector, convIn.InputWidth*convIn.InputHeight*convIn.InputDepth),
    }
    for i := range inputImage.Vector {
        inputImage.Vector[i] = rand.Float64()*0.01 + 0.5
    }

    desiredOut := linalg.Vector{0, 1}
    cost := neuralnet.DotCost{}
    grad := autofunc.NewGradient([]*autofunc.Variable{inputImage})
    for i := 0; i < 1000; i++ {
        output := network.Apply(inputImage)
        costOut := cost.Cost(desiredOut, output)
        grad.Zero()
        log.Println("cost is", costOut.Output()[0])
        costOut.PropagateGradient(linalg.Vector{1}, grad)
        grad.AddToVars(-0.01)
    }

    newImage := image.NewRGBA(image.Rect(0, 0, convIn.InputWidth, convIn.InputHeight))
    var idx int
    for y := 0; y < convIn.InputHeight; y++ {
        for x := 0; x < convIn.InputWidth; x++ {
            r := uint8(0xff * inputImage.Vector[idx])
            g := uint8(0xff * inputImage.Vector[idx+1])
            b := uint8(0xff * inputImage.Vector[idx+2])
            newImage.SetRGBA(x, y, color.RGBA{R: r, G: g, B: b, A: 0xff})
            idx += 3
        }
    }

    output, err := os.Create(imgPath)
    if err != nil {
        fmt.Fprintln(os.Stderr, "Failed to create output file:", err)
        os.Exit(1)
    }
    defer output.Close()
    if err := png.Encode(output, newImage); err != nil {
        fmt.Fprintln(os.Stderr, "Failed to encode image:", err)
        os.Exit(1)
    }
}
// Alloc returns a zeroed gradient for the cache's variables,
// reusing a previously released gradient when one is available.
func (g *gradientCache) Alloc() autofunc.Gradient {
    if len(g.gradients) == 0 {
        return autofunc.NewGradient(g.variables)
    }
    res := g.gradients[len(g.gradients)-1]
    g.gradients = g.gradients[:len(g.gradients)-1]
    res.Zero()
    return res
}
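// Hypothetical counterpart, not in the original source: a sketch of the
// release method that Alloc's pop-from-slice pattern implies. The name
// Free is an assumption introduced only for illustration.
func (g *gradientCache) Free(grad autofunc.Gradient) {
    // Returned gradients are stored un-zeroed; Alloc zeroes them on reuse.
    g.gradients = append(g.gradients, grad)
}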
func TestDenseBackward(t *testing.T) {
    network, inputVar, upstream := denseTestInfo()

    partial0 := 0.1966119333 * upstream[0]
    partial1 := 0.1049935854 * upstream[1]

    input := inputVar.Vector
    weightGradient := [][]float64{
        {partial0 * input[0], partial0 * input[1], partial0 * input[2]},
        {partial1 * input[0], partial1 * input[1], partial1 * input[2]},
    }
    weightVec := network[0].(*DenseLayer).Weights.Data.Vector
    upstreamGradient := []float64{
        weightVec[0]*partial0 + weightVec[3]*partial1,
        weightVec[1]*partial0 + weightVec[4]*partial1,
        weightVec[2]*partial0 + weightVec[5]*partial1,
    }

    params := network.Parameters()
    params = append(params, inputVar)
    actualGrad := autofunc.NewGradient(params)
    output := network.Apply(inputVar)
    output.PropagateGradient(upstream, actualGrad)

    for i, xs := range weightGradient {
        for j, x := range xs {
            actualVec := actualGrad[network[0].(*DenseLayer).Weights.Data]
            actual := actualVec[i*3+j]
            if math.Abs(actual-x) > 1e-6 {
                t.Errorf("weight gradient %d,%d should be %f but got %f", i, j, x, actual)
            }
        }
    }

    biasGradient := []float64{partial0, partial1}
    for i, x := range biasGradient {
        actualVec := actualGrad[network[0].(*DenseLayer).Biases.Var]
        if actual := actualVec[i]; math.Abs(actual-x) > 1e-6 {
            t.Errorf("bias gradient %d should be %f but got %f", i, x, actual)
        }
    }

    for i, x := range upstreamGradient {
        actualVec := actualGrad[inputVar]
        if actual := actualVec[i]; math.Abs(actual-x) > 1e-6 {
            t.Errorf("upstream gradient %d should be %f but got %f", i, x, actual)
        }
    }
}
func TestConvBackward(t *testing.T) {
    layer, input, outGrad := convLayerTestInfo()

    actualGrad := autofunc.NewGradient(append(layer.Parameters(), input))
    layer.Apply(input).PropagateGradient(outGrad, actualGrad)

    convLayer := layer[0].(*ConvLayer)
    expectedGrad := autofunc.Gradient{
        convLayer.FilterVar: []float64{
            9.181420449e-02, 6.070772494e-02, 4.831743717e-02, 6.140456075e-02,
            4.619374891e-02, 9.677697371e-02, 5.711791144e-02, 5.124701355e-02,
            8.690832544e-02, 2.255616739e-02, 9.041001878e-02, 4.383411433e-02,
            1.725619176e-01, 1.501485079e-01, 1.396596513e-01, 8.822688174e-02,
            1.043560711e-01, 1.851411351e-01, 1.769153948e-01, 1.366024735e-01,
            1.678136736e-01, 6.694391158e-02, 1.517132408e-01, 8.335992965e-02,
        },
        convLayer.Biases: []float64{1.333355836e-01, 2.790278869e-01},
        input: []float64{
            1.346240470e-02, 1.840140585e-02, 1.830078429e-02, 3.341979500e-02, 4.527417587e-02,
            6.139417717e-02, 6.285708549e-02, 1.122305051e-01, 0, 0,
            2.692730031e-02, 1.193745091e-02, 2.396698285e-02, 3.739434288e-02, 8.890665566e-02,
            4.124498873e-02, 8.115978953e-02, 1.253480957e-01, 0, 0,
            3.633179008e-02, 3.105761526e-02, 5.291576339e-02, 3.939048624e-02, 8.488640888e-02,
            7.725933595e-02, 6.877644332e-02, 5.033669814e-02, 0, 0,
            7.172645109e-03, 2.625700212e-02, 3.193879788e-02, 3.368514841e-02, 2.737903811e-02,
            6.263677753e-03, 1.786440555e-02, 3.198290875e-02, 0, 0,
            2.969143512e-02, 4.797023692e-02, 3.826207676e-02, 6.320548619e-02, 4.395410081e-02,
            5.088142526e-02, 2.968988521e-02, 6.090264241e-02, 0, 0,
            5.255802153e-02, 1.594788029e-02, 3.863840312e-02, 6.542970202e-02, 6.192735934e-02,
            6.301981015e-03, 3.169670830e-02, 6.425452037e-02, 0, 0,
            4.337086165e-02, 3.224390653e-02, 3.146379199e-02, 1.187088457e-02, 5.068287349e-02,
            3.269456802e-02, 3.291436767e-02, 1.194641079e-02, 0, 0,
        },
    }

    varNames := map[*autofunc.Variable]string{
        convLayer.FilterVar: "filters",
        convLayer.Biases:    "biases",
        input:               "input",
    }
    for variable, expected := range expectedGrad {
        name := varNames[variable]
        actual := actualGrad[variable]
        if len(expected) != len(actual) {
            t.Errorf("variable %s: expected len %d got len %d", name, len(expected), len(actual))
            continue
        }
        for i, x := range expected {
            a := actual[i]
            if math.Abs(x-a) > 1e-6 {
                t.Errorf("variable %s: value %d: expected %f got %f", name, i, x, a)
            }
        }
    }
}
func BenchmarkSoftmaxBackProp(b *testing.B) {
    rand.Seed(123)
    inputVec := make([]float64, 3000)
    for i := range inputVec {
        inputVec[i] = rand.Float64()*5 - 2.5
    }
    inputVar := &autofunc.Variable{Vector: inputVec}
    outGrad := autofunc.NewGradient([]*autofunc.Variable{inputVar})
    layer := SoftmaxLayer{}

    b.ResetTimer()
    for i := 0; i < b.N; i++ {
        layer.Apply(inputVar).PropagateGradient(inputVec, outGrad)
    }
}
func benchmarkConvLayerBackward(b *testing.B, layer *ConvLayer) {
    layer.Randomize()
    testInput := NewTensor3(layer.InputWidth, layer.InputHeight, layer.InputDepth)
    for i := range testInput.Data {
        testInput.Data[i] = rand.NormFloat64()
    }
    inputVar := &autofunc.Variable{Vector: testInput.Data}
    upstream := make(linalg.Vector, len(layer.Apply(inputVar).Output()))
    for i := range upstream {
        upstream[i] = rand.NormFloat64()
    }
    grad := autofunc.NewGradient(layer.Parameters())

    b.ResetTimer()
    for i := 0; i < b.N; i++ {
        layer.Apply(inputVar).PropagateGradient(upstream, grad)
    }
}
// Gradient accumulates the cost gradient over every sample in s,
// reusing a cached gradient map between calls.
func (b *SingleRGradienter) Gradient(s sgd.SampleSet) autofunc.Gradient {
    if b.gradCache == nil {
        b.gradCache = autofunc.NewGradient(b.Learner.Parameters())
    } else {
        b.gradCache.Zero()
    }

    for i := 0; i < s.Len(); i++ {
        sample := s.GetSample(i)
        vs := sample.(VectorSample)
        output := vs.Output
        inVar := &autofunc.Variable{Vector: vs.Input}
        result := b.Learner.Apply(inVar)
        cost := b.CostFunc.Cost(output, result)
        cost.PropagateGradient(linalg.Vector{1}, b.gradCache)
    }

    return b.gradCache
}
func TestUnstackLayerInverse(t *testing.T) {
    // This test relies on the fact that gradients must be re-stacked
    // (the inverse of unstacking) when propagated backwards. If
    // unstacking and re-stacking are truly inverses, propagating the
    // layer's own output back through it reproduces the input exactly.
    width := 15
    height := 13
    depth := 18

    inputVal := make(linalg.Vector, width*height*depth)
    for i := range inputVal {
        inputVal[i] = rand.Float64()*2 - 1
    }
    variable := &autofunc.Variable{Vector: inputVal}

    layer := &UnstackLayer{
        InputWidth:    width,
        InputHeight:   height,
        InputDepth:    depth,
        InverseStride: 3,
    }
    output := layer.Apply(variable)

    outGrad := make(linalg.Vector, len(output.Output()))
    copy(outGrad, output.Output())
    grad := autofunc.NewGradient([]*autofunc.Variable{variable})
    output.PropagateGradient(outGrad, grad)

    original := grad[variable]
    if len(original) != len(inputVal) {
        t.Fatalf("expected output length %d got %d", len(inputVal), len(original))
    }
    for i, x := range inputVal {
        a := original[i]
        if math.Abs(a-x) > 1e-6 {
            t.Fatalf("entry %d should be %f but got %f", i, x, a)
        }
    }
}
// sgdOnSequences takes one sign-gradient descent step on the
// bidirectional RNN's parameters using the given training sequences.
func sgdOnSequences(f *rnn.Bidirectional, s []seqtoseq.Sample) {
    gradient := autofunc.NewGradient(f.Parameters())
    for _, x := range s {
        inRes := seqfunc.ConstResult([][]linalg.Vector{x.Inputs})
        output := f.ApplySeqs(inRes)
        upstreamGrad := make([]linalg.Vector, len(x.Outputs))
        for i, o := range x.Outputs {
            upstreamGrad[i] = o.Copy().Scale(-1)
        }
        output.PropagateGradient([][]linalg.Vector{upstreamGrad}, gradient)
    }

    // Reduce every gradient entry to its sign before stepping.
    for _, vec := range gradient {
        for i, x := range vec {
            if x > 0 {
                vec[i] = 1
            } else {
                vec[i] = -1
            }
        }
    }
    gradient.AddToVars(-StepSize)
}
// Step performs a step of gradient boosting and
// returns the loss before the step was performed.
func (g *Gradient) Step() float64 {
    if g.OutCache == nil {
        g.OutCache = g.Sum.Classify(g.List)
    }
    curOutput := &autofunc.Variable{
        Vector: g.OutCache,
    }
    curLoss := g.Loss.Loss(curOutput, g.Desired)

    // Compute the gradient of the loss with respect to the
    // current ensemble output.
    grad := autofunc.NewGradient([]*autofunc.Variable{curOutput})
    curLoss.PropagateGradient([]float64{1}, grad)

    // Use that gradient to choose the next weak classifier and add
    // it to the ensemble with the loss-optimal weight.
    classifier := g.Pool.BestClassifier(g.List, grad[curOutput])
    classOutput := classifier.Classify(g.List)
    stepAmount := g.Loss.OptimalStep(curOutput.Vector, classOutput, g.Desired)
    g.Sum.Weights = append(g.Sum.Weights, stepAmount)
    g.Sum.Classifiers = append(g.Sum.Classifiers, classifier)

    // Update the cached ensemble output.
    g.OutCache.Add(classOutput.Scale(stepAmount))

    return curLoss.Output()[0]
}
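// The sketch below is not part of the original source; it is a minimal
// usage example showing how Step's return value can drive a boosting
// loop. The name runBoosting and the maxSteps/tolerance parameters are
// hypothetical, introduced only for illustration.
func runBoosting(g *Gradient, maxSteps int, tolerance float64) float64 {
    lastLoss := math.Inf(1)
    for i := 0; i < maxSteps; i++ {
        // Step returns the loss from *before* the new classifier was
        // added, so we stop once improvements fall below tolerance.
        loss := g.Step()
        if lastLoss-loss < tolerance {
            return loss
        }
        lastLoss = loss
    }
    return lastLoss
}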
func TestMaxPoolingBackward(t *testing.T) {
    layer := &MaxPoolingLayer{3, 3, 10, 11, 2}
    input := []float64{
        0.5305, 0.7935, 0.3718, 0.4026, 0.8246, 0.6875, 0.6069, 0.0399, 0.4759, 0.3548,
        0.8465, 0.0479, 0.4841, 0.1277, 0.2060, 0.6833, 0.0844, 0.0793, 0.1564, 0.2891,
        0.9761, 0.1716, 0.2394, 0.6439, 0.2834, 0.5429, 0.5479, 0.6228, 0.3308, 0.4145,
        0.4472, 0.8445, 0.1258, 0.9365, 0.8861, 0.5686, 0.7676, 0.5818, 0.8840, 0.4068,
        0.0427, 0.2888, 0.2321, 0.2350, 0.3702, 0.8161, 0.9992, 0.3097, 0.2996, 0.7116,
        0.6126, 0.5868, 0.0587, 0.3701, 0.8875, 0.5653, 0.1161, 0.3778, 0.5768, 0.6405,
        0.2868, 0.2617, 0.6762, 0.9683, 0.7948, 0.8449, 0.7876, 0.3225, 0.0139, 0.2315,
        0.5635, 0.5076, 0.8530, 0.4785, 0.8244, 0.0356, 0.1402, 0.8464, 0.6470, 0.5444,
        0.4489, 0.3268, 0.9251, 0.6568, 0.7592, 0.0223, 0.6244, 0.9696, 0.2035, 0.6457,
        0.0505, 0.8712, 0.2836, 0.0689, 0.6179, 0.0421, 0.0373, 0.2316, 0.7921, 0.7195,
        0.7107, 0.7147, 0.3756, 0.0563, 0.3803, 0.4184, 0.2551, 0.7702, 0.8207, 0.9405,
        0.4711, 0.1529, 0.1081, 0.6531, 0.5117, 0.1368, 0.2331, 0.7265, 0.0986, 0.7236,
        0.1467, 0.1398, 0.4580, 0.1640, 0.2878, 0.3895, 0.5600, 0.1037, 0.9899, 0.8434,
        0.5762, 0.3068, 0.6564, 0.4465, 0.0134, 0.8445, 0.8760, 0.9951, 0.4819, 0.5924,
        0.2894, 0.4773, 0.0628, 0.3025, 0.2345, 0.9472, 0.7258, 0.2077, 0.3428, 0.6104,
        0.0639, 0.0854, 1.0000, 0.0372, 0.3874, 0.6501, 0.6533, 0.2953, 0.5591, 0.9967,
        0.6510, 0.3776, 0.6511, 0.9123, 0.9738, 0.4100, 0.3743, 0.9791, 0.3929, 0.8278,
        0.1919, 0.2566, 0.3484, 0.3768, 0.0108, 0.5234, 0.4480, 0.3097, 0.5598, 0.5840,
        0.0082, 0.5011, 0.3124, 0.8709, 0.6181, 0.1428, 0.7824, 0.7105, 0.0922, 0.5858,
        0.1643, 0.3963, 0.1715, 0.2448, 0.7961, 0.1675, 0.2949, 0.3438, 0.4825, 0.8616,
        0.5648, 0.3950, 0.7001, 0.3238, 0.3235, 0.4789, 0.4206, 0.0502, 0.3165, 0.2146,
        0.5393, 0.9277, 0.4361, 0.1530, 0.3192, 0.9463, 0.0317, 0.3078, 0.8892, 0.0508,
    }
    downstreamGrad := NewTensor3(4, 4, 2)
    for i := range downstreamGrad.Data {
        downstreamGrad.Data[i] = rand.Float64()*2 - 1
    }
    // gradientMask marks the input entries that are the maxima of their
    // pooling regions and should therefore receive the output gradient.
    gradientMask := []int{
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0,
        0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1,
        0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0,
        0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
        0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
        0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1,
        0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
        0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1,
        0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0,
    }

    inputVar := &autofunc.Variable{Vector: input}
    g := autofunc.NewGradient([]*autofunc.Variable{inputVar})
    downstreamCopy := make(linalg.Vector, len(downstreamGrad.Data))
    copy(downstreamCopy, downstreamGrad.Data)
    layer.Apply(inputVar).PropagateGradient(downstreamCopy, g)

    actualGrad := g[inputVar]
    idx := 0
    for y := 0; y < 11; y++ {
        for x := 0; x < 10; x++ {
            for z := 0; z < 2; z++ {
                isChosen := gradientMask[z+x*2+y*20] == 1
                gradValue := actualGrad[idx]
                outputGrad := downstreamGrad.Get(x/3, y/3, z)
                idx++
                if !isChosen && gradValue != 0 {
                    t.Errorf("expected gradient at %d,%d,%d to be 0, but got %f",
                        x, y, z, gradValue)
                } else if isChosen && gradValue != outputGrad {
                    t.Errorf("expected gradient at %d,%d,%d to be %f, but got %f",
                        x, y, z, outputGrad, gradValue)
                }
            }
        }
    }
}