Example #1
func testRBatcher(t *testing.T, rv autofunc.RVector, b batchFuncR, in autofunc.RResult,
	n int, params []*autofunc.Variable) {
	funcRBatcher := autofunc.RFuncBatcher{F: b}

	t.Run("Forward", func(t *testing.T) {
		expected := funcRBatcher.BatchR(rv, in, n)
		actual := b.BatchR(rv, in, n)
		diff := actual.Output().Copy().Scale(-1).Add(expected.Output()).MaxAbs()
		if diff > 1e-5 {
			t.Errorf("expected output %v but got %v", expected, actual)
		}
		diff = actual.ROutput().Copy().Scale(-1).Add(expected.ROutput()).MaxAbs()
		if diff > 1e-5 {
			t.Errorf("expected r-output %v but got %v", expected, actual)
		}
	})

	t.Run("Backward", func(t *testing.T) {
		expectedOut := funcRBatcher.BatchR(rv, in, n)
		actualOut := b.BatchR(rv, in, n)

		expected := autofunc.NewGradient(params)
		actual := autofunc.NewGradient(params)
		expectedR := autofunc.NewRGradient(params)
		actualR := autofunc.NewRGradient(params)

		outGrad := make(linalg.Vector, len(expectedOut.Output()))
		outGradR := make(linalg.Vector, len(expectedOut.Output()))
		for i := range outGrad {
			outGrad[i] = rand.NormFloat64()
			outGradR[i] = rand.NormFloat64()
		}

		expectedOut.PropagateRGradient(outGrad.Copy(), outGradR.Copy(), expectedR, expected)
		actualOut.PropagateRGradient(outGrad, outGradR, actualR, actual)

		for i, variable := range params {
			actualVec := actual[variable]
			expectedVec := expected[variable]
			diff := actualVec.Copy().Scale(-1).Add(expectedVec).MaxAbs()
			if diff > 1e-5 {
				t.Errorf("variable %d (grad): expected %v got %v", i, expectedVec, actualVec)
			}
			actualVec = actualR[variable]
			expectedVec = expectedR[variable]
			diff = actualVec.Copy().Scale(-1).Add(expectedVec).MaxAbs()
			if diff > 1e-5 {
				t.Errorf("variable %d (rgrad): expected %v got %v", i, expectedVec, actualVec)
			}
		}
	})
}
Example #2
func (b *BlockChecker) testNilUpstreamR(t *testing.T) {
	t.Run("Nil Upstream R", func(t *testing.T) {
		out := b.B.ApplyBlockR(b.RV, []rnn.RState{b.B.StartRState(b.RV)},
			[]autofunc.RResult{autofunc.NewRVariable(b.Input[0][0], b.RV)})
		g1 := autofunc.NewGradient(b.Vars)
		rg1 := autofunc.NewRGradient(b.Vars)
		initLen1 := len(g1)
		c := len(out.PropagateRGradient(nil, nil, nil, rg1, g1))
		if c != 1 {
			t.Errorf("expected %d downstream states, got %d", 1, c)
		}
		g2 := autofunc.NewGradient(b.Vars)
		rg2 := autofunc.NewRGradient(b.Vars)
		initLen2 := len(g2)

		zeroUpstream := make([]linalg.Vector, len(out.Outputs()))
		for i, x := range out.Outputs() {
			zeroUpstream[i] = make(linalg.Vector, len(x))
		}
		nilStateUpstream := make([]rnn.RStateGrad, len(out.RStates()))
		c = len(out.PropagateRGradient(zeroUpstream, zeroUpstream, nilStateUpstream, rg2, g2))
		if c != 1 {
			t.Errorf("expected %d downstream states, got %d", 1, c)
		}

		if len(g1) != initLen1 {
			t.Errorf("all nil gradient length changed from %d to %d", initLen1, len(g1))
		}
		if len(rg1) != initLen1 {
			t.Errorf("all nil r-gradient length changed from %d to %d", initLen1, len(rg1))
		}
		if len(g2) != initLen2 {
			t.Errorf("non-nil gradient length changed from %d to %d", initLen2, len(g2))
		}
		if len(rg2) != initLen2 {
			t.Errorf("non-nil r-gradient length changed from %d to %d", initLen2, len(rg2))
		}

		for i, variable := range b.Vars {
			val1 := g1[variable]
			val2 := g2[variable]
			if !b.vecsEqual(val1, val2) {
				t.Errorf("gradients for var %d don't match: %v and %v", i, val1, val2)
			}
			val1 = rg1[variable]
			val2 = rg2[variable]
			if !b.vecsEqual(val1, val2) {
				t.Errorf("r-gradients for var %d don't match: %v and %v", i, val1, val2)
			}
		}
	})
}
Example #3
func (b *SingleRGradienter) RGradient(rv autofunc.RVector, s sgd.SampleSet) (autofunc.Gradient,
	autofunc.RGradient) {
	if b.gradCache == nil {
		b.gradCache = autofunc.NewGradient(b.Learner.Parameters())
	} else {
		b.gradCache.Zero()
	}
	if b.rgradCache == nil {
		b.rgradCache = autofunc.NewRGradient(b.Learner.Parameters())
	} else {
		b.rgradCache.Zero()
	}

	for i := 0; i < s.Len(); i++ {
		sample := s.GetSample(i)
		vs := sample.(VectorSample)
		output := vs.Output
		inVar := &autofunc.Variable{vs.Input}
		rVar := autofunc.NewRVariable(inVar, rv)
		result := b.Learner.ApplyR(rv, rVar)
		cost := b.CostFunc.CostR(rv, output, result)
		cost.PropagateRGradient(linalg.Vector{1}, linalg.Vector{0},
			b.rgradCache, b.gradCache)
	}

	return b.gradCache, b.rgradCache
}
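A hypothetical usage sketch (not part of the library; the helper name, the step-size parameter, and the plain AddToVars update are assumptions) showing how the gradient returned above might be applied:

func applyRGradientStep(r *SingleRGradienter, rv autofunc.RVector, s sgd.SampleSet, stepSize float64) {
	grad, rGrad := r.RGradient(rv, s)
	// The r-gradient would feed a second-order style update; a plain
	// gradient-descent step only needs the gradient itself.
	_ = rGrad
	grad.AddToVars(-stepSize)
}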
Example #4
func benchmarkConvLayer(b *testing.B, layer *ConvLayer) {
	b.Run("Forward", func(b *testing.B) {
		benchmarkConvLayerForward(b, layer)
	})
	b.Run("Backward", func(b *testing.B) {
		benchmarkConvLayerBackward(b, layer)
	})
	b.Run("Parallel", func(b *testing.B) {
		parallelism := runtime.GOMAXPROCS(0)
		inputs := make(chan *autofunc.Variable, parallelism)
		upstreams := make(chan linalg.Vector, parallelism)
		grads := make(chan autofunc.Gradient, parallelism)
		for i := 0; i < parallelism; i++ {
			testInput := NewTensor3(layer.InputWidth, layer.InputHeight, layer.InputDepth)
			for i := range testInput.Data {
				testInput.Data[i] = rand.NormFloat64()
			}
			inputVar := &autofunc.Variable{Vector: testInput.Data}
			inputs <- inputVar
			upstream := make(linalg.Vector, len(layer.Apply(inputVar).Output()))
			for i := range upstream {
				upstream[i] = rand.NormFloat64()
			}
			upstreams <- upstream
			grad := autofunc.NewGradient(layer.Parameters())
			grads <- grad
		}
		b.ResetTimer()
		b.RunParallel(func(pb *testing.PB) {
			benchmarkConvLayerParallel(pb, layer, <-inputs, <-upstreams, <-grads)
		})
	})
}
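benchmarkConvLayerParallel is called above but not included in these examples; a minimal sketch consistent with the backward benchmark in Example #12 below (its body is an assumption) could be:

func benchmarkConvLayerParallel(pb *testing.PB, layer *ConvLayer, in *autofunc.Variable,
	upstream linalg.Vector, grad autofunc.Gradient) {
	for pb.Next() {
		// Each goroutine repeatedly runs a forward pass followed by
		// back-propagation into its own gradient map.
		layer.Apply(in).PropagateGradient(upstream, grad)
	}
}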
Example #5
func BenchmarkDenseLayerBackProp(b *testing.B) {
	net := Network{
		&DenseLayer{InputCount: 1000, OutputCount: 2000},
		&Sigmoid{},
		&DenseLayer{InputCount: 2000, OutputCount: 512},
		&Sigmoid{},
		&DenseLayer{InputCount: 512, OutputCount: 10},
		&Sigmoid{},
	}
	rand.Seed(123)
	net.Randomize()
	inVec := &autofunc.Variable{Vector: make(linalg.Vector, 1000)}
	for i := range inVec.Vector {
		inVec.Vector[i] = rand.Float64()*2 - 1
	}

	downstream := make(linalg.Vector, 10)
	for i := range downstream {
		downstream[i] = 1
	}

	grad := autofunc.NewGradient(net.Parameters())

	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		net.Apply(inVec).PropagateGradient(downstream, grad)
	}
}
Example #6
func testBatchRGradienter(t *testing.T, batchSize int, b *BatchRGradienter) {
	rand.Seed(batchRGradienterSeed)

	net := Network{
		&DenseLayer{
			InputCount:  10,
			OutputCount: 30,
		},
		&Sigmoid{},
		&DenseLayer{
			InputCount:  30,
			OutputCount: 3,
		},
		&Sigmoid{},
	}
	net.Randomize()
	b.Learner = net.BatchLearner()

	inputs := make([]linalg.Vector, batchSize)
	outputs := make([]linalg.Vector, batchSize)
	for i := range inputs {
		inputVec := make(linalg.Vector, 10)
		outputVec := make(linalg.Vector, 3)
		for j := range inputVec {
			inputVec[j] = rand.NormFloat64()
		}
		for j := range outputVec {
			outputVec[j] = rand.Float64()
		}
		inputs[i] = inputVec
		outputs[i] = outputVec
	}
	samples := VectorSampleSet(inputs, outputs)

	rVector := autofunc.RVector(autofunc.NewGradient(net.Parameters()))
	for _, vec := range rVector {
		for i := range vec {
			vec[i] = rand.NormFloat64()
		}
	}

	single := SingleRGradienter{Learner: net, CostFunc: b.CostFunc}
	expectedGrad := single.Gradient(samples)
	actualGrad := b.Gradient(samples)

	if !vecMapsEqual(expectedGrad, actualGrad) {
		t.Error("bad gradient from Gradient()")
	}

	expectedGrad, expectedRGrad := single.RGradient(rVector, samples)
	actualGrad, actualRGrad := b.RGradient(rVector, samples)

	if !vecMapsEqual(expectedGrad, actualGrad) {
		t.Error("bad gradient from RGradient()")
	}
	if !vecMapsEqual(expectedRGrad, actualRGrad) {
		t.Error("bad r-gradient from RGradient()")
	}
}
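The vecMapsEqual helper used above is not shown in these examples; a minimal sketch reusing the max-absolute-difference check from Example #1 (the 1e-5 tolerance is an assumption) could be:

func vecMapsEqual(m1, m2 map[*autofunc.Variable]linalg.Vector) bool {
	if len(m1) != len(m2) {
		return false
	}
	for variable, v1 := range m1 {
		v2, ok := m2[variable]
		if !ok || len(v1) != len(v2) {
			return false
		}
		// Same tolerance check as in Example #1.
		if v1.Copy().Scale(-1).Add(v2).MaxAbs() > 1e-5 {
			return false
		}
	}
	return true
}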
Example #7
func DreamCmd(netPath, imgPath string) {
	networkData, err := ioutil.ReadFile(netPath)
	if err != nil {
		fmt.Fprintln(os.Stderr, "Error reading network:", err)
		os.Exit(1)
	}
	network, err := neuralnet.DeserializeNetwork(networkData)
	if err != nil {
		fmt.Fprintln(os.Stderr, "Error deserializing network:", err)
		os.Exit(1)
	}

	convIn := network[1].(*neuralnet.ConvLayer)
	inputImage := &autofunc.Variable{
		Vector: make(linalg.Vector, convIn.InputWidth*convIn.InputHeight*
			convIn.InputDepth),
	}
	for i := range inputImage.Vector {
		inputImage.Vector[i] = rand.Float64()*0.01 + 0.5
	}

	desiredOut := linalg.Vector{0, 1}
	cost := neuralnet.DotCost{}
	grad := autofunc.NewGradient([]*autofunc.Variable{inputImage})
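	// Run 1000 steps of gradient descent on the input image to reduce the cost.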
	for i := 0; i < 1000; i++ {
		output := network.Apply(inputImage)
		costOut := cost.Cost(desiredOut, output)
		grad.Zero()
		log.Println("cost is", costOut.Output()[0])
		costOut.PropagateGradient(linalg.Vector{1}, grad)
		grad.AddToVars(-0.01)
	}

	newImage := image.NewRGBA(image.Rect(0, 0, convIn.InputWidth, convIn.InputHeight))
	var idx int
	for y := 0; y < convIn.InputHeight; y++ {
		for x := 0; x < convIn.InputWidth; x++ {
			r := uint8(0xff * inputImage.Vector[idx])
			g := uint8(0xff * inputImage.Vector[idx+1])
			b := uint8(0xff * inputImage.Vector[idx+2])
			newImage.SetRGBA(x, y, color.RGBA{
				R: r,
				G: g,
				B: b,
				A: 0xff,
			})
			idx += 3
		}
	}

	output, err := os.Create(imgPath)
	if err != nil {
		fmt.Fprintln(os.Stderr, "Failed to create output file:", err)
		os.Exit(1)
	}
	defer output.Close()
	png.Encode(output, newImage)
}
Example #8
func (g *gradientCache) Alloc() autofunc.Gradient {
	if len(g.gradients) == 0 {
		res := autofunc.NewGradient(g.variables)
		return res
	}
	res := g.gradients[len(g.gradients)-1]
	g.gradients = g.gradients[:len(g.gradients)-1]
	res.Zero()
	return res
}
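The matching recycling method is not shown here; a minimal sketch, assuming the cache simply keeps returned gradients for later reuse (the name Free is an assumption), could be:

func (g *gradientCache) Free(grad autofunc.Gradient) {
	// Alloc re-zeroes recycled gradients, so the returned gradient can be
	// stored without clearing it first.
	g.gradients = append(g.gradients, grad)
}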
Example #9
func TestDenseBackward(t *testing.T) {
	network, inputVar, upstream := denseTestInfo()

	partial0 := 0.1966119333 * upstream[0]
	partial1 := 0.1049935854 * upstream[1]

	input := inputVar.Vector
	weightGradient := [][]float64{
		{partial0 * input[0], partial0 * input[1], partial0 * input[2]},
		{partial1 * input[0], partial1 * input[1], partial1 * input[2]},
	}

	weightVec := network[0].(*DenseLayer).Weights.Data.Vector
	upstreamGradient := []float64{
		weightVec[0]*partial0 + weightVec[3]*partial1,
		weightVec[1]*partial0 + weightVec[4]*partial1,
		weightVec[2]*partial0 + weightVec[5]*partial1,
	}

	params := network.Parameters()
	params = append(params, inputVar)
	actualGrad := autofunc.NewGradient(params)
	output := network.Apply(inputVar)
	output.PropagateGradient(upstream, actualGrad)

	for i, xs := range weightGradient {
		for j, x := range xs {
			actualVec := actualGrad[network[0].(*DenseLayer).Weights.Data]
			actual := actualVec[i*3+j]
			if math.Abs(actual-x) > 1e-6 {
				t.Errorf("weight gradient %d,%d should be %f but got %f", i, j, x, actual)
			}
		}
	}

	biasGradient := []float64{partial0, partial1}
	for i, x := range biasGradient {
		actualVec := actualGrad[network[0].(*DenseLayer).Biases.Var]
		if actual := actualVec[i]; math.Abs(actual-x) > 1e-6 {
			t.Errorf("bias gradient %d should be %f but got %f", i, x, actual)
		}
	}

	for i, x := range upstreamGradient {
		actualVec := actualGrad[inputVar]
		if actual := actualVec[i]; math.Abs(actual-x) > 1e-6 {
			t.Errorf("upstream gradient %d should be %f but got %f", i, x, actual)
		}
	}
}
Example #10
func TestConvBackward(t *testing.T) {
	layer, input, outGrad := convLayerTestInfo()

	actualGrad := autofunc.NewGradient(append(layer.Parameters(), input))
	layer.Apply(input).PropagateGradient(outGrad, actualGrad)

	convLayer := layer[0].(*ConvLayer)
	expectedGrad := autofunc.Gradient{
		convLayer.FilterVar: []float64{
			9.181420449e-02, 6.070772494e-02, 4.831743717e-02, 6.140456075e-02,
			4.619374891e-02, 9.677697371e-02, 5.711791144e-02, 5.124701355e-02,
			8.690832544e-02, 2.255616739e-02, 9.041001878e-02, 4.383411433e-02,
			1.725619176e-01, 1.501485079e-01, 1.396596513e-01, 8.822688174e-02,
			1.043560711e-01, 1.851411351e-01, 1.769153948e-01, 1.366024735e-01,
			1.678136736e-01, 6.694391158e-02, 1.517132408e-01, 8.335992965e-02,
		},
		convLayer.Biases: []float64{1.333355836e-01, 2.790278869e-01},
		input: []float64{
			1.346240470e-02, 1.840140585e-02, 1.830078429e-02, 3.341979500e-02, 4.527417587e-02, 6.139417717e-02, 6.285708549e-02, 1.122305051e-01, 0, 0,
			2.692730031e-02, 1.193745091e-02, 2.396698285e-02, 3.739434288e-02, 8.890665566e-02, 4.124498873e-02, 8.115978953e-02, 1.253480957e-01, 0, 0,
			3.633179008e-02, 3.105761526e-02, 5.291576339e-02, 3.939048624e-02, 8.488640888e-02, 7.725933595e-02, 6.877644332e-02, 5.033669814e-02, 0, 0,
			7.172645109e-03, 2.625700212e-02, 3.193879788e-02, 3.368514841e-02, 2.737903811e-02, 6.263677753e-03, 1.786440555e-02, 3.198290875e-02, 0, 0,
			2.969143512e-02, 4.797023692e-02, 3.826207676e-02, 6.320548619e-02, 4.395410081e-02, 5.088142526e-02, 2.968988521e-02, 6.090264241e-02, 0, 0,
			5.255802153e-02, 1.594788029e-02, 3.863840312e-02, 6.542970202e-02, 6.192735934e-02, 6.301981015e-03, 3.169670830e-02, 6.425452037e-02, 0, 0,
			4.337086165e-02, 3.224390653e-02, 3.146379199e-02, 1.187088457e-02, 5.068287349e-02, 3.269456802e-02, 3.291436767e-02, 1.194641079e-02, 0, 0,
		},
	}
	varNames := map[*autofunc.Variable]string{
		convLayer.FilterVar: "filters",
		convLayer.Biases:    "biases",
		input:               "input",
	}

	for variable, expected := range expectedGrad {
		name := varNames[variable]
		actual := actualGrad[variable]
		if len(expected) != len(actual) {
			t.Errorf("variable %s: expected len %d got len %d",
				name, len(expected), len(actual))
			continue
		}
		for i, x := range expected {
			a := actual[i]
			if math.Abs(x-a) > 1e-6 {
				t.Errorf("variable %s: value %d: expected %f got %f", name, i, x, a)
			}
		}
	}
}
Example #11
func BenchmarkSoftmaxBackProp(b *testing.B) {
	rand.Seed(123)
	inputVec := make([]float64, 3000)
	for i := range inputVec {
		inputVec[i] = rand.Float64()*5 - 2.5
	}
	inputVar := &autofunc.Variable{Vector: inputVec}
	grad := autofunc.NewGradient([]*autofunc.Variable{inputVar})
	layer := SoftmaxLayer{}

	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		layer.Apply(inputVar).PropagateGradient(inputVec, grad)
	}
	}
}
Example #12
func benchmarkConvLayerBackward(b *testing.B, layer *ConvLayer) {
	layer.Randomize()
	testInput := NewTensor3(layer.InputWidth, layer.InputHeight, layer.InputDepth)
	for i := range testInput.Data {
		testInput.Data[i] = rand.NormFloat64()
	}
	inputVar := &autofunc.Variable{Vector: testInput.Data}
	upstream := make(linalg.Vector, len(layer.Apply(inputVar).Output()))
	for i := range upstream {
		upstream[i] = rand.NormFloat64()
	}
	grad := autofunc.NewGradient(layer.Parameters())
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		layer.Apply(inputVar).PropagateGradient(upstream, grad)
	}
}
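benchmarkConvLayerForward, called from Example #4, is not included among these examples; a minimal sketch modeled on the backward benchmark above, minus the gradient propagation, could be:

func benchmarkConvLayerForward(b *testing.B, layer *ConvLayer) {
	layer.Randomize()
	testInput := NewTensor3(layer.InputWidth, layer.InputHeight, layer.InputDepth)
	for i := range testInput.Data {
		testInput.Data[i] = rand.NormFloat64()
	}
	inputVar := &autofunc.Variable{Vector: testInput.Data}
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		layer.Apply(inputVar)
	}
}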
Example #13
func (b *SingleRGradienter) Gradient(s sgd.SampleSet) autofunc.Gradient {
	if b.gradCache == nil {
		b.gradCache = autofunc.NewGradient(b.Learner.Parameters())
	} else {
		b.gradCache.Zero()
	}

	for i := 0; i < s.Len(); i++ {
		sample := s.GetSample(i)
		vs := sample.(VectorSample)
		output := vs.Output
		inVar := &autofunc.Variable{vs.Input}
		result := b.Learner.Apply(inVar)
		cost := b.CostFunc.Cost(output, result)
		cost.PropagateGradient(linalg.Vector{1}, b.gradCache)
	}

	return b.gradCache
}
Example #14
func TestUnstackLayerInverse(t *testing.T) {
	// This test utilizes the fact that gradients have to
	// be un-unstacked when they are propagated backwards.
	// Thus, we can check that unstacking and un-unstacking
	// are truly inverses as they should be.

	width := 15
	height := 13
	depth := 18

	inputVal := make(linalg.Vector, width*height*depth)
	for i := range inputVal {
		inputVal[i] = rand.Float64()*2 - 1
	}
	variable := &autofunc.Variable{inputVal}

	layer := &UnstackLayer{
		InputWidth:    width,
		InputHeight:   height,
		InputDepth:    depth,
		InverseStride: 3,
	}
	output := layer.Apply(variable)

	outGrad := make(linalg.Vector, len(output.Output()))
	copy(outGrad, output.Output())
	grad := autofunc.NewGradient([]*autofunc.Variable{variable})
	output.PropagateGradient(outGrad, grad)

	original := grad[variable]
	if len(original) != len(inputVal) {
		t.Fatalf("expected output length %d got %d", len(inputVal), len(original))
	}

	for i, x := range inputVal {
		a := original[i]
		if math.Abs(a-x) > 1e-6 {
			t.Fatalf("entry %d should be %f but got %f", i, x, a)
		}
	}
}
Example #15
func sgdOnSequences(f *rnn.Bidirectional, s []seqtoseq.Sample) {
	gradient := autofunc.NewGradient(f.Parameters())
	for _, x := range s {
		inRes := seqfunc.ConstResult([][]linalg.Vector{x.Inputs})
		output := f.ApplySeqs(inRes)
		upstreamGrad := make([]linalg.Vector, len(x.Outputs))
		for i, o := range x.Outputs {
			upstreamGrad[i] = o.Copy().Scale(-1)
		}
		output.PropagateGradient([][]linalg.Vector{upstreamGrad}, gradient)
	}
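	// Replace each partial derivative with its sign before taking the step.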
	for _, vec := range gradient {
		for i, x := range vec {
			if x > 0 {
				vec[i] = 1
			} else {
				vec[i] = -1
			}
		}
	}
	gradient.AddToVars(-StepSize)
}
Example #16
// Step performs a step of gradient boosting and
// returns the loss before the step was performed.
func (g *Gradient) Step() float64 {
	if g.OutCache == nil {
		g.OutCache = g.Sum.Classify(g.List)
	}
	curOutput := &autofunc.Variable{
		Vector: g.OutCache,
	}
	curLoss := g.Loss.Loss(curOutput, g.Desired)

	grad := autofunc.NewGradient([]*autofunc.Variable{curOutput})
	curLoss.PropagateGradient([]float64{1}, grad)

	classifier := g.Pool.BestClassifier(g.List, grad[curOutput])
	classOutput := classifier.Classify(g.List)
	stepAmount := g.Loss.OptimalStep(curOutput.Vector, classOutput, g.Desired)

	g.Sum.Weights = append(g.Sum.Weights, stepAmount)
	g.Sum.Classifiers = append(g.Sum.Classifiers, classifier)

	g.OutCache.Add(classOutput.Scale(stepAmount))

	return curLoss.Output()[0]
}
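A hypothetical driver (not from the original source; the function name and the idea of collecting losses are assumptions) showing how Step might be called repeatedly:

func runBoosting(g *Gradient, iters int) []float64 {
	losses := make([]float64, iters)
	for i := range losses {
		// Each Step appends one weighted classifier to g.Sum and returns
		// the loss measured before that addition.
		losses[i] = g.Step()
	}
	return losses
}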
func TestMaxPoolingBackward(t *testing.T) {
	layer := &MaxPoolingLayer{3, 3, 10, 11, 2}

	input := []float64{
		0.5305, 0.7935, 0.3718, 0.4026, 0.8246, 0.6875, 0.6069, 0.0399, 0.4759, 0.3548, 0.8465, 0.0479, 0.4841, 0.1277, 0.2060, 0.6833, 0.0844, 0.0793, 0.1564, 0.2891,
		0.9761, 0.1716, 0.2394, 0.6439, 0.2834, 0.5429, 0.5479, 0.6228, 0.3308, 0.4145, 0.4472, 0.8445, 0.1258, 0.9365, 0.8861, 0.5686, 0.7676, 0.5818, 0.8840, 0.4068,
		0.0427, 0.2888, 0.2321, 0.2350, 0.3702, 0.8161, 0.9992, 0.3097, 0.2996, 0.7116, 0.6126, 0.5868, 0.0587, 0.3701, 0.8875, 0.5653, 0.1161, 0.3778, 0.5768, 0.6405,

		0.2868, 0.2617, 0.6762, 0.9683, 0.7948, 0.8449, 0.7876, 0.3225, 0.0139, 0.2315, 0.5635, 0.5076, 0.8530, 0.4785, 0.8244, 0.0356, 0.1402, 0.8464, 0.6470, 0.5444,
		0.4489, 0.3268, 0.9251, 0.6568, 0.7592, 0.0223, 0.6244, 0.9696, 0.2035, 0.6457, 0.0505, 0.8712, 0.2836, 0.0689, 0.6179, 0.0421, 0.0373, 0.2316, 0.7921, 0.7195,
		0.7107, 0.7147, 0.3756, 0.0563, 0.3803, 0.4184, 0.2551, 0.7702, 0.8207, 0.9405, 0.4711, 0.1529, 0.1081, 0.6531, 0.5117, 0.1368, 0.2331, 0.7265, 0.0986, 0.7236,

		0.1467, 0.1398, 0.4580, 0.1640, 0.2878, 0.3895, 0.5600, 0.1037, 0.9899, 0.8434, 0.5762, 0.3068, 0.6564, 0.4465, 0.0134, 0.8445, 0.8760, 0.9951, 0.4819, 0.5924,
		0.2894, 0.4773, 0.0628, 0.3025, 0.2345, 0.9472, 0.7258, 0.2077, 0.3428, 0.6104, 0.0639, 0.0854, 1.0000, 0.0372, 0.3874, 0.6501, 0.6533, 0.2953, 0.5591, 0.9967,
		0.6510, 0.3776, 0.6511, 0.9123, 0.9738, 0.4100, 0.3743, 0.9791, 0.3929, 0.8278, 0.1919, 0.2566, 0.3484, 0.3768, 0.0108, 0.5234, 0.4480, 0.3097, 0.5598, 0.5840,

		0.0082, 0.5011, 0.3124, 0.8709, 0.6181, 0.1428, 0.7824, 0.7105, 0.0922, 0.5858, 0.1643, 0.3963, 0.1715, 0.2448, 0.7961, 0.1675, 0.2949, 0.3438, 0.4825, 0.8616,
		0.5648, 0.3950, 0.7001, 0.3238, 0.3235, 0.4789, 0.4206, 0.0502, 0.3165, 0.2146, 0.5393, 0.9277, 0.4361, 0.1530, 0.3192, 0.9463, 0.0317, 0.3078, 0.8892, 0.0508,
	}

	downstreamGrad := NewTensor3(4, 4, 2)
	for i := range downstreamGrad.Data {
		downstreamGrad.Data[i] = rand.Float64()*2 - 1
	}

	gradientMask := []int{
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
		1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0,
		0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1,

		0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0,
		0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
		0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,

		0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
		0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1,
		0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,

		0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1,
		0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0,
	}

	inputVar := &autofunc.Variable{input}
	g := autofunc.NewGradient([]*autofunc.Variable{inputVar})
	downstreamCopy := make(linalg.Vector, len(downstreamGrad.Data))
	copy(downstreamCopy, downstreamGrad.Data)
	layer.Apply(inputVar).PropagateGradient(downstreamCopy, g)
	actualGrad := g[inputVar]

	idx := 0
	for y := 0; y < 11; y++ {
		for x := 0; x < 10; x++ {
			for z := 0; z < 2; z++ {
				isChosen := gradientMask[z+x*2+y*20] == 1
				gradValue := actualGrad[idx]
				outputGrad := downstreamGrad.Get(x/3, y/3, z)
				idx++
				if !isChosen && gradValue != 0 {
					t.Errorf("expected gradient at %d,%d,%d to be 0, but got %f",
						x, y, z, gradValue)
				} else if isChosen && gradValue != outputGrad {
					t.Errorf("expected gradient at %d,%d,%d to be %f, but got %f",
						x, y, z, outputGrad, gradValue)
				}
			}
		}
	}
}