func (r *RMSProp) update(a, b, c, d float64) { grad := blas64.Vector{Inc: 1, Data: r.C.WeightsGrad()} grad2 := blas64.Vector{Inc: 1, Data: make([]float64, len(grad.Data))} for i, w := range grad.Data { grad2.Data[i] = w * w } n := blas64.Vector{Inc: 1, Data: r.N} blas64.Scal(len(n.Data), a, n) blas64.Axpy(len(n.Data), 1-a, grad2, n) g := blas64.Vector{Inc: 1, Data: r.G} blas64.Scal(len(g.Data), a, g) blas64.Axpy(len(g.Data), 1-a, grad, g) rms := blas64.Vector{Inc: 1, Data: make([]float64, len(r.D))} for i, g := range r.G { rms.Data[i] = grad.Data[i] / math.Sqrt(r.N[i]-g*g+d) } rD := blas64.Vector{Inc: 1, Data: r.D} blas64.Scal(len(rD.Data), b, rD) blas64.Axpy(len(rD.Data), -c, rms, rD) val := blas64.Vector{Inc: 1, Data: r.C.WeightsVal()} blas64.Axpy(len(rD.Data), 1, rD, val) }
// AddScaledVec adds the vectors a and alpha*b, placing the result in the receiver.
//
// a and b must have the same length; otherwise AddScaledVec panics with
// matrix.ErrShape. The receiver may alias a, b, or both — each aliasing
// pattern is handled explicitly below so the result is still a + alpha*b.
func (v *Vector) AddScaledVec(a *Vector, alpha float64, b *Vector) {
	// Degenerate scales reduce to plain addition/subtraction.
	if alpha == 1 {
		v.AddVec(a, b)
		return
	}
	if alpha == -1 {
		v.SubVec(a, b)
		return
	}

	ar := a.Len()
	br := b.Len()

	if ar != br {
		panic(matrix.ErrShape)
	}

	// Make sure the receiver has the right length before writing into it.
	v.reuseAs(ar)

	// With alpha == 0 the result is just a copy of a. This check comes
	// after reuseAs so the receiver is still resized consistently.
	if alpha == 0 {
		v.CopyVec(a)
		return
	}

	// Dispatch on how the receiver aliases the operands.
	switch {
	case v == a && v == b: // v <- v + alpha * v = (alpha + 1) * v
		blas64.Scal(ar, alpha+1, v.mat)
	case v == a && v != b: // v <- v + alpha * b
		blas64.Axpy(ar, alpha, b.mat, v.mat)
	case v != a && v == b: // v <- a + alpha * v
		if v.mat.Inc == 1 && a.mat.Inc == 1 {
			// Fast path for a common case.
			// NOTE: this shadows the receiver with its raw data slice.
			v := v.mat.Data
			for i, a := range a.mat.Data {
				v[i] *= alpha
				v[i] += a
			}
			return
		}
		// General strided path: scale v in place, then add a.
		blas64.Scal(ar, alpha, v.mat)
		blas64.Axpy(ar, 1, a.mat, v.mat)
	default: // v <- a + alpha * b
		if v.mat.Inc == 1 && a.mat.Inc == 1 && b.mat.Inc == 1 {
			// Fast path for a common case.
			asm.DaxpyUnitary(alpha, b.mat.Data, a.mat.Data, v.mat.Data)
			return
		}
		// General strided path: copy a into v, then add alpha*b.
		blas64.Copy(ar, a.mat, v.mat)
		blas64.Axpy(ar, alpha, b.mat, v.mat)
	}
}
func (s *similarityCircuit) Backward() { uvuu := s.UV / (s.Unorm * s.Unorm) uvvv := s.UV / (s.Vnorm * s.Vnorm) uvg := s.TopGrad / (s.Unorm * s.Vnorm) u := blas64.Vector{Inc: 1, Data: s.UVal} v := blas64.Vector{Inc: 1, Data: s.VVal} ugrad := blas64.Vector{Inc: 1, Data: s.UGrad} blas64.Axpy(len(s.UGrad), uvg, v, ugrad) blas64.Axpy(len(s.UGrad), -uvuu*uvg, u, ugrad) vgrad := blas64.Vector{Inc: 1, Data: s.VGrad} blas64.Axpy(len(s.VGrad), uvg, u, vgrad) blas64.Axpy(len(s.VGrad), -uvvv*uvg, v, vgrad) }
func (c *ConvLayer) convolveR(v autofunc.RVector, in, inR linalg.Vector, out *Tensor3) { inMat := c.inputToMatrix(in) inMatR := c.inputToMatrix(inR) filterMat := blas64.General{ Rows: c.FilterCount, Cols: inMat.Cols, Stride: inMat.Stride, Data: c.FilterVar.Vector, } outMat := blas64.General{ Rows: out.Width * out.Height, Cols: out.Depth, Stride: out.Depth, Data: out.Data, } blas64.Gemm(blas.NoTrans, blas.Trans, 1, inMatR, filterMat, 0, outMat) if filterRV, ok := v[c.FilterVar]; ok { filterMatR := blas64.General{ Rows: c.FilterCount, Cols: inMat.Cols, Stride: inMat.Stride, Data: filterRV, } blas64.Gemm(blas.NoTrans, blas.Trans, 1, inMat, filterMatR, 1, outMat) } if biasRV, ok := v[c.Biases]; ok { biasVec := blas64.Vector{Inc: 1, Data: biasRV} for i := 0; i < len(out.Data); i += outMat.Cols { outRow := out.Data[i : i+outMat.Cols] outVec := blas64.Vector{Inc: 1, Data: outRow} blas64.Axpy(len(outRow), 1, biasVec, outVec) } } }
// replaces x with Q.x func (f LQFactor) applyQTo(x *Dense, trans bool) { nh, nc := f.LQ.Dims() m, n := x.Dims() if m != nc { panic(ErrShape) } proj := make([]float64, n) if trans { for k := nh - 1; k >= 0; k-- { hh := f.LQ.RawRowView(k)[k:] sub := x.View(k, 0, m-k, n).(*Dense) blas64.Gemv(blas.Trans, 1, sub.Mat, blas64.Vector{Inc: 1, Data: hh}, 0, blas64.Vector{Inc: 1, Data: proj}, ) for i := k; i < m; i++ { row := x.RawRowView(i) blas64.Axpy(n, -hh[i-k], blas64.Vector{Inc: 1, Data: proj}, blas64.Vector{Inc: 1, Data: row}, ) } } } else { for k := 0; k < nh; k++ { hh := f.LQ.RawRowView(k)[k:] sub := x.View(k, 0, m-k, n).(*Dense) blas64.Gemv(blas.Trans, 1, sub.Mat, blas64.Vector{Inc: 1, Data: hh}, 0, blas64.Vector{Inc: 1, Data: proj}, ) for i := k; i < m; i++ { row := x.RawRowView(i) blas64.Axpy(n, -hh[i-k], blas64.Vector{Inc: 1, Data: proj}, blas64.Vector{Inc: 1, Data: row}, ) } } } }
func (c *convLayerResult) propagateBiases(upstream linalg.Vector, grad autofunc.Gradient) { if biasGrad, ok := grad[c.Layer.Biases]; ok { biasGradVec := blas64.Vector{Inc: 1, Data: biasGrad} for i := 0; i < len(upstream); i += c.Layer.OutputDepth() { row := blas64.Vector{ Inc: 1, Data: upstream[i : i+c.Layer.OutputDepth()], } blas64.Axpy(len(biasGrad), 1, row, biasGradVec) } } }
// MulAdd adds the tensor t1, shifted // by x1 and y1, and scaled by s, to t. // It modifies t but leaves t1 alone. // // For instance, if x1=1 and y1=0, then // the first column of t is not affected, // and the first column of t1 is added to // the second column of t. // // Both tensors must have the same depth. func (t *Tensor3) MulAdd(x, y int, t1 *Tensor3, s float64) { if t.Depth != t1.Depth { panic("depths must match") } var sourceStartX, targetStartX int if x > 0 { targetStartX = x } else { sourceStartX = -x } var sourceStartY, targetStartY int if y > 0 { targetStartY = y } else { sourceStartY = -y } yCount := t.Height - targetStartY xCount := t.Width - targetStartX if sourceLimit := t1.Height - sourceStartY; sourceLimit < yCount { yCount = sourceLimit } if sourceLimit := t1.Width - sourceStartX; sourceLimit < xCount { xCount = sourceLimit } if rowSize := xCount * t.Depth; rowSize < minOptimizeTensorRowSize { for y := 0; y < yCount; y++ { for x := 0; x < xCount; x++ { for z := 0; z < t.Depth; z++ { val1 := t.Get(x+targetStartX, y+targetStartY, z) val2 := t1.Get(x+sourceStartX, y+sourceStartY, z) t.Set(x+targetStartX, y+targetStartY, z, val1+(val2*s)) } } } } else { for y := 0; y < yCount; y++ { target := t.Data[((y+targetStartY)*t.Width+targetStartX)*t.Depth:] source := t1.Data[((y+sourceStartY)*t1.Width+sourceStartX)*t1.Depth:] targetVec := blas64.Vector{Inc: 1, Data: target} sourceVec := blas64.Vector{Inc: 1, Data: source} blas64.Axpy(rowSize, s, sourceVec, targetVec) } } }
// LQ computes an LQ Decomposition for an m-by-n matrix a with m <= n by Householder
// reflections. The LQ decomposition is an m-by-n orthogonal matrix q and an m-by-m
// lower triangular matrix l so that a = l.q. LQ will panic with ErrShape if m > n.
//
// The LQ decomposition always exists, even if the matrix does not have full rank,
// so LQ will never fail unless m > n. The primary use of the LQ decomposition is
// in the least squares solution of non-square systems of simultaneous linear equations.
// This will fail if LQIsFullRank() returns false. The matrix a is overwritten by the
// decomposition.
func LQ(a *Dense) LQFactor {
	// Initialize.
	m, n := a.Dims()
	if m > n {
		panic(ErrShape)
	}

	// lq shares a's backing data: the factorization is computed in place.
	lq := *a

	lDiag := make([]float64, m)
	projs := NewVector(m, nil)

	// Main loop.
	for k := 0; k < m; k++ {
		// hh is row k from the diagonal rightward; it becomes the k-th
		// Householder vector, stored in place.
		hh := lq.RawRowView(k)[k:]
		norm := blas64.Nrm2(len(hh), blas64.Vector{Inc: 1, Data: hh})
		lDiag[k] = norm

		if norm != 0 {
			// |hh - norm*e1| = norm*sqrt(2*(1 - hh[0]/norm)); the factor
			// sqrt(2) is deliberately omitted here so that the later
			// rank-one update x -= hh*(hhᵀx) needs no explicit factor of 2.
			hhNorm := (norm * math.Sqrt(1-hh[0]/norm))
			if hhNorm == 0 {
				hh[0] = 0
			} else {
				// Form k-th Householder vector.
				s := 1 / hhNorm
				hh[0] -= norm
				blas64.Scal(len(hh), s, blas64.Vector{Inc: 1, Data: hh})

				// Apply transformation to remaining columns.
				if k < m-1 {
					// NOTE: a is reused here as a view into the trailing
					// submatrix; the original *Dense is restored below.
					a = lq.View(k+1, k, m-k-1, n-k).(*Dense)
					projs = projs.ViewVec(0, m-k-1)
					// projs = trailing-rows · hh
					projs.MulVec(a, false, NewVector(len(hh), hh))

					// Each trailing row j gets row -= projs[j] * hh.
					for j := 0; j < m-k-1; j++ {
						dst := a.RawRowView(j)
						blas64.Axpy(len(dst), -projs.at(j),
							blas64.Vector{Inc: 1, Data: hh},
							blas64.Vector{Inc: 1, Data: dst},
						)
					}
				}
			}
		}
	}
	// Restore a to the full (now factorized) matrix before returning it.
	*a = lq
	return LQFactor{a, lDiag}
}
func (c *ConvLayer) convolve(in linalg.Vector, out *Tensor3) { inMat := c.inputToMatrix(in) filterMat := blas64.General{ Rows: c.FilterCount, Cols: inMat.Cols, Stride: inMat.Stride, Data: c.FilterVar.Vector, } outMat := blas64.General{ Rows: out.Width * out.Height, Cols: out.Depth, Stride: out.Depth, Data: out.Data, } blas64.Gemm(blas.NoTrans, blas.Trans, 1, inMat, filterMat, 0, outMat) biasVec := blas64.Vector{Inc: 1, Data: c.Biases.Vector} for i := 0; i < len(out.Data); i += outMat.Cols { outRow := out.Data[i : i+outMat.Cols] outVec := blas64.Vector{Inc: 1, Data: outRow} blas64.Axpy(len(outRow), 1, biasVec, outVec) } }