func (r *RMSProp) update(a, b, c, d float64) { grad := blas64.Vector{Inc: 1, Data: r.C.WeightsGrad()} grad2 := blas64.Vector{Inc: 1, Data: make([]float64, len(grad.Data))} for i, w := range grad.Data { grad2.Data[i] = w * w } n := blas64.Vector{Inc: 1, Data: r.N} blas64.Scal(len(n.Data), a, n) blas64.Axpy(len(n.Data), 1-a, grad2, n) g := blas64.Vector{Inc: 1, Data: r.G} blas64.Scal(len(g.Data), a, g) blas64.Axpy(len(g.Data), 1-a, grad, g) rms := blas64.Vector{Inc: 1, Data: make([]float64, len(r.D))} for i, g := range r.G { rms.Data[i] = grad.Data[i] / math.Sqrt(r.N[i]-g*g+d) } rD := blas64.Vector{Inc: 1, Data: r.D} blas64.Scal(len(rD.Data), b, rD) blas64.Axpy(len(rD.Data), -c, rms, rD) val := blas64.Vector{Inc: 1, Data: r.C.WeightsVal()} blas64.Axpy(len(rD.Data), 1, rD, val) }
// AddScaledVec adds the vectors a and alpha*b, placing the result in the receiver. func (v *Vector) AddScaledVec(a *Vector, alpha float64, b *Vector) { if alpha == 1 { v.AddVec(a, b) return } if alpha == -1 { v.SubVec(a, b) return } ar := a.Len() br := b.Len() if ar != br { panic(matrix.ErrShape) } v.reuseAs(ar) if alpha == 0 { v.CopyVec(a) return } switch { case v == a && v == b: // v <- v + alpha * v = (alpha + 1) * v blas64.Scal(ar, alpha+1, v.mat) case v == a && v != b: // v <- v + alpha * b blas64.Axpy(ar, alpha, b.mat, v.mat) case v != a && v == b: // v <- a + alpha * v if v.mat.Inc == 1 && a.mat.Inc == 1 { // Fast path for a common case. v := v.mat.Data for i, a := range a.mat.Data { v[i] *= alpha v[i] += a } return } blas64.Scal(ar, alpha, v.mat) blas64.Axpy(ar, 1, a.mat, v.mat) default: // v <- a + alpha * b if v.mat.Inc == 1 && a.mat.Inc == 1 && b.mat.Inc == 1 { // Fast path for a common case. asm.DaxpyUnitary(alpha, b.mat.Data, a.mat.Data, v.mat.Data) return } blas64.Copy(ar, a.mat, v.mat) blas64.Axpy(ar, alpha, b.mat, v.mat) } }
// ScaleVec scales the vector a by alpha, placing the result in the receiver. func (v *Vector) ScaleVec(alpha float64, a *Vector) { n := a.Len() if v != a { v.reuseAs(n) blas64.Copy(n, a.mat, v.mat) } blas64.Scal(n, alpha, v.mat) }
// LQ computes an LQ Decomposition for an m-by-n matrix a with m <= n by Householder // reflections. The LQ decomposition is an m-by-n orthogonal matrix q and an m-by-m // lower triangular matrix l so that a = l.q. LQ will panic with ErrShape if m > n. // // The LQ decomposition always exists, even if the matrix does not have full rank, // so LQ will never fail unless m > n. The primary use of the LQ decomposition is // in the least squares solution of non-square systems of simultaneous linear equations. // This will fail if LQIsFullRank() returns false. The matrix a is overwritten by the // decomposition. func LQ(a *Dense) LQFactor { // Initialize. m, n := a.Dims() if m > n { panic(ErrShape) } lq := *a lDiag := make([]float64, m) projs := NewVector(m, nil) // Main loop. for k := 0; k < m; k++ { hh := lq.RawRowView(k)[k:] norm := blas64.Nrm2(len(hh), blas64.Vector{Inc: 1, Data: hh}) lDiag[k] = norm if norm != 0 { hhNorm := (norm * math.Sqrt(1-hh[0]/norm)) if hhNorm == 0 { hh[0] = 0 } else { // Form k-th Householder vector. s := 1 / hhNorm hh[0] -= norm blas64.Scal(len(hh), s, blas64.Vector{Inc: 1, Data: hh}) // Apply transformation to remaining columns. if k < m-1 { a = lq.View(k+1, k, m-k-1, n-k).(*Dense) projs = projs.ViewVec(0, m-k-1) projs.MulVec(a, false, NewVector(len(hh), hh)) for j := 0; j < m-k-1; j++ { dst := a.RawRowView(j) blas64.Axpy(len(dst), -projs.at(j), blas64.Vector{Inc: 1, Data: hh}, blas64.Vector{Inc: 1, Data: dst}, ) } } } } } *a = lq return LQFactor{a, lDiag} }
// AddScaledVec adds the vectors a and alpha*b, placing the result in the receiver. func (v *Vector) AddScaledVec(a *Vector, alpha float64, b *Vector) { if alpha == 1 { v.AddVec(a, b) return } if alpha == -1 { v.SubVec(a, b) return } ar := a.Len() br := b.Len() if ar != br { panic(matrix.ErrShape) } v.reuseAs(ar) switch { case alpha == 0: // v <- a v.CopyVec(a) case v == a && v == b: // v <- v + alpha * v = (alpha + 1) * v blas64.Scal(ar, alpha+1, v.mat) case v == a && v != b: // v <- v + alpha * b if v.mat.Inc == 1 && b.mat.Inc == 1 { // Fast path for a common case. asm.DaxpyUnitaryTo(v.mat.Data, alpha, b.mat.Data, a.mat.Data) } else { asm.DaxpyInc(alpha, b.mat.Data, v.mat.Data, uintptr(ar), uintptr(b.mat.Inc), uintptr(v.mat.Inc), 0, 0) } default: // v <- a + alpha * b or v <- a + alpha * v if v.mat.Inc == 1 && a.mat.Inc == 1 && b.mat.Inc == 1 { // Fast path for a common case. asm.DaxpyUnitaryTo(v.mat.Data, alpha, b.mat.Data, a.mat.Data) } else { asm.DaxpyIncTo(v.mat.Data, uintptr(v.mat.Inc), 0, alpha, b.mat.Data, a.mat.Data, uintptr(ar), uintptr(b.mat.Inc), uintptr(a.mat.Inc), 0, 0) } } }
// SymRankOne performs a rank-1 update of the original matrix A and refactorizes // its Cholesky factorization, storing the result into the reciever. That is, if // in the original Cholesky factorization // U^T * U = A, // in the updated factorization // U'^T * U' = A + alpha * x * x^T = A'. // // Note that when alpha is negative, the updating problem may be ill-conditioned // and the results may be inaccurate, or the updated matrix A' may not be // positive definite and not have a Cholesky factorization. SymRankOne returns // whether the updated matrix A' is positive definite. // // SymRankOne updates a Cholesky factorization in O(n²) time. The Cholesky // factorization computation from scratch is O(n³). func (c *Cholesky) SymRankOne(orig *Cholesky, alpha float64, x *Vector) (ok bool) { if !orig.valid() { panic(badCholesky) } n := orig.Size() if x.Len() != n { panic(matrix.ErrShape) } if orig != c { if c.isZero() { c.chol = NewTriDense(n, matrix.Upper, nil) } else if c.chol.mat.N != n { panic(matrix.ErrShape) } c.chol.Copy(orig.chol) } if alpha == 0 { return true } // Algorithms for updating and downdating the Cholesky factorization are // described, for example, in // - J. J. Dongarra, J. R. Bunch, C. B. Moler, G. W. Stewart: LINPACK // Users' Guide. SIAM (1979), pages 10.10--10.14 // or // - P. E. Gill, G. H. Golub, W. Murray, and M. A. Saunders: Methods for // modifying matrix factorizations. Mathematics of Computation 28(126) // (1974), Method C3 on page 521 // // The implementation is based on LINPACK code // http://www.netlib.org/linpack/dchud.f // http://www.netlib.org/linpack/dchdd.f // and // https://icl.cs.utk.edu/lapack-forum/viewtopic.php?f=2&t=2646 // // According to http://icl.cs.utk.edu/lapack-forum/archives/lapack/msg00301.html // LINPACK is released under BSD license. // // See also: // - M. A. Saunders: Large-scale Linear Programming Using the Cholesky // Factorization. Technical Report Stanford University (1972) // http://i.stanford.edu/pub/cstr/reports/cs/tr/72/252/CS-TR-72-252.pdf // - Matthias Seeger: Low rank updates for the Cholesky decomposition. // EPFL Technical Report 161468 (2004) // http://infoscience.epfl.ch/record/161468 work := make([]float64, n) blas64.Copy(n, x.RawVector(), blas64.Vector{1, work}) if alpha > 0 { // Compute rank-1 update. if alpha != 1 { blas64.Scal(n, math.Sqrt(alpha), blas64.Vector{1, work}) } umat := c.chol.mat stride := umat.Stride for i := 0; i < n; i++ { // Compute parameters of the Givens matrix that zeroes // the i-th element of x. c, s, r, _ := blas64.Rotg(umat.Data[i*stride+i], work[i]) if r < 0 { // Multiply by -1 to have positive diagonal // elemnts. r *= -1 c *= -1 s *= -1 } umat.Data[i*stride+i] = r if i < n-1 { // Multiply the extended factorization matrix by // the Givens matrix from the left. Only // the i-th row and x are modified. blas64.Rot(n-i-1, blas64.Vector{1, umat.Data[i*stride+i+1 : i*stride+n]}, blas64.Vector{1, work[i+1 : n]}, c, s) } } c.updateCond(-1) return true } // Compute rank-1 downdate. alpha = math.Sqrt(-alpha) if alpha != 1 { blas64.Scal(n, alpha, blas64.Vector{1, work}) } // Solve U^T * p = x storing the result into work. ok = lapack64.Trtrs(blas.Trans, c.chol.RawTriangular(), blas64.General{ Rows: n, Cols: 1, Stride: 1, Data: work, }) if !ok { // The original matrix is singular. Should not happen, because // the factorization is valid. panic(badCholesky) } norm := blas64.Nrm2(n, blas64.Vector{1, work}) if norm >= 1 { // The updated matrix is not positive definite. return false } norm = math.Sqrt((1 + norm) * (1 - norm)) cos := make([]float64, n) sin := make([]float64, n) for i := n - 1; i >= 0; i-- { // Compute parameters of Givens matrices that zero elements of p // backwards. cos[i], sin[i], norm, _ = blas64.Rotg(norm, work[i]) if norm < 0 { norm *= -1 cos[i] *= -1 sin[i] *= -1 } } umat := c.chol.mat stride := umat.Stride for i := n - 1; i >= 0; i-- { // Apply Givens matrices to U. // TODO(vladimir-ch): Use workspace to avoid modifying the // receiver in case an invalid factorization is created. blas64.Rot(n-i, blas64.Vector{1, work[i:n]}, blas64.Vector{1, umat.Data[i*stride+i : i*stride+n]}, cos[i], sin[i]) if umat.Data[i*stride+i] == 0 { // The matrix is singular (may rarely happen due to // floating-point effects?). ok = false } else if umat.Data[i*stride+i] < 0 { // Diagonal elements should be positive. If it happens // that on the i-th row the diagonal is negative, // multiply U from the left by an identity matrix that // has -1 on the i-th row. blas64.Scal(n-i, -1, blas64.Vector{1, umat.Data[i*stride+i : i*stride+n]}) } } if ok { c.updateCond(-1) } else { c.Reset() } return ok }