// Mat64 converts the feature matrix into a mat64.Dense, optionally including
// the label header and transposing so that features become rows. Values that
// cannot be parsed as floats are stored as zero.
func (fm *FeatureMatrix) Mat64(header, transpose bool) *mat64.Dense {
	var (
		idx   int
		iter  fmIt
		dense *mat64.Dense
	)

	ncol := len(fm.Data)
	nrow := len(fm.CaseLabels)

	if !transpose {
		iter = rowIter(fm, header)
		dense = mat64.NewDense(nrow, ncol, nil)
	} else {
		iter = colIter(fm, header)
		dense = mat64.NewDense(ncol, nrow+1, nil)
	}

	for row, ok := iter(); ok; idx++ {
		for j, val := range row {
			// Parse errors are ignored; unparsable values are left as zero.
			flt, _ := strconv.ParseFloat(val, 64)
			dense.Set(idx, j, flt)
		}
		row, ok = iter()
	}
	return dense
}
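// Illustrative usage (a sketch, not part of the original source): fm is
// assumed to be a FeatureMatrix populated elsewhere, e.g. by this package's
// file loaders.
func exampleMat64(fm *FeatureMatrix) {
	m := fm.Mat64(false, false) // no header labels, cases as rows
	r, c := m.Dims()            // one row per case, one column per feature
	_, _ = r, c
}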
// LatinHypercube generates rows(batch) samples using Latin hypercube sampling
// from the given distribution. If src is not nil, it will be used to generate
// random numbers, otherwise rand.Float64 will be used.
//
// Latin hypercube sampling divides the cumulative distribution function into
// equally spaced bins and guarantees that one sample is generated per bin.
// Within each bin, the location is randomly sampled. The distmv.UnitNormal
// variable can be used as the Quantiler for easy generation from the unit
// normal distribution.
func LatinHypercube(batch *mat64.Dense, q distmv.Quantiler, src *rand.Rand) {
	r, c := batch.Dims()
	var f64 func() float64
	var perm func(int) []int
	if src != nil {
		f64 = src.Float64
		perm = src.Perm
	} else {
		f64 = rand.Float64
		perm = rand.Perm
	}

	r64 := float64(r)
	for i := 0; i < c; i++ {
		// For each dimension, stratify [0,1) into r equal bins and place one
		// uniform draw in each bin, assigning bins to rows in random order.
		p := perm(r)
		for j := 0; j < r; j++ {
			v := f64()/r64 + float64(j)/r64
			batch.Set(p[j], i, v)
		}
	}
	// Map the stratified uniform samples through the quantile (inverse CDF)
	// of the target distribution, row by row.
	p := make([]float64, c)
	for i := 0; i < r; i++ {
		copy(p, batch.RawRowView(i))
		q.Quantile(batch.RawRowView(i), p)
	}
}
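// Illustrative usage (a sketch, not part of the original source). expQuantiler
// is a hypothetical Quantiler for independent Exp(1) marginals; this assumes
// distmv.Quantiler is satisfied by a Quantile(x, p []float64) method.
type expQuantiler struct{}

// Quantile stores in x the Exp(1) quantiles of the probabilities in p.
func (expQuantiler) Quantile(x, p []float64) {
	for i, pi := range p {
		x[i] = -math.Log(1 - pi) // inverse CDF of the unit exponential
	}
}

func exampleLatinHypercube() {
	batch := mat64.NewDense(10, 2, nil) // 10 samples in 2 dimensions
	LatinHypercube(batch, expQuantiler{}, nil)
	// batch now holds 10 stratified draws from a 2-D unit exponential.
}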
// Generate generates a list of n random features given an input dimension dim.
// Each entry is drawn from a normal distribution with standard deviation
// exp(iso.LogScale).
func (iso IsoSqExp) Generate(n int, dim int, features *mat64.Dense) {
	scale := math.Exp(iso.LogScale)
	for i := 0; i < n; i++ {
		for j := 0; j < dim; j++ {
			features.Set(i, j, rand.NormFloat64()*scale)
		}
	}
}
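// Illustrative usage (a sketch, not part of the original source): generate
// 5 random features in 3 dimensions at unit length scale.
func exampleGenerate() {
	iso := IsoSqExp{LogScale: 0} // exp(0) = 1
	features := mat64.NewDense(5, 3, nil)
	iso.Generate(5, 3, features)
}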
// Add adds a new point to the Gaussian process.
func (gp *Trainer) Add(newInput []float64, newOutput []float64) {
	gp.nData++

	// The inputs and outputs are stored flat, one inputDim (resp. output
	// length) block per data point.
	gp.inputData = append(gp.inputData, newInput...)
	gp.outputData = append(gp.outputData, newOutput...)

	if gp.Implicit {
		// If the kernel is implicit it is computed on the fly, so only the
		// matrix size would need updating.
		panic("not coded")
	}

	// Grow the kernel matrix by one row and one column. Reinterpreting the
	// old backing slice at the new size would scramble its row-major layout,
	// so a new matrix is allocated and the old values are copied into its
	// upper-left block.
	newKernelMatrix := mat64.NewDense(gp.nData, gp.nData, nil)
	if gp.nData > 1 {
		view := &mat64.Dense{}
		view.View(newKernelMatrix, 0, 0, gp.nData-1, gp.nData-1)
		view.Copy(gp.kernelMatrix)
	}

	// Set the kernel values between the new point and all stored points,
	// including the new point itself on the diagonal.
	for i := 0; i < gp.nData; i++ {
		oldInput := gp.inputData[i*gp.inputDim : (i+1)*gp.inputDim]
		ker := gp.Kernel(oldInput, newInput)
		newKernelMatrix.Set(i, gp.nData-1, ker)
		newKernelMatrix.Set(gp.nData-1, i, ker)
	}
	gp.kernelMatrix = newKernelMatrix
}
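// Illustrative usage (a sketch, not part of the original source): gp is
// assumed to be a Trainer already configured with its Kernel function and
// with inputDim matching the data.
func exampleAdd(gp *Trainer) {
	gp.Add([]float64{0.1, 0.2}, []float64{1.5})
	gp.Add([]float64{0.3, 0.4}, []float64{2.5})
	// gp.kernelMatrix is now 2x2, holding all pairwise kernel evaluations.
}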
// LossDerivHess computes the mean squared-error loss between prediction and
// truth, storing the gradient with respect to the predictions in derivative
// and the Hessian in hessian. With n = len(prediction),
//	loss = (1/n) * sum_i (prediction[i] - truth[i])^2
// so the gradient is (2/n)*(prediction[i] - truth[i]) and the Hessian is
// (2/n) times the identity matrix.
func (SquaredDistance) LossDerivHess(prediction, truth, derivative []float64, hessian *mat64.Dense) (loss float64) {
	if len(prediction) != len(truth) || len(prediction) != len(derivative) {
		panic(lenMismatch)
	}
	n, m := hessian.Dims()
	if len(prediction) != n {
		panic(lenMismatch)
	}
	if len(prediction) != m {
		panic(lenMismatch)
	}
	for i := range prediction {
		diff := prediction[i] - truth[i]
		derivative[i] = diff
		loss += diff * diff
	}
	nFloat := float64(n)
	loss /= nFloat
	corr := 2 / nFloat
	for i := range derivative {
		derivative[i] *= corr
	}
	for i := 0; i < n; i++ {
		for j := 0; j < n; j++ {
			if i == j {
				hessian.Set(i, j, corr)
			} else {
				hessian.Set(i, j, 0)
			}
		}
	}
	return loss
}
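// Illustrative usage (a sketch, not part of the original source), worked by
// hand: diff = {-0.5, 0, 1}, so loss = (0.25 + 0 + 1)/3 ≈ 0.4167 and
// derivative = (2/3)*diff = {-1/3, 0, 2/3}.
func exampleLossDerivHess() {
	prediction := []float64{1, 2, 3}
	truth := []float64{1.5, 2, 2}
	derivative := make([]float64, 3)
	hessian := mat64.NewDense(3, 3, nil)
	loss := SquaredDistance{}.LossDerivHess(prediction, truth, derivative, hessian)
	_ = loss // ≈ 0.4167; hessian is (2/3) * I
}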
// Rejection generates rows(batch) samples using the rejection sampling
// algorithm and stores them in place in batch. Sampling continues until batch
// is filled. Rejection returns the total number of proposed locations and a
// boolean indicating whether the rejection sampling assumption was violated
// (see details below). If the returned boolean is false, all elements of
// batch are set to NaN. If src != nil, it will be used to generate random
// numbers, otherwise rand.Float64 will be used.
//
// Rejection sampling generates points from the target distribution by using
// the proposal distribution. At each step of the algorithm, the proposed point
// is accepted with probability
//	p = target(x) / (proposal(x) * c)
// where target(x) is the probability of the point according to the target
// distribution and proposal(x) is the probability according to the proposal
// distribution. The constant c must be chosen such that
// target(x) < proposal(x) * c for all x. The expected number of proposed
// samples is rows(batch) * c.
//
// Target may return the true (log of the) probability of the location, or it
// may return a value that is proportional to the probability (logprob +
// constant). This is useful for cases where the probability distribution is
// only known up to a normalization constant.
func Rejection(batch *mat64.Dense, target distmv.LogProber, proposal distmv.RandLogProber, c float64, src *rand.Rand) (nProposed int, ok bool) {
	if c < 1 {
		panic("rejection: acceptance constant must be at least 1")
	}
	f64 := rand.Float64
	if src != nil {
		f64 = src.Float64
	}
	r, dim := batch.Dims()
	v := make([]float64, dim)
	var idx int
	for {
		nProposed++
		proposal.Rand(v)
		qx := proposal.LogProb(v)
		px := target.LogProb(v)
		accept := math.Exp(px-qx) / c
		if accept > 1 {
			// The assumption target(x) < proposal(x)*c has been violated:
			// invalidate the whole result and return a failure.
			for i := 0; i < r; i++ {
				for j := 0; j < dim; j++ {
					batch.Set(i, j, math.NaN())
				}
			}
			return nProposed, false
		}
		if accept > f64() {
			batch.SetRow(idx, v)
			idx++
			if idx == r {
				break
			}
		}
	}
	return nProposed, true
}
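// Illustrative usage (a sketch, not part of the original source). betaTarget
// and unitUniform are hypothetical one-dimensional distributions, assuming
// distmv.LogProber and distmv.RandLogProber are satisfied by
// LogProb(x []float64) float64 and Rand(x []float64) []float64.

// betaTarget is the Beta(2,2) density 6x(1-x) on [0,1].
type betaTarget struct{}

func (betaTarget) LogProb(x []float64) float64 {
	return math.Log(6 * x[0] * (1 - x[0]))
}

// unitUniform is the uniform distribution on [0,1].
type unitUniform struct{}

func (unitUniform) LogProb(x []float64) float64 { return 0 }

func (unitUniform) Rand(x []float64) []float64 {
	x[0] = rand.Float64()
	return x
}

func exampleRejection() {
	batch := mat64.NewDense(100, 1, nil)
	// The Beta(2,2) density peaks at 1.5, so c = 1.5 bounds
	// target(x)/proposal(x) everywhere on [0,1].
	nProposed, ok := Rejection(batch, betaTarget{}, unitUniform{}, 1.5, nil)
	_, _ = nProposed, ok
}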