// mul1N pointwise multiplies a scalar (1-component) with an N-component vector, // yielding an N-component vector stored in dst. func mul1N(dst, a, b *data.Slice) { util.Assert(a.NComp() == 1) util.Assert(dst.NComp() == b.NComp()) for c := 0; c < dst.NComp(); c++ { cuda.Mul(dst.Comp(c), a, b.Comp(c)) } }
func divN1(dst, a, b *data.Slice) { util.Assert(dst.NComp() == a.NComp()) util.Assert(b.NComp() == 1) for c := 0; c < dst.NComp(); c++ { cuda.Div(dst.Comp(c), a.Comp(c), b) } }
// average of slice over universe func sAverageUniverse(s *data.Slice) []float64 { nCell := float64(prod(s.Size())) avg := make([]float64, s.NComp()) for i := range avg { avg[i] = float64(cuda.Sum(s.Comp(i))) / nCell checkNaN1(avg[i]) } return avg }
func shiftMag(m *data.Slice, dx int) { m2 := cuda.Buffer(1, m.Size()) defer cuda.Recycle(m2) for c := 0; c < m.NComp(); c++ { comp := m.Comp(c) cuda.ShiftX(m2, comp, dx, float32(ShiftMagL[c]), float32(ShiftMagR[c])) data.Copy(comp, m2) // str0 ? } }
// average of slice over the magnet volume func sAverageMagnet(s *data.Slice) []float64 { if geometry.Gpu().IsNil() { return sAverageUniverse(s) } else { avg := make([]float64, s.NComp()) for i := range avg { avg[i] = float64(cuda.Dot(s.Comp(i), geometry.Gpu())) / magnetNCell() checkNaN1(avg[i]) } return avg } }
// store MFM image in output, based on magnetization in inp. func (c *MFMConvolution) Exec(outp, inp, vol *data.Slice, Bsat LUTPtr, regions *Bytes) { for i := 0; i < 3; i++ { zero1_async(c.fftRBuf) copyPadMul(c.fftRBuf, inp.Comp(i), vol, c.kernSize, c.size, Bsat, regions) c.fwPlan.ExecAsync(c.fftRBuf, c.fftCBuf) Nx, Ny := c.fftKernSize[X]/2, c.fftKernSize[Y] // ?? kernMulC_async(c.fftCBuf, c.gpuFFTKern[i], Nx, Ny) c.bwPlan.ExecAsync(c.fftCBuf, c.fftRBuf) copyUnPad(outp.Comp(i), c.fftRBuf, c.size, c.kernSize) } }
// Sets dst to the full (unnormalized) magnetization in A/m func SetMFull(dst *data.Slice) { // scale m by Msat... msat, rM := Msat.Slice() if rM { defer cuda.Recycle(msat) } for c := 0; c < 3; c++ { cuda.Mul(dst.Comp(c), M.Buffer().Comp(c), msat) } // ...and by cell volume if applicable vol, rV := geometry.Slice() if rV { defer cuda.Recycle(vol) } if !vol.IsNil() { for c := 0; c < 3; c++ { cuda.Mul(dst.Comp(c), dst.Comp(c), vol) } } }
// forward FFT component i func (c *DemagConvolution) fwFFT(i int, inp, vol *data.Slice, Bsat LUTPtr, regions *Bytes) { zero1_async(c.fftRBuf[i]) in := inp.Comp(i) copyPadMul(c.fftRBuf[i], in, vol, c.realKernSize, c.inputSize, Bsat, regions) c.fwPlan.ExecAsync(c.fftRBuf[i], c.fftCBuf[i]) }
// backward FFT component i func (c *DemagConvolution) bwFFT(i int, outp *data.Slice) { c.bwPlan.ExecAsync(c.fftCBuf[i], c.fftRBuf[i]) out := outp.Comp(i) copyUnPad(out, c.fftRBuf[i], c.inputSize, c.realKernSize) }
// EvalTo fills dst with the constant value: for every index in d.value,
// the matching component of dst is set with cuda.Memset to that entry,
// converted to float32.
func (d *constValue) EvalTo(dst *data.Slice) {
	for comp := range d.value {
		fill := float32(d.value[comp])
		cuda.Memset(dst.Comp(comp), fill)
	}
}
// forward FFT component i func (c *DemagConvolution) fwFFT(i int, inp, vol *data.Slice, Msat MSlice) { zero1_async(c.fftRBuf[i]) in := inp.Comp(i) copyPadMul(c.fftRBuf[i], in, vol, c.realKernSize, c.inputSize, Msat) c.fwPlan.ExecAsync(c.fftRBuf[i], c.fftCBuf[i]) }