// shift dst by shx cells (positive or negative) along X-axis. // new edge value is clampL at left edge or clampR at right edge. func ShiftX(dst, src *data.Slice, shiftX int, clampL, clampR float32) { util.Argument(dst.NComp() == 1 && src.NComp() == 1) util.Assert(dst.Len() == src.Len()) N := dst.Size() cfg := make3DConf(N) k_shiftx_async(dst.DevPtr(0), src.DevPtr(0), N[X], N[Y], N[Z], shiftX, clampL, clampR, cfg) }
// kernel multiplication for 2D demag convolution on X and Y, exploiting full kernel symmetry. func kernMulRSymm2Dxy_async(fftMx, fftMy, Kxx, Kyy, Kxy *data.Slice, Nx, Ny int) { util.Argument(fftMy.NComp() == 1 && Kxx.NComp() == 1) cfg := make3DConf([3]int{Nx, Ny, 1}) k_kernmulRSymm2Dxy_async(fftMx.DevPtr(0), fftMy.DevPtr(0), Kxx.DevPtr(0), Kyy.DevPtr(0), Kxy.DevPtr(0), Nx, Ny, cfg) }
// Sets vector dst to zero where mask != 0. func ZeroMask(dst *data.Slice, mask LUTPtr, regions *Bytes) { N := dst.Len() cfg := make1DConf(N) for c := 0; c < dst.NComp(); c++ { k_zeromask_async(dst.DevPtr(c), unsafe.Pointer(mask), regions.Ptr, N, cfg) } }
// Set Bth to thermal noise (Brown). // see temperature.cu func SetTemperature(Bth, noise *data.Slice, temp_red LUTPtr, k2mu0_VgammaDt float64, regions *Bytes) { util.Argument(Bth.NComp() == 1 && noise.NComp() == 1) N := Bth.Len() cfg := make1DConf(N) k_settemperature_async(Bth.DevPtr(0), noise.DevPtr(0), float32(k2mu0_VgammaDt), unsafe.Pointer(temp_red), regions.Ptr, N, cfg) }
// select the part of src within the specified region, set 0's everywhere else. func RegionSelect(dst, src *data.Slice, regions *Bytes, region byte) { util.Argument(dst.NComp() == src.NComp()) N := dst.Len() cfg := make1DConf(N) for c := 0; c < dst.NComp(); c++ { k_regionselect_async(dst.DevPtr(c), src.DevPtr(c), regions.Ptr, region, N, cfg) } }
// Copies src (larger) into dst (smaller). // Used to extract demag field after convolution on padded m. func copyUnPad(dst, src *data.Slice, dstsize, srcsize [3]int) { util.Argument(dst.NComp() == 1 && src.NComp() == 1) util.Argument(dst.Len() == prod(dstsize) && src.Len() == prod(srcsize)) cfg := make3DConf(dstsize) k_copyunpad_async(dst.DevPtr(0), dstsize[X], dstsize[Y], dstsize[Z], src.DevPtr(0), srcsize[X], srcsize[Y], srcsize[Z], cfg) }
// Dot product. func Dot(a, b *data.Slice) float32 { nComp := a.NComp() util.Argument(nComp == b.NComp()) out := reduceBuf(0) // not async over components for c := 0; c < nComp; c++ { k_reducedot_async(a.DevPtr(c), b.DevPtr(c), out, 0, a.Len(), reducecfg) // all components add to out } return copyback(out) }
// Finds the average exchange strength around each cell, for debugging. func ExchangeDecode(dst *data.Slice, Aex_red SymmLUT, regions *Bytes, mesh *data.Mesh) { c := mesh.CellSize() wx := float32(2 * 1e-18 / (c[X] * c[X])) wy := float32(2 * 1e-18 / (c[Y] * c[Y])) wz := float32(2 * 1e-18 / (c[Z] * c[Z])) N := mesh.Size() pbc := mesh.PBC_code() cfg := make3DConf(N) k_exchangedecode_async(dst.DevPtr(0), unsafe.Pointer(Aex_red), regions.Ptr, wx, wy, wz, N[X], N[Y], N[Z], pbc, cfg) }
// Set Bth to thermal noise (Brown). // see temperature.cu func SetTemperature(Bth, noise *data.Slice, k2mu0_Mu0VgammaDt float64, Msat, Temp, Alpha MSlice) { util.Argument(Bth.NComp() == 1 && noise.NComp() == 1) N := Bth.Len() cfg := make1DConf(N) k_settemperature2_async(Bth.DevPtr(0), noise.DevPtr(0), float32(k2mu0_Mu0VgammaDt), Msat.DevPtr(0), Msat.Mul(0), Temp.DevPtr(0), Temp.Mul(0), Alpha.DevPtr(0), Alpha.Mul(0), N, cfg) }
// Memset sets the Slice's components to the specified values. // To be carefully used on unified slice (need sync) func Memset(s *data.Slice, val ...float32) { if Synchronous { // debug Sync() timer.Start("memset") } util.Argument(len(val) == s.NComp()) for c, v := range val { cu.MemsetD32Async(cu.DevicePtr(uintptr(s.DevPtr(c))), math.Float32bits(v), int64(s.Len()), stream0) } if Synchronous { //debug Sync() timer.Stop("memset") } }
// Crop stores in dst a rectangle cropped from src at given offset position. // dst size may be smaller than src. func Crop(dst, src *data.Slice, offX, offY, offZ int) { D := dst.Size() S := src.Size() util.Argument(dst.NComp() == src.NComp()) util.Argument(D[X]+offX <= S[X] && D[Y]+offY <= S[Y] && D[Z]+offZ <= S[Z]) cfg := make3DConf(D) for c := 0; c < dst.NComp(); c++ { k_crop_async(dst.DevPtr(c), D[X], D[Y], D[Z], src.DevPtr(c), S[X], S[Y], S[Z], offX, offY, offZ, cfg) } }
// Select and resize one layer for interactive output func Resize(dst, src *data.Slice, layer int) { dstsize := dst.Size() srcsize := src.Size() util.Assert(dstsize[Z] == 1) util.Assert(dst.NComp() == 1 && src.NComp() == 1) scalex := srcsize[X] / dstsize[X] scaley := srcsize[Y] / dstsize[Y] util.Assert(scalex > 0 && scaley > 0) cfg := make3DConf(dstsize) k_resize_async(dst.DevPtr(0), dstsize[X], dstsize[Y], dstsize[Z], src.DevPtr(0), srcsize[X], srcsize[Y], srcsize[Z], layer, scalex, scaley, cfg) }
// SetMaxAngle sets dst to the maximum angle of each cells magnetization with all of its neighbors, // provided the exchange stiffness with that neighbor is nonzero. func SetMaxAngle(dst, m *data.Slice, Aex_red SymmLUT, regions *Bytes, mesh *data.Mesh) { N := mesh.Size() pbc := mesh.PBC_code() cfg := make3DConf(N) k_setmaxangle_async(dst.DevPtr(0), m.DevPtr(X), m.DevPtr(Y), m.DevPtr(Z), unsafe.Pointer(Aex_red), regions.Ptr, N[X], N[Y], N[Z], pbc, cfg) }
// Returns a buffer obtained from GetBuffer to the pool. func Recycle(s *data.Slice) { if Synchronous { Sync() } N := s.Len() pool := buf_pool[N] // put each component buffer back on the stack for i := 0; i < s.NComp(); i++ { ptr := s.DevPtr(i) if _, ok := buf_check[ptr]; !ok { log.Panic("recyle: was not obtained with getbuffer") } pool = append(pool, ptr) } s.Disable() // make it unusable, protect against accidental use after recycle buf_pool[N] = pool }
// Set s to the toplogogical charge density s = m · (m/∂x ❌ ∂m/∂y) // See topologicalcharge.cu func SetTopologicalCharge(s *data.Slice, m *data.Slice, mesh *data.Mesh) { cellsize := mesh.CellSize() N := s.Size() util.Argument(m.Size() == N) cfg := make3DConf(N) icxcy := float32(1.0 / (cellsize[X] * cellsize[Y])) k_settopologicalcharge_async(s.DevPtr(X), m.DevPtr(X), m.DevPtr(Y), m.DevPtr(Z), icxcy, N[X], N[Y], N[Z], mesh.PBC_code(), cfg) }
// multiply-add: dst[i] = src1[i] * factor1 + src2[i] * factor2 + src3 * factor3 func Madd3(dst, src1, src2, src3 *data.Slice, factor1, factor2, factor3 float32) { N := dst.Len() nComp := dst.NComp() util.Assert(src1.Len() == N && src2.Len() == N && src3.Len() == N) util.Assert(src1.NComp() == nComp && src2.NComp() == nComp && src3.NComp() == nComp) cfg := make1DConf(N) for c := 0; c < nComp; c++ { k_madd3_async(dst.DevPtr(c), src1.DevPtr(c), factor1, src2.DevPtr(c), factor2, src3.DevPtr(c), factor3, N, cfg) } }
// Execute the FFT plan, asynchronous. // src and dst are 3D arrays stored 1D arrays. func (p *fft3DC2RPlan) ExecAsync(src, dst *data.Slice) { if Synchronous { Sync() timer.Start("fft") } oksrclen := p.InputLenFloats() if src.Len() != oksrclen { panic(fmt.Errorf("fft size mismatch: expecting src len %v, got %v", oksrclen, src.Len())) } okdstlen := p.OutputLenFloats() if dst.Len() != okdstlen { panic(fmt.Errorf("fft size mismatch: expecting dst len %v, got %v", okdstlen, dst.Len())) } p.handle.ExecC2R(cu.DevicePtr(uintptr(src.DevPtr(0))), cu.DevicePtr(uintptr(dst.DevPtr(0)))) if Synchronous { Sync() timer.Stop("fft") } }
// Execute the FFT plan, asynchronous. // src and dst are 3D arrays stored 1D arrays. func (p *fft3DR2CPlan) ExecAsync(src, dst *data.Slice) { if Synchronous { Sync() timer.Start("fft") } util.Argument(src.NComp() == 1 && dst.NComp() == 1) oksrclen := p.InputLen() if src.Len() != oksrclen { log.Panicf("fft size mismatch: expecting src len %v, got %v", oksrclen, src.Len()) } okdstlen := p.OutputLen() if dst.Len() != okdstlen { log.Panicf("fft size mismatch: expecting dst len %v, got %v", okdstlen, dst.Len()) } p.handle.ExecR2C(cu.DevicePtr(uintptr(src.DevPtr(0))), cu.DevicePtr(uintptr(dst.DevPtr(0)))) if Synchronous { Sync() timer.Stop("fft") } }
// dst += LUT[region], for vectors. Used to add terms to excitation. func RegionAddV(dst *data.Slice, lut LUTPtrs, regions *Bytes) { util.Argument(dst.NComp() == 3) N := dst.Len() cfg := make1DConf(N) k_regionaddv_async(dst.DevPtr(X), dst.DevPtr(Y), dst.DevPtr(Z), lut[X], lut[Y], lut[Z], regions.Ptr, N, cfg) }
// multiply: dst[i] = a[i] * b[i] // a and b must have the same number of components func Mul(dst, a, b *data.Slice) { N := dst.Len() nComp := dst.NComp() util.Assert(a.Len() == N && a.NComp() == nComp && b.Len() == N && b.NComp() == nComp) cfg := make1DConf(N) for c := 0; c < nComp; c++ { k_mul_async(dst.DevPtr(c), a.DevPtr(c), b.DevPtr(c), N, cfg) } }
// Copies src into dst, which is larger, and multiplies by vol*Bsat. // The remainder of dst is not filled with zeros. // Used to zero-pad magnetization before convolution and in the meanwhile multiply m by its length. func copyPadMul(dst, src, vol *data.Slice, dstsize, srcsize [3]int, Bsat LUTPtr, regions *Bytes) { util.Argument(dst.NComp() == 1 && src.NComp() == 1) util.Assert(dst.Len() == prod(dstsize) && src.Len() == prod(srcsize)) cfg := make3DConf(srcsize) k_copypadmul_async(dst.DevPtr(0), dstsize[X], dstsize[Y], dstsize[Z], src.DevPtr(0), vol.DevPtr(0), srcsize[X], srcsize[Y], srcsize[Z], unsafe.Pointer(Bsat), regions.Ptr, cfg) }
// Copies src into dst, which is larger, and multiplies by vol*Bsat. // The remainder of dst is not filled with zeros. // Used to zero-pad magnetization before convolution and in the meanwhile multiply m by its length. func copyPadMul(dst, src, vol *data.Slice, dstsize, srcsize [3]int, Msat MSlice) { util.Argument(dst.NComp() == 1 && src.NComp() == 1) util.Assert(dst.Len() == prod(dstsize) && src.Len() == prod(srcsize)) cfg := make3DConf(srcsize) k_copypadmul2_async(dst.DevPtr(0), dstsize[X], dstsize[Y], dstsize[Z], src.DevPtr(0), srcsize[X], srcsize[Y], srcsize[Z], Msat.DevPtr(0), Msat.Mul(0), vol.DevPtr(0), cfg) }
// Add Zhang-Li ST torque (Tesla) to torque. // see zhangli.cu func AddZhangLiTorque(torque, m *data.Slice, Msat, J, alpha, xi, pol MSlice, mesh *data.Mesh) { c := mesh.CellSize() N := mesh.Size() cfg := make3DConf(N) k_addzhanglitorque2_async( torque.DevPtr(X), torque.DevPtr(Y), torque.DevPtr(Z), m.DevPtr(X), m.DevPtr(Y), m.DevPtr(Z), Msat.DevPtr(0), Msat.Mul(0), J.DevPtr(X), J.Mul(X), J.DevPtr(Y), J.Mul(Y), J.DevPtr(Z), J.Mul(Z), alpha.DevPtr(0), alpha.Mul(0), xi.DevPtr(0), xi.Mul(0), pol.DevPtr(0), pol.Mul(0), float32(c[X]), float32(c[Y]), float32(c[Z]), N[X], N[Y], N[Z], mesh.PBC_code(), cfg) }
// Normalize vec to unit length, unless length or vol are zero. func Normalize(vec, vol *data.Slice) { util.Argument(vol == nil || vol.NComp() == 1) N := vec.Len() cfg := make1DConf(N) k_normalize_async(vec.DevPtr(X), vec.DevPtr(Y), vec.DevPtr(Z), vol.DevPtr(0), N, cfg) }
// kernel multiplication for general 1D convolution. Does not assume any symmetry. // Used for MFM images. func kernMulC_async(fftM, K *data.Slice, Nx, Ny int) { util.Argument(fftM.NComp() == 1 && K.NComp() == 1) cfg := make3DConf([3]int{Nx, Ny, 1}) k_kernmulC_async(fftM.DevPtr(0), K.DevPtr(0), Nx, Ny, cfg) }
// kernel multiplication for 2D demag convolution on Z, exploiting full kernel symmetry. func kernMulRSymm2Dz_async(fftMz, Kzz *data.Slice, Nx, Ny int) { util.Argument(fftMz.NComp() == 1 && Kzz.NComp() == 1) cfg := make3DConf([3]int{Nx, Ny, 1}) k_kernmulRSymm2Dz_async(fftMz.DevPtr(0), Kzz.DevPtr(0), Nx, Ny, cfg) }
// kernel multiplication for 3D demag convolution, exploiting full kernel symmetry. func kernMulRSymm3D_async(fftM [3]*data.Slice, Kxx, Kyy, Kzz, Kyz, Kxz, Kxy *data.Slice, Nx, Ny, Nz int) { util.Argument(fftM[X].NComp() == 1 && Kxx.NComp() == 1) cfg := make3DConf([3]int{Nx, Ny, Nz}) k_kernmulRSymm3D_async(fftM[X].DevPtr(0), fftM[Y].DevPtr(0), fftM[Z].DevPtr(0), Kxx.DevPtr(0), Kyy.DevPtr(0), Kzz.DevPtr(0), Kyz.DevPtr(0), Kxz.DevPtr(0), Kxy.DevPtr(0), Nx, Ny, Nz, cfg) }
// zero 1-component slice func zero1_async(dst *data.Slice) { cu.MemsetD32Async(cu.DevicePtr(uintptr(dst.DevPtr(0))), 0, int64(dst.Len()), stream0) }
// Landau-Lifshitz torque divided by gamma0: // - 1/(1+α²) [ m x B + α m x (m x B) ] // torque in Tesla // m normalized // B in Tesla // see lltorque.cu func LLTorque(torque, m, B *data.Slice, alpha LUTPtr, regions *Bytes) { N := torque.Len() cfg := make1DConf(N) k_lltorque_async(torque.DevPtr(X), torque.DevPtr(Y), torque.DevPtr(Z), m.DevPtr(X), m.DevPtr(Y), m.DevPtr(Z), B.DevPtr(X), B.DevPtr(Y), B.DevPtr(Z), unsafe.Pointer(alpha), regions.Ptr, N, cfg) }
// Landau-Lifshitz torque with precession disabled. // Used by engine.Relax(). func LLNoPrecess(torque, m, B *data.Slice) { N := torque.Len() cfg := make1DConf(N) k_llnoprecess_async(torque.DevPtr(X), torque.DevPtr(Y), torque.DevPtr(Z), m.DevPtr(X), m.DevPtr(Y), m.DevPtr(Z), B.DevPtr(X), B.DevPtr(Y), B.DevPtr(Z), N, cfg) }