// Only the damping term of LLGTorque, with alpha = 1. Useful for relaxation.
func DampingTorque(torque, m, B *data.Slice) {
	N := torque.Len()
	cfg := make1DConf(N)
	k_dampingtorque(torque.DevPtr(0), torque.DevPtr(1), torque.DevPtr(2),
		m.DevPtr(0), m.DevPtr(1), m.DevPtr(2),
		B.DevPtr(0), B.DevPtr(1), B.DevPtr(2), N, cfg)
}
// Memset sets the Slice's components to the specified values.
func Memset(s *data.Slice, val ...float32) {
	util.Argument(len(val) == s.NComp())
	str := stream()
	for c, v := range val {
		cu.MemsetD32Async(cu.DevicePtr(s.DevPtr(c)), math.Float32bits(v), int64(s.Len()), str)
	}
	syncAndRecycle(str)
}
// Kernel multiplication for the 2D case, x (first) component only, which
// decouples from y and z in 2D. The kernel K00 is stored in only (N1/2+1)*N2
// elements, exploiting mirror symmetry in the N1 direction.
func kernMulRSymm2Dx(fftMx, K00 *data.Slice, N1, N2 int, str cu.Stream) {
	util.Argument(K00.Len() == (N1/2+1)*N2)
	util.Argument(fftMx.NComp() == 1 && K00.NComp() == 1)
	cfg := make2DConf(N1, N2)
	k_kernmulRSymm2Dx_async(fftMx.DevPtr(0), K00.DevPtr(0), N1, N2, cfg, str)
}
// Kernel multiplication for the general 3D case.
// Does not yet exploit Y mirror symmetry, even though it is partially
// implemented in the CUDA kernel.
func kernMulRSymm3D(fftM [3]*data.Slice, K00, K11, K22, K12, K02, K01 *data.Slice, N0, N1, N2 int, str cu.Stream) {
	util.Argument(K00.Len() == N0*N1*N2) // no symmetry yet
	util.Argument(fftM[0].NComp() == 1 && K00.NComp() == 1)
	cfg := make2DConf(N1, N2)
	k_kernmulRSymm3D_async(fftM[0].DevPtr(0), fftM[1].DevPtr(0), fftM[2].DevPtr(0),
		K00.DevPtr(0), K11.DevPtr(0), K22.DevPtr(0),
		K12.DevPtr(0), K02.DevPtr(0), K01.DevPtr(0),
		N0, N1, N2, cfg, str)
}
// Kernel multiplication for the 2D case, y and z components, which couple
// through K11, K22 and K12. The kernels are stored in only (N1/2+1)*N2
// elements, exploiting mirror symmetry in the N1 direction.
func kernMulRSymm2Dyz(fftMy, fftMz, K11, K22, K12 *data.Slice, N1, N2 int, str cu.Stream) {
	util.Argument(K11.Len() == (N1/2+1)*N2)
	util.Argument(fftMy.NComp() == 1 && K11.NComp() == 1)
	cfg := make2DConf(N1, N2)
	k_kernmulRSymm2Dyz_async(fftMy.DevPtr(0), fftMz.DevPtr(0),
		K11.DevPtr(0), K22.DevPtr(0), K12.DevPtr(0), N1, N2, cfg, str)
}
// Adds the uniaxial magnetocrystalline anisotropy field to Beff.
// m: normalized magnetization.
// (Kx, Ky, Kz): anisotropy axis scaled by the anisotropy constant, in J/m³.
// Msat: saturation magnetization in A/m, so that K/Msat is in Tesla.
func AddUniaxialAnisotropy(Beff, m *data.Slice, Kx, Ky, Kz, Msat float64) {
	// TODO: size check
	N := Beff.Len()
	cfg := make1DConf(N)
	k_adduniaxialanisotropy(Beff.DevPtr(0), Beff.DevPtr(1), Beff.DevPtr(2),
		m.DevPtr(0), m.DevPtr(1), m.DevPtr(2),
		float32(Kx/Msat), float32(Ky/Msat), float32(Kz/Msat), N, cfg)
}
// Landau-Lifshitz torque divided by gamma0:
// 	- 1/(1+α²) [ m x B + α m x (m x B) ]
// 	torque in Tesla
// 	m normalized
// 	B in Tesla
func LLGTorque(torque, m, B *data.Slice, alpha float32) {
	// TODO: assert sizes
	N := torque.Len()
	cfg := make1DConf(N)
	k_llgtorque(torque.DevPtr(0), torque.DevPtr(1), torque.DevPtr(2),
		m.DevPtr(0), m.DevPtr(1), m.DevPtr(2),
		B.DevPtr(0), B.DevPtr(1), B.DevPtr(2), alpha, N, cfg)
}
// Adds a constant to each element of the slice:
// 	dst[comp][index] += cnst[comp]
func AddConst(dst *data.Slice, cnst ...float32) {
	util.Argument(len(cnst) == dst.NComp())
	N := dst.Len()
	cfg := make1DConf(N)
	str := stream()
	for c := 0; c < dst.NComp(); c++ {
		if cnst[c] != 0 {
			k_madd2_async(dst.DevPtr(c), dst.DevPtr(c), 1, nil, cnst[c], N, cfg, str)
		}
	}
	syncAndRecycle(str)
}
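// Usage sketch (not part of this file): assemble a simple effective field from a
// uniform applied field plus uniaxial anisotropy, using Memset, AddConst and
// AddUniaxialAnisotropy above. The helper name, the choice of field terms and the
// unit-axis parameter u are illustrative assumptions, not this package's API.
func buildField(Beff, m *data.Slice, Bext [3]float32, u [3]float64, Ku, Msat float64) {
	Memset(Beff, 0, 0, 0)                     // start from a zero field
	AddConst(Beff, Bext[0], Bext[1], Bext[2]) // uniform applied field, in Tesla
	AddUniaxialAnisotropy(Beff, m, Ku*u[0], Ku*u[1], Ku*u[2], Msat) // easy axis u, Ku in J/m³
}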
// Copies src into dst, which may be larger or smaller.
// The remainder of dst is not filled with zeros.
func copyPad(dst, src *data.Slice, dstsize, srcsize [3]int, str cu.Stream) {
	util.Argument(dst.NComp() == 1 && src.NComp() == 1)
	util.Assert(dst.Len() == prod(dstsize))
	util.Assert(src.Len() == prod(srcsize))
	N0 := iMin(dstsize[1], srcsize[1])
	N1 := iMin(dstsize[2], srcsize[2])
	cfg := make2DConf(N0, N1)
	k_copypad_async(dst.DevPtr(0), dstsize[0], dstsize[1], dstsize[2],
		src.DevPtr(0), srcsize[0], srcsize[1], srcsize[2], cfg, str)
}
// Copies src into dst, which may be larger or smaller, multiplying by vol*Bsat.
// The remainder of dst is not filled with zeros.
func copyPadMul(dst, src *data.Slice, dstsize, srcsize [3]int, vol *data.Slice, Bsat float64, str cu.Stream) {
	util.Argument(dst.NComp() == 1)
	util.Argument(src.NComp() == 1)
	util.Argument(vol.NComp() == 1)
	util.Assert(dst.Len() == prod(dstsize) && src.Len() == prod(srcsize))
	util.Assert(vol.Mesh().Size() == srcsize)
	N0 := iMin(dstsize[1], srcsize[1])
	N1 := iMin(dstsize[2], srcsize[2])
	cfg := make2DConf(N0, N1)
	k_copypadmul_async(dst.DevPtr(0), dstsize[0], dstsize[1], dstsize[2],
		src.DevPtr(0), srcsize[0], srcsize[1], srcsize[2],
		vol.DevPtr(0), float32(Bsat), cfg, str)
}
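// Usage sketch (not part of this file): prepare one magnetization component as
// zero-padded FFT input using zero1 and copyPadMul above. Because copyPadMul does
// not clear the remainder of dst, the padded buffer is zeroed first. The helper
// name, buffer names and size arguments are illustrative assumptions.
func padInput(padded, mComp, vol *data.Slice, paddedSize, meshSize [3]int, Bsat float64, str cu.Stream) {
	zero1(padded, str) // clear the padded region
	copyPadMul(padded, mComp, paddedSize, meshSize, vol, Bsat, str)
}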
// Executes the FFT plan, asynchronously.
// src and dst are 3D arrays stored as 1D arrays.
func (p *fft3DC2RPlan) ExecAsync(src, dst *data.Slice) {
	oksrclen := p.InputLenFloats()
	if src.Len() != oksrclen {
		panic(fmt.Errorf("fft size mismatch: expecting src len %v, got %v", oksrclen, src.Len()))
	}
	okdstlen := p.OutputLenFloats()
	if dst.Len() != okdstlen {
		panic(fmt.Errorf("fft size mismatch: expecting dst len %v, got %v", okdstlen, dst.Len()))
	}
	p.handle.ExecC2R(cu.DevicePtr(src.DevPtr(0)), cu.DevicePtr(dst.DevPtr(0)))
}
// Executes the FFT plan, asynchronously.
// src and dst are 3D arrays stored as 1D arrays.
func (p *fft3DR2CPlan) ExecAsync(src, dst *data.Slice) {
	util.Argument(src.NComp() == 1 && dst.NComp() == 1)
	oksrclen := p.InputLen()
	if src.Len() != oksrclen {
		log.Panicf("fft size mismatch: expecting src len %v, got %v", oksrclen, src.Len())
	}
	okdstlen := p.OutputLen()
	if dst.Len() != okdstlen {
		log.Panicf("fft size mismatch: expecting dst len %v, got %v", okdstlen, dst.Len())
	}
	p.handle.ExecR2C(cu.DevicePtr(src.DevPtr(0)), cu.DevicePtr(dst.DevPtr(0)))
}
// Multiply-add: dst[i] = src1[i]*factor1 + src2[i]*factor2 + src3[i]*factor3
func Madd3(dst, src1, src2, src3 *data.Slice, factor1, factor2, factor3 float32) {
	N := dst.Len()
	nComp := dst.NComp()
	util.Assert(src1.Len() == N && src2.Len() == N && src3.Len() == N)
	util.Assert(src1.NComp() == nComp && src2.NComp() == nComp && src3.NComp() == nComp)
	cfg := make1DConf(N)
	str := stream()
	for c := 0; c < nComp; c++ {
		k_madd3_async(dst.DevPtr(c), src1.DevPtr(c), factor1,
			src2.DevPtr(c), factor2, src3.DevPtr(c), factor3, N, cfg, str)
	}
	syncAndRecycle(str)
}
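// Usage sketch (not part of this file): one Heun (RK2) step of the LLG equation
// built from LLGTorque, Madd3 and Normalize, with the effective field B held
// fixed over the step. mPred, k1 and k2 are caller-provided scratch buffers and
// dt is the time step with the gamma0 prefactor absorbed; all names are
// illustrative assumptions, this is not the package's actual time stepper.
func heunStep(m, B, mPred, k1, k2 *data.Slice, alpha, dt float32) {
	LLGTorque(k1, m, B, alpha)             // stage 1: k1 = torque(m)
	Madd3(mPred, m, k1, k1, 1, dt, 0)      // predictor: mPred = m + dt*k1 (factor 0 ignores the 3rd term)
	Normalize(mPred)
	LLGTorque(k2, mPred, B, alpha)         // stage 2: k2 = torque(mPred)
	Madd3(m, m, k1, k2, 1, 0.5*dt, 0.5*dt) // corrector: m += dt/2 * (k1 + k2)
	Normalize(m)
}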
// Extracts the real parts, copying them from src to dst.
// Meanwhile, checks that the imaginary parts are nearly zero
// and scales the kernel to compensate for the unnormalized FFTs.
func scaleRealParts(dst, src *data.Slice, scale float32) {
	util.Argument(2*dst.Len() == src.Len())
	util.Argument(dst.NComp() == 1 && src.NComp() == 1)
	srcList := src.HostCopy().Host()[0]
	dstList := dst.Host()[0]
	// Normally, the FFT'ed kernel is purely real because of symmetry,
	// so we only store the real parts...
	maximg := float32(0.)
	maxreal := float32(0.)
	for i := 0; i < src.Len()/2; i++ {
		dstList[i] = srcList[2*i] * scale
		if fabs(srcList[2*i+0]) > maxreal {
			maxreal = fabs(srcList[2*i+0])
		}
		if fabs(srcList[2*i+1]) > maximg {
			maximg = fabs(srcList[2*i+1])
		}
	}
	// ...however, we check that the imaginary parts are nearly zero,
	// just to be sure we did not make a mistake during kernel creation.
	if maximg/maxreal > FFT_IMAG_TOLERANCE {
		log.Fatalf("Too large FFT kernel imaginary/real part: %v", maximg/maxreal)
	}
}
// Maximum of the norms of the differences between all vectors (x1,y1,z1) and (x2,y2,z2):
// 	(dx, dy, dz) = (x1, y1, z1) - (x2, y2, z2)
// 	max_i sqrt( dx[i]*dx[i] + dy[i]*dy[i] + dz[i]*dz[i] )
func MaxVecDiff(x, y *data.Slice) float64 {
	util.Argument(x.Len() == y.Len())
	out := reduceBuf(0)
	k_reducemaxvecdiff2(x.DevPtr(0), x.DevPtr(1), x.DevPtr(2),
		y.DevPtr(0), y.DevPtr(1), y.DevPtr(2),
		out, 0, x.Len(), reducecfg)
	return math.Sqrt(float64(copyback(out)))
}
// Maximum of the norms of all vectors (x[i], y[i], z[i]):
// 	max_i sqrt( x[i]*x[i] + y[i]*y[i] + z[i]*z[i] )
func MaxVecNorm(v *data.Slice) float64 {
	out := reduceBuf(0)
	k_reducemaxvecnorm2(v.DevPtr(0), v.DevPtr(1), v.DevPtr(2), out, 0, v.Len(), reducecfg)
	return math.Sqrt(float64(copyback(out)))
}
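// Usage sketch (not part of this file): a local-error estimate for adaptive time
// stepping. For a Heun step, the difference between the two torque stages times dt
// scales with the local truncation error, so MaxVecDiff gives a cheap per-step
// error measure. The helper name and this particular error definition are
// illustrative assumptions, not the package's solver.
func stepError(k1, k2 *data.Slice, dt float64) float64 {
	return dt * MaxVecDiff(k1, k2)
}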
// Zeroes a 1-component slice.
func zero1(dst *data.Slice, str cu.Stream) {
	cu.MemsetD32Async(cu.DevicePtr(dst.DevPtr(0)), 0, int64(dst.Len()), str)
}
// Maximum of the absolute values of all elements.
func MaxAbs(in *data.Slice) float32 {
	util.Argument(in.NComp() == 1)
	out := reduceBuf(0)
	k_reducemaxabs(in.DevPtr(0), out, 0, in.Len(), reducecfg)
	return copyback(out)
}
// Normalizes the vector field to unit length.
// 0-length vectors are left unaffected.
func Normalize(vec *data.Slice) {
	N := vec.Len()
	cfg := make1DConf(N)
	k_normalize(vec.DevPtr(0), vec.DevPtr(1), vec.DevPtr(2), N, cfg)
}
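// Usage sketch (not part of this file): a fixed-step relaxation loop that drives m
// toward equilibrium with the damping-only torque and stops once the maximum torque
// drops below tol. The effective field B is held constant here purely for brevity;
// a real relaxer would re-evaluate it every iteration. The helper name, the step
// size dt (absorbing physical prefactors) and the stopping criterion are
// illustrative assumptions.
func relax(m, B, torque *data.Slice, dt float32, tol float64) {
	for {
		DampingTorque(torque, m, B) // damping-only torque, alpha = 1
		if MaxVecNorm(torque) < tol {
			return
		}
		Madd3(m, m, torque, torque, 1, dt, 0) // m += dt*torque (factor 0 ignores the 3rd term)
		Normalize(m)
	}
}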