func testConvolution(c *DemagConvolution, mesh *data.Mesh) { inhost := data.NewSlice(3, mesh) initConvTestInput(inhost.Vectors()) gpu := NewSlice(3, mesh) defer gpu.Free() data.Copy(gpu, inhost) c.Exec(gpu, gpu, data.NilSlice(1, mesh), 1) output := gpu.HostCopy() //data.MustWriteFile("gpu.dump", output, 0) // rm! brute := data.NewSlice(3, mesh) bruteConv(inhost.Vectors(), brute.Vectors(), c.kern) //data.MustWriteFile("brute.dump", brute, 0) // rm! a, b := output.Host(), brute.Host() err := float32(0) for c := range a { for i := range a[c] { if abs(a[c][i]-b[c][i]) > err { err = abs(a[c][i] - b[c][i]) } } } if err > CONV_TOLERANCE { log.Fatal("convolution self-test error: ", err) } else { log.Println("convolution self-test error:", err) } }
func main() { cuda.Init() N0, N1, N2 := 1, 64, 128 c := 1. mesh := data.NewMesh(N0, N1, N2, c/2, c*2, c) m := cuda.NewSlice(3, mesh) conv := cuda.NewDemag(mesh) cuda.Memset(m, 1, 1, 1) B := cuda.NewSlice(3, mesh) Bsat := 1. vol := data.NilSlice(1, mesh) conv.Exec(B, m, vol, Bsat) out := B.HostCopy() bx := out.Vectors()[0][N0/2][N1/2][N2/2] by := out.Vectors()[1][N0/2][N1/2][N2/2] bz := out.Vectors()[2][N0/2][N1/2][N2/2] fmt.Println("demag tensor:", bx, by, bz) check(bx, -1) check(by, 0) check(bz, 0) fmt.Println("OK") }
func main() { cuda.Init() N0, N1, N2 := 16, 16, 16 c := 1. mesh := data.NewMesh(N0, N1, N2, c, c, c) m := cuda.NewSlice(3, mesh) conv := cuda.NewDemag(mesh) mhost := m.HostCopy() m_ := mhost.Vectors() r := float64(N2) / 2 for i := 0; i < N0; i++ { x := c * (float64(i) + 0.5 - float64(N0)/2) for j := 0; j < N1; j++ { y := c * (float64(j) + 0.5 - float64(N1)/2) for k := 0; k < N2; k++ { z := c * (float64(k) + 0.5 - float64(N2)/2) if x*x+y*y+z*z < r*r { m_[0][i][j][k] = 1 m_[1][i][j][k] = 2 m_[2][i][j][k] = 3 } } } } data.Copy(m, mhost) B := cuda.NewSlice(3, mesh) conv.Exec(B, m, data.NilSlice(1, mesh), 1) out := B.HostCopy() bx := out.Vectors()[0][N0/2][N1/2][N2/2] by := out.Vectors()[1][N0/2][N1/2][N2/2] bz := out.Vectors()[2][N0/2][N1/2][N2/2] fmt.Println("demag tensor:", bx, by/2, bz/3) check(bx, -1./3.) check(by, -2./3.) check(bz, -3./3.) fmt.Println("OK") }
func initialize() { // these 2 GPU arrays are re-used to stored various quantities. arr1, arr2 := cuda.NewSynced(3, mesh), cuda.NewSynced(3, mesh) // cell volumes currently unused vol = data.NilSlice(1, mesh) // magnetization m = newBuffered(arr1, "m", nil) M = m // effective field b_eff := newBuffered(arr2, "B_eff", nil) B_eff = b_eff // demag field demag_ := cuda.NewDemag(mesh) b_demag := newBuffered(arr2, "B_demag", func(b *data.Slice) { m_ := m.Read() demag_.Exec(b, m_, vol, Mu0*Msat()) //TODO: consistent msat or bsat m.ReadDone() }) B_demag = b_demag // exchange field b_exch := newAdder("B_exch", func(dst *data.Slice) { m_ := m.Read() cuda.AddExchange(dst, m_, Aex(), Msat()) m.ReadDone() }) B_exch = b_exch // Dzyaloshinskii-Moriya field b_dmi := newAdder("B_dmi", func(dst *data.Slice) { d := DMI() if d != 0 { m_ := m.Read() cuda.AddDMI(dst, m_, d, Msat()) m.ReadDone() } }) B_dmi = b_dmi // uniaxial anisotropy b_uni := newAdder("B_uni", func(dst *data.Slice) { ku1 := Ku1() // in J/m3 if ku1 != [3]float64{0, 0, 0} { m_ := m.Read() cuda.AddUniaxialAnisotropy(dst, m_, ku1[2], ku1[1], ku1[0], Msat()) m.ReadDone() } }) B_uni = b_uni // external field b_ext := newAdder("B_ext", func(dst *data.Slice) { bext := B_ext() cuda.AddConst(dst, float32(bext[2]), float32(bext[1]), float32(bext[0])) }) // llg torque torque := newBuffered(arr2, "torque", func(b *data.Slice) { m_ := m.Read() cuda.LLGTorque(b, m_, b, float32(Alpha())) m.ReadDone() }) Torque = torque // spin-transfer torque stt := newAdder("stt", func(dst *data.Slice) { j := J() if j != [3]float64{0, 0, 0} { m_ := m.Read() p := SpinPol() jx := j[2] * p jy := j[1] * p jz := j[0] * p cuda.AddZhangLiTorque(dst, m_, [3]float64{jx, jy, jz}, Msat(), nil, Alpha(), Xi()) m.ReadDone() } }) STT = stt // data table table := newTable("datatable") Table = table // solver torqueFn := func(good bool) *data.Synced { m.touch(good) // saves if needed table.send(m.Synced, good) b_demag.update(good) b_exch.addTo(b_eff, good) b_dmi.addTo(b_eff, good) b_uni.addTo(b_eff, good) b_ext.addTo(b_eff, good) b_eff.touch(good) torque.update(good) stt.addTo(torque, good) return torque.Synced } Solver = cuda.NewHeun(m.Synced, torqueFn, cuda.Normalize, 1e-15, Gamma0, &Time) }