func main() { nimble.Init() defer nimble.Cleanup() nimble.SetOD("demag1.out") gpu.LockCudaThread() N0, N1, N2 := 1, 3*64, 5*64 cx, cy, cz := 3e-9, 3.125e-9, 3.125e-9 mesh := nimble.NewMesh(N0, N1, N2, cx, cy, cz) fmt.Println("mesh:", mesh) mbox := gpu.NewConst("m", "", mesh, nimble.UnifiedMemory, []float64{1, 0, 0}) m := mbox.Output() const acc = 4 kernel := mag.BruteKernel(mesh, acc) conv := gpu.NewConvolution("B", "T", mesh, nimble.UnifiedMemory, kernel, m) B := conv.Output() const probe = 24 * 121 outputc := B.NewReader() nimble.RunStack() output := host(outputc.ReadNext(mesh.NCell())) if output[0][probe] > -0.97 || output[0][probe] < -0.99 || output[1][probe] != 0 || output[2][probe] != 0 { fmt.Println("failed, got:", output[0][probe]) os.Exit(2) } else { fmt.Println("OK") } }
// Precision test for the kernel: thin film. func main() { nimble.Init() defer nimble.Cleanup() nimble.SetOD("kernel-film.out") gpu.LockCudaThread() Y, X := core.IntArg(0), core.IntArg(1) y, x := float64(Y), float64(X) N0, N1, N2 := 1, 1024/Y, 1024/X cx, cy, cz := 1e-9, 1e-9*y, 1e-9*x mesh := nimble.NewMesh(N0, N1, N2, cx, cy, cz) fmt.Println("mesh:", mesh) mbox := gpu.NewConst("m", "", mesh, nimble.UnifiedMemory, []float64{1, 0, 0}) m := mbox.Output() acc := 4. kernel := mag.BruteKernel(mesh, acc) conv := gpu.NewConvolution("B", "T", mesh, nimble.UnifiedMemory, kernel, m) B := conv.Output() outputc := B.NewReader() nimble.RunStack() output := host(outputc.ReadNext(mesh.NCell())) Bz := core.Reshape(output[0], [3]int{N0, N1, N2}) probe := float64(Bz[N0/2][N1/2][N2/2]) fmt.Println(probe) want := -1. if math.Abs(probe-want) > 0.01 { fmt.Println("FAIL") os.Exit(2) } else { fmt.Println("OK") } }
func main() { nimble.Init() defer nimble.Cleanup() nimble.SetOD("demag2.out") gpu.LockCudaThread() N0, N1, N2 := 4, 32, 1024 cx, cy, cz := 3e-9, 3.125e-9, 3.125e-9 mesh := nimble.NewMesh(N0, N1, N2, cx, cy, cz) fmt.Println("mesh:", mesh) mbox := gpu.NewConst("m", "", mesh, nimble.UnifiedMemory, []float64{1, 0, 0}) m := mbox.Output() const acc = 4 kernel := mag.BruteKernel(mesh, acc) conv := gpu.NewConvolution("B", "T", mesh, nimble.UnifiedMemory, kernel, m) B := conv.Output() outputc := B.NewReader() nimble.RunStack() output := host(outputc.ReadNext(mesh.NCell())) out0 := core.Reshape(output[0], mesh.Size()) out1 := core.Reshape(output[1], mesh.Size()) out2 := core.Reshape(output[2], mesh.Size()) X, Y, Z := N0/2, N1/2, N2/2 if out0[X][Y][Z] < -0.95 || out0[X][Y][Z] > -0.90 || out1[X][Y][Z] > 0.001 || out2[X][Y][Z] > 0.001 { fmt.Println("failed, got:", out0[X][Y][Z], out1[X][Y][Z], out2[X][Y][Z]) os.Exit(2) } else { fmt.Println("OK") } }
// main sets up a 1 x 32 x 128 mesh, a constant quantity m and a
// conv.NewSymm2D convolution with a mag.BruteKernel kernel, then dumps
// profiling output to stdout. The remainder of the original pipeline
// (exchange, effective field, torque, Euler solver, autosave and the result
// check) is commented out below.
//
// NOTE(review): this block looks like unfinished work in progress:
//   - testM is referenced, but its only visible definition is the
//     commented-out fragment "//testM :=".
//   - hd is only used by the commented-out code; Go rejects unused locals.
//   - ZeroPad, ProfDump and Cleanup are called unqualified and are not
//     defined in the visible source; presumably they come from a dot-import
//     or a sibling file. TODO confirm.
//   - Cleanup is called at the end instead of being deferred, so it is
//     skipped if anything above panics.
func main() {
	nimble.Init()
	nimble.SetOD("demag.out")

	N0, N1, N2 := 1, 32, 128
	cx, cy, cz := 3e-9, 3.125e-9, 3.125e-9
	mesh := nimble.NewMesh(N0, N1, N2, cx, cy, cz)
	fmt.Println("mesh:", mesh)

	//testM :=
	m := nimble.NewConstant("m", "", mesh, testM).Output()

	const acc = 10
	// The kernel is built on ZeroPad(mesh) rather than on mesh itself.
	kernel := mag.BruteKernel(ZeroPad(mesh), acc)
	demag := conv.NewSymm2D(mesh, kernel, m)
	hd := demag.Output()

	// Disabled remainder of the pipeline, kept for reference:
	//
	// Msat := 1.0053
	// aex := mag.Mu0 * 13e-12 / Msat
	// hex := MakeChan3("Hex", "", mesh)
	// Stack(mag.NewExchange6(m.NewReader(), hex, mesh, aex))
	//
	// heff := MakeChan3("Heff", "", mesh)
	// Stack(NewAdder3(heff, hd.NewReader(), hex.NewReader()))
	//
	// const alpha = 1
	// torque := MakeChan3("τ", "", mesh)
	// Stack(mag.NewLLGTorque(torque, m.NewReader(), heff.NewReader(), alpha))
	//
	// const dt = 50e-15
	// solver := mag.NewEuler(m, torque.NewReader(), mag.Gamma0, dt)
	// mag.SetAll(m.UnsafeArray(), mag.Uniform(0, 0.1, 1))
	// Stack(dump.NewAutosaver("h.dump", hd.NewReader(), 1))
	// Stack(dump.NewAutosaver("m.dump", m.NewReader(), 1))
	// Stack(dump.NewAutosaver("hex.dump", hex.NewReader(), 1))
	// Stack(dump.NewAutotable("m.table", m.NewReader(), 1))
	//
	// RunStack()
	//
	// solver.Steps(100)
	// res := m.UnsafeArray()
	// got := [3]float32{res[0][0][0][0], res[1][0][0][0], res[2][0][0][0]}
	// expect := [3]float32{-0.075877085, 0.17907967, 0.9809043}
	// Log("result:", got)
	// if got != expect {
	// Fatal(fmt.Errorf("expected: %v", expect))
	// }
	//solver.Steps(10000)

	ProfDump(os.Stdout)
	Cleanup()
}
// Precision test for the kernel: cube. func main() { nimble.Init() core.LOG = false defer nimble.Cleanup() nimble.SetOD("kernel-cube.out") N0, N1, N2 := 2, 2, 2 cx, cy, cz := 1e-9, 1e-9, 1e-9 mesh := nimble.NewMesh(N0, N1, N2, cx, cy, cz) fmt.Println("mesh:", mesh) for acc := 1; acc < 10; acc++ { mag.BruteKernel(mesh, float64(acc)) //fmt.Println(acc, kernel[0][0][0][0][0], kernel[1][0][0][0][0]) } }
func main() { nimble.Init() defer nimble.Cleanup() nimble.SetOD("gpu4.out") mem := nimble.GPUMemory const ( a = 8 N0, N1, N2 = 1, 32 * a, 128 * a c = 1e-9 cx, cy, cz = c, c, c Bsat = 800e3 * mag.Mu0 Aex_red = 13e-12 / (Bsat / mag.Mu0) α = 1 ) mesh := nimble.NewMesh(N0, N1, N2, cx, cy, cz) fmt.Println("mesh:", mesh) m := nimble.MakeChanN(3, "m", "", mesh, mem, 0) M := gpu.Device3(m.ChanN().UnsafeData()) M[0].Memset(float32(1 / math.Sqrt(3))) M[1].Memset(float32(1 / math.Sqrt(3))) M[2].Memset(float32(1 / math.Sqrt(3))) const acc = 1 kernel := mag.BruteKernel(mesh, acc) B := gpu.NewConvolution("B", "T", mesh, mem, kernel, m).Output() Bex := gpu.NewExchange6("Bex", m, Aex_red).Output() BeffBox := gpu.NewSum("Beff", B, Bex, Bsat, 1, mem) Beff := BeffBox.Output() tBox := gpu.NewLLGTorque("torque", m, Beff, α) torque := tBox.Output() solver := gpu.NewHeun(m, torque, 10e-15, mag.Gamma0) solver.Maxerr = 2e-4 solver.Mindt = 1e-15 solver.Steps(100) }
// Standard problem 4 on GPU func main() { nimble.Init() defer nimble.Cleanup() nimble.SetOD("gpu4-3d.out") gpu.LockCudaThread() mem := nimble.GPUMemory const ( N0, N1, N2 = 1 * 2, 32, 128 Sx, Sy, Sz = 3e-9, 125e-9, 500e-9 cx, cy, cz = Sx / N0, Sy / N1, Sz / N2 Bsat = 800e3 * mag.Mu0 Aex_red = 13e-12 / (Bsat / mag.Mu0) α = 1 ) mesh := nimble.NewMesh(N0, N1, N2, cx, cy, cz) fmt.Println("mesh:", mesh) // TODO: MakeChanN -> NewQuant() m := nimble.MakeChanN(3, "m", "", mesh, mem, 0) M := gpu.Device3(m.ChanN().UnsafeData()) M[0].Memset(float32(1 / math.Sqrt(3))) M[1].Memset(float32(1 / math.Sqrt(3))) M[2].Memset(float32(1 / math.Sqrt(3))) acc := 5. kernel := mag.BruteKernel(mesh, acc) B := gpu.NewConvolution("B", "T", mesh, mem, kernel, m).Output() exch := gpu.NewExchange6("Bex", m, Aex_red) Bex := exch.Output() BeffBox := gpu.NewSum("Beff", B, Bex, Bsat, 1, mem) Beff := BeffBox.Output() tBox := gpu.NewLLGTorque("torque", m, Beff, α) torque := tBox.Output() solver := gpu.NewHeun(m, torque, 10e-15, mag.Gamma0) solver.Maxerr = 5e-4 solver.Maxdt = 1e-12 solver.Mindt = 1e-15 solver.Headroom = 0.5 every := 100 uni.Autosave(m, every, gpu.GPUDevice) uni.Autotable(m, every/10, gpu.GPUDevice) solver.Advance(2e-9) var avg [3]float32 for i := range avg { avg[i] = gpu.Sum(m.UnsafeData()[i].Device()) / float32(mesh.NCell()) } want := [3]float32{0, 0.12521397, 0.9669811} err := math.Sqrt(float64(sqr(avg[0]-want[0]) + sqr(avg[1]-want[1]) + sqr(avg[2]-want[2]))) fmt.Println("avg:", avg, "err:", err) if err > 1e-2 { fmt.Println("FAILED") os.Exit(2) } fmt.Println("OK") const ( Bx = -24.6E-3 By = 4.3E-3 Bz = 0 ) Bext := gpu.NewConst("Bext", "T", mesh, mem, []float64{Bz, By, Bx}).Output() BeffBox.MAdd(Bext, 1) tBox.Alpha = 0.02 solver.Advance(1e-9) for i := range avg { avg[i] = gpu.Sum(m.UnsafeData()[i].Device()) / float32(mesh.NCell()) } want = [3]float32{0.04303933, 0.13000599, -0.9842051} err = math.Sqrt(float64(sqr(avg[0]-want[0]) + sqr(avg[1]-want[1]) + sqr(avg[2]-want[2]))) 
fmt.Println("avg:", avg, "err:", err) if err > 1e-2 { fmt.Println("FAILED") os.Exit(2) } fmt.Println("OK") }
func main() { nimble.Init() defer nimble.Cleanup() nimble.SetOD("gpueuler.out") N0, N1, N2 := 1, 32, 128 cx, cy, cz := 3e-9, 3.125e-9, 3.125e-9 mesh := nimble.NewMesh(N0, N1, N2, cx, cy, cz) fmt.Println("mesh:", mesh) m := nimble.MakeChanN(3, "m", "", mesh, nimble.UnifiedMemory, 0) fmt.Println("m:", m) acc := 8 kernel := mag.BruteKernel(mesh, acc) B := conv.NewSymm2D("B", "T", mesh, nimble.UnifiedMemory, kernel, m).Output() const ( Bsat = 1.0053 aex = mag.Mu0 * 13e-12 / Bsat α = 1 ) //exch := cpu.NewExchange6("Bex", "T", nimble.UnifiedMemory, m.NewReader(), aex) exch := gpu.NewExchange6("Bex", m, aex) nimble.Stack(exch) Bex := exch.Output() // heff := MakeChan3("Heff", "", mesh) Beff := gpu.NewSum("Beff", B, Bex, Bsat, 1, nimble.UnifiedMemory).Output() tBox := gpu.NewLLGTorque("torque", m, Beff, α) nimble.Stack(tBox) torque := tBox.Output() const dt = 100e-15 solver := gpu.NewEuler(m, torque, mag.Gamma0, dt) M := cpu.Host(m.ChanN().UnsafeData()) for i := range M[2] { M[2][i] = 1 M[1][i] = 0.1 } every := 100 nimble.Autosave(B, every) nimble.Autosave(m, every) nimble.Autosave(Bex, every) nimble.Autosave(Beff, every) nimble.Autosave(torque, every) nimble.Autotable(m, every) nimble.RunStack() solver.Steps(100) res := cpu.Host(m.ChanN().UnsafeData()) got := [3]float32{res[0][0], res[1][0], res[2][0]} expect := [3]float32{-0.033120323, 0.20761484, 0.9776498} solver.Steps(10000) fmt.Println("result:", got) if got != expect { fmt.Println("expected:", expect) os.Exit(2) } else { fmt.Println("OK") } }
func main() { nimble.Init() defer nimble.Cleanup() nimble.SetOD("test4.out") N0, N1, N2 := 1, 32, 128 cx, cy, cz := 3e-9, 3.125e-9, 3.125e-9 mesh := nimble.NewMesh(N0, N1, N2, cx, cy, cz) fmt.Println("mesh:", mesh) m := nimble.MakeChanN(3, "m", "", mesh, nimble.UnifiedMemory, 0) fmt.Println("m:", m) acc := 8 kernel := mag.BruteKernel(mesh, acc) B := gpu.NewConvolution("B", "T", mesh, nimble.UnifiedMemory, kernel, m).Output() const Bsat = 1.0053 const aex = mag.Mu0 * 13e-12 / Bsat exch := cpu.NewExchange6("Bex", "T", nimble.UnifiedMemory, m.NewReader(), aex) Bex := exch.Output() Beff := cpu.NewSum("Beff", B, Bex, Bsat, 1, nimble.UnifiedMemory).Output() const alpha = 1 tbox := cpu.NewLLGTorque("torque", m, Beff, alpha) nimble.Stack(tbox) torque := tbox.Output() const dt = 100e-15 solver := cpu.NewEuler(m, torque.NewReader(), mag.Gamma0, dt) M := cpu.Host(m.ChanN().UnsafeData()) for i := range M[2] { M[2][i] = 1 M[1][i] = 0.1 } every := 100 uni.Autosave(B, every, cpu.CPUDevice) uni.Autosave(m, every, cpu.CPUDevice) uni.Autosave(Bex, every, cpu.CPUDevice) uni.Autosave(Beff, every, cpu.CPUDevice) uni.Autosave(torque, every, cpu.CPUDevice) uni.Autotable(m, every, cpu.CPUDevice) nimble.RunStack() solver.Steps(100) res := cpu.Host(m.ChanN().UnsafeData()) got := [3]float32{res[0][0], res[1][0], res[2][0]} expect := [3]float32{-0.03450077, 0.21015842, 0.9770585} fmt.Println("result:", got) if got != expect { fmt.Println("expected:", expect) os.Exit(2) } else { fmt.Println("OK") } }
// Standard problem 4 on GPU func main() { nimble.Init() defer nimble.Cleanup() nimble.SetOD("gpu4.out") mem := nimble.GPUMemory const ( N0, N1, N2 = 1, 32 * 2, 128 cx, cy, cz = 3e-9, 3.125e-9 / 2, 3.125e-9 Bsat = 1.0053 Aex_red = mag.Mu0 * 13e-12 / Bsat α = 1 ) mesh := nimble.NewMesh(N0, N1, N2, cx, cy, cz) fmt.Println("mesh:", mesh) // TODO: MakeChanN -> NewQuant() m := nimble.MakeChanN(3, "m", "", mesh, mem, 0) M := gpu.Device3(m.ChanN().UnsafeData()) M[0].Memset(0) M[1].Memset(0.1) M[2].Memset(0.99) acc := 10 kernel := mag.BruteKernel(mesh, acc) B := gpu.NewConvolution("B", "T", mesh, mem, kernel, m).Output() exch := gpu.NewExchange6("Bex", m, Aex_red) nimble.Stack(exch) Bex := exch.Output() BeffBox := gpu.NewSum("Beff", B, Bex, Bsat, 1, mem) Beff := BeffBox.Output() tBox := gpu.NewLLGTorque("torque", m, Beff, α) nimble.Stack(tBox) torque := tBox.Output() solver := gpu.NewHeun(m, torque, 1e-15, mag.Gamma0) every := 100 uni.Autosave(m, every, gpu.GPUDevice) uni.Autotable(m, every/10, gpu.GPUDevice) solver.Advance(0.3e-9) // res := cpu.Host(m.ChanN().UnsafeData()) // got := [3]float32{res[0][0], res[1][0], res[2][0]} // expect := [3]float32{1.090642e-06, 0.6730072, 0.739636} // fmt.Println("result:", got) // if got != expect { // fmt.Println("expected:", expect) // os.Exit(2) // } else { // fmt.Println("OK") // } const ( Bx = -24.6E-3 By = 4.3E-3 Bz = 0 ) Bext := gpu.RunConst("Bext", "T", mesh, mem, []float64{Bz, By, Bx}) BeffBox.MAdd(Bext, 1) tBox.SetAlpha(0.02) solver.Advance(1e-9) }