func main() { nimble.Init() defer nimble.Cleanup() nimble.SetOD("demag2.out") gpu.LockCudaThread() N0, N1, N2 := 4, 32, 1024 cx, cy, cz := 3e-9, 3.125e-9, 3.125e-9 mesh := nimble.NewMesh(N0, N1, N2, cx, cy, cz) fmt.Println("mesh:", mesh) mbox := gpu.NewConst("m", "", mesh, nimble.UnifiedMemory, []float64{1, 0, 0}) m := mbox.Output() const acc = 4 kernel := mag.BruteKernel(mesh, acc) conv := gpu.NewConvolution("B", "T", mesh, nimble.UnifiedMemory, kernel, m) B := conv.Output() outputc := B.NewReader() nimble.RunStack() output := host(outputc.ReadNext(mesh.NCell())) out0 := core.Reshape(output[0], mesh.Size()) out1 := core.Reshape(output[1], mesh.Size()) out2 := core.Reshape(output[2], mesh.Size()) X, Y, Z := N0/2, N1/2, N2/2 if out0[X][Y][Z] < -0.95 || out0[X][Y][Z] > -0.90 || out1[X][Y][Z] > 0.001 || out2[X][Y][Z] > 0.001 { fmt.Println("failed, got:", out0[X][Y][Z], out1[X][Y][Z], out2[X][Y][Z]) os.Exit(2) } else { fmt.Println("OK") } }
// Precision test for the kernel: thin film. func main() { nimble.Init() defer nimble.Cleanup() nimble.SetOD("kernel-film.out") gpu.LockCudaThread() Y, X := core.IntArg(0), core.IntArg(1) y, x := float64(Y), float64(X) N0, N1, N2 := 1, 1024/Y, 1024/X cx, cy, cz := 1e-9, 1e-9*y, 1e-9*x mesh := nimble.NewMesh(N0, N1, N2, cx, cy, cz) fmt.Println("mesh:", mesh) mbox := gpu.NewConst("m", "", mesh, nimble.UnifiedMemory, []float64{1, 0, 0}) m := mbox.Output() acc := 4. kernel := mag.BruteKernel(mesh, acc) conv := gpu.NewConvolution("B", "T", mesh, nimble.UnifiedMemory, kernel, m) B := conv.Output() outputc := B.NewReader() nimble.RunStack() output := host(outputc.ReadNext(mesh.NCell())) Bz := core.Reshape(output[0], [3]int{N0, N1, N2}) probe := float64(Bz[N0/2][N1/2][N2/2]) fmt.Println(probe) want := -1. if math.Abs(probe-want) > 0.01 { fmt.Println("FAIL") os.Exit(2) } else { fmt.Println("OK") } }