// Locks to an OS thread and initializes CUDA for that thread. func Init(gpu int) { if cudaCtx != 0 { return // needed for tests } runtime.LockOSThread() tryCuInit() dev := cu.Device(gpu) cudaCtx = cu.CtxCreate(cu.CTX_SCHED_YIELD, dev) cudaCtx.SetCurrent() M, m := dev.ComputeCapability() cudaCC = 10*M + m Version = cu.Version() DevName = dev.Name() TotalMem = dev.TotalMem() GPUInfo = fmt.Sprint("CUDA ", Version, " ", DevName, "(", (TotalMem)/(1024*1024), "MB) ", "cc", M, ".", m) if M < 2 { log.Fatalln("GPU has insufficient compute capability, need 2.0 or higher.") } if Synchronous { log.Println("DEBUG: synchronized CUDA calls") } // test PTX load so that we can catch CUDA_ERROR_NO_BINARY_FOR_GPU early fatbinLoad(madd2_map, "madd2") }
// needed for all other tests. func init() { cu.Init(0) ctx := cu.CtxCreate(cu.CTX_SCHED_AUTO, 0) cu.CtxSetCurrent(ctx) fmt.Println("Created CUDA context") }