func Init() { if cudaCtx != 0 { return // already inited } var flag uint switch *Flag_sched { default: panic("sched flag: expecting auto,spin,yield or sync: " + *Flag_sched) case "auto": flag = cu.CTX_SCHED_AUTO case "spin": flag = cu.CTX_SCHED_SPIN case "yield": flag = cu.CTX_SCHED_YIELD case "sync": flag = cu.CTX_BLOCKING_SYNC } tryCuInit() dev := cu.Device(*Flag_gpu) cudaCtx = cu.CtxCreate(flag, dev) M, m := dev.ComputeCapability() concurrent := dev.Attribute(cu.CONCURRENT_KERNELS) log.Print("CUDA ", float32(cu.Version())/1000, " ", dev.Name(), "(", (dev.TotalMem())/(1024*1024), "MB) ", "compute ", M, ".", m, " concurrent: ", concurrent == 1, "\n") cudaCC = 10*M + m log.Println("set preference for device L1 cache") cuda.DeviceSetCacheConfig(cuda.FUNC_CACHE_PREFER_L1) initStreampool() }
func InitCuda() { runtime.LockOSThread() cu.Init(0) cu.CtxCreate(cu.CTX_SCHED_AUTO, 0).SetCurrent() }
// needed for all other tests. func init() { cu.Init(0) ctx := cu.CtxCreate(cu.CTX_SCHED_AUTO, 0) cu.CtxSetCurrent(ctx) fmt.Println("Created CUDA context") }