func LaunchKernel(f Function, gridDimX, gridDimY, gridDimZ int, blockDimX, blockDimY, blockDimZ int, sharedMemBytes int, stream Stream, kernelParams []unsafe.Pointer) { // Since Go 1.6, a cgo argument cannot have a Go pointer to Go pointer, // so we copy the argument values go C memory first. argv := C.malloc(C.size_t(len(kernelParams) * pointerSize)) argp := C.malloc(C.size_t(len(kernelParams) * pointerSize)) defer C.free(argv) defer C.free(argp) for i := range kernelParams { *((*unsafe.Pointer)(offset(argp, i))) = offset(argv, i) // argp[i] = &argv[i] *((*uint64)(offset(argv, i))) = *((*uint64)(kernelParams[i])) // argv[i] = *kernelParams[i] } err := Result(C.cuLaunchKernel( C.CUfunction(unsafe.Pointer(uintptr(f))), C.uint(gridDimX), C.uint(gridDimY), C.uint(gridDimZ), C.uint(blockDimX), C.uint(blockDimY), C.uint(blockDimZ), C.uint(sharedMemBytes), C.CUstream(unsafe.Pointer(uintptr(stream))), (*unsafe.Pointer)(argp), (*unsafe.Pointer)(unsafe.Pointer(uintptr(0))))) if err != SUCCESS { panic(err) } }
// Launches a CUDA kernel on the device. // Example: // mod := ModuleLoad("file.ptx") // f := mod.GetFunction("test") // // var arg1 uintptr // arg1 = uintptr(someArray) // // var arg2 float32 // arg2 = 42 // // var arg3 int // arg3 = 1024 // // args := []uintptr{(uintptr)(unsafe.Pointer(&array)), (uintptr)(unsafe.Pointer(&value)), (uintptr)(unsafe.Pointer(&n))} // // block := 128 // grid := DivUp(N, block) // shmem := 0 // stream := STREAM0 // LaunchKernel(f, grid, 1, 1, block, 1, 1, shmem, stream, args) // // A more easy-to-use wrapper is implemented in closure.go // func LaunchKernel(f Function, gridDimX, gridDimY, gridDimZ int, blockDimX, blockDimY, blockDimZ int, sharedMemBytes int, stream Stream, kernelParams []uintptr) { //debug: print all arguments argvals := make([]int, len(kernelParams)) for i := range kernelParams { argvals[i] = *(*int)(unsafe.Pointer(kernelParams[i])) } //fmt.Println("LaunchKernel: ", "func: ", f, "gridDim: ", gridDimX, gridDimY, gridDimZ, "blockDim: ", blockDimX, blockDimY, blockDimZ, "shmem: ", sharedMemBytes, "stream: ", stream, "argptrs: ", kernelParams, "argvals:", argvals) err := Result(C.cuLaunchKernel(C.CUfunction(unsafe.Pointer(uintptr(f))), C.uint(gridDimX), C.uint(gridDimY), C.uint(gridDimZ), C.uint(blockDimX), C.uint(blockDimY), C.uint(blockDimZ), C.uint(sharedMemBytes), C.CUstream(unsafe.Pointer(uintptr(stream))), (*unsafe.Pointer)(unsafe.Pointer(&kernelParams[0])), (*unsafe.Pointer)(unsafe.Pointer(uintptr(0))))) if err != SUCCESS { panic(err) } }