Exemplo n.º 1
0
func LaunchKernel(f Function, gridDimX, gridDimY, gridDimZ int, blockDimX, blockDimY, blockDimZ int, sharedMemBytes int, stream Stream, kernelParams []unsafe.Pointer) {

	// Since Go 1.6, a cgo argument cannot have a Go pointer to Go pointer,
	// so we copy the argument values go C memory first.
	argv := C.malloc(C.size_t(len(kernelParams) * pointerSize))
	argp := C.malloc(C.size_t(len(kernelParams) * pointerSize))
	defer C.free(argv)
	defer C.free(argp)
	for i := range kernelParams {
		*((*unsafe.Pointer)(offset(argp, i))) = offset(argv, i)       // argp[i] = &argv[i]
		*((*uint64)(offset(argv, i))) = *((*uint64)(kernelParams[i])) // argv[i] = *kernelParams[i]
	}

	err := Result(C.cuLaunchKernel(
		C.CUfunction(unsafe.Pointer(uintptr(f))),
		C.uint(gridDimX),
		C.uint(gridDimY),
		C.uint(gridDimZ),
		C.uint(blockDimX),
		C.uint(blockDimY),
		C.uint(blockDimZ),
		C.uint(sharedMemBytes),
		C.CUstream(unsafe.Pointer(uintptr(stream))),
		(*unsafe.Pointer)(argp),
		(*unsafe.Pointer)(unsafe.Pointer(uintptr(0)))))
	if err != SUCCESS {
		panic(err)
	}
}
Exemplo n.º 2
0
// Destroys the asynchronous stream
func (stream *Stream) Destroy() {
	str := *stream
	*stream = Stream(uintptr(0))
	err := Result(C.cuStreamDestroy(C.CUstream(unsafe.Pointer(uintptr(str)))))
	if err != SUCCESS {
		panic(err)
	}
}
Exemplo n.º 3
0
// Asynchronously copies a number of bytes from host to device.
// The host memory must be page-locked (see MemRegister)
func MemcpyHtoDAsync(dst DevicePtr, src unsafe.Pointer, bytes int64, stream Stream) {
	err := Result(C.cuMemcpyHtoDAsync(C.CUdeviceptr(dst), src, C.size_t(bytes), C.CUstream(unsafe.Pointer(uintptr(stream)))))
	if err != SUCCESS {
		panic(err)
	}
}
Exemplo n.º 4
0
// Asynchronously copies from device memory in one context (device) to another.
func MemcpyPeerAsync(dst DevicePtr, dstCtx Context, src DevicePtr, srcCtx Context, bytes int64, stream Stream) {
	err := Result(C.cuMemcpyPeerAsync(C.CUdeviceptr(dst), C.CUcontext(unsafe.Pointer(uintptr(dstCtx))), C.CUdeviceptr(src), C.CUcontext(unsafe.Pointer(uintptr(srcCtx))), C.size_t(bytes), C.CUstream(unsafe.Pointer(uintptr(stream)))))
	if err != SUCCESS {
		panic(err)
	}
}
Exemplo n.º 5
0
// Asynchronously sets the first N 32-bit values of dst array to value.
func MemsetD8Async(deviceptr DevicePtr, value uint8, N int64, stream Stream) {
	err := Result(C.cuMemsetD8Async(C.CUdeviceptr(deviceptr), C.uchar(value), C.size_t(N), C.CUstream(unsafe.Pointer(uintptr(stream)))))
	if err != SUCCESS {
		panic(err)
	}
}
Exemplo n.º 6
0
// Launches a CUDA kernel on the device.
// Example:
//		mod := ModuleLoad("file.ptx")
//		f := mod.GetFunction("test")
//
//		var arg1 uintptr
//		arg1 = uintptr(someArray)
//
//		var arg2 float32
//		arg2 = 42
//
//		var arg3 int
//		arg3 = 1024
//
//		args := []uintptr{(uintptr)(unsafe.Pointer(&array)), (uintptr)(unsafe.Pointer(&value)), (uintptr)(unsafe.Pointer(&n))}
//
//		block := 128
//		grid := DivUp(N, block)
//		shmem := 0
//		stream := STREAM0
//		LaunchKernel(f, grid, 1, 1, block, 1, 1, shmem, stream, args)
//
// A more easy-to-use wrapper is implemented in closure.go
//
func LaunchKernel(f Function, gridDimX, gridDimY, gridDimZ int, blockDimX, blockDimY, blockDimZ int, sharedMemBytes int, stream Stream, kernelParams []uintptr) {

	//debug: print all arguments
	argvals := make([]int, len(kernelParams))
	for i := range kernelParams {
		argvals[i] = *(*int)(unsafe.Pointer(kernelParams[i]))
	}
	//fmt.Println("LaunchKernel: ", "func: ", f, "gridDim: ", gridDimX, gridDimY, gridDimZ, "blockDim: ", blockDimX, blockDimY, blockDimZ, "shmem: ", sharedMemBytes, "stream: ", stream, "argptrs: ", kernelParams, "argvals:", argvals)

	err := Result(C.cuLaunchKernel(C.CUfunction(unsafe.Pointer(uintptr(f))), C.uint(gridDimX), C.uint(gridDimY), C.uint(gridDimZ), C.uint(blockDimX), C.uint(blockDimY), C.uint(blockDimZ), C.uint(sharedMemBytes), C.CUstream(unsafe.Pointer(uintptr(stream))), (*unsafe.Pointer)(unsafe.Pointer(&kernelParams[0])), (*unsafe.Pointer)(unsafe.Pointer(uintptr(0)))))
	if err != SUCCESS {
		panic(err)
	}
}
Exemplo n.º 7
0
// Returns Success if all operations have completed, ErrorNotReady otherwise
func (stream Stream) Query() Result {
	return Result(C.cuStreamQuery(C.CUstream(unsafe.Pointer(uintptr(stream)))))
}
Exemplo n.º 8
0
// Blocks until the stream has completed.
func (stream Stream) Synchronize() {
	err := Result(C.cuStreamSynchronize(C.CUstream(unsafe.Pointer(uintptr(stream)))))
	if err != SUCCESS {
		panic(err)
	}
}
Exemplo n.º 9
0
// Asynchronously copies a number of bytes device host to host.
// The host memory must be page-locked (see MemRegister)
func MemcpyDtoHAsync(dst HostPtr, src DevicePtr, bytes int64, stream Stream) {
	err := Result(C.cuMemcpyDtoHAsync(unsafe.Pointer(uintptr(dst)), C.CUdeviceptr(src), C.size_t(bytes), C.CUstream(unsafe.Pointer(uintptr(stream)))))
	if err != SUCCESS {
		panic(err)
	}
}