func MemCpy(dst, src unsafe.Pointer, bytes int64) { Sync() timer.Start("memcpy") cu.MemcpyAsync(cu.DevicePtr(uintptr(dst)), cu.DevicePtr(uintptr(src)), bytes, stream0) Sync() timer.Stop("memcpy") }
func MemCpyHtoD(dst, src unsafe.Pointer, bytes int64) { Sync() // sync previous kernels timer.Start("memcpyHtoD") cu.MemcpyHtoD(cu.DevicePtr(uintptr(dst)), src, bytes) Sync() // sync copy timer.Stop("memcpyHtoD") }
func MemCpyDtoH(dst, src unsafe.Pointer, bytes int64) { Sync() // sync previous kernels timer.Start("memcpyDtoH") cu.MemcpyDtoH(dst, cu.DevicePtr(uintptr(src)), bytes) Sync() // sync copy timer.Stop("memcpyDtoH") }
func newSlice(nComp int, size [3]int, alloc func(int64) unsafe.Pointer, memType int8) *data.Slice { data.EnableGPU(memFree, cu.MemFreeHost, MemCpy, MemCpyDtoH, MemCpyHtoD) length := prod(size) bytes := int64(length) * cu.SIZEOF_FLOAT32 ptrs := make([]unsafe.Pointer, nComp) for c := range ptrs { ptrs[c] = unsafe.Pointer(alloc(bytes)) cu.MemsetD32(cu.DevicePtr(uintptr(ptrs[c])), 0, int64(length)) } return data.SliceFromPtrs(size, memType, ptrs) }
// Memset sets the Slice's components to the specified values. // To be carefully used on unified slice (need sync) func Memset(s *data.Slice, val ...float32) { if Synchronous { // debug Sync() timer.Start("memset") } util.Argument(len(val) == s.NComp()) for c, v := range val { cu.MemsetD32Async(cu.DevicePtr(uintptr(s.DevPtr(c))), math.Float32bits(v), int64(s.Len()), stream0) } if Synchronous { //debug Sync() timer.Stop("memset") } }
// memFree releases ptr through cu.MemFree after converting it to a
// cu.DevicePtr. It adapts cu.MemFree to an unsafe.Pointer argument so it can
// be handed to data.EnableGPU.
func memFree(ptr unsafe.Pointer) {
	devPtr := cu.DevicePtr(uintptr(ptr))
	cu.MemFree(devPtr)
}