func (a *Array) Pin() { if a.isPinned == 0 { cu.MemHostRegister(cu.HostPtr(unsafe.Pointer(&a.List[0])), a.SizeInBytes, cu.MEMHOSTREGISTER_PORTABLE) Debug("Successfully pinned.") a.isPinned = 1 } }
// Set a single value func (b *Array) Set(comp, x, y, z int, value float64) { b.checkBounds(comp, x, y, z) acomp := b.Comp[comp] index := acomp.indexOf(x, y, z) cu.MemcpyHtoD(cu.DevicePtr(offset(uintptr(acomp.pointer), SIZEOF_FLOAT*index)), cu.HostPtr(unsafe.Pointer(&value)), 1*SIZEOF_FLOAT) }
func NewArrayPinned(components int, size3D []int) *Array { t := new(Array) t.Init(components, size3D) cu.MemHostRegister(cu.HostPtr(unsafe.Pointer(&t.List[0])), t.SizeInBytes, cu.MEMHOSTREGISTER_PORTABLE) Debug("Successfully pinned.") t.isPinned = 1 return t }
// Get a single value func (b *Array) Get(comp, x, y, z int) float64 { b.checkBounds(comp, x, y, z) var value float64 acomp := b.Comp[comp] index := acomp.indexOf(x, y, z) cu.MemcpyDtoH(cu.HostPtr(unsafe.Pointer(&value)), cu.DevicePtr(offset(uintptr(acomp.pointer), SIZEOF_FLOAT*index)), 1*SIZEOF_FLOAT) return value }
// Copy from host array to device array. func (dst *Array) CopyFromHost(src *host.Array) { CheckSize(dst.size4D, src.Size4D) partPlaneN := dst.partSize[1] * dst.partSize[2] // floats per YZ plane per GPU planeN := dst.size3D[1] * dst.size3D[2] // total floats per YZ plane NPlane := dst.size4D[0] * dst.size3D[0] // total YZ planes (NComp * X size) partPlaneBytes := SIZEOF_FLOAT * int64(partPlaneN) // bytes per YZ plane per GPU for i := 0; i < NPlane; i++ { dstOffset := i * partPlaneN dstPtr := ArrayOffset(uintptr(dst.pointer), dstOffset) srcOffset := i * planeN cu.MemcpyHtoD(cu.DevicePtr(dstPtr), cu.HostPtr(&src.List[srcOffset]), partPlaneBytes) } }
// Copy from device array to host array. func (src *Array) CopyToHost(dst *host.Array) { CheckSize(dst.Size4D, src.size4D) partPlaneN := src.partSize[1] * src.partSize[2] // floats per YZ plane per GPU planeN := src.size3D[1] * src.size3D[2] // total floats per YZ plane NPlane := src.size4D[0] * src.size3D[0] // total YZ planes (NComp * X size) partPlaneBytes := SIZEOF_FLOAT * int64(partPlaneN) // bytes per YZ plane per GPU for i := 0; i < NPlane; i++ { srcOffset := i * partPlaneN srcPtr := ArrayOffset(uintptr(src.pointer), srcOffset) dstOffset := i * planeN cu.MemcpyDtoH(cu.HostPtr(&dst.List[dstOffset]), cu.DevicePtr(srcPtr), partPlaneBytes) } }