// Copy from device array to device array. func (dst *Array) CopyFromDevice(src *Array) { CheckSize(dst.size4D, src.size4D) d := dst.pointer s := src.pointer // copies run concurrently on the individual devices length := src.partLen4D cu.MemcpyDtoDAsync(cu.DevicePtr(d), cu.DevicePtr(s), SIZEOF_FLOAT*int64(length), cu.Stream(dst.Stream)) // Synchronize with all copies dst.Stream.Sync() }
// Set a single value func (b *Array) Set(comp, x, y, z int, value float64) { b.checkBounds(comp, x, y, z) acomp := b.Comp[comp] index := acomp.indexOf(x, y, z) cu.MemcpyHtoD(cu.DevicePtr(offset(uintptr(acomp.pointer), SIZEOF_FLOAT*index)), cu.HostPtr(unsafe.Pointer(&value)), 1*SIZEOF_FLOAT) }
// Get a single value func (b *Array) Get(comp, x, y, z int) float64 { b.checkBounds(comp, x, y, z) var value float64 acomp := b.Comp[comp] index := acomp.indexOf(x, y, z) cu.MemcpyDtoH(cu.HostPtr(unsafe.Pointer(&value)), cu.DevicePtr(offset(uintptr(acomp.pointer), SIZEOF_FLOAT*index)), 1*SIZEOF_FLOAT) return value }
// Copy from host array to device array. func (dst *Array) CopyFromHost(src *host.Array) { CheckSize(dst.size4D, src.Size4D) partPlaneN := dst.partSize[1] * dst.partSize[2] // floats per YZ plane per GPU planeN := dst.size3D[1] * dst.size3D[2] // total floats per YZ plane NPlane := dst.size4D[0] * dst.size3D[0] // total YZ planes (NComp * X size) partPlaneBytes := SIZEOF_FLOAT * int64(partPlaneN) // bytes per YZ plane per GPU for i := 0; i < NPlane; i++ { dstOffset := i * partPlaneN dstPtr := ArrayOffset(uintptr(dst.pointer), dstOffset) srcOffset := i * planeN cu.MemcpyHtoD(cu.DevicePtr(dstPtr), cu.HostPtr(&src.List[srcOffset]), partPlaneBytes) } }
// Copy from device array to host array. func (src *Array) CopyToHost(dst *host.Array) { CheckSize(dst.Size4D, src.size4D) partPlaneN := src.partSize[1] * src.partSize[2] // floats per YZ plane per GPU planeN := src.size3D[1] * src.size3D[2] // total floats per YZ plane NPlane := src.size4D[0] * src.size3D[0] // total YZ planes (NComp * X size) partPlaneBytes := SIZEOF_FLOAT * int64(partPlaneN) // bytes per YZ plane per GPU for i := 0; i < NPlane; i++ { srcOffset := i * partPlaneN srcPtr := ArrayOffset(uintptr(src.pointer), srcOffset) dstOffset := i * planeN cu.MemcpyDtoH(cu.HostPtr(&dst.List[dstOffset]), cu.DevicePtr(srcPtr), partPlaneBytes) } }
// Lets the pointers of an already initialized, but not allocated array (shared) // point to an allocated array (original) possibly with an offset. func (shared *Array) PointTo(original *Array, offset int) { Assert(shared.Len()+offset <= original.Len()) shared.pointer = cu.DevicePtr(ArrayOffset(uintptr(original.pointer), offset)) }
// INTERNAL // initialize pointers to the component arrays. // called after the GPU storage has been changed. func (a *Array) initCompPtrs() { for c := range a.Comp { start := c * a.partLen3D a.Comp[c].pointer = cu.DevicePtr(offset(uintptr(a.pointer), start*SIZEOF_FLOAT)) } }