Beispiel #1
0
// CopyFromDevice copies the contents of the device array src into the
// device array dst.
//
// The 4D sizes of both arrays are first passed to CheckSize. The copy
// transfers src.partLen4D elements of SIZEOF_FLOAT bytes each and is issued
// asynchronously on dst.Stream; Sync is then called on that stream before
// the method returns.
func (dst *Array) CopyFromDevice(src *Array) {
	CheckSize(dst.size4D, src.size4D)

	// Transfer size in bytes, derived from the source's element count.
	nBytes := SIZEOF_FLOAT * int64(src.partLen4D)

	// Queue the device-to-device copy on the destination's stream ...
	cu.MemcpyDtoDAsync(cu.DevicePtr(dst.pointer), cu.DevicePtr(src.pointer), nBytes, cu.Stream(dst.Stream))
	// ... and synchronize with that stream before returning.
	dst.Stream.Sync()
}
Beispiel #2
0
// Set writes value to the element at position (x, y, z) of component comp.
//
// The indices are first passed to checkBounds. The element's location is
// the component's device pointer plus SIZEOF_FLOAT times the linear index
// returned by indexOf, and SIZEOF_FLOAT bytes are copied from the host
// variable to that location.
//
// NOTE(review): value is a float64 but only SIZEOF_FLOAT bytes are
// transferred; this is correct only if SIZEOF_FLOAT is 8 - confirm.
func (b *Array) Set(comp, x, y, z int, value float64) {
	b.checkBounds(comp, x, y, z)

	component := b.Comp[comp]
	// Byte offset of the requested element inside the component's storage.
	byteOffset := SIZEOF_FLOAT * component.indexOf(x, y, z)

	target := cu.DevicePtr(offset(uintptr(component.pointer), byteOffset))
	source := cu.HostPtr(unsafe.Pointer(&value))
	cu.MemcpyHtoD(target, source, 1*SIZEOF_FLOAT)
}
Beispiel #3
0
// Get returns the element at position (x, y, z) of component comp.
//
// The indices are first passed to checkBounds. The element's location is
// the component's device pointer plus SIZEOF_FLOAT times the linear index
// returned by indexOf, and SIZEOF_FLOAT bytes are copied from that location
// into a host variable, which is returned.
//
// NOTE(review): the result is a float64 but only SIZEOF_FLOAT bytes are
// transferred; this is correct only if SIZEOF_FLOAT is 8 - confirm.
func (b *Array) Get(comp, x, y, z int) float64 {
	b.checkBounds(comp, x, y, z)

	component := b.Comp[comp]
	// Byte offset of the requested element inside the component's storage.
	byteOffset := SIZEOF_FLOAT * component.indexOf(x, y, z)
	source := cu.DevicePtr(offset(uintptr(component.pointer), byteOffset))

	var result float64
	cu.MemcpyDtoH(cu.HostPtr(unsafe.Pointer(&result)), source, 1*SIZEOF_FLOAT)
	return result
}
Beispiel #4
0
// CopyFromHost copies the contents of the host array src into the device
// array dst.
//
// The 4D sizes of both arrays are first passed to CheckSize. The data is
// then transferred one YZ plane at a time: for every plane, a chunk of
// partSize[1]*partSize[2] elements is copied from src.List to the device.
// Consecutive chunks are size3D[1]*size3D[2] elements apart on the host and
// partSize[1]*partSize[2] elements apart on the device.
func (dst *Array) CopyFromHost(src *host.Array) {
	CheckSize(dst.size4D, src.Size4D)

	devStride := dst.partSize[1] * dst.partSize[2] // elements per YZ plane in device storage
	hostStride := dst.size3D[1] * dst.size3D[2]    // elements per YZ plane in the host list
	planes := dst.size4D[0] * dst.size3D[0]        // number of YZ planes (components * X size)
	chunkBytes := SIZEOF_FLOAT * int64(devStride)  // bytes transferred per plane

	base := uintptr(dst.pointer)
	devOff, hostOff := 0, 0
	for plane := 0; plane < planes; plane++ {
		to := cu.DevicePtr(ArrayOffset(base, devOff))
		from := cu.HostPtr(&src.List[hostOff])
		cu.MemcpyHtoD(to, from, chunkBytes)

		// Advance both cursors to the start of the next plane.
		devOff += devStride
		hostOff += hostStride
	}
}
Beispiel #5
0
// CopyToHost copies the contents of the device array src into the host
// array dst.
//
// The 4D sizes of both arrays are first passed to CheckSize. The data is
// then transferred one YZ plane at a time: for every plane, a chunk of
// partSize[1]*partSize[2] elements is copied from the device into dst.List.
// Consecutive chunks are partSize[1]*partSize[2] elements apart on the
// device and size3D[1]*size3D[2] elements apart on the host.
func (src *Array) CopyToHost(dst *host.Array) {
	CheckSize(dst.Size4D, src.size4D)

	devStride := src.partSize[1] * src.partSize[2] // elements per YZ plane in device storage
	hostStride := src.size3D[1] * src.size3D[2]    // elements per YZ plane in the host list
	planes := src.size4D[0] * src.size3D[0]        // number of YZ planes (components * X size)
	chunkBytes := SIZEOF_FLOAT * int64(devStride)  // bytes transferred per plane

	base := uintptr(src.pointer)
	devOff, hostOff := 0, 0
	for plane := 0; plane < planes; plane++ {
		from := cu.DevicePtr(ArrayOffset(base, devOff))
		to := cu.HostPtr(&dst.List[hostOff])
		cu.MemcpyDtoH(to, from, chunkBytes)

		// Advance both cursors to the start of the next plane.
		devOff += devStride
		hostOff += hostStride
	}
}
Beispiel #6
0
// PointTo makes shared, an array that is already initialized but has no
// storage of its own, refer to the storage of the allocated array original,
// starting offset elements past original's pointer (the offset is applied
// through ArrayOffset).
//
// offset must be non-negative and shared.Len()+offset must not exceed
// original.Len(); both conditions are checked with Assert.
//
// Only shared.pointer is updated by this method.
func (shared *Array) PointTo(original *Array, offset int) {
	// A negative offset would place the pointer before the start of
	// original's storage, so it is rejected together with ranges that run
	// past the end.
	Assert(offset >= 0 && shared.Len()+offset <= original.Len())
	shared.pointer = cu.DevicePtr(ArrayOffset(uintptr(original.pointer), offset))
}
Beispiel #7
0
// INTERNAL
// initialize pointers to the component arrays.
// called after the GPU storage has been changed.
func (a *Array) initCompPtrs() {
	for c := range a.Comp {
		start := c * a.partLen3D
		a.Comp[c].pointer = cu.DevicePtr(offset(uintptr(a.pointer), start*SIZEOF_FLOAT))
	}
}