Example #1
0
// ExecAsync launches the FFT plan through the handle's ExecC2R, reading
// from src and writing to dst.
// src and dst are 3D arrays stored as 1D arrays; only component 0 of each
// is handed to the plan. It panics when src or dst does not have the
// length the plan expects. No synchronization is done in this method.
func (p *fft3DC2RPlan) ExecAsync(src, dst *data.Slice) {
	if want := p.InputLenFloats(); src.Len() != want {
		panic(fmt.Errorf("fft size mismatch: expecting src len %v, got %v", want, src.Len()))
	}
	if want := p.OutputLenFloats(); dst.Len() != want {
		panic(fmt.Errorf("fft size mismatch: expecting dst len %v, got %v", want, dst.Len()))
	}

	in := cu.DevicePtr(src.DevPtr(0))
	out := cu.DevicePtr(dst.DevPtr(0))
	p.handle.ExecC2R(in, out)
}
Example #2
0
// ExecAsync launches the FFT plan through the handle's ExecR2C, reading
// from src and writing to dst.
// src and dst are 3D arrays stored as 1D arrays and must each have exactly
// one component (checked with util.Argument). It panics via log.Panicf when
// src or dst does not have the length the plan expects. No synchronization
// is done in this method.
func (p *fft3DR2CPlan) ExecAsync(src, dst *data.Slice) {
	util.Argument(src.NComp() == 1 && dst.NComp() == 1)

	if want := p.InputLen(); src.Len() != want {
		log.Panicf("fft size mismatch: expecting src len %v, got %v", want, src.Len())
	}
	if want := p.OutputLen(); dst.Len() != want {
		log.Panicf("fft size mismatch: expecting dst len %v, got %v", want, dst.Len())
	}

	in := cu.DevicePtr(src.DevPtr(0))
	out := cu.DevicePtr(dst.DevPtr(0))
	p.handle.ExecR2C(in, out)
}
Example #3
0
// copyback reads a single float32 from the GPU buffer buf into host memory,
// sends buf back on the reduceBuffers channel so it can be reused, and
// returns the value that was read.
func copyback(buf unsafe.Pointer) float32 {
	var host float32
	cu.MemcpyDtoH(unsafe.Pointer(&host), cu.DevicePtr(buf), 1*cu.SIZEOF_FLOAT32)
	reduceBuffers <- buf
	return host
}
Example #4
0
// Memset fills each component of s with the corresponding value in val:
// component c is set to val[c]. The number of values must equal the number
// of components (checked with util.Argument). All fills are issued on one
// stream, which is then passed to syncAndRecycle before returning.
func Memset(s *data.Slice, val ...float32) {
	util.Argument(len(val) == s.NComp())

	st := stream()
	for c := 0; c < len(val); c++ {
		// The fill works on 32-bit words, so pass the float's raw bit pattern.
		bits := math.Float32bits(val[c])
		cu.MemsetD32Async(cu.DevicePtr(s.DevPtr(c)), bits, int64(s.Len()), st)
	}
	syncAndRecycle(st)
}
Example #5
0
// slice is the shared implementation behind all the typed slice() methods.
// It returns a sub-slice covering elements [start, stop) of s, where each
// element is elemsize bytes wide. The result's pointer is advanced by
// start*elemsize bytes, its length is stop-start and its capacity is the
// receiver's capacity minus start.
//
// It panics when start is outside [0, cap) or stop is outside [0, cap], and
// when start > stop. Note that start == cap is rejected even for an empty
// range.
func (s *slice) slice(start, stop int, elemsize uintptr) slice {
	switch {
	case start >= s.cap_, start < 0, stop > s.cap_, stop < 0:
		panic("cuda4/safe: slice index out of bounds")
	case start > stop:
		panic("cuda4/safe: inverted slice range")
	}

	byteOffset := uintptr(start) * elemsize
	base := cu.DevicePtr(uintptr(s.ptr_) + byteOffset)
	return slice{base, stop - start, s.cap_ - start}
}
Example #6
0
// reduceBuf takes a 1-float CUDA reduction buffer from the reduceBuffers
// pool, sets its single 32-bit word to initVal, and returns it.
// The pool is created on first use via initReduceBuf. The memset is issued
// on a stream that is passed to syncAndRecycle before the buffer is returned.
func reduceBuf(initVal float32) unsafe.Pointer {
	if reduceBuffers == nil {
		initReduceBuf()
	}

	pooled := <-reduceBuffers
	st := stream()
	bits := math.Float32bits(initVal)
	cu.MemsetD32Async(cu.DevicePtr(pooled), bits, 1, st)
	syncAndRecycle(st)
	return pooled
}
Example #7
0
// newSlice builds a data.Slice with nComp components on mesh m.
// Each component is obtained from alloc with room for m.NCell() float32
// values and is then zeroed with cu.MemsetD32. memType is passed through
// unchanged to data.SliceFromPtrs. data.EnableGPU is called on every
// invocation to register the GPU free/copy helpers.
func newSlice(nComp int, m *data.Mesh, alloc func(int64) unsafe.Pointer, memType int8) *data.Slice {
	data.EnableGPU(memFree, cu.MemFreeHost, memCpy, memCpyDtoH, memCpyHtoD)

	nCell := m.NCell()
	nBytes := int64(nCell) * cu.SIZEOF_FLOAT32

	comps := make([]unsafe.Pointer, nComp)
	for i := 0; i < nComp; i++ {
		p := alloc(nBytes)
		cu.MemsetD32(cu.DevicePtr(p), 0, int64(nCell))
		comps[i] = p
	}
	return data.SliceFromPtrs(m, memType, comps)
}
Example #8
0
// UnsafeSet overwrites the slice's pointer, length and capacity with the
// given values. The arguments are not validated in any way, so this
// side-steps the package's safety checks; use with caution.
func (s *slice) UnsafeSet(pointer unsafe.Pointer, length, capacity int) {
	devPtr := cu.DevicePtr(uintptr(pointer))
	s.ptr_, s.len_, s.cap_ = devPtr, length, capacity
}
Example #9
0
// memCpy copies bytes bytes from src to dst, treating both as device
// pointers. The copy is issued with cu.MemcpyAsync on a stream that is
// passed to syncAndRecycle before returning.
func memCpy(dst, src unsafe.Pointer, bytes int64) {
	st := stream()
	to, from := cu.DevicePtr(dst), cu.DevicePtr(src)
	cu.MemcpyAsync(to, from, bytes, st)
	syncAndRecycle(st)
}
Example #10
0
// memCpyHtoD copies bytes bytes from src to dst with cu.MemcpyHtoD.
// dst is converted to a device pointer; src is passed through as-is.
func memCpyHtoD(dst, src unsafe.Pointer, bytes int64) {
	devDst := cu.DevicePtr(dst)
	cu.MemcpyHtoD(devDst, src, bytes)
}
Example #11
0
// memCpyDtoH copies bytes bytes from src to dst with cu.MemcpyDtoH.
// src is converted to a device pointer; dst is passed through as-is.
func memCpyDtoH(dst, src unsafe.Pointer, bytes int64) {
	devSrc := cu.DevicePtr(src)
	cu.MemcpyDtoH(dst, devSrc, bytes)
}
Example #12
0
// memFree releases ptr with cu.MemFree after converting it to a device
// pointer.
func memFree(ptr unsafe.Pointer) {
	devPtr := cu.DevicePtr(ptr)
	cu.MemFree(devPtr)
}
Example #13
0
// zero1 sets the first dst.Len() 32-bit words of dst's component 0 to zero.
// The memset is issued on stream str; this function does not synchronize.
func zero1(dst *data.Slice, str cu.Stream) {
	ptr := cu.DevicePtr(dst.DevPtr(0))
	count := int64(dst.Len())
	cu.MemsetD32Async(ptr, 0, count, str)
}