Esempio n. 1
0
// exec3D runs the three-component convolution pipeline: each component of
// inp is forward-transformed, the transformed data is multiplied by the
// kernel, and each component is transformed back and copied into outp.
//
// All work is queued on c.stream; the method returns only after the stream
// has been synchronized. vol and Bsat are forwarded unchanged to copyPadMul
// (presumably a per-cell scaling applied while padding; confirm against
// copyPadMul).
func (c *DemagConvolution) exec3D(outp, inp, vol *data.Slice, Bsat float64) {
	fullSize := c.kernSize

	// Forward FFT of every component: clear the real buffer, fill it from
	// the input via copyPadMul, then transform into the complex buffer.
	for comp := 0; comp < 3; comp++ {
		realBuf := c.fftRBuf[comp]
		zero1(realBuf, c.stream)
		copyPadMul(realBuf, inp.Comp(comp), fullSize, c.size, vol, Bsat, c.stream)
		c.fwPlan.ExecAsync(realBuf, c.fftCBuf[comp])
	}

	// Kernel multiplication on the transformed data. The kernel is passed
	// as its three diagonal entries followed by the [1][2], [0][2] and
	// [0][1] off-diagonal entries.
	kernMulRSymm3D(c.fftCBuf,
		c.gpuFFTKern[0][0], c.gpuFFTKern[1][1], c.gpuFFTKern[2][2],
		c.gpuFFTKern[1][2], c.gpuFFTKern[0][2], c.gpuFFTKern[0][1],
		c.fftKernSize[0], c.fftKernSize[1], c.fftKernSize[2], c.stream)

	// Backward FFT of every component, then copy from the fullSize-sized
	// buffer into the c.size-sized output component.
	for comp := 0; comp < 3; comp++ {
		realBuf := c.fftRBuf[comp]
		c.bwPlan.ExecAsync(c.fftCBuf[comp], realBuf)
		copyPad(outp.Comp(comp), realBuf, c.size, fullSize, c.stream)
	}

	// Wait for everything queued on the stream before returning.
	c.stream.Synchronize()
}
Esempio n. 2
0
// exec2D runs the convolution pipeline for the 2D case.
//
// The convolution is separated into a 1D convolution for x and a 2D
// convolution for yz, so only 2 FFT buffers are needed at the same time.
// The x component is therefore processed completely (forward FFT, kernel
// multiplication, backward FFT) before the y and z components are handled
// together.
//
// All work is queued on c.stream; the method returns only after the stream
// has been synchronized. vol and Bsat are forwarded unchanged to copyPadMul
// (presumably a per-cell scaling applied while padding; confirm against
// copyPadMul).
func (c *DemagConvolution) exec2D(outp, inp, vol *data.Slice, Bsat float64) {
	fullSize := c.kernSize

	// --- x component ---

	// Forward FFT of x.
	zero1(c.fftRBuf[0], c.stream)
	copyPadMul(c.fftRBuf[0], inp.Comp(0), fullSize, c.size, vol, Bsat, c.stream)
	c.fwPlan.ExecAsync(c.fftRBuf[0], c.fftCBuf[0])

	// Kernel multiplication for x; the same two kernel dimensions are
	// reused for the yz multiplication below.
	dim1, dim2 := c.fftKernSize[1], c.fftKernSize[2]
	kernMulRSymm2Dx(c.fftCBuf[0], c.gpuFFTKern[0][0], dim1, dim2, c.stream)

	// Backward FFT of x and copy into the output.
	c.bwPlan.ExecAsync(c.fftCBuf[0], c.fftRBuf[0])
	copyPad(outp.Comp(0), c.fftRBuf[0], c.size, fullSize, c.stream)

	// --- y and z components ---

	// Forward FFT of y and z.
	for comp := 1; comp < 3; comp++ {
		realBuf := c.fftRBuf[comp]
		zero1(realBuf, c.stream)
		copyPadMul(realBuf, inp.Comp(comp), fullSize, c.size, vol, Bsat, c.stream)
		c.fwPlan.ExecAsync(realBuf, c.fftCBuf[comp])
	}

	// Kernel multiplication for yz, using the [1][1], [2][2] and [1][2]
	// kernel entries.
	kernMulRSymm2Dyz(c.fftCBuf[1], c.fftCBuf[2],
		c.gpuFFTKern[1][1], c.gpuFFTKern[2][2], c.gpuFFTKern[1][2],
		dim1, dim2, c.stream)

	// Backward FFT of y and z and copy into the output.
	for comp := 1; comp < 3; comp++ {
		realBuf := c.fftRBuf[comp]
		c.bwPlan.ExecAsync(c.fftCBuf[comp], realBuf)
		copyPad(outp.Comp(comp), realBuf, c.size, fullSize, c.stream)
	}

	// Wait for everything queued on the stream before returning.
	c.stream.Synchronize()
}