예제 #1
0
// init performs the one-time setup of the plan. The first call marks the plan
// as initialized and does the work below; every later call returns immediately.
//
// It queries the engine for its grid size and padded size (both asserted to
// have three entries), records them on the plan, allocates the FFT buffer and
// the M array via gpu.NewArray, creates the FFT plan, and derives the size
// used for the stored FFT'd kernel.
func (plan *MaxwellPlan) init() {
	if plan.initialized {
		return
	}
	plan.initialized = true

	eng := GetEngine()
	grid := eng.GridSize()
	padded := eng.PaddedSize()
	Assert(len(grid) == 3)
	Assert(len(padded) == 3)

	// Remember both sizes on the plan.
	copy(plan.dataSize[:], grid)
	copy(plan.logicSize[:], padded)

	// FFT buffer (sized to the FFT output of the padded size) and FFT plan.
	plan.fftBuf = gpu.NewArray(3, gpu.FFTOutputSize(padded))
	plan.fftPlan = gpu.NewDefaultFFT(grid, padded)

	// Three-component array M on the unpadded grid.
	plan.M = gpu.NewArray(3, grid)

	// Size of the stored FFT'd kernel: start from the FFT output size and
	// halve the last dimension, since only the non-redundant parts are stored.
	copy(plan.fftKernSize[:], gpu.FFTOutputSize(padded))
	plan.fftKernSize[2] /= 2
}
예제 #2
0
// newDerivativeUpdater returns a derivativeUpdater whose val is orig and whose
// diff is diff. Two arrays with orig's component count and 3D size are
// allocated up front to hold the previous value and the previous derivative.
func newDerivativeUpdater(orig, diff *Quant) *derivativeUpdater {
	upd := &derivativeUpdater{}
	upd.val, upd.diff = orig, diff

	// TODO: alloc only if needed?
	upd.lastVal = gpu.NewArray(orig.NComp(), orig.Size3D())
	upd.lastDiff = gpu.NewArray(orig.NComp(), orig.Size3D())

	// Start the last time at -infinity so the first time the derivative is
	// taken it will be 0.
	upd.lastT = math.Inf(-1)

	// NOTE(review): the original author marked this initial value with "?".
	upd.lastStep = 0

	return upd
}
예제 #3
0
// testMain is the Mumax2 self-test function.
// It benchmarks device-to-device copies: two single-component arrays of
// 10 x 1024 x 1024 elements are allocated, one is copied into the other a
// fixed number of times, and the resulting bandwidth is logged in GB/s.
func testMain() {
	dims := []int{10, 1024, 1024}
	dst := gpu.NewArray(1, dims)
	defer dst.Free()
	src := gpu.NewArray(1, dims)
	defer src.Free()

	Log("Testing CUDA")
	const reps = 1000
	began := time.Now()

	for remaining := reps; remaining > 0; remaining-- {
		dst.CopyFromDevice(src)
	}

	// Elapsed wall time in seconds (a Duration counts nanoseconds).
	seconds := float64(time.Since(began)) / float64(time.Second)

	// Total bytes copied: elements per array * repetitions * bytes per float.
	nBytes := float64(int64(Prod(dims)) * int64(reps) * SIZEOF_FLOAT)
	gbPerSec := nBytes / seconds / 1e9
	Log("Multi-GPU bandwidth:", gbPerSec, "GB/s")
}
예제 #4
0
// NewFFTUpdater constructs an FFTUpdater with qin as its input quantity and
// qout as its output quantity.
//
// All arrays are sized to the engine's grid size: a one-component window
// array filled from genWindow, and a buffer with as many components as qin.
// The normalization factor is the reciprocal of gpu.FFTNormLogic for that
// size, and the FFT plan uses the grid size for both of its size arguments.
// Finally, qout is registered with the engine as depending on qin.
func NewFFTUpdater(qin, qout *Quant) *FFTUpdater {
	upd := &FFTUpdater{}
	upd.in, upd.out = qin, qout

	grid := engine.GridSize()

	// Window: allocate, fill from the host-side window, then store.
	window := gpu.NewArray(1, grid)
	window.CopyFromHost(genWindow(grid))
	upd.win = window

	upd.q = gpu.NewArray(qin.NComp(), grid)
	upd.norm = 1.0 / float64(gpu.FFTNormLogic(grid))
	upd.plan = gpu.NewDefaultFFT(grid, grid)

	engine.Depends(qout.Name(), qin.Name())
	return upd
}
예제 #5
0
// LoadKernel FFTs the components of kernel and stores the results in
// plan.fftKern, together with a per-entry multiplier in plan.fftMul.
// The symmetry and real/imaginary/complex properties are taken into account
// to reduce storage.
//
// matsymm must be one of SYMMETRIC, ANTISYMMETRIC, NOSYMMETRY or DIAGONAL
// (asserted below). realness is used as an index (0, 1 or 2) into a table of
// multipliers; any other value panics on the array index.
//
// Entries of plan.fftKern and plan.fftMul are written at most once: before
// filling an entry the function asserts that it is still nil / zero.
func (plan *MaxwellPlan) LoadKernel(kernel *host.Array, matsymm int, realness int) {

	//	for i := range kernel.Array {
	//		Debug("kernel", TensorIndexStr[i], ":", kernel.Array[i], "\n\n\n")
	//	}

	// For kernels with more than 3 components the detected symmetry is only
	// logged; the consistency check against matsymm is currently disabled.
	//Assert(kernel.NComp() == 9) // full tensor
	if kernel.NComp() > 3 {
		testedsymm := MatrixSymmetry(kernel)
		Debug("matsymm", testedsymm)
		// TODO: re-enable!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
		//Assert(matsymm == testedsymm)
	}
	Assert(matsymm == SYMMETRIC || matsymm == ANTISYMMETRIC || matsymm == NOSYMMETRY || matsymm == DIAGONAL)

	// If the FFT'd kernel is purely real or purely imaginary, only the
	// relevant part is stored and multiplied by scaling later.
	// realness selects the multiplier: 0 -> 1, 1 -> i, 2 -> 0.
	// NOTE(review): index 2 yields a zero multiplier -- confirm this is the
	// intended value for that realness.
	scaling := [3]complex128{complex(1, 0), complex(0, 1), complex(0, 0)}[realness]
	Debug("scaling=", scaling)

	// FFT input on GPU (one component, logic size); freed on return.
	logic := plan.logicSize[:]
	devIn := gpu.NewArray(1, logic)
	defer devIn.Free()

	// FFT output on GPU and a temporary FFT plan over the full logic size;
	// both are freed on return.
	devOut := gpu.NewArray(1, gpu.FFTOutputSize(logic))
	defer devOut.Free()
	fullFFTPlan := gpu.NewDefaultFFT(logic, logic)
	defer fullFFTPlan.Free()

	// Maximum of all elements gives idea of scale.
	// It is passed to IsZero below when testing whether a component is zero.
	max := maxAbs(kernel.List)

	// FFT all components
	for k := 0; k < 9; k++ {
		i, j := IdxToIJ(k) // fills diagonal first, then upper, then lower

		// ignore off-diagonals of vector (would go out of bounds)
		if k > ZZ && matsymm == DIAGONAL {
			Debug("break", TensorIndexStr[k], "(off-diagonal)")
			break
		}

		// elements of diagonal kernel are stored in one column
		// (row index forced to 0, so entries land in plan.fftKern[0][j]).
		if matsymm == DIAGONAL {
			i = 0
		}

		// Refuse to overwrite an entry that has already been loaded.
		AssertMsg(plan.fftKern[i][j] == nil, "I'm afraid I can't let you overwrite that")
		AssertMsg(plan.fftMul[i][j] == 0, "Likewise")

		// auto-fill lower triangle if possible:
		// for k > XY the transposed entry [j][i] is reused instead of
		// computing a new FFT. The array is shared in both cases; for an
		// antisymmetric kernel only the multiplier is negated.
		if k > XY {
			if matsymm == SYMMETRIC {
				plan.fftKern[i][j] = plan.fftKern[j][i]
				plan.fftMul[i][j] = plan.fftMul[j][i]
				continue
			}
			if matsymm == ANTISYMMETRIC {
				plan.fftKern[i][j] = plan.fftKern[j][i]
				plan.fftMul[i][j] = -plan.fftMul[j][i]
				continue
			}
		}

		// ignore zeros: a component that IsZero reports as zero (relative to
		// max) gets a gpu.NilArray placeholder of the stored kernel size, and
		// its multiplier is left at the zero value asserted above.
		if k < kernel.NComp() && IsZero(kernel.Comp[k], max) {
			Debug("kernel", TensorIndexStr[k], " == 0")
			plan.fftKern[i][j] = gpu.NilArray(1, []int{plan.fftKernSize[X], plan.fftKernSize[Y], plan.fftKernSize[Z]})
			continue
		}

		// calculate FFT of kernel element: upload the component, transform it
		// on the device, and copy the result back to the host.
		Debug("use", TensorIndexStr[k])
		devIn.CopyFromHost(kernel.Component(k))
		fullFFTPlan.Forward(devIn, devOut)
		hostOut := devOut.LocalCopy()

		// extract real part of the kernel from the first quadrant (other parts are redundant due to the symmetry properties),
		// normalize by 1/FFTNormLogic(logic), then upload the result into a
		// newly allocated array and record the multiplier for this entry.
		hostFFTKern := extract(hostOut)
		rescale(hostFFTKern, 1/float64(gpu.FFTNormLogic(logic)))
		plan.fftKern[i][j] = gpu.NewArray(1, hostFFTKern.Size3D)
		plan.fftKern[i][j].CopyFromHost(hostFFTKern)
		plan.fftMul[i][j] = scaling
	}

}
예제 #6
0
// Get returns an array with nComp components and the given size.
// Nothing is recycled yet: every call obtains a new array from gpu.NewArray.
func (p *ArrayPool) Get(nComp int, size []int) *gpu.Array {
	// TODO: actual recycling
	fresh := gpu.NewArray(nComp, size)
	return fresh
}