func NewFFTUpdater(qin, qout *Quant) *FFTUpdater { u := new(FFTUpdater) u.in = qin u.out = qout meshSize := engine.GridSize() u.win = gpu.NewArray(1, meshSize) u.win.CopyFromHost(genWindow(meshSize)) u.q = gpu.NewArray(qin.NComp(), meshSize) u.norm = 1.0 / float64(gpu.FFTNormLogic(meshSize)) u.plan = gpu.NewDefaultFFT(meshSize, meshSize) engine.Depends(qout.Name(), qin.Name()) return u }
//// Loads a sub-kernel at position pos in the 3x3 global kernel matrix. //// The symmetry and real/imaginary/complex properties are taken into account to reduce storage. func (plan *MaxwellPlan) LoadKernel(kernel *host.Array, matsymm int, realness int) { // for i := range kernel.Array { // Debug("kernel", TensorIndexStr[i], ":", kernel.Array[i], "\n\n\n") // } //Assert(kernel.NComp() == 9) // full tensor if kernel.NComp() > 3 { testedsymm := MatrixSymmetry(kernel) Debug("matsymm", testedsymm) // TODO: re-enable!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! //Assert(matsymm == testedsymm) } Assert(matsymm == SYMMETRIC || matsymm == ANTISYMMETRIC || matsymm == NOSYMMETRY || matsymm == DIAGONAL) //if FFT'd kernel is pure real or imag, //store only relevant part and multiply by scaling later scaling := [3]complex128{complex(1, 0), complex(0, 1), complex(0, 0)}[realness] Debug("scaling=", scaling) // FFT input on GPU logic := plan.logicSize[:] devIn := gpu.NewArray(1, logic) defer devIn.Free() // FFT output on GPU devOut := gpu.NewArray(1, gpu.FFTOutputSize(logic)) defer devOut.Free() fullFFTPlan := gpu.NewDefaultFFT(logic, logic) defer fullFFTPlan.Free() // Maximum of all elements gives idea of scale. max := maxAbs(kernel.List) // FFT all components for k := 0; k < 9; k++ { i, j := IdxToIJ(k) // fills diagonal first, then upper, then lower // ignore off-diagonals of vector (would go out of bounds) if k > ZZ && matsymm == DIAGONAL { Debug("break", TensorIndexStr[k], "(off-diagonal)") break } // elements of diagonal kernel are stored in one column if matsymm == DIAGONAL { i = 0 } // clear data first AssertMsg(plan.fftKern[i][j] == nil, "I'm afraid I can't let you overwrite that") AssertMsg(plan.fftMul[i][j] == 0, "Likewise") // auto-fill lower triangle if possible if k > XY { if matsymm == SYMMETRIC { plan.fftKern[i][j] = plan.fftKern[j][i] plan.fftMul[i][j] = plan.fftMul[j][i] continue } if matsymm == ANTISYMMETRIC { plan.fftKern[i][j] = plan.fftKern[j][i] plan.fftMul[i][j] = -plan.fftMul[j][i] continue } } // ignore zeros if k < kernel.NComp() && IsZero(kernel.Comp[k], max) { Debug("kernel", TensorIndexStr[k], " == 0") plan.fftKern[i][j] = gpu.NilArray(1, []int{plan.fftKernSize[X], plan.fftKernSize[Y], plan.fftKernSize[Z]}) continue } // calculate FFT of kernel elementx Debug("use", TensorIndexStr[k]) devIn.CopyFromHost(kernel.Component(k)) fullFFTPlan.Forward(devIn, devOut) hostOut := devOut.LocalCopy() // extract real part of the kernel from the first quadrant (other parts are redundunt due to the symmetry properties) hostFFTKern := extract(hostOut) rescale(hostFFTKern, 1/float64(gpu.FFTNormLogic(logic))) plan.fftKern[i][j] = gpu.NewArray(1, hostFFTKern.Size3D) plan.fftKern[i][j].CopyFromHost(hostFFTKern) plan.fftMul[i][j] = scaling } }