// Detects matrix symmetry. // returns NOSYMMETRY, SYMMETRIC, ANTISYMMETRIC func MatrixSymmetry(matrix *host.Array) int { AssertMsg(matrix.NComp() == 9, "MatrixSymmetry NComp") symm := true asymm := true max := 1e-100 for i := 0; i < 3; i++ { for j := 0; j < 3; j++ { scount := 0 acount := 0 total := 0 idx1 := FullTensorIdx[i][j] idx2 := FullTensorIdx[j][i] comp1 := matrix.Comp[idx1] comp2 := matrix.Comp[idx2] for x := range comp1 { if math.Abs(float64(comp1[x])) > max { max = math.Abs(float64(comp1[x])) } total++ if comp1[x] == comp2[x] { scount++ } if comp1[x] != comp2[x] { //Debug(comp1[x], "!=", comp2[x]) symm = false //if !asymm { //break //} } if comp1[x] == -comp2[x] { acount++ } if comp1[x] != -comp2[x] { //Debug(comp1[x] ,"!= -", comp2[x]) asymm = false //if !symm { //break //} } } Debug("max", max) Debug(i, j, "symm", scount, "asymm", acount, "(of", total, ")") } } if symm { return SYMMETRIC // also covers all zeros } if asymm { return ANTISYMMETRIC } return NOSYMMETRY }
// Convert mumax's internal ZYX convention to userspace XYZ. func convertXYZ(arr *host.Array) *host.Array { s := arr.Size3D n := arr.NComp() a := arr.Array transp := host.NewArray(n, []int{s[Z], s[Y], s[X]}) t := transp.Array for c := 0; c < n; c++ { for i := 0; i < s[X]; i++ { for j := 0; j < s[Y]; j++ { for k := 0; k < s[Z]; k++ { t[(n-1)-c][k][j][i] = a[c][i][j][k] } } } } runtime.GC() // a LOT of garbage has been made return transp }
// Returns a new host array of size size2, re-sized from the input array by nearest-neighbor interpolation. func Resample(in *host.Array, size2 []int) *host.Array { Assert(len(size2) == 3) out := host.NewArray(in.NComp(), size2) out_a := out.Array in_a := in.Array size1 := in.Size3D for c := range out_a { for i := range out_a[c] { i1 := (i * size1[X]) / size2[X] for j := range out_a[0][i] { j1 := (j * size1[Y]) / size2[Y] for k := range out_a[0][i][j] { k1 := (k * size1[Z]) / size2[Z] out_a[c][i][j][k] = in_a[c][i1][j1][k1] } } } } return out }
// Extract real or imaginary parts, copy them from src to dst. // In the meanwhile, check if the other parts are nearly zero // and scale the kernel to compensate for unnormalized FFTs. // real_imag = 0: real parts // real_imag = 1: imag parts func extract(src *host.Array) *host.Array { sx := src.Size3D[X]/2 + 1 // antisymmetric sy := src.Size3D[Y]/2 + 1 // antisymmetric sz := src.Size3D[Z] / 2 // only real parts should be stored, the value of the imaginary part should stay below the zero threshould dst := host.NewArray(src.NComp(), []int{sx, sy, sz}) dstArray := dst.Array srcArray := src.Array // Normally, the FFT'ed kernel is purely real because of symmetry, // so we only store the real parts... maxImg := float64(0.) maxReal := float64(0.) for c := range dstArray { for k := range dstArray[c] { for j := range dstArray[c][k] { for i := range dstArray[c][k][j] { dstArray[c][k][j][i] = srcArray[c][k][j][2*i] if Abs32(srcArray[c][k][j][2*i+1]) > maxImg { maxImg = Abs32(srcArray[c][k][j][2*i+1]) } if Abs32(srcArray[c][k][j][2*i+0]) > maxReal { maxReal = Abs32(srcArray[c][k][j][2*i+0]) } } } } } // ...however, we check that the imaginary parts are nearly zero, // just to be sure we did not make a mistake during kernel creation. Debug("FFT Kernel max real part", 0, ":", maxReal) Debug("FFT Kernel max imag part", 1, ":", maxImg) Debug("FFT Kernel max imag/real part=", maxImg/maxReal) if maxImg/maxReal > 1e-12 { // TODO: is this reasonable? panic(BugF("FFT Kernel max bad/good part=", maxImg/maxReal)) } return dst }
// LoadKernel Fourier-transforms the components of kernel on the GPU and stores
// the results in plan.fftKern[i][j], together with a complex multiplier in
// plan.fftMul[i][j]. Symmetry and realness are used to reduce storage.
//
// Parameters:
//   - kernel:   host array holding the kernel components to load.
//   - matsymm:  one of SYMMETRIC, ANTISYMMETRIC, NOSYMMETRY or DIAGONAL
//     (asserted). For SYMMETRIC/ANTISYMMETRIC kernels, components with
//     k > XY are not transformed but alias the already stored transposed
//     entry; for DIAGONAL kernels only components up to ZZ are processed and
//     they are stored with row index 0.
//   - realness: index into the multiplier table {1, i, 0}; the selected value
//     is stored in plan.fftMul for every component that is actually
//     transformed. Presumably 0 = purely real, 1 = purely imaginary FFT —
//     confirm the meaning of 2 against the callers.
//
// Every target slot plan.fftKern[i][j] / plan.fftMul[i][j] must still be
// unset (nil / 0) when it is reached; this is asserted. The temporary GPU
// buffers and the FFT plan allocated here are freed on return.
func (plan *MaxwellPlan) LoadKernel(kernel *host.Array, matsymm int, realness int) {

	// Disabled debugging aid, kept from the original:
	//	for i := range kernel.Array {
	//		Debug("kernel", TensorIndexStr[i], ":", kernel.Array[i], "\n\n\n")
	//	}

	//Assert(kernel.NComp() == 9) // full tensor

	// For kernels with more than 3 components, detect the symmetry actually
	// present. The result is currently only logged.
	if kernel.NComp() > 3 {
		testedsymm := MatrixSymmetry(kernel)
		Debug("matsymm", testedsymm)
		// TODO: re-enable this consistency check between the declared and
		// the detected symmetry:
		//Assert(matsymm == testedsymm)
	}
	Assert(matsymm == SYMMETRIC || matsymm == ANTISYMMETRIC || matsymm == NOSYMMETRY || matsymm == DIAGONAL)

	// If the FFT'd kernel is purely real or purely imaginary,
	// store only the relevant part and multiply by scaling later.
	scaling := [3]complex128{complex(1, 0), complex(0, 1), complex(0, 0)}[realness]
	Debug("scaling=", scaling)

	// FFT input on GPU
	logic := plan.logicSize[:]
	devIn := gpu.NewArray(1, logic)
	defer devIn.Free()

	// FFT output on GPU
	devOut := gpu.NewArray(1, gpu.FFTOutputSize(logic))
	defer devOut.Free()

	fullFFTPlan := gpu.NewDefaultFFT(logic, logic)
	defer fullFFTPlan.Free()

	// Maximum of all elements gives an idea of scale; it is the reference
	// passed to IsZero below.
	max := maxAbs(kernel.List)

	// FFT all components
	for k := 0; k < 9; k++ {

		i, j := IdxToIJ(k) // fills diagonal first, then upper, then lower

		// ignore off-diagonals of vector (would go out of bounds)
		if k > ZZ && matsymm == DIAGONAL {
			Debug("break", TensorIndexStr[k], "(off-diagonal)")
			break
		}

		// elements of diagonal kernel are stored in one column
		if matsymm == DIAGONAL {
			i = 0
		}

		// Refuse to overwrite a slot that has already been filled.
		AssertMsg(plan.fftKern[i][j] == nil, "I'm afraid I can't let you overwrite that")
		AssertMsg(plan.fftMul[i][j] == 0, "Likewise")

		// Auto-fill the lower triangle if possible: reuse the array stored
		// for the transposed entry (the same array is shared, not copied)
		// and copy its multiplier, negated in the antisymmetric case.
		if k > XY {
			if matsymm == SYMMETRIC {
				plan.fftKern[i][j] = plan.fftKern[j][i]
				plan.fftMul[i][j] = plan.fftMul[j][i]
				continue
			}
			if matsymm == ANTISYMMETRIC {
				plan.fftKern[i][j] = plan.fftKern[j][i]
				plan.fftMul[i][j] = -plan.fftMul[j][i]
				continue
			}
		}

		// Ignore zeros: a component that IsZero reports as zero relative to
		// max gets a gpu.NilArray placeholder and no FFT is performed.
		// plan.fftMul[i][j] is left at its zero value in this case.
		if k < kernel.NComp() && IsZero(kernel.Comp[k], max) {
			Debug("kernel", TensorIndexStr[k], " == 0")
			plan.fftKern[i][j] = gpu.NilArray(1, []int{plan.fftKernSize[X], plan.fftKernSize[Y], plan.fftKernSize[Z]})
			continue
		}

		// Calculate the FFT of this kernel element.
		// NOTE(review): the zero check above is guarded by k < kernel.NComp(),
		// but kernel.Component(k) below is not — confirm that k cannot reach
		// kernel.NComp() on this path.
		Debug("use", TensorIndexStr[k])
		devIn.CopyFromHost(kernel.Component(k))
		fullFFTPlan.Forward(devIn, devOut)
		hostOut := devOut.LocalCopy()

		// Extract the real part of the kernel from the first quadrant (the
		// other parts are redundant due to the symmetry properties), then
		// rescale it by 1/FFTNormLogic(logic).
		hostFFTKern := extract(hostOut)
		rescale(hostFFTKern, 1/float64(gpu.FFTNormLogic(logic)))

		// Upload the reduced kernel to the GPU and record its multiplier.
		plan.fftKern[i][j] = gpu.NewArray(1, hostFFTKern.Size3D)
		plan.fftKern[i][j].CopyFromHost(hostFFTKern)
		plan.fftMul[i][j] = scaling
	}
}