// Extract real parts, copy them from src to dst. // In the meanwhile, check if imaginary parts are nearly zero // and scale the kernel to compensate for unnormalized FFTs. func scaleRealParts(dst, src *data.Slice, scale float32) { util.Argument(2*dst.Len() == src.Len()) util.Argument(dst.NComp() == 1 && src.NComp() == 1) srcList := src.HostCopy().Host()[0] dstList := dst.Host()[0] // Normally, the FFT'ed kernel is purely real because of symmetry, // so we only store the real parts... maximg := float32(0.) maxreal := float32(0.) for i := 0; i < src.Len()/2; i++ { dstList[i] = srcList[2*i] * scale if fabs(srcList[2*i+0]) > maxreal { maxreal = fabs(srcList[2*i+0]) } if fabs(srcList[2*i+1]) > maximg { maximg = fabs(srcList[2*i+1]) } } // ...however, we check that the imaginary parts are nearly zero, // just to be sure we did not make a mistake during kernel creation. if maximg/maxreal > FFT_IMAG_TOLERANCE { log.Fatalf("Too large FFT kernel imaginary/real part: %v", maximg/maxreal) } }
func writeOvf2Binary4(out io.Writer, array *data.Slice) { data := array.Tensors() gridsize := array.Mesh().Size() var bytes []byte // OOMMF requires this number to be first to check the format var controlnumber float32 = OMF_CONTROL_NUMBER // Conversion form float32 [4]byte in big-endian // encoding/binary is too slow // Inlined for performance, terabytes of data will pass here... bytes = (*[4]byte)(unsafe.Pointer(&controlnumber))[:] out.Write(bytes) // Here we loop over X,Y,Z, not Z,Y,X, because // internal in C-order == external in Fortran-order ncomp := array.NComp() for i := 0; i < gridsize[X]; i++ { for j := 0; j < gridsize[Y]; j++ { for k := 0; k < gridsize[Z]; k++ { for c := 0; c < ncomp; c++ { bytes = (*[4]byte)(unsafe.Pointer(&data[swapIndex(c, ncomp)][i][j][k]))[:] out.Write(bytes) } } } } }
func dumpGnuplot(out io.Writer, f *data.Slice) (err error) { buf := bufio.NewWriter(out) defer buf.Flush() data := f.Tensors() gridsize := f.Mesh().Size() cellsize := f.Mesh().CellSize() // If no cell size is set, use generic cell index. if cellsize == [3]float64{0, 0, 0} { cellsize = [3]float64{1, 1, 1} } ncomp := f.NComp() // Here we loop over X,Y,Z, not Z,Y,X, because // internal in C-order == external in Fortran-order for i := 0; i < gridsize[0]; i++ { x := float64(i) * cellsize[0] for j := 0; j < gridsize[1]; j++ { y := float64(j) * cellsize[1] for k := 0; k < gridsize[2]; k++ { z := float64(k) * cellsize[2] _, err = fmt.Fprint(buf, z, " ", y, " ", x, "\t") for c := 0; c < ncomp; c++ { _, err = fmt.Fprint(buf, data[swapIndex(c, ncomp)][i][j][k], " ") // converts to user space. } _, err = fmt.Fprint(buf, "\n") } _, err = fmt.Fprint(buf, "\n") } } return }
// Memset sets the Slice's components to the specified values. func Memset(s *data.Slice, val ...float32) { util.Argument(len(val) == s.NComp()) str := stream() for c, v := range val { cu.MemsetD32Async(cu.DevicePtr(s.DevPtr(c)), math.Float32bits(v), int64(s.Len()), str) } syncAndRecycle(str) }
func kernMulRSymm2Dx(fftMx, K00 *data.Slice, N1, N2 int, str cu.Stream) { util.Argument(K00.Len() == (N1/2+1)*N2) util.Argument(fftMx.NComp() == 1 && K00.NComp() == 1) cfg := make2DConf(N1, N2) k_kernmulRSymm2Dx_async(fftMx.DevPtr(0), K00.DevPtr(0), N1, N2, cfg, str) }
// Does not yet use Y mirror symmetry!! // Even though it is implemented partially in kernel func kernMulRSymm3D(fftM [3]*data.Slice, K00, K11, K22, K12, K02, K01 *data.Slice, N0, N1, N2 int, str cu.Stream) { util.Argument(K00.Len() == N0*(N1)*N2) // no symmetry yet util.Argument(fftM[0].NComp() == 1 && K00.NComp() == 1) cfg := make2DConf(N1, N2) k_kernmulRSymm3D_async(fftM[0].DevPtr(0), fftM[1].DevPtr(0), fftM[2].DevPtr(0), K00.DevPtr(0), K11.DevPtr(0), K22.DevPtr(0), K12.DevPtr(0), K02.DevPtr(0), K01.DevPtr(0), N0, N1, N2, cfg, str) }
func kernMulRSymm2Dyz(fftMy, fftMz, K11, K22, K12 *data.Slice, N1, N2 int, str cu.Stream) { util.Argument(K11.Len() == (N1/2+1)*N2) util.Argument(fftMy.NComp() == 1 && K11.NComp() == 1) cfg := make2DConf(N1, N2) k_kernmulRSymm2Dyz_async(fftMy.DevPtr(0), fftMz.DevPtr(0), K11.DevPtr(0), K22.DevPtr(0), K12.DevPtr(0), N1, N2, cfg, str) }
// Copies src into dst, which is larger or smaller. // The remainder of dst is not filled with zeros. func copyPad(dst, src *data.Slice, dstsize, srcsize [3]int, str cu.Stream) { util.Argument(dst.NComp() == 1 && src.NComp() == 1) util.Assert(dst.Len() == prod(dstsize)) util.Assert(src.Len() == prod(srcsize)) N0 := iMin(dstsize[1], srcsize[1]) N1 := iMin(dstsize[2], srcsize[2]) cfg := make2DConf(N0, N1) k_copypad_async(dst.DevPtr(0), dstsize[0], dstsize[1], dstsize[2], src.DevPtr(0), srcsize[0], srcsize[1], srcsize[2], cfg, str) }
// Adds a constant to each element of the slice. // dst[comp][index] += cnst[comp] func AddConst(dst *data.Slice, cnst ...float32) { util.Argument(len(cnst) == dst.NComp()) N := dst.Len() cfg := make1DConf(N) str := stream() for c := 0; c < dst.NComp(); c++ { if cnst[c] != 0 { k_madd2_async(dst.DevPtr(c), dst.DevPtr(c), 1, nil, cnst[c], N, cfg, str) } } syncAndRecycle(str) }
// Execute the FFT plan, asynchronous. // src and dst are 3D arrays stored 1D arrays. func (p *fft3DR2CPlan) ExecAsync(src, dst *data.Slice) { util.Argument(src.NComp() == 1 && dst.NComp() == 1) oksrclen := p.InputLen() if src.Len() != oksrclen { log.Panicf("fft size mismatch: expecting src len %v, got %v", oksrclen, src.Len()) } okdstlen := p.OutputLen() if dst.Len() != okdstlen { log.Panicf("fft size mismatch: expecting dst len %v, got %v", okdstlen, dst.Len()) } p.handle.ExecR2C(cu.DevicePtr(src.DevPtr(0)), cu.DevicePtr(dst.DevPtr(0))) }
func preprocess(f *data.Slice) { if *flag_normalize { normalize(f, 1) } if *flag_normpeak { normpeak(f) } if *flag_comp != -1 { *f = *f.Comp(swapIndex(*flag_comp, f.NComp())) } if *flag_resize != "" { resize(f, *flag_resize) } //if *flag_scale != 1{ // rescale(f, *flag_scale) //} }
// Writes data in OMF Text format func writeOmfText(out io.Writer, tens *data.Slice) (err error) { data := tens.Tensors() gridsize := tens.Mesh().Size() // Here we loop over X,Y,Z, not Z,Y,X, because // internal in C-order == external in Fortran-order for i := 0; i < gridsize[X]; i++ { for j := 0; j < gridsize[Y]; j++ { for k := 0; k < gridsize[Z]; k++ { for c := 0; c < tens.NComp(); c++ { _, err = fmt.Fprint(out, data[swapIndex(c, tens.NComp())][i][j][k], " ") // converts to user space. } _, err = fmt.Fprint(out, "\n") } } } return }
// multiply-add: dst[i] = src1[i] * factor1 + src2[i] * factor2 + src3 * factor3 func Madd3(dst, src1, src2, src3 *data.Slice, factor1, factor2, factor3 float32) { N := dst.Len() nComp := dst.NComp() util.Assert(src1.Len() == N && src2.Len() == N && src3.Len() == N) util.Assert(src1.NComp() == nComp && src2.NComp() == nComp && src3.NComp() == nComp) cfg := make1DConf(N) str := stream() for c := 0; c < nComp; c++ { k_madd3_async(dst.DevPtr(c), src1.DevPtr(c), factor1, src2.DevPtr(c), factor2, src3.DevPtr(c), factor3, N, cfg, str) } syncAndRecycle(str) }
func Image(f *data.Slice, fmin, fmax string) *image.NRGBA { dim := f.NComp() switch dim { default: log.Fatalf("unsupported number of components: %v", dim) case 3: return drawVectors(f.Vectors()) case 1: min, max := extrema(f.Host()[0]) if fmin != "auto" { m, err := strconv.ParseFloat(fmin, 32) util.FatalErr(err) min = float32(m) } if fmax != "auto" { m, err := strconv.ParseFloat(fmax, 32) util.FatalErr(err) max = float32(m) } return drawFloats(f.Scalars(), min, max) } panic("unreachable") }
// Copies src into dst, which is larger or smaller, and multiplies by vol*Bsat. // The remainder of dst is not filled with zeros. func copyPadMul(dst, src *data.Slice, dstsize, srcsize [3]int, vol *data.Slice, Bsat float64, str cu.Stream) { util.Argument(dst.NComp() == 1) util.Argument(src.NComp() == 1) util.Argument(vol.NComp() == 1) util.Assert(dst.Len() == prod(dstsize) && src.Len() == prod(srcsize)) util.Assert(vol.Mesh().Size() == srcsize) N0 := iMin(dstsize[1], srcsize[1]) N1 := iMin(dstsize[2], srcsize[2]) cfg := make2DConf(N0, N1) k_copypadmul_async(dst.DevPtr(0), dstsize[0], dstsize[1], dstsize[2], src.DevPtr(0), srcsize[0], srcsize[1], srcsize[2], vol.DevPtr(0), float32(Bsat), cfg, str) }
func writeVTKCellData(out io.Writer, q *data.Slice, dataformat string) (err error) { N := q.NComp() data := q.Tensors() switch N { case 1: fmt.Fprintf(out, "\t\t\t<PointData Scalars=\"%s\">\n", q.Tag()) fmt.Fprintf(out, "\t\t\t\t<DataArray type=\"Float32\" Name=\"%s\" NumberOfComponents=\"%d\" format=\"%s\">\n\t\t\t\t\t", q.Tag(), N, dataformat) case 3: fmt.Fprintf(out, "\t\t\t<PointData Vectors=\"%s\">\n", q.Tag()) fmt.Fprintf(out, "\t\t\t\t<DataArray type=\"Float32\" Name=\"%s\" NumberOfComponents=\"%d\" format=\"%s\">\n\t\t\t\t\t", q.Tag(), N, dataformat) case 6, 9: fmt.Fprintf(out, "\t\t\t<PointData Tensors=\"%s\">\n", q.Tag()) fmt.Fprintf(out, "\t\t\t\t<DataArray type=\"Float32\" Name=\"%s\" NumberOfComponents=\"%d\" format=\"%s\">\n\t\t\t\t\t", q.Tag(), 9, dataformat) // must be 9! default: log.Fatalf("vtk: cannot handle %v components", N) } gridsize := q.Mesh().Size() switch dataformat { case "ascii": for i := 0; i < gridsize[X]; i++ { for j := 0; j < gridsize[Y]; j++ { for k := 0; k < gridsize[Z]; k++ { // if symmetric tensor manage it appart to write the full 9 components if N == 6 { fmt.Fprint(out, data[swapIndex(0, 9)][i][j][k], " ") fmt.Fprint(out, data[swapIndex(1, 9)][i][j][k], " ") fmt.Fprint(out, data[swapIndex(2, 9)][i][j][k], " ") fmt.Fprint(out, data[swapIndex(1, 9)][i][j][k], " ") fmt.Fprint(out, data[swapIndex(3, 9)][i][j][k], " ") fmt.Fprint(out, data[swapIndex(4, 9)][i][j][k], " ") fmt.Fprint(out, data[swapIndex(2, 9)][i][j][k], " ") fmt.Fprint(out, data[swapIndex(4, 9)][i][j][k], " ") fmt.Fprint(out, data[swapIndex(5, 9)][i][j][k], " ") } else { for c := 0; c < N; c++ { fmt.Fprint(out, data[swapIndex(c, N)][i][j][k], " ") } } } } } case "binary": // Inlined for performance, terabytes of data will pass here... buffer := new(bytes.Buffer) for i := 0; i < gridsize[X]; i++ { for j := 0; j < gridsize[Y]; j++ { for k := 0; k < gridsize[Z]; k++ { // if symmetric tensor manage it appart to write the full 9 components if N == 6 { binary.Write(buffer, binary.LittleEndian, data[swapIndex(0, 9)][i][j][k]) binary.Write(buffer, binary.LittleEndian, data[swapIndex(1, 9)][i][j][k]) binary.Write(buffer, binary.LittleEndian, data[swapIndex(2, 9)][i][j][k]) binary.Write(buffer, binary.LittleEndian, data[swapIndex(1, 9)][i][j][k]) binary.Write(buffer, binary.LittleEndian, data[swapIndex(3, 9)][i][j][k]) binary.Write(buffer, binary.LittleEndian, data[swapIndex(4, 9)][i][j][k]) binary.Write(buffer, binary.LittleEndian, data[swapIndex(2, 9)][i][j][k]) binary.Write(buffer, binary.LittleEndian, data[swapIndex(4, 9)][i][j][k]) binary.Write(buffer, binary.LittleEndian, data[swapIndex(5, 9)][i][j][k]) } else { for c := 0; c < N; c++ { binary.Write(buffer, binary.LittleEndian, data[swapIndex(c, N)][i][j][k]) } } } } } b64len := uint32(len(buffer.Bytes())) bufLen := new(bytes.Buffer) binary.Write(bufLen, binary.LittleEndian, b64len) base64out := base64.NewEncoder(base64.StdEncoding, out) base64out.Write(bufLen.Bytes()) base64out.Write(buffer.Bytes()) base64out.Close() default: panic(fmt.Errorf("vtk: illegal data format " + dataformat + ". Options are: ascii, binary")) } fmt.Fprintln(out, "\n\t\t\t\t</DataArray>") fmt.Fprintln(out, "\t\t\t</PointData>") return }
func writeOvf2Header(out io.Writer, q *data.Slice, time, tstep float64) { gridsize := q.Mesh().Size() cellsize := q.Mesh().CellSize() fmt.Fprintln(out, "# OOMMF OVF 2.0") fmt.Fprintln(out, "#") hdr(out, "Segment count", "1") fmt.Fprintln(out, "#") hdr(out, "Begin", "Segment") hdr(out, "Begin", "Header") fmt.Fprintln(out, "#") hdr(out, "Title", q.Tag()) // TODO hdr(out, "meshtype", "rectangular") hdr(out, "meshunit", "m") hdr(out, "xmin", 0) hdr(out, "ymin", 0) hdr(out, "zmin", 0) hdr(out, "xmax", cellsize[Z]*float64(gridsize[Z])) hdr(out, "ymax", cellsize[Y]*float64(gridsize[Y])) hdr(out, "zmax", cellsize[X]*float64(gridsize[X])) name := q.Tag() var labels []interface{} if q.NComp() == 1 { labels = []interface{}{name} } else { for i := 0; i < q.NComp(); i++ { labels = append(labels, name+"_"+string('x'+i)) } } hdr(out, "valuedim", q.NComp()) hdr(out, "valuelabels", labels...) // TODO unit := q.Unit() if unit == "" { unit = "1" } if q.NComp() == 1 { hdr(out, "valueunits", unit) } else { hdr(out, "valueunits", unit, unit, unit) } // We don't really have stages fmt.Fprintln(out, "# Desc: Stage simulation time: ", tstep, " s") fmt.Fprintln(out, "# Desc: Total simulation time: ", time, " s") hdr(out, "xbase", cellsize[Z]/2) hdr(out, "ybase", cellsize[Y]/2) hdr(out, "zbase", cellsize[X]/2) hdr(out, "xnodes", gridsize[Z]) hdr(out, "ynodes", gridsize[Y]) hdr(out, "znodes", gridsize[X]) hdr(out, "xstepsize", cellsize[Z]) hdr(out, "ystepsize", cellsize[Y]) hdr(out, "zstepsize", cellsize[X]) fmt.Fprintln(out, "#") hdr(out, "End", "Header") fmt.Fprintln(out, "#") }
// Set all elements of all components to zero. func Zero(s *data.Slice) { Memset(s, make([]float32, s.NComp())...) }
// Maximum of absolute values of all elements. func MaxAbs(in *data.Slice) float32 { util.Argument(in.NComp() == 1) out := reduceBuf(0) k_reducemaxabs(in.DevPtr(0), out, 0, in.Len(), reducecfg) return copyback(out) }
// Returns a copy of in, allocated on GPU. func GPUCopy(in *data.Slice) *data.Slice { s := NewSlice(in.NComp(), in.Mesh()) data.Copy(s, in) return s }