// Assumes that f is no smaller than x. func copyRealTo(f *rimg64.Image, x *fftw.Array2) { for u := 0; u < f.Width; u++ { for v := 0; v < f.Height; v++ { f.Set(u, v, real(x.At(u, v))) } } }
// CorrBankStrideFFT computes the strided correlation of // an image with a bank of filters. // h_p[u, v] = (f corr g_p)[stride*u, stride*v] func CorrBankStrideFFT(f *rimg64.Image, g *Bank, stride int) (*rimg64.Multi, error) { out := ValidSizeStride(f.Size(), g.Size(), stride) if out.X <= 0 || out.Y <= 0 { return nil, nil } // Compute strided convolution as the sum over // a stride x stride grid of small convolutions. grid := image.Pt(stride, stride) // But do not divide into a larger grid than the size of the filter. // If the filter is smaller than the stride, // then some pixels in the image will not affect the output. grid.X = min(grid.X, g.Width) grid.Y = min(grid.Y, g.Height) // Determine the size of the sub-sampled filter. gsub := image.Pt(ceilDiv(g.Width, grid.X), ceilDiv(g.Height, grid.Y)) // The sub-sampled size of the image should be such that // the output size is attained. fsub := image.Pt(out.X+gsub.X-1, out.Y+gsub.Y-1) // Determine optimal size for FFT. work, _ := FFT2Size(fsub) // Cache FFT of image for convolving with multiple filters. // Re-use plan for multiple convolutions too. fhat := fftw.NewArray2(work.X, work.Y) ffwd := fftw.NewPlan2(fhat, fhat, fftw.Forward, fftw.Estimate) defer ffwd.Destroy() // FFT for current filter. ghat := fftw.NewArray2(work.X, work.Y) gfwd := fftw.NewPlan2(ghat, ghat, fftw.Forward, fftw.Estimate) defer gfwd.Destroy() // Allocate one array per output channel. hhat := make([]*fftw.Array2, len(g.Filters)) for k := range hhat { hhat[k] = fftw.NewArray2(work.X, work.Y) } // Normalization factor. alpha := complex(1/float64(work.X*work.Y), 0) // Add the convolutions over channels and strides. for i := 0; i < grid.X; i++ { for j := 0; j < grid.Y; j++ { // Take transform of downsampled image given offset (i, j). copyStrideTo(fhat, f, stride, image.Pt(i, j)) ffwd.Execute() // Take transform of each downsampled channel given offset (i, j). 
for q := range hhat { copyStrideTo(ghat, g.Filters[q], stride, image.Pt(i, j)) gfwd.Execute() addMul(hhat[q], ghat, fhat) } } } // Take the inverse transform of each channel. h := rimg64.NewMulti(out.X, out.Y, len(g.Filters)) for q := range hhat { scale(alpha, hhat[q]) fftw.IFFT2To(hhat[q], hhat[q]) copyRealToChannel(h, q, hhat[q]) } return h, nil }
// CorrBankFFT computes the correlation of an image with a bank of filters. // h_p[u, v] = (f corr g_p)[u, v] func CorrBankFFT(f *rimg64.Image, g *Bank) (*rimg64.Multi, error) { out := ValidSize(f.Size(), g.Size()) if out.X <= 0 || out.Y <= 0 { return nil, nil } // Determine optimal size for FFT. work, _ := FFT2Size(f.Size()) // Re-use FFT of image. fhat := fftw.NewArray2(work.X, work.Y) copyImageTo(fhat, f) fftw.FFT2To(fhat, fhat) // Transform of each filter. curr := fftw.NewArray2(work.X, work.Y) fwd := fftw.NewPlan2(curr, curr, fftw.Forward, fftw.Estimate) defer fwd.Destroy() bwd := fftw.NewPlan2(curr, curr, fftw.Backward, fftw.Estimate) defer bwd.Destroy() h := rimg64.NewMulti(out.X, out.Y, len(g.Filters)) alpha := complex(1/float64(work.X*work.Y), 0) // For each output channel. for p, gp := range g.Filters { // Take FFT. copyImageTo(curr, gp) fwd.Execute() // h_p[x] = (G_p corr F)[x] // H_p[x] = conj(G_p[x]) F[x] scaleMul(curr, alpha, curr, fhat) bwd.Execute() copyRealToChannel(h, p, curr) } return h, nil }
// Flip mirrors an image in x and y. func Flip(f *rimg64.Image) *rimg64.Image { g := rimg64.New(f.Width, f.Height) for i := 0; i < f.Width; i++ { for j := 0; j < f.Height; j++ { g.Set(f.Width-1-i, f.Height-1-j, f.At(i, j)) } } return g }
func square(f *rimg64.Image) *rimg64.Image { g := rimg64.New(f.Width, f.Height) for i := 0; i < f.Width; i++ { for j := 0; j < f.Height; j++ { g.Set(i, j, sqr(f.At(i, j))) } } return g }
// CorrBankBLAS computes the correlation of an image with a bank of filters. // h_p[u, v] = (f corr g_p)[u, v] func CorrBankBLAS(f *rimg64.Image, g *Bank) (*rimg64.Multi, error) { out := ValidSize(f.Size(), g.Size()) if out.X <= 0 || out.Y <= 0 { return nil, nil } // Express as dense matrix multiplication. // h_p[u, v] = (f corr g_q)[u, v] // Y(h) = A(f) X(g) // If the number of output channels is k, then // A is (M-m+1)(N-n+1) x mn and // X is mn x k, so that // Y is (M-m+1)(N-n+1) x k. h := rimg64.NewMulti(out.X, out.Y, len(g.Filters)) m, n, k := g.Width, g.Height, len(g.Filters) a := blas.NewMat(out.X*out.Y, m*n) { var r int for u := 0; u < h.Width; u++ { for v := 0; v < h.Height; v++ { var s int for i := 0; i < g.Width; i++ { for j := 0; j < g.Height; j++ { a.Set(r, s, f.At(i+u, j+v)) s++ } } r++ } } } x := blas.NewMat(m*n, k) { var r int for i := 0; i < g.Width; i++ { for j := 0; j < g.Height; j++ { for p := 0; p < h.Channels; p++ { x.Set(r, p, g.Filters[p].At(i, j)) } r++ } } } y := blas.MatMul(1, a, x) { var r int for u := 0; u < h.Width; u++ { for v := 0; v < h.Height; v++ { for p := 0; p < h.Channels; p++ { h.Set(u, v, p, y.At(r, p)) } r++ } } } return h, nil }
// CorrBankStrideBLAS computes the strided correlation of // an image with a bank of filters. // h_p[u, v] = (f corr g_p)[stride*u, stride*v] func CorrBankStrideBLAS(f *rimg64.Image, g *Bank, stride int) (*rimg64.Multi, error) { out := ValidSizeStride(f.Size(), g.Size(), stride) if out.X <= 0 || out.Y <= 0 { return nil, nil } h := rimg64.NewMulti(out.X, out.Y, len(g.Filters)) // Size of filters. m, n := g.Width, g.Height // Express as dense matrix multiplication. // h_p[u, v] = sum_q (f_q corr g_pq)[u, v] // h = A(f) X(g) // where A is whk by mnk // with w = ceil[(M-m+1)/stride], // h = ceil[(N-n+1)/stride]. a := blas.NewMat(h.Width*h.Height, m*n) { var r int for u := 0; u < h.Width; u++ { for v := 0; v < h.Height; v++ { var s int for i := 0; i < g.Width; i++ { for j := 0; j < g.Height; j++ { a.Set(r, s, f.At(stride*u+i, stride*v+j)) s++ } } r++ } } } x := blas.NewMat(m*n, h.Channels) { var r int for i := 0; i < g.Width; i++ { for j := 0; j < g.Height; j++ { for p := 0; p < h.Channels; p++ { x.Set(r, p, g.Filters[p].At(i, j)) } r++ } } } y := blas.MatMul(1, a, x) { var r int for u := 0; u < h.Width; u++ { for v := 0; v < h.Height; v++ { for p := 0; p < h.Channels; p++ { h.Set(u, v, p, y.At(r, p)) } r++ } } } return h, nil }
// Decimate takes every r-th sample starting at (0, 0). func Decimate(f *rimg64.Image, r int) *rimg64.Image { out := ceilDivPt(f.Size(), r) g := rimg64.New(out.X, out.Y) for i := 0; i < g.Width; i++ { for j := 0; j < g.Height; j++ { g.Set(i, j, f.At(r*i, r*j)) } } return g }
func copyImageTo(x *fftw.Array2, f *rimg64.Image) { w, h := x.Dims() for u := 0; u < w; u++ { for v := 0; v < h; v++ { if u < f.Width && v < f.Height { x.Set(u, v, complex(f.At(u, v), 0)) } else { x.Set(u, v, 0) } } } }
func invNorm(f *rimg64.Image) float64 { var norm float64 for i := 0; i < f.Width; i++ { for j := 0; j < f.Height; j++ { norm += sqr(f.At(i, j)) } } norm = math.Sqrt(norm) // This will never be negative. if norm == 0 { return 0 } return 1 / norm }
// CorrStrideFFT computes the strided correlation of an image with a filter. // h[u, v] = (f corr g)[stride*u, stride*v] func CorrStrideFFT(f, g *rimg64.Image, stride int) (*rimg64.Image, error) { out := ValidSizeStride(f.Size(), g.Size(), stride) if out.X <= 0 || out.Y <= 0 { return nil, nil } // Compute strided convolution as the sum over // a stride x stride grid of small convolutions. grid := image.Pt(stride, stride) // But do not divide into a larger grid than the size of the filter. // If the filter is smaller than the stride, // then some pixels in the image will not affect the output. grid.X = min(grid.X, g.Width) grid.Y = min(grid.Y, g.Height) // Determine the size of the sub-sampled filter. gsub := image.Pt(ceilDiv(g.Width, grid.X), ceilDiv(g.Height, grid.Y)) // The sub-sampled size of the image should be such that // the output size is attained. fsub := image.Pt(out.X+gsub.X-1, out.Y+gsub.Y-1) // Determine optimal size for FFT. work, _ := FFT2Size(fsub) // Cache FFT of each channel of image for convolving with multiple filters. // Re-use plan for multiple convolutions too. fhat := fftw.NewArray2(work.X, work.Y) ffwd := fftw.NewPlan2(fhat, fhat, fftw.Forward, fftw.Estimate) defer ffwd.Destroy() // FFT for current filter. curr := fftw.NewArray2(work.X, work.Y) gfwd := fftw.NewPlan2(curr, curr, fftw.Forward, fftw.Estimate) defer gfwd.Destroy() // Normalization factor. alpha := complex(1/float64(work.X*work.Y), 0) // Add the convolutions over strides. hhat := fftw.NewArray2(work.X, work.Y) for i := 0; i < grid.X; i++ { for j := 0; j < grid.Y; j++ { // Copy each downsampled channel and take its transform. copyStrideTo(fhat, f, stride, image.Pt(i, j)) ffwd.Execute() copyStrideTo(curr, g, stride, image.Pt(i, j)) gfwd.Execute() addMul(hhat, curr, fhat) } } // Take the inverse transform. h := rimg64.New(out.X, out.Y) scale(alpha, hhat) fftw.IFFT2To(hhat, hhat) copyRealTo(h, hhat) return h, nil }
// dst[i, j] = src[i*stride + offset.X, j*stride + offset.Y], // or zero if this is outside the boundary. func copyStrideTo(dst *fftw.Array2, src *rimg64.Image, stride int, offset image.Point) { m, n := dst.Dims() bnds := image.Rect(0, 0, src.Width, src.Height) for i := 0; i < m; i++ { for j := 0; j < n; j++ { p := image.Pt(i, j).Mul(stride).Add(offset) var val complex128 if p.In(bnds) { val = complex(src.At(p.X, p.Y), 0) } dst.Set(i, j, val) } } }
// Tests whether (u, v) is a local maximum. // Pixels at the edge can be maxima. func notLocalMax(r *rimg64.Image, u, v int) bool { uv := r.At(u, v) if u > 0 && r.At(u-1, v) > uv { return true } if u < r.Width-1 && r.At(u+1, v) > uv { return true } if v > 0 && r.At(u, v-1) > uv { return true } if v < r.Height-1 && r.At(u, v+1) > uv { return true } return false }
// CorrStrideBLAS computes the strided correlation of an image with a filter. // h[u, v] = (f corr g)[stride*u, stride*v] func CorrStrideBLAS(f, g *rimg64.Image, stride int) (*rimg64.Image, error) { out := ValidSizeStride(f.Size(), g.Size(), stride) if out.X <= 0 || out.Y <= 0 { return nil, nil } h := rimg64.New(out.X, out.Y) // Size of filters. m, n := g.Width, g.Height // Express as dense matrix multiplication. // h[u, v] = (f corr g)[stride*u, stride*v] // y(h) = A(f) x(g) // where A is wh by mn // with w = ceil[(M-m+1)/stride], // h = ceil[(N-n+1)/stride]. a := blas.NewMat(h.Width*h.Height, m*n) { var r int for u := 0; u < h.Width; u++ { for v := 0; v < h.Height; v++ { var s int for i := 0; i < g.Width; i++ { for j := 0; j < g.Height; j++ { a.Set(r, s, f.At(stride*u+i, stride*v+j)) s++ } } r++ } } } x := blas.NewMat(m*n, 1) { var r int for i := 0; i < g.Width; i++ { for j := 0; j < g.Height; j++ { x.Set(r, 0, g.At(i, j)) r++ } } } y := blas.MatMul(1, a, x) { var r int for u := 0; u < h.Width; u++ { for v := 0; v < h.Height; v++ { h.Set(u, v, y.At(r, 0)) r++ } } } return h, nil }
// CorrBankStrideNaive computes the strided correlation of // an image with a bank of filters. // h_p[u, v] = (f corr g_p)[stride*u, stride*v] func CorrBankStrideNaive(f *rimg64.Image, g *Bank, stride int) (*rimg64.Multi, error) { out := ValidSizeStride(f.Size(), g.Size(), stride) if out.X <= 0 || out.Y <= 0 { return nil, nil } h := rimg64.NewMulti(out.X, out.Y, len(g.Filters)) for u := 0; u < h.Width; u++ { for v := 0; v < h.Height; v++ { for p := 0; p < h.Channels; p++ { var sum float64 for i := 0; i < g.Width; i++ { for j := 0; j < g.Height; j++ { sum += f.At(stride*u+i, stride*v+j) * g.Filters[p].At(i, j) } } h.Set(u, v, p, sum) } } } return h, nil }
// CorrBankNaive computes the correlation of an image with a bank of filters. // h_p[u, v] = (f corr g_p)[u, v] func CorrBankNaive(f *rimg64.Image, g *Bank) (*rimg64.Multi, error) { out := ValidSize(f.Size(), g.Size()) if out.X <= 0 || out.Y <= 0 { return nil, nil } h := rimg64.NewMulti(out.X, out.Y, len(g.Filters)) for u := 0; u < h.Width; u++ { for v := 0; v < h.Height; v++ { for p := 0; p < h.Channels; p++ { var total float64 for i := 0; i < g.Width; i++ { for j := 0; j < g.Height; j++ { total += f.At(i+u, j+v) * g.Filters[p].At(i, j) } } h.Set(u, v, p, total) } } } return h, nil }
// CorrStrideNaive computes the strided correlation of an image with a filter. // h[u, v] = (f corr g)[stride*u, stride*v] func CorrStrideNaive(f, g *rimg64.Image, stride int) (*rimg64.Image, error) { out := ValidSizeStride(f.Size(), g.Size(), stride) h := rimg64.New(out.X, out.Y) for i := 0; i < h.Width; i++ { for j := 0; j < h.Height; j++ { var total float64 for u := 0; u < g.Width; u++ { for v := 0; v < g.Height; v++ { p := image.Pt(i, j).Mul(stride).Add(image.Pt(u, v)) total += f.At(p.X, p.Y) * g.At(u, v) } } h.Set(i, j, total) } } return h, nil }
// CorrNaive computes the correlation of an image with a filter. // h[u, v] = (f corr g)[u, v] func CorrNaive(f, g *rimg64.Image) (*rimg64.Image, error) { out := ValidSize(f.Size(), g.Size()) if out.X <= 0 || out.Y <= 0 { return nil, nil } h := rimg64.New(out.X, out.Y) for i := 0; i < out.X; i++ { for j := 0; j < out.Y; j++ { var total float64 for u := 0; u < g.Width; u++ { for v := 0; v < g.Height; v++ { total += f.At(i+u, j+v) * g.At(u, v) } } h.Set(i, j, total) } } return h, nil }
// CorrAuto computes the correlation of an image with a filter. // h[u, v] = (f corr g)[u, v] // Automatically selects between naive and Fourier-domain convolution. func CorrAuto(f, g *rimg64.Image) (*rimg64.Image, error) { // Size of output. size := ValidSize(f.Size(), g.Size()) // Return empty image if that's the result. if size.Eq(image.ZP) { return nil, nil } // Need to compute one inner product per output element. naiveMuls := size.X * size.Y * g.Width * g.Height // Optimal FFT size and number of multiplications. _, fftMuls := FFT2Size(f.Size()) // Need to perform two forward and one inverse transform. fftMuls *= 3 // Switch implementation based on image size. if fftMuls < naiveMuls { return CorrFFT(f, g) } return CorrNaive(f, g) }
// CorrFFT computes the correlation of an image with a filter. // h[u, v] = (f corr g)[u, v] func CorrFFT(f, g *rimg64.Image) (*rimg64.Image, error) { out := ValidSize(f.Size(), g.Size()) if out.X <= 0 || out.Y <= 0 { return nil, nil } // Determine optimal size for FFT. work, _ := FFT2Size(f.Size()) fhat := fftw.NewArray2(work.X, work.Y) ghat := fftw.NewArray2(work.X, work.Y) // Take forward transforms. copyImageTo(fhat, f) fftw.FFT2To(fhat, fhat) copyImageTo(ghat, g) fftw.FFT2To(ghat, ghat) // Scale such that convolution theorem holds. n := float64(work.X * work.Y) scaleMul(fhat, complex(1/n, 0), ghat, fhat) // Take inverse transform. h := rimg64.New(out.X, out.Y) fftw.IFFT2To(fhat, fhat) copyRealTo(h, fhat) return h, nil }
// adjSum returns the sum of f at the four points formed by combining
// the x-coordinates {x1, x2} with the y-coordinates {y1, y2}.
func adjSum(f *rimg64.Image, x1, y1, x2, y2 int) float64 {
	// Accumulate in a fixed left-to-right order so that
	// floating-point rounding is reproducible.
	total := f.At(x1, y1)
	total += f.At(x1, y2)
	total += f.At(x2, y1)
	total += f.At(x2, y2)
	return total
}
// Avoids f.Set(x, y, f.Get(x, y, ...)). func addTo(f *rimg64.Image, x, y int, v float64) { f.Set(x, y, f.At(x, y)+v) }
func errIfNotEqImage(f, g *rimg64.Image, eps float64) error { if !f.Size().Eq(g.Size()) { return fmt.Errorf("different size: %v, %v", f.Size(), g.Size()) } for i := 0; i < f.Width; i++ { for j := 0; j < f.Height; j++ { a, b := f.At(i, j), g.At(i, j) if math.Abs(a-b) > eps*math.Max(math.Abs(a), math.Abs(b)) { return fmt.Errorf("different at x %d, y %d: %g, %g", i, j, a, b) } } } return nil }