// CorrBankStrideFFT computes the strided correlation of // an image with a bank of filters. // h_p[u, v] = (f corr g_p)[stride*u, stride*v] func CorrBankStrideFFT(f *rimg64.Image, g *Bank, stride int) (*rimg64.Multi, error) { out := ValidSizeStride(f.Size(), g.Size(), stride) if out.X <= 0 || out.Y <= 0 { return nil, nil } // Compute strided convolution as the sum over // a stride x stride grid of small convolutions. grid := image.Pt(stride, stride) // But do not divide into a larger grid than the size of the filter. // If the filter is smaller than the stride, // then some pixels in the image will not affect the output. grid.X = min(grid.X, g.Width) grid.Y = min(grid.Y, g.Height) // Determine the size of the sub-sampled filter. gsub := image.Pt(ceilDiv(g.Width, grid.X), ceilDiv(g.Height, grid.Y)) // The sub-sampled size of the image should be such that // the output size is attained. fsub := image.Pt(out.X+gsub.X-1, out.Y+gsub.Y-1) // Determine optimal size for FFT. work, _ := FFT2Size(fsub) // Cache FFT of image for convolving with multiple filters. // Re-use plan for multiple convolutions too. fhat := fftw.NewArray2(work.X, work.Y) ffwd := fftw.NewPlan2(fhat, fhat, fftw.Forward, fftw.Estimate) defer ffwd.Destroy() // FFT for current filter. ghat := fftw.NewArray2(work.X, work.Y) gfwd := fftw.NewPlan2(ghat, ghat, fftw.Forward, fftw.Estimate) defer gfwd.Destroy() // Allocate one array per output channel. hhat := make([]*fftw.Array2, len(g.Filters)) for k := range hhat { hhat[k] = fftw.NewArray2(work.X, work.Y) } // Normalization factor. alpha := complex(1/float64(work.X*work.Y), 0) // Add the convolutions over channels and strides. for i := 0; i < grid.X; i++ { for j := 0; j < grid.Y; j++ { // Take transform of downsampled image given offset (i, j). copyStrideTo(fhat, f, stride, image.Pt(i, j)) ffwd.Execute() // Take transform of each downsampled channel given offset (i, j). for q := range hhat { copyStrideTo(ghat, g.Filters[q], stride, image.Pt(i, j)) gfwd.Execute() addMul(hhat[q], ghat, fhat) } } } // Take the inverse transform of each channel. h := rimg64.NewMulti(out.X, out.Y, len(g.Filters)) for q := range hhat { scale(alpha, hhat[q]) fftw.IFFT2To(hhat[q], hhat[q]) copyRealToChannel(h, q, hhat[q]) } return h, nil }
// CorrBankFFT computes the correlation of an image with a bank of filters. // h_p[u, v] = (f corr g_p)[u, v] func CorrBankFFT(f *rimg64.Image, g *Bank) (*rimg64.Multi, error) { out := ValidSize(f.Size(), g.Size()) if out.X <= 0 || out.Y <= 0 { return nil, nil } // Determine optimal size for FFT. work, _ := FFT2Size(f.Size()) // Re-use FFT of image. fhat := fftw.NewArray2(work.X, work.Y) copyImageTo(fhat, f) fftw.FFT2To(fhat, fhat) // Transform of each filter. curr := fftw.NewArray2(work.X, work.Y) fwd := fftw.NewPlan2(curr, curr, fftw.Forward, fftw.Estimate) defer fwd.Destroy() bwd := fftw.NewPlan2(curr, curr, fftw.Backward, fftw.Estimate) defer bwd.Destroy() h := rimg64.NewMulti(out.X, out.Y, len(g.Filters)) alpha := complex(1/float64(work.X*work.Y), 0) // For each output channel. for p, gp := range g.Filters { // Take FFT. copyImageTo(curr, gp) fwd.Execute() // h_p[x] = (G_p corr F)[x] // H_p[x] = conj(G_p[x]) F[x] scaleMul(curr, alpha, curr, fhat) bwd.Execute() copyRealToChannel(h, p, curr) } return h, nil }
// CorrBankBLAS computes the correlation of an image with a bank of filters. // h_p[u, v] = (f corr g_p)[u, v] func CorrBankBLAS(f *rimg64.Image, g *Bank) (*rimg64.Multi, error) { out := ValidSize(f.Size(), g.Size()) if out.X <= 0 || out.Y <= 0 { return nil, nil } // Express as dense matrix multiplication. // h_p[u, v] = (f corr g_q)[u, v] // Y(h) = A(f) X(g) // If the number of output channels is k, then // A is (M-m+1)(N-n+1) x mn and // X is mn x k, so that // Y is (M-m+1)(N-n+1) x k. h := rimg64.NewMulti(out.X, out.Y, len(g.Filters)) m, n, k := g.Width, g.Height, len(g.Filters) a := blas.NewMat(out.X*out.Y, m*n) { var r int for u := 0; u < h.Width; u++ { for v := 0; v < h.Height; v++ { var s int for i := 0; i < g.Width; i++ { for j := 0; j < g.Height; j++ { a.Set(r, s, f.At(i+u, j+v)) s++ } } r++ } } } x := blas.NewMat(m*n, k) { var r int for i := 0; i < g.Width; i++ { for j := 0; j < g.Height; j++ { for p := 0; p < h.Channels; p++ { x.Set(r, p, g.Filters[p].At(i, j)) } r++ } } } y := blas.MatMul(1, a, x) { var r int for u := 0; u < h.Width; u++ { for v := 0; v < h.Height; v++ { for p := 0; p < h.Channels; p++ { h.Set(u, v, p, y.At(r, p)) } r++ } } } return h, nil }
// CorrBankStrideBLAS computes the strided correlation of // an image with a bank of filters. // h_p[u, v] = (f corr g_p)[stride*u, stride*v] func CorrBankStrideBLAS(f *rimg64.Image, g *Bank, stride int) (*rimg64.Multi, error) { out := ValidSizeStride(f.Size(), g.Size(), stride) if out.X <= 0 || out.Y <= 0 { return nil, nil } h := rimg64.NewMulti(out.X, out.Y, len(g.Filters)) // Size of filters. m, n := g.Width, g.Height // Express as dense matrix multiplication. // h_p[u, v] = sum_q (f_q corr g_pq)[u, v] // h = A(f) X(g) // where A is whk by mnk // with w = ceil[(M-m+1)/stride], // h = ceil[(N-n+1)/stride]. a := blas.NewMat(h.Width*h.Height, m*n) { var r int for u := 0; u < h.Width; u++ { for v := 0; v < h.Height; v++ { var s int for i := 0; i < g.Width; i++ { for j := 0; j < g.Height; j++ { a.Set(r, s, f.At(stride*u+i, stride*v+j)) s++ } } r++ } } } x := blas.NewMat(m*n, h.Channels) { var r int for i := 0; i < g.Width; i++ { for j := 0; j < g.Height; j++ { for p := 0; p < h.Channels; p++ { x.Set(r, p, g.Filters[p].At(i, j)) } r++ } } } y := blas.MatMul(1, a, x) { var r int for u := 0; u < h.Width; u++ { for v := 0; v < h.Height; v++ { for p := 0; p < h.Channels; p++ { h.Set(u, v, p, y.At(r, p)) } r++ } } } return h, nil }
// Decimate takes every r-th sample starting at (0, 0). func Decimate(f *rimg64.Image, r int) *rimg64.Image { out := ceilDivPt(f.Size(), r) g := rimg64.New(out.X, out.Y) for i := 0; i < g.Width; i++ { for j := 0; j < g.Height; j++ { g.Set(i, j, f.At(r*i, r*j)) } } return g }
// CorrStrideBLAS computes the strided correlation of an image with a filter. // h[u, v] = (f corr g)[stride*u, stride*v] func CorrStrideBLAS(f, g *rimg64.Image, stride int) (*rimg64.Image, error) { out := ValidSizeStride(f.Size(), g.Size(), stride) if out.X <= 0 || out.Y <= 0 { return nil, nil } h := rimg64.New(out.X, out.Y) // Size of filters. m, n := g.Width, g.Height // Express as dense matrix multiplication. // h[u, v] = (f corr g)[stride*u, stride*v] // y(h) = A(f) x(g) // where A is wh by mn // with w = ceil[(M-m+1)/stride], // h = ceil[(N-n+1)/stride]. a := blas.NewMat(h.Width*h.Height, m*n) { var r int for u := 0; u < h.Width; u++ { for v := 0; v < h.Height; v++ { var s int for i := 0; i < g.Width; i++ { for j := 0; j < g.Height; j++ { a.Set(r, s, f.At(stride*u+i, stride*v+j)) s++ } } r++ } } } x := blas.NewMat(m*n, 1) { var r int for i := 0; i < g.Width; i++ { for j := 0; j < g.Height; j++ { x.Set(r, 0, g.At(i, j)) r++ } } } y := blas.MatMul(1, a, x) { var r int for u := 0; u < h.Width; u++ { for v := 0; v < h.Height; v++ { h.Set(u, v, y.At(r, 0)) r++ } } } return h, nil }
// CorrStrideFFT computes the strided correlation of an image with a filter. // h[u, v] = (f corr g)[stride*u, stride*v] func CorrStrideFFT(f, g *rimg64.Image, stride int) (*rimg64.Image, error) { out := ValidSizeStride(f.Size(), g.Size(), stride) if out.X <= 0 || out.Y <= 0 { return nil, nil } // Compute strided convolution as the sum over // a stride x stride grid of small convolutions. grid := image.Pt(stride, stride) // But do not divide into a larger grid than the size of the filter. // If the filter is smaller than the stride, // then some pixels in the image will not affect the output. grid.X = min(grid.X, g.Width) grid.Y = min(grid.Y, g.Height) // Determine the size of the sub-sampled filter. gsub := image.Pt(ceilDiv(g.Width, grid.X), ceilDiv(g.Height, grid.Y)) // The sub-sampled size of the image should be such that // the output size is attained. fsub := image.Pt(out.X+gsub.X-1, out.Y+gsub.Y-1) // Determine optimal size for FFT. work, _ := FFT2Size(fsub) // Cache FFT of each channel of image for convolving with multiple filters. // Re-use plan for multiple convolutions too. fhat := fftw.NewArray2(work.X, work.Y) ffwd := fftw.NewPlan2(fhat, fhat, fftw.Forward, fftw.Estimate) defer ffwd.Destroy() // FFT for current filter. curr := fftw.NewArray2(work.X, work.Y) gfwd := fftw.NewPlan2(curr, curr, fftw.Forward, fftw.Estimate) defer gfwd.Destroy() // Normalization factor. alpha := complex(1/float64(work.X*work.Y), 0) // Add the convolutions over strides. hhat := fftw.NewArray2(work.X, work.Y) for i := 0; i < grid.X; i++ { for j := 0; j < grid.Y; j++ { // Copy each downsampled channel and take its transform. copyStrideTo(fhat, f, stride, image.Pt(i, j)) ffwd.Execute() copyStrideTo(curr, g, stride, image.Pt(i, j)) gfwd.Execute() addMul(hhat, curr, fhat) } } // Take the inverse transform. h := rimg64.New(out.X, out.Y) scale(alpha, hhat) fftw.IFFT2To(hhat, hhat) copyRealTo(h, hhat) return h, nil }
// CorrStrideNaive computes the strided correlation of an image with a filter. // h[u, v] = (f corr g)[stride*u, stride*v] func CorrStrideNaive(f, g *rimg64.Image, stride int) (*rimg64.Image, error) { out := ValidSizeStride(f.Size(), g.Size(), stride) h := rimg64.New(out.X, out.Y) for i := 0; i < h.Width; i++ { for j := 0; j < h.Height; j++ { var total float64 for u := 0; u < g.Width; u++ { for v := 0; v < g.Height; v++ { p := image.Pt(i, j).Mul(stride).Add(image.Pt(u, v)) total += f.At(p.X, p.Y) * g.At(u, v) } } h.Set(i, j, total) } } return h, nil }
// CorrNaive computes the correlation of an image with a filter. // h[u, v] = (f corr g)[u, v] func CorrNaive(f, g *rimg64.Image) (*rimg64.Image, error) { out := ValidSize(f.Size(), g.Size()) if out.X <= 0 || out.Y <= 0 { return nil, nil } h := rimg64.New(out.X, out.Y) for i := 0; i < out.X; i++ { for j := 0; j < out.Y; j++ { var total float64 for u := 0; u < g.Width; u++ { for v := 0; v < g.Height; v++ { total += f.At(i+u, j+v) * g.At(u, v) } } h.Set(i, j, total) } } return h, nil }
// CorrBankNaive computes the correlation of an image with a bank of filters. // h_p[u, v] = (f corr g_p)[u, v] func CorrBankNaive(f *rimg64.Image, g *Bank) (*rimg64.Multi, error) { out := ValidSize(f.Size(), g.Size()) if out.X <= 0 || out.Y <= 0 { return nil, nil } h := rimg64.NewMulti(out.X, out.Y, len(g.Filters)) for u := 0; u < h.Width; u++ { for v := 0; v < h.Height; v++ { for p := 0; p < h.Channels; p++ { var total float64 for i := 0; i < g.Width; i++ { for j := 0; j < g.Height; j++ { total += f.At(i+u, j+v) * g.Filters[p].At(i, j) } } h.Set(u, v, p, total) } } } return h, nil }
// CorrBankStrideNaive computes the strided correlation of // an image with a bank of filters. // h_p[u, v] = (f corr g_p)[stride*u, stride*v] func CorrBankStrideNaive(f *rimg64.Image, g *Bank, stride int) (*rimg64.Multi, error) { out := ValidSizeStride(f.Size(), g.Size(), stride) if out.X <= 0 || out.Y <= 0 { return nil, nil } h := rimg64.NewMulti(out.X, out.Y, len(g.Filters)) for u := 0; u < h.Width; u++ { for v := 0; v < h.Height; v++ { for p := 0; p < h.Channels; p++ { var sum float64 for i := 0; i < g.Width; i++ { for j := 0; j < g.Height; j++ { sum += f.At(stride*u+i, stride*v+j) * g.Filters[p].At(i, j) } } h.Set(u, v, p, sum) } } } return h, nil }
func errIfNotEqImage(f, g *rimg64.Image, eps float64) error { if !f.Size().Eq(g.Size()) { return fmt.Errorf("different size: %v, %v", f.Size(), g.Size()) } for i := 0; i < f.Width; i++ { for j := 0; j < f.Height; j++ { a, b := f.At(i, j), g.At(i, j) if math.Abs(a-b) > eps*math.Max(math.Abs(a), math.Abs(b)) { return fmt.Errorf("different at x %d, y %d: %g, %g", i, j, a, b) } } } return nil }
// CorrAuto computes the correlation of an image with a filter. // h[u, v] = (f corr g)[u, v] // Automatically selects between naive and Fourier-domain convolution. func CorrAuto(f, g *rimg64.Image) (*rimg64.Image, error) { // Size of output. size := ValidSize(f.Size(), g.Size()) // Return empty image if that's the result. if size.Eq(image.ZP) { return nil, nil } // Need to compute one inner product per output element. naiveMuls := size.X * size.Y * g.Width * g.Height // Optimal FFT size and number of multiplications. _, fftMuls := FFT2Size(f.Size()) // Need to perform two forward and one inverse transform. fftMuls *= 3 // Switch implementation based on image size. if fftMuls < naiveMuls { return CorrFFT(f, g) } return CorrNaive(f, g) }
// CorrFFT computes the correlation of an image with a filter. // h[u, v] = (f corr g)[u, v] func CorrFFT(f, g *rimg64.Image) (*rimg64.Image, error) { out := ValidSize(f.Size(), g.Size()) if out.X <= 0 || out.Y <= 0 { return nil, nil } // Determine optimal size for FFT. work, _ := FFT2Size(f.Size()) fhat := fftw.NewArray2(work.X, work.Y) ghat := fftw.NewArray2(work.X, work.Y) // Take forward transforms. copyImageTo(fhat, f) fftw.FFT2To(fhat, fhat) copyImageTo(ghat, g) fftw.FFT2To(ghat, ghat) // Scale such that convolution theorem holds. n := float64(work.X * work.Y) scaleMul(fhat, complex(1/n, 0), ghat, fhat) // Take inverse transform. h := rimg64.New(out.X, out.Y) fftw.IFFT2To(fhat, fhat) copyRealTo(h, fhat) return h, nil }