// EvalFunc evaluates a function on every window in an image. // If the input image is M x N and the window size is m x n, // then the output is (M-m+1) x (N-n+1). // If the window size is larger than the image size in either dimension, // a nil image is returned with no error. func EvalFunc(im *rimg64.Multi, size image.Point, f ScoreFunc) (*rimg64.Image, error) { if im.Width < size.X || im.Height < size.Y { return nil, nil } r := rimg64.New(im.Width-size.X+1, im.Height-size.Y+1) x := rimg64.NewMulti(size.X, size.Y, im.Channels) for i := 0; i < r.Width; i++ { for j := 0; j < r.Height; j++ { // Copy window into x. for u := 0; u < size.X; u++ { for v := 0; v < size.Y; v++ { for p := 0; p < im.Channels; p++ { x.Set(u, v, p, im.At(i+u, j+v, p)) } } } y, err := f(x) if err != nil { return nil, err } r.Set(i, j, y) } } return r, nil }
// CorrMultiBankFFT computes the correlation of // a multi-channel image with a multi-channel filter. // h[u, v] = sum_p (f_p corr g_p)[u, v] func CorrMultiFFT(f, g *rimg64.Multi) (*rimg64.Image, error) { if err := errIfChannelsNotEq(f, g); err != nil { panic(err) } out := ValidSize(f.Size(), g.Size()) if out.Eq(image.ZP) { return nil, nil } work, _ := FFT2Size(f.Size()) fhat := fftw.NewArray2(work.X, work.Y) ghat := fftw.NewArray2(work.X, work.Y) ffwd := fftw.NewPlan2(fhat, fhat, fftw.Forward, fftw.Estimate) defer ffwd.Destroy() gfwd := fftw.NewPlan2(ghat, ghat, fftw.Forward, fftw.Estimate) defer gfwd.Destroy() hhat := fftw.NewArray2(work.X, work.Y) for p := 0; p < f.Channels; p++ { // Take transform of each channel. copyChannelTo(fhat, f, p) ffwd.Execute() copyChannelTo(ghat, g, p) gfwd.Execute() addMul(hhat, ghat, fhat) } n := float64(work.X * work.Y) scale(complex(1/n, 0), hhat) fftw.IFFT2To(hhat, hhat) h := rimg64.New(out.X, out.Y) copyRealTo(h, hhat) return h, nil }
func randImage(width, height int) *rimg64.Image { f := rimg64.New(width, height) for i := 0; i < width; i++ { for j := 0; j < height; j++ { f.Set(i, j, rand.NormFloat64()) } } return f }
// Flip mirrors an image in x and y. func Flip(f *rimg64.Image) *rimg64.Image { g := rimg64.New(f.Width, f.Height) for i := 0; i < f.Width; i++ { for j := 0; j < f.Height; j++ { g.Set(f.Width-1-i, f.Height-1-j, f.At(i, j)) } } return g }
func square(f *rimg64.Image) *rimg64.Image { g := rimg64.New(f.Width, f.Height) for i := 0; i < f.Width; i++ { for j := 0; j < f.Height; j++ { g.Set(i, j, sqr(f.At(i, j))) } } return g }
// CorrMultiStrideBLAS computes the strided correlation of // a multi-channel image with a multi-channel filter. // h[u, v] = sum_q (f_q corr g_q)[stride*u, stride*v] func CorrMultiStrideBLAS(f, g *rimg64.Multi, stride int) (*rimg64.Image, error) { out := ValidSizeStride(f.Size(), g.Size(), stride) if out.X <= 0 || out.Y <= 0 { return nil, nil } h := rimg64.New(out.X, out.Y) // Size of filters. m, n, k := g.Width, g.Height, g.Channels // Express as dense matrix multiplication. // h[u, v] = sum_q (f_q corr g_q)[stride*u, stride*v] // y(h) = A(f) x(g) // where A is wh by mnk // with w = ceil[(M-m+1)/stride], // h = ceil[(N-n+1)/stride]. a := blas.NewMat(h.Width*h.Height, m*n*k) { var r int for u := 0; u < h.Width; u++ { for v := 0; v < h.Height; v++ { var s int for i := 0; i < g.Width; i++ { for j := 0; j < g.Height; j++ { for q := 0; q < g.Channels; q++ { a.Set(r, s, f.At(stride*u+i, stride*v+j, q)) s++ } } } r++ } } } x := blas.NewMat(m*n*k, 1) { var r int for i := 0; i < g.Width; i++ { for j := 0; j < g.Height; j++ { for q := 0; q < g.Channels; q++ { x.Set(r, 0, g.At(i, j, q)) r++ } } } } y := blas.MatMul(1, a, x) { var r int for u := 0; u < h.Width; u++ { for v := 0; v < h.Height; v++ { h.Set(u, v, y.At(r, 0)) r++ } } } return h, nil }
// Decimate takes every r-th sample starting at (0, 0). func Decimate(f *rimg64.Image, r int) *rimg64.Image { out := ceilDivPt(f.Size(), r) g := rimg64.New(out.X, out.Y) for i := 0; i < g.Width; i++ { for j := 0; j < g.Height; j++ { g.Set(i, j, f.At(r*i, r*j)) } } return g }
// CorrMultiStrideFFT computes the correlation of // a multi-channel image with a multi-channel filter. // h[u, v] = sum_q (f_q corr g_q)[u, v] func CorrMultiStrideFFT(f, g *rimg64.Multi, stride int) (*rimg64.Image, error) { if err := errIfChannelsNotEq(f, g); err != nil { panic(err) } out := ValidSizeStride(f.Size(), g.Size(), stride) if out.X <= 0 || out.Y <= 0 { return nil, nil } // Compute strided convolution as the sum over // a stride x stride grid of small convolutions. grid := image.Pt(stride, stride) // But do not divide into a larger grid than the size of the filter. // If the filter is smaller than the stride, // then some pixels in the image will not affect the output. grid.X = min(grid.X, g.Width) grid.Y = min(grid.Y, g.Height) // Determine the size of the sub-sampled filter. gsub := image.Pt(ceilDiv(g.Width, grid.X), ceilDiv(g.Height, grid.Y)) // The sub-sampled size of the image should be such that // the output size is attained. fsub := image.Pt(out.X+gsub.X-1, out.Y+gsub.Y-1) // Determine optimal size for FFT. work, _ := FFT2Size(fsub) // Cache FFT of each channel of image for convolving with multiple filters. // Re-use plan for multiple convolutions too. fhat := fftw.NewArray2(work.X, work.Y) ffwd := fftw.NewPlan2(fhat, fhat, fftw.Forward, fftw.Estimate) defer ffwd.Destroy() ghat := fftw.NewArray2(work.X, work.Y) gfwd := fftw.NewPlan2(ghat, ghat, fftw.Forward, fftw.Estimate) defer gfwd.Destroy() // Normalization factor. alpha := complex(1/float64(work.X*work.Y), 0) // Add the convolutions over channels and strides. hhat := fftw.NewArray2(work.X, work.Y) for k := 0; k < f.Channels; k++ { for i := 0; i < grid.X; i++ { for j := 0; j < grid.Y; j++ { // Copy each downsampled channel and take its transform. copyChannelStrideTo(fhat, f, k, stride, image.Pt(i, j)) ffwd.Execute() copyChannelStrideTo(ghat, g, k, stride, image.Pt(i, j)) gfwd.Execute() addMul(hhat, ghat, fhat) } } } // Take the inverse transform. h := rimg64.New(out.X, out.Y) scale(alpha, hhat) fftw.IFFT2To(hhat, hhat) copyRealTo(h, hhat) return h, nil }
// Takes the sum over channels. func squareMulti(f *rimg64.Multi) *rimg64.Image { g := rimg64.New(f.Width, f.Height) for i := 0; i < f.Width; i++ { for j := 0; j < f.Height; j++ { for k := 0; k < f.Channels; k++ { g.Set(i, j, g.At(i, j)+sqr(f.At(i, j, k))) } } } return g }
// CorrBLAS computes the correlation of an image with a filter. // h[u, v] = (f corr g)[u, v] func CorrBLAS(f, g *rimg64.Image) (*rimg64.Image, error) { out := ValidSize(f.Size(), g.Size()) if out.X <= 0 || out.Y <= 0 { return nil, nil } h := rimg64.New(out.X, out.Y) // Size of filters. m, n := g.Width, g.Height // Express as dense matrix multiplication. // h[u, v] = (f corr g)[u, v] // y(h) = A(f) x(g) // where A is (M-m+1)(N-n+1) by mn. a := blas.NewMat(h.Width*h.Height, m*n) { var r int for u := 0; u < h.Width; u++ { for v := 0; v < h.Height; v++ { var s int for i := 0; i < g.Width; i++ { for j := 0; j < g.Height; j++ { a.Set(r, s, f.At(u+i, v+j)) s++ } } r++ } } } x := blas.NewMat(m*n, 1) { var r int for i := 0; i < g.Width; i++ { for j := 0; j < g.Height; j++ { x.Set(r, 0, g.At(i, j)) r++ } } } y := blas.MatMul(1, a, x) { var r int for u := 0; u < h.Width; u++ { for v := 0; v < h.Height; v++ { h.Set(u, v, y.At(r, 0)) r++ } } } return h, nil }
// CorrStrideNaive computes the strided correlation of an image with a filter. // h[u, v] = (f corr g)[stride*u, stride*v] func CorrStrideNaive(f, g *rimg64.Image, stride int) (*rimg64.Image, error) { out := ValidSizeStride(f.Size(), g.Size(), stride) h := rimg64.New(out.X, out.Y) for i := 0; i < h.Width; i++ { for j := 0; j < h.Height; j++ { var total float64 for u := 0; u < g.Width; u++ { for v := 0; v < g.Height; v++ { p := image.Pt(i, j).Mul(stride).Add(image.Pt(u, v)) total += f.At(p.X, p.Y) * g.At(u, v) } } h.Set(i, j, total) } } return h, nil }
// CorrNaive computes the correlation of an image with a filter. // h[u, v] = (f corr g)[u, v] func CorrNaive(f, g *rimg64.Image) (*rimg64.Image, error) { out := ValidSize(f.Size(), g.Size()) if out.X <= 0 || out.Y <= 0 { return nil, nil } h := rimg64.New(out.X, out.Y) for i := 0; i < out.X; i++ { for j := 0; j < out.Y; j++ { var total float64 for u := 0; u < g.Width; u++ { for v := 0; v < g.Height; v++ { total += f.At(i+u, j+v) * g.At(u, v) } } h.Set(i, j, total) } } return h, nil }
// CorrMultiStrideNaive computes the correlation of // a multi-channel image with a multi-channel filter. // h[u, v] = sum_q (f_q corr g_q)[u, v] func CorrMultiStrideNaive(f, g *rimg64.Multi, stride int) (*rimg64.Image, error) { if err := errIfChannelsNotEq(f, g); err != nil { panic(err) } out := ValidSizeStride(f.Size(), g.Size(), stride) h := rimg64.New(out.X, out.Y) for i := 0; i < h.Width; i++ { for j := 0; j < h.Height; j++ { var total float64 for u := 0; u < g.Width; u++ { for v := 0; v < g.Height; v++ { p := image.Pt(i, j).Mul(stride).Add(image.Pt(u, v)) for k := 0; k < f.Channels; k++ { total += f.At(p.X, p.Y, k) * g.At(u, v, k) } } } h.Set(i, j, total) } } return h, nil }
// CorrFFT computes the correlation of an image with a filter. // h[u, v] = (f corr g)[u, v] func CorrFFT(f, g *rimg64.Image) (*rimg64.Image, error) { out := ValidSize(f.Size(), g.Size()) if out.X <= 0 || out.Y <= 0 { return nil, nil } // Determine optimal size for FFT. work, _ := FFT2Size(f.Size()) fhat := fftw.NewArray2(work.X, work.Y) ghat := fftw.NewArray2(work.X, work.Y) // Take forward transforms. copyImageTo(fhat, f) fftw.FFT2To(fhat, fhat) copyImageTo(ghat, g) fftw.FFT2To(ghat, ghat) // Scale such that convolution theorem holds. n := float64(work.X * work.Y) scaleMul(fhat, complex(1/n, 0), ghat, fhat) // Take inverse transform. h := rimg64.New(out.X, out.Y) fftw.IFFT2To(fhat, fhat) copyRealTo(h, fhat) return h, nil }
// Explicitly forms vectors and computes normalized dot product. func cosCorrMultiNaive(f, g *rimg64.Multi) *rimg64.Image { h := rimg64.New(f.Width-g.Width+1, f.Height-g.Height+1) n := g.Width * g.Height * g.Channels a := make([]float64, n) b := make([]float64, n) for i := 0; i < h.Width; i++ { for j := 0; j < h.Height; j++ { a = a[:0] b = b[:0] for u := 0; u < g.Width; u++ { for v := 0; v < g.Height; v++ { for p := 0; p < g.Channels; p++ { a = append(a, f.At(i+u, j+v, p)) b = append(b, g.At(u, v, p)) } } } floats.Scale(1/floats.Norm(a, 2), a) floats.Scale(1/floats.Norm(b, 2), b) h.Set(i, j, floats.Dot(a, b)) } } return h }
// CorrMultiNaive computes the correlation of // a multi-channel image with a multi-channel filter. // h[u, v] = sum_p (f_p corr g_p)[u, v] func CorrMultiNaive(f, g *rimg64.Multi) (*rimg64.Image, error) { if err := errIfChannelsNotEq(f, g); err != nil { panic(err) } out := ValidSize(f.Size(), g.Size()) if out.Eq(image.ZP) { return nil, nil } h := rimg64.New(out.X, out.Y) for i := 0; i < out.X; i++ { for j := 0; j < out.Y; j++ { var total float64 for u := 0; u < g.Width; u++ { for v := 0; v < g.Height; v++ { for p := 0; p < f.Channels; p++ { total += f.At(i+u, j+v, p) * g.At(u, v, p) } } } h.Set(i, j, total) } } return h, nil }
func HOG(f *rimg64.Multi, conf Config) *rimg64.Multi { const eps = 0.0001 // Leave a one-pixel border to compute derivatives. inside := image.Rectangle{image.ZP, f.Size()}.Inset(1) // Leave a half-cell border. half := conf.CellSize / 2 valid := inside.Inset(half) // Number of whole cells inside valid region. cells := valid.Size().Div(conf.CellSize) if cells.X <= 0 || cells.Y <= 0 { return nil } // Remove one cell on all sides for output. out := cells.Sub(image.Pt(2, 2)) // Region to iterate over. size := cells.Mul(conf.CellSize).Add(image.Pt(2*half, 2*half)) vis := image.Rectangle{inside.Min, inside.Min.Add(size)} // Accumulate edges into cell histograms. hist := rimg64.NewMulti(cells.X, cells.Y, 2*conf.Angles) quantizer := makeQuantizer(conf.Angles) for a := vis.Min.X; a < vis.Max.X; a++ { for b := vis.Min.Y; b < vis.Max.Y; b++ { x, y := a-half-vis.Min.X, b-half-vis.Min.Y // Pick channel with strongest gradient. grad, v := maxGrad(f, a, b) v = math.Sqrt(v) // Snap to orientation. q := quantizer.quantize(grad) // Add to 4 histograms around pixel using bilinear interpolation. xp := (float64(x)+0.5)/float64(conf.CellSize) - 0.5 yp := (float64(y)+0.5)/float64(conf.CellSize) - 0.5 // Extract integer and fractional part. ixp, vx0 := modf(xp) iyp, vy0 := modf(yp) // Complement of fraction part. vx1 := 1 - vx0 vy1 := 1 - vy0 if ixp >= 0 && iyp >= 0 { addToMulti(hist, ixp, iyp, q, vx1*vy1*v) } if ixp+1 < cells.X && iyp >= 0 { addToMulti(hist, ixp+1, iyp, q, vx0*vy1*v) } if ixp >= 0 && iyp+1 < cells.Y { addToMulti(hist, ixp, iyp+1, q, vx1*vy0*v) } if ixp+1 < cells.X && iyp+1 < cells.Y { addToMulti(hist, ixp+1, iyp+1, q, vx0*vy0*v) } } } // compute energy in each block by summing over orientations norm := rimg64.New(cells.X, cells.Y) for x := 0; x < cells.X; x++ { for y := 0; y < cells.Y; y++ { for d := 0; d < conf.Angles; d++ { s := hist.At(x, y, d) + hist.At(x, y, d+conf.Angles) addTo(norm, x, y, s*s) } } } feat := rimg64.NewMulti(out.X, out.Y, conf.Channels()) for x := 0; x < out.X; x++ { for y := 0; y < out.Y; y++ { a, b := x+1, y+1 // Normalization factors. var n [4]float64 n[0] = 1 / math.Sqrt(adjSum(norm, a, b, a+1, b+1)+eps) n[1] = 1 / math.Sqrt(adjSum(norm, a, b, a+1, b-1)+eps) n[2] = 1 / math.Sqrt(adjSum(norm, a, b, a-1, b+1)+eps) n[3] = 1 / math.Sqrt(adjSum(norm, a, b, a-1, b-1)+eps) var off int // Contrast-sensitive features. if !conf.NoContrastVar { for d := 0; d < 2*conf.Angles; d++ { h := hist.At(a, b, d) var sum float64 for _, ni := range n { val := h * ni if !conf.NoClip { val = math.Min(val, 0.2) } sum += val } feat.Set(x, y, off+d, sum/2) } off += 2 * conf.Angles } // Contrast-insensitive features. if !conf.NoContrastInvar { for d := 0; d < conf.Angles; d++ { h := hist.At(a, b, d) + hist.At(a, b, conf.Angles+d) var sum float64 for _, ni := range n { val := h * ni if !conf.NoClip { val = math.Min(val, 0.2) } sum += val } feat.Set(x, y, off+d, sum/2) } off += conf.Angles } // Texture features. if !conf.NoTexture { for i, ni := range n { var sum float64 for d := 0; d < 2*conf.Angles; d++ { h := hist.At(a, b, d) val := h * ni if !conf.NoClip { val = math.Min(val, 0.2) } sum += val } feat.Set(x, y, off+i, sum/math.Sqrt(float64(2*conf.Angles))) } off += 4 } } } return feat }