func (phi *MaxPool) Apply(x *rimg64.Multi) (*rimg64.Multi, error) { if phi.Field.X <= 0 || phi.Field.Y <= 0 { err := fmt.Errorf("invalid field size: %v", phi.Field) return nil, err } if phi.Stride <= 0 { err := fmt.Errorf("invalid stride: %d", phi.Stride) return nil, err } size := image.Pt( ceilDiv(x.Width-phi.Field.X+1, phi.Stride), ceilDiv(x.Height-phi.Field.Y+1, phi.Stride), ) y := rimg64.NewMulti(size.X, size.Y, x.Channels) for i := 0; i < y.Width; i++ { for j := 0; j < y.Height; j++ { for k := 0; k < x.Channels; k++ { // Position in original image. p := image.Pt(i, j).Mul(phi.Stride) max := math.Inf(-1) for u := 0; u < phi.Field.X; u++ { for v := 0; v < phi.Field.Y; v++ { q := p.Add(image.Pt(u, v)) max = math.Max(max, x.At(q.X, q.Y, k)) } } y.Set(i, j, k, max) } } } return y, nil }
// Assumes that f is no smaller than x. func copyRealToChannel(f *rimg64.Multi, p int, x *fftw.Array2) { for u := 0; u < f.Width; u++ { for v := 0; v < f.Height; v++ { f.Set(u, v, p, real(x.At(u, v))) } } }
// EvalFunc evaluates a function on every window in an image. // If the input image is M x N and the window size is m x n, // then the output is (M-m+1) x (N-n+1). // If the window size is larger than the image size in either dimension, // a nil image is returned with no error. func EvalFunc(im *rimg64.Multi, size image.Point, f ScoreFunc) (*rimg64.Image, error) { if im.Width < size.X || im.Height < size.Y { return nil, nil } r := rimg64.New(im.Width-size.X+1, im.Height-size.Y+1) x := rimg64.NewMulti(size.X, size.Y, im.Channels) for i := 0; i < r.Width; i++ { for j := 0; j < r.Height; j++ { // Copy window into x. for u := 0; u < size.X; u++ { for v := 0; v < size.Y; v++ { for p := 0; p < im.Channels; p++ { x.Set(u, v, p, im.At(i+u, j+v, p)) } } } y, err := f(x) if err != nil { return nil, err } r.Set(i, j, y) } } return r, nil }
func (phi SumPool) Apply(x *rimg64.Multi) (*rimg64.Multi, error) { if phi.Field.X <= 0 || phi.Field.Y <= 0 { err := fmt.Errorf("invalid field size: %v", phi.Field) return nil, err } if phi.Stride <= 0 { err := fmt.Errorf("invalid stride: %d", phi.Stride) return nil, err } size := image.Pt( ceilDiv(x.Width-phi.Field.X+1, phi.Stride), ceilDiv(x.Height-phi.Field.Y+1, phi.Stride), ) y := rimg64.NewMulti(size.X, size.Y, x.Channels) for i := 0; i < y.Width; i++ { for j := 0; j < y.Height; j++ { for k := 0; k < x.Channels; k++ { // Position in original image. p := image.Pt(i, j).Mul(phi.Stride) var t float64 for u := p.X; u < p.X+phi.Field.X; u++ { for v := p.Y; v < p.Y+phi.Field.Y; v++ { t += x.At(u, v, k) } } y.Set(i, j, k, t) } } } return y, nil }
// CorrMultiBankStrideBLAS computes the strided correlation of // a multi-channel image with a bank of multi-channel filters. // h_p[u, v] = sum_q (f_q corr g_pq)[stride*u, stride*v] func CorrMultiBankStrideBLAS(f *rimg64.Multi, g *MultiBank, stride int) (*rimg64.Multi, error) { out := ValidSizeStride(f.Size(), g.Size(), stride) if out.X <= 0 || out.Y <= 0 { return nil, nil } h := rimg64.NewMulti(out.X, out.Y, len(g.Filters)) // Size of filters. m, n, k := g.Width, g.Height, g.Channels // Express as dense matrix multiplication. // h_p[u, v] = sum_q (f_q corr g_pq)[u, v] // h = A(f) X(g) // where A is whk by mnk // with w = ceil[(M-m+1)/stride], // h = ceil[(N-n+1)/stride]. a := blas.NewMat(h.Width*h.Height, m*n*k) { var r int for u := 0; u < h.Width; u++ { for v := 0; v < h.Height; v++ { var s int for i := 0; i < g.Width; i++ { for j := 0; j < g.Height; j++ { for q := 0; q < g.Channels; q++ { a.Set(r, s, f.At(stride*u+i, stride*v+j, q)) s++ } } } r++ } } } x := blas.NewMat(m*n*k, h.Channels) { var r int for i := 0; i < g.Width; i++ { for j := 0; j < g.Height; j++ { for q := 0; q < g.Channels; q++ { for p := 0; p < h.Channels; p++ { x.Set(r, p, g.Filters[p].At(i, j, q)) } r++ } } } } y := blas.MatMul(1, a, x) { var r int for u := 0; u < h.Width; u++ { for v := 0; v < h.Height; v++ { for p := 0; p < h.Channels; p++ { h.Set(u, v, p, y.At(r, p)) } r++ } } } return h, nil }
// CorrMultiStrideFFT computes the correlation of // a multi-channel image with a multi-channel filter. // h[u, v] = sum_q (f_q corr g_q)[u, v] func CorrMultiStrideFFT(f, g *rimg64.Multi, stride int) (*rimg64.Image, error) { if err := errIfChannelsNotEq(f, g); err != nil { panic(err) } out := ValidSizeStride(f.Size(), g.Size(), stride) if out.X <= 0 || out.Y <= 0 { return nil, nil } // Compute strided convolution as the sum over // a stride x stride grid of small convolutions. grid := image.Pt(stride, stride) // But do not divide into a larger grid than the size of the filter. // If the filter is smaller than the stride, // then some pixels in the image will not affect the output. grid.X = min(grid.X, g.Width) grid.Y = min(grid.Y, g.Height) // Determine the size of the sub-sampled filter. gsub := image.Pt(ceilDiv(g.Width, grid.X), ceilDiv(g.Height, grid.Y)) // The sub-sampled size of the image should be such that // the output size is attained. fsub := image.Pt(out.X+gsub.X-1, out.Y+gsub.Y-1) // Determine optimal size for FFT. work, _ := FFT2Size(fsub) // Cache FFT of each channel of image for convolving with multiple filters. // Re-use plan for multiple convolutions too. fhat := fftw.NewArray2(work.X, work.Y) ffwd := fftw.NewPlan2(fhat, fhat, fftw.Forward, fftw.Estimate) defer ffwd.Destroy() ghat := fftw.NewArray2(work.X, work.Y) gfwd := fftw.NewPlan2(ghat, ghat, fftw.Forward, fftw.Estimate) defer gfwd.Destroy() // Normalization factor. alpha := complex(1/float64(work.X*work.Y), 0) // Add the convolutions over channels and strides. hhat := fftw.NewArray2(work.X, work.Y) for k := 0; k < f.Channels; k++ { for i := 0; i < grid.X; i++ { for j := 0; j < grid.Y; j++ { // Copy each downsampled channel and take its transform. copyChannelStrideTo(fhat, f, k, stride, image.Pt(i, j)) ffwd.Execute() copyChannelStrideTo(ghat, g, k, stride, image.Pt(i, j)) gfwd.Execute() addMul(hhat, ghat, fhat) } } } // Take the inverse transform. h := rimg64.New(out.X, out.Y) scale(alpha, hhat) fftw.IFFT2To(hhat, hhat) copyRealTo(h, hhat) return h, nil }
// FlipMulti mirrors a multi-channel image in x and y. func FlipMulti(f *rimg64.Multi) *rimg64.Multi { g := rimg64.NewMulti(f.Width, f.Height, f.Channels) for i := 0; i < f.Width; i++ { for j := 0; j < f.Height; j++ { for k := 0; k < f.Channels; k++ { g.Set(f.Width-1-i, f.Height-1-j, k, f.At(i, j, k)) } } } return g }
func (phi *Scale) Apply(x *rimg64.Multi) (*rimg64.Multi, error) { y := rimg64.NewMulti(x.Width, x.Height, x.Channels) for u := 0; u < x.Width; u++ { for v := 0; v < x.Height; v++ { for p := 0; p < x.Channels; p++ { y.Set(u, v, p, float64(*phi)*x.At(u, v, p)) } } } return y, nil }
// Takes the sum over channels. func squareMulti(f *rimg64.Multi) *rimg64.Image { g := rimg64.New(f.Width, f.Height) for i := 0; i < f.Width; i++ { for j := 0; j < f.Height; j++ { for k := 0; k < f.Channels; k++ { g.Set(i, j, g.At(i, j)+sqr(f.At(i, j, k))) } } } return g }
func (phi *SelectChannels) Apply(f *rimg64.Multi) (*rimg64.Multi, error) { g := rimg64.NewMulti(f.Width, f.Height, len(phi.Set)) for u := 0; u < f.Width; u++ { for v := 0; v < f.Height; v++ { for i, p := range phi.Set { g.Set(u, v, i, f.At(u, v, p)) } } } return g, nil }
func (phi *ChannelInterval) Apply(f *rimg64.Multi) (*rimg64.Multi, error) { g := rimg64.NewMulti(f.Width, f.Height, phi.B-phi.A) for u := 0; u < f.Width; u++ { for v := 0; v < f.Height; v++ { for p := phi.A; p < phi.B; p++ { g.Set(u, v, p-phi.A, f.At(u, v, p)) } } } return g, nil }
func dot(x, y *rimg64.Multi) float64 { var d float64 for i := 0; i < x.Width; i++ { for j := 0; j < x.Height; j++ { for k := 0; k < x.Channels; k++ { d += x.At(i, j, k) * y.At(i, j, k) } } } return d }
func (phi *PosPart) Apply(x *rimg64.Multi) (*rimg64.Multi, error) { y := rimg64.NewMulti(x.Width, x.Height, x.Channels) for i := 0; i < x.Width; i++ { for j := 0; j < x.Height; j++ { for k := 0; k < x.Channels; k++ { y.Set(i, j, k, math.Max(0, x.At(i, j, k))) } } } return y, nil }
// DecimateMulti takes every r-th sample starting at (0, 0). func DecimateMulti(f *rimg64.Multi, r int) *rimg64.Multi { out := ceilDivPt(f.Size(), r) g := rimg64.NewMulti(out.X, out.Y, f.Channels) for i := 0; i < g.Width; i++ { for j := 0; j < g.Height; j++ { for k := 0; k < g.Channels; k++ { g.Set(i, j, k, f.At(r*i, r*j, k)) } } } return g }
// Assumes that f is no smaller than x. // Pads with zeros. func copyChannelTo(x *fftw.Array2, f *rimg64.Multi, p int) { w, h := x.Dims() for u := 0; u < w; u++ { for v := 0; v < h; v++ { if u < f.Width && v < f.Height { x.Set(u, v, complex(f.At(u, v, p), 0)) } else { x.Set(u, v, 0) } } } }
func (phi *IsPos) Apply(x *rimg64.Multi) (*rimg64.Multi, error) { y := rimg64.NewMulti(x.Width, x.Height, x.Channels) for i := 0; i < x.Width; i++ { for j := 0; j < x.Height; j++ { for k := 0; k < x.Channels; k++ { if x.At(i, j, k) > 0 { y.Set(i, j, k, 1) } } } } return y, nil }
func (phi *PosNegPart) Apply(x *rimg64.Multi) (*rimg64.Multi, error) { channels := x.Channels * 2 y := rimg64.NewMulti(x.Width, x.Height, channels) for i := 0; i < x.Width; i++ { for j := 0; j < x.Height; j++ { for k := 0; k < x.Channels; k++ { pos, neg := posNegPart(x.At(i, j, k)) y.Set(i, j, 2*k, pos) y.Set(i, j, 2*k+1, neg) } } } return y, nil }
func (phi *AdjChanNorm) Apply(x *rimg64.Multi) (*rimg64.Multi, error) { y := rimg64.NewMulti(x.Width, x.Height, x.Channels) r := (phi.Num - 1) / 2 for i := 0; i < x.Width; i++ { for j := 0; j < x.Height; j++ { k := 0 // Range over which to compute sum. a, b := k-r, k+r+1 // Take sum excluding leading element. var t float64 for p := 0; p < min(b, x.Channels); p++ { t += sqr(x.At(i, j, p)) } for ; k < x.Channels; k++ { a, b = k-r, k+r+1 // Set element. norm := math.Pow(phi.K+phi.Alpha*t, phi.Beta) y.Set(i, j, k, x.At(i, j, k)/norm) // Subtract trailing element. if a >= 0 { t -= sqr(x.At(i, j, a)) } // Add leading element. if b < x.Channels { t += sqr(x.At(i, j, b)) } } } } return y, nil }
// dst[i, j] = src[i*stride + offset.X, j*stride + offset.Y], // or zero if this is outside the boundary. func copyChannelStrideTo(dst *fftw.Array2, src *rimg64.Multi, channel, stride int, offset image.Point) { m, n := dst.Dims() bnds := image.Rect(0, 0, src.Width, src.Height) for i := 0; i < m; i++ { for j := 0; j < n; j++ { p := image.Pt(i, j).Mul(stride).Add(offset) var val complex128 if p.In(bnds) { val = complex(src.At(p.X, p.Y, channel), 0) } dst.Set(i, j, val) } } }
func drawCell(feat *rimg64.Multi, i, j int, gc *draw2d.ImageGraphicContext, cell int) { u := (float64(i) + 0.5) * float64(cell) v := (float64(j) + 0.5) * float64(cell) r := float64(cell) / 2 for k := 0; k < Orientations; k++ { x := feat.At(i, j, k) x = math.Max(x, 0) x = math.Min(x, 1) gc.SetStrokeColor(color.Gray{uint8(x*254 + 1)}) theta := (0.5 + float64(k)/float64(Orientations)) * math.Pi drawOrientedLine(gc, u, v, theta, r) } }
func invNormMulti(f *rimg64.Multi) float64 { var norm float64 for i := 0; i < f.Width; i++ { for j := 0; j < f.Height; j++ { for k := 0; k < f.Channels; k++ { norm += sqr(f.At(i, j, k)) } } } norm = math.Sqrt(norm) // This cannot be negative. if norm == 0 { return 0 } return 1 / norm }
func (phi *AddConst) Apply(x *rimg64.Multi) (*rimg64.Multi, error) { if x.Channels != len(*phi) { err := fmt.Errorf("channels: image has %d, filter bank has %d", x.Channels, len(*phi)) return nil, err } y := rimg64.NewMulti(x.Width, x.Height, x.Channels) for u := 0; u < x.Width; u++ { for v := 0; v < x.Height; v++ { for p := 0; p < x.Channels; p++ { y.Set(u, v, p, x.At(u, v, p)+(*phi)[p]) } } } return y, nil }
func formatImageCSV(im *rimg64.Multi) [][]string { var rows [][]string for u := 0; u < im.Width; u++ { for v := 0; v < im.Height; v++ { for w := 0; w < im.Channels; w++ { r := make([]string, 4) r[0] = strconv.FormatInt(int64(u), 10) r[1] = strconv.FormatInt(int64(v), 10) r[2] = strconv.FormatInt(int64(w), 10) r[3] = strconv.FormatFloat(im.At(u, v, w), 'g', -1, 64) rows = append(rows, r) } } } return rows }
// Returns gradient with greatest magnitude across all channels. // 1 <= x <= width-2, 1 <= y <= height-2 func maxGrad(f *rimg64.Multi, x, y int) (point, float64) { var ( grad point max float64 ) for d := 0; d < f.Channels; d++ { p := point{ f.At(x+1, y, d) - f.At(x-1, y, d), f.At(x, y+1, d) - f.At(x, y-1, d), } v := p.X*p.X + p.Y*p.Y if v > max { grad, max = p, v } } return grad, max }
// CorrMultiStrideBLAS computes the strided correlation of // a multi-channel image with a multi-channel filter. // h[u, v] = sum_q (f_q corr g_q)[stride*u, stride*v] func CorrMultiStrideBLAS(f, g *rimg64.Multi, stride int) (*rimg64.Image, error) { out := ValidSizeStride(f.Size(), g.Size(), stride) if out.X <= 0 || out.Y <= 0 { return nil, nil } h := rimg64.New(out.X, out.Y) // Size of filters. m, n, k := g.Width, g.Height, g.Channels // Express as dense matrix multiplication. // h[u, v] = sum_q (f_q corr g_q)[stride*u, stride*v] // y(h) = A(f) x(g) // where A is wh by mnk // with w = ceil[(M-m+1)/stride], // h = ceil[(N-n+1)/stride]. a := blas.NewMat(h.Width*h.Height, m*n*k) { var r int for u := 0; u < h.Width; u++ { for v := 0; v < h.Height; v++ { var s int for i := 0; i < g.Width; i++ { for j := 0; j < g.Height; j++ { for q := 0; q < g.Channels; q++ { a.Set(r, s, f.At(stride*u+i, stride*v+j, q)) s++ } } } r++ } } } x := blas.NewMat(m*n*k, 1) { var r int for i := 0; i < g.Width; i++ { for j := 0; j < g.Height; j++ { for q := 0; q < g.Channels; q++ { x.Set(r, 0, g.At(i, j, q)) r++ } } } } y := blas.MatMul(1, a, x) { var r int for u := 0; u < h.Width; u++ { for v := 0; v < h.Height; v++ { h.Set(u, v, y.At(r, 0)) r++ } } } return h, nil }
func (phi *ConvEach) Apply(x *rimg64.Multi) (*rimg64.Multi, error) { channels := x.Channels * len(phi.Filters.Filters) field := image.Pt(phi.Filters.Width, phi.Filters.Height) size := slide.ValidSize(x.Size(), field) y := rimg64.NewMulti(size.X, size.Y, channels) var n int for i := 0; i < x.Channels; i++ { // Convolve each channel of the input with the bank. yi, err := slide.CorrBankBLAS(x.Channel(i), phi.Filters) if err != nil { return nil, err } for j := 0; j < yi.Channels; j++ { // Copy the channels into the output. y.SetChannel(n, yi.Channel(j)) n++ } } return y, nil }
// CorrMultiBankFFT computes the correlation of // a multi-channel image with a bank of multi-channel filters. // h_p[u, v] = sum_q (f_q corr g_pq)[u, v] func CorrMultiBankFFT(f *rimg64.Multi, g *MultiBank) (*rimg64.Multi, error) { out := ValidSize(f.Size(), g.Size()) if out.X <= 0 || out.Y <= 0 { return nil, nil } // Determine optimal size for FFT. work, _ := FFT2Size(f.Size()) // Cache FFT of each channel of image. fhat := make([]*fftw.Array2, f.Channels) for i := range fhat { fhat[i] = fftw.NewArray2(work.X, work.Y) copyChannelTo(fhat[i], f, i) fftw.FFT2To(fhat[i], fhat[i]) } curr := fftw.NewArray2(work.X, work.Y) fwd := fftw.NewPlan2(curr, curr, fftw.Forward, fftw.Estimate) defer fwd.Destroy() sum := fftw.NewArray2(work.X, work.Y) bwd := fftw.NewPlan2(sum, sum, fftw.Backward, fftw.Estimate) defer bwd.Destroy() h := rimg64.NewMulti(out.X, out.Y, len(g.Filters)) alpha := complex(1/float64(work.X*work.Y), 0) // For each output channel. for p, gp := range g.Filters { zero(sum) // For each input channel. for q := 0; q < f.Channels; q++ { // Take FFT of this input channel. copyChannelTo(curr, gp, q) fwd.Execute() // h_p[x] = (G_qp corr F_p)[x] // H_p[x] = conj(G_qp[x]) F_p[x] addScaleMul(sum, alpha, curr, fhat[q]) } bwd.Execute() copyRealToChannel(h, p, sum) } return h, nil }
// Flattens 31 (or 27) channels down to 9 for visualization. func compress(src *rimg64.Multi, weights WeightSet) *rimg64.Multi { dst := rimg64.NewMulti(src.Width, src.Height, 9) for i := 0; i < 27; i++ { for x := 0; x < src.Width; x++ { for y := 0; y < src.Height; y++ { v := src.At(x, y, i) switch weights { default: case Pos: v = math.Max(0, v) case Neg: v = math.Min(0, v) case Abs: v = math.Abs(v) } dst.Set(x, y, i%9, dst.At(x, y, i%9)+v) } } } return dst }
// CorrMultiBankFFT computes the correlation of // a multi-channel image with a multi-channel filter. // h[u, v] = sum_p (f_p corr g_p)[u, v] func CorrMultiFFT(f, g *rimg64.Multi) (*rimg64.Image, error) { if err := errIfChannelsNotEq(f, g); err != nil { panic(err) } out := ValidSize(f.Size(), g.Size()) if out.Eq(image.ZP) { return nil, nil } work, _ := FFT2Size(f.Size()) fhat := fftw.NewArray2(work.X, work.Y) ghat := fftw.NewArray2(work.X, work.Y) ffwd := fftw.NewPlan2(fhat, fhat, fftw.Forward, fftw.Estimate) defer ffwd.Destroy() gfwd := fftw.NewPlan2(ghat, ghat, fftw.Forward, fftw.Estimate) defer gfwd.Destroy() hhat := fftw.NewArray2(work.X, work.Y) for p := 0; p < f.Channels; p++ { // Take transform of each channel. copyChannelTo(fhat, f, p) ffwd.Execute() copyChannelTo(ghat, g, p) gfwd.Execute() addMul(hhat, ghat, fhat) } n := float64(work.X * work.Y) scale(complex(1/n, 0), hhat) fftw.IFFT2To(hhat, hhat) h := rimg64.New(out.X, out.Y) copyRealTo(h, hhat) return h, nil }
// CorrMultiBankStrideNaive computes the strided correlation of // a multi-channel image with a bank of multi-channel filters. // h_p[u, v] = sum_q (f_q corr g_pq)[stride*u, stride*v] func CorrMultiBankStrideNaive(f *rimg64.Multi, g *MultiBank, stride int) (*rimg64.Multi, error) { out := ValidSizeStride(f.Size(), g.Size(), stride) if out.X <= 0 || out.Y <= 0 { return nil, nil } h := rimg64.NewMulti(out.X, out.Y, len(g.Filters)) for u := 0; u < h.Width; u++ { for v := 0; v < h.Height; v++ { for p := 0; p < h.Channels; p++ { for i := 0; i < g.Width; i++ { for j := 0; j < g.Height; j++ { for q := 0; q < g.Channels; q++ { val := f.At(stride*u+i, stride*v+j, q) * g.Filters[p].At(i, j, q) h.Set(u, v, p, h.At(u, v, p)+val) } } } } } } return h, nil }