func (phi *MaxPool) Apply(x *rimg64.Multi) (*rimg64.Multi, error) { if phi.Field.X <= 0 || phi.Field.Y <= 0 { err := fmt.Errorf("invalid field size: %v", phi.Field) return nil, err } if phi.Stride <= 0 { err := fmt.Errorf("invalid stride: %d", phi.Stride) return nil, err } size := image.Pt( ceilDiv(x.Width-phi.Field.X+1, phi.Stride), ceilDiv(x.Height-phi.Field.Y+1, phi.Stride), ) y := rimg64.NewMulti(size.X, size.Y, x.Channels) for i := 0; i < y.Width; i++ { for j := 0; j < y.Height; j++ { for k := 0; k < x.Channels; k++ { // Position in original image. p := image.Pt(i, j).Mul(phi.Stride) max := math.Inf(-1) for u := 0; u < phi.Field.X; u++ { for v := 0; v < phi.Field.Y; v++ { q := p.Add(image.Pt(u, v)) max = math.Max(max, x.At(q.X, q.Y, k)) } } y.Set(i, j, k, max) } } } return y, nil }
// CorrMultiBankStrideBLAS computes the strided correlation of // a multi-channel image with a bank of multi-channel filters. // h_p[u, v] = sum_q (f_q corr g_pq)[stride*u, stride*v] func CorrMultiBankStrideBLAS(f *rimg64.Multi, g *MultiBank, stride int) (*rimg64.Multi, error) { out := ValidSizeStride(f.Size(), g.Size(), stride) if out.X <= 0 || out.Y <= 0 { return nil, nil } h := rimg64.NewMulti(out.X, out.Y, len(g.Filters)) // Size of filters. m, n, k := g.Width, g.Height, g.Channels // Express as dense matrix multiplication. // h_p[u, v] = sum_q (f_q corr g_pq)[u, v] // h = A(f) X(g) // where A is whk by mnk // with w = ceil[(M-m+1)/stride], // h = ceil[(N-n+1)/stride]. a := blas.NewMat(h.Width*h.Height, m*n*k) { var r int for u := 0; u < h.Width; u++ { for v := 0; v < h.Height; v++ { var s int for i := 0; i < g.Width; i++ { for j := 0; j < g.Height; j++ { for q := 0; q < g.Channels; q++ { a.Set(r, s, f.At(stride*u+i, stride*v+j, q)) s++ } } } r++ } } } x := blas.NewMat(m*n*k, h.Channels) { var r int for i := 0; i < g.Width; i++ { for j := 0; j < g.Height; j++ { for q := 0; q < g.Channels; q++ { for p := 0; p < h.Channels; p++ { x.Set(r, p, g.Filters[p].At(i, j, q)) } r++ } } } } y := blas.MatMul(1, a, x) { var r int for u := 0; u < h.Width; u++ { for v := 0; v < h.Height; v++ { for p := 0; p < h.Channels; p++ { h.Set(u, v, p, y.At(r, p)) } r++ } } } return h, nil }
// EvalFunc evaluates a function on every window in an image. // If the input image is M x N and the window size is m x n, // then the output is (M-m+1) x (N-n+1). // If the window size is larger than the image size in either dimension, // a nil image is returned with no error. func EvalFunc(im *rimg64.Multi, size image.Point, f ScoreFunc) (*rimg64.Image, error) { if im.Width < size.X || im.Height < size.Y { return nil, nil } r := rimg64.New(im.Width-size.X+1, im.Height-size.Y+1) x := rimg64.NewMulti(size.X, size.Y, im.Channels) for i := 0; i < r.Width; i++ { for j := 0; j < r.Height; j++ { // Copy window into x. for u := 0; u < size.X; u++ { for v := 0; v < size.Y; v++ { for p := 0; p < im.Channels; p++ { x.Set(u, v, p, im.At(i+u, j+v, p)) } } } y, err := f(x) if err != nil { return nil, err } r.Set(i, j, y) } } return r, nil }
func (phi SumPool) Apply(x *rimg64.Multi) (*rimg64.Multi, error) { if phi.Field.X <= 0 || phi.Field.Y <= 0 { err := fmt.Errorf("invalid field size: %v", phi.Field) return nil, err } if phi.Stride <= 0 { err := fmt.Errorf("invalid stride: %d", phi.Stride) return nil, err } size := image.Pt( ceilDiv(x.Width-phi.Field.X+1, phi.Stride), ceilDiv(x.Height-phi.Field.Y+1, phi.Stride), ) y := rimg64.NewMulti(size.X, size.Y, x.Channels) for i := 0; i < y.Width; i++ { for j := 0; j < y.Height; j++ { for k := 0; k < x.Channels; k++ { // Position in original image. p := image.Pt(i, j).Mul(phi.Stride) var t float64 for u := p.X; u < p.X+phi.Field.X; u++ { for v := p.Y; v < p.Y+phi.Field.Y; v++ { t += x.At(u, v, k) } } y.Set(i, j, k, t) } } } return y, nil }
// CorrMultiStrideBLAS computes the strided correlation of // a multi-channel image with a multi-channel filter. // h[u, v] = sum_q (f_q corr g_q)[stride*u, stride*v] func CorrMultiStrideBLAS(f, g *rimg64.Multi, stride int) (*rimg64.Image, error) { out := ValidSizeStride(f.Size(), g.Size(), stride) if out.X <= 0 || out.Y <= 0 { return nil, nil } h := rimg64.New(out.X, out.Y) // Size of filters. m, n, k := g.Width, g.Height, g.Channels // Express as dense matrix multiplication. // h[u, v] = sum_q (f_q corr g_q)[stride*u, stride*v] // y(h) = A(f) x(g) // where A is wh by mnk // with w = ceil[(M-m+1)/stride], // h = ceil[(N-n+1)/stride]. a := blas.NewMat(h.Width*h.Height, m*n*k) { var r int for u := 0; u < h.Width; u++ { for v := 0; v < h.Height; v++ { var s int for i := 0; i < g.Width; i++ { for j := 0; j < g.Height; j++ { for q := 0; q < g.Channels; q++ { a.Set(r, s, f.At(stride*u+i, stride*v+j, q)) s++ } } } r++ } } } x := blas.NewMat(m*n*k, 1) { var r int for i := 0; i < g.Width; i++ { for j := 0; j < g.Height; j++ { for q := 0; q < g.Channels; q++ { x.Set(r, 0, g.At(i, j, q)) r++ } } } } y := blas.MatMul(1, a, x) { var r int for u := 0; u < h.Width; u++ { for v := 0; v < h.Height; v++ { h.Set(u, v, y.At(r, 0)) r++ } } } return h, nil }
func (phi *Scale) Apply(x *rimg64.Multi) (*rimg64.Multi, error) { y := rimg64.NewMulti(x.Width, x.Height, x.Channels) for u := 0; u < x.Width; u++ { for v := 0; v < x.Height; v++ { for p := 0; p < x.Channels; p++ { y.Set(u, v, p, float64(*phi)*x.At(u, v, p)) } } } return y, nil }
// Takes the sum over channels. func squareMulti(f *rimg64.Multi) *rimg64.Image { g := rimg64.New(f.Width, f.Height) for i := 0; i < f.Width; i++ { for j := 0; j < f.Height; j++ { for k := 0; k < f.Channels; k++ { g.Set(i, j, g.At(i, j)+sqr(f.At(i, j, k))) } } } return g }
func (phi *SelectChannels) Apply(f *rimg64.Multi) (*rimg64.Multi, error) { g := rimg64.NewMulti(f.Width, f.Height, len(phi.Set)) for u := 0; u < f.Width; u++ { for v := 0; v < f.Height; v++ { for i, p := range phi.Set { g.Set(u, v, i, f.At(u, v, p)) } } } return g, nil }
func (phi *ChannelInterval) Apply(f *rimg64.Multi) (*rimg64.Multi, error) { g := rimg64.NewMulti(f.Width, f.Height, phi.B-phi.A) for u := 0; u < f.Width; u++ { for v := 0; v < f.Height; v++ { for p := phi.A; p < phi.B; p++ { g.Set(u, v, p-phi.A, f.At(u, v, p)) } } } return g, nil }
// FlipMulti mirrors a multi-channel image in x and y. func FlipMulti(f *rimg64.Multi) *rimg64.Multi { g := rimg64.NewMulti(f.Width, f.Height, f.Channels) for i := 0; i < f.Width; i++ { for j := 0; j < f.Height; j++ { for k := 0; k < f.Channels; k++ { g.Set(f.Width-1-i, f.Height-1-j, k, f.At(i, j, k)) } } } return g }
func (phi *PosPart) Apply(x *rimg64.Multi) (*rimg64.Multi, error) { y := rimg64.NewMulti(x.Width, x.Height, x.Channels) for i := 0; i < x.Width; i++ { for j := 0; j < x.Height; j++ { for k := 0; k < x.Channels; k++ { y.Set(i, j, k, math.Max(0, x.At(i, j, k))) } } } return y, nil }
func dot(x, y *rimg64.Multi) float64 { var d float64 for i := 0; i < x.Width; i++ { for j := 0; j < x.Height; j++ { for k := 0; k < x.Channels; k++ { d += x.At(i, j, k) * y.At(i, j, k) } } } return d }
// Assumes that f is no smaller than x. // Pads with zeros. func copyChannelTo(x *fftw.Array2, f *rimg64.Multi, p int) { w, h := x.Dims() for u := 0; u < w; u++ { for v := 0; v < h; v++ { if u < f.Width && v < f.Height { x.Set(u, v, complex(f.At(u, v, p), 0)) } else { x.Set(u, v, 0) } } } }
// DecimateMulti takes every r-th sample starting at (0, 0). func DecimateMulti(f *rimg64.Multi, r int) *rimg64.Multi { out := ceilDivPt(f.Size(), r) g := rimg64.NewMulti(out.X, out.Y, f.Channels) for i := 0; i < g.Width; i++ { for j := 0; j < g.Height; j++ { for k := 0; k < g.Channels; k++ { g.Set(i, j, k, f.At(r*i, r*j, k)) } } } return g }
func (phi *IsPos) Apply(x *rimg64.Multi) (*rimg64.Multi, error) { y := rimg64.NewMulti(x.Width, x.Height, x.Channels) for i := 0; i < x.Width; i++ { for j := 0; j < x.Height; j++ { for k := 0; k < x.Channels; k++ { if x.At(i, j, k) > 0 { y.Set(i, j, k, 1) } } } } return y, nil }
func (phi *PosNegPart) Apply(x *rimg64.Multi) (*rimg64.Multi, error) { channels := x.Channels * 2 y := rimg64.NewMulti(x.Width, x.Height, channels) for i := 0; i < x.Width; i++ { for j := 0; j < x.Height; j++ { for k := 0; k < x.Channels; k++ { pos, neg := posNegPart(x.At(i, j, k)) y.Set(i, j, 2*k, pos) y.Set(i, j, 2*k+1, neg) } } } return y, nil }
// dst[i, j] = src[i*stride + offset.X, j*stride + offset.Y], // or zero if this is outside the boundary. func copyChannelStrideTo(dst *fftw.Array2, src *rimg64.Multi, channel, stride int, offset image.Point) { m, n := dst.Dims() bnds := image.Rect(0, 0, src.Width, src.Height) for i := 0; i < m; i++ { for j := 0; j < n; j++ { p := image.Pt(i, j).Mul(stride).Add(offset) var val complex128 if p.In(bnds) { val = complex(src.At(p.X, p.Y, channel), 0) } dst.Set(i, j, val) } } }
func (phi *AdjChanNorm) Apply(x *rimg64.Multi) (*rimg64.Multi, error) { y := rimg64.NewMulti(x.Width, x.Height, x.Channels) r := (phi.Num - 1) / 2 for i := 0; i < x.Width; i++ { for j := 0; j < x.Height; j++ { k := 0 // Range over which to compute sum. a, b := k-r, k+r+1 // Take sum excluding leading element. var t float64 for p := 0; p < min(b, x.Channels); p++ { t += sqr(x.At(i, j, p)) } for ; k < x.Channels; k++ { a, b = k-r, k+r+1 // Set element. norm := math.Pow(phi.K+phi.Alpha*t, phi.Beta) y.Set(i, j, k, x.At(i, j, k)/norm) // Subtract trailing element. if a >= 0 { t -= sqr(x.At(i, j, a)) } // Add leading element. if b < x.Channels { t += sqr(x.At(i, j, b)) } } } } return y, nil }
func drawCell(feat *rimg64.Multi, i, j int, gc *draw2d.ImageGraphicContext, cell int) { u := (float64(i) + 0.5) * float64(cell) v := (float64(j) + 0.5) * float64(cell) r := float64(cell) / 2 for k := 0; k < Orientations; k++ { x := feat.At(i, j, k) x = math.Max(x, 0) x = math.Min(x, 1) gc.SetStrokeColor(color.Gray{uint8(x*254 + 1)}) theta := (0.5 + float64(k)/float64(Orientations)) * math.Pi drawOrientedLine(gc, u, v, theta, r) } }
func invNormMulti(f *rimg64.Multi) float64 { var norm float64 for i := 0; i < f.Width; i++ { for j := 0; j < f.Height; j++ { for k := 0; k < f.Channels; k++ { norm += sqr(f.At(i, j, k)) } } } norm = math.Sqrt(norm) // This cannot be negative. if norm == 0 { return 0 } return 1 / norm }
func (phi *AddConst) Apply(x *rimg64.Multi) (*rimg64.Multi, error) { if x.Channels != len(*phi) { err := fmt.Errorf("channels: image has %d, filter bank has %d", x.Channels, len(*phi)) return nil, err } y := rimg64.NewMulti(x.Width, x.Height, x.Channels) for u := 0; u < x.Width; u++ { for v := 0; v < x.Height; v++ { for p := 0; p < x.Channels; p++ { y.Set(u, v, p, x.At(u, v, p)+(*phi)[p]) } } } return y, nil }
func formatImageCSV(im *rimg64.Multi) [][]string { var rows [][]string for u := 0; u < im.Width; u++ { for v := 0; v < im.Height; v++ { for w := 0; w < im.Channels; w++ { r := make([]string, 4) r[0] = strconv.FormatInt(int64(u), 10) r[1] = strconv.FormatInt(int64(v), 10) r[2] = strconv.FormatInt(int64(w), 10) r[3] = strconv.FormatFloat(im.At(u, v, w), 'g', -1, 64) rows = append(rows, r) } } } return rows }
// Returns gradient with greatest magnitude across all channels. // 1 <= x <= width-2, 1 <= y <= height-2 func maxGrad(f *rimg64.Multi, x, y int) (point, float64) { var ( grad point max float64 ) for d := 0; d < f.Channels; d++ { p := point{ f.At(x+1, y, d) - f.At(x-1, y, d), f.At(x, y+1, d) - f.At(x, y-1, d), } v := p.X*p.X + p.Y*p.Y if v > max { grad, max = p, v } } return grad, max }
func errIfNotEqMulti(f, g *rimg64.Multi, eps float64) error { if !f.Size().Eq(g.Size()) { return fmt.Errorf("different size: %v, %v", f.Size(), g.Size()) } if f.Channels != g.Channels { return fmt.Errorf("different channels: %d, %d", f.Channels, g.Channels) } for i := 0; i < f.Width; i++ { for j := 0; j < f.Height; j++ { for k := 0; k < f.Channels; k++ { a, b := f.At(i, j, k), g.At(i, j, k) if math.Abs(a-b) > eps*math.Max(math.Abs(a), math.Abs(b)) { return fmt.Errorf("different at x %d, y %d, c %d: %g, %g", i, j, k, a, b) } } } } return nil }
// Flattens 31 (or 27) channels down to 9 for visualization. func compress(src *rimg64.Multi, weights WeightSet) *rimg64.Multi { dst := rimg64.NewMulti(src.Width, src.Height, 9) for i := 0; i < 27; i++ { for x := 0; x < src.Width; x++ { for y := 0; y < src.Height; y++ { v := src.At(x, y, i) switch weights { default: case Pos: v = math.Max(0, v) case Neg: v = math.Min(0, v) case Abs: v = math.Abs(v) } dst.Set(x, y, i%9, dst.At(x, y, i%9)+v) } } } return dst }
func (f *AffineScorer) Score(x *rimg64.Multi) (float64, error) { if f.Op != Cos { panic("cosine unimplemented") } if !x.Size().Eq(f.Tmpl.Size()) { return 0, fmt.Errorf("different size: input %v, template %v", x.Size(), f.Tmpl.Size()) } if x.Channels != f.Tmpl.Channels { return 0, fmt.Errorf("different channels: input %v, template %v", x.Channels, f.Tmpl.Channels) } size := f.Tmpl.Size() var y float64 for i := 0; i < size.X; i++ { for j := 0; j < size.Y; j++ { for k := 0; k < f.Tmpl.Channels; k++ { y += x.At(i, j, k) * f.Tmpl.At(i, j, k) } } } y += f.Bias return y, nil }
// CorrMultiBankStrideNaive computes the strided correlation of // a multi-channel image with a bank of multi-channel filters. // h_p[u, v] = sum_q (f_q corr g_pq)[stride*u, stride*v] func CorrMultiBankStrideNaive(f *rimg64.Multi, g *MultiBank, stride int) (*rimg64.Multi, error) { out := ValidSizeStride(f.Size(), g.Size(), stride) if out.X <= 0 || out.Y <= 0 { return nil, nil } h := rimg64.NewMulti(out.X, out.Y, len(g.Filters)) for u := 0; u < h.Width; u++ { for v := 0; v < h.Height; v++ { for p := 0; p < h.Channels; p++ { for i := 0; i < g.Width; i++ { for j := 0; j < g.Height; j++ { for q := 0; q < g.Channels; q++ { val := f.At(stride*u+i, stride*v+j, q) * g.Filters[p].At(i, j, q) h.Set(u, v, p, h.At(u, v, p)+val) } } } } } } return h, nil }
// CorrMultiStrideNaive computes the correlation of // a multi-channel image with a multi-channel filter. // h[u, v] = sum_q (f_q corr g_q)[u, v] func CorrMultiStrideNaive(f, g *rimg64.Multi, stride int) (*rimg64.Image, error) { if err := errIfChannelsNotEq(f, g); err != nil { panic(err) } out := ValidSizeStride(f.Size(), g.Size(), stride) h := rimg64.New(out.X, out.Y) for i := 0; i < h.Width; i++ { for j := 0; j < h.Height; j++ { var total float64 for u := 0; u < g.Width; u++ { for v := 0; v < g.Height; v++ { p := image.Pt(i, j).Mul(stride).Add(image.Pt(u, v)) for k := 0; k < f.Channels; k++ { total += f.At(p.X, p.Y, k) * g.At(u, v, k) } } } h.Set(i, j, total) } } return h, nil }
// CorrMultiBankNaive computes the correlation of // a multi-channel image with a bank of multi-channel filters. // h_p[u, v] = sum_q (f_q corr g_pq)[u, v] func CorrMultiBankNaive(f *rimg64.Multi, g *MultiBank) (*rimg64.Multi, error) { out := ValidSize(f.Size(), g.Size()) if out.X <= 0 || out.Y <= 0 { return nil, nil } h := rimg64.NewMulti(out.X, out.Y, len(g.Filters)) for u := 0; u < h.Width; u++ { for v := 0; v < h.Height; v++ { for p := 0; p < h.Channels; p++ { var sum float64 for i := 0; i < g.Width; i++ { for j := 0; j < g.Height; j++ { for q := 0; q < g.Channels; q++ { sum += f.At(i+u, j+v, q) * g.Filters[p].At(i, j, q) } } } h.Set(u, v, p, sum) } } } return h, nil }
// CorrMultiNaive computes the correlation of // a multi-channel image with a multi-channel filter. // h[u, v] = sum_p (f_p corr g_p)[u, v] func CorrMultiNaive(f, g *rimg64.Multi) (*rimg64.Image, error) { if err := errIfChannelsNotEq(f, g); err != nil { panic(err) } out := ValidSize(f.Size(), g.Size()) if out.Eq(image.ZP) { return nil, nil } h := rimg64.New(out.X, out.Y) for i := 0; i < out.X; i++ { for j := 0; j < out.Y; j++ { var total float64 for u := 0; u < g.Width; u++ { for v := 0; v < g.Height; v++ { for p := 0; p < f.Channels; p++ { total += f.At(i+u, j+v, p) * g.At(u, v, p) } } } h.Set(i, j, total) } } return h, nil }