func (phi *AdjChanNorm) Apply(x *rimg64.Multi) (*rimg64.Multi, error) { y := rimg64.NewMulti(x.Width, x.Height, x.Channels) r := (phi.Num - 1) / 2 for i := 0; i < x.Width; i++ { for j := 0; j < x.Height; j++ { k := 0 // Range over which to compute sum. a, b := k-r, k+r+1 // Take sum excluding leading element. var t float64 for p := 0; p < min(b, x.Channels); p++ { t += sqr(x.At(i, j, p)) } for ; k < x.Channels; k++ { a, b = k-r, k+r+1 // Set element. norm := math.Pow(phi.K+phi.Alpha*t, phi.Beta) y.Set(i, j, k, x.At(i, j, k)/norm) // Subtract trailing element. if a >= 0 { t -= sqr(x.At(i, j, a)) } // Add leading element. if b < x.Channels { t += sqr(x.At(i, j, b)) } } } } return y, nil }
func (phi SumPool) Apply(x *rimg64.Multi) (*rimg64.Multi, error) { if phi.Field.X <= 0 || phi.Field.Y <= 0 { err := fmt.Errorf("invalid field size: %v", phi.Field) return nil, err } if phi.Stride <= 0 { err := fmt.Errorf("invalid stride: %d", phi.Stride) return nil, err } size := image.Pt( ceilDiv(x.Width-phi.Field.X+1, phi.Stride), ceilDiv(x.Height-phi.Field.Y+1, phi.Stride), ) y := rimg64.NewMulti(size.X, size.Y, x.Channels) for i := 0; i < y.Width; i++ { for j := 0; j < y.Height; j++ { for k := 0; k < x.Channels; k++ { // Position in original image. p := image.Pt(i, j).Mul(phi.Stride) var t float64 for u := p.X; u < p.X+phi.Field.X; u++ { for v := p.Y; v < p.Y+phi.Field.Y; v++ { t += x.At(u, v, k) } } y.Set(i, j, k, t) } } } return y, nil }
// CorrBankFFT computes the correlation of an image with a bank of filters. // h_p[u, v] = (f corr g_p)[u, v] func CorrBankFFT(f *rimg64.Image, g *Bank) (*rimg64.Multi, error) { out := ValidSize(f.Size(), g.Size()) if out.X <= 0 || out.Y <= 0 { return nil, nil } // Determine optimal size for FFT. work, _ := FFT2Size(f.Size()) // Re-use FFT of image. fhat := fftw.NewArray2(work.X, work.Y) copyImageTo(fhat, f) fftw.FFT2To(fhat, fhat) // Transform of each filter. curr := fftw.NewArray2(work.X, work.Y) fwd := fftw.NewPlan2(curr, curr, fftw.Forward, fftw.Estimate) defer fwd.Destroy() bwd := fftw.NewPlan2(curr, curr, fftw.Backward, fftw.Estimate) defer bwd.Destroy() h := rimg64.NewMulti(out.X, out.Y, len(g.Filters)) alpha := complex(1/float64(work.X*work.Y), 0) // For each output channel. for p, gp := range g.Filters { // Take FFT. copyImageTo(curr, gp) fwd.Execute() // h_p[x] = (G_p corr F)[x] // H_p[x] = conj(G_p[x]) F[x] scaleMul(curr, alpha, curr, fhat) bwd.Execute() copyRealToChannel(h, p, curr) } return h, nil }
// EvalFunc evaluates a function on every window in an image. // If the input image is M x N and the window size is m x n, // then the output is (M-m+1) x (N-n+1). // If the window size is larger than the image size in either dimension, // a nil image is returned with no error. func EvalFunc(im *rimg64.Multi, size image.Point, f ScoreFunc) (*rimg64.Image, error) { if im.Width < size.X || im.Height < size.Y { return nil, nil } r := rimg64.New(im.Width-size.X+1, im.Height-size.Y+1) x := rimg64.NewMulti(size.X, size.Y, im.Channels) for i := 0; i < r.Width; i++ { for j := 0; j < r.Height; j++ { // Copy window into x. for u := 0; u < size.X; u++ { for v := 0; v < size.Y; v++ { for p := 0; p < im.Channels; p++ { x.Set(u, v, p, im.At(i+u, j+v, p)) } } } y, err := f(x) if err != nil { return nil, err } r.Set(i, j, y) } } return r, nil }
// CorrMultiBankStrideBLAS computes the strided correlation of // a multi-channel image with a bank of multi-channel filters. // h_p[u, v] = sum_q (f_q corr g_pq)[stride*u, stride*v] func CorrMultiBankStrideBLAS(f *rimg64.Multi, g *MultiBank, stride int) (*rimg64.Multi, error) { out := ValidSizeStride(f.Size(), g.Size(), stride) if out.X <= 0 || out.Y <= 0 { return nil, nil } h := rimg64.NewMulti(out.X, out.Y, len(g.Filters)) // Size of filters. m, n, k := g.Width, g.Height, g.Channels // Express as dense matrix multiplication. // h_p[u, v] = sum_q (f_q corr g_pq)[u, v] // h = A(f) X(g) // where A is whk by mnk // with w = ceil[(M-m+1)/stride], // h = ceil[(N-n+1)/stride]. a := blas.NewMat(h.Width*h.Height, m*n*k) { var r int for u := 0; u < h.Width; u++ { for v := 0; v < h.Height; v++ { var s int for i := 0; i < g.Width; i++ { for j := 0; j < g.Height; j++ { for q := 0; q < g.Channels; q++ { a.Set(r, s, f.At(stride*u+i, stride*v+j, q)) s++ } } } r++ } } } x := blas.NewMat(m*n*k, h.Channels) { var r int for i := 0; i < g.Width; i++ { for j := 0; j < g.Height; j++ { for q := 0; q < g.Channels; q++ { for p := 0; p < h.Channels; p++ { x.Set(r, p, g.Filters[p].At(i, j, q)) } r++ } } } } y := blas.MatMul(1, a, x) { var r int for u := 0; u < h.Width; u++ { for v := 0; v < h.Height; v++ { for p := 0; p < h.Channels; p++ { h.Set(u, v, p, y.At(r, p)) } r++ } } } return h, nil }
// CorrBankStrideFFT computes the strided correlation of // an image with a bank of filters. // h_p[u, v] = (f corr g_p)[stride*u, stride*v] func CorrBankStrideFFT(f *rimg64.Image, g *Bank, stride int) (*rimg64.Multi, error) { out := ValidSizeStride(f.Size(), g.Size(), stride) if out.X <= 0 || out.Y <= 0 { return nil, nil } // Compute strided convolution as the sum over // a stride x stride grid of small convolutions. grid := image.Pt(stride, stride) // But do not divide into a larger grid than the size of the filter. // If the filter is smaller than the stride, // then some pixels in the image will not affect the output. grid.X = min(grid.X, g.Width) grid.Y = min(grid.Y, g.Height) // Determine the size of the sub-sampled filter. gsub := image.Pt(ceilDiv(g.Width, grid.X), ceilDiv(g.Height, grid.Y)) // The sub-sampled size of the image should be such that // the output size is attained. fsub := image.Pt(out.X+gsub.X-1, out.Y+gsub.Y-1) // Determine optimal size for FFT. work, _ := FFT2Size(fsub) // Cache FFT of image for convolving with multiple filters. // Re-use plan for multiple convolutions too. fhat := fftw.NewArray2(work.X, work.Y) ffwd := fftw.NewPlan2(fhat, fhat, fftw.Forward, fftw.Estimate) defer ffwd.Destroy() // FFT for current filter. ghat := fftw.NewArray2(work.X, work.Y) gfwd := fftw.NewPlan2(ghat, ghat, fftw.Forward, fftw.Estimate) defer gfwd.Destroy() // Allocate one array per output channel. hhat := make([]*fftw.Array2, len(g.Filters)) for k := range hhat { hhat[k] = fftw.NewArray2(work.X, work.Y) } // Normalization factor. alpha := complex(1/float64(work.X*work.Y), 0) // Add the convolutions over channels and strides. for i := 0; i < grid.X; i++ { for j := 0; j < grid.Y; j++ { // Take transform of downsampled image given offset (i, j). copyStrideTo(fhat, f, stride, image.Pt(i, j)) ffwd.Execute() // Take transform of each downsampled channel given offset (i, j). for q := range hhat { copyStrideTo(ghat, g.Filters[q], stride, image.Pt(i, j)) gfwd.Execute() addMul(hhat[q], ghat, fhat) } } } // Take the inverse transform of each channel. h := rimg64.NewMulti(out.X, out.Y, len(g.Filters)) for q := range hhat { scale(alpha, hhat[q]) fftw.IFFT2To(hhat[q], hhat[q]) copyRealToChannel(h, q, hhat[q]) } return h, nil }
func (phi *MaxPool) Apply(x *rimg64.Multi) (*rimg64.Multi, error) { if phi.Field.X <= 0 || phi.Field.Y <= 0 { err := fmt.Errorf("invalid field size: %v", phi.Field) return nil, err } if phi.Stride <= 0 { err := fmt.Errorf("invalid stride: %d", phi.Stride) return nil, err } size := image.Pt( ceilDiv(x.Width-phi.Field.X+1, phi.Stride), ceilDiv(x.Height-phi.Field.Y+1, phi.Stride), ) y := rimg64.NewMulti(size.X, size.Y, x.Channels) for i := 0; i < y.Width; i++ { for j := 0; j < y.Height; j++ { for k := 0; k < x.Channels; k++ { // Position in original image. p := image.Pt(i, j).Mul(phi.Stride) max := math.Inf(-1) for u := 0; u < phi.Field.X; u++ { for v := 0; v < phi.Field.Y; v++ { q := p.Add(image.Pt(u, v)) max = math.Max(max, x.At(q.X, q.Y, k)) } } y.Set(i, j, k, max) } } } return y, nil }
// CorrBankBLAS computes the correlation of an image with a bank of filters. // h_p[u, v] = (f corr g_p)[u, v] func CorrBankBLAS(f *rimg64.Image, g *Bank) (*rimg64.Multi, error) { out := ValidSize(f.Size(), g.Size()) if out.X <= 0 || out.Y <= 0 { return nil, nil } // Express as dense matrix multiplication. // h_p[u, v] = (f corr g_q)[u, v] // Y(h) = A(f) X(g) // If the number of output channels is k, then // A is (M-m+1)(N-n+1) x mn and // X is mn x k, so that // Y is (M-m+1)(N-n+1) x k. h := rimg64.NewMulti(out.X, out.Y, len(g.Filters)) m, n, k := g.Width, g.Height, len(g.Filters) a := blas.NewMat(out.X*out.Y, m*n) { var r int for u := 0; u < h.Width; u++ { for v := 0; v < h.Height; v++ { var s int for i := 0; i < g.Width; i++ { for j := 0; j < g.Height; j++ { a.Set(r, s, f.At(i+u, j+v)) s++ } } r++ } } } x := blas.NewMat(m*n, k) { var r int for i := 0; i < g.Width; i++ { for j := 0; j < g.Height; j++ { for p := 0; p < h.Channels; p++ { x.Set(r, p, g.Filters[p].At(i, j)) } r++ } } } y := blas.MatMul(1, a, x) { var r int for u := 0; u < h.Width; u++ { for v := 0; v < h.Height; v++ { for p := 0; p < h.Channels; p++ { h.Set(u, v, p, y.At(r, p)) } r++ } } } return h, nil }
func (phi *PosPart) Apply(x *rimg64.Multi) (*rimg64.Multi, error) { y := rimg64.NewMulti(x.Width, x.Height, x.Channels) for i := 0; i < x.Width; i++ { for j := 0; j < x.Height; j++ { for k := 0; k < x.Channels; k++ { y.Set(i, j, k, math.Max(0, x.At(i, j, k))) } } } return y, nil }
// FlipMulti mirrors a multi-channel image in x and y. func FlipMulti(f *rimg64.Multi) *rimg64.Multi { g := rimg64.NewMulti(f.Width, f.Height, f.Channels) for i := 0; i < f.Width; i++ { for j := 0; j < f.Height; j++ { for k := 0; k < f.Channels; k++ { g.Set(f.Width-1-i, f.Height-1-j, k, f.At(i, j, k)) } } } return g }
func randMulti(width, height, channels int) *rimg64.Multi { f := rimg64.NewMulti(width, height, channels) for i := 0; i < width; i++ { for j := 0; j < height; j++ { for k := 0; k < channels; k++ { f.Set(i, j, k, rand.NormFloat64()) } } } return f }
func (phi *ChannelInterval) Apply(f *rimg64.Multi) (*rimg64.Multi, error) { g := rimg64.NewMulti(f.Width, f.Height, phi.B-phi.A) for u := 0; u < f.Width; u++ { for v := 0; v < f.Height; v++ { for p := phi.A; p < phi.B; p++ { g.Set(u, v, p-phi.A, f.At(u, v, p)) } } } return g, nil }
func (phi *SelectChannels) Apply(f *rimg64.Multi) (*rimg64.Multi, error) { g := rimg64.NewMulti(f.Width, f.Height, len(phi.Set)) for u := 0; u < f.Width; u++ { for v := 0; v < f.Height; v++ { for i, p := range phi.Set { g.Set(u, v, i, f.At(u, v, p)) } } } return g, nil }
func (phi *Scale) Apply(x *rimg64.Multi) (*rimg64.Multi, error) { y := rimg64.NewMulti(x.Width, x.Height, x.Channels) for u := 0; u < x.Width; u++ { for v := 0; v < x.Height; v++ { for p := 0; p < x.Channels; p++ { y.Set(u, v, p, float64(*phi)*x.At(u, v, p)) } } } return y, nil }
// DecimateMulti takes every r-th sample starting at (0, 0). func DecimateMulti(f *rimg64.Multi, r int) *rimg64.Multi { out := ceilDivPt(f.Size(), r) g := rimg64.NewMulti(out.X, out.Y, f.Channels) for i := 0; i < g.Width; i++ { for j := 0; j < g.Height; j++ { for k := 0; k < g.Channels; k++ { g.Set(i, j, k, f.At(r*i, r*j, k)) } } } return g }
func (phi *IsPos) Apply(x *rimg64.Multi) (*rimg64.Multi, error) { y := rimg64.NewMulti(x.Width, x.Height, x.Channels) for i := 0; i < x.Width; i++ { for j := 0; j < x.Height; j++ { for k := 0; k < x.Channels; k++ { if x.At(i, j, k) > 0 { y.Set(i, j, k, 1) } } } } return y, nil }
func (phi *PosNegPart) Apply(x *rimg64.Multi) (*rimg64.Multi, error) { channels := x.Channels * 2 y := rimg64.NewMulti(x.Width, x.Height, channels) for i := 0; i < x.Width; i++ { for j := 0; j < x.Height; j++ { for k := 0; k < x.Channels; k++ { pos, neg := posNegPart(x.At(i, j, k)) y.Set(i, j, 2*k, pos) y.Set(i, j, 2*k+1, neg) } } } return y, nil }
func parseImageCSV(rows [][]string) (*rimg64.Multi, error) { // Convert to numbers. var ( u, v, w []int f []float64 ) for _, r := range rows { if len(r) == 0 { continue } if len(r) != 4 { panic("wrong number of elements in row") } ui, err := strconv.ParseInt(r[0], 10, 32) if err != nil { return nil, err } vi, err := strconv.ParseInt(r[1], 10, 32) if err != nil { return nil, err } wi, err := strconv.ParseInt(r[2], 10, 32) if err != nil { return nil, err } fi, err := strconv.ParseFloat(r[3], 64) if err != nil { return nil, err } u = append(u, int(ui)) v = append(v, int(vi)) w = append(w, int(wi)) f = append(f, fi) } // Take max over u, v, w. var width, height, channels int for i := range u { width = max(u[i]+1, width) height = max(v[i]+1, height) channels = max(w[i]+1, channels) } // Set pixels in image. im := rimg64.NewMulti(width, height, channels) for i := range u { im.Set(u[i], v[i], w[i], f[i]) } return im, nil }
func (phi *AddConst) Apply(x *rimg64.Multi) (*rimg64.Multi, error) { if x.Channels != len(*phi) { err := fmt.Errorf("channels: image has %d, filter bank has %d", x.Channels, len(*phi)) return nil, err } y := rimg64.NewMulti(x.Width, x.Height, x.Channels) for u := 0; u < x.Width; u++ { for v := 0; v < x.Height; v++ { for p := 0; p < x.Channels; p++ { y.Set(u, v, p, x.At(u, v, p)+(*phi)[p]) } } } return y, nil }
func (phi *ConvEach) Apply(x *rimg64.Multi) (*rimg64.Multi, error) { channels := x.Channels * len(phi.Filters.Filters) field := image.Pt(phi.Filters.Width, phi.Filters.Height) size := slide.ValidSize(x.Size(), field) y := rimg64.NewMulti(size.X, size.Y, channels) var n int for i := 0; i < x.Channels; i++ { // Convolve each channel of the input with the bank. yi, err := slide.CorrBankBLAS(x.Channel(i), phi.Filters) if err != nil { return nil, err } for j := 0; j < yi.Channels; j++ { // Copy the channels into the output. y.SetChannel(n, yi.Channel(j)) n++ } } return y, nil }
// CorrMultiBankFFT computes the correlation of // a multi-channel image with a bank of multi-channel filters. // h_p[u, v] = sum_q (f_q corr g_pq)[u, v] func CorrMultiBankFFT(f *rimg64.Multi, g *MultiBank) (*rimg64.Multi, error) { out := ValidSize(f.Size(), g.Size()) if out.X <= 0 || out.Y <= 0 { return nil, nil } // Determine optimal size for FFT. work, _ := FFT2Size(f.Size()) // Cache FFT of each channel of image. fhat := make([]*fftw.Array2, f.Channels) for i := range fhat { fhat[i] = fftw.NewArray2(work.X, work.Y) copyChannelTo(fhat[i], f, i) fftw.FFT2To(fhat[i], fhat[i]) } curr := fftw.NewArray2(work.X, work.Y) fwd := fftw.NewPlan2(curr, curr, fftw.Forward, fftw.Estimate) defer fwd.Destroy() sum := fftw.NewArray2(work.X, work.Y) bwd := fftw.NewPlan2(sum, sum, fftw.Backward, fftw.Estimate) defer bwd.Destroy() h := rimg64.NewMulti(out.X, out.Y, len(g.Filters)) alpha := complex(1/float64(work.X*work.Y), 0) // For each output channel. for p, gp := range g.Filters { zero(sum) // For each input channel. for q := 0; q < f.Channels; q++ { // Take FFT of this input channel. copyChannelTo(curr, gp, q) fwd.Execute() // h_p[x] = (G_qp corr F_p)[x] // H_p[x] = conj(G_qp[x]) F_p[x] addScaleMul(sum, alpha, curr, fhat[q]) } bwd.Execute() copyRealToChannel(h, p, sum) } return h, nil }
// CorrBankStrideNaive computes the strided correlation of // an image with a bank of filters. // h_p[u, v] = (f corr g_p)[stride*u, stride*v] func CorrBankStrideNaive(f *rimg64.Image, g *Bank, stride int) (*rimg64.Multi, error) { out := ValidSizeStride(f.Size(), g.Size(), stride) if out.X <= 0 || out.Y <= 0 { return nil, nil } h := rimg64.NewMulti(out.X, out.Y, len(g.Filters)) for u := 0; u < h.Width; u++ { for v := 0; v < h.Height; v++ { for p := 0; p < h.Channels; p++ { var sum float64 for i := 0; i < g.Width; i++ { for j := 0; j < g.Height; j++ { sum += f.At(stride*u+i, stride*v+j) * g.Filters[p].At(i, j) } } h.Set(u, v, p, sum) } } } return h, nil }
// CorrBankNaive computes the correlation of an image with a bank of filters. // h_p[u, v] = (f corr g_p)[u, v] func CorrBankNaive(f *rimg64.Image, g *Bank) (*rimg64.Multi, error) { out := ValidSize(f.Size(), g.Size()) if out.X <= 0 || out.Y <= 0 { return nil, nil } h := rimg64.NewMulti(out.X, out.Y, len(g.Filters)) for u := 0; u < h.Width; u++ { for v := 0; v < h.Height; v++ { for p := 0; p < h.Channels; p++ { var total float64 for i := 0; i < g.Width; i++ { for j := 0; j < g.Height; j++ { total += f.At(i+u, j+v) * g.Filters[p].At(i, j) } } h.Set(u, v, p, total) } } } return h, nil }
// Flattens 31 (or 27) channels down to 9 for visualization. func compress(src *rimg64.Multi, weights WeightSet) *rimg64.Multi { dst := rimg64.NewMulti(src.Width, src.Height, 9) for i := 0; i < 27; i++ { for x := 0; x < src.Width; x++ { for y := 0; y < src.Height; y++ { v := src.At(x, y, i) switch weights { default: case Pos: v = math.Max(0, v) case Neg: v = math.Min(0, v) case Abs: v = math.Abs(v) } dst.Set(x, y, i%9, dst.At(x, y, i%9)+v) } } } return dst }
// CorrMultiBankStrideNaive computes the strided correlation of // a multi-channel image with a bank of multi-channel filters. // h_p[u, v] = sum_q (f_q corr g_pq)[stride*u, stride*v] func CorrMultiBankStrideNaive(f *rimg64.Multi, g *MultiBank, stride int) (*rimg64.Multi, error) { out := ValidSizeStride(f.Size(), g.Size(), stride) if out.X <= 0 || out.Y <= 0 { return nil, nil } h := rimg64.NewMulti(out.X, out.Y, len(g.Filters)) for u := 0; u < h.Width; u++ { for v := 0; v < h.Height; v++ { for p := 0; p < h.Channels; p++ { for i := 0; i < g.Width; i++ { for j := 0; j < g.Height; j++ { for q := 0; q < g.Channels; q++ { val := f.At(stride*u+i, stride*v+j, q) * g.Filters[p].At(i, j, q) h.Set(u, v, p, h.At(u, v, p)+val) } } } } } } return h, nil }
func fgmr(im *rimg64.Multi, sbin int) *rimg64.Multi { if im.Channels != 3 { panic("Input image must have three channels") } if sbin < 1 { panic("Bin size must be positive") } // Query size of workspace and output. var ( dims = [3]C.int{C.int(im.Height), C.int(im.Width), 3} cells [2]C.int out [3]C.int ) C.size(&dims[0], C.int(sbin), &cells[0], &out[0]) var ( // Allocate output. hog = rimg64.NewMulti(int(out[1]), int(out[0]), int(out[2])) // Allocate workspace. numCells = cells[0] * cells[1] hist = make([]C.double, 18*numCells) norm = make([]C.double, numCells) ) // Compute HOG features. C.compute( &dims[0], (*C.double)(unsafe.Pointer(&im.Elems[0])), (*C.double)(unsafe.Pointer(&hist[0])), (*C.double)(unsafe.Pointer(&norm[0])), C.int(sbin), &cells[0], &out[0], (*C.double)(unsafe.Pointer(&hog.Elems[0]))) return hog }
// CorrMultiBankNaive computes the correlation of // a multi-channel image with a bank of multi-channel filters. // h_p[u, v] = sum_q (f_q corr g_pq)[u, v] func CorrMultiBankNaive(f *rimg64.Multi, g *MultiBank) (*rimg64.Multi, error) { out := ValidSize(f.Size(), g.Size()) if out.X <= 0 || out.Y <= 0 { return nil, nil } h := rimg64.NewMulti(out.X, out.Y, len(g.Filters)) for u := 0; u < h.Width; u++ { for v := 0; v < h.Height; v++ { for p := 0; p < h.Channels; p++ { var sum float64 for i := 0; i < g.Width; i++ { for j := 0; j < g.Height; j++ { for q := 0; q < g.Channels; q++ { sum += f.At(i+u, j+v, q) * g.Filters[p].At(i, j, q) } } } h.Set(u, v, p, sum) } } } return h, nil }
func TestAdjChanNorm_Apply(t *testing.T) { const eps = 1e-9 const ( k = 2 n = 5 a = 1e-4 b = 0.75 ) phi := &convfeat.AdjChanNorm{K: k, Num: n, Alpha: a, Beta: b} elems := []float64{-1, 2, 3, 2, 1, -1, -3} cases := []struct { In, Out []float64 }{{ In: elems, Out: []float64{ elems[0] / math.Pow(k+a*sumsqr(elems[:3]), b), elems[1] / math.Pow(k+a*sumsqr(elems[:4]), b), elems[2] / math.Pow(k+a*sumsqr(elems[:5]), b), elems[3] / math.Pow(k+a*sumsqr(elems[1:6]), b), elems[4] / math.Pow(k+a*sumsqr(elems[2:]), b), elems[5] / math.Pow(k+a*sumsqr(elems[3:]), b), elems[6] / math.Pow(k+a*sumsqr(elems[4:]), b), }, }, { In: elems[:6], Out: []float64{ elems[0] / math.Pow(k+a*sumsqr(elems[:6][:3]), b), elems[1] / math.Pow(k+a*sumsqr(elems[:6][:4]), b), elems[2] / math.Pow(k+a*sumsqr(elems[:6][:5]), b), elems[3] / math.Pow(k+a*sumsqr(elems[:6][1:]), b), elems[4] / math.Pow(k+a*sumsqr(elems[:6][2:]), b), elems[5] / math.Pow(k+a*sumsqr(elems[:6][3:]), b), }, }, { In: elems[:5], Out: []float64{ elems[0] / math.Pow(k+a*sumsqr(elems[:5][:3]), b), elems[1] / math.Pow(k+a*sumsqr(elems[:5][:4]), b), elems[2] / math.Pow(k+a*sumsqr(elems[:5]), b), elems[3] / math.Pow(k+a*sumsqr(elems[:5][1:]), b), elems[4] / math.Pow(k+a*sumsqr(elems[:5][2:]), b), }, }, { In: elems[:3], Out: []float64{ elems[0] / math.Pow(k+a*sumsqr(elems[:3]), b), elems[1] / math.Pow(k+a*sumsqr(elems[:3]), b), elems[2] / math.Pow(k+a*sumsqr(elems[:3]), b), }, }, { In: elems[:2], Out: []float64{ elems[0] / math.Pow(k+a*sumsqr(elems[:2]), b), elems[1] / math.Pow(k+a*sumsqr(elems[:2]), b), }, }, { In: elems[:1], Out: []float64{ elems[0] / math.Pow(k+a*sumsqr(elems[:1]), b), }, }} for _, test := range cases { f := rimg64.NewMulti(1, 1, len(test.In)) f.SetPixel(0, 0, test.In) y, err := phi.Apply(f) if err != nil { t.Fatal(err) } for i := range test.Out { want, got := test.Out[i], y.At(0, 0, i) if math.Abs(want-got) > eps { t.Errorf("with %d channels: different at %d: want %g, got %g", len(test.In), i, want, got) } } } }
// CorrMultiBankBLAS computes the correlation of // a multi-channel image with a bank of multi-channel filters. // h_p[u, v] = sum_q (f_q corr g_pq)[u, v] func CorrMultiBankBLAS(f *rimg64.Multi, g *MultiBank) (*rimg64.Multi, error) { out := ValidSize(f.Size(), g.Size()) if out.X <= 0 || out.Y <= 0 { return nil, nil } // Express as dense matrix multiplication. // h_p[u, v] = sum_q (f_q corr g_pq)[u, v] // Y(h) = A(f) X(g) // If the number of input and output channels are Q and P, then // A is (M-m+1)(N-n+1) x mnQ and // X is mnQ x P, so that // Y is (M-m+1)(N-n+1) x P. // Note that the time to build the system is therefore // affected more by the number of input channels Q than outputs P. h := rimg64.NewMulti(out.X, out.Y, len(g.Filters)) M, N, K := h.Width, h.Height, h.Channels m, n, k := g.Width, g.Height, g.Channels a := blas.NewMat(M*N, m*n*k) { var r int for u := 0; u < h.Width; u++ { for v := 0; v < h.Height; v++ { var s int for i := 0; i < g.Width; i++ { for j := 0; j < g.Height; j++ { for q := 0; q < g.Channels; q++ { a.Set(r, s, f.At(i+u, j+v, q)) s++ } } } r++ } } } x := blas.NewMat(m*n*k, K) { var r int for i := 0; i < g.Width; i++ { for j := 0; j < g.Height; j++ { for q := 0; q < g.Channels; q++ { for p := 0; p < h.Channels; p++ { x.Set(r, p, g.Filters[p].At(i, j, q)) } r++ } } } } y := blas.MatMul(1, a, x) { var r int for u := 0; u < h.Width; u++ { for v := 0; v < h.Height; v++ { for p := 0; p < h.Channels; p++ { h.Set(u, v, p, y.At(r, p)) } r++ } } } return h, nil }
func HOG(f *rimg64.Multi, conf Config) *rimg64.Multi { const eps = 0.0001 // Leave a one-pixel border to compute derivatives. inside := image.Rectangle{image.ZP, f.Size()}.Inset(1) // Leave a half-cell border. half := conf.CellSize / 2 valid := inside.Inset(half) // Number of whole cells inside valid region. cells := valid.Size().Div(conf.CellSize) if cells.X <= 0 || cells.Y <= 0 { return nil } // Remove one cell on all sides for output. out := cells.Sub(image.Pt(2, 2)) // Region to iterate over. size := cells.Mul(conf.CellSize).Add(image.Pt(2*half, 2*half)) vis := image.Rectangle{inside.Min, inside.Min.Add(size)} // Accumulate edges into cell histograms. hist := rimg64.NewMulti(cells.X, cells.Y, 2*conf.Angles) quantizer := makeQuantizer(conf.Angles) for a := vis.Min.X; a < vis.Max.X; a++ { for b := vis.Min.Y; b < vis.Max.Y; b++ { x, y := a-half-vis.Min.X, b-half-vis.Min.Y // Pick channel with strongest gradient. grad, v := maxGrad(f, a, b) v = math.Sqrt(v) // Snap to orientation. q := quantizer.quantize(grad) // Add to 4 histograms around pixel using bilinear interpolation. xp := (float64(x)+0.5)/float64(conf.CellSize) - 0.5 yp := (float64(y)+0.5)/float64(conf.CellSize) - 0.5 // Extract integer and fractional part. ixp, vx0 := modf(xp) iyp, vy0 := modf(yp) // Complement of fraction part. vx1 := 1 - vx0 vy1 := 1 - vy0 if ixp >= 0 && iyp >= 0 { addToMulti(hist, ixp, iyp, q, vx1*vy1*v) } if ixp+1 < cells.X && iyp >= 0 { addToMulti(hist, ixp+1, iyp, q, vx0*vy1*v) } if ixp >= 0 && iyp+1 < cells.Y { addToMulti(hist, ixp, iyp+1, q, vx1*vy0*v) } if ixp+1 < cells.X && iyp+1 < cells.Y { addToMulti(hist, ixp+1, iyp+1, q, vx0*vy0*v) } } } // compute energy in each block by summing over orientations norm := rimg64.New(cells.X, cells.Y) for x := 0; x < cells.X; x++ { for y := 0; y < cells.Y; y++ { for d := 0; d < conf.Angles; d++ { s := hist.At(x, y, d) + hist.At(x, y, d+conf.Angles) addTo(norm, x, y, s*s) } } } feat := rimg64.NewMulti(out.X, out.Y, conf.Channels()) for x := 0; x < out.X; x++ { for y := 0; y < out.Y; y++ { a, b := x+1, y+1 // Normalization factors. var n [4]float64 n[0] = 1 / math.Sqrt(adjSum(norm, a, b, a+1, b+1)+eps) n[1] = 1 / math.Sqrt(adjSum(norm, a, b, a+1, b-1)+eps) n[2] = 1 / math.Sqrt(adjSum(norm, a, b, a-1, b+1)+eps) n[3] = 1 / math.Sqrt(adjSum(norm, a, b, a-1, b-1)+eps) var off int // Contrast-sensitive features. if !conf.NoContrastVar { for d := 0; d < 2*conf.Angles; d++ { h := hist.At(a, b, d) var sum float64 for _, ni := range n { val := h * ni if !conf.NoClip { val = math.Min(val, 0.2) } sum += val } feat.Set(x, y, off+d, sum/2) } off += 2 * conf.Angles } // Contrast-insensitive features. if !conf.NoContrastInvar { for d := 0; d < conf.Angles; d++ { h := hist.At(a, b, d) + hist.At(a, b, conf.Angles+d) var sum float64 for _, ni := range n { val := h * ni if !conf.NoClip { val = math.Min(val, 0.2) } sum += val } feat.Set(x, y, off+d, sum/2) } off += conf.Angles } // Texture features. if !conf.NoTexture { for i, ni := range n { var sum float64 for d := 0; d < 2*conf.Angles; d++ { h := hist.At(a, b, d) val := h * ni if !conf.NoClip { val = math.Min(val, 0.2) } sum += val } feat.Set(x, y, off+i, sum/math.Sqrt(float64(2*conf.Angles))) } off += 4 } } } return feat }