func TestMatMul(t *testing.T) { const ( m = 3 k = 4 n = 5 ) alpha := rand.NormFloat64() a, b := randMat(m, k), randMat(k, n) got := blas.MatMul(alpha, a, b) want := mat.Scale(alpha, mat.Mul(a, b)) checkEqualMat(t, want, got, 1e-9) // Try with non-copying transposes. alpha = rand.NormFloat64() a, b = randMat(k, m).T(), randMat(k, n) got = blas.MatMul(alpha, a, b) want = mat.Scale(alpha, mat.Mul(a, b)) checkEqualMat(t, want, got, 1e-9) alpha = rand.NormFloat64() a, b = randMat(m, k), randMat(n, k).T() got = blas.MatMul(alpha, a, b) want = mat.Scale(alpha, mat.Mul(a, b)) checkEqualMat(t, want, got, 1e-9) alpha = rand.NormFloat64() a, b = randMat(k, m).T(), randMat(n, k).T() got = blas.MatMul(alpha, a, b) want = mat.Scale(alpha, mat.Mul(a, b)) checkEqualMat(t, want, got, 1e-9) }
// CorrMultiBankStrideBLAS computes the strided correlation of // a multi-channel image with a bank of multi-channel filters. // h_p[u, v] = sum_q (f_q corr g_pq)[stride*u, stride*v] func CorrMultiBankStrideBLAS(f *rimg64.Multi, g *MultiBank, stride int) (*rimg64.Multi, error) { out := ValidSizeStride(f.Size(), g.Size(), stride) if out.X <= 0 || out.Y <= 0 { return nil, nil } h := rimg64.NewMulti(out.X, out.Y, len(g.Filters)) // Size of filters. m, n, k := g.Width, g.Height, g.Channels // Express as dense matrix multiplication. // h_p[u, v] = sum_q (f_q corr g_pq)[u, v] // h = A(f) X(g) // where A is whk by mnk // with w = ceil[(M-m+1)/stride], // h = ceil[(N-n+1)/stride]. a := blas.NewMat(h.Width*h.Height, m*n*k) { var r int for u := 0; u < h.Width; u++ { for v := 0; v < h.Height; v++ { var s int for i := 0; i < g.Width; i++ { for j := 0; j < g.Height; j++ { for q := 0; q < g.Channels; q++ { a.Set(r, s, f.At(stride*u+i, stride*v+j, q)) s++ } } } r++ } } } x := blas.NewMat(m*n*k, h.Channels) { var r int for i := 0; i < g.Width; i++ { for j := 0; j < g.Height; j++ { for q := 0; q < g.Channels; q++ { for p := 0; p < h.Channels; p++ { x.Set(r, p, g.Filters[p].At(i, j, q)) } r++ } } } } y := blas.MatMul(1, a, x) { var r int for u := 0; u < h.Width; u++ { for v := 0; v < h.Height; v++ { for p := 0; p < h.Channels; p++ { h.Set(u, v, p, y.At(r, p)) } r++ } } } return h, nil }
// CorrBankBLAS computes the correlation of an image with a bank of filters. // h_p[u, v] = (f corr g_p)[u, v] func CorrBankBLAS(f *rimg64.Image, g *Bank) (*rimg64.Multi, error) { out := ValidSize(f.Size(), g.Size()) if out.X <= 0 || out.Y <= 0 { return nil, nil } // Express as dense matrix multiplication. // h_p[u, v] = (f corr g_q)[u, v] // Y(h) = A(f) X(g) // If the number of output channels is k, then // A is (M-m+1)(N-n+1) x mn and // X is mn x k, so that // Y is (M-m+1)(N-n+1) x k. h := rimg64.NewMulti(out.X, out.Y, len(g.Filters)) m, n, k := g.Width, g.Height, len(g.Filters) a := blas.NewMat(out.X*out.Y, m*n) { var r int for u := 0; u < h.Width; u++ { for v := 0; v < h.Height; v++ { var s int for i := 0; i < g.Width; i++ { for j := 0; j < g.Height; j++ { a.Set(r, s, f.At(i+u, j+v)) s++ } } r++ } } } x := blas.NewMat(m*n, k) { var r int for i := 0; i < g.Width; i++ { for j := 0; j < g.Height; j++ { for p := 0; p < h.Channels; p++ { x.Set(r, p, g.Filters[p].At(i, j)) } r++ } } } y := blas.MatMul(1, a, x) { var r int for u := 0; u < h.Width; u++ { for v := 0; v < h.Height; v++ { for p := 0; p < h.Channels; p++ { h.Set(u, v, p, y.At(r, p)) } r++ } } } return h, nil }
// CorrMultiStrideBLAS computes the strided correlation of // a multi-channel image with a multi-channel filter. // h[u, v] = sum_q (f_q corr g_q)[stride*u, stride*v] func CorrMultiStrideBLAS(f, g *rimg64.Multi, stride int) (*rimg64.Image, error) { out := ValidSizeStride(f.Size(), g.Size(), stride) if out.X <= 0 || out.Y <= 0 { return nil, nil } h := rimg64.New(out.X, out.Y) // Size of filters. m, n, k := g.Width, g.Height, g.Channels // Express as dense matrix multiplication. // h[u, v] = sum_q (f_q corr g_q)[stride*u, stride*v] // y(h) = A(f) x(g) // where A is wh by mnk // with w = ceil[(M-m+1)/stride], // h = ceil[(N-n+1)/stride]. a := blas.NewMat(h.Width*h.Height, m*n*k) { var r int for u := 0; u < h.Width; u++ { for v := 0; v < h.Height; v++ { var s int for i := 0; i < g.Width; i++ { for j := 0; j < g.Height; j++ { for q := 0; q < g.Channels; q++ { a.Set(r, s, f.At(stride*u+i, stride*v+j, q)) s++ } } } r++ } } } x := blas.NewMat(m*n*k, 1) { var r int for i := 0; i < g.Width; i++ { for j := 0; j < g.Height; j++ { for q := 0; q < g.Channels; q++ { x.Set(r, 0, g.At(i, j, q)) r++ } } } } y := blas.MatMul(1, a, x) { var r int for u := 0; u < h.Width; u++ { for v := 0; v < h.Height; v++ { h.Set(u, v, y.At(r, 0)) r++ } } } return h, nil }
func benchmarkMatMul(b *testing.B, m, k, n int, naive bool) { x, y := randMat(m, k), randMat(k, n) b.ResetTimer() for i := 0; i < b.N; i++ { if naive { mat.Mul(x, y) } else { blas.MatMul(1, x, y) } } }
// CorrBLAS computes the correlation of an image with a filter. // h[u, v] = (f corr g)[u, v] func CorrBLAS(f, g *rimg64.Image) (*rimg64.Image, error) { out := ValidSize(f.Size(), g.Size()) if out.X <= 0 || out.Y <= 0 { return nil, nil } h := rimg64.New(out.X, out.Y) // Size of filters. m, n := g.Width, g.Height // Express as dense matrix multiplication. // h[u, v] = (f corr g)[u, v] // y(h) = A(f) x(g) // where A is (M-m+1)(N-n+1) by mn. a := blas.NewMat(h.Width*h.Height, m*n) { var r int for u := 0; u < h.Width; u++ { for v := 0; v < h.Height; v++ { var s int for i := 0; i < g.Width; i++ { for j := 0; j < g.Height; j++ { a.Set(r, s, f.At(u+i, v+j)) s++ } } r++ } } } x := blas.NewMat(m*n, 1) { var r int for i := 0; i < g.Width; i++ { for j := 0; j < g.Height; j++ { x.Set(r, 0, g.At(i, j)) r++ } } } y := blas.MatMul(1, a, x) { var r int for u := 0; u < h.Width; u++ { for v := 0; v < h.Height; v++ { h.Set(u, v, y.At(r, 0)) r++ } } } return h, nil }
// CorrMultiBankBLAS computes the correlation of // a multi-channel image with a bank of multi-channel filters. // h_p[u, v] = sum_q (f_q corr g_pq)[u, v] func CorrMultiBankBLAS(f *rimg64.Multi, g *MultiBank) (*rimg64.Multi, error) { out := ValidSize(f.Size(), g.Size()) if out.X <= 0 || out.Y <= 0 { return nil, nil } // Express as dense matrix multiplication. // h_p[u, v] = sum_q (f_q corr g_pq)[u, v] // Y(h) = A(f) X(g) // If the number of input and output channels are Q and P, then // A is (M-m+1)(N-n+1) x mnQ and // X is mnQ x P, so that // Y is (M-m+1)(N-n+1) x P. // Note that the time to build the system is therefore // affected more by the number of input channels Q than outputs P. h := rimg64.NewMulti(out.X, out.Y, len(g.Filters)) M, N, K := h.Width, h.Height, h.Channels m, n, k := g.Width, g.Height, g.Channels a := blas.NewMat(M*N, m*n*k) { var r int for u := 0; u < h.Width; u++ { for v := 0; v < h.Height; v++ { var s int for i := 0; i < g.Width; i++ { for j := 0; j < g.Height; j++ { for q := 0; q < g.Channels; q++ { a.Set(r, s, f.At(i+u, j+v, q)) s++ } } } r++ } } } x := blas.NewMat(m*n*k, K) { var r int for i := 0; i < g.Width; i++ { for j := 0; j < g.Height; j++ { for q := 0; q < g.Channels; q++ { for p := 0; p < h.Channels; p++ { x.Set(r, p, g.Filters[p].At(i, j, q)) } r++ } } } } y := blas.MatMul(1, a, x) { var r int for u := 0; u < h.Width; u++ { for v := 0; v < h.Height; v++ { for p := 0; p < h.Channels; p++ { h.Set(u, v, p, y.At(r, p)) } r++ } } } return h, nil }