// TODO: swap N1/N2? func make2DConfSize(N1, N2, BLOCK int) *config { var gr, bl cu.Dim3 bl.X = BLOCK bl.Y = BLOCK bl.Z = 1 NX := divUp(N2, BLOCK) NY := divUp(N1, BLOCK) gr.X = NX gr.Y = NY gr.Z = 1 //N := N1 * N2 //util.Assert(gr.X*gr.Y*gr.Z*bl.X*bl.Y*bl.Z >= N) return &config{gr, bl} }
// Make a 1D kernel launch configuration suited for N threads. func make1DConf(N int) *config { var gr, bl cu.Dim3 bl.X = MaxBlockSize bl.Y = 1 bl.Z = 1 N2 := divUp(N, MaxBlockSize) // N2 blocks left NX := divUp(N2, MaxGridSize) NY := divUp(N2, NX) gr.X = NX gr.Y = NY gr.Z = 1 //util.Assert(gridSize.X*gridSize.Y*gridSize.Z*blockSize.X*blockSize.Y*blockSize.Z >= N) return &config{gr, bl} }