Example No. 1
// Wrapper for kernmulRSymm2Dyz CUDA kernel, asynchronous.
func k_kernmulRSymm2Dyz_async(fftMy unsafe.Pointer, fftMz unsafe.Pointer, fftKyy unsafe.Pointer, fftKzz unsafe.Pointer, fftKyz unsafe.Pointer, N1 int, N2 int, cfg *config, str cu.Stream) {
	if kernmulRSymm2Dyz_code == 0 {
		kernmulRSymm2Dyz_code = fatbinLoad(kernmulRSymm2Dyz_map, "kernmulRSymm2Dyz")
	}

	var a kernmulRSymm2Dyz_args

	a.arg_fftMy = fftMy
	a.argptr[0] = unsafe.Pointer(&a.arg_fftMy)
	a.arg_fftMz = fftMz
	a.argptr[1] = unsafe.Pointer(&a.arg_fftMz)
	a.arg_fftKyy = fftKyy
	a.argptr[2] = unsafe.Pointer(&a.arg_fftKyy)
	a.arg_fftKzz = fftKzz
	a.argptr[3] = unsafe.Pointer(&a.arg_fftKzz)
	a.arg_fftKyz = fftKyz
	a.argptr[4] = unsafe.Pointer(&a.arg_fftKyz)
	a.arg_N1 = N1
	a.argptr[5] = unsafe.Pointer(&a.arg_N1)
	a.arg_N2 = N2
	a.argptr[6] = unsafe.Pointer(&a.arg_N2)

	args := a.argptr[:]
	cu.LaunchKernel(kernmulRSymm2Dyz_code, cfg.Grid.X, cfg.Grid.Y, cfg.Grid.Z, cfg.Block.X, cfg.Block.Y, cfg.Block.Z, 0, str, args)
}
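Every generated wrapper follows the same pattern: lazily load the kernel from the embedded fatbin on first use, copy each argument into the args struct, record a pointer to each copy in argptr, and hand that slice to cu.LaunchKernel on the caller's stream. The sketch below is a hypothetical call site, not code from the original package: the device pointers, the shapeConf helper, and the stream variable are placeholder names.

// Hypothetical call site (placeholder names): multiply the FFT'd y/z magnetization
// components by the symmetric kernel on an N1 x N2 grid, asynchronously.
cfg := shapeConf(N1, N2) // assumed helper producing a *config launch geometry
k_kernmulRSymm2Dyz_async(fftMyPtr, fftMzPtr, fftKyyPtr, fftKzzPtr, fftKyzPtr,
	N1, N2, cfg, stream)
// The launch returns immediately; synchronize the stream before using the result.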
Example No. 2
// Wrapper for kernmulRSymm3D CUDA kernel, asynchronous.
func k_kernmulRSymm3D_async(fftMx unsafe.Pointer, fftMy unsafe.Pointer, fftMz unsafe.Pointer, fftKxx unsafe.Pointer, fftKyy unsafe.Pointer, fftKzz unsafe.Pointer, fftKyz unsafe.Pointer, fftKxz unsafe.Pointer, fftKxy unsafe.Pointer, N0 int, N1 int, N2 int, cfg *config, str cu.Stream) {
	if kernmulRSymm3D_code == 0 {
		kernmulRSymm3D_code = fatbinLoad(kernmulRSymm3D_map, "kernmulRSymm3D")
	}

	var a kernmulRSymm3D_args

	a.arg_fftMx = fftMx
	a.argptr[0] = unsafe.Pointer(&a.arg_fftMx)
	a.arg_fftMy = fftMy
	a.argptr[1] = unsafe.Pointer(&a.arg_fftMy)
	a.arg_fftMz = fftMz
	a.argptr[2] = unsafe.Pointer(&a.arg_fftMz)
	a.arg_fftKxx = fftKxx
	a.argptr[3] = unsafe.Pointer(&a.arg_fftKxx)
	a.arg_fftKyy = fftKyy
	a.argptr[4] = unsafe.Pointer(&a.arg_fftKyy)
	a.arg_fftKzz = fftKzz
	a.argptr[5] = unsafe.Pointer(&a.arg_fftKzz)
	a.arg_fftKyz = fftKyz
	a.argptr[6] = unsafe.Pointer(&a.arg_fftKyz)
	a.arg_fftKxz = fftKxz
	a.argptr[7] = unsafe.Pointer(&a.arg_fftKxz)
	a.arg_fftKxy = fftKxy
	a.argptr[8] = unsafe.Pointer(&a.arg_fftKxy)
	a.arg_N0 = N0
	a.argptr[9] = unsafe.Pointer(&a.arg_N0)
	a.arg_N1 = N1
	a.argptr[10] = unsafe.Pointer(&a.arg_N1)
	a.arg_N2 = N2
	a.argptr[11] = unsafe.Pointer(&a.arg_N2)

	args := a.argptr[:]
	cu.LaunchKernel(kernmulRSymm3D_code, cfg.Grid.X, cfg.Grid.Y, cfg.Grid.Z, cfg.Block.X, cfg.Block.Y, cfg.Block.Z, 0, str, args)
}
Example No. 3
// Wrapper for adddmi CUDA kernel, asynchronous.
func k_adddmi_async(Hx unsafe.Pointer, Hy unsafe.Pointer, Hz unsafe.Pointer, mx unsafe.Pointer, my unsafe.Pointer, mz unsafe.Pointer, Dx float32, Dy float32, Dz float32, N0 int, N1 int, N2 int, cfg *config, str cu.Stream) {
	if adddmi_code == 0 {
		adddmi_code = fatbinLoad(adddmi_map, "adddmi")
	}

	var a adddmi_args

	a.arg_Hx = Hx
	a.argptr[0] = unsafe.Pointer(&a.arg_Hx)
	a.arg_Hy = Hy
	a.argptr[1] = unsafe.Pointer(&a.arg_Hy)
	a.arg_Hz = Hz
	a.argptr[2] = unsafe.Pointer(&a.arg_Hz)
	a.arg_mx = mx
	a.argptr[3] = unsafe.Pointer(&a.arg_mx)
	a.arg_my = my
	a.argptr[4] = unsafe.Pointer(&a.arg_my)
	a.arg_mz = mz
	a.argptr[5] = unsafe.Pointer(&a.arg_mz)
	a.arg_Dx = Dx
	a.argptr[6] = unsafe.Pointer(&a.arg_Dx)
	a.arg_Dy = Dy
	a.argptr[7] = unsafe.Pointer(&a.arg_Dy)
	a.arg_Dz = Dz
	a.argptr[8] = unsafe.Pointer(&a.arg_Dz)
	a.arg_N0 = N0
	a.argptr[9] = unsafe.Pointer(&a.arg_N0)
	a.arg_N1 = N1
	a.argptr[10] = unsafe.Pointer(&a.arg_N1)
	a.arg_N2 = N2
	a.argptr[11] = unsafe.Pointer(&a.arg_N2)

	args := a.argptr[:]
	cu.LaunchKernel(adddmi_code, cfg.Grid.X, cfg.Grid.Y, cfg.Grid.Z, cfg.Block.X, cfg.Block.Y, cfg.Block.Z, 0, str, args)
}
Example No. 4
// Wrapper for copypadmul CUDA kernel, asynchronous.
func k_copypadmul_async(dst unsafe.Pointer, D0 int, D1 int, D2 int, src unsafe.Pointer, S0 int, S1 int, S2 int, volmask unsafe.Pointer, Bsat float32, cfg *config, str cu.Stream) {
	if copypadmul_code == 0 {
		copypadmul_code = fatbinLoad(copypadmul_map, "copypadmul")
	}

	var a copypadmul_args

	a.arg_dst = dst
	a.argptr[0] = unsafe.Pointer(&a.arg_dst)
	a.arg_D0 = D0
	a.argptr[1] = unsafe.Pointer(&a.arg_D0)
	a.arg_D1 = D1
	a.argptr[2] = unsafe.Pointer(&a.arg_D1)
	a.arg_D2 = D2
	a.argptr[3] = unsafe.Pointer(&a.arg_D2)
	a.arg_src = src
	a.argptr[4] = unsafe.Pointer(&a.arg_src)
	a.arg_S0 = S0
	a.argptr[5] = unsafe.Pointer(&a.arg_S0)
	a.arg_S1 = S1
	a.argptr[6] = unsafe.Pointer(&a.arg_S1)
	a.arg_S2 = S2
	a.argptr[7] = unsafe.Pointer(&a.arg_S2)
	a.arg_volmask = volmask
	a.argptr[8] = unsafe.Pointer(&a.arg_volmask)
	a.arg_Bsat = Bsat
	a.argptr[9] = unsafe.Pointer(&a.arg_Bsat)

	args := a.argptr[:]
	cu.LaunchKernel(copypadmul_code, cfg.Grid.X, cfg.Grid.Y, cfg.Grid.Z, cfg.Block.X, cfg.Block.Y, cfg.Block.Z, 0, str, args)
}
Example No. 5
// Wrapper for reducemaxvecdiff2 CUDA kernel, asynchronous.
func k_reducemaxvecdiff2_async(x1 unsafe.Pointer, y1 unsafe.Pointer, z1 unsafe.Pointer, x2 unsafe.Pointer, y2 unsafe.Pointer, z2 unsafe.Pointer, dst unsafe.Pointer, initVal float32, n int, cfg *config, str cu.Stream) {
	if reducemaxvecdiff2_code == 0 {
		reducemaxvecdiff2_code = fatbinLoad(reducemaxvecdiff2_map, "reducemaxvecdiff2")
	}

	var a reducemaxvecdiff2_args

	a.arg_x1 = x1
	a.argptr[0] = unsafe.Pointer(&a.arg_x1)
	a.arg_y1 = y1
	a.argptr[1] = unsafe.Pointer(&a.arg_y1)
	a.arg_z1 = z1
	a.argptr[2] = unsafe.Pointer(&a.arg_z1)
	a.arg_x2 = x2
	a.argptr[3] = unsafe.Pointer(&a.arg_x2)
	a.arg_y2 = y2
	a.argptr[4] = unsafe.Pointer(&a.arg_y2)
	a.arg_z2 = z2
	a.argptr[5] = unsafe.Pointer(&a.arg_z2)
	a.arg_dst = dst
	a.argptr[6] = unsafe.Pointer(&a.arg_dst)
	a.arg_initVal = initVal
	a.argptr[7] = unsafe.Pointer(&a.arg_initVal)
	a.arg_n = n
	a.argptr[8] = unsafe.Pointer(&a.arg_n)

	args := a.argptr[:]
	cu.LaunchKernel(reducemaxvecdiff2_code, cfg.Grid.X, cfg.Grid.Y, cfg.Grid.Z, cfg.Block.X, cfg.Block.Y, cfg.Block.Z, 0, str, args)
}
Example No. 6
// Wrapper for dampingtorque CUDA kernel, asynchronous.
func k_dampingtorque_async(tx unsafe.Pointer, ty unsafe.Pointer, tz unsafe.Pointer, mx unsafe.Pointer, my unsafe.Pointer, mz unsafe.Pointer, hx unsafe.Pointer, hy unsafe.Pointer, hz unsafe.Pointer, N int, cfg *config, str cu.Stream) {
	if dampingtorque_code == 0 {
		dampingtorque_code = fatbinLoad(dampingtorque_map, "dampingtorque")
	}

	var a dampingtorque_args

	a.arg_tx = tx
	a.argptr[0] = unsafe.Pointer(&a.arg_tx)
	a.arg_ty = ty
	a.argptr[1] = unsafe.Pointer(&a.arg_ty)
	a.arg_tz = tz
	a.argptr[2] = unsafe.Pointer(&a.arg_tz)
	a.arg_mx = mx
	a.argptr[3] = unsafe.Pointer(&a.arg_mx)
	a.arg_my = my
	a.argptr[4] = unsafe.Pointer(&a.arg_my)
	a.arg_mz = mz
	a.argptr[5] = unsafe.Pointer(&a.arg_mz)
	a.arg_hx = hx
	a.argptr[6] = unsafe.Pointer(&a.arg_hx)
	a.arg_hy = hy
	a.argptr[7] = unsafe.Pointer(&a.arg_hy)
	a.arg_hz = hz
	a.argptr[8] = unsafe.Pointer(&a.arg_hz)
	a.arg_N = N
	a.argptr[9] = unsafe.Pointer(&a.arg_N)

	args := a.argptr[:]
	cu.LaunchKernel(dampingtorque_code, cfg.Grid.X, cfg.Grid.Y, cfg.Grid.Z, cfg.Block.X, cfg.Block.Y, cfg.Block.Z, 0, str, args)
}
Example No. 7
// Wrapper for addexchange1comp CUDA kernel, asynchronous.
func k_addexchange1comp_async(Beff unsafe.Pointer, m unsafe.Pointer, wx float32, wy float32, wz float32, N0 int, N1 int, N2 int, cfg *config, str cu.Stream) {
	if addexchange1comp_code == 0 {
		addexchange1comp_code = fatbinLoad(addexchange1comp_map, "addexchange1comp")
	}

	var a addexchange1comp_args

	a.arg_Beff = Beff
	a.argptr[0] = unsafe.Pointer(&a.arg_Beff)
	a.arg_m = m
	a.argptr[1] = unsafe.Pointer(&a.arg_m)
	a.arg_wx = wx
	a.argptr[2] = unsafe.Pointer(&a.arg_wx)
	a.arg_wy = wy
	a.argptr[3] = unsafe.Pointer(&a.arg_wy)
	a.arg_wz = wz
	a.argptr[4] = unsafe.Pointer(&a.arg_wz)
	a.arg_N0 = N0
	a.argptr[5] = unsafe.Pointer(&a.arg_N0)
	a.arg_N1 = N1
	a.argptr[6] = unsafe.Pointer(&a.arg_N1)
	a.arg_N2 = N2
	a.argptr[7] = unsafe.Pointer(&a.arg_N2)

	args := a.argptr[:]
	cu.LaunchKernel(addexchange1comp_code, cfg.Grid.X, cfg.Grid.Y, cfg.Grid.Z, cfg.Block.X, cfg.Block.Y, cfg.Block.Z, 0, str, args)
}
Example No. 8
// Wrapper for madd3 CUDA kernel, asynchronous.
func k_madd3_async(dst unsafe.Pointer, src1 unsafe.Pointer, fac1 float32, src2 unsafe.Pointer, fac2 float32, src3 unsafe.Pointer, fac3 float32, N int, cfg *config, str cu.Stream) {
	if madd3_code == 0 {
		madd3_code = fatbinLoad(madd3_map, "madd3")
	}

	var a madd3_args

	a.arg_dst = dst
	a.argptr[0] = unsafe.Pointer(&a.arg_dst)
	a.arg_src1 = src1
	a.argptr[1] = unsafe.Pointer(&a.arg_src1)
	a.arg_fac1 = fac1
	a.argptr[2] = unsafe.Pointer(&a.arg_fac1)
	a.arg_src2 = src2
	a.argptr[3] = unsafe.Pointer(&a.arg_src2)
	a.arg_fac2 = fac2
	a.argptr[4] = unsafe.Pointer(&a.arg_fac2)
	a.arg_src3 = src3
	a.argptr[5] = unsafe.Pointer(&a.arg_src3)
	a.arg_fac3 = fac3
	a.argptr[6] = unsafe.Pointer(&a.arg_fac3)
	a.arg_N = N
	a.argptr[7] = unsafe.Pointer(&a.arg_N)

	args := a.argptr[:]
	cu.LaunchKernel(madd3_code, cfg.Grid.X, cfg.Grid.Y, cfg.Grid.Z, cfg.Block.X, cfg.Block.Y, cfg.Block.Z, 0, str, args)
}
Example No. 9
// Wrapper for adduniaxialanisotropy CUDA kernel, asynchronous.
func k_adduniaxialanisotropy_async(Bx unsafe.Pointer, By unsafe.Pointer, Bz unsafe.Pointer, mx unsafe.Pointer, my unsafe.Pointer, mz unsafe.Pointer, Ux float32, Uy float32, Uz float32, N int, cfg *config, str cu.Stream) {
	if adduniaxialanisotropy_code == 0 {
		adduniaxialanisotropy_code = fatbinLoad(adduniaxialanisotropy_map, "adduniaxialanisotropy")
	}

	var a adduniaxialanisotropy_args

	a.arg_Bx = Bx
	a.argptr[0] = unsafe.Pointer(&a.arg_Bx)
	a.arg_By = By
	a.argptr[1] = unsafe.Pointer(&a.arg_By)
	a.arg_Bz = Bz
	a.argptr[2] = unsafe.Pointer(&a.arg_Bz)
	a.arg_mx = mx
	a.argptr[3] = unsafe.Pointer(&a.arg_mx)
	a.arg_my = my
	a.argptr[4] = unsafe.Pointer(&a.arg_my)
	a.arg_mz = mz
	a.argptr[5] = unsafe.Pointer(&a.arg_mz)
	a.arg_Ux = Ux
	a.argptr[6] = unsafe.Pointer(&a.arg_Ux)
	a.arg_Uy = Uy
	a.argptr[7] = unsafe.Pointer(&a.arg_Uy)
	a.arg_Uz = Uz
	a.argptr[8] = unsafe.Pointer(&a.arg_Uz)
	a.arg_N = N
	a.argptr[9] = unsafe.Pointer(&a.arg_N)

	args := a.argptr[:]
	cu.LaunchKernel(adduniaxialanisotropy_code, cfg.Grid.X, cfg.Grid.Y, cfg.Grid.Z, cfg.Block.X, cfg.Block.Y, cfg.Block.Z, 0, str, args)
}
Example No. 10
// Wrapper for addzhanglitorque CUDA kernel, asynchronous.
func k_addzhanglitorque_async(tx unsafe.Pointer, ty unsafe.Pointer, tz unsafe.Pointer, mx unsafe.Pointer, my unsafe.Pointer, mz unsafe.Pointer, ux float32, uy float32, uz float32, jmapx unsafe.Pointer, jmapy unsafe.Pointer, jmapz unsafe.Pointer, alpha float32, xi float32, N0 int, N1 int, N2 int, cfg *config, str cu.Stream) {
	if addzhanglitorque_code == 0 {
		addzhanglitorque_code = fatbinLoad(addzhanglitorque_map, "addzhanglitorque")
	}

	var a addzhanglitorque_args

	a.arg_tx = tx
	a.argptr[0] = unsafe.Pointer(&a.arg_tx)
	a.arg_ty = ty
	a.argptr[1] = unsafe.Pointer(&a.arg_ty)
	a.arg_tz = tz
	a.argptr[2] = unsafe.Pointer(&a.arg_tz)
	a.arg_mx = mx
	a.argptr[3] = unsafe.Pointer(&a.arg_mx)
	a.arg_my = my
	a.argptr[4] = unsafe.Pointer(&a.arg_my)
	a.arg_mz = mz
	a.argptr[5] = unsafe.Pointer(&a.arg_mz)
	a.arg_ux = ux
	a.argptr[6] = unsafe.Pointer(&a.arg_ux)
	a.arg_uy = uy
	a.argptr[7] = unsafe.Pointer(&a.arg_uy)
	a.arg_uz = uz
	a.argptr[8] = unsafe.Pointer(&a.arg_uz)
	a.arg_jmapx = jmapx
	a.argptr[9] = unsafe.Pointer(&a.arg_jmapx)
	a.arg_jmapy = jmapy
	a.argptr[10] = unsafe.Pointer(&a.arg_jmapy)
	a.arg_jmapz = jmapz
	a.argptr[11] = unsafe.Pointer(&a.arg_jmapz)
	a.arg_alpha = alpha
	a.argptr[12] = unsafe.Pointer(&a.arg_alpha)
	a.arg_xi = xi
	a.argptr[13] = unsafe.Pointer(&a.arg_xi)
	a.arg_N0 = N0
	a.argptr[14] = unsafe.Pointer(&a.arg_N0)
	a.arg_N1 = N1
	a.argptr[15] = unsafe.Pointer(&a.arg_N1)
	a.arg_N2 = N2
	a.argptr[16] = unsafe.Pointer(&a.arg_N2)

	args := a.argptr[:]
	cu.LaunchKernel(addzhanglitorque_code, cfg.Grid.X, cfg.Grid.Y, cfg.Grid.Z, cfg.Block.X, cfg.Block.Y, cfg.Block.Z, 0, str, args)
}
Example No. 11
// Wrapper for kernmulRSymm2Dx CUDA kernel, asynchronous.
func k_kernmulRSymm2Dx_async(fftMx unsafe.Pointer, fftKxx unsafe.Pointer, N1 int, N2 int, cfg *config, str cu.Stream) {
	if kernmulRSymm2Dx_code == 0 {
		kernmulRSymm2Dx_code = fatbinLoad(kernmulRSymm2Dx_map, "kernmulRSymm2Dx")
	}

	var a kernmulRSymm2Dx_args

	a.arg_fftMx = fftMx
	a.argptr[0] = unsafe.Pointer(&a.arg_fftMx)
	a.arg_fftKxx = fftKxx
	a.argptr[1] = unsafe.Pointer(&a.arg_fftKxx)
	a.arg_N1 = N1
	a.argptr[2] = unsafe.Pointer(&a.arg_N1)
	a.arg_N2 = N2
	a.argptr[3] = unsafe.Pointer(&a.arg_N2)

	args := a.argptr[:]
	cu.LaunchKernel(kernmulRSymm2Dx_code, cfg.Grid.X, cfg.Grid.Y, cfg.Grid.Z, cfg.Block.X, cfg.Block.Y, cfg.Block.Z, 0, str, args)
}
Example No. 12
// Wrapper for normalize CUDA kernel, asynchronous.
func k_normalize_async(vx unsafe.Pointer, vy unsafe.Pointer, vz unsafe.Pointer, N int, cfg *config, str cu.Stream) {
	if normalize_code == 0 {
		normalize_code = fatbinLoad(normalize_map, "normalize")
	}

	var a normalize_args

	a.arg_vx = vx
	a.argptr[0] = unsafe.Pointer(&a.arg_vx)
	a.arg_vy = vy
	a.argptr[1] = unsafe.Pointer(&a.arg_vy)
	a.arg_vz = vz
	a.argptr[2] = unsafe.Pointer(&a.arg_vz)
	a.arg_N = N
	a.argptr[3] = unsafe.Pointer(&a.arg_N)

	args := a.argptr[:]
	cu.LaunchKernel(normalize_code, cfg.Grid.X, cfg.Grid.Y, cfg.Grid.Z, cfg.Block.X, cfg.Block.Y, cfg.Block.Z, 0, str, args)
}
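For the simplest element-wise wrappers, the call reduces to the three component buffers plus the element count. A hedged sketch follows, assuming a 1D launch-config helper (make1DConf is a placeholder name here) and an existing stream; the device pointers are likewise placeholders.

// Hypothetical usage (placeholder names): normalize an N-element vector field in place.
cfg := make1DConf(N) // assumed helper: 1D launch configuration covering N elements
k_normalize_async(vxPtr, vyPtr, vzPtr, N, cfg, stream)
// Asynchronous: synchronize the stream before reading vx/vy/vz back.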
Example No. 13
// Wrapper for reducesum CUDA kernel, asynchronous.
func k_reducesum_async(src unsafe.Pointer, dst unsafe.Pointer, initVal float32, n int, cfg *config, str cu.Stream) {
	if reducesum_code == 0 {
		reducesum_code = fatbinLoad(reducesum_map, "reducesum")
	}

	var a reducesum_args

	a.arg_src = src
	a.argptr[0] = unsafe.Pointer(&a.arg_src)
	a.arg_dst = dst
	a.argptr[1] = unsafe.Pointer(&a.arg_dst)
	a.arg_initVal = initVal
	a.argptr[2] = unsafe.Pointer(&a.arg_initVal)
	a.arg_n = n
	a.argptr[3] = unsafe.Pointer(&a.arg_n)

	args := a.argptr[:]
	cu.LaunchKernel(reducesum_code, cfg.Grid.X, cfg.Grid.Y, cfg.Grid.Z, cfg.Block.X, cfg.Block.Y, cfg.Block.Z, 0, str, args)
}
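The reduce-style wrappers differ only in taking a small destination buffer, a seed value for the reduction (0 for a sum), and the flat element count. A hedged sketch with placeholder names for the buffers, launch config, and stream:

// Hypothetical usage (placeholder names): sum n floats from src into dst on the device.
k_reducesum_async(srcPtr, dstPtr, 0.0, n, reduceCfg, stream)
// Asynchronous: synchronize the stream, then copy the single float in dst back to
// the host to obtain the sum.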
Example No. 14
// Wrapper for reducedot CUDA kernel, asynchronous.
func k_reducedot_async(x1 unsafe.Pointer, x2 unsafe.Pointer, dst unsafe.Pointer, initVal float32, n int, cfg *config, str cu.Stream) {
	if reducedot_code == 0 {
		reducedot_code = fatbinLoad(reducedot_map, "reducedot")
	}

	var a reducedot_args

	a.arg_x1 = x1
	a.argptr[0] = unsafe.Pointer(&a.arg_x1)
	a.arg_x2 = x2
	a.argptr[1] = unsafe.Pointer(&a.arg_x2)
	a.arg_dst = dst
	a.argptr[2] = unsafe.Pointer(&a.arg_dst)
	a.arg_initVal = initVal
	a.argptr[3] = unsafe.Pointer(&a.arg_initVal)
	a.arg_n = n
	a.argptr[4] = unsafe.Pointer(&a.arg_n)

	args := a.argptr[:]
	cu.LaunchKernel(reducedot_code, cfg.Grid.X, cfg.Grid.Y, cfg.Grid.Z, cfg.Block.X, cfg.Block.Y, cfg.Block.Z, 0, str, args)
}
Example No. 15
// Wrapper for reducemaxvecnorm2 CUDA kernel, asynchronous.
func k_reducemaxvecnorm2_async(x unsafe.Pointer, y unsafe.Pointer, z unsafe.Pointer, dst unsafe.Pointer, initVal float32, n int, cfg *config, str cu.Stream) {
	if reducemaxvecnorm2_code == 0 {
		reducemaxvecnorm2_code = fatbinLoad(reducemaxvecnorm2_map, "reducemaxvecnorm2")
	}

	var a reducemaxvecnorm2_args

	a.arg_x = x
	a.argptr[0] = unsafe.Pointer(&a.arg_x)
	a.arg_y = y
	a.argptr[1] = unsafe.Pointer(&a.arg_y)
	a.arg_z = z
	a.argptr[2] = unsafe.Pointer(&a.arg_z)
	a.arg_dst = dst
	a.argptr[3] = unsafe.Pointer(&a.arg_dst)
	a.arg_initVal = initVal
	a.argptr[4] = unsafe.Pointer(&a.arg_initVal)
	a.arg_n = n
	a.argptr[5] = unsafe.Pointer(&a.arg_n)

	args := a.argptr[:]
	cu.LaunchKernel(reducemaxvecnorm2_code, cfg.Grid.X, cfg.Grid.Y, cfg.Grid.Z, cfg.Block.X, cfg.Block.Y, cfg.Block.Z, 0, str, args)
}
Example No. 16
// Wrapper for kernmulC CUDA kernel, asynchronous.
func k_kernmulC_async(Mx unsafe.Pointer, My unsafe.Pointer, Mz unsafe.Pointer, Kxx unsafe.Pointer, Kyy unsafe.Pointer, Kzz unsafe.Pointer, Kyz unsafe.Pointer, Kxz unsafe.Pointer, Kxy unsafe.Pointer, Kzy unsafe.Pointer, Kzx unsafe.Pointer, Kyx unsafe.Pointer, N int, cfg *config, str cu.Stream) {
	if kernmulC_code == 0 {
		kernmulC_code = fatbinLoad(kernmulC_map, "kernmulC")
	}

	var a kernmulC_args

	a.arg_Mx = Mx
	a.argptr[0] = unsafe.Pointer(&a.arg_Mx)
	a.arg_My = My
	a.argptr[1] = unsafe.Pointer(&a.arg_My)
	a.arg_Mz = Mz
	a.argptr[2] = unsafe.Pointer(&a.arg_Mz)
	a.arg_Kxx = Kxx
	a.argptr[3] = unsafe.Pointer(&a.arg_Kxx)
	a.arg_Kyy = Kyy
	a.argptr[4] = unsafe.Pointer(&a.arg_Kyy)
	a.arg_Kzz = Kzz
	a.argptr[5] = unsafe.Pointer(&a.arg_Kzz)
	a.arg_Kyz = Kyz
	a.argptr[6] = unsafe.Pointer(&a.arg_Kyz)
	a.arg_Kxz = Kxz
	a.argptr[7] = unsafe.Pointer(&a.arg_Kxz)
	a.arg_Kxy = Kxy
	a.argptr[8] = unsafe.Pointer(&a.arg_Kxy)
	a.arg_Kzy = Kzy
	a.argptr[9] = unsafe.Pointer(&a.arg_Kzy)
	a.arg_Kzx = Kzx
	a.argptr[10] = unsafe.Pointer(&a.arg_Kzx)
	a.arg_Kyx = Kyx
	a.argptr[11] = unsafe.Pointer(&a.arg_Kyx)
	a.arg_N = N
	a.argptr[12] = unsafe.Pointer(&a.arg_N)

	args := a.argptr[:]
	cu.LaunchKernel(kernmulC_code, cfg.Grid.X, cfg.Grid.Y, cfg.Grid.Z, cfg.Block.X, cfg.Block.Y, cfg.Block.Z, 0, str, args)
}