Ejemplo n.º 1
0
func (this *buffer) EnqueueRead(queue CommandQueue,
	blocking_read cl.CL_bool,
	offset cl.CL_size_t,
	cb cl.CL_size_t,
	ptr unsafe.Pointer,
	event_wait_list []Event) (Event, error) {
	var errCode cl.CL_int
	var event_id cl.CL_event

	numEvents := cl.CL_uint(len(event_wait_list))
	var events []cl.CL_event
	if numEvents > 0 {
		events = make([]cl.CL_event, numEvents)
		for i := cl.CL_uint(0); i < numEvents; i++ {
			events[i] = event_wait_list[i].GetID()
		}
	}

	if errCode = cl.CLEnqueueReadBuffer(queue.GetID(),
		this.memory_id,
		blocking_read,
		offset,
		cb,
		ptr,
		numEvents,
		events,
		&event_id); errCode != cl.CL_SUCCESS {
		return nil, fmt.Errorf("EnqueueRead failure with errcode_ret %d: %s", errCode, cl.ERROR_CODES_STRINGS[-errCode])
	} else {
		return &event{event_id}, nil
	}
}
Ejemplo n.º 2
0
Archivo: pipe.go Proyecto: xfong/gocl
func main() {
	// Use this to check the output of each API call
	var status cl.CL_int

	//-----------------------------------------------------
	// STEP 1: Discover and initialize the platforms
	//-----------------------------------------------------
	var numPlatforms cl.CL_uint
	var platforms []cl.CL_platform_id

	// Use clGetPlatformIDs() to retrieve the number of
	// platforms
	status = cl.CLGetPlatformIDs(0, nil, &numPlatforms)

	// Allocate enough space for each platform
	platforms = make([]cl.CL_platform_id, numPlatforms)

	// Fill in platforms with clGetPlatformIDs()
	status = cl.CLGetPlatformIDs(numPlatforms, platforms, nil)
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "CLGetPlatformIDs")

	//-----------------------------------------------------
	// STEP 2: Discover and initialize the GPU devices
	//-----------------------------------------------------
	var numDevices cl.CL_uint
	var devices []cl.CL_device_id

	// Use clGetDeviceIDs() to retrieve the number of
	// devices present
	status = cl.CLGetDeviceIDs(platforms[0],
		cl.CL_DEVICE_TYPE_GPU,
		0,
		nil,
		&numDevices)
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "CLGetDeviceIDs")

	// Allocate enough space for each device
	devices = make([]cl.CL_device_id, numDevices)

	// Fill in devices with clGetDeviceIDs()
	status = cl.CLGetDeviceIDs(platforms[0],
		cl.CL_DEVICE_TYPE_GPU,
		numDevices,
		devices,
		nil)
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "CLGetDeviceIDs")

	//-----------------------------------------------------
	// STEP 3: Create a context
	//-----------------------------------------------------
	var context cl.CL_context

	// Create a context using clCreateContext() and
	// associate it with the devices
	context = cl.CLCreateContext(nil,
		numDevices,
		devices,
		nil,
		nil,
		&status)
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "CLCreateContext")
	defer cl.CLReleaseContext(context)

	//-----------------------------------------------------
	// STEP 4: Create a command queue
	//-----------------------------------------------------
	var commandQueue [MAX_COMMAND_QUEUE]cl.CL_command_queue

	// Create a command queue using clCreateCommandQueueWithProperties(),
	// and associate it with the device you want to execute
	for i := 0; i < MAX_COMMAND_QUEUE; i++ {
		commandQueue[i] = cl.CLCreateCommandQueueWithProperties(context,
			devices[0],
			nil,
			&status)
		utils.CHECK_STATUS(status, cl.CL_SUCCESS, "CLCreateCommandQueueWithProperties")
		defer cl.CLReleaseCommandQueue(commandQueue[i])
	}

	//-----------------------------------------------------
	// STEP 5: Create device buffers
	//-----------------------------------------------------
	producerGroupSize := cl.CL_size_t(PRODUCER_GROUP_SIZE)
	producerGlobalSize := cl.CL_size_t(PRODUCER_GLOBAL_SIZE)

	consumerGroupSize := cl.CL_size_t(CONSUMER_GROUP_SIZE)
	consumerGlobalSize := cl.CL_size_t(CONSUMER_GLOBAL_SIZE)

	var samplePipePkt [2]cl.CL_float
	szPipe := cl.CL_uint(PIPE_SIZE)
	szPipePkt := cl.CL_uint(unsafe.Sizeof(samplePipePkt))
	if szPipe%PRNG_CHANNELS != 0 {
		szPipe = (szPipe/PRNG_CHANNELS)*PRNG_CHANNELS + PRNG_CHANNELS
	}
	consumerGlobalSize = cl.CL_size_t(szPipe)
	pipePktPerThread := cl.CL_int(szPipe) / PRNG_CHANNELS
	seed := cl.CL_int(SEED)
	rngType := cl.CL_int(RV_GAUSSIAN)
	var histMin cl.CL_float
	var histMax cl.CL_float
	if rngType == cl.CL_int(RV_UNIFORM) {
		histMin = 0.0
		histMax = 1.0
	} else {
		histMin = -10.0
		histMax = 10.0
	}

	localDevHist := make([]cl.CL_int, MAX_HIST_BINS)
	cpuHist := make([]cl.CL_int, MAX_HIST_BINS)

	//Create and initialize memory objects
	rngPipe := cl.CLCreatePipe(context,
		cl.CL_MEM_READ_WRITE,
		szPipePkt,
		szPipe,
		nil,
		&status)
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "clCreatePipe")

	devHist := cl.CLCreateBuffer(context,
		cl.CL_MEM_READ_WRITE|cl.CL_MEM_COPY_HOST_PTR,
		MAX_HIST_BINS*cl.CL_size_t(unsafe.Sizeof(localDevHist[0])),
		unsafe.Pointer(&localDevHist[0]),
		&status)
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "clCreateBuffer")

	//-----------------------------------------------------
	// STEP 6: Create and compile the program
	//-----------------------------------------------------
	programSource, programeSize := utils.Load_programsource("pipe.cl")

	// Create a program using clCreateProgramWithSource()
	program := cl.CLCreateProgramWithSource(context,
		1,
		programSource[:],
		programeSize[:],
		&status)
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "CLCreateProgramWithSource")
	defer cl.CLReleaseProgram(program)

	// Build (compile) the program for the devices with
	// clBuildProgram()
	options := "-cl-std=CL2.0"
	status = cl.CLBuildProgram(program,
		numDevices,
		devices,
		[]byte(options),
		nil,
		nil)
	if status != cl.CL_SUCCESS {
		var program_log interface{}
		var log_size cl.CL_size_t

		/* Find size of log and print to std output */
		cl.CLGetProgramBuildInfo(program, devices[0], cl.CL_PROGRAM_BUILD_LOG,
			0, nil, &log_size)
		cl.CLGetProgramBuildInfo(program, devices[0], cl.CL_PROGRAM_BUILD_LOG,
			log_size, &program_log, nil)
		fmt.Printf("%s\n", program_log)
		return
	}
	//utils.CHECK_STATUS(status, cl.CL_SUCCESS, "CLBuildProgram")

	//-----------------------------------------------------
	// STEP 7: Create the kernel
	//-----------------------------------------------------
	// Use clCreateKernel() to create a kernel
	produceKernel := cl.CLCreateKernel(program, []byte("pipe_producer"), &status)
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "CLCreateKernel")
	defer cl.CLReleaseKernel(produceKernel)

	consumeKernel := cl.CLCreateKernel(program, []byte("pipe_consumer"), &status)
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "CLCreateKernel")
	defer cl.CLReleaseKernel(consumeKernel)

	//-----------------------------------------------------
	// STEP 8: Set the kernel arguments
	//-----------------------------------------------------
	// Associate the input and output buffers with the
	// kernel
	// using clSetKernelArg()
	// Set appropriate arguments to the kernel
	status = cl.CLSetKernelArg(produceKernel,
		0,
		cl.CL_size_t(unsafe.Sizeof(rngPipe)),
		unsafe.Pointer(&rngPipe))

	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "clSetKernelArg(rngPipe)")

	status = cl.CLSetKernelArg(produceKernel,
		1,
		cl.CL_size_t(unsafe.Sizeof(pipePktPerThread)),
		unsafe.Pointer(&pipePktPerThread))
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "clSetKernelArg(pipePktPerThread)")

	status = cl.CLSetKernelArg(produceKernel,
		2,
		cl.CL_size_t(unsafe.Sizeof(seed)),
		unsafe.Pointer(&seed))
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "clSetKernelArg(seed)")

	status = cl.CLSetKernelArg(produceKernel,
		3,
		cl.CL_size_t(unsafe.Sizeof(rngType)),
		unsafe.Pointer(&rngType))
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "clSetKernelArg(rngType)")

	//-----------------------------------------------------
	// STEP 9: Configure the work-item structure
	//-----------------------------------------------------
	// Define an index space (global work size) of work
	// items for
	// execution. A workgroup size (local work size) is not
	// required,
	// but can be used.
	// Enqueue both the kernels.
	var globalThreads = []cl.CL_size_t{producerGlobalSize}
	var localThreads = []cl.CL_size_t{producerGroupSize}

	//-----------------------------------------------------
	// STEP 10: Enqueue the kernel for execution
	//-----------------------------------------------------
	// Execute the kernel by using
	// clEnqueueNDRangeKernel().
	// 'globalWorkSize' is the 1D dimension of the
	// work-items
	var produceEvt [1]cl.CL_event
	status = cl.CLEnqueueNDRangeKernel(commandQueue[0],
		produceKernel,
		1,
		nil,
		globalThreads,
		localThreads,
		0,
		nil,
		&produceEvt[0])
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "clEnqueueNDRangeKernel")

	/*
	   launch consumer kernel only after producer has finished.
	   This is done to avoid concurrent kernels execution as the
	   memory consistency of pipe is guaranteed only across
	   synchronization points.
	*/
	status = cl.CLWaitForEvents(1, produceEvt[:])
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "clWaitForEvents(produceEvt)")

	//-----------------------------------------------------
	// STEP 8: Set the kernel arguments
	//-----------------------------------------------------
	// Associate the input and output buffers with the
	// kernel
	// using clSetKernelArg()
	// Set appropriate arguments to the kernel
	status = cl.CLSetKernelArg(consumeKernel,
		0,
		cl.CL_size_t(unsafe.Sizeof(rngPipe)),
		unsafe.Pointer(&rngPipe))
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "clSetKernelArg(rngPipe)")

	status = cl.CLSetKernelArg(consumeKernel,
		1,
		cl.CL_size_t(unsafe.Sizeof(devHist)),
		unsafe.Pointer(&devHist))
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "clSetKernelArg(devHist)")

	status = cl.CLSetKernelArg(consumeKernel,
		2,
		cl.CL_size_t(unsafe.Sizeof(histMin)),
		unsafe.Pointer(&histMin))
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "clSetKernelArg(histMin)")

	status = cl.CLSetKernelArg(consumeKernel,
		3,
		cl.CL_size_t(unsafe.Sizeof(histMax)),
		unsafe.Pointer(&histMax))
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "clSetKernelArg(histMax)")

	//-----------------------------------------------------
	// STEP 9: Configure the work-item structure
	//-----------------------------------------------------
	// Define an index space (global work size) of work
	// items for
	// execution. A workgroup size (local work size) is not
	// required,
	// but can be used.
	globalThreads[0] = consumerGlobalSize
	localThreads[0] = consumerGroupSize

	//-----------------------------------------------------
	// STEP 10: Enqueue the kernel for execution
	//-----------------------------------------------------
	// Execute the kernel by using
	// clEnqueueNDRangeKernel().
	// 'globalWorkSize' is the 1D dimension of the
	// work-items
	var consumeEvt [1]cl.CL_event
	status = cl.CLEnqueueNDRangeKernel(
		commandQueue[1],
		consumeKernel,
		1,
		nil,
		globalThreads,
		localThreads,
		0,
		nil,
		&consumeEvt[0])
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "clEnqueueNDRangeKernel")

	status = cl.CLFlush(commandQueue[0])
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "clFlush(0)")

	status = cl.CLFlush(commandQueue[1])
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "clFlush(1)")

	//wait for kernels to finish
	status = cl.CLFinish(commandQueue[0])
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "clFinish(0)")

	status = cl.CLFinish(commandQueue[1])
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "clFinish(1)")

	//-----------------------------------------------------
	// STEP 11: Read the output buffer back to the host
	//-----------------------------------------------------
	// Use clEnqueueReadBuffer() to read the OpenCL output
	// buffer (bufferC)
	// to the host output array (C)
	//copy the data back to host buffer
	var readEvt cl.CL_event
	status = cl.CLEnqueueReadBuffer(commandQueue[1],
		devHist,
		cl.CL_TRUE,
		0,
		(MAX_HIST_BINS)*cl.CL_size_t(unsafe.Sizeof(localDevHist[0])),
		unsafe.Pointer(&localDevHist[0]),
		0,
		nil,
		&readEvt)
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "clEnqueueReadBuffer")

	//-----------------------------------------------------
	// STEP 12: Verify the results
	//-----------------------------------------------------
	//Find the tolerance limit
	fTol := (float32)(CONSUMER_GLOBAL_SIZE) * (float32)(COMP_TOL) / (float32)(100.0)
	iTol := (int)(fTol)
	if iTol == 0 {
		iTol = 1
	}

	//CPU side histogram computation
	CPUReference(seed, pipePktPerThread, rngType, cpuHist, histMax, histMin)

	//Compare
	for bin := 0; bin < MAX_HIST_BINS; bin++ {
		diff := int(localDevHist[bin] - cpuHist[bin])

		if diff < 0 {
			diff = -diff
		}
		if diff > iTol {
			println("Failed!")
			return
		}
	}

	println("Passed!")
}
Ejemplo n.º 3
0
func TestMatvec(t *testing.T) {
	/* Host/device data structures */
	var platform [1]cl.CL_platform_id
	var device [1]cl.CL_device_id
	var context cl.CL_context
	var queue cl.CL_command_queue
	var i, err cl.CL_int

	/* Program/kernel data structures */
	var program cl.CL_program
	var program_buffer [1][]byte
	var program_log interface{}
	var program_size [1]cl.CL_size_t
	var log_size cl.CL_size_t
	var kernel cl.CL_kernel

	/* Data and buffers */
	var mat [16]float32
	var vec, result [4]float32
	var correct = [4]float32{0.0, 0.0, 0.0, 0.0}
	var mat_buff, vec_buff, res_buff cl.CL_mem

	/* Initialize data to be processed by the kernel */
	for i = 0; i < 16; i++ {
		mat[i] = float32(i) * 2.0
	}

	for i = 0; i < 4; i++ {
		vec[i] = float32(i) * 3.0
		correct[0] += mat[i] * vec[i]
		correct[1] += mat[i+4] * vec[i]
		correct[2] += mat[i+8] * vec[i]
		correct[3] += mat[i+12] * vec[i]
	}

	/* Identify a platform */
	err = cl.CLGetPlatformIDs(1, platform[:], nil)
	if err < 0 {
		t.Errorf("Couldn't find any platforms")
	}

	/* Access a device */
	err = cl.CLGetDeviceIDs(platform[0], cl.CL_DEVICE_TYPE_GPU, 1, device[:], nil)
	if err < 0 {
		err = cl.CLGetDeviceIDs(platform[0], cl.CL_DEVICE_TYPE_CPU, 1, device[:], nil)
		if err < 0 {
			t.Errorf("Couldn't find any devices")
		}
	}

	/* Create the context */
	context = cl.CLCreateContext(nil, 1, device[:], nil, nil, &err)
	if err < 0 {
		t.Errorf("Couldn't create a context")
	}

	/* Read program file and place content into buffer */
	program_handle, err1 := os.Open("matvec.cl")
	if err1 != nil {
		t.Errorf("Couldn't find the program file")
	}
	defer program_handle.Close()

	fi, err2 := program_handle.Stat()
	if err2 != nil {
		t.Errorf("Couldn't find the program stat")
	}
	program_size[0] = cl.CL_size_t(fi.Size())
	program_buffer[0] = make([]byte, program_size[0])
	read_size, err3 := program_handle.Read(program_buffer[0])
	if err3 != nil || cl.CL_size_t(read_size) != program_size[0] {
		t.Errorf("read file error or file size wrong")
	}

	/* Create a program containing all program content */
	program = cl.CLCreateProgramWithSource(context, 1,
		program_buffer[:], program_size[:], &err)
	if err < 0 {
		t.Errorf("Couldn't create the program")
	}

	/* Build program */
	err = cl.CLBuildProgram(program, 1, device[:], nil, nil, nil)
	if err < 0 {
		/* Find size of log and print to std output */
		cl.CLGetProgramBuildInfo(program, device[0], cl.CL_PROGRAM_BUILD_LOG,
			0, nil, &log_size)
		//program_log = (char*) malloc(log_size+1);
		//program_log[log_size] = '\0';
		cl.CLGetProgramBuildInfo(program, device[0], cl.CL_PROGRAM_BUILD_LOG,
			log_size, &program_log, nil)
		t.Errorf("%s\n", program_log)
		//free(program_log);
	}

	/* Create kernel for the mat_vec_mult function */
	kernel = cl.CLCreateKernel(program, []byte("matvec_mult"), &err)
	if err < 0 {
		t.Errorf("Couldn't create the kernel")
		return
	}

	/* Create CL buffers to hold input and output data */
	mat_buff = cl.CLCreateBuffer(context, cl.CL_MEM_READ_ONLY|
		cl.CL_MEM_COPY_HOST_PTR, cl.CL_size_t(unsafe.Sizeof(mat)), unsafe.Pointer(&mat[0]), &err)
	if err < 0 {
		t.Errorf("Couldn't create a buffer object")
		return
	}
	vec_buff = cl.CLCreateBuffer(context, cl.CL_MEM_READ_ONLY|
		cl.CL_MEM_COPY_HOST_PTR, cl.CL_size_t(unsafe.Sizeof(vec)), unsafe.Pointer(&vec[0]), nil)
	res_buff = cl.CLCreateBuffer(context, cl.CL_MEM_WRITE_ONLY,
		cl.CL_size_t(unsafe.Sizeof(result)), nil, nil)

	/* Create kernel arguments from the CL buffers */
	err = cl.CLSetKernelArg(kernel, 0, cl.CL_size_t(unsafe.Sizeof(mat_buff)), unsafe.Pointer(&mat_buff))
	if err < 0 {
		t.Errorf("Couldn't set the kernel argument")
		return
	}
	cl.CLSetKernelArg(kernel, 1, cl.CL_size_t(unsafe.Sizeof(vec_buff)), unsafe.Pointer(&vec_buff))
	cl.CLSetKernelArg(kernel, 2, cl.CL_size_t(unsafe.Sizeof(res_buff)), unsafe.Pointer(&res_buff))

	/* Create a CL command queue for the device*/
	queue = cl.CLCreateCommandQueue(context, device[0], 0, &err)
	if err < 0 {
		t.Errorf("Couldn't create the command queue, errcode=%d\n", err)
		return
	}

	/* Enqueue the command queue to the device */
	var work_units_per_kernel = [1]cl.CL_size_t{4} /* 4 work-units per kernel */
	err = cl.CLEnqueueNDRangeKernel(queue, kernel, 1, nil, work_units_per_kernel[:],
		nil, 0, nil, nil)
	if err < 0 {
		t.Errorf("Couldn't enqueue the kernel execution command, errcode=%d\n", err)
		return
	}

	/* Read the result */
	err = cl.CLEnqueueReadBuffer(queue, res_buff, cl.CL_TRUE, 0, cl.CL_size_t(unsafe.Sizeof(result)),
		unsafe.Pointer(&result[0]), 0, nil, nil)
	if err < 0 {
		t.Errorf("Couldn't enqueue the read buffer command")
		return
	}

	/* Test the result */
	if (result[0] == correct[0]) && (result[1] == correct[1]) &&
		(result[2] == correct[2]) && (result[3] == correct[3]) {
		t.Logf("Matrix-vector multiplication successful.")
	} else {
		t.Errorf("Matrix-vector multiplication unsuccessful.")
	}

	/* Deallocate resources */
	cl.CLReleaseMemObject(mat_buff)
	cl.CLReleaseMemObject(vec_buff)
	cl.CLReleaseMemObject(res_buff)
	cl.CLReleaseKernel(kernel)
	cl.CLReleaseCommandQueue(queue)
	cl.CLReleaseProgram(program)
	cl.CLReleaseContext(context)
}
Ejemplo n.º 4
0
func main() {

	/* OpenCL data structures */
	var device []cl.CL_device_id
	var context cl.CL_context
	var queue cl.CL_command_queue
	var program *cl.CL_program
	var kernel cl.CL_kernel
	var err cl.CL_int

	/* Data and buffers */
	var select1 [4]float32
	var select2 [2]cl.CL_uchar
	var select1_buffer, select2_buffer cl.CL_mem

	/* Create a context */
	device = utils.Create_device()
	context = cl.CLCreateContext(nil, 1, device[:], nil, nil, &err)
	if err < 0 {
		println("Couldn't create a context")
		return
	}

	/* Create a kernel */
	program = utils.Build_program(context, device[:], PROGRAM_FILE, nil)
	kernel = cl.CLCreateKernel(*program, KERNEL_FUNC, &err)
	if err < 0 {
		println("Couldn't create a kernel")
		return
	}

	/* Create a write-only buffer to hold the output data */
	select1_buffer = cl.CLCreateBuffer(context, cl.CL_MEM_WRITE_ONLY,
		cl.CL_size_t(unsafe.Sizeof(select1)), nil, &err)
	if err < 0 {
		println("Couldn't create a buffer")
		return
	}
	select2_buffer = cl.CLCreateBuffer(context, cl.CL_MEM_WRITE_ONLY,
		cl.CL_size_t(unsafe.Sizeof(select2)), nil, &err)

	/* Create kernel argument */
	err = cl.CLSetKernelArg(kernel, 0, cl.CL_size_t(unsafe.Sizeof(select1_buffer)), unsafe.Pointer(&select1_buffer))
	if err < 0 {
		println("Couldn't set a kernel argument")
		return
	}
	cl.CLSetKernelArg(kernel, 1, cl.CL_size_t(unsafe.Sizeof(select2_buffer)), unsafe.Pointer(&select2_buffer))

	/* Create a command queue */
	queue = cl.CLCreateCommandQueue(context, device[0], 0, &err)
	if err < 0 {
		println("Couldn't create a command queue")
		return
	}

	/* Enqueue kernel */
	err = cl.CLEnqueueTask(queue, kernel, 0, nil, nil)
	if err < 0 {
		println("Couldn't enqueue the kernel")
		return
	}

	/* Read and print the result */
	err = cl.CLEnqueueReadBuffer(queue, select1_buffer, cl.CL_TRUE, 0,
		cl.CL_size_t(unsafe.Sizeof(select1)), unsafe.Pointer(&select1), 0, nil, nil)
	if err < 0 {
		println("Couldn't read the buffer")
		return
	}
	cl.CLEnqueueReadBuffer(queue, select2_buffer, cl.CL_TRUE, 0,
		cl.CL_size_t(unsafe.Sizeof(select2)), unsafe.Pointer(&select2), 0, nil, nil)

	fmt.Printf("select: ")
	for i := 0; i < 3; i++ {
		fmt.Printf("%.2f, ", select1[i])
	}
	fmt.Printf("%.2f\n", select1[3])

	fmt.Printf("bitselect: %X, %X\n", select2[0], select2[1])

	/* Deallocate resources */
	cl.CLReleaseMemObject(select1_buffer)
	cl.CLReleaseMemObject(select2_buffer)
	cl.CLReleaseKernel(kernel)
	cl.CLReleaseCommandQueue(queue)
	cl.CLReleaseProgram(*program)
	cl.CLReleaseContext(context)
}
Ejemplo n.º 5
0
func main() {
	// This code executes on the OpenCL host

	// Host data
	var size cl.CL_int
	var A []cl.CL_int //input array
	var B []cl.CL_int //input array
	var C []cl.CL_int //output array

	// Elements in each array
	const elements = cl.CL_size_t(2048)

	// Compute the size of the data
	datasize := cl.CL_size_t(unsafe.Sizeof(size)) * elements

	// Allocate space for input/output data
	A = make([]cl.CL_int, datasize)
	B = make([]cl.CL_int, datasize)
	C = make([]cl.CL_int, datasize)
	// Initialize the input data
	for i := cl.CL_int(0); i < cl.CL_int(elements); i++ {
		A[i] = i
		B[i] = i
	}

	// Use this to check the output of each API call
	var status cl.CL_int

	//-----------------------------------------------------
	// STEP 1: Discover and initialize the platforms
	//-----------------------------------------------------

	var numPlatforms cl.CL_uint
	var platforms []cl.CL_platform_id

	// Use clGetPlatformIDs() to retrieve the number of
	// platforms
	status = cl.CLGetPlatformIDs(0, nil, &numPlatforms)

	// Allocate enough space for each platform
	platforms = make([]cl.CL_platform_id, numPlatforms)

	// Fill in platforms with clGetPlatformIDs()
	status = cl.CLGetPlatformIDs(numPlatforms, platforms, nil)
	if status != cl.CL_SUCCESS {
		println("CLGetPlatformIDs status!=cl.CL_SUCCESS")
		return
	}
	//-----------------------------------------------------
	// STEP 2: Discover and initialize the devices
	//-----------------------------------------------------

	var numDevices cl.CL_uint
	var devices []cl.CL_device_id

	// Use clGetDeviceIDs() to retrieve the number of
	// devices present
	status = cl.CLGetDeviceIDs(platforms[0],
		cl.CL_DEVICE_TYPE_ALL,
		0,
		nil,
		&numDevices)
	if status != cl.CL_SUCCESS {
		println("CLGetDeviceIDs status!=cl.CL_SUCCESS")
		return
	}

	// Allocate enough space for each device
	devices = make([]cl.CL_device_id, numDevices)

	// Fill in devices with clGetDeviceIDs()
	status = cl.CLGetDeviceIDs(platforms[0],
		cl.CL_DEVICE_TYPE_ALL,
		numDevices,
		devices,
		nil)
	if status != cl.CL_SUCCESS {
		println("CLGetDeviceIDs status!=cl.CL_SUCCESS")
		return
	}
	//-----------------------------------------------------
	// STEP 3: Create a context
	//-----------------------------------------------------

	var context cl.CL_context

	// Create a context using clCreateContext() and
	// associate it with the devices
	context = cl.CLCreateContext(nil,
		numDevices,
		devices,
		nil,
		nil,
		&status)
	if status != cl.CL_SUCCESS {
		println("CLCreateContext status!=cl.CL_SUCCESS")
		return
	}
	//-----------------------------------------------------
	// STEP 4: Create a command queue
	//-----------------------------------------------------

	var cmdQueue cl.CL_command_queue

	// Create a command queue using clCreateCommandQueue(),
	// and associate it with the device you want to execute
	// on
	cmdQueue = cl.CLCreateCommandQueue(context,
		devices[0],
		0,
		&status)
	if status != cl.CL_SUCCESS {
		println("CLCreateCommandQueue status!=cl.CL_SUCCESS")
		return
	}
	//-----------------------------------------------------
	// STEP 5: Create device buffers
	//-----------------------------------------------------

	var bufferA cl.CL_mem // Input array on the device
	var bufferB cl.CL_mem // Input array on the device
	var bufferC cl.CL_mem // Output array on the device

	// Use clCreateBuffer() to create a buffer object (d_A)
	// that will contain the data from the host array A
	bufferA = cl.CLCreateBuffer(context,
		cl.CL_MEM_READ_ONLY,
		datasize,
		nil,
		&status)
	if status != cl.CL_SUCCESS {
		println("CLCreateBuffer status!=cl.CL_SUCCESS")
		return
	}
	// Use clCreateBuffer() to create a buffer object (d_B)
	// that will contain the data from the host array B
	bufferB = cl.CLCreateBuffer(context,
		cl.CL_MEM_READ_ONLY,
		datasize,
		nil,
		&status)
	if status != cl.CL_SUCCESS {
		println("CLCreateBuffer status!=cl.CL_SUCCESS")
		return
	}
	// Use clCreateBuffer() to create a buffer object (d_C)
	// with enough space to hold the output data
	bufferC = cl.CLCreateBuffer(context,
		cl.CL_MEM_WRITE_ONLY,
		datasize,
		nil,
		&status)
	if status != cl.CL_SUCCESS {
		println("CLCreateBuffer status!=cl.CL_SUCCESS")
		return
	}
	//-----------------------------------------------------
	// STEP 6: Write host data to device buffers
	//-----------------------------------------------------

	// Use clEnqueueWriteBuffer() to write input array A to
	// the device buffer bufferA
	status = cl.CLEnqueueWriteBuffer(cmdQueue,
		bufferA,
		cl.CL_FALSE,
		0,
		datasize,
		unsafe.Pointer(&A[0]),
		0,
		nil,
		nil)
	if status != cl.CL_SUCCESS {
		println("CLEnqueueWriteBuffer status!=cl.CL_SUCCESS")
		return
	}
	// Use clEnqueueWriteBuffer() to write input array B to
	// the device buffer bufferB
	status = cl.CLEnqueueWriteBuffer(cmdQueue,
		bufferB,
		cl.CL_FALSE,
		0,
		datasize,
		unsafe.Pointer(&B[0]),
		0,
		nil,
		nil)
	if status != cl.CL_SUCCESS {
		println("CLEnqueueWriteBuffer status!=cl.CL_SUCCESS")
		return
	}
	//-----------------------------------------------------
	// STEP 7: Create and compile the program
	//-----------------------------------------------------
	programSource, programeSize := utils.Load_programsource("chapter2.cl")

	// Create a program using clCreateProgramWithSource()
	program := cl.CLCreateProgramWithSource(context,
		1,
		programSource[:],
		programeSize[:],
		&status)
	if status != cl.CL_SUCCESS {
		println("CLCreateProgramWithSource status!=cl.CL_SUCCESS")
		return
	}
	// Build (compile) the program for the devices with
	// clBuildProgram()
	status = cl.CLBuildProgram(program,
		numDevices,
		devices,
		nil,
		nil,
		nil)
	if status != cl.CL_SUCCESS {
		println("CLBuildProgram status!=cl.CL_SUCCESS")
		return
	}
	//-----------------------------------------------------
	// STEP 8: Create the kernel
	//-----------------------------------------------------

	var kernel cl.CL_kernel

	// Use clCreateKernel() to create a kernel from the
	// vector addition function (named "vecadd")
	kernel = cl.CLCreateKernel(program, []byte("vecadd"), &status)
	if status != cl.CL_SUCCESS {
		println("CLCreateKernel status!=cl.CL_SUCCESS")
		return
	}
	//-----------------------------------------------------
	// STEP 9: Set the kernel arguments
	//-----------------------------------------------------

	// Associate the input and output buffers with the
	// kernel
	// using clSetKernelArg()
	status = cl.CLSetKernelArg(kernel,
		0,
		cl.CL_size_t(unsafe.Sizeof(bufferA)),
		unsafe.Pointer(&bufferA))
	status |= cl.CLSetKernelArg(kernel,
		1,
		cl.CL_size_t(unsafe.Sizeof(bufferB)),
		unsafe.Pointer(&bufferB))
	status |= cl.CLSetKernelArg(kernel,
		2,
		cl.CL_size_t(unsafe.Sizeof(bufferC)),
		unsafe.Pointer(&bufferC))
	if status != cl.CL_SUCCESS {
		println("CLSetKernelArg status!=cl.CL_SUCCESS")
		return
	}
	//-----------------------------------------------------
	// STEP 10: Configure the work-item structure
	//-----------------------------------------------------

	// Define an index space (global work size) of work
	// items for
	// execution. A workgroup size (local work size) is not
	// required,
	// but can be used.
	var globalWorkSize [1]cl.CL_size_t
	// There are 'elements' work-items
	globalWorkSize[0] = elements

	//-----------------------------------------------------
	// STEP 11: Enqueue the kernel for execution
	//-----------------------------------------------------

	// Execute the kernel by using
	// clEnqueueNDRangeKernel().
	// 'globalWorkSize' is the 1D dimension of the
	// work-items
	status = cl.CLEnqueueNDRangeKernel(cmdQueue,
		kernel,
		1,
		nil,
		globalWorkSize[:],
		nil,
		0,
		nil,
		nil)
	if status != cl.CL_SUCCESS {
		println("CLEnqueueNDRangeKernel status!=cl.CL_SUCCESS")
		return
	}
	//-----------------------------------------------------
	// STEP 12: Read the output buffer back to the host
	//-----------------------------------------------------

	// Use clEnqueueReadBuffer() to read the OpenCL output
	// buffer (bufferC)
	// to the host output array (C)
	cl.CLEnqueueReadBuffer(cmdQueue,
		bufferC,
		cl.CL_TRUE,
		0,
		datasize,
		unsafe.Pointer(&C[0]),
		0,
		nil,
		nil)
	if status != cl.CL_SUCCESS {
		println("CLEnqueueReadBuffer status!=cl.CL_SUCCESS")
		return
	}
	// Verify the output
	result := true
	for i := cl.CL_int(0); i < cl.CL_int(elements); i++ {
		if C[i] != i+i {
			result = false
			break
		}
	}
	if result {
		println("Output is correct\n")
	} else {
		println("Output is incorrect\n")
	}

	//-----------------------------------------------------
	// STEP 13: Release OpenCL resources
	//-----------------------------------------------------

	// Free OpenCL resources
	cl.CLReleaseKernel(kernel)
	cl.CLReleaseProgram(program)
	cl.CLReleaseCommandQueue(cmdQueue)
	cl.CLReleaseMemObject(bufferA)
	cl.CLReleaseMemObject(bufferB)
	cl.CLReleaseMemObject(bufferC)
	cl.CLReleaseContext(context)
}
Ejemplo n.º 6
0
func main() {

	/* Host/device data structures */
	var device []cl.CL_device_id
	var context cl.CL_context
	var queue cl.CL_command_queue
	var program *cl.CL_program
	var kernel cl.CL_kernel
	var err cl.CL_int

	/* Data and buffers */
	var test [16]byte
	var test_buffer cl.CL_mem

	/* Create a context */
	device = utils.Create_device()
	context = cl.CLCreateContext(nil, 1, device[:], nil, nil, &err)
	if err < 0 {
		println("Couldn't create a context")
		return
	}

	/* Build the program and create a kernel */
	program = utils.Build_program(context, device[:], PROGRAM_FILE, nil)
	kernel = cl.CLCreateKernel(*program, KERNEL_FUNC, &err)
	if err < 0 {
		println("Couldn't create a kernel")
		return
	}

	/* Create a write-only buffer to hold the output data */
	test_buffer = cl.CLCreateBuffer(context, cl.CL_MEM_WRITE_ONLY,
		cl.CL_size_t(unsafe.Sizeof(test)), nil, &err)
	if err < 0 {
		println("Couldn't create a buffer")
		return
	}

	/* Create kernel argument */
	err = cl.CLSetKernelArg(kernel, 0, cl.CL_size_t(unsafe.Sizeof(test_buffer)), unsafe.Pointer(&test_buffer))
	if err < 0 {
		println("Couldn't set a kernel argument")
		return
	}

	/* Create a command queue */
	queue = cl.CLCreateCommandQueue(context, device[0], 0, &err)
	if err < 0 {
		println("Couldn't create a command queue")
		return
	}

	/* Enqueue kernel */
	err = cl.CLEnqueueTask(queue, kernel, 0, nil, nil)
	if err < 0 {
		println("Couldn't enqueue the kernel")
		return
	}

	/* Read and print the result */
	err = cl.CLEnqueueReadBuffer(queue, test_buffer, cl.CL_TRUE, 0,
		cl.CL_size_t(unsafe.Sizeof(test)), unsafe.Pointer(&test), 0, nil, nil)
	if err < 0 {
		println("Couldn't read the buffer")
		return
	}
	for i := 0; i < 15; i++ {
		fmt.Printf("0x%X, ", test[i])
	}
	fmt.Printf("0x%X\n", test[15])

	/* Deallocate resources */
	cl.CLReleaseMemObject(test_buffer)
	cl.CLReleaseKernel(kernel)
	cl.CLReleaseCommandQueue(queue)
	cl.CLReleaseProgram(*program)
	cl.CLReleaseContext(context)
}
Ejemplo n.º 7
0
func main() {

	/* OpenCL data structures */
	var device []cl.CL_device_id
	var context cl.CL_context
	var queue cl.CL_command_queue
	var program *cl.CL_program
	var kernel cl.CL_kernel
	var err cl.CL_int

	/* Data and buffers */
	dim := cl.CL_uint(2)
	var global_offset = [2]cl.CL_size_t{3, 5}
	var global_size = [2]cl.CL_size_t{6, 4}
	var local_size = [2]cl.CL_size_t{3, 2}
	var test [24]float32
	var test_buffer cl.CL_mem

	/* Create a device and context */
	device = utils.Create_device()
	context = cl.CLCreateContext(nil, 1, device[:], nil, nil, &err)
	if err < 0 {
		println("Couldn't create a context")
		return
	}

	/* Build the program and create a kernel */
	program = utils.Build_program(context, device[:], PROGRAM_FILE, nil)
	kernel = cl.CLCreateKernel(*program, KERNEL_FUNC, &err)
	if err < 0 {
		println("Couldn't create a kernel")
		return
	}

	/* Create a write-only buffer to hold the output data */
	test_buffer = cl.CLCreateBuffer(context, cl.CL_MEM_WRITE_ONLY,
		cl.CL_size_t(unsafe.Sizeof(test)), nil, &err)
	if err < 0 {
		println("Couldn't create a buffer")
		return
	}

	/* Create kernel argument */
	err = cl.CLSetKernelArg(kernel, 0, cl.CL_size_t(unsafe.Sizeof(test_buffer)), unsafe.Pointer(&test_buffer))
	if err < 0 {
		println("Couldn't set a kernel argument")
		return
	}

	/* Create a command queue */
	queue = cl.CLCreateCommandQueue(context, device[0], 0, &err)
	if err < 0 {
		println("Couldn't create a command queue")
		return
	}

	/* Enqueue kernel */
	err = cl.CLEnqueueNDRangeKernel(queue, kernel, dim, global_offset[:],
		global_size[:], local_size[:], 0, nil, nil)
	if err < 0 {
		println("Couldn't enqueue the kernel")
		return
	}

	/* Read and print the result */
	err = cl.CLEnqueueReadBuffer(queue, test_buffer, cl.CL_TRUE, 0,
		cl.CL_size_t(unsafe.Sizeof(test)), unsafe.Pointer(&test), 0, nil, nil)
	if err < 0 {
		println("Couldn't read the buffer")
		return
	}

	for i := 0; i < 24; i += 6 {
		fmt.Printf("%.2f     %.2f     %.2f     %.2f     %.2f     %.2f\n",
			test[i], test[i+1], test[i+2], test[i+3], test[i+4], test[i+5])
	}

	/* Deallocate resources */
	cl.CLReleaseMemObject(test_buffer)
	cl.CLReleaseKernel(kernel)
	cl.CLReleaseCommandQueue(queue)
	cl.CLReleaseProgram(*program)
	cl.CLReleaseContext(context)
}
Ejemplo n.º 8
0
func main() {

	/* OpenCL data structures */
	var device []cl.CL_device_id
	var context cl.CL_context
	var queue cl.CL_command_queue
	var program *cl.CL_program
	var kernel cl.CL_kernel
	var err cl.CL_int

	/* Data and events */
	var data []float32
	var data_buffer cl.CL_mem
	var user_event, kernel_event, read_event [1]cl.CL_event

	/* Initialize data */
	data = make([]float32, 4)
	for i := 0; i < 4; i++ {
		data[i] = float32(i) * 1.0
	}

	/* Create a device and context */
	device = utils.Create_device()
	context = cl.CLCreateContext(nil, 1, device[:], nil, nil, &err)
	if err < 0 {
		println("Couldn't create a context")
		return
	}

	/* Build the program and create a kernel */
	program = utils.Build_program(context, device[:], PROGRAM_FILE, nil)
	kernel = cl.CLCreateKernel(*program, KERNEL_FUNC, &err)
	if err < 0 {
		println("Couldn't create a kernel")
		return
	}

	/* Create a buffer to hold data */
	data_buffer = cl.CLCreateBuffer(context,
		cl.CL_MEM_READ_WRITE|cl.CL_MEM_COPY_HOST_PTR,
		cl.CL_size_t(unsafe.Sizeof(data[0]))*4, unsafe.Pointer(&data[0]), &err)
	if err < 0 {
		println("Couldn't create a buffer")
		return
	}

	/* Create kernel argument */
	err = cl.CLSetKernelArg(kernel, 0, cl.CL_size_t(unsafe.Sizeof(data_buffer)), unsafe.Pointer(&data_buffer))
	if err < 0 {
		println("Couldn't set a kernel argument")
		return
	}

	/* Create a command queue */
	queue = cl.CLCreateCommandQueue(context, device[0],
		cl.CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err)
	if err < 0 {
		println("Couldn't create a command queue")
		return
	}

	/* Configure events */
	user_event[0] = cl.CLCreateUserEvent(context, &err)
	if err < 0 {
		println("Couldn't enqueue the kernel")
		return
	}

	/* Enqueue kernel */
	err = cl.CLEnqueueTask(queue, kernel, 1, user_event[:], &kernel_event[0])
	if err < 0 {
		println("Couldn't enqueue the kernel")
		return
	}

	/* Read the buffer */
	err = cl.CLEnqueueReadBuffer(queue, data_buffer, cl.CL_FALSE, 0,
		cl.CL_size_t(unsafe.Sizeof(data[0]))*4, unsafe.Pointer(&data[0]), 1, kernel_event[:], &read_event[0])
	if err < 0 {
		println("Couldn't read the buffer")
		return
	}

	/* Set callback for event */
	err = cl.CLSetEventCallback(read_event[0], cl.CL_COMPLETE,
		read_complete, unsafe.Pointer(&data))
	if err < 0 {
		println("Couldn't set callback for event")
		return
	}

	/* Sleep for a second to demonstrate the that commands haven't
	   started executing. Then prompt user */
	time.Sleep(1)
	fmt.Printf("Old data: %4.2f, %4.2f, %4.2f, %4.2f\n",
		data[0], data[1], data[2], data[3])
	fmt.Printf("Press ENTER to continue.\n")
	//getchar();
	reader := bufio.NewReader(os.Stdin)
	reader.ReadString('\n')

	/* Set user event to success */
	cl.CLSetUserEventStatus(user_event[0], cl.CL_SUCCESS)

	/* Deallocate resources */
	cl.CLReleaseEvent(read_event[0])
	cl.CLReleaseEvent(kernel_event[0])
	cl.CLReleaseEvent(user_event[0])
	cl.CLReleaseMemObject(data_buffer)
	cl.CLReleaseKernel(kernel)
	cl.CLReleaseCommandQueue(queue)
	cl.CLReleaseProgram(*program)
	cl.CLReleaseContext(context)
}
Ejemplo n.º 9
0
Archivo: atomic.go Proyecto: xfong/gocl
func main() {

	/* OpenCL data structures */
	var device []cl.CL_device_id
	var context cl.CL_context
	var queue cl.CL_command_queue
	var program *cl.CL_program
	var kernel cl.CL_kernel
	var err cl.CL_int

	var offset, global_size, local_size [1]cl.CL_size_t

	/* Data and events */
	var data [2]cl.CL_int
	var data_buffer cl.CL_mem

	/* Create a device and context */
	device = utils.Create_device()
	context = cl.CLCreateContext(nil, 1, device[:], nil, nil, &err)
	if err < 0 {
		println("Couldn't create a context")
		return
	}

	/* Build the program and create a kernel */
	program = utils.Build_program(context, device[:], PROGRAM_FILE, nil)
	kernel = cl.CLCreateKernel(*program, KERNEL_FUNC, &err)
	if err < 0 {
		println("Couldn't create a kernel")
		return
	}

	/* Create a buffer to hold data */
	data_buffer = cl.CLCreateBuffer(context, cl.CL_MEM_WRITE_ONLY,
		cl.CL_size_t(unsafe.Sizeof(data[0]))*2, nil, &err)
	if err < 0 {
		println("Couldn't create a buffer")
		return
	}

	/* Create kernel argument */
	err = cl.CLSetKernelArg(kernel, 0, cl.CL_size_t(unsafe.Sizeof(data_buffer)), unsafe.Pointer(&data_buffer))
	if err < 0 {
		println("Couldn't set a kernel argument")
		return
	}

	/* Create a command queue */
	queue = cl.CLCreateCommandQueue(context, device[0], 0, &err)
	if err < 0 {
		println("Couldn't create a command queue")
		return
	}

	/* Enqueue kernel */
	offset[0] = 0
	global_size[0] = 8
	local_size[0] = 4
	err = cl.CLEnqueueNDRangeKernel(queue, kernel, 1, offset[:], global_size[:], local_size[:], 0, nil, nil)
	if err < 0 {
		println("Couldn't enqueue the kernel")
		return
	}

	/* Read the buffer */
	err = cl.CLEnqueueReadBuffer(queue, data_buffer, cl.CL_TRUE, 0,
		cl.CL_size_t(unsafe.Sizeof(data[0]))*2, unsafe.Pointer(&data[0]), 0, nil, nil)
	if err < 0 {
		println("Couldn't read the buffer")
		return
	}

	fmt.Printf("Increment: %d\n", data[0])
	fmt.Printf("Atomic increment: %d\n", data[1])

	/* Deallocate resources */
	cl.CLReleaseMemObject(data_buffer)
	cl.CLReleaseKernel(kernel)
	cl.CLReleaseCommandQueue(queue)
	cl.CLReleaseProgram(*program)
	cl.CLReleaseContext(context)
}
Ejemplo n.º 10
0
func main() {

	/* OpenCL data structures */
	var device []cl.CL_device_id
	var context cl.CL_context
	var queue cl.CL_command_queue
	var program *cl.CL_program
	var kernel cl.CL_kernel
	var err cl.CL_int

	/* Data and buffers */
	var a float32 = 6.0
	var b float32 = 2.0
	var result float32
	var a_buffer, b_buffer, output_buffer cl.CL_mem

	/* Extension data */
	var sizeofuint cl.CL_uint
	var addr_data interface{}
	var ext_data interface{}
	fp64_ext := "cl_khr_fp64"
	var ext_size cl.CL_size_t
	var options []byte

	/* Create a device and context */
	device = utils.Create_device()
	context = cl.CLCreateContext(nil, 1, device[:], nil, nil, &err)
	if err < 0 {
		println("Couldn't create a context")
		return
	}

	/* Obtain the device data */
	if cl.CLGetDeviceInfo(device[0], cl.CL_DEVICE_ADDRESS_BITS,
		cl.CL_size_t(unsafe.Sizeof(sizeofuint)), &addr_data, nil) < 0 {
		println("Couldn't read extension data")
		return
	}
	fmt.Printf("Address width: %v\n", addr_data.(cl.CL_uint))

	/* Define "FP_64" option if doubles are supported */
	cl.CLGetDeviceInfo(device[0], cl.CL_DEVICE_EXTENSIONS,
		0, nil, &ext_size)
	// ext_data = (char*)malloc(ext_size + 1);
	// ext_data[ext_size] = '\0';
	cl.CLGetDeviceInfo(device[0], cl.CL_DEVICE_EXTENSIONS,
		ext_size, &ext_data, nil)
	if strings.Contains(ext_data.(string), fp64_ext) {
		fmt.Printf("The %s extension is supported.\n", fp64_ext)
		options = []byte("-DFP_64 ")
	} else {
		fmt.Printf("The %s extension is not supported. %s\n", fp64_ext, ext_data.(string))
	}

	/* Build the program and create the kernel */
	program = utils.Build_program(context, device[:], PROGRAM_FILE, options)
	kernel = cl.CLCreateKernel(*program, KERNEL_FUNC, &err)
	if err < 0 {
		println("Couldn't create a kernel")
		return
	}

	/* Create CL buffers to hold input and output data */
	a_buffer = cl.CLCreateBuffer(context, cl.CL_MEM_READ_ONLY|
		cl.CL_MEM_COPY_HOST_PTR, cl.CL_size_t(unsafe.Sizeof(a)), unsafe.Pointer(&a), &err)
	if err < 0 {
		println("Couldn't create a memory object")
		return
	}

	b_buffer = cl.CLCreateBuffer(context, cl.CL_MEM_READ_ONLY|
		cl.CL_MEM_COPY_HOST_PTR, cl.CL_size_t(unsafe.Sizeof(b)), unsafe.Pointer(&b), nil)
	output_buffer = cl.CLCreateBuffer(context, cl.CL_MEM_WRITE_ONLY,
		cl.CL_size_t(unsafe.Sizeof(b)), nil, nil)

	/* Create kernel arguments */
	err = cl.CLSetKernelArg(kernel, 0, cl.CL_size_t(unsafe.Sizeof(a_buffer)), unsafe.Pointer(&a_buffer))
	if err < 0 {
		println("Couldn't set a kernel argument")
		return
	}
	cl.CLSetKernelArg(kernel, 1, cl.CL_size_t(unsafe.Sizeof(b_buffer)), unsafe.Pointer(&b_buffer))
	cl.CLSetKernelArg(kernel, 2, cl.CL_size_t(unsafe.Sizeof(output_buffer)), unsafe.Pointer(&output_buffer))

	/* Create a command queue */
	queue = cl.CLCreateCommandQueue(context, device[0], 0, &err)
	if err < 0 {
		println("Couldn't create a command queue")
		return
	}

	/* Enqueue kernel */
	err = cl.CLEnqueueTask(queue, kernel, 0, nil, nil)
	if err < 0 {
		println("Couldn't enqueue the kernel")
		return
	}

	/* Read and print the result */
	err = cl.CLEnqueueReadBuffer(queue, output_buffer, cl.CL_TRUE, 0,
		cl.CL_size_t(unsafe.Sizeof(result)), unsafe.Pointer(&result), 0, nil, nil)
	if err < 0 {
		println("Couldn't read the output buffer")
		return
	}
	fmt.Printf("The kernel result is %f\n", result)

	/* Deallocate resources */
	cl.CLReleaseMemObject(a_buffer)
	cl.CLReleaseMemObject(b_buffer)
	cl.CLReleaseMemObject(output_buffer)
	cl.CLReleaseKernel(kernel)
	cl.CLReleaseCommandQueue(queue)
	cl.CLReleaseProgram(*program)
	cl.CLReleaseContext(context)
}
Ejemplo n.º 11
0
func main() {

	/* OpenCL data structures */
	var device []cl.CL_device_id
	var context cl.CL_context
	var queue cl.CL_command_queue
	var program *cl.CL_program
	var kernel cl.CL_kernel
	var err cl.CL_int

	/* Data and events */
	var num_vectors cl.CL_int
	var data [NUM_BYTES]byte
	var data_buffer cl.CL_mem
	var prof_event cl.CL_event
	var total_time cl.CL_ulong
	var time_start, time_end interface{}
	var mapped_memory unsafe.Pointer

	/* Create a device and context */
	device = utils.Create_device()
	context = cl.CLCreateContext(nil, 1, device[:], nil, nil, &err)
	if err < 0 {
		println("Couldn't create a context")
		return
	}

	/* Build the program and create a kernel */
	program = utils.Build_program(context, device[:], PROGRAM_FILE, nil)
	kernel = cl.CLCreateKernel(*program, KERNEL_FUNC, &err)
	if err < 0 {
		println("Couldn't create a kernel")
		return
	}

	/* Create a buffer to hold data */
	data_buffer = cl.CLCreateBuffer(context, cl.CL_MEM_WRITE_ONLY,
		cl.CL_size_t(unsafe.Sizeof(data[0]))*NUM_BYTES, nil, &err)
	if err < 0 {
		println("Couldn't create a buffer")
		return
	}

	/* Create kernel argument */
	err = cl.CLSetKernelArg(kernel, 0, cl.CL_size_t(unsafe.Sizeof(data_buffer)), unsafe.Pointer(&data_buffer))
	if err < 0 {
		println("Couldn't set a kernel argument")
		return
	}

	/* Tell kernel number of char16 vectors */
	num_vectors = NUM_BYTES / 16
	cl.CLSetKernelArg(kernel, 1, cl.CL_size_t(unsafe.Sizeof(num_vectors)), unsafe.Pointer(&num_vectors))

	/* Create a command queue */
	queue = cl.CLCreateCommandQueue(context, device[0],
		cl.CL_QUEUE_PROFILING_ENABLE, &err)
	if err < 0 {
		println("Couldn't create a command queue")
		return
	}

	total_time = 0.0
	for i := 0; i < NUM_ITERATIONS; i++ {

		/* Enqueue kernel */
		err = cl.CLEnqueueTask(queue, kernel, 0, nil, nil)
		if err < 0 {
			println("Couldn't enqueue the kernel")
			return
		}

		if PROFILE_READ == 1 {
			/* Read the buffer */
			err = cl.CLEnqueueReadBuffer(queue, data_buffer, cl.CL_TRUE, 0,
				cl.CL_size_t(unsafe.Sizeof(data[0]))*NUM_BYTES, unsafe.Pointer(&data[0]), 0, nil, &prof_event)
			if err < 0 {
				println("Couldn't read the buffer")
				return
			}
		} else {
			/* Create memory map */
			mapped_memory = cl.CLEnqueueMapBuffer(queue, data_buffer, cl.CL_TRUE,
				cl.CL_MAP_READ, 0, cl.CL_size_t(unsafe.Sizeof(data[0]))*NUM_BYTES, 0, nil, &prof_event, &err)
			if err < 0 {
				println("Couldn't map the buffer to host memory")
				return
			}
		}

		/* Get profiling information */
		cl.CLGetEventProfilingInfo(prof_event, cl.CL_PROFILING_COMMAND_START,
			cl.CL_size_t(unsafe.Sizeof(total_time)), &time_start, nil)
		cl.CLGetEventProfilingInfo(prof_event, cl.CL_PROFILING_COMMAND_END,
			cl.CL_size_t(unsafe.Sizeof(total_time)), &time_end, nil)
		total_time += time_end.(cl.CL_ulong) - time_start.(cl.CL_ulong)

		if PROFILE_READ == 0 {
			/* Unmap the buffer */
			err = cl.CLEnqueueUnmapMemObject(queue, data_buffer, mapped_memory,
				0, nil, nil)
			if err < 0 {
				println("Couldn't unmap the buffer")
				return
			}
		}
	}

	if PROFILE_READ == 1 {
		fmt.Printf("Average read time: %v\n", total_time/NUM_ITERATIONS)
	} else {
		fmt.Printf("Average map time: %v\n", total_time/NUM_ITERATIONS)
	}

	/* Deallocate resources */
	cl.CLReleaseEvent(prof_event)
	cl.CLReleaseMemObject(data_buffer)
	cl.CLReleaseKernel(kernel)
	cl.CLReleaseCommandQueue(queue)
	cl.CLReleaseProgram(*program)
	cl.CLReleaseContext(context)
}
Ejemplo n.º 12
0
func main() {

	/* OpenCL data structures */
	var device []cl.CL_device_id
	var context cl.CL_context
	var queue cl.CL_command_queue
	var program *cl.CL_program
	var kernel cl.CL_kernel
	var err cl.CL_int

	/* Data and buffers */
	var r_coords = [4]float32{2, 1, 3, 4}
	var angles = [4]float32{3 * M_PI / 8, 3 * M_PI / 4, 4 * M_PI / 3, 11 * M_PI / 6}
	var x_coords, y_coords [4]float32
	var r_coords_buffer, angles_buffer,
		x_coords_buffer, y_coords_buffer cl.CL_mem

	/* Create a device and context */
	device = utils.Create_device()
	context = cl.CLCreateContext(nil, 1, device[:], nil, nil, &err)
	if err < 0 {
		println("Couldn't create a context")
		return
	}

	/* Create a kernel */
	program = utils.Build_program(context, device[:], PROGRAM_FILE, nil)
	kernel = cl.CLCreateKernel(*program, KERNEL_FUNC, &err)
	if err < 0 {
		println("Couldn't create a kernel")
		return
	}

	/* Create a write-only buffer to hold the output data */
	r_coords_buffer = cl.CLCreateBuffer(context, cl.CL_MEM_READ_ONLY|cl.CL_MEM_COPY_HOST_PTR,
		cl.CL_size_t(unsafe.Sizeof(r_coords)), unsafe.Pointer(&r_coords[0]), &err)
	if err < 0 {
		println("Couldn't create a buffer")
		return
	}
	angles_buffer = cl.CLCreateBuffer(context, cl.CL_MEM_READ_ONLY|cl.CL_MEM_COPY_HOST_PTR,
		cl.CL_size_t(unsafe.Sizeof(angles)), unsafe.Pointer(&angles[0]), &err)
	x_coords_buffer = cl.CLCreateBuffer(context, cl.CL_MEM_READ_WRITE,
		cl.CL_size_t(unsafe.Sizeof(x_coords)), nil, &err)
	y_coords_buffer = cl.CLCreateBuffer(context, cl.CL_MEM_READ_WRITE,
		cl.CL_size_t(unsafe.Sizeof(y_coords)), nil, &err)

	/* Create kernel argument */
	err = cl.CLSetKernelArg(kernel, 0, cl.CL_size_t(unsafe.Sizeof(r_coords_buffer)), unsafe.Pointer(&r_coords_buffer))
	if err < 0 {
		println("Couldn't set a kernel argument")
		return
	}
	cl.CLSetKernelArg(kernel, 1, cl.CL_size_t(unsafe.Sizeof(angles_buffer)), unsafe.Pointer(&angles_buffer))
	cl.CLSetKernelArg(kernel, 2, cl.CL_size_t(unsafe.Sizeof(x_coords_buffer)), unsafe.Pointer(&x_coords_buffer))
	cl.CLSetKernelArg(kernel, 3, cl.CL_size_t(unsafe.Sizeof(y_coords_buffer)), unsafe.Pointer(&y_coords_buffer))

	/* Create a command queue */
	queue = cl.CLCreateCommandQueue(context, device[0], 0, &err)
	if err < 0 {
		println("Couldn't create a command queue")
		return
	}

	/* Enqueue kernel */
	err = cl.CLEnqueueTask(queue, kernel, 0, nil, nil)
	if err < 0 {
		println("Couldn't enqueue the kernel")
		return
	}

	/* Read and print the result */
	err = cl.CLEnqueueReadBuffer(queue, x_coords_buffer, cl.CL_TRUE, 0,
		cl.CL_size_t(unsafe.Sizeof(x_coords)), unsafe.Pointer(&x_coords), 0, nil, nil)
	if err < 0 {
		println("Couldn't read the buffer")
		return
	}
	cl.CLEnqueueReadBuffer(queue, y_coords_buffer, cl.CL_TRUE, 0,
		cl.CL_size_t(unsafe.Sizeof(y_coords)), unsafe.Pointer(&y_coords), 0, nil, nil)

	/* Display the results */
	for i := 0; i < 4; i++ {
		fmt.Printf("(%6.3f, %6.3f)\n", x_coords[i], y_coords[i])
	}

	/* Deallocate resources */
	cl.CLReleaseMemObject(r_coords_buffer)
	cl.CLReleaseMemObject(angles_buffer)
	cl.CLReleaseMemObject(x_coords_buffer)
	cl.CLReleaseMemObject(y_coords_buffer)
	cl.CLReleaseKernel(kernel)
	cl.CLReleaseCommandQueue(queue)
	cl.CLReleaseProgram(*program)
	cl.CLReleaseContext(context)
}