Beispiel #1
0
func (this *kernel) GetWorkGroupInfo(device Device,
	param_name cl.CL_kernel_work_group_info) (interface{}, error) {
	/* param data */
	var param_value interface{}
	var param_size cl.CL_size_t
	var errCode cl.CL_int

	/* Find size of param data */
	if errCode = cl.CLGetKernelWorkGroupInfo(this.kernel_id, device.GetID(), param_name, 0, nil, &param_size); errCode != cl.CL_SUCCESS {
		return nil, fmt.Errorf("GetWorkGroupInfo failure with errcode_ret %d: %s", errCode, cl.ERROR_CODES_STRINGS[-errCode])
	}

	/* Access param data */
	if errCode = cl.CLGetKernelWorkGroupInfo(this.kernel_id, device.GetID(), param_name, param_size, &param_value, nil); errCode != cl.CL_SUCCESS {
		return nil, fmt.Errorf("GetWorkGroupInfo failure with errcode_ret %d: %s", errCode, cl.ERROR_CODES_STRINGS[-errCode])
	}

	return param_value, nil
}
Beispiel #2
0
Datei: bst.go Projekt: xfong/gocl
func main() {
	// Use this to check the output of each API call
	var status cl.CL_int

	//-----------------------------------------------------
	// STEP 1: Discover and initialize the platforms
	//-----------------------------------------------------
	var numPlatforms cl.CL_uint
	var platforms []cl.CL_platform_id

	// Use clGetPlatformIDs() to retrieve the number of
	// platforms
	status = cl.CLGetPlatformIDs(0, nil, &numPlatforms)

	// Allocate enough space for each platform
	platforms = make([]cl.CL_platform_id, numPlatforms)

	// Fill in platforms with clGetPlatformIDs()
	status = cl.CLGetPlatformIDs(numPlatforms, platforms, nil)
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "CLGetPlatformIDs")

	//-----------------------------------------------------
	// STEP 2: Discover and initialize the GPU devices
	//-----------------------------------------------------
	var numDevices cl.CL_uint
	var devices []cl.CL_device_id

	// Use clGetDeviceIDs() to retrieve the number of
	// devices present
	status = cl.CLGetDeviceIDs(platforms[0],
		cl.CL_DEVICE_TYPE_GPU,
		0,
		nil,
		&numDevices)
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "CLGetDeviceIDs")

	// Allocate enough space for each device
	devices = make([]cl.CL_device_id, numDevices)

	// Fill in devices with clGetDeviceIDs()
	status = cl.CLGetDeviceIDs(platforms[0],
		cl.CL_DEVICE_TYPE_GPU,
		numDevices,
		devices,
		nil)
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "CLGetDeviceIDs")

	//-----------------------------------------------------
	// STEP 3: Create a context
	//-----------------------------------------------------
	var context cl.CL_context

	// Create a context using clCreateContext() and
	// associate it with the devices
	context = cl.CLCreateContext(nil,
		numDevices,
		devices,
		nil,
		nil,
		&status)
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "CLCreateContext")
	defer cl.CLReleaseContext(context)

	//-----------------------------------------------------
	// STEP 4: Create a command queue
	//-----------------------------------------------------
	var cmdQueue cl.CL_command_queue

	// Create a command queue using clCreateCommandQueueWithProperties(),
	// and associate it with the device you want to execute
	cmdQueue = cl.CLCreateCommandQueueWithProperties(context,
		devices[0],
		nil,
		&status)
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "CLCreateCommandQueueWithProperties")
	defer cl.CLReleaseCommandQueue(cmdQueue)

	//-----------------------------------------------------
	// STEP 5: Create device buffers
	//-----------------------------------------------------
	// initialize any device/SVM memory here.

	/* svm buffer for binary tree */
	svmTreeBuf := cl.CLSVMAlloc(context,
		cl.CL_MEM_READ_WRITE,
		cl.CL_size_t(NUMBER_OF_NODES*unsafe.Sizeof(sampleNode)),
		0)
	if nil == svmTreeBuf {
		println("clSVMAlloc(svmTreeBuf) failed.")
		return
	}
	defer cl.CLSVMFree(context, svmTreeBuf)

	/* svm buffer for search keys */
	svmSearchBuf := cl.CLSVMAlloc(context,
		cl.CL_MEM_READ_WRITE,
		cl.CL_size_t(NUMBER_OF_SEARCH_KEY*unsafe.Sizeof(sampleKey)),
		0)
	if nil == svmSearchBuf {
		println("clSVMAlloc(svmSearchBuf) failed.")
		return
	}
	defer cl.CLSVMFree(context, svmSearchBuf)

	//create the binary tree and set the root
	/* root node of the binary tree */
	svmRoot := cpuCreateBinaryTree(cmdQueue, svmTreeBuf)

	//initialize search keys
	cpuInitSearchKeys(cmdQueue, svmSearchBuf)

	/* if voice is not deliberately muzzled, shout parameters */
	fmt.Printf("-------------------------------------------------------------------------\n")
	fmt.Printf("Searching %d keys in a BST having %d Nodes...\n", NUMBER_OF_SEARCH_KEY, NUMBER_OF_NODES)
	fmt.Printf("-------------------------------------------------------------------------\n")

	//-----------------------------------------------------
	// STEP 6: Create and compile the program
	//-----------------------------------------------------
	programSource, programeSize := utils.Load_programsource("bst.cl")

	// Create a program using clCreateProgramWithSource()
	program := cl.CLCreateProgramWithSource(context,
		1,
		programSource[:],
		programeSize[:],
		&status)
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "CLCreateProgramWithSource")
	defer cl.CLReleaseProgram(program)

	// Build (compile) the program for the devices with
	// clBuildProgram()
	options := "-cl-std=CL2.0"
	status = cl.CLBuildProgram(program,
		numDevices,
		devices,
		[]byte(options),
		nil,
		nil)
	if status != cl.CL_SUCCESS {
		var program_log interface{}
		var log_size cl.CL_size_t

		/* Find size of log and print to std output */
		cl.CLGetProgramBuildInfo(program, devices[0], cl.CL_PROGRAM_BUILD_LOG,
			0, nil, &log_size)
		cl.CLGetProgramBuildInfo(program, devices[0], cl.CL_PROGRAM_BUILD_LOG,
			log_size, &program_log, nil)
		fmt.Printf("%s\n", program_log)
		return
	}
	//utils.CHECK_STATUS(status, cl.CL_SUCCESS, "CLBuildProgram")

	//-----------------------------------------------------
	// STEP 7: Create the kernel
	//-----------------------------------------------------
	var kernel cl.CL_kernel

	// Use clCreateKernel() to create a kernel
	kernel = cl.CLCreateKernel(program, []byte("bst_kernel"), &status)
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "CLCreateKernel")
	defer cl.CLReleaseKernel(kernel)

	//-----------------------------------------------------
	// STEP 8: Set the kernel arguments
	//-----------------------------------------------------
	// Associate the input and output buffers with the
	// kernel
	// using clSetKernelArg()
	// Set appropriate arguments to the kernel
	status = cl.CLSetKernelArgSVMPointer(kernel,
		0,
		svmTreeBuf)
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "clSetKernelArgSVMPointer(svmTreeBuf)")

	status = cl.CLSetKernelArgSVMPointer(kernel,
		1,
		svmSearchBuf)
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "clSetKernelArgSVMPointer(svmSearchBuf)")

	//-----------------------------------------------------
	// STEP 9: Configure the work-item structure
	//-----------------------------------------------------
	// Define an index space (global work size) of work
	// items for
	// execution. A workgroup size (local work size) is not
	// required,
	// but can be used.
	var localWorkSize [1]cl.CL_size_t
	var kernelWorkGroupSize interface{}
	status = cl.CLGetKernelWorkGroupInfo(kernel,
		devices[0],
		cl.CL_KERNEL_WORK_GROUP_SIZE,
		cl.CL_size_t(unsafe.Sizeof(localWorkSize[0])),
		&kernelWorkGroupSize,
		nil)
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "CLGetKernelWorkGroupInfo")
	localWorkSize[0] = kernelWorkGroupSize.(cl.CL_size_t)

	var globalWorkSize [1]cl.CL_size_t
	globalWorkSize[0] = NUMBER_OF_SEARCH_KEY

	//-----------------------------------------------------
	// STEP 10: Enqueue the kernel for execution
	//-----------------------------------------------------
	// Execute the kernel by using
	// clEnqueueNDRangeKernel().
	// 'globalWorkSize' is the 1D dimension of the
	// work-items
	status = cl.CLEnqueueNDRangeKernel(cmdQueue,
		kernel,
		1,
		nil,
		globalWorkSize[:],
		localWorkSize[:],
		0,
		nil,
		nil)
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "CLEnqueueNDRangeKernel")

	status = cl.CLFlush(cmdQueue)
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "clFlush")

	status = cl.CLFinish(cmdQueue)
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "clFinish")

	//-----------------------------------------------------
	// STEP 11: Read the output buffer back to the host
	//-----------------------------------------------------
	// Use clEnqueueReadBuffer() to read the OpenCL output
	// buffer (bufferC)
	// to the host output array (C)
	//copy the data back to host buffer

	//this demo doesn't need clEnqueueReadBuffer due to SVM

	//-----------------------------------------------------
	// STEP 12: Verify the results
	//-----------------------------------------------------
	// reference implementation
	svmBinaryTreeCPUReference(cmdQueue,
		svmRoot,
		svmTreeBuf,
		svmSearchBuf)

	// compare the results and see if they match
	pass := svmCompareResults(cmdQueue, svmSearchBuf)
	if pass {
		println("Passed!")
	} else {
		println("Failed!")
	}
}