Beispiel #1
0
Datei: bst.go Projekt: xfong/gocl
func svmCompareResults(commandQueue cl.CL_command_queue,
	svmSearchBuf unsafe.Pointer) bool {
	var compare_status bool
	var status cl.CL_int

	status = cl.CLEnqueueSVMMap(commandQueue,
		cl.CL_TRUE, //blocking call
		cl.CL_MAP_WRITE_INVALIDATE_REGION,
		svmSearchBuf,
		cl.CL_size_t(NUMBER_OF_SEARCH_KEY*unsafe.Sizeof(sampleKey)),
		0,
		nil,
		nil)
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "clEnqueueSVMMap(svmSearchBuf)")

	compare_status = true
	for i := 0; i < NUMBER_OF_SEARCH_KEY; i++ {
		currKey := (*searchKey)(unsafe.Pointer(uintptr(svmSearchBuf) + uintptr(i)*unsafe.Sizeof(sampleKey)))

		/* compare OCL and native nodes */
		if currKey.oclNode != currKey.nativeNode {
			compare_status = false
			break
		}
	}

	status = cl.CLEnqueueSVMUnmap(commandQueue,
		svmSearchBuf,
		0,
		nil,
		nil)
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "clEnqueueSVMUnmap(svmSearchBuf)")

	return compare_status
}
Beispiel #2
0
Datei: bst.go Projekt: xfong/gocl
/**
 * cpuCreateBinaryTree()
 * creates a tree from the data in "svmTreeBuf". If this is NULL returns NULL
 * else returns root of the tree.
 **/
func cpuCreateBinaryTree(commandQueue cl.CL_command_queue,
	svmTreeBuf unsafe.Pointer) *node {
	var root *node
	var status cl.CL_int

	// reserve svm space for CPU update
	status = cl.CLEnqueueSVMMap(commandQueue,
		cl.CL_TRUE, //blocking call
		cl.CL_MAP_WRITE_INVALIDATE_REGION,
		svmTreeBuf,
		cl.CL_size_t(NUMBER_OF_NODES*unsafe.Sizeof(sampleNode)),
		0,
		nil,
		nil)
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "clEnqueueSVMMap(svmTreeBuf)")

	//init node and make bt
	root = cpuMakeBinaryTree(svmTreeBuf)

	status = cl.CLEnqueueSVMUnmap(commandQueue,
		svmTreeBuf,
		0,
		nil,
		nil)
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "clEnqueueSVMUnmap(svmTreeBuf)")

	return root
}
Beispiel #3
0
Datei: bst.go Projekt: xfong/gocl
func cpuInitSearchKeys(commandQueue cl.CL_command_queue,
	svmSearchBuf unsafe.Pointer) {
	var nextData *searchKey
	var status cl.CL_int

	status = cl.CLEnqueueSVMMap(commandQueue,
		cl.CL_TRUE, //blocking call
		cl.CL_MAP_WRITE_INVALIDATE_REGION,
		svmSearchBuf,
		cl.CL_size_t(NUMBER_OF_SEARCH_KEY*unsafe.Sizeof(sampleKey)),
		0,
		nil,
		nil)
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "clEnqueueSVMMap(svmSearchBuf)")

	r := rand.New(rand.NewSource(999))

	// initialize nodes
	for i := 0; i < NUMBER_OF_SEARCH_KEY; i++ {
		nextData = (*searchKey)(unsafe.Pointer(uintptr(svmSearchBuf) + uintptr(i)*unsafe.Sizeof(sampleKey)))
		// allocate a random value to node
		nextData.key = cl.CL_int(r.Int())
		// all pointers are null
		nextData.oclNode = nil
		nextData.nativeNode = nil
	}

	status = cl.CLEnqueueSVMUnmap(commandQueue,
		svmSearchBuf,
		0,
		nil,
		nil)
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "clEnqueueSVMUnmap(svmSearchBuf)")
}
Beispiel #4
0
Datei: bst.go Projekt: xfong/gocl
func svmBinaryTreeCPUReference(commandQueue cl.CL_command_queue,
	svmRoot *node,
	svmTreeBuf unsafe.Pointer,
	svmSearchBuf unsafe.Pointer) {
	var status cl.CL_int

	/* reserve svm buffers for cpu usage */
	status = cl.CLEnqueueSVMMap(commandQueue,
		cl.CL_TRUE, //blocking call
		cl.CL_MAP_READ,
		svmTreeBuf,
		cl.CL_size_t(NUMBER_OF_NODES*unsafe.Sizeof(sampleNode)),
		0,
		nil,
		nil)
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "clEnqueueSVMMap(svmTreeBuf)")

	status = cl.CLEnqueueSVMMap(commandQueue,
		cl.CL_TRUE, //blocking call
		cl.CL_MAP_WRITE,
		svmSearchBuf,
		cl.CL_size_t(NUMBER_OF_SEARCH_KEY*unsafe.Sizeof(sampleKey)),
		0,
		nil,
		nil)
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "clEnqueueSVMMap(svmSearchBuf)")

	for i := 0; i < NUMBER_OF_SEARCH_KEY; i++ {
		/* search tree */
		searchNode := svmRoot
		currKey := (*searchKey)(unsafe.Pointer(uintptr(svmSearchBuf) + uintptr(i)*unsafe.Sizeof(sampleKey)))

		for nil != searchNode {
			if currKey.key == searchNode.value {
				/* rejoice on finding key */
				currKey.nativeNode = searchNode
				searchNode = nil
			} else if currKey.key < searchNode.value {
				/* move left */
				searchNode = searchNode.left
			} else {
				/* move right */
				searchNode = searchNode.right
			}
		}
	}

	status = cl.CLEnqueueSVMUnmap(commandQueue,
		svmSearchBuf,
		0,
		nil,
		nil)
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "clEnqueueSVMUnmap(svmSearchBuf)")

	status = cl.CLEnqueueSVMUnmap(commandQueue,
		svmTreeBuf,
		0,
		nil,
		nil)
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "clEnqueueSVMUnmap(svmTreeBuf)")
}
Beispiel #5
0
func svmbasic(size cl.CL_size_t,
	context cl.CL_context,
	queue cl.CL_command_queue,
	kernel cl.CL_kernel) {
	// Prepare input data as follows.
	// Build two arrays:
	//     - an array that consists of the Element structures
	//       (refer to svmbasic.h for the structure definition)
	//     - an array that consists of the float values
	//
	// Each structure of the first array has the following pointers:
	//     - 'internal', which points to a 'value' field of another entry
	//       of the same array.
	//     - 'external', which points to a float value from the the
	//       second array.
	//
	// Pointers are set randomly. The structures do not reflect any real usage
	// scenario, but are illustrative for a simple device-side traversal.
	//
	//        Array of Element                        Array of floats
	//           structures
	//
	//    ||====================||
	//    ||    .............   ||                   ||============||
	//    ||    .............   ||<-----+            || .......... ||
	//    ||====================||      |            ||    float   ||
	//    ||   float* internal--||------+            ||    float   ||
	//    ||   float* external--||------------------>||    float   ||
	//    ||   float value <----||------+            || .......... ||
	//    ||====================||      |            || .......... ||
	//    ||    .............   ||      |            ||    float   ||
	//    ||    .............   ||      |            ||    float   ||
	//    ||====================||      |            ||    float   ||
	//    ||====================||      |            ||    float   ||
	//    ||   float* internal--||------+            ||    float   ||
	//    ||   float* external--||------------------>||    float   ||
	//    ||   float value      ||                   ||    float   ||
	//    ||====================||                   ||    float   ||
	//    ||    .............   ||                   || .......... ||
	//    ||    .............   ||                   ||============||
	//    ||====================||
	//
	// The two arrays are created independently and are used to illustrate
	// two new OpenCL 2.0 API functions:
	//    - the array of Element structures is passed to the kernel as a
	//      kernel argument with the clSetKernelArgSVMPointer function
	//    - the array of floats is used by the kernel indirectly, and this
	//      dependency should be also specified with the clSetKernelExecInfo
	//      function prior to the kernel execution

	var err cl.CL_int

	// To enable host & device code to share pointer to the same address space
	// the arrays should be allocated as SVM memory. Use the clSVMAlloc function
	// to allocate SVM memory.
	//
	// Optionally, this function allows specifying alignment in bytes as its
	// last argument. As this basic example doesn't require any _special_ alignment,
	// the following code illustrates requesting default alignment via passing
	// zero value.

	inputElements := cl.CLSVMAlloc(context, // the context where this memory is supposed to be used
		cl.CL_MEM_READ_ONLY,
		size*cl.CL_size_t(unsafe.Sizeof(sampleElement)), // amount of memory to allocate (in bytes)
		0) // alignment in bytes (0 means default)
	if nil == inputElements {
		println("Cannot allocate SVM memory with clSVMAlloc: it returns null pointer. You might be out of memory.")
		return
	}
	defer cl.CLSVMFree(context, inputElements)

	inputFloats := cl.CLSVMAlloc(context, // the context where this memory is supposed to be used
		cl.CL_MEM_READ_ONLY,
		size*cl.CL_size_t(unsafe.Sizeof(sampleFloat)), // amount of memory to allocate (in bytes)
		0) // alignment in bytes (0 means default)
	if nil == inputFloats {
		println("Cannot allocate SVM memory with clSVMAlloc: it returns null pointer. You might be out of memory.")
		return
	}
	defer cl.CLSVMFree(context, inputFloats)

	// The OpenCL kernel uses the aforementioned input arrays to compute
	// values for the output array.

	output := cl.CLSVMAlloc(context, // the context where this memory is supposed to be used
		cl.CL_MEM_WRITE_ONLY,
		size*cl.CL_size_t(unsafe.Sizeof(sampleFloat)), // amount of memory to allocate (in bytes)
		0) // alignment in bytes (0 means default)
	defer cl.CLSVMFree(context, output)

	if nil == output {
		println("Cannot allocate SVM memory with clSVMAlloc: it returns null pointer. You might be out of memory.")
		return
	}

	// In the coarse-grained buffer SVM model, only one OpenCL device (or
	// host) can have ownership for writing to the buffer. Specifically, host
	// explicitly requests the ownership by mapping/unmapping the SVM buffer.
	//
	// So to fill the input SVM buffers on the host, you need to map them to have
	// access from the host program.
	//
	// The following two map calls are required in case of coarse-grained SVM only.

	err = cl.CLEnqueueSVMMap(queue,
		cl.CL_TRUE, // blocking map
		cl.CL_MAP_WRITE,
		inputElements,
		size*cl.CL_size_t(unsafe.Sizeof(sampleElement)),
		0,
		nil,
		nil)
	utils.CHECK_STATUS(err, cl.CL_SUCCESS, "CLEnqueueSVMMap")

	err = cl.CLEnqueueSVMMap(queue,
		cl.CL_TRUE, // blocking map
		cl.CL_MAP_WRITE,
		inputFloats,
		size*cl.CL_size_t(unsafe.Sizeof(sampleFloat)),
		0,
		nil,
		nil)
	utils.CHECK_STATUS(err, cl.CL_SUCCESS, "CLEnqueueSVMMap")

	// Populate data-structures with initial data.
	r := rand.New(rand.NewSource(99))

	for i := cl.CL_size_t(0); i < size; i++ {
		inputElement := (*Element)(unsafe.Pointer(uintptr(inputElements) + uintptr(i)*unsafe.Sizeof(sampleElement)))
		inputFloat := (*cl.CL_float)(unsafe.Pointer(uintptr(inputFloats) + uintptr(i)*unsafe.Sizeof(sampleFloat)))
		randElement := (*Element)(unsafe.Pointer(uintptr(inputElements) + uintptr(r.Intn(int(size)))*unsafe.Sizeof(sampleElement)))
		randFloat := (*cl.CL_float)(unsafe.Pointer(uintptr(inputFloats) + uintptr(r.Intn(int(size)))*unsafe.Sizeof(sampleFloat)))

		inputElement.internal = &(randElement.value)
		inputElement.external = randFloat
		inputElement.value = cl.CL_float(i)
		*inputFloat = cl.CL_float(i + size)
	}

	// The following two unmap calls are required in case of coarse-grained SVM only

	err = cl.CLEnqueueSVMUnmap(queue,
		inputElements,
		0,
		nil,
		nil)
	utils.CHECK_STATUS(err, cl.CL_SUCCESS, "CLEnqueueSVMUnmap")

	err = cl.CLEnqueueSVMUnmap(queue,
		inputFloats,
		0,
		nil,
		nil)
	utils.CHECK_STATUS(err, cl.CL_SUCCESS, "CLEnqueueSVMUnmap")

	// Pass arguments to the kernel.
	// According to the OpenCL 2.0 specification, you need to use a special
	// function to pass a pointer from SVM memory to kernel.

	err = cl.CLSetKernelArgSVMPointer(kernel, 0, inputElements)
	utils.CHECK_STATUS(err, cl.CL_SUCCESS, "CLSetKernelArgSVMPointer")

	err = cl.CLSetKernelArgSVMPointer(kernel, 1, output)
	utils.CHECK_STATUS(err, cl.CL_SUCCESS, "CLSetKernelArgSVMPointer")

	// For buffer based SVM (both coarse- and fine-grain) if one SVM buffer
	// points to memory allocated in another SVM buffer, such allocations
	// should be passed to the kernel via clSetKernelExecInfo.

	err = cl.CLSetKernelExecInfo(kernel,
		cl.CL_KERNEL_EXEC_INFO_SVM_PTRS,
		cl.CL_size_t(unsafe.Sizeof(inputFloats)),
		unsafe.Pointer(&inputFloats))
	utils.CHECK_STATUS(err, cl.CL_SUCCESS, "CLSetKernelExecInfo")

	// Run the kernel.
	println("Running kernel...")

	var globalWorkSize [1]cl.CL_size_t
	globalWorkSize[0] = size

	err = cl.CLEnqueueNDRangeKernel(queue,
		kernel,
		1,
		nil,
		globalWorkSize[:],
		nil,
		0,
		nil,
		nil)
	utils.CHECK_STATUS(err, cl.CL_SUCCESS, "CLEnqueueNDRangeKernel")

	// Map the output SVM buffer to read the results.
	// Mapping is required for coarse-grained SVM only.

	err = cl.CLEnqueueSVMMap(queue,
		cl.CL_TRUE, // blocking map
		cl.CL_MAP_READ,
		output,
		size*cl.CL_size_t(unsafe.Sizeof(sampleFloat)),
		0,
		nil,
		nil)
	utils.CHECK_STATUS(err, cl.CL_SUCCESS, "CLEnqueueSVMMap")

	println(" DONE.")

	// Validate output state for correctness.

	println("Checking correctness of the output buffer...")
	for i := cl.CL_size_t(0); i < size; i++ {
		inputElement := (*Element)(unsafe.Pointer(uintptr(inputElements) + uintptr(i)*unsafe.Sizeof(sampleElement)))
		outputFloat := (*cl.CL_float)(unsafe.Pointer(uintptr(output) + uintptr(i)*unsafe.Sizeof(sampleFloat)))
		expectedValue := *(inputElement.internal) + *(inputElement.external)
		if *outputFloat != expectedValue {
			println(" FAILED.")
			fmt.Printf("Mismatch at position %d, read %f, expected %f\n", i, *outputFloat, expectedValue)
			return
		}
	}
	println(" PASSED.")

	err = cl.CLEnqueueSVMUnmap(queue,
		output,
		0,
		nil,
		nil)
	utils.CHECK_STATUS(err, cl.CL_SUCCESS, "CLEnqueueSVMUnmap")

	err = cl.CLFinish(queue)
	utils.CHECK_STATUS(err, cl.CL_SUCCESS, "CLFinish")
}