Beispiel #1
0
func svmbasic(size cl.CL_size_t,
	context cl.CL_context,
	queue cl.CL_command_queue,
	kernel cl.CL_kernel) {
	// Prepare input data as follows.
	// Build two arrays:
	//     - an array that consists of the Element structures
	//       (refer to svmbasic.h for the structure definition)
	//     - an array that consists of the float values
	//
	// Each structure of the first array has the following pointers:
	//     - 'internal', which points to a 'value' field of another entry
	//       of the same array.
	//     - 'external', which points to a float value from the the
	//       second array.
	//
	// Pointers are set randomly. The structures do not reflect any real usage
	// scenario, but are illustrative for a simple device-side traversal.
	//
	//        Array of Element                        Array of floats
	//           structures
	//
	//    ||====================||
	//    ||    .............   ||                   ||============||
	//    ||    .............   ||<-----+            || .......... ||
	//    ||====================||      |            ||    float   ||
	//    ||   float* internal--||------+            ||    float   ||
	//    ||   float* external--||------------------>||    float   ||
	//    ||   float value <----||------+            || .......... ||
	//    ||====================||      |            || .......... ||
	//    ||    .............   ||      |            ||    float   ||
	//    ||    .............   ||      |            ||    float   ||
	//    ||====================||      |            ||    float   ||
	//    ||====================||      |            ||    float   ||
	//    ||   float* internal--||------+            ||    float   ||
	//    ||   float* external--||------------------>||    float   ||
	//    ||   float value      ||                   ||    float   ||
	//    ||====================||                   ||    float   ||
	//    ||    .............   ||                   || .......... ||
	//    ||    .............   ||                   ||============||
	//    ||====================||
	//
	// The two arrays are created independently and are used to illustrate
	// two new OpenCL 2.0 API functions:
	//    - the array of Element structures is passed to the kernel as a
	//      kernel argument with the clSetKernelArgSVMPointer function
	//    - the array of floats is used by the kernel indirectly, and this
	//      dependency should be also specified with the clSetKernelExecInfo
	//      function prior to the kernel execution

	var err cl.CL_int

	// To enable host & device code to share pointer to the same address space
	// the arrays should be allocated as SVM memory. Use the clSVMAlloc function
	// to allocate SVM memory.
	//
	// Optionally, this function allows specifying alignment in bytes as its
	// last argument. As this basic example doesn't require any _special_ alignment,
	// the following code illustrates requesting default alignment via passing
	// zero value.

	inputElements := cl.CLSVMAlloc(context, // the context where this memory is supposed to be used
		cl.CL_MEM_READ_ONLY|cl.CL_MEM_SVM_FINE_GRAIN_BUFFER,
		size*cl.CL_size_t(unsafe.Sizeof(sampleElement)), // amount of memory to allocate (in bytes)
		0) // alignment in bytes (0 means default)
	if nil == inputElements {
		println("Cannot allocate SVM memory with clSVMAlloc: it returns null pointer. You might be out of memory.")
		return
	}
	defer cl.CLSVMFree(context, inputElements)

	inputFloats := cl.CLSVMAlloc(context, // the context where this memory is supposed to be used
		cl.CL_MEM_READ_ONLY|cl.CL_MEM_SVM_FINE_GRAIN_BUFFER,
		size*cl.CL_size_t(unsafe.Sizeof(sampleFloat)), // amount of memory to allocate (in bytes)
		0) // alignment in bytes (0 means default)
	if nil == inputFloats {
		println("Cannot allocate SVM memory with clSVMAlloc: it returns null pointer. You might be out of memory.")
		return
	}
	defer cl.CLSVMFree(context, inputFloats)

	// The OpenCL kernel uses the aforementioned input arrays to compute
	// values for the output array.

	output := cl.CLSVMAlloc(context, // the context where this memory is supposed to be used
		cl.CL_MEM_WRITE_ONLY|cl.CL_MEM_SVM_FINE_GRAIN_BUFFER,
		size*cl.CL_size_t(unsafe.Sizeof(sampleFloat)), // amount of memory to allocate (in bytes)
		0) // alignment in bytes (0 means default)
	defer cl.CLSVMFree(context, output)

	if nil == output {
		println("Cannot allocate SVM memory with clSVMAlloc: it returns null pointer. You might be out of memory.")
		return
	}

	// Note: in the coarse-grained SVM, mapping of inputElement and inputFloats is
	// needed to do the following initialization. While here, in the fine-grained SVM,
	// it is not necessary.

	// Populate data-structures with initial data.
	r := rand.New(rand.NewSource(99))

	for i := cl.CL_size_t(0); i < size; i++ {
		inputElement := (*Element)(unsafe.Pointer(uintptr(inputElements) + uintptr(i)*unsafe.Sizeof(sampleElement)))
		inputFloat := (*cl.CL_float)(unsafe.Pointer(uintptr(inputFloats) + uintptr(i)*unsafe.Sizeof(sampleFloat)))
		randElement := (*Element)(unsafe.Pointer(uintptr(inputElements) + uintptr(r.Intn(int(size)))*unsafe.Sizeof(sampleElement)))
		randFloat := (*cl.CL_float)(unsafe.Pointer(uintptr(inputFloats) + uintptr(r.Intn(int(size)))*unsafe.Sizeof(sampleFloat)))

		inputElement.internal = &(randElement.value)
		inputElement.external = randFloat
		inputElement.value = cl.CL_float(i)
		*inputFloat = cl.CL_float(i + size)
	}

	// Note: in the coarse-grained SVM, unmapping of inputElement and inputFloats is
	// needed before scheduling the kernel for execution. While here, in the fine-grained SVM,
	// it is not necessary.

	// Pass arguments to the kernel.
	// According to the OpenCL 2.0 specification, you need to use a special
	// function to pass a pointer from SVM memory to kernel.

	err = cl.CLSetKernelArgSVMPointer(kernel, 0, inputElements)
	utils.CHECK_STATUS(err, cl.CL_SUCCESS, "CLSetKernelArgSVMPointer")

	err = cl.CLSetKernelArgSVMPointer(kernel, 1, output)
	utils.CHECK_STATUS(err, cl.CL_SUCCESS, "CLSetKernelArgSVMPointer")

	// For buffer based SVM (both coarse- and fine-grain) if one SVM buffer
	// points to memory allocated in another SVM buffer, such allocations
	// should be passed to the kernel via clSetKernelExecInfo.

	err = cl.CLSetKernelExecInfo(kernel,
		cl.CL_KERNEL_EXEC_INFO_SVM_PTRS,
		cl.CL_size_t(unsafe.Sizeof(inputFloats)),
		unsafe.Pointer(&inputFloats))
	utils.CHECK_STATUS(err, cl.CL_SUCCESS, "CLSetKernelExecInfo")

	// Run the kernel.
	println("Running kernel...")

	var globalWorkSize [1]cl.CL_size_t
	globalWorkSize[0] = size

	err = cl.CLEnqueueNDRangeKernel(queue,
		kernel,
		1,
		nil,
		globalWorkSize[:],
		nil,
		0,
		nil,
		nil)
	utils.CHECK_STATUS(err, cl.CL_SUCCESS, "CLEnqueueNDRangeKernel")

	// Note: In the fine-grained SVM, after enqueuing the kernel above, the host application is
	// not blocked from accessing SVM allocations that were passed to the kernel. The host
	// can access the same regions of SVM memory as does the kernel if the kernel and the host
	// read/modify different bytes. If one side (host or device) needs to modify the same bytes
	// that are simultaniously read/modified by another side, atomics operations are usually
	// required to maintain sufficient memory consistency. This sample doesn't use this possibility
	// and the host just waits in clFinish below until the kernel is finished.
	err = cl.CLFinish(queue)
	utils.CHECK_STATUS(err, cl.CL_SUCCESS, "CLFinish")

	println(" DONE.")

	// Validate output state for correctness.
	// Compare: in the coarse-grained SVM case you need to map the output.
	// Here it is not needed.

	println("Checking correctness of the output buffer...")
	for i := cl.CL_size_t(0); i < size; i++ {
		inputElement := (*Element)(unsafe.Pointer(uintptr(inputElements) + uintptr(i)*unsafe.Sizeof(sampleElement)))
		outputFloat := (*cl.CL_float)(unsafe.Pointer(uintptr(output) + uintptr(i)*unsafe.Sizeof(sampleFloat)))
		expectedValue := *(inputElement.internal) + *(inputElement.external)
		if *outputFloat != expectedValue {
			println(" FAILED.")
			fmt.Printf("Mismatch at position %d, read %f, expected %f\n", i, *outputFloat, expectedValue)
			return
		}
	}
	println(" PASSED.")
}
Beispiel #2
0
func svmbasic(size cl.CL_size_t,
	context cl.CL_context,
	queue cl.CL_command_queue,
	kernel cl.CL_kernel) {
	// Prepare input data as follows.
	// Build two arrays:
	//     - an array that consists of the Element structures
	//       (refer to svmbasic.h for the structure definition)
	//     - an array that consists of the float values
	//
	// Each structure of the first array has the following pointers:
	//     - 'internal', which points to a 'value' field of another entry
	//       of the same array.
	//     - 'external', which points to a float value from the the
	//       second array.
	//
	// Pointers are set randomly. The structures do not reflect any real usage
	// scenario, but are illustrative for a simple device-side traversal.
	//
	//        Array of Element                        Array of floats
	//           structures
	//
	//    ||====================||
	//    ||    .............   ||                   ||============||
	//    ||    .............   ||<-----+            || .......... ||
	//    ||====================||      |            ||    float   ||
	//    ||   float* internal--||------+            ||    float   ||
	//    ||   float* external--||------------------>||    float   ||
	//    ||   float value <----||------+            || .......... ||
	//    ||====================||      |            || .......... ||
	//    ||    .............   ||      |            ||    float   ||
	//    ||    .............   ||      |            ||    float   ||
	//    ||====================||      |            ||    float   ||
	//    ||====================||      |            ||    float   ||
	//    ||   float* internal--||------+            ||    float   ||
	//    ||   float* external--||------------------>||    float   ||
	//    ||   float value      ||                   ||    float   ||
	//    ||====================||                   ||    float   ||
	//    ||    .............   ||                   || .......... ||
	//    ||    .............   ||                   ||============||
	//    ||====================||
	//
	// The two arrays are created independently and are used to illustrate
	// two new OpenCL 2.0 API functions:
	//    - the array of Element structures is passed to the kernel as a
	//      kernel argument with the clSetKernelArgSVMPointer function
	//    - the array of floats is used by the kernel indirectly, and this
	//      dependency should be also specified with the clSetKernelExecInfo
	//      function prior to the kernel execution

	var err cl.CL_int

	// To enable host & device code to share pointer to the same address space
	// the arrays should be allocated as SVM memory. Use the clSVMAlloc function
	// to allocate SVM memory.
	//
	// Optionally, this function allows specifying alignment in bytes as its
	// last argument. As this basic example doesn't require any _special_ alignment,
	// the following code illustrates requesting default alignment via passing
	// zero value.

	inputElements := cl.CLSVMAlloc(context, // the context where this memory is supposed to be used
		cl.CL_MEM_READ_ONLY,
		size*cl.CL_size_t(unsafe.Sizeof(sampleElement)), // amount of memory to allocate (in bytes)
		0) // alignment in bytes (0 means default)
	if nil == inputElements {
		println("Cannot allocate SVM memory with clSVMAlloc: it returns null pointer. You might be out of memory.")
		return
	}
	defer cl.CLSVMFree(context, inputElements)

	inputFloats := cl.CLSVMAlloc(context, // the context where this memory is supposed to be used
		cl.CL_MEM_READ_ONLY,
		size*cl.CL_size_t(unsafe.Sizeof(sampleFloat)), // amount of memory to allocate (in bytes)
		0) // alignment in bytes (0 means default)
	if nil == inputFloats {
		println("Cannot allocate SVM memory with clSVMAlloc: it returns null pointer. You might be out of memory.")
		return
	}
	defer cl.CLSVMFree(context, inputFloats)

	// The OpenCL kernel uses the aforementioned input arrays to compute
	// values for the output array.

	output := cl.CLSVMAlloc(context, // the context where this memory is supposed to be used
		cl.CL_MEM_WRITE_ONLY,
		size*cl.CL_size_t(unsafe.Sizeof(sampleFloat)), // amount of memory to allocate (in bytes)
		0) // alignment in bytes (0 means default)
	defer cl.CLSVMFree(context, output)

	if nil == output {
		println("Cannot allocate SVM memory with clSVMAlloc: it returns null pointer. You might be out of memory.")
		return
	}

	// In the coarse-grained buffer SVM model, only one OpenCL device (or
	// host) can have ownership for writing to the buffer. Specifically, host
	// explicitly requests the ownership by mapping/unmapping the SVM buffer.
	//
	// So to fill the input SVM buffers on the host, you need to map them to have
	// access from the host program.
	//
	// The following two map calls are required in case of coarse-grained SVM only.

	err = cl.CLEnqueueSVMMap(queue,
		cl.CL_TRUE, // blocking map
		cl.CL_MAP_WRITE,
		inputElements,
		size*cl.CL_size_t(unsafe.Sizeof(sampleElement)),
		0,
		nil,
		nil)
	utils.CHECK_STATUS(err, cl.CL_SUCCESS, "CLEnqueueSVMMap")

	err = cl.CLEnqueueSVMMap(queue,
		cl.CL_TRUE, // blocking map
		cl.CL_MAP_WRITE,
		inputFloats,
		size*cl.CL_size_t(unsafe.Sizeof(sampleFloat)),
		0,
		nil,
		nil)
	utils.CHECK_STATUS(err, cl.CL_SUCCESS, "CLEnqueueSVMMap")

	// Populate data-structures with initial data.
	r := rand.New(rand.NewSource(99))

	for i := cl.CL_size_t(0); i < size; i++ {
		inputElement := (*Element)(unsafe.Pointer(uintptr(inputElements) + uintptr(i)*unsafe.Sizeof(sampleElement)))
		inputFloat := (*cl.CL_float)(unsafe.Pointer(uintptr(inputFloats) + uintptr(i)*unsafe.Sizeof(sampleFloat)))
		randElement := (*Element)(unsafe.Pointer(uintptr(inputElements) + uintptr(r.Intn(int(size)))*unsafe.Sizeof(sampleElement)))
		randFloat := (*cl.CL_float)(unsafe.Pointer(uintptr(inputFloats) + uintptr(r.Intn(int(size)))*unsafe.Sizeof(sampleFloat)))

		inputElement.internal = &(randElement.value)
		inputElement.external = randFloat
		inputElement.value = cl.CL_float(i)
		*inputFloat = cl.CL_float(i + size)
	}

	// The following two unmap calls are required in case of coarse-grained SVM only

	err = cl.CLEnqueueSVMUnmap(queue,
		inputElements,
		0,
		nil,
		nil)
	utils.CHECK_STATUS(err, cl.CL_SUCCESS, "CLEnqueueSVMUnmap")

	err = cl.CLEnqueueSVMUnmap(queue,
		inputFloats,
		0,
		nil,
		nil)
	utils.CHECK_STATUS(err, cl.CL_SUCCESS, "CLEnqueueSVMUnmap")

	// Pass arguments to the kernel.
	// According to the OpenCL 2.0 specification, you need to use a special
	// function to pass a pointer from SVM memory to kernel.

	err = cl.CLSetKernelArgSVMPointer(kernel, 0, inputElements)
	utils.CHECK_STATUS(err, cl.CL_SUCCESS, "CLSetKernelArgSVMPointer")

	err = cl.CLSetKernelArgSVMPointer(kernel, 1, output)
	utils.CHECK_STATUS(err, cl.CL_SUCCESS, "CLSetKernelArgSVMPointer")

	// For buffer based SVM (both coarse- and fine-grain) if one SVM buffer
	// points to memory allocated in another SVM buffer, such allocations
	// should be passed to the kernel via clSetKernelExecInfo.

	err = cl.CLSetKernelExecInfo(kernel,
		cl.CL_KERNEL_EXEC_INFO_SVM_PTRS,
		cl.CL_size_t(unsafe.Sizeof(inputFloats)),
		unsafe.Pointer(&inputFloats))
	utils.CHECK_STATUS(err, cl.CL_SUCCESS, "CLSetKernelExecInfo")

	// Run the kernel.
	println("Running kernel...")

	var globalWorkSize [1]cl.CL_size_t
	globalWorkSize[0] = size

	err = cl.CLEnqueueNDRangeKernel(queue,
		kernel,
		1,
		nil,
		globalWorkSize[:],
		nil,
		0,
		nil,
		nil)
	utils.CHECK_STATUS(err, cl.CL_SUCCESS, "CLEnqueueNDRangeKernel")

	// Map the output SVM buffer to read the results.
	// Mapping is required for coarse-grained SVM only.

	err = cl.CLEnqueueSVMMap(queue,
		cl.CL_TRUE, // blocking map
		cl.CL_MAP_READ,
		output,
		size*cl.CL_size_t(unsafe.Sizeof(sampleFloat)),
		0,
		nil,
		nil)
	utils.CHECK_STATUS(err, cl.CL_SUCCESS, "CLEnqueueSVMMap")

	println(" DONE.")

	// Validate output state for correctness.

	println("Checking correctness of the output buffer...")
	for i := cl.CL_size_t(0); i < size; i++ {
		inputElement := (*Element)(unsafe.Pointer(uintptr(inputElements) + uintptr(i)*unsafe.Sizeof(sampleElement)))
		outputFloat := (*cl.CL_float)(unsafe.Pointer(uintptr(output) + uintptr(i)*unsafe.Sizeof(sampleFloat)))
		expectedValue := *(inputElement.internal) + *(inputElement.external)
		if *outputFloat != expectedValue {
			println(" FAILED.")
			fmt.Printf("Mismatch at position %d, read %f, expected %f\n", i, *outputFloat, expectedValue)
			return
		}
	}
	println(" PASSED.")

	err = cl.CLEnqueueSVMUnmap(queue,
		output,
		0,
		nil,
		nil)
	utils.CHECK_STATUS(err, cl.CL_SUCCESS, "CLEnqueueSVMUnmap")

	err = cl.CLFinish(queue)
	utils.CHECK_STATUS(err, cl.CL_SUCCESS, "CLFinish")
}