예제 #1
0
파일: context_test.go 프로젝트: xfong/gocl
// TestContext creates an OpenCL context on the first GPU (or, when no GPU is
// found, CPU) device of the first platform and logs the context's reference
// count three times: right after creation, after a retain, and after a
// release.
//
// Every setup failure aborts the test immediately, because each later step
// needs the handle produced by the step before it.
func TestContext(t *testing.T) {
	/* Host/device data structures */
	var platform [1]cl.CL_platform_id
	var device [1]cl.CL_device_id
	var context cl.CL_context
	var err cl.CL_int

	var paramValueSize cl.CL_size_t
	var ref_count interface{}
	user_data := []byte("Hello, I am callback")

	/* Access the first installed platform */
	err = cl.CLGetPlatformIDs(1, platform[:], nil)
	if err != cl.CL_SUCCESS {
		t.Fatalf("Couldn't find any platforms")
	}

	/* Access the first available device: GPU first, CPU as fallback */
	err = cl.CLGetDeviceIDs(platform[0], cl.CL_DEVICE_TYPE_GPU, 1, device[:], nil)
	if err == cl.CL_DEVICE_NOT_FOUND {
		err = cl.CLGetDeviceIDs(platform[0], cl.CL_DEVICE_TYPE_CPU, 1, device[:], nil)
	}
	if err != cl.CL_SUCCESS {
		t.Fatalf("Couldn't find any devices")
	}

	/* Create the context */
	context = cl.CLCreateContext(nil, 1, device[:], my_contex_notify, unsafe.Pointer(&user_data), &err)
	if err != cl.CL_SUCCESS {
		t.Fatalf("Couldn't create a context")
	}
	/* Drop the creation reference when the test ends, including on failure */
	defer cl.CLReleaseContext(context)

	/* Determine the size of the reference count value */
	err = cl.CLGetContextInfo(context, cl.CL_CONTEXT_REFERENCE_COUNT,
		0, nil, &paramValueSize)
	if err != cl.CL_SUCCESS {
		t.Fatalf("Failed to find context %s.\n", "CL_CONTEXT_REFERENCE_COUNT")
	}

	/*
	 * logRefCount reads the current reference count and logs it under the
	 * given label. It fails the test instead of panicking when the query
	 * fails or returns a value of an unexpected type.
	 */
	logRefCount := func(label string) {
		t.Helper()
		status := cl.CLGetContextInfo(context, cl.CL_CONTEXT_REFERENCE_COUNT,
			paramValueSize, &ref_count, nil)
		if status != cl.CL_SUCCESS {
			t.Fatalf("Couldn't read the reference count.")
		}
		count, ok := ref_count.(cl.CL_uint)
		if !ok {
			t.Fatalf("unexpected reference count type %T", ref_count)
		}
		t.Logf("%s: %d\n", label, count)
	}

	logRefCount("Initial reference count")

	/* Update and display the reference count */
	cl.CLRetainContext(context)
	logRefCount("Reference count")

	/* Undo the retain above; the deferred release drops the last reference */
	cl.CLReleaseContext(context)
	logRefCount("Reference count")
}
예제 #2
0
파일: utils.go 프로젝트: xfong/gocl
// Create_device asks for one platform and then for one GPU device on it,
// retrying with the CPU device type when the GPU query reports
// CL_DEVICE_NOT_FOUND. It returns a one-element slice holding the device, or
// nil (after printing a message) when either query fails.
func Create_device() []cl.CL_device_id {
	platforms := make([]cl.CL_platform_id, 1)
	devices := make([]cl.CL_device_id, 1)

	// Identify a platform.
	if status := cl.CLGetPlatformIDs(1, platforms, nil); status < 0 {
		println("Couldn't identify a platform")
		return nil
	}

	// Prefer a GPU; only a "not found" result triggers the CPU retry.
	status := cl.CLGetDeviceIDs(platforms[0], cl.CL_DEVICE_TYPE_GPU, 1, devices, nil)
	if status == cl.CL_DEVICE_NOT_FOUND {
		status = cl.CLGetDeviceIDs(platforms[0], cl.CL_DEVICE_TYPE_CPU, 1, devices, nil)
	}
	if status < 0 {
		println("Couldn't access any devices")
		return nil
	}

	return devices
}
예제 #3
0
파일: float_config.go 프로젝트: xfong/gocl
// main queries CL_DEVICE_SINGLE_FP_CONFIG on the first GPU (or, when no GPU
// is found, CPU) device of the first platform and prints one line for each
// floating-point feature bit that is set.
func main() {

	/* Host/device data structures */
	var platform [1]cl.CL_platform_id
	var device [1]cl.CL_device_id
	var fpConfig cl.CL_device_fp_config
	var flag interface{} // filled by CLGetDeviceInfo with a cl.CL_device_fp_config
	var err cl.CL_int

	/* Identify a platform */
	err = cl.CLGetPlatformIDs(1, platform[:], nil)
	if err < 0 {
		println("Couldn't identify a platform")
		return
	}

	/* Access a device */
	err = cl.CLGetDeviceIDs(platform[0], cl.CL_DEVICE_TYPE_GPU, 1, device[:], nil)
	if err == cl.CL_DEVICE_NOT_FOUND {
		err = cl.CLGetDeviceIDs(platform[0], cl.CL_DEVICE_TYPE_CPU, 1, device[:], nil)
	}
	if err < 0 {
		println("Couldn't access any devices")
		return
	}

	/*
	 * Check float-processing features. The size passed is that of the
	 * queried value type, not of the interface{} that receives it.
	 */
	err = cl.CLGetDeviceInfo(device[0], cl.CL_DEVICE_SINGLE_FP_CONFIG,
		cl.CL_size_t(unsafe.Sizeof(fpConfig)), &flag, nil)
	if err < 0 {
		println("Couldn't read floating-point properties")
		return
	}

	/* Assert once, with the comma-ok form, so a bad value cannot panic */
	fpConfig, ok := flag.(cl.CL_device_fp_config)
	if !ok {
		println("Couldn't read floating-point properties")
		return
	}

	/* Feature bits paired with the line printed when the bit is set */
	features := []struct {
		bit cl.CL_device_fp_config
		msg string
	}{
		{cl.CL_FP_INF_NAN, "INF and NaN values supported."},
		{cl.CL_FP_DENORM, "Denormalized numbers supported."},
		{cl.CL_FP_ROUND_TO_NEAREST, "Round To Nearest Even mode supported."},
		{cl.CL_FP_ROUND_TO_INF, "Round To Infinity mode supported."},
		{cl.CL_FP_ROUND_TO_ZERO, "Round To Zero mode supported."},
		{cl.CL_FP_FMA, "Floating-point multiply-and-add operation supported."},
		{cl.CL_FP_SOFT_FLOAT, "Basic floating-point processing performed in software."},
	}

	fmt.Printf("Float Processing Features:\n")
	for _, f := range features {
		if fpConfig&f.bit != 0 {
			fmt.Println(f.msg)
		}
	}
}
예제 #4
0
파일: platform.go 프로젝트: xfong/gocl
// GetDevices returns the devices of the given type attached to this platform.
//
// It first asks CLGetDeviceIDs for the device count, then fetches that many
// device IDs and wraps each one in a device value. A non-success status from
// either call is returned as an error carrying the numeric code and, when the
// code maps to an entry of cl.ERROR_CODES_STRINGS, its symbolic name.
func (p *platform) GetDevices(deviceType cl.CL_device_type) ([]Device, error) {
	// failure builds the error for a status code. The name table is indexed
	// by the negated code, so the index is range-checked first: an
	// unexpected code must produce an error, not a panic.
	failure := func(code cl.CL_int) error {
		if idx := int(-code); idx >= 0 && idx < len(cl.ERROR_CODES_STRINGS) {
			return fmt.Errorf("GetDevices failure with errcode_ret %d: %s", code, cl.ERROR_CODES_STRINGS[idx])
		}
		return fmt.Errorf("GetDevices failure with errcode_ret %d: unknown error code", code)
	}

	/* Determine number of connected devices */
	var numDevices cl.CL_uint
	if errCode := cl.CLGetDeviceIDs(p.platform_id, deviceType, 0, nil, &numDevices); errCode != cl.CL_SUCCESS {
		return nil, failure(errCode)
	}

	/* Access connected devices */
	deviceIds := make([]cl.CL_device_id, numDevices)
	if errCode := cl.CLGetDeviceIDs(p.platform_id, deviceType, numDevices, deviceIds, nil); errCode != cl.CL_SUCCESS {
		return nil, failure(errCode)
	}

	/* Wrap every raw ID in the package's device type */
	devices := make([]Device, len(deviceIds))
	for i, id := range deviceIds {
		devices[i] = &device{id}
	}

	return devices, nil
}
예제 #5
0
파일: queue_test.go 프로젝트: xfong/gocl
// TestQueue builds the kernel "blank" from the file blank.cl, creates a
// command queue on the first GPU (or, when no GPU is found, CPU) device and
// enqueues the kernel as a single task.
//
// Each step depends on the handle produced by the previous one, so every
// failure stops the test at once. OpenCL objects are released through defer,
// which also covers the failure paths; the release order (queue, kernel,
// program, context) is the reverse of creation.
func TestQueue(t *testing.T) {

	/* Host/device data structures */
	var platform [1]cl.CL_platform_id
	var device [1]cl.CL_device_id
	var context cl.CL_context
	var queue cl.CL_command_queue
	var err cl.CL_int

	/* Program/kernel data structures */
	var program cl.CL_program
	var program_buffer [1][]byte
	var program_log interface{}
	var program_size [1]cl.CL_size_t
	var log_size cl.CL_size_t
	var kernel cl.CL_kernel

	/* Access the first installed platform */
	err = cl.CLGetPlatformIDs(1, platform[:], nil)
	if err < 0 {
		t.Fatalf("Couldn't find any platforms")
	}

	/* Access the first GPU/CPU */
	err = cl.CLGetDeviceIDs(platform[0], cl.CL_DEVICE_TYPE_GPU, 1, device[:], nil)
	if err == cl.CL_DEVICE_NOT_FOUND {
		err = cl.CLGetDeviceIDs(platform[0], cl.CL_DEVICE_TYPE_CPU, 1, device[:], nil)
	}
	if err < 0 {
		t.Fatalf("Couldn't find any devices")
	}

	/* Create a context */
	context = cl.CLCreateContext(nil, 1, device[:], nil, nil, &err)
	if err < 0 {
		t.Fatalf("Couldn't create a context")
	}
	defer cl.CLReleaseContext(context)

	/* Read the program file and place its content into the buffer array */
	program_handle, err1 := os.Open("blank.cl")
	if err1 != nil {
		t.Fatalf("Couldn't find the program file")
	}
	defer program_handle.Close()

	/* Stat must succeed before fi is used: fi is nil on error */
	fi, err2 := program_handle.Stat()
	if err2 != nil {
		t.Fatalf("Couldn't find the program stat")
	}
	program_size[0] = cl.CL_size_t(fi.Size())
	program_buffer[0] = make([]byte, program_size[0])
	read_size, err3 := program_handle.Read(program_buffer[0])
	if err3 != nil || cl.CL_size_t(read_size) != program_size[0] {
		t.Fatalf("read file error or file size wrong")
	}

	/* Create program from file */
	program = cl.CLCreateProgramWithSource(context, 1,
		program_buffer[:], program_size[:], &err)
	if err < 0 {
		t.Fatalf("Couldn't create the program")
	}
	defer cl.CLReleaseProgram(program)

	/* Build program */
	err = cl.CLBuildProgram(program, 1, device[:], nil, nil, nil)
	if err < 0 {
		/* Find the size of the build log, fetch it and report it */
		cl.CLGetProgramBuildInfo(program, device[0], cl.CL_PROGRAM_BUILD_LOG,
			0, nil, &log_size)
		cl.CLGetProgramBuildInfo(program, device[0], cl.CL_PROGRAM_BUILD_LOG,
			log_size, &program_log, nil)
		t.Fatalf("%s\n", program_log)
	}

	/* Create the kernel */
	kernel = cl.CLCreateKernel(program, []byte("blank"), &err)
	if err < 0 {
		t.Fatalf("Couldn't create the kernel")
	}
	defer cl.CLReleaseKernel(kernel)

	/* Create the command queue */
	queue = cl.CLCreateCommandQueue(context, device[0], 0, &err)
	if err < 0 {
		t.Fatalf("Couldn't create the command queue")
	}
	defer cl.CLReleaseCommandQueue(queue)

	/* Enqueue the kernel execution command */
	err = cl.CLEnqueueTask(queue, kernel, 0, nil, nil)
	if err < 0 {
		t.Fatalf("Couldn't enqueue the kernel execution command")
	}
	t.Logf("Successfully queued kernel.\n")
}
예제 #6
0
파일: pipe.go 프로젝트: xfong/gocl
// main runs the pipe sample. It builds pipe.cl with -cl-std=CL2.0, runs the
// "pipe_producer" kernel on the first command queue, waits for it, runs the
// "pipe_consumer" kernel on the second queue, reads the device histogram
// back and compares it bin by bin against the histogram computed by
// CPUReference. It prints "Passed!" when every bin is within tolerance and
// "Failed!" otherwise.
//
// All OpenCL objects created here are released through defer.
func main() {
	// Use this to check the output of each API call
	var status cl.CL_int

	//-----------------------------------------------------
	// STEP 1: Discover and initialize the platforms
	//-----------------------------------------------------
	var numPlatforms cl.CL_uint
	var platforms []cl.CL_platform_id

	// Use clGetPlatformIDs() to retrieve the number of
	// platforms
	status = cl.CLGetPlatformIDs(0, nil, &numPlatforms)
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "CLGetPlatformIDs")

	// Allocate enough space for each platform
	platforms = make([]cl.CL_platform_id, numPlatforms)

	// Fill in platforms with clGetPlatformIDs()
	status = cl.CLGetPlatformIDs(numPlatforms, platforms, nil)
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "CLGetPlatformIDs")

	//-----------------------------------------------------
	// STEP 2: Discover and initialize the GPU devices
	//-----------------------------------------------------
	var numDevices cl.CL_uint
	var devices []cl.CL_device_id

	// Use clGetDeviceIDs() to retrieve the number of
	// devices present
	status = cl.CLGetDeviceIDs(platforms[0],
		cl.CL_DEVICE_TYPE_GPU,
		0,
		nil,
		&numDevices)
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "CLGetDeviceIDs")

	// Allocate enough space for each device
	devices = make([]cl.CL_device_id, numDevices)

	// Fill in devices with clGetDeviceIDs()
	status = cl.CLGetDeviceIDs(platforms[0],
		cl.CL_DEVICE_TYPE_GPU,
		numDevices,
		devices,
		nil)
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "CLGetDeviceIDs")

	//-----------------------------------------------------
	// STEP 3: Create a context
	//-----------------------------------------------------
	var context cl.CL_context

	// Create a context using clCreateContext() and
	// associate it with the devices
	context = cl.CLCreateContext(nil,
		numDevices,
		devices,
		nil,
		nil,
		&status)
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "CLCreateContext")
	defer cl.CLReleaseContext(context)

	//-----------------------------------------------------
	// STEP 4: Create the command queues
	//-----------------------------------------------------
	var commandQueue [MAX_COMMAND_QUEUE]cl.CL_command_queue

	// Create each command queue using
	// clCreateCommandQueueWithProperties() and associate it with the
	// first device. The defer inside the loop is intentional: every
	// queue must stay alive until main returns.
	for i := 0; i < MAX_COMMAND_QUEUE; i++ {
		commandQueue[i] = cl.CLCreateCommandQueueWithProperties(context,
			devices[0],
			nil,
			&status)
		utils.CHECK_STATUS(status, cl.CL_SUCCESS, "CLCreateCommandQueueWithProperties")
		defer cl.CLReleaseCommandQueue(commandQueue[i])
	}

	//-----------------------------------------------------
	// STEP 5: Create device buffers
	//-----------------------------------------------------
	producerGroupSize := cl.CL_size_t(PRODUCER_GROUP_SIZE)
	producerGlobalSize := cl.CL_size_t(PRODUCER_GLOBAL_SIZE)

	consumerGroupSize := cl.CL_size_t(CONSUMER_GROUP_SIZE)
	consumerGlobalSize := cl.CL_size_t(CONSUMER_GLOBAL_SIZE)

	var samplePipePkt [2]cl.CL_float
	szPipe := cl.CL_uint(PIPE_SIZE)
	szPipePkt := cl.CL_uint(unsafe.Sizeof(samplePipePkt))
	// Round the pipe size up to the next multiple of PRNG_CHANNELS
	if szPipe%PRNG_CHANNELS != 0 {
		szPipe = (szPipe/PRNG_CHANNELS)*PRNG_CHANNELS + PRNG_CHANNELS
	}
	// The consumer is launched with one work-item per pipe packet
	consumerGlobalSize = cl.CL_size_t(szPipe)
	pipePktPerThread := cl.CL_int(szPipe) / PRNG_CHANNELS
	seed := cl.CL_int(SEED)
	rngType := cl.CL_int(RV_GAUSSIAN)
	var histMin cl.CL_float
	var histMax cl.CL_float
	if rngType == cl.CL_int(RV_UNIFORM) {
		histMin = 0.0
		histMax = 1.0
	} else {
		histMin = -10.0
		histMax = 10.0
	}

	localDevHist := make([]cl.CL_int, MAX_HIST_BINS)
	cpuHist := make([]cl.CL_int, MAX_HIST_BINS)

	//Create and initialize memory objects
	rngPipe := cl.CLCreatePipe(context,
		cl.CL_MEM_READ_WRITE,
		szPipePkt,
		szPipe,
		nil,
		&status)
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "clCreatePipe")
	defer cl.CLReleaseMemObject(rngPipe)

	devHist := cl.CLCreateBuffer(context,
		cl.CL_MEM_READ_WRITE|cl.CL_MEM_COPY_HOST_PTR,
		MAX_HIST_BINS*cl.CL_size_t(unsafe.Sizeof(localDevHist[0])),
		unsafe.Pointer(&localDevHist[0]),
		&status)
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "clCreateBuffer")
	defer cl.CLReleaseMemObject(devHist)

	//-----------------------------------------------------
	// STEP 6: Create and compile the program
	//-----------------------------------------------------
	programSource, programeSize := utils.Load_programsource("pipe.cl")

	// Create a program using clCreateProgramWithSource()
	program := cl.CLCreateProgramWithSource(context,
		1,
		programSource[:],
		programeSize[:],
		&status)
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "CLCreateProgramWithSource")
	defer cl.CLReleaseProgram(program)

	// Build (compile) the program for the devices with
	// clBuildProgram()
	options := "-cl-std=CL2.0"
	status = cl.CLBuildProgram(program,
		numDevices,
		devices,
		[]byte(options),
		nil,
		nil)
	if status != cl.CL_SUCCESS {
		var program_log interface{}
		var log_size cl.CL_size_t

		/* Find size of log and print to std output */
		cl.CLGetProgramBuildInfo(program, devices[0], cl.CL_PROGRAM_BUILD_LOG,
			0, nil, &log_size)
		cl.CLGetProgramBuildInfo(program, devices[0], cl.CL_PROGRAM_BUILD_LOG,
			log_size, &program_log, nil)
		fmt.Printf("%s\n", program_log)
		return
	}

	//-----------------------------------------------------
	// STEP 7: Create the kernels
	//-----------------------------------------------------
	// Use clCreateKernel() to create a kernel
	produceKernel := cl.CLCreateKernel(program, []byte("pipe_producer"), &status)
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "CLCreateKernel")
	defer cl.CLReleaseKernel(produceKernel)

	consumeKernel := cl.CLCreateKernel(program, []byte("pipe_consumer"), &status)
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "CLCreateKernel")
	defer cl.CLReleaseKernel(consumeKernel)

	//-----------------------------------------------------
	// STEP 8: Set the producer kernel arguments
	//-----------------------------------------------------
	// Arguments, in order: the pipe, packets per work-item,
	// the seed and the random-variable type.
	status = cl.CLSetKernelArg(produceKernel,
		0,
		cl.CL_size_t(unsafe.Sizeof(rngPipe)),
		unsafe.Pointer(&rngPipe))
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "clSetKernelArg(rngPipe)")

	status = cl.CLSetKernelArg(produceKernel,
		1,
		cl.CL_size_t(unsafe.Sizeof(pipePktPerThread)),
		unsafe.Pointer(&pipePktPerThread))
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "clSetKernelArg(pipePktPerThread)")

	status = cl.CLSetKernelArg(produceKernel,
		2,
		cl.CL_size_t(unsafe.Sizeof(seed)),
		unsafe.Pointer(&seed))
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "clSetKernelArg(seed)")

	status = cl.CLSetKernelArg(produceKernel,
		3,
		cl.CL_size_t(unsafe.Sizeof(rngType)),
		unsafe.Pointer(&rngType))
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "clSetKernelArg(rngType)")

	//-----------------------------------------------------
	// STEP 9: Configure the producer work-item structure
	//-----------------------------------------------------
	// One-dimensional global and local work sizes; the same
	// slices are reused for the consumer launch below.
	var globalThreads = []cl.CL_size_t{producerGlobalSize}
	var localThreads = []cl.CL_size_t{producerGroupSize}

	//-----------------------------------------------------
	// STEP 10: Enqueue the producer kernel
	//-----------------------------------------------------
	// Execute the kernel by using
	// clEnqueueNDRangeKernel().
	var produceEvt [1]cl.CL_event
	status = cl.CLEnqueueNDRangeKernel(commandQueue[0],
		produceKernel,
		1,
		nil,
		globalThreads,
		localThreads,
		0,
		nil,
		&produceEvt[0])
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "clEnqueueNDRangeKernel")
	defer cl.CLReleaseEvent(produceEvt[0])

	/*
	   launch consumer kernel only after producer has finished.
	   This is done to avoid concurrent kernels execution as the
	   memory consistency of pipe is guaranteed only across
	   synchronization points.
	*/
	status = cl.CLWaitForEvents(1, produceEvt[:])
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "clWaitForEvents(produceEvt)")

	//-----------------------------------------------------
	// STEP 11: Set the consumer kernel arguments
	//-----------------------------------------------------
	// Arguments, in order: the pipe, the device histogram
	// and the histogram range (min, max).
	status = cl.CLSetKernelArg(consumeKernel,
		0,
		cl.CL_size_t(unsafe.Sizeof(rngPipe)),
		unsafe.Pointer(&rngPipe))
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "clSetKernelArg(rngPipe)")

	status = cl.CLSetKernelArg(consumeKernel,
		1,
		cl.CL_size_t(unsafe.Sizeof(devHist)),
		unsafe.Pointer(&devHist))
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "clSetKernelArg(devHist)")

	status = cl.CLSetKernelArg(consumeKernel,
		2,
		cl.CL_size_t(unsafe.Sizeof(histMin)),
		unsafe.Pointer(&histMin))
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "clSetKernelArg(histMin)")

	status = cl.CLSetKernelArg(consumeKernel,
		3,
		cl.CL_size_t(unsafe.Sizeof(histMax)),
		unsafe.Pointer(&histMax))
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "clSetKernelArg(histMax)")

	//-----------------------------------------------------
	// STEP 12: Configure the consumer work-item structure
	//-----------------------------------------------------
	globalThreads[0] = consumerGlobalSize
	localThreads[0] = consumerGroupSize

	//-----------------------------------------------------
	// STEP 13: Enqueue the consumer kernel
	//-----------------------------------------------------
	// Execute the kernel by using
	// clEnqueueNDRangeKernel().
	var consumeEvt [1]cl.CL_event
	status = cl.CLEnqueueNDRangeKernel(
		commandQueue[1],
		consumeKernel,
		1,
		nil,
		globalThreads,
		localThreads,
		0,
		nil,
		&consumeEvt[0])
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "clEnqueueNDRangeKernel")
	defer cl.CLReleaseEvent(consumeEvt[0])

	status = cl.CLFlush(commandQueue[0])
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "clFlush(0)")

	status = cl.CLFlush(commandQueue[1])
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "clFlush(1)")

	//wait for kernels to finish
	status = cl.CLFinish(commandQueue[0])
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "clFinish(0)")

	status = cl.CLFinish(commandQueue[1])
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "clFinish(1)")

	//-----------------------------------------------------
	// STEP 14: Read the output buffer back to the host
	//-----------------------------------------------------
	// Use clEnqueueReadBuffer() with a blocking read to copy
	// the device histogram (devHist) into localDevHist.
	var readEvt cl.CL_event
	status = cl.CLEnqueueReadBuffer(commandQueue[1],
		devHist,
		cl.CL_TRUE,
		0,
		(MAX_HIST_BINS)*cl.CL_size_t(unsafe.Sizeof(localDevHist[0])),
		unsafe.Pointer(&localDevHist[0]),
		0,
		nil,
		&readEvt)
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "clEnqueueReadBuffer")
	defer cl.CLReleaseEvent(readEvt)

	//-----------------------------------------------------
	// STEP 15: Verify the results
	//-----------------------------------------------------
	//Find the tolerance limit: COMP_TOL percent of the
	//consumer global size, but never less than one count
	fTol := (float32)(CONSUMER_GLOBAL_SIZE) * (float32)(COMP_TOL) / (float32)(100.0)
	iTol := (int)(fTol)
	if iTol == 0 {
		iTol = 1
	}

	//CPU side histogram computation
	CPUReference(seed, pipePktPerThread, rngType, cpuHist, histMax, histMin)

	//Compare each bin's absolute difference against the tolerance
	for bin := 0; bin < MAX_HIST_BINS; bin++ {
		diff := int(localDevHist[bin] - cpuHist[bin])

		if diff < 0 {
			diff = -diff
		}
		if diff > iTol {
			println("Failed!")
			return
		}
	}

	println("Passed!")
}
예제 #7
0
파일: bst.go 프로젝트: xfong/gocl
// main runs the binary-search-tree SVM sample. It allocates two SVM buffers
// (tree nodes and search keys), fills them through cpuCreateBinaryTree and
// cpuInitSearchKeys, builds bst.cl with -cl-std=CL2.0, runs "bst_kernel" with
// one work-item per search key, then calls svmBinaryTreeCPUReference and
// svmCompareResults and prints "Passed!" or "Failed!" accordingly.
//
// All OpenCL objects and SVM allocations are released through defer.
func main() {
	// Use this to check the output of each API call
	var status cl.CL_int

	//-----------------------------------------------------
	// STEP 1: Discover and initialize the platforms
	//-----------------------------------------------------
	var numPlatforms cl.CL_uint
	var platforms []cl.CL_platform_id

	// Use clGetPlatformIDs() to retrieve the number of
	// platforms
	status = cl.CLGetPlatformIDs(0, nil, &numPlatforms)
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "CLGetPlatformIDs")

	// Allocate enough space for each platform
	platforms = make([]cl.CL_platform_id, numPlatforms)

	// Fill in platforms with clGetPlatformIDs()
	status = cl.CLGetPlatformIDs(numPlatforms, platforms, nil)
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "CLGetPlatformIDs")

	//-----------------------------------------------------
	// STEP 2: Discover and initialize the GPU devices
	//-----------------------------------------------------
	var numDevices cl.CL_uint
	var devices []cl.CL_device_id

	// Use clGetDeviceIDs() to retrieve the number of
	// devices present
	status = cl.CLGetDeviceIDs(platforms[0],
		cl.CL_DEVICE_TYPE_GPU,
		0,
		nil,
		&numDevices)
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "CLGetDeviceIDs")

	// Allocate enough space for each device
	devices = make([]cl.CL_device_id, numDevices)

	// Fill in devices with clGetDeviceIDs()
	status = cl.CLGetDeviceIDs(platforms[0],
		cl.CL_DEVICE_TYPE_GPU,
		numDevices,
		devices,
		nil)
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "CLGetDeviceIDs")

	//-----------------------------------------------------
	// STEP 3: Create a context
	//-----------------------------------------------------
	var context cl.CL_context

	// Create a context using clCreateContext() and
	// associate it with the devices
	context = cl.CLCreateContext(nil,
		numDevices,
		devices,
		nil,
		nil,
		&status)
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "CLCreateContext")
	defer cl.CLReleaseContext(context)

	//-----------------------------------------------------
	// STEP 4: Create a command queue
	//-----------------------------------------------------
	var cmdQueue cl.CL_command_queue

	// Create a command queue using clCreateCommandQueueWithProperties(),
	// and associate it with the device you want to execute
	cmdQueue = cl.CLCreateCommandQueueWithProperties(context,
		devices[0],
		nil,
		&status)
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "CLCreateCommandQueueWithProperties")
	defer cl.CLReleaseCommandQueue(cmdQueue)

	//-----------------------------------------------------
	// STEP 5: Create device buffers
	//-----------------------------------------------------
	// initialize any device/SVM memory here.

	/* svm buffer for binary tree: NUMBER_OF_NODES entries of sampleNode's size */
	svmTreeBuf := cl.CLSVMAlloc(context,
		cl.CL_MEM_READ_WRITE,
		cl.CL_size_t(NUMBER_OF_NODES*unsafe.Sizeof(sampleNode)),
		0)
	if nil == svmTreeBuf {
		println("clSVMAlloc(svmTreeBuf) failed.")
		return
	}
	defer cl.CLSVMFree(context, svmTreeBuf)

	/* svm buffer for search keys: NUMBER_OF_SEARCH_KEY entries of sampleKey's size */
	svmSearchBuf := cl.CLSVMAlloc(context,
		cl.CL_MEM_READ_WRITE,
		cl.CL_size_t(NUMBER_OF_SEARCH_KEY*unsafe.Sizeof(sampleKey)),
		0)
	if nil == svmSearchBuf {
		println("clSVMAlloc(svmSearchBuf) failed.")
		return
	}
	defer cl.CLSVMFree(context, svmSearchBuf)

	//create the binary tree and set the root
	/* root node of the binary tree */
	svmRoot := cpuCreateBinaryTree(cmdQueue, svmTreeBuf)

	//initialize search keys
	cpuInitSearchKeys(cmdQueue, svmSearchBuf)

	/* report the problem size before running the search */
	fmt.Printf("-------------------------------------------------------------------------\n")
	fmt.Printf("Searching %d keys in a BST having %d Nodes...\n", NUMBER_OF_SEARCH_KEY, NUMBER_OF_NODES)
	fmt.Printf("-------------------------------------------------------------------------\n")

	//-----------------------------------------------------
	// STEP 6: Create and compile the program
	//-----------------------------------------------------
	programSource, programeSize := utils.Load_programsource("bst.cl")

	// Create a program using clCreateProgramWithSource()
	program := cl.CLCreateProgramWithSource(context,
		1,
		programSource[:],
		programeSize[:],
		&status)
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "CLCreateProgramWithSource")
	defer cl.CLReleaseProgram(program)

	// Build (compile) the program for the devices with
	// clBuildProgram()
	options := "-cl-std=CL2.0"
	status = cl.CLBuildProgram(program,
		numDevices,
		devices,
		[]byte(options),
		nil,
		nil)
	if status != cl.CL_SUCCESS {
		var program_log interface{}
		var log_size cl.CL_size_t

		/* Find size of log and print to std output */
		cl.CLGetProgramBuildInfo(program, devices[0], cl.CL_PROGRAM_BUILD_LOG,
			0, nil, &log_size)
		cl.CLGetProgramBuildInfo(program, devices[0], cl.CL_PROGRAM_BUILD_LOG,
			log_size, &program_log, nil)
		fmt.Printf("%s\n", program_log)
		return
	}

	//-----------------------------------------------------
	// STEP 7: Create the kernel
	//-----------------------------------------------------
	var kernel cl.CL_kernel

	// Use clCreateKernel() to create a kernel
	kernel = cl.CLCreateKernel(program, []byte("bst_kernel"), &status)
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "CLCreateKernel")
	defer cl.CLReleaseKernel(kernel)

	//-----------------------------------------------------
	// STEP 8: Set the kernel arguments
	//-----------------------------------------------------
	// Both arguments are SVM pointers: the tree buffer and
	// the search-key buffer.
	status = cl.CLSetKernelArgSVMPointer(kernel,
		0,
		svmTreeBuf)
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "clSetKernelArgSVMPointer(svmTreeBuf)")

	status = cl.CLSetKernelArgSVMPointer(kernel,
		1,
		svmSearchBuf)
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "clSetKernelArgSVMPointer(svmSearchBuf)")

	//-----------------------------------------------------
	// STEP 9: Configure the work-item structure
	//-----------------------------------------------------
	// The local work size is the value reported for
	// CL_KERNEL_WORK_GROUP_SIZE; the global work size is one
	// work-item per search key.
	var localWorkSize [1]cl.CL_size_t
	var kernelWorkGroupSize interface{}
	status = cl.CLGetKernelWorkGroupInfo(kernel,
		devices[0],
		cl.CL_KERNEL_WORK_GROUP_SIZE,
		cl.CL_size_t(unsafe.Sizeof(localWorkSize[0])),
		&kernelWorkGroupSize,
		nil)
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "CLGetKernelWorkGroupInfo")
	localWorkSize[0] = kernelWorkGroupSize.(cl.CL_size_t)

	var globalWorkSize [1]cl.CL_size_t
	globalWorkSize[0] = NUMBER_OF_SEARCH_KEY

	//-----------------------------------------------------
	// STEP 10: Enqueue the kernel for execution
	//-----------------------------------------------------
	// Execute the kernel by using
	// clEnqueueNDRangeKernel().
	// 'globalWorkSize' is the 1D dimension of the
	// work-items
	status = cl.CLEnqueueNDRangeKernel(cmdQueue,
		kernel,
		1,
		nil,
		globalWorkSize[:],
		localWorkSize[:],
		0,
		nil,
		nil)
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "CLEnqueueNDRangeKernel")

	status = cl.CLFlush(cmdQueue)
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "clFlush")

	status = cl.CLFinish(cmdQueue)
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "clFinish")

	//-----------------------------------------------------
	// STEP 11: Read the output buffer back to the host
	//-----------------------------------------------------
	//this demo doesn't need clEnqueueReadBuffer due to SVM

	//-----------------------------------------------------
	// STEP 12: Verify the results
	//-----------------------------------------------------
	// reference implementation
	svmBinaryTreeCPUReference(cmdQueue,
		svmRoot,
		svmTreeBuf,
		svmSearchBuf)

	// compare the results and see if they match
	pass := svmCompareResults(cmdQueue, svmSearchBuf)
	if pass {
		println("Passed!")
	} else {
		println("Failed!")
	}
}
예제 #8
0
파일: svmfg.go 프로젝트: xfong/gocl
// main runs the fine-grained SVM sample. It selects the first GPU device of
// the first platform, stops with a message unless that device reports
// CL_DEVICE_SVM_FINE_GRAIN_BUFFER, builds svmfg.cl with -cl-std=CL2.0,
// creates the "svmbasic" kernel and hands context, queue and kernel to
// svmbasic with a size of 1024*1024.
//
// All OpenCL objects created here are released through defer.
func main() {
	// Use this to check the output of each API call
	var status cl.CL_int

	//-----------------------------------------------------
	// STEP 1: Discover and initialize the platforms
	//-----------------------------------------------------
	var numPlatforms cl.CL_uint

	// Use clGetPlatformIDs() to retrieve the number of
	// platforms
	status = cl.CLGetPlatformIDs(0, nil, &numPlatforms)
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "CLGetPlatformIDs")

	// Allocate enough space for each platform
	platforms := make([]cl.CL_platform_id, numPlatforms)

	// Fill in platforms with clGetPlatformIDs()
	status = cl.CLGetPlatformIDs(numPlatforms, platforms, nil)
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "CLGetPlatformIDs")

	//-----------------------------------------------------
	// STEP 2: Discover and initialize the GPU devices
	//-----------------------------------------------------
	var numDevices cl.CL_uint

	// Use clGetDeviceIDs() to retrieve the number of
	// devices present
	status = cl.CLGetDeviceIDs(platforms[0],
		cl.CL_DEVICE_TYPE_GPU,
		0,
		nil,
		&numDevices)
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "CLGetDeviceIDs")

	// Allocate enough space for each device
	devices := make([]cl.CL_device_id, numDevices)

	// Fill in devices with clGetDeviceIDs()
	status = cl.CLGetDeviceIDs(platforms[0],
		cl.CL_DEVICE_TYPE_GPU,
		numDevices,
		devices,
		nil)
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "CLGetDeviceIDs")

	var caps cl.CL_device_svm_capabilities
	var caps_value interface{}

	status = cl.CLGetDeviceInfo(
		devices[0],
		cl.CL_DEVICE_SVM_CAPABILITIES,
		cl.CL_size_t(unsafe.Sizeof(caps)),
		&caps_value,
		nil)
	// Comma-ok assertion: when the query failed and left caps_value unset,
	// caps stays zero and the check below reports the missing capability
	// instead of the assertion panicking.
	caps, _ = caps_value.(cl.CL_device_svm_capabilities)

	// This sample needs fine-grained buffer SVM. Stop when the capability
	// query failed or the fine-grain buffer bit is not set.
	if !(status == cl.CL_SUCCESS && (caps&cl.CL_DEVICE_SVM_FINE_GRAIN_BUFFER) != 0) {
		fmt.Printf("Cannot detect fine-grained buffer SVM capabilities on the device. The device seemingly doesn't support fine-grained buffer SVM. caps=%x\n", caps)
		println("")
		return
	}

	//-----------------------------------------------------
	// STEP 3: Create a context
	//-----------------------------------------------------
	// Create a context using clCreateContext() and
	// associate it with the devices
	context := cl.CLCreateContext(nil,
		numDevices,
		devices,
		nil,
		nil,
		&status)
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "CLCreateContext")
	defer cl.CLReleaseContext(context)

	//-----------------------------------------------------
	// STEP 4: Create a command queue
	//-----------------------------------------------------
	// Create a command queue using clCreateCommandQueueWithProperties(),
	// and associate it with the device you want to execute
	queue := cl.CLCreateCommandQueueWithProperties(context,
		devices[0],
		nil,
		&status)
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "CLCreateCommandQueueWithProperties")
	defer cl.CLReleaseCommandQueue(queue)

	//-----------------------------------------------------
	// STEP 5: Create and compile the program
	//-----------------------------------------------------
	programSource, programeSize := utils.Load_programsource("svmfg.cl")

	// Create a program using clCreateProgramWithSource()
	program := cl.CLCreateProgramWithSource(context,
		1,
		programSource[:],
		programeSize[:],
		&status)
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "CLCreateProgramWithSource")
	defer cl.CLReleaseProgram(program)

	// Build (compile) the program for the devices with
	// clBuildProgram()
	options := "-cl-std=CL2.0"

	status = cl.CLBuildProgram(program,
		numDevices,
		devices,
		[]byte(options),
		nil,
		nil)
	if status != cl.CL_SUCCESS {
		var log interface{}
		var log_size cl.CL_size_t
		/* Find size of log and print to std output */
		cl.CLGetProgramBuildInfo(program, devices[0], cl.CL_PROGRAM_BUILD_LOG, 0, nil, &log_size)
		cl.CLGetProgramBuildInfo(program, devices[0], cl.CL_PROGRAM_BUILD_LOG, log_size, &log, nil)
		fmt.Printf("%s\n", log)
		return
	}

	//-----------------------------------------------------
	// STEP 6: Create the kernel
	//-----------------------------------------------------
	// Use clCreateKernel() to create a kernel
	kernel := cl.CLCreateKernel(program,
		[]byte("svmbasic"),
		&status)
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "CLCreateKernel")
	defer cl.CLReleaseKernel(kernel)

	// Then call the main sample routine - resource allocations, OpenCL kernel
	// execution, and so on.
	svmbasic(1024*1024, context, queue, kernel)

	// All resource deallocations happen in defer.
}
예제 #9
0
파일: matvec_test.go 프로젝트: xfong/gocl
// TestMatvec multiplies a 4x4 matrix by a 4-element vector with the
// "matvec_mult" kernel from matvec.cl and compares the device result with the
// product computed on the host.
//
// A GPU on the first platform is preferred; a CPU device is tried if the GPU
// query fails. Setup failures stop the test with t.Fatalf, because the
// following steps would otherwise run on invalid handles (for example a nil
// os.FileInfo or a program that failed to build). Every OpenCL object is
// released through defer so that these early exits do not leak it.
func TestMatvec(t *testing.T) {
	/* Host/device data structures */
	var platform [1]cl.CL_platform_id
	var device [1]cl.CL_device_id
	var context cl.CL_context
	var queue cl.CL_command_queue
	var i, err cl.CL_int

	/* Program/kernel data structures */
	var program cl.CL_program
	var program_buffer [1][]byte
	var program_log interface{}
	var program_size [1]cl.CL_size_t
	var log_size cl.CL_size_t
	var kernel cl.CL_kernel

	/* Data and buffers */
	var mat [16]float32
	var vec, result [4]float32
	var correct = [4]float32{0.0, 0.0, 0.0, 0.0}
	var mat_buff, vec_buff, res_buff cl.CL_mem

	/* Initialize data to be processed by the kernel */
	for i = 0; i < 16; i++ {
		mat[i] = float32(i) * 2.0
	}

	/* Fill the vector and accumulate the expected row-by-vector products */
	for i = 0; i < 4; i++ {
		vec[i] = float32(i) * 3.0
		correct[0] += mat[i] * vec[i]
		correct[1] += mat[i+4] * vec[i]
		correct[2] += mat[i+8] * vec[i]
		correct[3] += mat[i+12] * vec[i]
	}

	/* Identify a platform */
	err = cl.CLGetPlatformIDs(1, platform[:], nil)
	if err < 0 {
		t.Fatalf("Couldn't find any platforms")
	}

	/* Access a device: GPU first, CPU as a fallback */
	err = cl.CLGetDeviceIDs(platform[0], cl.CL_DEVICE_TYPE_GPU, 1, device[:], nil)
	if err < 0 {
		err = cl.CLGetDeviceIDs(platform[0], cl.CL_DEVICE_TYPE_CPU, 1, device[:], nil)
		if err < 0 {
			t.Fatalf("Couldn't find any devices")
		}
	}

	/* Create the context */
	context = cl.CLCreateContext(nil, 1, device[:], nil, nil, &err)
	if err < 0 {
		t.Fatalf("Couldn't create a context")
	}
	defer cl.CLReleaseContext(context)

	/* Read program file and place content into buffer */
	program_handle, err1 := os.Open("matvec.cl")
	if err1 != nil {
		t.Fatalf("Couldn't find the program file: %v", err1)
	}
	defer program_handle.Close()

	fi, err2 := program_handle.Stat()
	if err2 != nil {
		t.Fatalf("Couldn't find the program stat: %v", err2)
	}
	program_size[0] = cl.CL_size_t(fi.Size())
	program_buffer[0] = make([]byte, program_size[0])
	read_size, err3 := program_handle.Read(program_buffer[0])
	if err3 != nil || cl.CL_size_t(read_size) != program_size[0] {
		t.Fatalf("read file error or file size wrong")
	}

	/* Create a program containing all program content */
	program = cl.CLCreateProgramWithSource(context, 1,
		program_buffer[:], program_size[:], &err)
	if err < 0 {
		t.Fatalf("Couldn't create the program")
	}
	defer cl.CLReleaseProgram(program)

	/* Build program */
	err = cl.CLBuildProgram(program, 1, device[:], nil, nil, nil)
	if err < 0 {
		/* Query the log size first, then fetch the build log itself */
		cl.CLGetProgramBuildInfo(program, device[0], cl.CL_PROGRAM_BUILD_LOG,
			0, nil, &log_size)
		cl.CLGetProgramBuildInfo(program, device[0], cl.CL_PROGRAM_BUILD_LOG,
			log_size, &program_log, nil)
		/* No kernel can be created from an unbuilt program, so stop here */
		t.Fatalf("%s\n", program_log)
	}

	/* Create kernel for the mat_vec_mult function */
	kernel = cl.CLCreateKernel(program, []byte("matvec_mult"), &err)
	if err < 0 {
		t.Fatalf("Couldn't create the kernel")
	}
	defer cl.CLReleaseKernel(kernel)

	/* Create CL buffers to hold input and output data */
	mat_buff = cl.CLCreateBuffer(context, cl.CL_MEM_READ_ONLY|
		cl.CL_MEM_COPY_HOST_PTR, cl.CL_size_t(unsafe.Sizeof(mat)), unsafe.Pointer(&mat[0]), &err)
	if err < 0 {
		t.Fatalf("Couldn't create a buffer object")
	}
	defer cl.CLReleaseMemObject(mat_buff)

	vec_buff = cl.CLCreateBuffer(context, cl.CL_MEM_READ_ONLY|
		cl.CL_MEM_COPY_HOST_PTR, cl.CL_size_t(unsafe.Sizeof(vec)), unsafe.Pointer(&vec[0]), &err)
	if err < 0 {
		t.Fatalf("Couldn't create a buffer object")
	}
	defer cl.CLReleaseMemObject(vec_buff)

	res_buff = cl.CLCreateBuffer(context, cl.CL_MEM_WRITE_ONLY,
		cl.CL_size_t(unsafe.Sizeof(result)), nil, &err)
	if err < 0 {
		t.Fatalf("Couldn't create a buffer object")
	}
	defer cl.CLReleaseMemObject(res_buff)

	/* Create kernel arguments from the CL buffers */
	err = cl.CLSetKernelArg(kernel, 0, cl.CL_size_t(unsafe.Sizeof(mat_buff)), unsafe.Pointer(&mat_buff))
	if err < 0 {
		t.Fatalf("Couldn't set the kernel argument")
	}
	err = cl.CLSetKernelArg(kernel, 1, cl.CL_size_t(unsafe.Sizeof(vec_buff)), unsafe.Pointer(&vec_buff))
	if err < 0 {
		t.Fatalf("Couldn't set the kernel argument")
	}
	err = cl.CLSetKernelArg(kernel, 2, cl.CL_size_t(unsafe.Sizeof(res_buff)), unsafe.Pointer(&res_buff))
	if err < 0 {
		t.Fatalf("Couldn't set the kernel argument")
	}

	/* Create a CL command queue for the device */
	queue = cl.CLCreateCommandQueue(context, device[0], 0, &err)
	if err < 0 {
		t.Fatalf("Couldn't create the command queue, errcode=%d\n", err)
	}
	defer cl.CLReleaseCommandQueue(queue)

	/* Enqueue the kernel: one work-item per output element */
	var work_units_per_kernel = [1]cl.CL_size_t{4} /* 4 work-units per kernel */
	err = cl.CLEnqueueNDRangeKernel(queue, kernel, 1, nil, work_units_per_kernel[:],
		nil, 0, nil, nil)
	if err < 0 {
		t.Fatalf("Couldn't enqueue the kernel execution command, errcode=%d\n", err)
	}

	/* Read the result (blocking read, so result is valid on return) */
	err = cl.CLEnqueueReadBuffer(queue, res_buff, cl.CL_TRUE, 0, cl.CL_size_t(unsafe.Sizeof(result)),
		unsafe.Pointer(&result[0]), 0, nil, nil)
	if err < 0 {
		t.Fatalf("Couldn't enqueue the read buffer command")
	}

	/* Test the result */
	if (result[0] == correct[0]) && (result[1] == correct[1]) &&
		(result[2] == correct[2]) && (result[3] == correct[3]) {
		t.Logf("Matrix-vector multiplication successful.")
	} else {
		t.Errorf("Matrix-vector multiplication unsuccessful.")
	}

	/* Resources are released by the deferred calls above, in reverse order
	   of creation. */
}
예제 #10
0
파일: goclinfo.go 프로젝트: xfong/gocl
// main lists every installed OpenCL platform together with its devices.
// For each platform it prints the profile, version, vendor and extension
// strings; for each device of that platform it prints the type, name, vendor
// and profile. The program stops at the first failing OpenCL query.
func main() {
	// Ask how many platforms exist so the ID list can be sized exactly.
	var platformCount cl.CL_uint
	status := cl.CLGetPlatformIDs(0, nil, &platformCount)
	if status != cl.CL_SUCCESS || platformCount <= 0 {
		println("Failed to find any OpenCL platform.")
		return
	}

	// Allocate room for the installed platforms and fetch their IDs.
	platforms := make([]cl.CL_platform_id, platformCount)
	status = cl.CLGetPlatformIDs(platformCount, platforms, nil)
	if status != cl.CL_SUCCESS {
		println("Failed to find any OpenCL platforms.")
		return
	}

	fmt.Printf("Number of platforms: \t%d\n", platformCount)

	// Walk the platforms, printing platform details first and device details after.
	for _, platform := range platforms {
		DisplayPlatformInfo(platform, cl.CL_PLATFORM_PROFILE, "CL_PLATFORM_PROFILE")
		DisplayPlatformInfo(platform, cl.CL_PLATFORM_VERSION, "CL_PLATFORM_VERSION")
		DisplayPlatformInfo(platform, cl.CL_PLATFORM_VENDOR, "CL_PLATFORM_VENDOR")
		DisplayPlatformInfo(platform, cl.CL_PLATFORM_EXTENSIONS, "CL_PLATFORM_EXTENSIONS")

		// Count the devices of any type that belong to this platform.
		var deviceCount cl.CL_uint
		status = cl.CLGetDeviceIDs(platform, cl.CL_DEVICE_TYPE_ALL, 0, nil, &deviceCount)
		if status != cl.CL_SUCCESS {
			println("Failed to find OpenCL devices.")
			return
		}

		// Fetch the device IDs themselves.
		deviceIds := make([]cl.CL_device_id, deviceCount)
		status = cl.CLGetDeviceIDs(platform, cl.CL_DEVICE_TYPE_ALL, deviceCount, deviceIds, nil)
		if status != cl.CL_SUCCESS {
			println("Failed to find OpenCL devices.")
			return
		}

		fmt.Printf("\n\tNumber of devices: \t%d\n", deviceCount)

		// Print the selected properties of every device, one blank line apart.
		for _, dev := range deviceIds {
			DisplayDeviceInfo(dev, cl.CL_DEVICE_TYPE, "CL_DEVICE_TYPE")
			DisplayDeviceInfo(dev, cl.CL_DEVICE_NAME, "CL_DEVICE_NAME")
			DisplayDeviceInfo(dev, cl.CL_DEVICE_VENDOR, "CL_DEVICE_VENDOR")

			// The driver version query is intentionally left disabled:
			//DisplayDeviceInfo(dev,
			//	cl.CL_DRIVER_VERSION,
			//	"CL_DRIVER_VERSION")

			DisplayDeviceInfo(dev, cl.CL_DEVICE_PROFILE, "CL_DEVICE_PROFILE")

			fmt.Printf("\n")
		}
	}
}
예제 #11
0
파일: convolution.go 프로젝트: xfong/gocl
func main() {
	var i, j cl.CL_size_t
	// Rows and columns in the input image
	inputFile := "test.png"
	outputFile := "output.png"
	refFile := "ref.png"

	// Homegrown function to read a BMP from file
	inputpixels, imageWidth, imageHeight, err1 := utils.Read_image_data(inputFile)
	if err1 != nil {
		log.Fatal(err1)
		return
	} else {
		fmt.Printf("width=%d, height=%d (%d)\n", imageWidth, imageHeight, inputpixels[0])
	}

	// Output image on the host
	outputpixels := make([]uint16, imageHeight*imageWidth)
	inputImage := make([]float32, imageHeight*imageWidth)
	outputImage := make([]float32, imageHeight*imageWidth)
	refImage := make([]float32, imageHeight*imageWidth)

	for i = 0; i < imageHeight*imageWidth; i++ {
		inputImage[i] = float32(inputpixels[i])
	}

	// 45 degree motion blur
	var filter = [49]float32{0, 0, 0, 0, 0, 0, 0,
		0, 0, 0, 0, 0, 0, 0,
		0, 0, -1, 0, 1, 0, 0,
		0, 0, -2, 0, 2, 0, 0,
		0, 0, -1, 0, 1, 0, 0,
		0, 0, 0, 0, 0, 0, 0,
		0, 0, 0, 0, 0, 0, 0}

	// The convolution filter is 7x7
	filterWidth := cl.CL_size_t(7)
	filterSize := cl.CL_size_t(filterWidth * filterWidth) // Assume a square kernel

	// Set up the OpenCL environment
	var status cl.CL_int

	// Discovery platform
	var platform [1]cl.CL_platform_id
	status = cl.CLGetPlatformIDs(1, platform[:], nil)
	chk(status, "clGetPlatformIDs")

	// Discover device
	var device [1]cl.CL_device_id
	cl.CLGetDeviceIDs(platform[0], cl.CL_DEVICE_TYPE_ALL, 1, device[:], nil)
	chk(status, "clGetDeviceIDs")

	// Create context
	//var props =[3]cl.CL_context_properties{cl.CL_CONTEXT_PLATFORM,
	//    (cl.CL_context_properties)(unsafe.Pointer(&platform[0])), 0};

	var context cl.CL_context
	context = cl.CLCreateContext(nil, 1, device[:], nil, nil, &status)
	chk(status, "clCreateContext")

	// Create command queue
	var queue cl.CL_command_queue
	queue = cl.CLCreateCommandQueue(context, device[0], 0, &status)
	chk(status, "clCreateCommandQueue")

	// The image format describes how the data will be stored in memory
	var format cl.CL_image_format
	format.Image_channel_order = cl.CL_R         // single channel
	format.Image_channel_data_type = cl.CL_FLOAT // float data type

	var desc cl.CL_image_desc
	desc.Image_type = cl.CL_MEM_OBJECT_IMAGE2D
	desc.Image_width = imageWidth
	desc.Image_height = imageHeight
	desc.Image_depth = 0
	desc.Image_array_size = 0
	desc.Image_row_pitch = 0
	desc.Image_slice_pitch = 0
	desc.Num_mip_levels = 0
	desc.Num_samples = 0
	desc.Buffer = cl.CL_mem{}

	// Create space for the source image on the device
	d_inputImage := cl.CLCreateImage(context, cl.CL_MEM_READ_ONLY, &format, &desc,
		nil, &status)
	chk(status, "clCreateImage")

	// Create space for the output image on the device
	d_outputImage := cl.CLCreateImage(context, cl.CL_MEM_WRITE_ONLY, &format, &desc,
		nil, &status)
	chk(status, "clCreateImage")

	// Create space for the 7x7 filter on the device
	d_filter := cl.CLCreateBuffer(context, 0, filterSize*cl.CL_size_t(unsafe.Sizeof(filter[0])),
		nil, &status)
	chk(status, "clCreateBuffer")

	// Copy the source image to the device
	var origin = [3]cl.CL_size_t{0, 0, 0}                                                // Offset within the image to copy from
	var region = [3]cl.CL_size_t{cl.CL_size_t(imageWidth), cl.CL_size_t(imageHeight), 1} // Elements to per dimension
	status = cl.CLEnqueueWriteImage(queue, d_inputImage, cl.CL_FALSE, origin, region,
		0, 0, unsafe.Pointer(&inputImage[0]), 0, nil, nil)
	chk(status, "clEnqueueWriteImage")

	// Copy the 7x7 filter to the device
	status = cl.CLEnqueueWriteBuffer(queue, d_filter, cl.CL_FALSE, 0,
		filterSize*cl.CL_size_t(unsafe.Sizeof(filter[0])), unsafe.Pointer(&filter[0]), 0, nil, nil)
	chk(status, "clEnqueueWriteBuffer")

	// Create the image sampler
	sampler := cl.CLCreateSampler(context, cl.CL_FALSE,
		cl.CL_ADDRESS_CLAMP_TO_EDGE, cl.CL_FILTER_NEAREST, &status)
	chk(status, "clCreateSampler")

	// Create a program object with source and build it
	program := utils.Build_program(context, device[:], "convolution.cl", nil)
	kernel := cl.CLCreateKernel(*program, []byte("convolution"), &status)
	chk(status, "clCreateKernel")

	// Set the kernel arguments
	var w, h, f cl.CL_int
	w = cl.CL_int(imageWidth)
	h = cl.CL_int(imageHeight)
	f = cl.CL_int(filterWidth)
	status = cl.CLSetKernelArg(kernel, 0, cl.CL_size_t(unsafe.Sizeof(d_inputImage)), unsafe.Pointer(&d_inputImage))
	status |= cl.CLSetKernelArg(kernel, 1, cl.CL_size_t(unsafe.Sizeof(d_outputImage)), unsafe.Pointer(&d_outputImage))
	status |= cl.CLSetKernelArg(kernel, 2, cl.CL_size_t(unsafe.Sizeof(h)), unsafe.Pointer(&h))
	status |= cl.CLSetKernelArg(kernel, 3, cl.CL_size_t(unsafe.Sizeof(w)), unsafe.Pointer(&w))
	status |= cl.CLSetKernelArg(kernel, 4, cl.CL_size_t(unsafe.Sizeof(d_filter)), unsafe.Pointer(&d_filter))
	status |= cl.CLSetKernelArg(kernel, 5, cl.CL_size_t(unsafe.Sizeof(f)), unsafe.Pointer(&f))
	status |= cl.CLSetKernelArg(kernel, 6, cl.CL_size_t(unsafe.Sizeof(sampler)), unsafe.Pointer(&sampler))
	chk(status, "clSetKernelArg")

	// Set the work item dimensions
	var globalSize = [2]cl.CL_size_t{imageWidth, imageHeight}
	status = cl.CLEnqueueNDRangeKernel(queue, kernel, 2, nil, globalSize[:], nil, 0,
		nil, nil)
	chk(status, "clEnqueueNDRange")

	// Read the image back to the host
	status = cl.CLEnqueueReadImage(queue, d_outputImage, cl.CL_TRUE, origin,
		region, 0, 0, unsafe.Pointer(&outputImage[0]), 0, nil, nil)
	chk(status, "clEnqueueReadImage")

	// Write the output image to file
	for i = 0; i < imageHeight*imageWidth; i++ {
		outputpixels[i] = uint16(outputImage[i])
	}
	utils.Write_image_data(outputFile, outputpixels, imageWidth, imageHeight)

	// Compute the reference image
	for i = 0; i < imageHeight; i++ {
		for j = 0; j < imageWidth; j++ {
			refImage[i*imageWidth+j] = 0
		}
	}

	// Iterate over the rows of the source image
	halfFilterWidth := filterWidth / 2
	var sum float32
	for i = 0; i < imageHeight; i++ {
		// Iterate over the columns of the source image
		for j = 0; j < imageWidth; j++ {
			sum = 0 // Reset sum for new source pixel
			// Apply the filter to the neighborhood
			for k := -halfFilterWidth; k <= halfFilterWidth; k++ {
				for l := -halfFilterWidth; l <= halfFilterWidth; l++ {
					if i+k >= 0 && i+k < imageHeight &&
						j+l >= 0 && j+l < imageWidth {
						sum += inputImage[(i+k)*imageWidth+j+l] *
							filter[(k+halfFilterWidth)*filterWidth+
								l+halfFilterWidth]
					} else {
						i_k := i + k
						j_l := j + l
						if i+k < 0 {
							i_k = 0
						} else if i+k >= imageHeight {
							i_k = imageHeight - 1
						}
						if j+l < 0 {
							j_l = 0
						} else if j+l >= imageWidth {
							j_l = imageWidth - 1
						}
						sum += inputImage[(i_k)*imageWidth+j_l] *
							filter[(k+halfFilterWidth)*filterWidth+
								l+halfFilterWidth]
					}
				}
			}
			refImage[i*imageWidth+j] = sum
		}
	}
	// Write the ref image to file
	for i = 0; i < imageHeight*imageWidth; i++ {
		outputpixels[i] = uint16(refImage[i])
	}
	utils.Write_image_data(refFile, outputpixels, imageWidth, imageHeight)

	failed := 0
	for i = 0; i < imageHeight; i++ {
		for j = 0; j < imageWidth; j++ {
			if math.Abs(float64(outputImage[i*imageWidth+j]-refImage[i*imageWidth+j])) > 0.01 {
				//fmt.Printf("Results are INCORRECT\n");
				//fmt.Printf("Pixel mismatch at <%d,%d> (%f vs. %f) %f\n", i, j,
				//   outputImage[i*imageWidth+j], refImage[i*imageWidth+j], inputImage[i*imageWidth+j]);
				failed++
			}
		}
	}
	fmt.Printf("Mismatch Pixel number/Total pixel number = %d/%d\n", failed, imageWidth*imageHeight)

	// Free OpenCL resources
	cl.CLReleaseKernel(kernel)
	cl.CLReleaseProgram(*program)
	cl.CLReleaseCommandQueue(queue)
	cl.CLReleaseMemObject(d_inputImage)
	cl.CLReleaseMemObject(d_outputImage)
	cl.CLReleaseMemObject(d_filter)
	cl.CLReleaseSampler(sampler)
	cl.CLReleaseContext(context)
}
예제 #12
0
파일: chapter2.go 프로젝트: xfong/gocl
// main runs the "vecadd" kernel from chapter2.cl on the first device of the
// first OpenCL platform: it adds two 2048-element integer arrays on the device
// and verifies on the host that every output element equals A[i]+B[i].
//
// Each OpenCL call is checked; on failure a message is printed and main
// returns. Objects created up to that point are released by deferred calls, so
// no exit path leaks them.
func main() {
	// This code executes on the OpenCL host

	// Host data
	var size cl.CL_int
	var A []cl.CL_int //input array
	var B []cl.CL_int //input array
	var C []cl.CL_int //output array

	// Elements in each array
	const elements = cl.CL_size_t(2048)

	// Compute the size of the data in bytes
	datasize := cl.CL_size_t(unsafe.Sizeof(size)) * elements

	// Allocate space for input/output data. The slices hold 'elements'
	// values; 'datasize' is their size in bytes and is only used for the
	// device buffers and transfers.
	A = make([]cl.CL_int, elements)
	B = make([]cl.CL_int, elements)
	C = make([]cl.CL_int, elements)
	// Initialize the input data
	for i := cl.CL_int(0); i < cl.CL_int(elements); i++ {
		A[i] = i
		B[i] = i
	}

	// Use this to check the output of each API call
	var status cl.CL_int

	//-----------------------------------------------------
	// STEP 1: Discover and initialize the platforms
	//-----------------------------------------------------

	var numPlatforms cl.CL_uint
	var platforms []cl.CL_platform_id

	// Use clGetPlatformIDs() to retrieve the number of
	// platforms
	status = cl.CLGetPlatformIDs(0, nil, &numPlatforms)
	if status != cl.CL_SUCCESS {
		println("CLGetPlatformIDs status!=cl.CL_SUCCESS")
		return
	}
	if numPlatforms == 0 {
		// platforms[0] below would panic on an empty list
		println("CLGetPlatformIDs found no platforms")
		return
	}

	// Allocate enough space for each platform
	platforms = make([]cl.CL_platform_id, numPlatforms)

	// Fill in platforms with clGetPlatformIDs()
	status = cl.CLGetPlatformIDs(numPlatforms, platforms, nil)
	if status != cl.CL_SUCCESS {
		println("CLGetPlatformIDs status!=cl.CL_SUCCESS")
		return
	}
	//-----------------------------------------------------
	// STEP 2: Discover and initialize the devices
	//-----------------------------------------------------

	var numDevices cl.CL_uint
	var devices []cl.CL_device_id

	// Use clGetDeviceIDs() to retrieve the number of
	// devices present
	status = cl.CLGetDeviceIDs(platforms[0],
		cl.CL_DEVICE_TYPE_ALL,
		0,
		nil,
		&numDevices)
	if status != cl.CL_SUCCESS {
		println("CLGetDeviceIDs status!=cl.CL_SUCCESS")
		return
	}
	if numDevices == 0 {
		// devices[0] below would panic on an empty list
		println("CLGetDeviceIDs found no devices")
		return
	}

	// Allocate enough space for each device
	devices = make([]cl.CL_device_id, numDevices)

	// Fill in devices with clGetDeviceIDs()
	status = cl.CLGetDeviceIDs(platforms[0],
		cl.CL_DEVICE_TYPE_ALL,
		numDevices,
		devices,
		nil)
	if status != cl.CL_SUCCESS {
		println("CLGetDeviceIDs status!=cl.CL_SUCCESS")
		return
	}
	//-----------------------------------------------------
	// STEP 3: Create a context
	//-----------------------------------------------------

	var context cl.CL_context

	// Create a context using clCreateContext() and
	// associate it with the devices
	context = cl.CLCreateContext(nil,
		numDevices,
		devices,
		nil,
		nil,
		&status)
	if status != cl.CL_SUCCESS {
		println("CLCreateContext status!=cl.CL_SUCCESS")
		return
	}
	defer cl.CLReleaseContext(context)

	//-----------------------------------------------------
	// STEP 4: Create a command queue
	//-----------------------------------------------------

	var cmdQueue cl.CL_command_queue

	// Create a command queue using clCreateCommandQueue(),
	// and associate it with the device you want to execute
	// on
	cmdQueue = cl.CLCreateCommandQueue(context,
		devices[0],
		0,
		&status)
	if status != cl.CL_SUCCESS {
		println("CLCreateCommandQueue status!=cl.CL_SUCCESS")
		return
	}
	defer cl.CLReleaseCommandQueue(cmdQueue)

	//-----------------------------------------------------
	// STEP 5: Create device buffers
	//-----------------------------------------------------

	var bufferA cl.CL_mem // Input array on the device
	var bufferB cl.CL_mem // Input array on the device
	var bufferC cl.CL_mem // Output array on the device

	// Use clCreateBuffer() to create a buffer object (d_A)
	// that will contain the data from the host array A
	bufferA = cl.CLCreateBuffer(context,
		cl.CL_MEM_READ_ONLY,
		datasize,
		nil,
		&status)
	if status != cl.CL_SUCCESS {
		println("CLCreateBuffer status!=cl.CL_SUCCESS")
		return
	}
	defer cl.CLReleaseMemObject(bufferA)

	// Use clCreateBuffer() to create a buffer object (d_B)
	// that will contain the data from the host array B
	bufferB = cl.CLCreateBuffer(context,
		cl.CL_MEM_READ_ONLY,
		datasize,
		nil,
		&status)
	if status != cl.CL_SUCCESS {
		println("CLCreateBuffer status!=cl.CL_SUCCESS")
		return
	}
	defer cl.CLReleaseMemObject(bufferB)

	// Use clCreateBuffer() to create a buffer object (d_C)
	// with enough space to hold the output data
	bufferC = cl.CLCreateBuffer(context,
		cl.CL_MEM_WRITE_ONLY,
		datasize,
		nil,
		&status)
	if status != cl.CL_SUCCESS {
		println("CLCreateBuffer status!=cl.CL_SUCCESS")
		return
	}
	defer cl.CLReleaseMemObject(bufferC)

	//-----------------------------------------------------
	// STEP 6: Write host data to device buffers
	//-----------------------------------------------------

	// Use clEnqueueWriteBuffer() to write input array A to
	// the device buffer bufferA
	status = cl.CLEnqueueWriteBuffer(cmdQueue,
		bufferA,
		cl.CL_FALSE,
		0,
		datasize,
		unsafe.Pointer(&A[0]),
		0,
		nil,
		nil)
	if status != cl.CL_SUCCESS {
		println("CLEnqueueWriteBuffer status!=cl.CL_SUCCESS")
		return
	}
	// Use clEnqueueWriteBuffer() to write input array B to
	// the device buffer bufferB
	status = cl.CLEnqueueWriteBuffer(cmdQueue,
		bufferB,
		cl.CL_FALSE,
		0,
		datasize,
		unsafe.Pointer(&B[0]),
		0,
		nil,
		nil)
	if status != cl.CL_SUCCESS {
		println("CLEnqueueWriteBuffer status!=cl.CL_SUCCESS")
		return
	}
	//-----------------------------------------------------
	// STEP 7: Create and compile the program
	//-----------------------------------------------------
	programSource, programeSize := utils.Load_programsource("chapter2.cl")

	// Create a program using clCreateProgramWithSource()
	program := cl.CLCreateProgramWithSource(context,
		1,
		programSource[:],
		programeSize[:],
		&status)
	if status != cl.CL_SUCCESS {
		println("CLCreateProgramWithSource status!=cl.CL_SUCCESS")
		return
	}
	defer cl.CLReleaseProgram(program)

	// Build (compile) the program for the devices with
	// clBuildProgram()
	status = cl.CLBuildProgram(program,
		numDevices,
		devices,
		nil,
		nil,
		nil)
	if status != cl.CL_SUCCESS {
		println("CLBuildProgram status!=cl.CL_SUCCESS")
		return
	}
	//-----------------------------------------------------
	// STEP 8: Create the kernel
	//-----------------------------------------------------

	var kernel cl.CL_kernel

	// Use clCreateKernel() to create a kernel from the
	// vector addition function (named "vecadd")
	kernel = cl.CLCreateKernel(program, []byte("vecadd"), &status)
	if status != cl.CL_SUCCESS {
		println("CLCreateKernel status!=cl.CL_SUCCESS")
		return
	}
	defer cl.CLReleaseKernel(kernel)

	//-----------------------------------------------------
	// STEP 9: Set the kernel arguments
	//-----------------------------------------------------

	// Associate the input and output buffers with the
	// kernel
	// using clSetKernelArg()
	status = cl.CLSetKernelArg(kernel,
		0,
		cl.CL_size_t(unsafe.Sizeof(bufferA)),
		unsafe.Pointer(&bufferA))
	status |= cl.CLSetKernelArg(kernel,
		1,
		cl.CL_size_t(unsafe.Sizeof(bufferB)),
		unsafe.Pointer(&bufferB))
	status |= cl.CLSetKernelArg(kernel,
		2,
		cl.CL_size_t(unsafe.Sizeof(bufferC)),
		unsafe.Pointer(&bufferC))
	if status != cl.CL_SUCCESS {
		println("CLSetKernelArg status!=cl.CL_SUCCESS")
		return
	}
	//-----------------------------------------------------
	// STEP 10: Configure the work-item structure
	//-----------------------------------------------------

	// Define an index space (global work size) of work
	// items for
	// execution. A workgroup size (local work size) is not
	// required,
	// but can be used.
	var globalWorkSize [1]cl.CL_size_t
	// There are 'elements' work-items
	globalWorkSize[0] = elements

	//-----------------------------------------------------
	// STEP 11: Enqueue the kernel for execution
	//-----------------------------------------------------

	// Execute the kernel by using
	// clEnqueueNDRangeKernel().
	// 'globalWorkSize' is the 1D dimension of the
	// work-items
	status = cl.CLEnqueueNDRangeKernel(cmdQueue,
		kernel,
		1,
		nil,
		globalWorkSize[:],
		nil,
		0,
		nil,
		nil)
	if status != cl.CL_SUCCESS {
		println("CLEnqueueNDRangeKernel status!=cl.CL_SUCCESS")
		return
	}
	//-----------------------------------------------------
	// STEP 12: Read the output buffer back to the host
	//-----------------------------------------------------

	// Use clEnqueueReadBuffer() to read the OpenCL output
	// buffer (bufferC)
	// to the host output array (C). The status has to be
	// captured here, otherwise the check below would only
	// re-test the result of the kernel enqueue.
	status = cl.CLEnqueueReadBuffer(cmdQueue,
		bufferC,
		cl.CL_TRUE,
		0,
		datasize,
		unsafe.Pointer(&C[0]),
		0,
		nil,
		nil)
	if status != cl.CL_SUCCESS {
		println("CLEnqueueReadBuffer status!=cl.CL_SUCCESS")
		return
	}
	// Verify the output
	result := true
	for i := cl.CL_int(0); i < cl.CL_int(elements); i++ {
		if C[i] != i+i {
			result = false
			break
		}
	}
	if result {
		println("Output is correct\n")
	} else {
		println("Output is incorrect\n")
	}

	//-----------------------------------------------------
	// STEP 13: Release OpenCL resources
	//-----------------------------------------------------

	// The deferred release calls registered after each
	// successful creation run here, in reverse order of
	// creation.
}
예제 #13
0
파일: program_test.go 프로젝트: xfong/gocl
// TestProgram loads the NUM_FILES source files listed in file_name, combines
// them into one OpenCL program and builds it for the first GPU (or CPU when no
// GPU is found) with the finite-math and no-signed-zeros options. A build
// failure is reported together with the build log.
//
// Setup failures stop the test with t.Fatalf, because later steps would
// otherwise dereference a nil file handle or use an invalid context. Each
// source file is closed as soon as it has been read, and the context and
// program are released through defer.
func TestProgram(t *testing.T) {

	/* Host/device data structures */
	var platform [1]cl.CL_platform_id
	var device [1]cl.CL_device_id
	var context cl.CL_context
	var i, err cl.CL_int

	/* Program data structures */
	var program cl.CL_program
	var program_buffer [NUM_FILES][]byte
	var program_log interface{}
	var file_name = []string{"bad.cl", "good.cl"}
	options := "-cl-finite-math-only -cl-no-signed-zeros"
	var program_size [NUM_FILES]cl.CL_size_t
	var log_size cl.CL_size_t

	/* Access the first installed platform */
	err = cl.CLGetPlatformIDs(1, platform[:], nil)
	if err < 0 {
		t.Fatalf("Couldn't find any platforms")
	}

	/* Access the first GPU/CPU */
	err = cl.CLGetDeviceIDs(platform[0], cl.CL_DEVICE_TYPE_GPU, 1, device[:], nil)
	if err == cl.CL_DEVICE_NOT_FOUND {
		err = cl.CLGetDeviceIDs(platform[0], cl.CL_DEVICE_TYPE_CPU, 1, device[:], nil)
	}
	if err < 0 {
		t.Fatalf("Couldn't find any devices")
	}

	/* Create a context */
	context = cl.CLCreateContext(nil, 1, device[:], nil, nil, &err)
	if err < 0 {
		t.Fatalf("Couldn't create a context")
	}
	defer cl.CLReleaseContext(context)

	/* Read each program file and place content into buffer array */
	for i = 0; i < NUM_FILES; i++ {
		/* The closure scopes the deferred Close to a single iteration, so
		   files are not kept open until the whole test returns. */
		func() {
			program_handle, openErr := os.Open(file_name[i])
			if openErr != nil {
				t.Fatalf("Couldn't find the program file %s: %v", file_name[i], openErr)
			}
			defer program_handle.Close()

			fi, statErr := program_handle.Stat()
			if statErr != nil {
				t.Fatalf("Couldn't find the program stat for %s: %v", file_name[i], statErr)
			}
			program_size[i] = cl.CL_size_t(fi.Size())
			program_buffer[i] = make([]byte, program_size[i])
			read_size, readErr := program_handle.Read(program_buffer[i])
			if readErr != nil || cl.CL_size_t(read_size) != program_size[i] {
				t.Fatalf("read file error or file size wrong")
			}
		}()
	}

	/* Create a program containing all program content */
	program = cl.CLCreateProgramWithSource(context, NUM_FILES,
		program_buffer[:], program_size[:], &err)
	if err < 0 {
		t.Fatalf("Couldn't create the program")
	}
	defer cl.CLReleaseProgram(program)

	/* Build program */
	err = cl.CLBuildProgram(program, 1, device[:], []byte(options), nil, nil)
	if err < 0 {
		/* Query the log size first, then fetch the build log itself */
		cl.CLGetProgramBuildInfo(program, device[0], cl.CL_PROGRAM_BUILD_LOG,
			0, nil, &log_size)
		cl.CLGetProgramBuildInfo(program, device[0], cl.CL_PROGRAM_BUILD_LOG,
			log_size, &program_log, nil)
		t.Errorf("%s\n", program_log)
	}

	/* The program and context are released by the deferred calls above;
	   the source buffers are ordinary Go slices and need no explicit free. */
}
예제 #14
0
파일: device_ext.go 프로젝트: xfong/gocl
// main prints the name, address width and extension list of every device on
// the first OpenCL platform.
//
// Each property is fetched with two CLGetDeviceInfo calls: the first obtains
// the size of the value, the second the value itself. On any failure the
// program prints which property could not be read and returns.
func main() {

	/* Host/device data structures */
	var platform [1]cl.CL_platform_id
	var devices []cl.CL_device_id
	var num_devices cl.CL_uint
	var err cl.CL_int

	/* Extension data */
	var paramValueSize cl.CL_size_t
	var name_data interface{}
	var ext_data interface{}
	var addr_data interface{}

	/* Identify a platform */
	err = cl.CLGetPlatformIDs(1, platform[:], nil)
	if err != cl.CL_SUCCESS {
		println("Couldn't find any platforms")
		return
	}

	/* Determine number of connected devices */
	err = cl.CLGetDeviceIDs(platform[0], cl.CL_DEVICE_TYPE_ALL, 0, nil, &num_devices)
	if err != cl.CL_SUCCESS {
		println("Couldn't find any devices")
		return
	}

	/* Access connected devices */
	devices = make([]cl.CL_device_id, num_devices)

	err = cl.CLGetDeviceIDs(platform[0], cl.CL_DEVICE_TYPE_ALL,
		num_devices, devices, nil)
	if err != cl.CL_SUCCESS {
		println("Couldn't get any devices.")
		return
	}

	/* Obtain data for each connected device */
	for _, device := range devices {

		/* Device name: size query, then value query */
		err = cl.CLGetDeviceInfo(device,
			cl.CL_DEVICE_NAME,
			0,
			nil,
			&paramValueSize)
		if err != cl.CL_SUCCESS {
			fmt.Printf("Failed to find OpenCL device info %s.\n", "NAME")
			return
		}

		err = cl.CLGetDeviceInfo(device,
			cl.CL_DEVICE_NAME,
			paramValueSize,
			&name_data,
			nil)
		if err != cl.CL_SUCCESS {
			fmt.Printf("Failed to find OpenCL device info %s.\n", "NAME")
			return
		}

		/* Address width: size query, then value query */
		err = cl.CLGetDeviceInfo(device,
			cl.CL_DEVICE_ADDRESS_BITS,
			0,
			nil,
			&paramValueSize)
		if err != cl.CL_SUCCESS {
			fmt.Printf("Failed to find OpenCL device info %s.\n", "ADDRESS_BITS")
			return
		}

		err = cl.CLGetDeviceInfo(device,
			cl.CL_DEVICE_ADDRESS_BITS,
			paramValueSize,
			&addr_data,
			nil)
		if err != cl.CL_SUCCESS {
			fmt.Printf("Failed to find OpenCL device info %s.\n", "ADDRESS_BITS")
			return
		}

		/* Extension list: size query, then value query */
		err = cl.CLGetDeviceInfo(device,
			cl.CL_DEVICE_EXTENSIONS,
			0,
			nil,
			&paramValueSize)
		if err != cl.CL_SUCCESS {
			fmt.Printf("Failed to find OpenCL device info %s.\n", "EXTENSIONS")
			return
		}

		err = cl.CLGetDeviceInfo(device,
			cl.CL_DEVICE_EXTENSIONS,
			paramValueSize,
			&ext_data,
			nil)
		if err != cl.CL_SUCCESS {
			fmt.Printf("Failed to find OpenCL device info %s.\n", "EXTENSIONS")
			return
		}

		/* The type assertions match the dynamic types the binding is
		   expected to store for these properties - TODO confirm against
		   the cl package. */
		fmt.Printf("NAME: %s\nADDRESS_WIDTH: %d\nEXTENSIONS: %s\n\n",
			name_data.(string), addr_data.(cl.CL_uint), ext_data.(string))
	}
}
예제 #15
0
파일: kernel_test.go 프로젝트: xfong/gocl
// TestKernel builds the OpenCL program stored in "test.cl" for the first
// available GPU (falling back to a CPU device), creates a kernel for every
// function in that program, and checks that one of them is named "mult".
//
// Every setup step that later steps depend on (platform, device, context,
// source file, program, build, kernel enumeration) aborts the test with
// t.Fatalf on failure, because continuing would only operate on nil or
// invalid handles. OpenCL objects are released through defer so they are
// freed on the abort path as well.
func TestKernel(t *testing.T) {

	/* Host/device data structures */
	var platform [1]cl.CL_platform_id
	var device [1]cl.CL_device_id
	var err cl.CL_int

	/* Program data structures */
	var program_buffer [1][]byte
	var program_size [1]cl.CL_size_t
	var num_kernels cl.CL_uint

	/* Access the first installed platform */
	err = cl.CLGetPlatformIDs(1, platform[:], nil)
	if err != cl.CL_SUCCESS {
		t.Fatalf("Couldn't find any platforms")
	}

	/* Access the first GPU/CPU */
	err = cl.CLGetDeviceIDs(platform[0], cl.CL_DEVICE_TYPE_GPU, 1, device[:], nil)
	if err == cl.CL_DEVICE_NOT_FOUND {
		err = cl.CLGetDeviceIDs(platform[0], cl.CL_DEVICE_TYPE_CPU, 1, device[:], nil)
	}
	if err != cl.CL_SUCCESS {
		t.Fatalf("Couldn't find any devices")
	}

	/* Create a context */
	context := cl.CLCreateContext(nil, 1, device[:], nil, nil, &err)
	if err != cl.CL_SUCCESS {
		t.Fatalf("Couldn't create a context")
	}
	defer cl.CLReleaseContext(context)

	/* Read the program file and place its content into the buffer array */
	program_handle, err1 := os.Open("test.cl")
	if err1 != nil {
		// program_handle is nil here; Stat/Read below would be meaningless.
		t.Fatalf("Couldn't find the program file")
	}
	defer program_handle.Close()

	fi, err2 := program_handle.Stat()
	if err2 != nil {
		// fi is nil on error, so fi.Size() would panic.
		t.Fatalf("Couldn't find the program stat")
	}
	program_size[0] = cl.CL_size_t(fi.Size())
	program_buffer[0] = make([]byte, program_size[0])
	read_size, err3 := program_handle.Read(program_buffer[0])
	if err3 != nil || cl.CL_size_t(read_size) != program_size[0] {
		t.Fatalf("read file error or file size wrong")
	}

	/* Create a program containing all program content */
	program := cl.CLCreateProgramWithSource(context, 1,
		program_buffer[:], program_size[:], &err)
	if err != cl.CL_SUCCESS {
		t.Fatalf("Couldn't create the program")
	}
	// Deferred after the context release, so the program is released first.
	defer cl.CLReleaseProgram(program)

	/* Build program */
	err = cl.CLBuildProgram(program, 1, device[:], nil, nil, nil)
	if err != cl.CL_SUCCESS {
		/* Find size of log and report it as the failure message */
		var program_log interface{}
		var log_size cl.CL_size_t

		cl.CLGetProgramBuildInfo(program, device[0], cl.CL_PROGRAM_BUILD_LOG,
			0, nil, &log_size)
		cl.CLGetProgramBuildInfo(program, device[0], cl.CL_PROGRAM_BUILD_LOG,
			log_size, &program_log, nil)
		t.Fatalf("%s\n", program_log)
	}

	/* Find out how many kernels are in the source file */
	err = cl.CLCreateKernelsInProgram(program, 0, nil, &num_kernels)
	if err != cl.CL_SUCCESS {
		t.Fatalf("Couldn't find any kernels")
	}
	t.Logf("num_kernels = %d\n", num_kernels)

	/* Create a kernel for each function */
	kernels := make([]cl.CL_kernel, num_kernels)
	err = cl.CLCreateKernelsInProgram(program, num_kernels, kernels, nil)
	if err != cl.CL_SUCCESS {
		t.Fatalf("Couldn't create kernels")
	}
	// Runs before the program and context releases (defers are LIFO).
	defer func() {
		for i := range kernels {
			cl.CLReleaseKernel(kernels[i])
		}
	}()

	/* Search for the named kernel */
	found := false
	for i := range kernels {
		var kernel_name_size cl.CL_size_t
		var kernel_name interface{}

		err = cl.CLGetKernelInfo(kernels[i], cl.CL_KERNEL_FUNCTION_NAME,
			0, nil, &kernel_name_size)
		if err != cl.CL_SUCCESS {
			t.Errorf("Couldn't get kernel size of name, errcode=%d\n", err)
			continue
		}
		err = cl.CLGetKernelInfo(kernels[i], cl.CL_KERNEL_FUNCTION_NAME,
			kernel_name_size, &kernel_name, nil)
		if err != cl.CL_SUCCESS {
			t.Errorf("Couldn't get kernel info of name, errcode=%d\n", err)
			continue
		}

		// Checked assertion: an unexpected or nil value must not panic the test.
		name, ok := kernel_name.(string)
		if !ok {
			t.Errorf("Kernel name at index %d has unexpected type %T\n", i, kernel_name)
			continue
		}
		if name == "mult" {
			found = true
			t.Logf("Found mult kernel at index %d.\n", i)
			break
		}
	}
	if !found {
		t.Errorf("Not found mult kernel\n")
	}
}