Ejemplo n.º 1
0
// main looks up one OpenCL device on the first platform (a GPU if present,
// otherwise a CPU) and prints the single-precision floating-point features
// the device reports through CL_DEVICE_SINGLE_FP_CONFIG.
func main() {

	/* Host/device data structures */
	var platform [1]cl.CL_platform_id
	var device [1]cl.CL_device_id
	var info interface{} // receives the value decoded by CLGetDeviceInfo
	var err cl.CL_int

	/* Identify a platform */
	err = cl.CLGetPlatformIDs(1, platform[:], nil)
	if err < 0 {
		println("Couldn't identify a platform")
		return
	}

	/* Access a device: try a GPU first, fall back to a CPU */
	err = cl.CLGetDeviceIDs(platform[0], cl.CL_DEVICE_TYPE_GPU, 1, device[:], nil)
	if err == cl.CL_DEVICE_NOT_FOUND {
		err = cl.CLGetDeviceIDs(platform[0], cl.CL_DEVICE_TYPE_CPU, 1, device[:], nil)
	}
	if err < 0 {
		println("Couldn't access any devices")
		return
	}

	/* Check float-processing features */
	// The size handed to the query is that of the bitfield itself, not of
	// the interface{} holder the binding stores the result in.
	var sized cl.CL_device_fp_config
	err = cl.CLGetDeviceInfo(device[0], cl.CL_DEVICE_SINGLE_FP_CONFIG,
		cl.CL_size_t(unsafe.Sizeof(sized)), &info, nil)
	if err < 0 {
		println("Couldn't read floating-point properties")
		return
	}

	// Assert the dynamic type once, with the comma-ok form, so an unexpected
	// result type is reported instead of panicking.
	fpConfig, ok := info.(cl.CL_device_fp_config)
	if !ok {
		println("Couldn't read floating-point properties")
		return
	}

	// Each capability bit paired with the line printed when it is set.
	features := []struct {
		mask cl.CL_device_fp_config
		text string
	}{
		{cl.CL_FP_INF_NAN, "INF and NaN values supported.\n"},
		{cl.CL_FP_DENORM, "Denormalized numbers supported.\n"},
		{cl.CL_FP_ROUND_TO_NEAREST, "Round To Nearest Even mode supported.\n"},
		{cl.CL_FP_ROUND_TO_INF, "Round To Infinity mode supported.\n"},
		{cl.CL_FP_ROUND_TO_ZERO, "Round To Zero mode supported.\n"},
		{cl.CL_FP_FMA, "Floating-point multiply-and-add operation supported.\n"},
		{cl.CL_FP_SOFT_FLOAT, "Basic floating-point processing performed in software.\n"},
	}

	fmt.Printf("Float Processing Features:\n")
	for _, f := range features {
		if fpConfig&f.mask != 0 {
			fmt.Print(f.text)
		}
	}
}
Ejemplo n.º 2
0
Archivo: svmfg.go Proyecto: xfong/gocl
// main prepares OpenCL for the fine-grained buffer SVM sample: it selects the
// first platform and its GPU devices, verifies that the first device reports
// fine-grained buffer SVM support, builds svmfg.cl for OpenCL 2.0, creates the
// "svmbasic" kernel and hands everything to svmbasic. All OpenCL objects are
// released through deferred calls.
func main() {
	// Use this to check the output of each API call
	var status cl.CL_int

	//-----------------------------------------------------
	// STEP 1: Discover and initialize the platforms
	//-----------------------------------------------------
	var numPlatforms cl.CL_uint

	// Use clGetPlatformIDs() to retrieve the number of
	// platforms
	status = cl.CLGetPlatformIDs(0, nil, &numPlatforms)
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "CLGetPlatformIDs")
	// Guard the platforms[0] access below in case the status check above
	// does not stop the program.
	if numPlatforms == 0 {
		println("No OpenCL platforms found")
		return
	}

	// Allocate enough space for each platform
	platforms := make([]cl.CL_platform_id, numPlatforms)

	// Fill in platforms with clGetPlatformIDs()
	status = cl.CLGetPlatformIDs(numPlatforms, platforms, nil)
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "CLGetPlatformIDs")

	//-----------------------------------------------------
	// STEP 2: Discover and initialize the GPU devices
	//-----------------------------------------------------
	var numDevices cl.CL_uint

	// Use clGetDeviceIDs() to retrieve the number of
	// devices present
	status = cl.CLGetDeviceIDs(platforms[0],
		cl.CL_DEVICE_TYPE_GPU,
		0,
		nil,
		&numDevices)
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "CLGetDeviceIDs")
	// Guard the devices[0] accesses below.
	if numDevices == 0 {
		println("No OpenCL GPU devices found")
		return
	}

	// Allocate enough space for each device
	devices := make([]cl.CL_device_id, numDevices)

	// Fill in devices with clGetDeviceIDs()
	status = cl.CLGetDeviceIDs(platforms[0],
		cl.CL_DEVICE_TYPE_GPU,
		numDevices,
		devices,
		nil)
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "CLGetDeviceIDs")

	var caps cl.CL_device_svm_capabilities
	var capsValue interface{}

	status = cl.CLGetDeviceInfo(
		devices[0],
		cl.CL_DEVICE_SVM_CAPABILITIES,
		cl.CL_size_t(unsafe.Sizeof(caps)),
		&capsValue,
		nil)
	// When the query fails capsValue is still nil; a plain type assertion
	// would panic before the friendly message below could be printed, so the
	// comma-ok form is used and caps stays zero on failure.
	if v, ok := capsValue.(cl.CL_device_svm_capabilities); ok {
		caps = v
	}

	// This sample needs fine-grained buffer SVM. Stop if the capability
	// query failed or the device does not report that capability bit.
	if !(status == cl.CL_SUCCESS && (caps&cl.CL_DEVICE_SVM_FINE_GRAIN_BUFFER) != 0) {
		fmt.Printf("Cannot detect fine-grained buffer SVM capabilities on the device. The device seemingly doesn't support fine-grained buffer SVM. caps=%x\n", caps)
		println("")
		return
	}

	//-----------------------------------------------------
	// STEP 3: Create a context
	//-----------------------------------------------------
	// Create a context using clCreateContext() and
	// associate it with the devices
	context := cl.CLCreateContext(nil,
		numDevices,
		devices,
		nil,
		nil,
		&status)
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "CLCreateContext")
	defer cl.CLReleaseContext(context)

	//-----------------------------------------------------
	// STEP 4: Create a command queue
	//-----------------------------------------------------
	// Create a command queue using clCreateCommandQueueWithProperties(),
	// and associate it with the device you want to execute
	queue := cl.CLCreateCommandQueueWithProperties(context,
		devices[0],
		nil,
		&status)
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "CLCreateCommandQueueWithProperties")
	defer cl.CLReleaseCommandQueue(queue)

	//-----------------------------------------------------
	// STEP 5: Create and compile the program
	//-----------------------------------------------------
	programSource, programSize := utils.Load_programsource("svmfg.cl")

	// Create a program using clCreateProgramWithSource()
	program := cl.CLCreateProgramWithSource(context,
		1,
		programSource[:],
		programSize[:],
		&status)
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "CLCreateProgramWithSource")
	defer cl.CLReleaseProgram(program)

	// Build (compile) the program for the devices with
	// clBuildProgram()
	options := "-cl-std=CL2.0"

	status = cl.CLBuildProgram(program,
		numDevices,
		devices,
		[]byte(options),
		nil,
		nil)
	if status != cl.CL_SUCCESS {
		var buildLog interface{}
		var logSize cl.CL_size_t
		/* Find size of log and print to std output */
		if cl.CLGetProgramBuildInfo(program, devices[0], cl.CL_PROGRAM_BUILD_LOG, 0, nil, &logSize) != cl.CL_SUCCESS ||
			cl.CLGetProgramBuildInfo(program, devices[0], cl.CL_PROGRAM_BUILD_LOG, logSize, &buildLog, nil) != cl.CL_SUCCESS {
			// The build failed and the log could not be fetched either.
			println("CLBuildProgram failed and the build log could not be retrieved")
			return
		}
		fmt.Printf("%s\n", buildLog)
		return
	}

	//-----------------------------------------------------
	// STEP 6: Create the kernel
	//-----------------------------------------------------
	// Use clCreateKernel() to create a kernel
	kernel := cl.CLCreateKernel(program,
		[]byte("svmbasic"),
		&status)
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "CLCreateKernel")
	defer cl.CLReleaseKernel(kernel)

	// Then call the main sample routine - resource allocations, OpenCL kernel
	// execution, and so on.
	svmbasic(1024*1024, context, queue, kernel)

	// All resource deallocations happen in defer.
}
Ejemplo n.º 3
0
// main lists every device of the first OpenCL platform and prints, for each
// one, its name, address width and supported extensions.
func main() {

	/* Host/device data structures */
	var platform [1]cl.CL_platform_id
	var devices []cl.CL_device_id
	var num_devices cl.CL_uint
	var err cl.CL_int

	/* Identify a platform */
	err = cl.CLGetPlatformIDs(1, platform[:], nil)
	if err != cl.CL_SUCCESS {
		println("Couldn't find any platforms")
		return
	}

	/* Determine number of connected devices */
	err = cl.CLGetDeviceIDs(platform[0], cl.CL_DEVICE_TYPE_ALL, 0, nil, &num_devices)
	if err != cl.CL_SUCCESS {
		println("Couldn't find any devices")
		return
	}

	/* Access connected devices */
	devices = make([]cl.CL_device_id, num_devices)

	err = cl.CLGetDeviceIDs(platform[0], cl.CL_DEVICE_TYPE_ALL,
		num_devices, devices, nil)
	if err != cl.CL_SUCCESS {
		println("Couldn't get any devices.")
		return
	}

	// queryInfo performs the two-step CLGetDeviceInfo lookup (size first,
	// then value) for one attribute. On failure it prints a message naming
	// the attribute given in label and reports false.
	queryInfo := func(dev cl.CL_device_id, param cl.CL_device_info, label string) (interface{}, bool) {
		var size cl.CL_size_t
		var value interface{}

		if cl.CLGetDeviceInfo(dev, param, 0, nil, &size) != cl.CL_SUCCESS {
			fmt.Printf("Failed to find OpenCL device info %s.\n", label)
			return nil, false
		}
		if cl.CLGetDeviceInfo(dev, param, size, &value, nil) != cl.CL_SUCCESS {
			fmt.Printf("Failed to find OpenCL device info %s.\n", label)
			return nil, false
		}
		return value, true
	}

	/* Obtain data for each connected device */
	for _, dev := range devices {
		name, ok := queryInfo(dev, cl.CL_DEVICE_NAME, "NAME")
		if !ok {
			return
		}

		addrBits, ok := queryInfo(dev, cl.CL_DEVICE_ADDRESS_BITS, "ADDRESS_BITS")
		if !ok {
			return
		}

		extensions, ok := queryInfo(dev, cl.CL_DEVICE_EXTENSIONS, "EXTENSIONS")
		if !ok {
			return
		}

		// The values are passed as-is: fmt formats the dynamic string/integer
		// values the same way, without the panic an unchecked type assertion
		// would raise on an unexpected type.
		fmt.Printf("NAME: %s\nADDRESS_WIDTH: %d\nEXTENSIONS: %s\n\n",
			name, addrBits, extensions)
	}
}
Ejemplo n.º 4
0
// main runs a single-task kernel on two float32 inputs and prints the result.
// Before building the program it prints the device's address width and adds
// the "-DFP_64 " build option when the device advertises cl_khr_fp64.
// Every OpenCL object is released through defer, so early error returns do
// not leak the objects created up to that point.
func main() {

	/* OpenCL data structures */
	var device []cl.CL_device_id
	var context cl.CL_context
	var queue cl.CL_command_queue
	var program *cl.CL_program
	var kernel cl.CL_kernel
	var err cl.CL_int

	/* Data and buffers */
	var a float32 = 6.0
	var b float32 = 2.0
	var result float32
	var a_buffer, b_buffer, output_buffer cl.CL_mem

	/* Extension data */
	var addrBits cl.CL_uint // only its size is used, for the address-bits query
	var addr_data interface{}
	var ext_data interface{}
	fp64_ext := "cl_khr_fp64"
	var ext_size cl.CL_size_t
	var options []byte

	/* Create a device and context */
	device = utils.Create_device()
	context = cl.CLCreateContext(nil, 1, device[:], nil, nil, &err)
	if err < 0 {
		println("Couldn't create a context")
		return
	}
	defer cl.CLReleaseContext(context)

	/* Obtain the device data */
	if cl.CLGetDeviceInfo(device[0], cl.CL_DEVICE_ADDRESS_BITS,
		cl.CL_size_t(unsafe.Sizeof(addrBits)), &addr_data, nil) < 0 {
		println("Couldn't read extension data")
		return
	}
	fmt.Printf("Address width: %v\n", addr_data)

	/* Define "FP_64" option if doubles are supported */
	// Size query first, then the value; both results are checked so a failed
	// query cannot reach the type assertion below with a nil value.
	if cl.CLGetDeviceInfo(device[0], cl.CL_DEVICE_EXTENSIONS,
		0, nil, &ext_size) < 0 ||
		cl.CLGetDeviceInfo(device[0], cl.CL_DEVICE_EXTENSIONS,
			ext_size, &ext_data, nil) < 0 {
		println("Couldn't read extension data")
		return
	}
	extensions, ok := ext_data.(string)
	if !ok {
		println("Couldn't read extension data")
		return
	}
	if strings.Contains(extensions, fp64_ext) {
		fmt.Printf("The %s extension is supported.\n", fp64_ext)
		options = []byte("-DFP_64 ")
	} else {
		fmt.Printf("The %s extension is not supported. %s\n", fp64_ext, extensions)
	}

	/* Build the program and create the kernel */
	program = utils.Build_program(context, device[:], PROGRAM_FILE, options)
	defer cl.CLReleaseProgram(*program)

	kernel = cl.CLCreateKernel(*program, KERNEL_FUNC, &err)
	if err < 0 {
		println("Couldn't create a kernel")
		return
	}
	defer cl.CLReleaseKernel(kernel)

	/* Create CL buffers to hold input and output data */
	a_buffer = cl.CLCreateBuffer(context, cl.CL_MEM_READ_ONLY|
		cl.CL_MEM_COPY_HOST_PTR, cl.CL_size_t(unsafe.Sizeof(a)), unsafe.Pointer(&a), &err)
	if err < 0 {
		println("Couldn't create a memory object")
		return
	}
	defer cl.CLReleaseMemObject(a_buffer)

	b_buffer = cl.CLCreateBuffer(context, cl.CL_MEM_READ_ONLY|
		cl.CL_MEM_COPY_HOST_PTR, cl.CL_size_t(unsafe.Sizeof(b)), unsafe.Pointer(&b), &err)
	if err < 0 {
		println("Couldn't create a memory object")
		return
	}
	defer cl.CLReleaseMemObject(b_buffer)

	output_buffer = cl.CLCreateBuffer(context, cl.CL_MEM_WRITE_ONLY,
		cl.CL_size_t(unsafe.Sizeof(b)), nil, &err)
	if err < 0 {
		println("Couldn't create a memory object")
		return
	}
	defer cl.CLReleaseMemObject(output_buffer)

	/* Create kernel arguments */
	err = cl.CLSetKernelArg(kernel, 0, cl.CL_size_t(unsafe.Sizeof(a_buffer)), unsafe.Pointer(&a_buffer))
	if err < 0 {
		println("Couldn't set a kernel argument")
		return
	}
	err = cl.CLSetKernelArg(kernel, 1, cl.CL_size_t(unsafe.Sizeof(b_buffer)), unsafe.Pointer(&b_buffer))
	if err < 0 {
		println("Couldn't set a kernel argument")
		return
	}
	err = cl.CLSetKernelArg(kernel, 2, cl.CL_size_t(unsafe.Sizeof(output_buffer)), unsafe.Pointer(&output_buffer))
	if err < 0 {
		println("Couldn't set a kernel argument")
		return
	}

	/* Create a command queue */
	queue = cl.CLCreateCommandQueue(context, device[0], 0, &err)
	if err < 0 {
		println("Couldn't create a command queue")
		return
	}
	defer cl.CLReleaseCommandQueue(queue)

	/* Enqueue kernel */
	err = cl.CLEnqueueTask(queue, kernel, 0, nil, nil)
	if err < 0 {
		println("Couldn't enqueue the kernel")
		return
	}

	/* Read and print the result */
	err = cl.CLEnqueueReadBuffer(queue, output_buffer, cl.CL_TRUE, 0,
		cl.CL_size_t(unsafe.Sizeof(result)), unsafe.Pointer(&result), 0, nil, nil)
	if err < 0 {
		println("Couldn't read the output buffer")
		return
	}
	fmt.Printf("The kernel result is %f\n", result)

	/* Resources are deallocated by the deferred release calls above. */
}
Ejemplo n.º 5
0
// DisplayDeviceInfo queries the attribute name of device id and prints it on
// one line, labelled with str. If either query call fails, a failure message
// mentioning str is printed instead.
//
// For CL_DEVICE_TYPE the bitfield is turned into text via appendBitfield
// before printing; every other attribute is printed with fmt's default
// formatting of the value returned by CLGetDeviceInfo.
func DisplayDeviceInfo(id cl.CL_device_id,
	name cl.CL_device_info,
	str string) {

	// First call: ask only for the size of the attribute value.
	var valueSize cl.CL_size_t
	if rc := cl.CLGetDeviceInfo(id, name, 0, nil, &valueSize); rc != cl.CL_SUCCESS {
		fmt.Printf("Failed to find OpenCL device info %s.\n", str)
		return
	}

	// Second call: fetch the value itself.
	var info interface{}
	if rc := cl.CLGetDeviceInfo(id, name, valueSize, &info, nil); rc != cl.CL_SUCCESS {
		fmt.Printf("Failed to find OpenCL device info %s.\n", str)
		return
	}

	// Handle a few special cases
	if name == cl.CL_DEVICE_TYPE {
		typeBits := cl.CL_bitfield(info.(cl.CL_device_type))

		// Device-type bits and their labels, in the order they are appended.
		knownTypes := []struct {
			mask  cl.CL_bitfield
			label string
		}{
			{cl.CL_bitfield(cl.CL_DEVICE_TYPE_CPU), "CL_DEVICE_TYPE_CPU"},
			{cl.CL_bitfield(cl.CL_DEVICE_TYPE_GPU), "CL_DEVICE_TYPE_GPU"},
			{cl.CL_bitfield(cl.CL_DEVICE_TYPE_ACCELERATOR), "CL_DEVICE_TYPE_ACCELERATOR"},
			{cl.CL_bitfield(cl.CL_DEVICE_TYPE_DEFAULT), "CL_DEVICE_TYPE_DEFAULT"},
		}

		var deviceTypeStr string
		for _, t := range knownTypes {
			appendBitfield(typeBits, t.mask, t.label, &deviceTypeStr)
		}
		info = deviceTypeStr
	}

	// TODO: bitfield decoding has not been ported yet for
	// CL_DEVICE_SINGLE_FP_CONFIG, CL_DEVICE_GLOBAL_MEM_CACHE_TYPE,
	// CL_DEVICE_LOCAL_MEM_TYPE, CL_DEVICE_EXECUTION_CAPABILITIES and
	// CL_DEVICE_QUEUE_PROPERTIES; those attributes are printed as raw values.

	fmt.Printf("\t\t%-20s: %v\n", str, info)
}