Ejemplo n.º 1
0
func (this *buffer) EnqueueWrite(queue CommandQueue,
	blocking_write cl.CL_bool,
	offset cl.CL_size_t,
	cb cl.CL_size_t,
	ptr unsafe.Pointer,
	event_wait_list []Event) (Event, error) {
	var errCode cl.CL_int
	var event_id cl.CL_event

	numEvents := cl.CL_uint(len(event_wait_list))

	var events []cl.CL_event
	if numEvents > 0 {
		events = make([]cl.CL_event, numEvents)
		for i := cl.CL_uint(0); i < numEvents; i++ {
			events[i] = event_wait_list[i].GetID()
		}
	}

	if errCode = cl.CLEnqueueWriteBuffer(queue.GetID(),
		this.memory_id,
		blocking_write,
		offset,
		cb,
		ptr,
		numEvents,
		events,
		&event_id); errCode != cl.CL_SUCCESS {
		return nil, fmt.Errorf("EnqueueWrite failure with errcode_ret %d: %s", errCode, cl.ERROR_CODES_STRINGS[-errCode])
	} else {
		return &event{event_id}, nil
	}
}
Ejemplo n.º 2
0
func main() {
	// This code executes on the OpenCL host

	// Host data
	var size cl.CL_int
	var A []cl.CL_int //input array
	var B []cl.CL_int //input array
	var C []cl.CL_int //output array

	// Elements in each array
	const elements = cl.CL_size_t(2048)

	// Compute the size of the data
	datasize := cl.CL_size_t(unsafe.Sizeof(size)) * elements

	// Allocate space for input/output data
	A = make([]cl.CL_int, datasize)
	B = make([]cl.CL_int, datasize)
	C = make([]cl.CL_int, datasize)
	// Initialize the input data
	for i := cl.CL_int(0); i < cl.CL_int(elements); i++ {
		A[i] = i
		B[i] = i
	}

	// Use this to check the output of each API call
	var status cl.CL_int

	//-----------------------------------------------------
	// STEP 1: Discover and initialize the platforms
	//-----------------------------------------------------

	var numPlatforms cl.CL_uint
	var platforms []cl.CL_platform_id

	// Use clGetPlatformIDs() to retrieve the number of
	// platforms
	status = cl.CLGetPlatformIDs(0, nil, &numPlatforms)

	// Allocate enough space for each platform
	platforms = make([]cl.CL_platform_id, numPlatforms)

	// Fill in platforms with clGetPlatformIDs()
	status = cl.CLGetPlatformIDs(numPlatforms, platforms, nil)
	if status != cl.CL_SUCCESS {
		println("CLGetPlatformIDs status!=cl.CL_SUCCESS")
		return
	}
	//-----------------------------------------------------
	// STEP 2: Discover and initialize the devices
	//-----------------------------------------------------

	var numDevices cl.CL_uint
	var devices []cl.CL_device_id

	// Use clGetDeviceIDs() to retrieve the number of
	// devices present
	status = cl.CLGetDeviceIDs(platforms[0],
		cl.CL_DEVICE_TYPE_ALL,
		0,
		nil,
		&numDevices)
	if status != cl.CL_SUCCESS {
		println("CLGetDeviceIDs status!=cl.CL_SUCCESS")
		return
	}

	// Allocate enough space for each device
	devices = make([]cl.CL_device_id, numDevices)

	// Fill in devices with clGetDeviceIDs()
	status = cl.CLGetDeviceIDs(platforms[0],
		cl.CL_DEVICE_TYPE_ALL,
		numDevices,
		devices,
		nil)
	if status != cl.CL_SUCCESS {
		println("CLGetDeviceIDs status!=cl.CL_SUCCESS")
		return
	}
	//-----------------------------------------------------
	// STEP 3: Create a context
	//-----------------------------------------------------

	var context cl.CL_context

	// Create a context using clCreateContext() and
	// associate it with the devices
	context = cl.CLCreateContext(nil,
		numDevices,
		devices,
		nil,
		nil,
		&status)
	if status != cl.CL_SUCCESS {
		println("CLCreateContext status!=cl.CL_SUCCESS")
		return
	}
	//-----------------------------------------------------
	// STEP 4: Create a command queue
	//-----------------------------------------------------

	var cmdQueue cl.CL_command_queue

	// Create a command queue using clCreateCommandQueue(),
	// and associate it with the device you want to execute
	// on
	cmdQueue = cl.CLCreateCommandQueue(context,
		devices[0],
		0,
		&status)
	if status != cl.CL_SUCCESS {
		println("CLCreateCommandQueue status!=cl.CL_SUCCESS")
		return
	}
	//-----------------------------------------------------
	// STEP 5: Create device buffers
	//-----------------------------------------------------

	var bufferA cl.CL_mem // Input array on the device
	var bufferB cl.CL_mem // Input array on the device
	var bufferC cl.CL_mem // Output array on the device

	// Use clCreateBuffer() to create a buffer object (d_A)
	// that will contain the data from the host array A
	bufferA = cl.CLCreateBuffer(context,
		cl.CL_MEM_READ_ONLY,
		datasize,
		nil,
		&status)
	if status != cl.CL_SUCCESS {
		println("CLCreateBuffer status!=cl.CL_SUCCESS")
		return
	}
	// Use clCreateBuffer() to create a buffer object (d_B)
	// that will contain the data from the host array B
	bufferB = cl.CLCreateBuffer(context,
		cl.CL_MEM_READ_ONLY,
		datasize,
		nil,
		&status)
	if status != cl.CL_SUCCESS {
		println("CLCreateBuffer status!=cl.CL_SUCCESS")
		return
	}
	// Use clCreateBuffer() to create a buffer object (d_C)
	// with enough space to hold the output data
	bufferC = cl.CLCreateBuffer(context,
		cl.CL_MEM_WRITE_ONLY,
		datasize,
		nil,
		&status)
	if status != cl.CL_SUCCESS {
		println("CLCreateBuffer status!=cl.CL_SUCCESS")
		return
	}
	//-----------------------------------------------------
	// STEP 6: Write host data to device buffers
	//-----------------------------------------------------

	// Use clEnqueueWriteBuffer() to write input array A to
	// the device buffer bufferA
	status = cl.CLEnqueueWriteBuffer(cmdQueue,
		bufferA,
		cl.CL_FALSE,
		0,
		datasize,
		unsafe.Pointer(&A[0]),
		0,
		nil,
		nil)
	if status != cl.CL_SUCCESS {
		println("CLEnqueueWriteBuffer status!=cl.CL_SUCCESS")
		return
	}
	// Use clEnqueueWriteBuffer() to write input array B to
	// the device buffer bufferB
	status = cl.CLEnqueueWriteBuffer(cmdQueue,
		bufferB,
		cl.CL_FALSE,
		0,
		datasize,
		unsafe.Pointer(&B[0]),
		0,
		nil,
		nil)
	if status != cl.CL_SUCCESS {
		println("CLEnqueueWriteBuffer status!=cl.CL_SUCCESS")
		return
	}
	//-----------------------------------------------------
	// STEP 7: Create and compile the program
	//-----------------------------------------------------
	programSource, programeSize := utils.Load_programsource("chapter2.cl")

	// Create a program using clCreateProgramWithSource()
	program := cl.CLCreateProgramWithSource(context,
		1,
		programSource[:],
		programeSize[:],
		&status)
	if status != cl.CL_SUCCESS {
		println("CLCreateProgramWithSource status!=cl.CL_SUCCESS")
		return
	}
	// Build (compile) the program for the devices with
	// clBuildProgram()
	status = cl.CLBuildProgram(program,
		numDevices,
		devices,
		nil,
		nil,
		nil)
	if status != cl.CL_SUCCESS {
		println("CLBuildProgram status!=cl.CL_SUCCESS")
		return
	}
	//-----------------------------------------------------
	// STEP 8: Create the kernel
	//-----------------------------------------------------

	var kernel cl.CL_kernel

	// Use clCreateKernel() to create a kernel from the
	// vector addition function (named "vecadd")
	kernel = cl.CLCreateKernel(program, []byte("vecadd"), &status)
	if status != cl.CL_SUCCESS {
		println("CLCreateKernel status!=cl.CL_SUCCESS")
		return
	}
	//-----------------------------------------------------
	// STEP 9: Set the kernel arguments
	//-----------------------------------------------------

	// Associate the input and output buffers with the
	// kernel
	// using clSetKernelArg()
	status = cl.CLSetKernelArg(kernel,
		0,
		cl.CL_size_t(unsafe.Sizeof(bufferA)),
		unsafe.Pointer(&bufferA))
	status |= cl.CLSetKernelArg(kernel,
		1,
		cl.CL_size_t(unsafe.Sizeof(bufferB)),
		unsafe.Pointer(&bufferB))
	status |= cl.CLSetKernelArg(kernel,
		2,
		cl.CL_size_t(unsafe.Sizeof(bufferC)),
		unsafe.Pointer(&bufferC))
	if status != cl.CL_SUCCESS {
		println("CLSetKernelArg status!=cl.CL_SUCCESS")
		return
	}
	//-----------------------------------------------------
	// STEP 10: Configure the work-item structure
	//-----------------------------------------------------

	// Define an index space (global work size) of work
	// items for
	// execution. A workgroup size (local work size) is not
	// required,
	// but can be used.
	var globalWorkSize [1]cl.CL_size_t
	// There are 'elements' work-items
	globalWorkSize[0] = elements

	//-----------------------------------------------------
	// STEP 11: Enqueue the kernel for execution
	//-----------------------------------------------------

	// Execute the kernel by using
	// clEnqueueNDRangeKernel().
	// 'globalWorkSize' is the 1D dimension of the
	// work-items
	status = cl.CLEnqueueNDRangeKernel(cmdQueue,
		kernel,
		1,
		nil,
		globalWorkSize[:],
		nil,
		0,
		nil,
		nil)
	if status != cl.CL_SUCCESS {
		println("CLEnqueueNDRangeKernel status!=cl.CL_SUCCESS")
		return
	}
	//-----------------------------------------------------
	// STEP 12: Read the output buffer back to the host
	//-----------------------------------------------------

	// Use clEnqueueReadBuffer() to read the OpenCL output
	// buffer (bufferC)
	// to the host output array (C)
	cl.CLEnqueueReadBuffer(cmdQueue,
		bufferC,
		cl.CL_TRUE,
		0,
		datasize,
		unsafe.Pointer(&C[0]),
		0,
		nil,
		nil)
	if status != cl.CL_SUCCESS {
		println("CLEnqueueReadBuffer status!=cl.CL_SUCCESS")
		return
	}
	// Verify the output
	result := true
	for i := cl.CL_int(0); i < cl.CL_int(elements); i++ {
		if C[i] != i+i {
			result = false
			break
		}
	}
	if result {
		println("Output is correct\n")
	} else {
		println("Output is incorrect\n")
	}

	//-----------------------------------------------------
	// STEP 13: Release OpenCL resources
	//-----------------------------------------------------

	// Free OpenCL resources
	cl.CLReleaseKernel(kernel)
	cl.CLReleaseProgram(program)
	cl.CLReleaseCommandQueue(cmdQueue)
	cl.CLReleaseMemObject(bufferA)
	cl.CLReleaseMemObject(bufferB)
	cl.CLReleaseMemObject(bufferC)
	cl.CLReleaseContext(context)
}
Ejemplo n.º 3
0
func main() {
	var i, j cl.CL_size_t
	// Rows and columns in the input image
	inputFile := "test.png"
	outputFile := "output.png"
	refFile := "ref.png"

	// Homegrown function to read a BMP from file
	inputpixels, imageWidth, imageHeight, err1 := utils.Read_image_data(inputFile)
	if err1 != nil {
		log.Fatal(err1)
		return
	} else {
		fmt.Printf("width=%d, height=%d (%d)\n", imageWidth, imageHeight, inputpixels[0])
	}

	// Output image on the host
	outputpixels := make([]uint16, imageHeight*imageWidth)
	inputImage := make([]float32, imageHeight*imageWidth)
	outputImage := make([]float32, imageHeight*imageWidth)
	refImage := make([]float32, imageHeight*imageWidth)

	for i = 0; i < imageHeight*imageWidth; i++ {
		inputImage[i] = float32(inputpixels[i])
	}

	// 45 degree motion blur
	var filter = [49]float32{0, 0, 0, 0, 0, 0, 0,
		0, 0, 0, 0, 0, 0, 0,
		0, 0, -1, 0, 1, 0, 0,
		0, 0, -2, 0, 2, 0, 0,
		0, 0, -1, 0, 1, 0, 0,
		0, 0, 0, 0, 0, 0, 0,
		0, 0, 0, 0, 0, 0, 0}

	// The convolution filter is 7x7
	filterWidth := cl.CL_size_t(7)
	filterSize := cl.CL_size_t(filterWidth * filterWidth) // Assume a square kernel

	// Set up the OpenCL environment
	var status cl.CL_int

	// Discovery platform
	var platform [1]cl.CL_platform_id
	status = cl.CLGetPlatformIDs(1, platform[:], nil)
	chk(status, "clGetPlatformIDs")

	// Discover device
	var device [1]cl.CL_device_id
	cl.CLGetDeviceIDs(platform[0], cl.CL_DEVICE_TYPE_ALL, 1, device[:], nil)
	chk(status, "clGetDeviceIDs")

	// Create context
	//var props =[3]cl.CL_context_properties{cl.CL_CONTEXT_PLATFORM,
	//    (cl.CL_context_properties)(unsafe.Pointer(&platform[0])), 0};

	var context cl.CL_context
	context = cl.CLCreateContext(nil, 1, device[:], nil, nil, &status)
	chk(status, "clCreateContext")

	// Create command queue
	var queue cl.CL_command_queue
	queue = cl.CLCreateCommandQueue(context, device[0], 0, &status)
	chk(status, "clCreateCommandQueue")

	// The image format describes how the data will be stored in memory
	var format cl.CL_image_format
	format.Image_channel_order = cl.CL_R         // single channel
	format.Image_channel_data_type = cl.CL_FLOAT // float data type

	var desc cl.CL_image_desc
	desc.Image_type = cl.CL_MEM_OBJECT_IMAGE2D
	desc.Image_width = imageWidth
	desc.Image_height = imageHeight
	desc.Image_depth = 0
	desc.Image_array_size = 0
	desc.Image_row_pitch = 0
	desc.Image_slice_pitch = 0
	desc.Num_mip_levels = 0
	desc.Num_samples = 0
	desc.Buffer = cl.CL_mem{}

	// Create space for the source image on the device
	d_inputImage := cl.CLCreateImage(context, cl.CL_MEM_READ_ONLY, &format, &desc,
		nil, &status)
	chk(status, "clCreateImage")

	// Create space for the output image on the device
	d_outputImage := cl.CLCreateImage(context, cl.CL_MEM_WRITE_ONLY, &format, &desc,
		nil, &status)
	chk(status, "clCreateImage")

	// Create space for the 7x7 filter on the device
	d_filter := cl.CLCreateBuffer(context, 0, filterSize*cl.CL_size_t(unsafe.Sizeof(filter[0])),
		nil, &status)
	chk(status, "clCreateBuffer")

	// Copy the source image to the device
	var origin = [3]cl.CL_size_t{0, 0, 0}                                                // Offset within the image to copy from
	var region = [3]cl.CL_size_t{cl.CL_size_t(imageWidth), cl.CL_size_t(imageHeight), 1} // Elements to per dimension
	status = cl.CLEnqueueWriteImage(queue, d_inputImage, cl.CL_FALSE, origin, region,
		0, 0, unsafe.Pointer(&inputImage[0]), 0, nil, nil)
	chk(status, "clEnqueueWriteImage")

	// Copy the 7x7 filter to the device
	status = cl.CLEnqueueWriteBuffer(queue, d_filter, cl.CL_FALSE, 0,
		filterSize*cl.CL_size_t(unsafe.Sizeof(filter[0])), unsafe.Pointer(&filter[0]), 0, nil, nil)
	chk(status, "clEnqueueWriteBuffer")

	// Create the image sampler
	sampler := cl.CLCreateSampler(context, cl.CL_FALSE,
		cl.CL_ADDRESS_CLAMP_TO_EDGE, cl.CL_FILTER_NEAREST, &status)
	chk(status, "clCreateSampler")

	// Create a program object with source and build it
	program := utils.Build_program(context, device[:], "convolution.cl", nil)
	kernel := cl.CLCreateKernel(*program, []byte("convolution"), &status)
	chk(status, "clCreateKernel")

	// Set the kernel arguments
	var w, h, f cl.CL_int
	w = cl.CL_int(imageWidth)
	h = cl.CL_int(imageHeight)
	f = cl.CL_int(filterWidth)
	status = cl.CLSetKernelArg(kernel, 0, cl.CL_size_t(unsafe.Sizeof(d_inputImage)), unsafe.Pointer(&d_inputImage))
	status |= cl.CLSetKernelArg(kernel, 1, cl.CL_size_t(unsafe.Sizeof(d_outputImage)), unsafe.Pointer(&d_outputImage))
	status |= cl.CLSetKernelArg(kernel, 2, cl.CL_size_t(unsafe.Sizeof(h)), unsafe.Pointer(&h))
	status |= cl.CLSetKernelArg(kernel, 3, cl.CL_size_t(unsafe.Sizeof(w)), unsafe.Pointer(&w))
	status |= cl.CLSetKernelArg(kernel, 4, cl.CL_size_t(unsafe.Sizeof(d_filter)), unsafe.Pointer(&d_filter))
	status |= cl.CLSetKernelArg(kernel, 5, cl.CL_size_t(unsafe.Sizeof(f)), unsafe.Pointer(&f))
	status |= cl.CLSetKernelArg(kernel, 6, cl.CL_size_t(unsafe.Sizeof(sampler)), unsafe.Pointer(&sampler))
	chk(status, "clSetKernelArg")

	// Set the work item dimensions
	var globalSize = [2]cl.CL_size_t{imageWidth, imageHeight}
	status = cl.CLEnqueueNDRangeKernel(queue, kernel, 2, nil, globalSize[:], nil, 0,
		nil, nil)
	chk(status, "clEnqueueNDRange")

	// Read the image back to the host
	status = cl.CLEnqueueReadImage(queue, d_outputImage, cl.CL_TRUE, origin,
		region, 0, 0, unsafe.Pointer(&outputImage[0]), 0, nil, nil)
	chk(status, "clEnqueueReadImage")

	// Write the output image to file
	for i = 0; i < imageHeight*imageWidth; i++ {
		outputpixels[i] = uint16(outputImage[i])
	}
	utils.Write_image_data(outputFile, outputpixels, imageWidth, imageHeight)

	// Compute the reference image
	for i = 0; i < imageHeight; i++ {
		for j = 0; j < imageWidth; j++ {
			refImage[i*imageWidth+j] = 0
		}
	}

	// Iterate over the rows of the source image
	halfFilterWidth := filterWidth / 2
	var sum float32
	for i = 0; i < imageHeight; i++ {
		// Iterate over the columns of the source image
		for j = 0; j < imageWidth; j++ {
			sum = 0 // Reset sum for new source pixel
			// Apply the filter to the neighborhood
			for k := -halfFilterWidth; k <= halfFilterWidth; k++ {
				for l := -halfFilterWidth; l <= halfFilterWidth; l++ {
					if i+k >= 0 && i+k < imageHeight &&
						j+l >= 0 && j+l < imageWidth {
						sum += inputImage[(i+k)*imageWidth+j+l] *
							filter[(k+halfFilterWidth)*filterWidth+
								l+halfFilterWidth]
					} else {
						i_k := i + k
						j_l := j + l
						if i+k < 0 {
							i_k = 0
						} else if i+k >= imageHeight {
							i_k = imageHeight - 1
						}
						if j+l < 0 {
							j_l = 0
						} else if j+l >= imageWidth {
							j_l = imageWidth - 1
						}
						sum += inputImage[(i_k)*imageWidth+j_l] *
							filter[(k+halfFilterWidth)*filterWidth+
								l+halfFilterWidth]
					}
				}
			}
			refImage[i*imageWidth+j] = sum
		}
	}
	// Write the ref image to file
	for i = 0; i < imageHeight*imageWidth; i++ {
		outputpixels[i] = uint16(refImage[i])
	}
	utils.Write_image_data(refFile, outputpixels, imageWidth, imageHeight)

	failed := 0
	for i = 0; i < imageHeight; i++ {
		for j = 0; j < imageWidth; j++ {
			if math.Abs(float64(outputImage[i*imageWidth+j]-refImage[i*imageWidth+j])) > 0.01 {
				//fmt.Printf("Results are INCORRECT\n");
				//fmt.Printf("Pixel mismatch at <%d,%d> (%f vs. %f) %f\n", i, j,
				//   outputImage[i*imageWidth+j], refImage[i*imageWidth+j], inputImage[i*imageWidth+j]);
				failed++
			}
		}
	}
	fmt.Printf("Mismatch Pixel number/Total pixel number = %d/%d\n", failed, imageWidth*imageHeight)

	// Free OpenCL resources
	cl.CLReleaseKernel(kernel)
	cl.CLReleaseProgram(*program)
	cl.CLReleaseCommandQueue(queue)
	cl.CLReleaseMemObject(d_inputImage)
	cl.CLReleaseMemObject(d_outputImage)
	cl.CLReleaseMemObject(d_filter)
	cl.CLReleaseSampler(sampler)
	cl.CLReleaseContext(context)
}
Ejemplo n.º 4
0
func main() {

	/* OpenCL data structures */
	var device []cl.CL_device_id
	var context cl.CL_context
	var queue cl.CL_command_queue
	var program *cl.CL_program
	var kernel cl.CL_kernel
	var err cl.CL_int

	/* Data and buffers */
	var full_matrix, zero_matrix [80]float32
	var sizeoffloat32 = cl.CL_size_t(unsafe.Sizeof(full_matrix[0]))
	var buffer_origin = [3]cl.CL_size_t{5 * sizeoffloat32, 3, 0}
	var host_origin = [3]cl.CL_size_t{1 * sizeoffloat32, 1, 0}
	var region = [3]cl.CL_size_t{4 * sizeoffloat32, 4, 1}
	var matrix_buffer cl.CL_mem

	/* Initialize data */
	for i := 0; i < 80; i++ {
		full_matrix[i] = float32(i) * 1.0
		zero_matrix[i] = 0.0
	}

	/* Create a device and context */
	device = utils.Create_device()
	context = cl.CLCreateContext(nil, 1, device[:], nil, nil, &err)
	if err < 0 {
		println("Couldn't create a context")
		return
	}

	/* Build the program and create the kernel */
	program = utils.Build_program(context, device[:], PROGRAM_FILE, nil)
	if program == nil {
		println("Couldn't build program")
		return
	}

	kernel = cl.CLCreateKernel(*program, []byte(KERNEL_FUNC), &err)
	if err < 0 {
		println("Couldn't create a kernel")
		return
	}

	/* Create a buffer to hold 80 floats */
	matrix_buffer = cl.CLCreateBuffer(context, cl.CL_MEM_READ_WRITE|
		cl.CL_MEM_COPY_HOST_PTR, cl.CL_size_t(unsafe.Sizeof(full_matrix)), unsafe.Pointer(&full_matrix[0]), &err)
	if err < 0 {
		println("Couldn't create a buffer object")
		return
	}

	/* Set buffer as argument to the kernel */
	err = cl.CLSetKernelArg(kernel, 0, cl.CL_size_t(unsafe.Sizeof(matrix_buffer)), unsafe.Pointer(&matrix_buffer))
	if err < 0 {
		println("Couldn't set the buffer as the kernel argument")
		return
	}

	/* Create a command queue */
	queue = cl.CLCreateCommandQueue(context, device[0], 0, &err)
	if err < 0 {
		println("Couldn't create a command queue")
		return
	}

	/* Enqueue kernel */
	err = cl.CLEnqueueTask(queue, kernel, 0, nil, nil)
	if err < 0 {
		println("Couldn't enqueue the kernel")
		return
	}

	/* Enqueue command to write to buffer */
	err = cl.CLEnqueueWriteBuffer(queue, matrix_buffer, cl.CL_TRUE, 0,
		cl.CL_size_t(unsafe.Sizeof(full_matrix)), unsafe.Pointer(&full_matrix[0]), 0, nil, nil)
	if err < 0 {
		println("Couldn't write to the buffer object")
		return
	}

	/* Enqueue command to read rectangle of data */
	err = cl.CLEnqueueReadBufferRect(queue, matrix_buffer, cl.CL_TRUE,
		buffer_origin, host_origin, region, 10*sizeoffloat32, 0,
		10*sizeoffloat32, 0, unsafe.Pointer(&zero_matrix[0]), 0, nil, nil)
	if err < 0 {
		println("Couldn't read the rectangle from the buffer object")
		return
	}

	/* Display updated buffer */
	for i := 0; i < 8; i++ {
		for j := 0; j < 10; j++ {
			fmt.Printf("%6.1f", zero_matrix[j+i*10])
		}
		fmt.Printf("\n")
	}

	/* Deallocate resources */
	cl.CLReleaseMemObject(matrix_buffer)
	cl.CLReleaseKernel(kernel)
	cl.CLReleaseCommandQueue(queue)
	cl.CLReleaseProgram(*program)
	cl.CLReleaseContext(context)
}