func (this *buffer) EnqueueMap(queue CommandQueue, blocking_map cl.CL_bool, map_flags cl.CL_map_flags, offset cl.CL_size_t, cb cl.CL_size_t, event_wait_list []Event) (unsafe.Pointer, Event, error) { var errCode cl.CL_int var event_id cl.CL_event numEvents := cl.CL_uint(len(event_wait_list)) events := make([]cl.CL_event, numEvents) for i := cl.CL_uint(0); i < numEvents; i++ { events[i] = event_wait_list[i].GetID() } if mapped_ptr := cl.CLEnqueueMapBuffer(queue.GetID(), this.memory_id, blocking_map, map_flags, offset, cb, numEvents, events, &event_id, &errCode); errCode != cl.CL_SUCCESS { return nil, nil, fmt.Errorf("EnqueueMap failure with errcode_ret %d: %s", errCode, cl.ERROR_CODES_STRINGS[-errCode]) } else { return mapped_ptr, &event{event_id}, nil } }
func (this *buffer) EnqueueWrite(queue CommandQueue, blocking_write cl.CL_bool, offset cl.CL_size_t, cb cl.CL_size_t, ptr unsafe.Pointer, event_wait_list []Event) (Event, error) { var errCode cl.CL_int var event_id cl.CL_event numEvents := cl.CL_uint(len(event_wait_list)) var events []cl.CL_event if numEvents > 0 { events = make([]cl.CL_event, numEvents) for i := cl.CL_uint(0); i < numEvents; i++ { events[i] = event_wait_list[i].GetID() } } if errCode = cl.CLEnqueueWriteBuffer(queue.GetID(), this.memory_id, blocking_write, offset, cb, ptr, numEvents, events, &event_id); errCode != cl.CL_SUCCESS { return nil, fmt.Errorf("EnqueueWrite failure with errcode_ret %d: %s", errCode, cl.ERROR_CODES_STRINGS[-errCode]) } else { return &event{event_id}, nil } }
func (this *buffer) EnqueueFill(queue CommandQueue, pattern unsafe.Pointer, pattern_size cl.CL_size_t, offset cl.CL_size_t, cb cl.CL_size_t, event_wait_list []Event) (Event, error) { var errCode cl.CL_int var event_id cl.CL_event numEvents := cl.CL_uint(len(event_wait_list)) events := make([]cl.CL_event, numEvents) for i := cl.CL_uint(0); i < numEvents; i++ { events[i] = event_wait_list[i].GetID() } if errCode = cl.CLEnqueueFillBuffer(queue.GetID(), this.memory_id, pattern, pattern_size, offset, cb, numEvents, events, &event_id); errCode != cl.CL_SUCCESS { return nil, fmt.Errorf("EnqueueFill failure with errcode_ret %d: %s", errCode, cl.ERROR_CODES_STRINGS[-errCode]) } else { return &event{event_id}, nil } }
func (this *command_queue) EnqueueCopyBuffer(src_buffer Buffer, dst_buffer Buffer, src_offset cl.CL_size_t, dst_offset cl.CL_size_t, cb cl.CL_size_t, event_wait_list []Event) (Event, error) { var errCode cl.CL_int var event_id cl.CL_event numEvents := cl.CL_uint(len(event_wait_list)) events := make([]cl.CL_event, numEvents) for i := cl.CL_uint(0); i < numEvents; i++ { events[i] = event_wait_list[i].GetID() } if errCode = cl.CLEnqueueCopyBuffer(this.command_queue_id, src_buffer.GetID(), dst_buffer.GetID(), src_offset, dst_offset, cb, numEvents, events, &event_id); errCode != cl.CL_SUCCESS { return nil, fmt.Errorf("EnqueueCopyBuffer failure with errcode_ret %d: %s", errCode, cl.ERROR_CODES_STRINGS[-errCode]) } else { return &event{event_id}, nil } }
func (this *image) EnqueueFill(queue CommandQueue, fill_color unsafe.Pointer, origin [3]cl.CL_size_t, region [3]cl.CL_size_t, event_wait_list []Event) (Event, error) { var errCode cl.CL_int var event_id cl.CL_event numEvents := cl.CL_uint(len(event_wait_list)) events := make([]cl.CL_event, numEvents) for i := cl.CL_uint(0); i < numEvents; i++ { events[i] = event_wait_list[i].GetID() } if errCode = cl.CLEnqueueFillImage(queue.GetID(), this.memory_id, fill_color, origin, region, numEvents, events, &event_id); errCode != cl.CL_SUCCESS { return nil, fmt.Errorf("EnqueueFill failure with errcode_ret %d: %s", errCode, cl.ERROR_CODES_STRINGS[-errCode]) } else { return &event{event_id}, nil } }
func (this *kernel) EnqueueNDRange(queue CommandQueue, work_dim cl.CL_uint, global_work_offset []cl.CL_size_t, global_work_size []cl.CL_size_t, local_work_size []cl.CL_size_t, event_wait_list []Event) (Event, error) { var errCode cl.CL_int var event_id cl.CL_event numEvents := cl.CL_uint(len(event_wait_list)) var events []cl.CL_event if numEvents > 0 { events = make([]cl.CL_event, numEvents) for i := cl.CL_uint(0); i < numEvents; i++ { events[i] = event_wait_list[i].GetID() } } if errCode = cl.CLEnqueueNDRangeKernel(queue.GetID(), this.kernel_id, work_dim, global_work_offset, global_work_size, local_work_size, numEvents, events, &event_id); errCode != cl.CL_SUCCESS { return nil, fmt.Errorf("EnqueueNDRange failure with errcode_ret %d: %s", errCode, cl.ERROR_CODES_STRINGS[-errCode]) } else { return &event{event_id}, nil } }
func WaitForEvents(event_list []Event) error { numEvents := cl.CL_uint(len(event_list)) events := make([]cl.CL_event, numEvents) for i := cl.CL_uint(0); i < numEvents; i++ { events[i] = event_list[i].GetID() } if errCode := cl.CLWaitForEvents(numEvents, events); errCode != cl.CL_SUCCESS { return fmt.Errorf("WaitForEvents failure with errcode_ret %d: %s", errCode, cl.ERROR_CODES_STRINGS[-errCode]) } else { return nil } }
func (this *command_queue) EnqueueMarkerWithWaitList(event_wait_list []Event) (Event, error) { var event_id cl.CL_event numEvents := cl.CL_uint(len(event_wait_list)) events := make([]cl.CL_event, numEvents) for i := cl.CL_uint(0); i < numEvents; i++ { events[i] = event_wait_list[i].GetID() } if errCode := cl.CLEnqueueMarkerWithWaitList(this.command_queue_id, numEvents, events, &event_id); errCode != cl.CL_SUCCESS { return nil, fmt.Errorf("EnqueueMarkerWithWaitList failure with errcode_ret %d: %s", errCode, cl.ERROR_CODES_STRINGS[-errCode]) } else { return &event{event_id}, nil } }
func (this *memory) EnqueueUnmap(queue CommandQueue, mapped_ptr unsafe.Pointer, event_wait_list []Event) (Event, error) { var event_id cl.CL_event numEvents := cl.CL_uint(len(event_wait_list)) events := make([]cl.CL_event, numEvents) for i := cl.CL_uint(0); i < numEvents; i++ { events[i] = event_wait_list[i].GetID() } if errCode := cl.CLEnqueueUnmapMemObject(queue.GetID(), this.memory_id, mapped_ptr, numEvents, events, &event_id); errCode != cl.CL_SUCCESS { return nil, fmt.Errorf("EnqueueMarkerWithWaitList failure with errcode_ret %d: %s", errCode, cl.ERROR_CODES_STRINGS[-errCode]) } else { return &event{event_id}, nil } }
func (this *context) CreateProgramWithBinary(devices []Device, lengths []cl.CL_size_t, binaries [][]byte, binary_status []cl.CL_int) (Program, error) { var errCode cl.CL_int numDevices := cl.CL_uint(len(devices)) deviceIds := make([]cl.CL_device_id, numDevices) for i := cl.CL_uint(0); i < numDevices; i++ { deviceIds[i] = devices[i].GetID() } if program_id := cl.CLCreateProgramWithBinary(this.context_id, numDevices, deviceIds, lengths, binaries, binary_status, &errCode); errCode != cl.CL_SUCCESS { return nil, fmt.Errorf("CreateProgramWithBinary failure with errcode_ret %d: %s", errCode, cl.ERROR_CODES_STRINGS[-errCode]) } else { return &program{program_id}, nil } }
func CreateContext(properties []cl.CL_context_properties, devices []Device, pfn_notify cl.CL_ctx_notify, user_data unsafe.Pointer) (Context, error) { var numDevices cl.CL_uint var deviceIds []cl.CL_device_id var errCode cl.CL_int numDevices = cl.CL_uint(len(devices)) deviceIds = make([]cl.CL_device_id, numDevices) for i := cl.CL_uint(0); i < numDevices; i++ { deviceIds[i] = devices[i].GetID() } /* Create the context */ if context_id := cl.CLCreateContext(properties, numDevices, deviceIds, pfn_notify, user_data, &errCode); errCode != cl.CL_SUCCESS { return nil, fmt.Errorf("CreateContext failure with errcode_ret %d: %s", errCode, cl.ERROR_CODES_STRINGS[-errCode]) } else { return &context{context_id}, nil } }
func (this *kernel) EnqueueTask(queue CommandQueue, event_wait_list []Event) (Event, error) { var errCode cl.CL_int var event_id cl.CL_event numEvents := cl.CL_uint(len(event_wait_list)) events := make([]cl.CL_event, numEvents) for i := cl.CL_uint(0); i < numEvents; i++ { events[i] = event_wait_list[i].GetID() } if errCode = cl.CLEnqueueTask(queue.GetID(), this.kernel_id, numEvents, events, &event_id); errCode != cl.CL_SUCCESS { return nil, fmt.Errorf("EnqueueTask failure with errcode_ret %d: %s", errCode, cl.ERROR_CODES_STRINGS[-errCode]) } else { return &event{event_id}, nil } }
func (this *command_queue) EnqueueMigrateMemObjects(mem_objects []Memory, flags cl.CL_mem_migration_flags, event_wait_list []Event) (Event, error) { var event_id cl.CL_event numEvents := cl.CL_uint(len(event_wait_list)) events := make([]cl.CL_event, numEvents) for i := cl.CL_uint(0); i < numEvents; i++ { events[i] = event_wait_list[i].GetID() } numMemorys := cl.CL_uint(len(mem_objects)) memorys := make([]cl.CL_mem, numMemorys) for i := cl.CL_uint(0); i < numEvents; i++ { memorys[i] = mem_objects[i].GetID() } if errCode := cl.CLEnqueueMigrateMemObjects(this.command_queue_id, numMemorys, memorys, flags, numEvents, events, &event_id); errCode != cl.CL_SUCCESS { return nil, fmt.Errorf("EnqueueMigrateMemObjects failure with errcode_ret %d: %s", errCode, cl.ERROR_CODES_STRINGS[-errCode]) } else { return &event{event_id}, nil } }
func (this *buffer) EnqueueReadRect(queue CommandQueue, blocking_read cl.CL_bool, buffer_origin [3]cl.CL_size_t, host_origin [3]cl.CL_size_t, region [3]cl.CL_size_t, buffer_row_pitch cl.CL_size_t, buffer_slice_pitch cl.CL_size_t, host_row_pitch cl.CL_size_t, host_slice_pitch cl.CL_size_t, ptr unsafe.Pointer, event_wait_list []Event) (Event, error) { var errCode cl.CL_int var event_id cl.CL_event numEvents := cl.CL_uint(len(event_wait_list)) events := make([]cl.CL_event, numEvents) for i := cl.CL_uint(0); i < numEvents; i++ { events[i] = event_wait_list[i].GetID() } if errCode = cl.CLEnqueueReadBufferRect(queue.GetID(), this.memory_id, blocking_read, buffer_origin, host_origin, region, buffer_row_pitch, buffer_slice_pitch, host_row_pitch, host_slice_pitch, ptr, numEvents, events, &event_id); errCode != cl.CL_SUCCESS { return nil, fmt.Errorf("EnqueueReadRect failure with errcode_ret %d: %s", errCode, cl.ERROR_CODES_STRINGS[-errCode]) } else { return &event{event_id}, nil } }
func (this *program) Compile(devices []Device, options []byte, input_headers []Program, header_include_names [][]byte, pfn_notify cl.CL_prg_notify, user_data unsafe.Pointer) error { numDevices := cl.CL_uint(len(devices)) deviceIds := make([]cl.CL_device_id, numDevices) for i := cl.CL_uint(0); i < numDevices; i++ { deviceIds[i] = devices[i].GetID() } numInputHeaders := cl.CL_uint(len(input_headers)) inputHeaders := make([]cl.CL_program, numInputHeaders) for i := cl.CL_uint(0); i < numInputHeaders; i++ { inputHeaders[i] = input_headers[i].GetID() } if errCode := cl.CLCompileProgram(this.program_id, numDevices, deviceIds, options, numInputHeaders, inputHeaders, header_include_names, pfn_notify, user_data); errCode != cl.CL_SUCCESS { return fmt.Errorf("Compile failure with errcode_ret %d: %s", errCode, cl.ERROR_CODES_STRINGS[-errCode]) } return nil }
func (this *platform) GetDevices(deviceType cl.CL_device_type) ([]Device, error) { var devices []Device var deviceIds []cl.CL_device_id var numDevices cl.CL_uint var errCode cl.CL_int /* Determine number of connected devices */ if errCode = cl.CLGetDeviceIDs(this.platform_id, deviceType, 0, nil, &numDevices); errCode != cl.CL_SUCCESS { return nil, fmt.Errorf("GetDevices failure with errcode_ret %d: %s", errCode, cl.ERROR_CODES_STRINGS[-errCode]) } /* Access connected devices */ deviceIds = make([]cl.CL_device_id, numDevices) if errCode = cl.CLGetDeviceIDs(this.platform_id, deviceType, numDevices, deviceIds, nil); errCode != cl.CL_SUCCESS { return nil, fmt.Errorf("GetDevices failure with errcode_ret %d: %s", errCode, cl.ERROR_CODES_STRINGS[-errCode]) } devices = make([]Device, numDevices) for i := cl.CL_uint(0); i < numDevices; i++ { devices[i] = &device{deviceIds[i]} } return devices, nil }
func GetPlatforms() ([]Platform, error) { var platforms []Platform var platformIds []cl.CL_platform_id var numPlatforms cl.CL_uint var errCode cl.CL_int /* Determine number of platforms */ if errCode = cl.CLGetPlatformIDs(0, nil, &numPlatforms); errCode != cl.CL_SUCCESS { return nil, fmt.Errorf("GetPlatforms failure with errcode_ret %d: %s", errCode, cl.ERROR_CODES_STRINGS[-errCode]) } /* Access platforms */ platformIds = make([]cl.CL_platform_id, numPlatforms) if errCode = cl.CLGetPlatformIDs(numPlatforms, platformIds, nil); errCode != cl.CL_SUCCESS { return nil, fmt.Errorf("GetPlatforms failure with errcode_ret %d: %s", errCode, cl.ERROR_CODES_STRINGS[-errCode]) } platforms = make([]Platform, numPlatforms) for i := cl.CL_uint(0); i < numPlatforms; i++ { platforms[i] = &platform{platformIds[i]} } return platforms, nil }
func (this *device) CreateSubDevices(properties []cl.CL_device_partition_property) ([]Device, error) { var numDevices cl.CL_uint var deviceIds []cl.CL_device_id var devices []Device var errCode cl.CL_int /* Determine number of connected devices */ if errCode = cl.CLCreateSubDevices(this.device_id, properties, 0, nil, &numDevices); errCode != cl.CL_SUCCESS { return nil, fmt.Errorf("CreateSubDevices failure with errcode_ret %d: %s", errCode, cl.ERROR_CODES_STRINGS[-errCode]) } /* Access connected devices */ deviceIds = make([]cl.CL_device_id, numDevices) if errCode = cl.CLCreateSubDevices(this.device_id, properties, numDevices, deviceIds, nil); errCode != cl.CL_SUCCESS { return nil, fmt.Errorf("CreateSubDevices failure with errcode_ret %d: %s", errCode, cl.ERROR_CODES_STRINGS[-errCode]) } devices = make([]Device, numDevices) for i := cl.CL_uint(0); i < numDevices; i++ { devices[i] = &device{deviceIds[i]} } return devices, nil }
func main() { /* OpenCL data structures */ var device []cl.CL_device_id var context cl.CL_context var queue cl.CL_command_queue var program *cl.CL_program var kernel cl.CL_kernel var err cl.CL_int /* Data and buffers */ dim := cl.CL_uint(2) var global_offset = [2]cl.CL_size_t{3, 5} var global_size = [2]cl.CL_size_t{6, 4} var local_size = [2]cl.CL_size_t{3, 2} var test [24]float32 var test_buffer cl.CL_mem /* Create a device and context */ device = utils.Create_device() context = cl.CLCreateContext(nil, 1, device[:], nil, nil, &err) if err < 0 { println("Couldn't create a context") return } /* Build the program and create a kernel */ program = utils.Build_program(context, device[:], PROGRAM_FILE, nil) kernel = cl.CLCreateKernel(*program, KERNEL_FUNC, &err) if err < 0 { println("Couldn't create a kernel") return } /* Create a write-only buffer to hold the output data */ test_buffer = cl.CLCreateBuffer(context, cl.CL_MEM_WRITE_ONLY, cl.CL_size_t(unsafe.Sizeof(test)), nil, &err) if err < 0 { println("Couldn't create a buffer") return } /* Create kernel argument */ err = cl.CLSetKernelArg(kernel, 0, cl.CL_size_t(unsafe.Sizeof(test_buffer)), unsafe.Pointer(&test_buffer)) if err < 0 { println("Couldn't set a kernel argument") return } /* Create a command queue */ queue = cl.CLCreateCommandQueue(context, device[0], 0, &err) if err < 0 { println("Couldn't create a command queue") return } /* Enqueue kernel */ err = cl.CLEnqueueNDRangeKernel(queue, kernel, dim, global_offset[:], global_size[:], local_size[:], 0, nil, nil) if err < 0 { println("Couldn't enqueue the kernel") return } /* Read and print the result */ err = cl.CLEnqueueReadBuffer(queue, test_buffer, cl.CL_TRUE, 0, cl.CL_size_t(unsafe.Sizeof(test)), unsafe.Pointer(&test), 0, nil, nil) if err < 0 { println("Couldn't read the buffer") return } for i := 0; i < 24; i += 6 { fmt.Printf("%.2f %.2f %.2f %.2f %.2f %.2f\n", test[i], test[i+1], test[i+2], test[i+3], test[i+4], test[i+5]) } /* Deallocate resources */ 
cl.CLReleaseMemObject(test_buffer) cl.CLReleaseKernel(kernel) cl.CLReleaseCommandQueue(queue) cl.CLReleaseProgram(*program) cl.CLReleaseContext(context) }
// main drives an OpenCL 2.0 pipe demo: a producer kernel writes random
// values into a pipe, a consumer kernel histograms them on a second command
// queue, and the device histogram is validated against a CPU reference.
func main() {
	// Use this to check the output of each API call
	var status cl.CL_int

	//-----------------------------------------------------
	// STEP 1: Discover and initialize the platforms
	//-----------------------------------------------------
	var numPlatforms cl.CL_uint
	var platforms []cl.CL_platform_id

	// Use clGetPlatformIDs() to retrieve the number of
	// platforms
	// NOTE(review): the status of this first call is overwritten below
	// before it is checked; only the second call is verified.
	status = cl.CLGetPlatformIDs(0, nil, &numPlatforms)

	// Allocate enough space for each platform
	platforms = make([]cl.CL_platform_id, numPlatforms)

	// Fill in platforms with clGetPlatformIDs()
	status = cl.CLGetPlatformIDs(numPlatforms, platforms, nil)
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "CLGetPlatformIDs")

	//-----------------------------------------------------
	// STEP 2: Discover and initialize the GPU devices
	//-----------------------------------------------------
	var numDevices cl.CL_uint
	var devices []cl.CL_device_id

	// Use clGetDeviceIDs() to retrieve the number of
	// devices present
	status = cl.CLGetDeviceIDs(platforms[0], cl.CL_DEVICE_TYPE_GPU, 0, nil, &numDevices)
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "CLGetDeviceIDs")

	// Allocate enough space for each device
	devices = make([]cl.CL_device_id, numDevices)

	// Fill in devices with clGetDeviceIDs()
	status = cl.CLGetDeviceIDs(platforms[0], cl.CL_DEVICE_TYPE_GPU, numDevices, devices, nil)
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "CLGetDeviceIDs")

	//-----------------------------------------------------
	// STEP 3: Create a context
	//-----------------------------------------------------
	var context cl.CL_context

	// Create a context using clCreateContext() and
	// associate it with the devices
	context = cl.CLCreateContext(nil, numDevices, devices, nil, nil, &status)
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "CLCreateContext")
	defer cl.CLReleaseContext(context)

	//-----------------------------------------------------
	// STEP 4: Create a command queue
	//-----------------------------------------------------
	var commandQueue [MAX_COMMAND_QUEUE]cl.CL_command_queue

	// Create a command queue using clCreateCommandQueueWithProperties(),
	// and associate it with the device you want to execute
	// (producer and consumer run on separate queues).
	for i := 0; i < MAX_COMMAND_QUEUE; i++ {
		commandQueue[i] = cl.CLCreateCommandQueueWithProperties(context, devices[0], nil, &status)
		utils.CHECK_STATUS(status, cl.CL_SUCCESS, "CLCreateCommandQueueWithProperties")
		defer cl.CLReleaseCommandQueue(commandQueue[i])
	}

	//-----------------------------------------------------
	// STEP 5: Create device buffers
	//-----------------------------------------------------
	producerGroupSize := cl.CL_size_t(PRODUCER_GROUP_SIZE)
	producerGlobalSize := cl.CL_size_t(PRODUCER_GLOBAL_SIZE)
	consumerGroupSize := cl.CL_size_t(CONSUMER_GROUP_SIZE)
	consumerGlobalSize := cl.CL_size_t(CONSUMER_GLOBAL_SIZE)

	var samplePipePkt [2]cl.CL_float
	szPipe := cl.CL_uint(PIPE_SIZE)
	// One pipe packet is sized from the [2]cl.CL_float sample above.
	szPipePkt := cl.CL_uint(unsafe.Sizeof(samplePipePkt))

	// Round the pipe size up to a multiple of PRNG_CHANNELS so every
	// channel produces the same number of packets; the consumer launches
	// one work-item per packet.
	if szPipe%PRNG_CHANNELS != 0 {
		szPipe = (szPipe/PRNG_CHANNELS)*PRNG_CHANNELS + PRNG_CHANNELS
	}
	consumerGlobalSize = cl.CL_size_t(szPipe)
	pipePktPerThread := cl.CL_int(szPipe) / PRNG_CHANNELS
	seed := cl.CL_int(SEED)
	rngType := cl.CL_int(RV_GAUSSIAN)

	// Histogram bounds depend on the distribution being sampled.
	var histMin cl.CL_float
	var histMax cl.CL_float
	if rngType == cl.CL_int(RV_UNIFORM) {
		histMin = 0.0
		histMax = 1.0
	} else {
		histMin = -10.0
		histMax = 10.0
	}
	localDevHist := make([]cl.CL_int, MAX_HIST_BINS)
	cpuHist := make([]cl.CL_int, MAX_HIST_BINS)

	// Create and initialize memory objects: the pipe the kernels share and
	// the device-side histogram (initialized from localDevHist).
	rngPipe := cl.CLCreatePipe(context, cl.CL_MEM_READ_WRITE, szPipePkt, szPipe, nil, &status)
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "clCreatePipe")
	devHist := cl.CLCreateBuffer(context, cl.CL_MEM_READ_WRITE|cl.CL_MEM_COPY_HOST_PTR, MAX_HIST_BINS*cl.CL_size_t(unsafe.Sizeof(localDevHist[0])), unsafe.Pointer(&localDevHist[0]), &status)
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "clCreateBuffer")

	//-----------------------------------------------------
	// STEP 6: Create and compile the program
	//-----------------------------------------------------
	programSource, programeSize := utils.Load_programsource("pipe.cl")

	// Create a program using clCreateProgramWithSource()
	program := cl.CLCreateProgramWithSource(context, 1, programSource[:], programeSize[:], &status)
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "CLCreateProgramWithSource")
	defer cl.CLReleaseProgram(program)

	// Build (compile) the program for the devices with
	// clBuildProgram(); pipes require the CL 2.0 language option.
	options := "-cl-std=CL2.0"
	status = cl.CLBuildProgram(program, numDevices, devices, []byte(options), nil, nil)
	if status != cl.CL_SUCCESS {
		var program_log interface{}
		var log_size cl.CL_size_t

		/* Find size of log and print to std output */
		cl.CLGetProgramBuildInfo(program, devices[0], cl.CL_PROGRAM_BUILD_LOG, 0, nil, &log_size)
		cl.CLGetProgramBuildInfo(program, devices[0], cl.CL_PROGRAM_BUILD_LOG, log_size, &program_log, nil)
		fmt.Printf("%s\n", program_log)
		return
	}
	//utils.CHECK_STATUS(status, cl.CL_SUCCESS, "CLBuildProgram")

	//-----------------------------------------------------
	// STEP 7: Create the kernel
	//-----------------------------------------------------
	// Use clCreateKernel() to create a kernel
	produceKernel := cl.CLCreateKernel(program, []byte("pipe_producer"), &status)
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "CLCreateKernel")
	defer cl.CLReleaseKernel(produceKernel)
	consumeKernel := cl.CLCreateKernel(program, []byte("pipe_consumer"), &status)
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "CLCreateKernel")
	defer cl.CLReleaseKernel(consumeKernel)

	//-----------------------------------------------------
	// STEP 8: Set the kernel arguments (producer)
	//-----------------------------------------------------
	// Associate the input and output buffers with the kernel
	// using clSetKernelArg()
	status = cl.CLSetKernelArg(produceKernel, 0, cl.CL_size_t(unsafe.Sizeof(rngPipe)), unsafe.Pointer(&rngPipe))
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "clSetKernelArg(rngPipe)")
	status = cl.CLSetKernelArg(produceKernel, 1, cl.CL_size_t(unsafe.Sizeof(pipePktPerThread)), unsafe.Pointer(&pipePktPerThread))
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "clSetKernelArg(pipePktPerThread)")
	status = cl.CLSetKernelArg(produceKernel, 2, cl.CL_size_t(unsafe.Sizeof(seed)), unsafe.Pointer(&seed))
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "clSetKernelArg(seed)")
	status = cl.CLSetKernelArg(produceKernel, 3, cl.CL_size_t(unsafe.Sizeof(rngType)), unsafe.Pointer(&rngType))
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "clSetKernelArg(rngType)")

	//-----------------------------------------------------
	// STEP 9: Configure the work-item structure (producer)
	//-----------------------------------------------------
	// Define an index space (global work size) of work items for
	// execution. A workgroup size (local work size) is not required,
	// but can be used.
	var globalThreads = []cl.CL_size_t{producerGlobalSize}
	var localThreads = []cl.CL_size_t{producerGroupSize}

	//-----------------------------------------------------
	// STEP 10: Enqueue the producer kernel for execution
	//-----------------------------------------------------
	// Execute the kernel by using clEnqueueNDRangeKernel().
	// 'globalWorkSize' is the 1D dimension of the work-items.
	var produceEvt [1]cl.CL_event
	status = cl.CLEnqueueNDRangeKernel(commandQueue[0], produceKernel, 1, nil, globalThreads, localThreads, 0, nil, &produceEvt[0])
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "clEnqueueNDRangeKernel")

	/* launch consumer kernel only after producer has finished.
	   This is done to avoid concurrent kernels execution as the
	   memory consistency of pipe is guaranteed only across
	   synchronization points. */
	status = cl.CLWaitForEvents(1, produceEvt[:])
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "clWaitForEvents(produceEvt)")

	//-----------------------------------------------------
	// STEP 8 (again): Set the kernel arguments (consumer)
	//-----------------------------------------------------
	// Associate the input and output buffers with the kernel
	// using clSetKernelArg()
	status = cl.CLSetKernelArg(consumeKernel, 0, cl.CL_size_t(unsafe.Sizeof(rngPipe)), unsafe.Pointer(&rngPipe))
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "clSetKernelArg(rngPipe)")
	status = cl.CLSetKernelArg(consumeKernel, 1, cl.CL_size_t(unsafe.Sizeof(devHist)), unsafe.Pointer(&devHist))
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "clSetKernelArg(devHist)")
	status = cl.CLSetKernelArg(consumeKernel, 2, cl.CL_size_t(unsafe.Sizeof(histMin)), unsafe.Pointer(&histMin))
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "clSetKernelArg(histMin)")
	status = cl.CLSetKernelArg(consumeKernel, 3, cl.CL_size_t(unsafe.Sizeof(histMax)), unsafe.Pointer(&histMax))
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "clSetKernelArg(histMax)")

	//-----------------------------------------------------
	// STEP 9 (again): Configure the work-item structure (consumer)
	//-----------------------------------------------------
	// Reuse the same 1D slices with the consumer's sizes.
	globalThreads[0] = consumerGlobalSize
	localThreads[0] = consumerGroupSize

	//-----------------------------------------------------
	// STEP 10 (again): Enqueue the consumer kernel for execution
	//-----------------------------------------------------
	// Execute the kernel by using clEnqueueNDRangeKernel()
	// on the second command queue.
	var consumeEvt [1]cl.CL_event
	status = cl.CLEnqueueNDRangeKernel(commandQueue[1], consumeKernel, 1, nil, globalThreads, localThreads, 0, nil, &consumeEvt[0])
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "clEnqueueNDRangeKernel")

	// Flush both queues, then wait for the kernels to finish.
	status = cl.CLFlush(commandQueue[0])
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "clFlush(0)")
	status = cl.CLFlush(commandQueue[1])
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "clFlush(1)")
	//wait for kernels to finish
	status = cl.CLFinish(commandQueue[0])
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "clFinish(0)")
	status = cl.CLFinish(commandQueue[1])
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "clFinish(1)")

	//-----------------------------------------------------
	// STEP 11: Read the output buffer back to the host
	//-----------------------------------------------------
	// Use clEnqueueReadBuffer() to copy the device histogram
	// back into localDevHist (blocking read).
	var readEvt cl.CL_event
	status = cl.CLEnqueueReadBuffer(commandQueue[1], devHist, cl.CL_TRUE, 0, (MAX_HIST_BINS)*cl.CL_size_t(unsafe.Sizeof(localDevHist[0])), unsafe.Pointer(&localDevHist[0]), 0, nil, &readEvt)
	utils.CHECK_STATUS(status, cl.CL_SUCCESS, "clEnqueueReadBuffer")

	//-----------------------------------------------------
	// STEP 12: Verify the results
	//-----------------------------------------------------
	// Find the tolerance limit: COMP_TOL percent of the total sample
	// count, but at least 1.
	fTol := (float32)(CONSUMER_GLOBAL_SIZE) * (float32)(COMP_TOL) / (float32)(100.0)
	iTol := (int)(fTol)
	if iTol == 0 {
		iTol = 1
	}

	//CPU side histogram computation
	CPUReference(seed, pipePktPerThread, rngType, cpuHist, histMax, histMin)

	// Compare each bin against the CPU reference within tolerance.
	for bin := 0; bin < MAX_HIST_BINS; bin++ {
		diff := int(localDevHist[bin] - cpuHist[bin])
		if diff < 0 {
			diff = -diff
		}
		if diff > iTol {
			println("Failed!")
			return
		}
	}
	println("Passed!")
}
func main() { var errNum cl.CL_int var numPlatforms cl.CL_uint var platformIds []cl.CL_platform_id //var context cl.CL_context // First, query the total number of platforms errNum = cl.CLGetPlatformIDs(0, nil, &numPlatforms) if errNum != cl.CL_SUCCESS || numPlatforms <= 0 { println("Failed to find any OpenCL platform.") return } // Next, allocate memory for the installed plaforms, and qeury // to get the list. platformIds = make([]cl.CL_platform_id, numPlatforms) // First, query the total number of platforms errNum = cl.CLGetPlatformIDs(numPlatforms, platformIds, nil) if errNum != cl.CL_SUCCESS { println("Failed to find any OpenCL platforms.") return } fmt.Printf("Number of platforms: \t%d\n", numPlatforms) // Iterate through the list of platforms displaying associated information for i := cl.CL_uint(0); i < numPlatforms; i++ { // First we display information associated with the platform DisplayPlatformInfo( platformIds[i], cl.CL_PLATFORM_PROFILE, "CL_PLATFORM_PROFILE") DisplayPlatformInfo( platformIds[i], cl.CL_PLATFORM_VERSION, "CL_PLATFORM_VERSION") DisplayPlatformInfo( platformIds[i], cl.CL_PLATFORM_VENDOR, "CL_PLATFORM_VENDOR") DisplayPlatformInfo( platformIds[i], cl.CL_PLATFORM_EXTENSIONS, "CL_PLATFORM_EXTENSIONS") // Now query the set of devices associated with the platform var numDevices cl.CL_uint errNum = cl.CLGetDeviceIDs(platformIds[i], cl.CL_DEVICE_TYPE_ALL, 0, nil, &numDevices) if errNum != cl.CL_SUCCESS { println("Failed to find OpenCL devices.") return } devices := make([]cl.CL_device_id, numDevices) errNum = cl.CLGetDeviceIDs(platformIds[i], cl.CL_DEVICE_TYPE_ALL, numDevices, devices, nil) if errNum != cl.CL_SUCCESS { println("Failed to find OpenCL devices.") return } fmt.Printf("\n\tNumber of devices: \t%d\n", numDevices) // Iterate through each device, displaying associated information for j := cl.CL_uint(0); j < numDevices; j++ { DisplayDeviceInfo(devices[j], cl.CL_DEVICE_TYPE, "CL_DEVICE_TYPE") DisplayDeviceInfo(devices[j], cl.CL_DEVICE_NAME, 
"CL_DEVICE_NAME") DisplayDeviceInfo(devices[j], cl.CL_DEVICE_VENDOR, "CL_DEVICE_VENDOR") //DisplayDeviceInfo(devices[j], // cl.CL_DRIVER_VERSION, // "CL_DRIVER_VERSION") DisplayDeviceInfo(devices[j], cl.CL_DEVICE_PROFILE, "CL_DEVICE_PROFILE") fmt.Printf("\n") } } }