Ejemplo n.º 1
0
// flattenIntoSlice serialises src into dest as a flat sequence of unsigned
// integers.
//
// Layout written to dest:
//
//	dest[0]                      number of nodes in src
//	per node, in order:
//	    one slot                 number of (block, mask) pairs that follow
//	    block, mask              repeated once per pair
//
// Each node is a list of bit indices. Consecutive indices that fall into the
// same 32-bit block (index >> 5) are merged into a single mask whose bit
// (index & 31) is set. Only adjacent entries are merged, so a block appears
// once per node only if the node's indices are grouped by block.
//
// dest must already be large enough; the function indexes it directly and
// never grows it.
func flattenIntoSlice(src [][]int, dest []cl.CL_uint) {
	dest[0] = cl.CL_uint(len(src))

	cursor := 1 // next free slot in dest
	for _, indices := range src {
		// Keep a slot for this node's pair count; it is filled in once the
		// node has been fully scanned.
		countSlot := cursor
		cursor++

		pairs := 0
		for pos := 0; pos < len(indices); {
			block := uint32(indices[pos] >> 5)

			// Fold every following index of the same block into one mask.
			var bits uint32
			for ; pos < len(indices); pos++ {
				if uint32(indices[pos])>>5 != block {
					break
				}
				bits |= 1 << (uint32(indices[pos]) & 31)
			}

			dest[cursor] = cl.CL_uint(block)
			dest[cursor+1] = cl.CL_uint(bits)
			cursor += 2
			pairs++
		}

		dest[countSlot] = cl.CL_uint(pairs)
	}
}
Ejemplo n.º 2
0
// populationToSlice packs the bits of every solution in pop into dest, using
// 32 bits per element.
//
// Each solution contributes blocksPerSolution(pop) consecutive elements. Bit k
// of a solution's j-th element is set when the solution's bit j*32+k is set.
// Only the first pop.Length() bits of a solution are inspected, so the unused
// high bits of the last element stay zero.
//
// dest is indexed directly and must already have room for every element.
func populationToSlice(pop *ga.Population, dest []cl.CL_uint) {
	bitsPerSolution := pop.Length()
	wordsPerSolution := blocksPerSolution(pop)

	out := 0 // next element of dest to write
	for _, sol := range pop.Solutions {
		remaining := bitsPerSolution

		for word := 0; word < wordsPerSolution; word++ {
			// The last word of a solution may hold fewer than 32 bits.
			width := remaining
			if width >= 32 {
				width = 32
			}
			remaining -= width

			base := word * 32
			var packed uint32
			for bit := 0; bit < width; bit++ {
				if sol.Bits.Has(base + bit) {
					packed |= 1 << uint32(bit)
				}
			}

			dest[out] = cl.CL_uint(packed)
			out++
		}
	}
}
Ejemplo n.º 3
0
// setKernelArg binds data to argument number pos of kernel and terminates the
// program if that fails.
//
// data must be either a *cl.CL_mem (a buffer argument) or a *cl.CL_uint (a
// scalar argument). Any other type is a programming error and is reported
// with log.Fatalf.
func setKernelArg(kernel cl.CL_kernel, pos int, data interface{}) {

	var status cl.CL_int

	switch data := data.(type) {
	case *cl.CL_mem:
		// NOTE(review): the size passed here is that of the pointer itself,
		// not of the cl.CL_mem it points to. Presumably both are
		// pointer-sized handles, so the value is the same; confirm against
		// the binding's definition of CL_mem.
		status = cl.CLSetKernelArg(
			kernel, cl.CL_uint(pos), cl.CL_size_t(unsafe.Sizeof(data)),
			unsafe.Pointer(data))

	case *cl.CL_uint:
		status = cl.CLSetKernelArg(
			kernel, cl.CL_uint(pos), cl.CL_size_t(unsafe.Sizeof(*data)),
			unsafe.Pointer(data))

	default:
		// %T prints the dynamic type of data. The boolean verb %t used
		// previously produced a "%!t(...)" formatting error instead.
		log.Fatalf("Fatal error: setting kernel arg for unknown type %T.", data)
	}

	if status != cl.CL_SUCCESS {
		// The error-string table is indexed by the negated status code.
		log.Printf("%v", cl.ERROR_CODES_STRINGS[-status])
		log.Fatalf("Fatal error: could not set arg %d for OpenCL kernel.", pos)
	}
}
Ejemplo n.º 4
0
// runOpenCL runs the evolutionary loop with the GOM step executed by the
// "gom" OpenCL kernel.
//
// It selects a compute device (CPU or GPU depending on useCPU), builds the
// kernel from the problem-specific source plus kernels/rng.cl and
// kernels/gom.cl, allocates the device buffers, and then repeats:
//
//  1. build the linkage tree on the host and upload a flattened copy,
//  2. upload the bit-packed population,
//  3. run the kernel with one work-item per solution,
//  4. read back the offspring and the per-solution improvement flags.
//
// The loop stops when numGenerations generations have passed, when no
// solution improved during a generation, or when sliceToPopulation reports
// that an optimal solution was found.
//
// OpenCL failures are reported through requireSuccess (presumably fatal; see
// its definition). Every OpenCL object created here is released on return.
func runOpenCL() {

	var status cl.CL_int

	var numPlatforms cl.CL_uint

	//---------------------------------------------------
	// Step 1: Discover and retrieve OpenCL platforms.
	//---------------------------------------------------

	// First call only asks how many platforms exist; its status must be
	// checked too, otherwise numPlatforms may be meaningless.
	requireSuccess(cl.CLGetPlatformIDs(0, nil, &numPlatforms),
		"could not query the number of OpenCL platforms.")

	platforms := make([]cl.CL_platform_id, numPlatforms)

	requireSuccess(cl.CLGetPlatformIDs(numPlatforms, platforms, nil),
		"could not retrieve OpenCL platform IDs.")

	if verbosity >= 4 {
		printPlatforms(platforms)
	}

	//---------------------------------------------------
	// Step 2: Discover and retrieve OpenCL devices.
	//---------------------------------------------------

	var preferredType cl.CL_device_type

	if useCPU {
		preferredType = cl.CL_DEVICE_TYPE_CPU
	} else {
		preferredType = cl.CL_DEVICE_TYPE_GPU
	}

	// Despite its name, gpuDevice is a CPU device when useCPU is set.
	_, gpuDevice := findDevice(platforms, preferredType)
	gpuDevices := make([]cl.CL_device_id, 1)
	gpuDevices[0] = gpuDevice

	if verbosity >= 4 {
		printDeviceInfo(gpuDevice)
	}

	//---------------------------------------------------
	// Step 3: Create an OpenCL context.
	//---------------------------------------------------

	context := cl.CLCreateContext(nil, 1, gpuDevices, nil, nil, &status)
	requireSuccess(status, "could not create OpenCL context.")
	defer cl.CLReleaseContext(context)

	//---------------------------------------------------
	// Step 4: Create an OpenCL command queue.
	//---------------------------------------------------

	commandQueue := cl.CLCreateCommandQueue(context, gpuDevice, 0, &status)
	requireSuccess(status, "could not create OpenCL command queue.")
	defer cl.CLReleaseCommandQueue(commandQueue)

	//---------------------------------------------------
	// Step 5: Create OpenCL program and kernel.
	//---------------------------------------------------

	var clSourceData [3][]byte
	var clSourceLengths [3]cl.CL_size_t
	var err error

	clSourceFiles := []string{
		"kernels/" + problems[problemIndex].clSource,
		"kernels/rng.cl",
		"kernels/gom.cl",
	}

	for i, s := range clSourceFiles {
		clSourceData[i], err = ioutil.ReadFile(s)

		if err != nil {
			log.Fatalf("Could not read the kernel source file %s.", s)
		}

		clSourceLengths[i] = cl.CL_size_t(len(clSourceData[i]))
	}

	program := cl.CLCreateProgramWithSource(context, 3, clSourceData[:], clSourceLengths[:], &status)
	requireSuccess(status, "could not compile an OpenCL kernel from source.")
	defer cl.CLReleaseProgram(program)

	status = cl.CLBuildProgram(program, 1, gpuDevices, nil, nil, nil)

	if status != cl.CL_SUCCESS {
		// Show the compiler output before reporting the failure.
		printProgramBuildInfo(program, gpuDevice)
	}
	// A program that failed to build cannot yield a kernel, so report the
	// failure here rather than at kernel creation.
	requireSuccess(status, "could not build the OpenCL program.")

	kernel := cl.CLCreateKernel(program, []byte("gom"), &status)
	requireSuccess(status, "could not create OpenCL kernel.")
	defer cl.CLReleaseKernel(kernel)

	//---------------------------------------------------
	// Step 6: Report kernel work-group information.
	//---------------------------------------------------

	if verbosity >= 4 {
		printKernelWorkGroup(kernel, gpuDevice)
	}

	//---------------------------------------------------
	// Step 7: Initialize OpenCL memory.
	//---------------------------------------------------

	var size cl.CL_uint
	length := cl.CL_size_t(problemLength)

	pop := ga.NewPopulation(populationSize, problemLength)

	// One cl.CL_uint per 32-bit block of every solution.
	numBlocks := blocksPerSolution(pop) * pop.Size()
	dataSize := cl.CL_size_t(unsafe.Sizeof(size)) * cl.CL_size_t(numBlocks)

	populationData := make([]cl.CL_uint, numBlocks)
	offspringData := make([]cl.CL_uint, numBlocks)

	populationBuffer := cl.CLCreateBuffer(
		context, cl.CL_MEM_READ_ONLY, dataSize, nil, &status)
	requireSuccess(status, "could not allocate an OpenCL memory buffer.")
	defer cl.CLReleaseMemObject(populationBuffer)

	cloneBuffer := cl.CLCreateBuffer(
		context, cl.CL_MEM_READ_WRITE, dataSize, nil, &status)
	requireSuccess(status, "could not allocate an OpenCL memory buffer.")
	defer cl.CLReleaseMemObject(cloneBuffer)

	// Maximum bound on the number of elements in the LT + node sizes.
	boundSum := (length*length+3*length-2)/2 + (2*length - 1) + 1

	ltData := make([]cl.CL_uint, boundSum)

	// The byte size must be derived from the element type of ltData. Using
	// the size of a CL_size_t here would make the upload below read past
	// the end of ltData wherever CL_size_t is wider than CL_uint.
	ltSize := cl.CL_size_t(unsafe.Sizeof(ltData[0])) * boundSum

	ltBuffer := cl.CLCreateBuffer(context, cl.CL_MEM_READ_ONLY, ltSize, nil, &status)
	requireSuccess(status, "could not allocate an OpenCL memory buffer.")
	defer cl.CLReleaseMemObject(ltBuffer)

	// One flag per solution, written by the kernel.
	var dummyCLBool cl.CL_char
	improvsSize := cl.CL_size_t(unsafe.Sizeof(dummyCLBool)) * cl.CL_size_t(pop.Size())
	improvsData := make([]cl.CL_char, pop.Size())
	improvsBuffer := cl.CLCreateBuffer(context, cl.CL_MEM_WRITE_ONLY, improvsSize, nil, &status)
	requireSuccess(status, "could not allocate an OpenCL memory buffer.")
	defer cl.CLReleaseMemObject(improvsBuffer)

	offspringBuffer := cl.CLCreateBuffer(
		context, cl.CL_MEM_WRITE_ONLY, dataSize, nil, &status)
	requireSuccess(status, "could not allocate an OpenCL memory buffer.")
	defer cl.CLReleaseMemObject(offspringBuffer)

	//---------------------------------------------------
	// Step 8: Perform GOMEA.
	//---------------------------------------------------

	// NOTE(review): the population above is created before the generator is
	// seeded; if ga.NewPopulation draws from math/rand, randomSeed does not
	// influence the initial population. Confirm whether that is intended.
	if randomSeed == 0 {
		rand.Seed(time.Now().Unix())
	} else {
		rand.Seed(int64(randomSeed))
	}

	done := false

	generationsPassed := 0

	if verbosity >= 3 {
		printGeneration(0, pop)
	}

	for !done {

		// Build the linkage tree and upload a flattened version to the compute device.
		freqs := Frequencies(pop)
		lt := LinkageTree(pop, freqs)
		flattenIntoSlice(lt, ltData)

		requireSuccess(cl.CLEnqueueWriteBuffer(
			commandQueue, ltBuffer, cl.CL_TRUE, 0,
			ltSize, unsafe.Pointer(&ltData[0]), 0, nil, nil),
			"could not write data to an OpenCL memory buffer.")

		// Store a flattened version of the population on the compute device.
		populationToSlice(pop, populationData)
		requireSuccess(cl.CLEnqueueWriteBuffer(
			commandQueue, populationBuffer, cl.CL_TRUE, 0,
			dataSize, unsafe.Pointer(&populationData[0]), 0, nil, nil),
			"could not write data to an OpenCL memory buffer.")

		// Set the GOM kernel arguments.
		popSize := cl.CL_uint(pop.Size())
		solLength := cl.CL_uint(pop.Length())
		setKernelArg(kernel, 0, &populationBuffer)
		setKernelArg(kernel, 1, &popSize)
		setKernelArg(kernel, 2, &solLength)
		setKernelArg(kernel, 3, &cloneBuffer)
		setKernelArg(kernel, 4, &ltBuffer)
		setKernelArg(kernel, 5, &improvsBuffer)
		setKernelArg(kernel, 6, &offspringBuffer)

		// One work-item per solution.
		var globalWorkSize [1]cl.CL_size_t
		globalWorkSize[0] = cl.CL_size_t(pop.Size())

		// Perform GOM crossover.
		requireSuccess(cl.CLEnqueueNDRangeKernel(
			commandQueue, kernel, 1, nil, globalWorkSize[:],
			nil, 0, nil, nil),
			"could not enqueue OpenCL kernel.")

		requireSuccess(cl.CLFinish(commandQueue), "could not finish command queue.")

		// Retrieve the offspring population from the compute device.
		requireSuccess(cl.CLEnqueueReadBuffer(
			commandQueue, offspringBuffer, cl.CL_TRUE, 0,
			dataSize, unsafe.Pointer(&offspringData[0]), 0, nil, nil),
			"reading a buffer failed.")

		requireSuccess(cl.CLEnqueueReadBuffer(
			commandQueue, improvsBuffer, cl.CL_TRUE, 0,
			improvsSize, unsafe.Pointer(&improvsData[0]), 0, nil, nil),
			"reading improvs buffer failed.")

		foundOptimal := sliceToPopulation(offspringData, pop)

		generationsPassed++

		// Any positive flag means at least one solution improved.
		improved := false

		for _, b := range improvsData {
			if b > 0 {
				improved = true
				break
			}
		}

		// Decide on termination before printing: the verbosity-2 report of
		// the final generation depends on done, which would otherwise still
		// be false at this point in every iteration.
		// TODO: Termination Criterion
		done = generationsPassed == numGenerations || !improved || foundOptimal

		// NOTE(review): generation 0 is printed for verbosity >= 3 but later
		// generations only for verbosity == 3; confirm whether verbosity 4
		// is meant to skip them.
		if (verbosity == 2 && done) || (verbosity == 3) {
			printGeneration(generationsPassed, pop)
		}

		if !improved && verbosity >= 2 {
			log.Println("Terminated after the population did not improve for one generation.")
		}

		if foundOptimal && verbosity >= 2 {
			log.Printf("Optimal solution found after %d generations.\n", generationsPassed)
		}
	}
}