Exemplo n.º 1
0
Arquivo: prng.go Projeto: xfong/gocl
/* uniform to gaussian */
func boxMuller(u [2]cl.CL_float) [2]cl.CL_float {
	var g [2]cl.CL_float

	r := cl.CL_float(math.Sqrt(-2 * math.Log(float64(u[0]))))
	theta := cl.CL_float(2.0 * PI * u[1])

	g[0] = r * cl.CL_float(math.Sin(float64(theta)))
	g[1] = r * cl.CL_float(math.Cos(float64(theta)))

	return g
}
Exemplo n.º 2
0
Arquivo: pipe.go Projeto: xfong/gocl
func CPUReference(seed cl.CL_int,
	pipePktPerThread cl.CL_int,
	rngType cl.CL_int,
	cpuHist []cl.CL_int,
	histMax, histMin cl.CL_float) {
	var pmPRNG PM_PRNG
	var irn [PRNG_CHANNELS][2]cl.CL_int
	var frn [PRNG_CHANNELS][2]cl.CL_float
	var grn [PRNG_CHANNELS][2]cl.CL_float

	var binWidth cl.CL_float

	// Initialize the prng
	pmPRNG.rngInit(seed)

	// Put starting values for each channel
	for ch := cl.CL_int(0); ch < cl.CL_int(PRNG_CHANNELS); ch++ {
		irn[ch][0] = (ch + 1) * (ch + 1)
		irn[ch][1] = (ch + 1) * (ch + 1)
	}

	//compute binWidth
	binWidth = (histMax - histMin) / cl.CL_float(MAX_HIST_BINS)

	for pkt := cl.CL_int(0); pkt < pipePktPerThread; pkt++ {
		// Generate random numbers
		for ch := 0; ch < PRNG_CHANNELS; ch++ {
			irn[ch][0] = pmPRNG.rngPM(irn[ch][1], cl.CL_int(ch))
			irn[ch][1] = pmPRNG.rngPM(irn[ch][0], cl.CL_int(ch))

			frn[ch][0] = cl.CL_float(irn[ch][0]) * AM
			if frn[ch][0] > RMAX {
				frn[ch][0] = cl.CL_float(RMAX)
			}
			frn[ch][1] = cl.CL_float(irn[ch][1]) * AM
			if frn[ch][1] > RMAX {
				frn[ch][1] = cl.CL_float(RMAX)
			}

			if rngType == RV_GAUSSIAN {
				grn[ch] = boxMuller(frn[ch])
			} else {
				grn[ch] = frn[ch]
			}

		}

		// host side histogram
		for ch := 0; ch < PRNG_CHANNELS; ch++ {
			rmin := histMin
			rmax := rmin + binWidth
			found := 0

			for bindex := 0; (bindex < MAX_HIST_BINS) && (found != 2); bindex++ {
				if (grn[ch][0] >= rmin) && (grn[ch][0] < rmax) {
					found += 1
					cpuHist[bindex] += 1
				}

				if (grn[ch][1] >= rmin) && (grn[ch][1] < rmax) {
					found += 1
					cpuHist[bindex] += 1
				}

				rmin = rmax
				rmax = rmin + binWidth
			}
		}
	}
}
Exemplo n.º 3
0
Arquivo: pipe.go Projeto: xfong/gocl
// This program demo OpenCL 2.0 Pipe feature
package main

import (
	"fmt"
	"gocl/cl"
	"gocl/cl_demo/utils"
	"unsafe"
)

const (
	//constants for urng
	IA   = 16807                 // a
	IM   = 2147483647            // m
	AM   = 1.0 / cl.CL_float(IM) // 1/m - To calculate floating point result
	IQ   = 127773
	IR   = 2836
	NTAB = 16
	NDIV = (1 + (IM-1)/NTAB)
	EPS  = 1.2e-7
	RMAX = (1.0 - EPS)

	PRNG_CHANNELS = 256
	MAX_NTAB      = (PRNG_CHANNELS * NTAB)

	//mathematical constants
	PI = 3.14159265359

	//arbitrary mask to xor with seed. required to avoid seed to rng being zero.
	MASK = 0x2F24EA81
)
Exemplo n.º 4
0
Arquivo: svmfg.go Projeto: xfong/gocl
func svmbasic(size cl.CL_size_t,
	context cl.CL_context,
	queue cl.CL_command_queue,
	kernel cl.CL_kernel) {
	// Prepare input data as follows.
	// Build two arrays:
	//     - an array that consists of the Element structures
	//       (refer to svmbasic.h for the structure definition)
	//     - an array that consists of the float values
	//
	// Each structure of the first array has the following pointers:
	//     - 'internal', which points to a 'value' field of another entry
	//       of the same array.
	//     - 'external', which points to a float value from the the
	//       second array.
	//
	// Pointers are set randomly. The structures do not reflect any real usage
	// scenario, but are illustrative for a simple device-side traversal.
	//
	//        Array of Element                        Array of floats
	//           structures
	//
	//    ||====================||
	//    ||    .............   ||                   ||============||
	//    ||    .............   ||<-----+            || .......... ||
	//    ||====================||      |            ||    float   ||
	//    ||   float* internal--||------+            ||    float   ||
	//    ||   float* external--||------------------>||    float   ||
	//    ||   float value <----||------+            || .......... ||
	//    ||====================||      |            || .......... ||
	//    ||    .............   ||      |            ||    float   ||
	//    ||    .............   ||      |            ||    float   ||
	//    ||====================||      |            ||    float   ||
	//    ||====================||      |            ||    float   ||
	//    ||   float* internal--||------+            ||    float   ||
	//    ||   float* external--||------------------>||    float   ||
	//    ||   float value      ||                   ||    float   ||
	//    ||====================||                   ||    float   ||
	//    ||    .............   ||                   || .......... ||
	//    ||    .............   ||                   ||============||
	//    ||====================||
	//
	// The two arrays are created independently and are used to illustrate
	// two new OpenCL 2.0 API functions:
	//    - the array of Element structures is passed to the kernel as a
	//      kernel argument with the clSetKernelArgSVMPointer function
	//    - the array of floats is used by the kernel indirectly, and this
	//      dependency should be also specified with the clSetKernelExecInfo
	//      function prior to the kernel execution

	var err cl.CL_int

	// To enable host & device code to share pointer to the same address space
	// the arrays should be allocated as SVM memory. Use the clSVMAlloc function
	// to allocate SVM memory.
	//
	// Optionally, this function allows specifying alignment in bytes as its
	// last argument. As this basic example doesn't require any _special_ alignment,
	// the following code illustrates requesting default alignment via passing
	// zero value.

	inputElements := cl.CLSVMAlloc(context, // the context where this memory is supposed to be used
		cl.CL_MEM_READ_ONLY|cl.CL_MEM_SVM_FINE_GRAIN_BUFFER,
		size*cl.CL_size_t(unsafe.Sizeof(sampleElement)), // amount of memory to allocate (in bytes)
		0) // alignment in bytes (0 means default)
	if nil == inputElements {
		println("Cannot allocate SVM memory with clSVMAlloc: it returns null pointer. You might be out of memory.")
		return
	}
	defer cl.CLSVMFree(context, inputElements)

	inputFloats := cl.CLSVMAlloc(context, // the context where this memory is supposed to be used
		cl.CL_MEM_READ_ONLY|cl.CL_MEM_SVM_FINE_GRAIN_BUFFER,
		size*cl.CL_size_t(unsafe.Sizeof(sampleFloat)), // amount of memory to allocate (in bytes)
		0) // alignment in bytes (0 means default)
	if nil == inputFloats {
		println("Cannot allocate SVM memory with clSVMAlloc: it returns null pointer. You might be out of memory.")
		return
	}
	defer cl.CLSVMFree(context, inputFloats)

	// The OpenCL kernel uses the aforementioned input arrays to compute
	// values for the output array.

	output := cl.CLSVMAlloc(context, // the context where this memory is supposed to be used
		cl.CL_MEM_WRITE_ONLY|cl.CL_MEM_SVM_FINE_GRAIN_BUFFER,
		size*cl.CL_size_t(unsafe.Sizeof(sampleFloat)), // amount of memory to allocate (in bytes)
		0) // alignment in bytes (0 means default)
	defer cl.CLSVMFree(context, output)

	if nil == output {
		println("Cannot allocate SVM memory with clSVMAlloc: it returns null pointer. You might be out of memory.")
		return
	}

	// Note: in the coarse-grained SVM, mapping of inputElement and inputFloats is
	// needed to do the following initialization. While here, in the fine-grained SVM,
	// it is not necessary.

	// Populate data-structures with initial data.
	r := rand.New(rand.NewSource(99))

	for i := cl.CL_size_t(0); i < size; i++ {
		inputElement := (*Element)(unsafe.Pointer(uintptr(inputElements) + uintptr(i)*unsafe.Sizeof(sampleElement)))
		inputFloat := (*cl.CL_float)(unsafe.Pointer(uintptr(inputFloats) + uintptr(i)*unsafe.Sizeof(sampleFloat)))
		randElement := (*Element)(unsafe.Pointer(uintptr(inputElements) + uintptr(r.Intn(int(size)))*unsafe.Sizeof(sampleElement)))
		randFloat := (*cl.CL_float)(unsafe.Pointer(uintptr(inputFloats) + uintptr(r.Intn(int(size)))*unsafe.Sizeof(sampleFloat)))

		inputElement.internal = &(randElement.value)
		inputElement.external = randFloat
		inputElement.value = cl.CL_float(i)
		*inputFloat = cl.CL_float(i + size)
	}

	// Note: in the coarse-grained SVM, unmapping of inputElement and inputFloats is
	// needed before scheduling the kernel for execution. While here, in the fine-grained SVM,
	// it is not necessary.

	// Pass arguments to the kernel.
	// According to the OpenCL 2.0 specification, you need to use a special
	// function to pass a pointer from SVM memory to kernel.

	err = cl.CLSetKernelArgSVMPointer(kernel, 0, inputElements)
	utils.CHECK_STATUS(err, cl.CL_SUCCESS, "CLSetKernelArgSVMPointer")

	err = cl.CLSetKernelArgSVMPointer(kernel, 1, output)
	utils.CHECK_STATUS(err, cl.CL_SUCCESS, "CLSetKernelArgSVMPointer")

	// For buffer based SVM (both coarse- and fine-grain) if one SVM buffer
	// points to memory allocated in another SVM buffer, such allocations
	// should be passed to the kernel via clSetKernelExecInfo.

	err = cl.CLSetKernelExecInfo(kernel,
		cl.CL_KERNEL_EXEC_INFO_SVM_PTRS,
		cl.CL_size_t(unsafe.Sizeof(inputFloats)),
		unsafe.Pointer(&inputFloats))
	utils.CHECK_STATUS(err, cl.CL_SUCCESS, "CLSetKernelExecInfo")

	// Run the kernel.
	println("Running kernel...")

	var globalWorkSize [1]cl.CL_size_t
	globalWorkSize[0] = size

	err = cl.CLEnqueueNDRangeKernel(queue,
		kernel,
		1,
		nil,
		globalWorkSize[:],
		nil,
		0,
		nil,
		nil)
	utils.CHECK_STATUS(err, cl.CL_SUCCESS, "CLEnqueueNDRangeKernel")

	// Note: In the fine-grained SVM, after enqueuing the kernel above, the host application is
	// not blocked from accessing SVM allocations that were passed to the kernel. The host
	// can access the same regions of SVM memory as does the kernel if the kernel and the host
	// read/modify different bytes. If one side (host or device) needs to modify the same bytes
	// that are simultaniously read/modified by another side, atomics operations are usually
	// required to maintain sufficient memory consistency. This sample doesn't use this possibility
	// and the host just waits in clFinish below until the kernel is finished.
	err = cl.CLFinish(queue)
	utils.CHECK_STATUS(err, cl.CL_SUCCESS, "CLFinish")

	println(" DONE.")

	// Validate output state for correctness.
	// Compare: in the coarse-grained SVM case you need to map the output.
	// Here it is not needed.

	println("Checking correctness of the output buffer...")
	for i := cl.CL_size_t(0); i < size; i++ {
		inputElement := (*Element)(unsafe.Pointer(uintptr(inputElements) + uintptr(i)*unsafe.Sizeof(sampleElement)))
		outputFloat := (*cl.CL_float)(unsafe.Pointer(uintptr(output) + uintptr(i)*unsafe.Sizeof(sampleFloat)))
		expectedValue := *(inputElement.internal) + *(inputElement.external)
		if *outputFloat != expectedValue {
			println(" FAILED.")
			fmt.Printf("Mismatch at position %d, read %f, expected %f\n", i, *outputFloat, expectedValue)
			return
		}
	}
	println(" PASSED.")
}
Exemplo n.º 5
0
Arquivo: svmcg.go Projeto: xfong/gocl
func svmbasic(size cl.CL_size_t,
	context cl.CL_context,
	queue cl.CL_command_queue,
	kernel cl.CL_kernel) {
	// Prepare input data as follows.
	// Build two arrays:
	//     - an array that consists of the Element structures
	//       (refer to svmbasic.h for the structure definition)
	//     - an array that consists of the float values
	//
	// Each structure of the first array has the following pointers:
	//     - 'internal', which points to a 'value' field of another entry
	//       of the same array.
	//     - 'external', which points to a float value from the the
	//       second array.
	//
	// Pointers are set randomly. The structures do not reflect any real usage
	// scenario, but are illustrative for a simple device-side traversal.
	//
	//        Array of Element                        Array of floats
	//           structures
	//
	//    ||====================||
	//    ||    .............   ||                   ||============||
	//    ||    .............   ||<-----+            || .......... ||
	//    ||====================||      |            ||    float   ||
	//    ||   float* internal--||------+            ||    float   ||
	//    ||   float* external--||------------------>||    float   ||
	//    ||   float value <----||------+            || .......... ||
	//    ||====================||      |            || .......... ||
	//    ||    .............   ||      |            ||    float   ||
	//    ||    .............   ||      |            ||    float   ||
	//    ||====================||      |            ||    float   ||
	//    ||====================||      |            ||    float   ||
	//    ||   float* internal--||------+            ||    float   ||
	//    ||   float* external--||------------------>||    float   ||
	//    ||   float value      ||                   ||    float   ||
	//    ||====================||                   ||    float   ||
	//    ||    .............   ||                   || .......... ||
	//    ||    .............   ||                   ||============||
	//    ||====================||
	//
	// The two arrays are created independently and are used to illustrate
	// two new OpenCL 2.0 API functions:
	//    - the array of Element structures is passed to the kernel as a
	//      kernel argument with the clSetKernelArgSVMPointer function
	//    - the array of floats is used by the kernel indirectly, and this
	//      dependency should be also specified with the clSetKernelExecInfo
	//      function prior to the kernel execution

	var err cl.CL_int

	// To enable host & device code to share pointer to the same address space
	// the arrays should be allocated as SVM memory. Use the clSVMAlloc function
	// to allocate SVM memory.
	//
	// Optionally, this function allows specifying alignment in bytes as its
	// last argument. As this basic example doesn't require any _special_ alignment,
	// the following code illustrates requesting default alignment via passing
	// zero value.

	inputElements := cl.CLSVMAlloc(context, // the context where this memory is supposed to be used
		cl.CL_MEM_READ_ONLY,
		size*cl.CL_size_t(unsafe.Sizeof(sampleElement)), // amount of memory to allocate (in bytes)
		0) // alignment in bytes (0 means default)
	if nil == inputElements {
		println("Cannot allocate SVM memory with clSVMAlloc: it returns null pointer. You might be out of memory.")
		return
	}
	defer cl.CLSVMFree(context, inputElements)

	inputFloats := cl.CLSVMAlloc(context, // the context where this memory is supposed to be used
		cl.CL_MEM_READ_ONLY,
		size*cl.CL_size_t(unsafe.Sizeof(sampleFloat)), // amount of memory to allocate (in bytes)
		0) // alignment in bytes (0 means default)
	if nil == inputFloats {
		println("Cannot allocate SVM memory with clSVMAlloc: it returns null pointer. You might be out of memory.")
		return
	}
	defer cl.CLSVMFree(context, inputFloats)

	// The OpenCL kernel uses the aforementioned input arrays to compute
	// values for the output array.

	output := cl.CLSVMAlloc(context, // the context where this memory is supposed to be used
		cl.CL_MEM_WRITE_ONLY,
		size*cl.CL_size_t(unsafe.Sizeof(sampleFloat)), // amount of memory to allocate (in bytes)
		0) // alignment in bytes (0 means default)
	defer cl.CLSVMFree(context, output)

	if nil == output {
		println("Cannot allocate SVM memory with clSVMAlloc: it returns null pointer. You might be out of memory.")
		return
	}

	// In the coarse-grained buffer SVM model, only one OpenCL device (or
	// host) can have ownership for writing to the buffer. Specifically, host
	// explicitly requests the ownership by mapping/unmapping the SVM buffer.
	//
	// So to fill the input SVM buffers on the host, you need to map them to have
	// access from the host program.
	//
	// The following two map calls are required in case of coarse-grained SVM only.

	err = cl.CLEnqueueSVMMap(queue,
		cl.CL_TRUE, // blocking map
		cl.CL_MAP_WRITE,
		inputElements,
		size*cl.CL_size_t(unsafe.Sizeof(sampleElement)),
		0,
		nil,
		nil)
	utils.CHECK_STATUS(err, cl.CL_SUCCESS, "CLEnqueueSVMMap")

	err = cl.CLEnqueueSVMMap(queue,
		cl.CL_TRUE, // blocking map
		cl.CL_MAP_WRITE,
		inputFloats,
		size*cl.CL_size_t(unsafe.Sizeof(sampleFloat)),
		0,
		nil,
		nil)
	utils.CHECK_STATUS(err, cl.CL_SUCCESS, "CLEnqueueSVMMap")

	// Populate data-structures with initial data.
	r := rand.New(rand.NewSource(99))

	for i := cl.CL_size_t(0); i < size; i++ {
		inputElement := (*Element)(unsafe.Pointer(uintptr(inputElements) + uintptr(i)*unsafe.Sizeof(sampleElement)))
		inputFloat := (*cl.CL_float)(unsafe.Pointer(uintptr(inputFloats) + uintptr(i)*unsafe.Sizeof(sampleFloat)))
		randElement := (*Element)(unsafe.Pointer(uintptr(inputElements) + uintptr(r.Intn(int(size)))*unsafe.Sizeof(sampleElement)))
		randFloat := (*cl.CL_float)(unsafe.Pointer(uintptr(inputFloats) + uintptr(r.Intn(int(size)))*unsafe.Sizeof(sampleFloat)))

		inputElement.internal = &(randElement.value)
		inputElement.external = randFloat
		inputElement.value = cl.CL_float(i)
		*inputFloat = cl.CL_float(i + size)
	}

	// The following two unmap calls are required in case of coarse-grained SVM only

	err = cl.CLEnqueueSVMUnmap(queue,
		inputElements,
		0,
		nil,
		nil)
	utils.CHECK_STATUS(err, cl.CL_SUCCESS, "CLEnqueueSVMUnmap")

	err = cl.CLEnqueueSVMUnmap(queue,
		inputFloats,
		0,
		nil,
		nil)
	utils.CHECK_STATUS(err, cl.CL_SUCCESS, "CLEnqueueSVMUnmap")

	// Pass arguments to the kernel.
	// According to the OpenCL 2.0 specification, you need to use a special
	// function to pass a pointer from SVM memory to kernel.

	err = cl.CLSetKernelArgSVMPointer(kernel, 0, inputElements)
	utils.CHECK_STATUS(err, cl.CL_SUCCESS, "CLSetKernelArgSVMPointer")

	err = cl.CLSetKernelArgSVMPointer(kernel, 1, output)
	utils.CHECK_STATUS(err, cl.CL_SUCCESS, "CLSetKernelArgSVMPointer")

	// For buffer based SVM (both coarse- and fine-grain) if one SVM buffer
	// points to memory allocated in another SVM buffer, such allocations
	// should be passed to the kernel via clSetKernelExecInfo.

	err = cl.CLSetKernelExecInfo(kernel,
		cl.CL_KERNEL_EXEC_INFO_SVM_PTRS,
		cl.CL_size_t(unsafe.Sizeof(inputFloats)),
		unsafe.Pointer(&inputFloats))
	utils.CHECK_STATUS(err, cl.CL_SUCCESS, "CLSetKernelExecInfo")

	// Run the kernel.
	println("Running kernel...")

	var globalWorkSize [1]cl.CL_size_t
	globalWorkSize[0] = size

	err = cl.CLEnqueueNDRangeKernel(queue,
		kernel,
		1,
		nil,
		globalWorkSize[:],
		nil,
		0,
		nil,
		nil)
	utils.CHECK_STATUS(err, cl.CL_SUCCESS, "CLEnqueueNDRangeKernel")

	// Map the output SVM buffer to read the results.
	// Mapping is required for coarse-grained SVM only.

	err = cl.CLEnqueueSVMMap(queue,
		cl.CL_TRUE, // blocking map
		cl.CL_MAP_READ,
		output,
		size*cl.CL_size_t(unsafe.Sizeof(sampleFloat)),
		0,
		nil,
		nil)
	utils.CHECK_STATUS(err, cl.CL_SUCCESS, "CLEnqueueSVMMap")

	println(" DONE.")

	// Validate output state for correctness.

	println("Checking correctness of the output buffer...")
	for i := cl.CL_size_t(0); i < size; i++ {
		inputElement := (*Element)(unsafe.Pointer(uintptr(inputElements) + uintptr(i)*unsafe.Sizeof(sampleElement)))
		outputFloat := (*cl.CL_float)(unsafe.Pointer(uintptr(output) + uintptr(i)*unsafe.Sizeof(sampleFloat)))
		expectedValue := *(inputElement.internal) + *(inputElement.external)
		if *outputFloat != expectedValue {
			println(" FAILED.")
			fmt.Printf("Mismatch at position %d, read %f, expected %f\n", i, *outputFloat, expectedValue)
			return
		}
	}
	println(" PASSED.")

	err = cl.CLEnqueueSVMUnmap(queue,
		output,
		0,
		nil,
		nil)
	utils.CHECK_STATUS(err, cl.CL_SUCCESS, "CLEnqueueSVMUnmap")

	err = cl.CLFinish(queue)
	utils.CHECK_STATUS(err, cl.CL_SUCCESS, "CLFinish")
}