Example #1
0
// getDeviceProperties queries the CUDA runtime for the properties of the
// device identified by h.dev and copies the fields of interest into a
// deviceProp.
//
// When the runtime call reports an error, it returns a nil *deviceProp together
// with that error, so callers never read values from a struct the runtime did
// not fill in.
func (h handle) getDeviceProperties() (*deviceProp, error) {
	var props C.struct_cudaDeviceProp

	if err := errorString(C.cudaGetDeviceProperties(&props, h.dev)); err != nil {
		return nil, err
	}

	return &deviceProp{
		major:                      int(props.major),
		minor:                      int(props.minor),
		multiProcessorCount:        uint(props.multiProcessorCount),
		ECCEnabled:                 bool(props.ECCEnabled != 0),
		totalGlobalMem:             uint(props.totalGlobalMem),
		sharedMemPerMultiprocessor: uint(props.sharedMemPerMultiprocessor),
		totalConstMem:              uint(props.totalConstMem),
		l2CacheSize:                uint(props.l2CacheSize),
		memoryClockRate:            uint(props.memoryClockRate),
		memoryBusWidth:             uint(props.memoryBusWidth),
	}, nil
}
Example #2
0
// NewDevice looks up the CUDA device with the given PCI bus ID, reads its
// properties and returns a Device describing its architecture, core count and
// memory characteristics.
//
// It returns an error if the bus ID cannot be resolved to a device, if the
// device properties cannot be read, or if the device's compute capability
// ("major.minor") has no entry in archToCoresPerSM.
func NewDevice(busID string) (*Device, error) {
	var (
		dev  C.int
		prop C.struct_cudaDeviceProp
	)

	// C.CString allocates on the C heap; release it on every return path,
	// including the early error returns below.
	id := C.CString(busID)
	defer C.free(unsafe.Pointer(id))

	if err := cudaErr(C.cudaDeviceGetByPCIBusId(&dev, id)); err != nil {
		return nil, err
	}
	if err := cudaErr(C.cudaGetDeviceProperties(&prop, dev)); err != nil {
		return nil, err
	}

	arch := fmt.Sprintf("%d.%d", prop.major, prop.minor)
	cores, ok := archToCoresPerSM[arch]
	if !ok {
		return nil, fmt.Errorf("unsupported CUDA arch: %s", arch)
	}

	// Destroy the active CUDA context. This is best-effort cleanup: a failed
	// reset does not invalidate the properties already read, so the result is
	// deliberately discarded.
	_ = cudaErr(C.cudaDeviceReset())

	return &Device{
		handle: dev,
		// The family table is keyed by the first character of the arch string.
		Family: archToFamily[arch[:1]],
		Arch:   arch,
		Cores:  cores * uint(prop.multiProcessorCount),
		Memory: MemoryInfo{
			ECC:      bool(prop.ECCEnabled != 0),
			Global:   uint(prop.totalGlobalMem / (1024 * 1024)),
			Shared:   uint(prop.sharedMemPerMultiprocessor / 1024),
			Constant: uint(prop.totalConstMem / 1024),
			L2Cache:  uint(prop.l2CacheSize / 1024),
			// NOTE(review): integer arithmetic on clock rate / 1000 and bus
			// width / 8; the factor 2 presumably accounts for double data
			// rate memory — confirm against the CUDA property units.
			Bandwidth: 2 * uint((prop.memoryClockRate/1000)*(prop.memoryBusWidth/8)) / 1000,
		},
	}, nil
}
Example #3
0
// GetDeviceProperties returns the properties of the CUDA device with the given
// ordinal, copied from the runtime's cudaDeviceProp struct into a DeviceProp.
//
// It panics with the runtime's Error value if cudaGetDeviceProperties does not
// return Success.
func GetDeviceProperties(device int) *DeviceProp {
	var raw C.struct_cudaDeviceProp
	if status := Error(C.cudaGetDeviceProperties(&raw, C.int(device))); status != Success {
		panic(status)
	}

	out := &DeviceProp{
		Name:                     C.GoString(&raw.name[0]),
		TotalGlobalMem:           uint(raw.totalGlobalMem),
		SharedMemPerBlock:        uint(raw.sharedMemPerBlock),
		RegsPerBlock:             int(raw.regsPerBlock),
		WarpSize:                 int(raw.warpSize),
		MemPitch:                 int(raw.memPitch),
		MaxThreadsPerBlock:       int(raw.maxThreadsPerBlock),
		TotalConstMem:            uint(raw.totalConstMem),
		Major:                    int(raw.major),
		Minor:                    int(raw.minor),
		ClockRate:                int(raw.clockRate),
		TextureAlignment:         int(raw.textureAlignment),
		DeviceOverlap:            int(raw.deviceOverlap),
		MultiProcessorCount:      int(raw.multiProcessorCount),
		KernelExecTimeoutEnabled: int(raw.kernelExecTimeoutEnabled),
		Integrated:               int(raw.integrated),
		CanMapHostMemory:         int(raw.canMapHostMemory),
		ComputeMode:              int(raw.computeMode),
		ConcurrentKernels:        int(raw.concurrentKernels),
		ECCEnabled:               int(raw.ECCEnabled),
		PciBusID:                 int(raw.pciBusID),
		PciDeviceID:              int(raw.pciDeviceID),
		TccDriver:                int(raw.tccDriver),
	}

	// Copy the three per-axis limits (indices 0, 1 and 2) for both the thread
	// block dimensions and the grid size.
	for axis := 0; axis < 3; axis++ {
		out.MaxThreadsDim[axis] = int(raw.maxThreadsDim[axis])
		out.MaxGridSize[axis] = int(raw.maxGridSize[axis])
	}

	return out
}