func (h handle) getDeviceProperties() (*deviceProp, error) { var props C.struct_cudaDeviceProp r := C.cudaGetDeviceProperties(&props, h.dev) p := &deviceProp{ major: int(props.major), minor: int(props.minor), multiProcessorCount: uint(props.multiProcessorCount), ECCEnabled: bool(props.ECCEnabled != 0), totalGlobalMem: uint(props.totalGlobalMem), sharedMemPerMultiprocessor: uint(props.sharedMemPerMultiprocessor), totalConstMem: uint(props.totalConstMem), l2CacheSize: uint(props.l2CacheSize), memoryClockRate: uint(props.memoryClockRate), memoryBusWidth: uint(props.memoryBusWidth), } return p, errorString(r) }
func NewDevice(busID string) (*Device, error) { var ( dev C.int prop C.struct_cudaDeviceProp ) id := C.CString(busID) if err := cudaErr(C.cudaDeviceGetByPCIBusId(&dev, id)); err != nil { return nil, err } C.free(unsafe.Pointer(id)) if err := cudaErr(C.cudaGetDeviceProperties(&prop, dev)); err != nil { return nil, err } arch := fmt.Sprintf("%d.%d", prop.major, prop.minor) cores, ok := archToCoresPerSM[arch] if !ok { return nil, fmt.Errorf("unsupported CUDA arch: %s", arch) } // Destroy the active CUDA context cudaErr(C.cudaDeviceReset()) return &Device{ handle: dev, Family: archToFamily[arch[:1]], Arch: arch, Cores: cores * uint(prop.multiProcessorCount), Memory: MemoryInfo{ ECC: bool(prop.ECCEnabled != 0), Global: uint(prop.totalGlobalMem / (1024 * 1024)), Shared: uint(prop.sharedMemPerMultiprocessor / 1024), Constant: uint(prop.totalConstMem / 1024), L2Cache: uint(prop.l2CacheSize / 1024), Bandwidth: 2 * uint((prop.memoryClockRate/1000)*(prop.memoryBusWidth/8)) / 1000, }, }, nil }
// Returns the device properties func GetDeviceProperties(device int) *DeviceProp { var prop C.struct_cudaDeviceProp err := Error(C.cudaGetDeviceProperties(&prop, C.int(device))) if err != Success { panic(err) } devProp := new(DeviceProp) devProp.Name = C.GoString(&prop.name[0]) devProp.TotalGlobalMem = uint(prop.totalGlobalMem) devProp.SharedMemPerBlock = uint(prop.sharedMemPerBlock) devProp.RegsPerBlock = int(prop.regsPerBlock) devProp.WarpSize = int(prop.warpSize) devProp.MemPitch = int(prop.memPitch) devProp.MaxThreadsPerBlock = int(prop.maxThreadsPerBlock) devProp.MaxThreadsDim[0] = int(prop.maxThreadsDim[0]) devProp.MaxThreadsDim[1] = int(prop.maxThreadsDim[1]) devProp.MaxThreadsDim[2] = int(prop.maxThreadsDim[2]) devProp.MaxGridSize[0] = int(prop.maxGridSize[0]) devProp.MaxGridSize[1] = int(prop.maxGridSize[1]) devProp.MaxGridSize[2] = int(prop.maxGridSize[2]) devProp.TotalConstMem = uint(prop.totalConstMem) devProp.Major = int(prop.major) devProp.Minor = int(prop.minor) devProp.ClockRate = int(prop.clockRate) devProp.TextureAlignment = int(prop.textureAlignment) devProp.DeviceOverlap = int(prop.deviceOverlap) devProp.MultiProcessorCount = int(prop.multiProcessorCount) devProp.KernelExecTimeoutEnabled = int(prop.kernelExecTimeoutEnabled) devProp.Integrated = int(prop.integrated) devProp.CanMapHostMemory = int(prop.canMapHostMemory) devProp.ComputeMode = int(prop.computeMode) devProp.ConcurrentKernels = int(prop.concurrentKernels) devProp.ECCEnabled = int(prop.ECCEnabled) devProp.PciBusID = int(prop.pciBusID) devProp.PciDeviceID = int(prop.pciDeviceID) devProp.TccDriver = int(prop.tccDriver) return devProp }