This code should map a CUdevice to a NUMA node (by enumerating all PCI devices). I have not compiled the code in this form or tested it as is, but the calls should work fine for mapping any CUDA device to the OS enumeration with respect to PCI device location:
{{{
long GetNumaNode(CUdevice dev)
{
BOOL ret;
DWORD lastError;
// get the cuda device string
char cuDevString[CUDA_DEV_STRING_LEN];
unsigned long cudaBus;
unsigned long cudaSubdevice;
unsigned long cudaFunction;
CUresult status = cuDeviceGetPCIBusId(cuDevString, CUDA_DEV_STRING_LEN, dev);
assert(CUDA_SUCCESS == status);
if (CUDA_SUCCESS != status) {
return 0;
}
char tmp;
char tmp2;
char del[] = ":.";
// remove domain
tmp = strtok_s(cuDevString, del, &tmp2);
// get bus
tmp = strtok_s(NULL, del, &tmp2);
sscanf_s(tmp, "%x", &cudaBus);
// get subdevice
tmp = strtok_s(NULL, del, &tmp2);
sscanf_s(tmp, "%x", &cudaSubdevice);
// get function
tmp = strtok_s(NULL, del, &tmp2);
sscanf_s(tmp, "%x", &cudaFunction);
// Use NULL as the first parameter as we need to look at non display devices too
HDEVINFO hNvDevInfo = SetupDiGetClassDevs(NULL, NULL, NULL, DIGCF_PRESENT | DIGCF_ALLCLASSES);
if(hNvDevInfo == INVALID_HANDLE_VALUE)
{
assert(!"INVALID_HANDLE_VALUE");
return 0;
}
// Find the deviceInfoData for each GPU
DWORD deviceIndex;
for (deviceIndex = 0; ; deviceIndex++)
{
SP_DEVINFO_DATA deviceInfoData;
unsigned long bus;
unsigned long subdevice;
unsigned long function;
deviceInfoData.cbSize = sizeof(SP_DEVINFO_DATA);
ret = SetupDiEnumDeviceInfo(hNvDevInfo, deviceIndex, &deviceInfoData);
if (!ret)
{
// MSDN says:
// call SetupDiEnumDeviceInfo until there are no more values (the function fails and a call
// to GetLastError returns ERROR_NO_MORE_ITEMS).
lastError = GetLastError();
assert(lastError == ERROR_NO_MORE_ITEMS);
break;
}
Based on http://www.open-mpi.org/community/lists/hwloc-users/2013/11/0926.php
Also useful: http://blogs.technet.com/b/winserverperformance/archive/2008/09/13/getting-system-topology-information-on-windows.aspx
This code should map a CUdevice to a NUMA node (by enumerating all PCI devices). I have not compiled the code in this form or tested it as is, but the calls should work fine for mapping any CUDA device to the OS enumeration with respect to PCI device location:
{{{ long GetNumaNode(CUdevice dev) { BOOL ret; DWORD lastError; // get the cuda device string char cuDevString[CUDA_DEV_STRING_LEN]; unsigned long cudaBus; unsigned long cudaSubdevice; unsigned long cudaFunction; CUresult status = cuDeviceGetPCIBusId(cuDevString, CUDA_DEV_STRING_LEN, dev); assert(CUDA_SUCCESS == status); if (CUDA_SUCCESS != status) { return 0; } char tmp; char tmp2; char del[] = ":."; // remove domain tmp = strtok_s(cuDevString, del, &tmp2); // get bus tmp = strtok_s(NULL, del, &tmp2); sscanf_s(tmp, "%x", &cudaBus); // get subdevice tmp = strtok_s(NULL, del, &tmp2); sscanf_s(tmp, "%x", &cudaSubdevice); // get function tmp = strtok_s(NULL, del, &tmp2); sscanf_s(tmp, "%x", &cudaFunction); // Use NULL as the first parameter as we need to look at non display devices too HDEVINFO hNvDevInfo = SetupDiGetClassDevs(NULL, NULL, NULL, DIGCF_PRESENT | DIGCF_ALLCLASSES); if(hNvDevInfo == INVALID_HANDLE_VALUE) { assert(!"INVALID_HANDLE_VALUE"); return 0; } // Find the deviceInfoData for each GPU DWORD deviceIndex; for (deviceIndex = 0; ; deviceIndex++) { SP_DEVINFO_DATA deviceInfoData; unsigned long bus; unsigned long subdevice; unsigned long function; deviceInfoData.cbSize = sizeof(SP_DEVINFO_DATA); ret = SetupDiEnumDeviceInfo(hNvDevInfo, deviceIndex, &deviceInfoData); if (!ret) { // MSDN says: // call SetupDiEnumDeviceInfo until there are no more values (the function fails and a call // to GetLastError returns ERROR_NO_MORE_ITEMS). lastError = GetLastError(); assert(lastError == ERROR_NO_MORE_ITEMS); break; }
}