platformCPUCount.cpp
Engine/source/platform/platformCPUCount.cpp
Detailed Description
1 2// Original code is: 3// Copyright (c) 2005 Intel Corporation 4// All Rights Reserved 5// 6// CPUCount.cpp : Detects three forms of hardware multi-threading support across IA-32 platform 7// The three forms of HW multithreading are: Multi-processor, Multi-core, and 8// HyperThreading Technology. 9// This application enumerates all the logical processors enabled by OS and BIOS, 10// determine the HW topology of these enabled logical processors in the system 11// using information provided by CPUID instruction. 12// A multi-processing system can support any combination of the three forms of HW 13// multi-threading support. The relevant topology can be identified using a 14// three level decomposition of the "initial APIC ID" into 15// Package_id, core_id, and SMT_id. Such decomposition provides a three-level map of 16// the topology of hardware resources and 17// allow multi-threaded software to manage shared hardware resources in 18// the platform to reduce resource contention 19 20// Multicore detection algorithm for processor and cache topology requires 21// all leaf functions of CPUID instructions be available. System administrator 22// must ensure BIOS settings is not configured to restrict CPUID functionalities. 23//------------------------------------------------------------------------------------------------- 24 25#if defined(TORQUE_OS_LINUX) || defined(LINUX) 26 27// TODO GCC code don't compile on Release with optimizations, mover code to platform layer 28 29#else 30 31#include "platform/platform.h" 32#include "platform/platformCPUCount.h" 33 34#if defined(TORQUE_OS_LINUX) || defined(TORQUE_OS_OSX) 35 36#ifdef TORQUE_OS_LINUX 37// The Linux source code listing can be compiled using Linux kernel verison 2.6 38// or higher (e.g. RH 4AS-2.8 using GCC 3.4.4). 39// Due to syntax variances of Linux affinity APIs with earlier kernel versions 40// and dependence on glibc library versions, compilation on Linux environment 41// with older kernels and compilers may require kernel patches or compiler upgrades. 42 43#include <stdlib.h> 44#include <unistd.h> 45#include <string.h> 46#include <sched.h> 47#define DWORD unsigned long 48#elif defined( TORQUE_OS_WIN ) 49#include <windows.h> 50#elif defined( TORQUE_OS_MAC ) 51# include <sys/types.h> 52# include <sys/sysctl.h> 53#else 54#error Not implemented on platform. 55#endif 56#include <stdio.h> 57#include <assert.h> 58 59namespace CPUInfo { 60 61#define HWD_MT_BIT 0x10000000 // EDX[28] Bit 28 is set if HT or multi-core is supported 62#define NUM_LOGICAL_BITS 0x00FF0000 // EBX[23:16] Bit 16-23 in ebx contains the number of logical 63 // processors per physical processor when execute cpuid with 64 // eax set to 1 65#define NUM_CORE_BITS 0xFC000000 // EAX[31:26] Bit 26-31 in eax contains the number of cores minus one 66 // per physical processor when execute cpuid with 67 // eax set to 4. 68 69 70#define INITIAL_APIC_ID_BITS 0xFF000000 // EBX[31:24] Bits 24-31 (8 bits) return the 8-bit unique 71 // initial APIC ID for the processor this code is running on. 72 73 74 #ifndef TORQUE_OS_MAC 75 static U32 CpuIDSupported(void); 76 static U32 find_maskwidth(unsigned int); 77 static U32 HWD_MTSupported(void); 78 static U32 MaxLogicalProcPerPhysicalProc(void); 79 static U32 MaxCorePerPhysicalProc(void); 80 static U8 GetAPIC_ID(void); 81 static U8 GetNzbSubID(U8, U8, U8); 82 #endif 83 84 static char g_s3Levels[2048]; 85 86#ifndef TORQUE_OS_MAC 87 88 // 89 // CpuIDSupported will return 0 if CPUID instruction is unavailable. Otherwise, it will return 90 // the maximum supported standard function. 91 // 92 static U32 CpuIDSupported(void) 93 { 94 U32 maxInputValue = 0; 95 // If CPUID instruction is supported 96#ifdef TORQUE_COMPILER_GCC 97 try 98 { 99 // call cpuid with eax = 0 100 asm 101 ( 102 "pushl %%ebx\n\t" 103 "xorl %%eax,%%eax\n\t" 104 "cpuid\n\t" 105 "popl %%ebx\n\t" 106 : "=a" (maxInputValue) 107 : 108 : "%ecx", "%edx" 109 ); 110 } 111 catch (...) 112 { 113 return(0); // cpuid instruction is unavailable 114 } 115#elif defined( TORQUE_COMPILER_VISUALC ) 116 try 117 { 118 // call cpuid with eax = 0 119 __asm 120 { 121 xor eax, eax 122 cpuid 123 mov maxInputValue, eax 124 } 125 } 126 catch (...) 127 { 128 // cpuid instruction is unavailable 129 } 130#else 131# error Not implemented. 132#endif 133 134 return maxInputValue; 135 } 136 137 138 139 // 140 // Function returns the maximum cores per physical package. Note that the number of 141 // AVAILABLE cores per physical to be used by an application might be less than this 142 // maximum value. 143 // 144 145 static U32 MaxCorePerPhysicalProc(void) 146 { 147 148 U32 Regeax = 0; 149 150 if (!HWD_MTSupported()) return (U32) 1; // Single core 151#ifdef TORQUE_COMPILER_GCC 152 { 153 asm 154 ( 155 "pushl %ebx\n\t" 156 "xorl %eax, %eax\n\t" 157 "cpuid\n\t" 158 "cmpl $4, %eax\n\t" // check if cpuid supports leaf 4 159 "jl .single_core\n\t" // Single core 160 "movl $4, %eax\n\t" 161 "movl $0, %ecx\n\t" // start with index = 0; Leaf 4 reports 162 "popl %ebx\n\t" 163 ); // at least one valid cache level 164 asm 165 ( 166 "cpuid" 167 : "=a" (Regeax) 168 : 169 : "%ecx", "%edx" 170 ); 171 asm 172 ( 173 "jmp .multi_core\n" 174 ".single_core:\n\t" 175 "xor %eax, %eax\n" 176 ".multi_core:" 177 ); 178 } 179#elif defined( TORQUE_COMPILER_VISUALC ) 180 __asm 181 { 182 xor eax, eax 183 cpuid 184 cmp eax, 4 // check if cpuid supports leaf 4 185 jl single_core // Single core 186 mov eax, 4 187 mov ecx, 0 // start with index = 0; Leaf 4 reports 188 cpuid // at least one valid cache level 189 mov Regeax, eax 190 jmp multi_core 191 192single_core: 193 xor eax, eax 194 195multi_core: 196 197 } 198#else 199# error Not implemented. 200#endif 201 return (U32)((Regeax & NUM_CORE_BITS) >> 26)+1; 202 203 } 204 205 206 207 // 208 // The function returns 0 when the hardware multi-threaded bit is not set. 209 // 210 static U32 HWD_MTSupported(void) 211 { 212 213 214 U32 Regedx = 0; 215 216 217 if ((CpuIDSupported() >= 1)) 218 { 219#ifdef TORQUE_COMPILER_GCC 220 asm 221 ( 222 "pushl %%ebx\n\t" 223 "movl $1,%%eax\n\t" 224 "cpuid\n\t" 225 "popl %%ebx\n\t" 226 : "=d" (Regedx) 227 : 228 : "%eax","%ecx" 229 ); 230#elif defined( TORQUE_COMPILER_VISUALC ) 231 __asm 232 { 233 mov eax, 1 234 cpuid 235 mov Regedx, edx 236 } 237#else 238# error Not implemented. 239#endif 240 } 241 242 return (Regedx & HWD_MT_BIT); 243 244 245 } 246 247 248 249 // 250 // Function returns the maximum logical processors per physical package. Note that the number of 251 // AVAILABLE logical processors per physical to be used by an application might be less than this 252 // maximum value. 253 // 254 static U32 MaxLogicalProcPerPhysicalProc(void) 255 { 256 257 U32 Regebx = 0; 258 259 if (!HWD_MTSupported()) return (U32) 1; 260#ifdef TORQUE_COMPILER_GCC 261 asm 262 ( 263 "movl $1,%%eax\n\t" 264 "cpuid" 265 : "=b" (Regebx) 266 : 267 : "%eax","%ecx","%edx" 268 ); 269#elif defined( TORQUE_COMPILER_VISUALC ) 270 __asm 271 { 272 mov eax, 1 273 cpuid 274 mov Regebx, ebx 275 } 276#else 277# error Not implemented. 278#endif 279 return (unsigned int) ((Regebx & NUM_LOGICAL_BITS) >> 16); 280 281 } 282 283 284 static U8 GetAPIC_ID(void) 285 { 286 287 U32 Regebx = 0; 288#ifdef TORQUE_COMPILER_GCC 289 asm 290 ( 291 "movl $1, %%eax\n\t" 292 "cpuid" 293 : "=b" (Regebx) 294 : 295 : "%eax","%ecx","%edx" 296 ); 297 298#elif defined( TORQUE_COMPILER_VISUALC ) 299 __asm 300 { 301 mov eax, 1 302 cpuid 303 mov Regebx, ebx 304 } 305#else 306# error Not implemented. 307#endif 308 309 return (unsigned char) ((Regebx & INITIAL_APIC_ID_BITS) >> 24); 310 311 } 312 313 // 314 // Determine the width of the bit field that can represent the value count_item. 315 // 316 U32 find_maskwidth(U32 CountItem) 317 { 318 U32 MaskWidth, 319 count = CountItem; 320#ifdef TORQUE_COMPILER_GCC 321 asm 322 ( 323#ifdef __x86_64__ // define constant to compile 324 "push %%rcx\n\t" // under 64-bit Linux 325 "push %%rax\n\t" 326#else 327 "pushl %%ecx\n\t" 328 "pushl %%eax\n\t" 329#endif 330 // "movl $count, %%eax\n\t" //done by Assembler below 331 "xorl %%ecx, %%ecx" 332 // "movl %%ecx, MaskWidth\n\t" //done by Assembler below 333 : "=c" (MaskWidth) 334 : "a" (count) 335 // : "%ecx", "%eax" We don't list these as clobbered because we don't want the assembler 336 //to put them back when we are done 337 ); 338 asm 339 ( 340 "decl %%eax\n\t" 341 "bsrw %%ax,%%cx\n\t" 342 "jz next\n\t" 343 "incw %%cx\n\t" 344 // "movl %%ecx, MaskWidth\n" //done by Assembler below 345 : "=c" (MaskWidth) 346 : 347 ); 348 asm 349 ( 350 "next:\n\t" 351#ifdef __x86_64__ 352 "pop %rax\n\t" 353 "pop %rcx" 354#else 355 "popl %eax\n\t" 356 "popl %ecx" 357#endif 358 ); 359 360#elif defined( TORQUE_COMPILER_VISUALC ) 361 __asm 362 { 363 mov eax, count 364 mov ecx, 0 365 mov MaskWidth, ecx 366 dec eax 367 bsr cx, ax 368 jz next 369 inc cx 370 mov MaskWidth, ecx 371next: 372 373 } 374#else 375# error Not implemented. 376#endif 377 return MaskWidth; 378 } 379 380 381 // 382 // Extract the subset of bit field from the 8-bit value FullID. It returns the 8-bit sub ID value 383 // 384 static U8 GetNzbSubID(U8 FullID, 385 U8 MaxSubIDValue, 386 U8 ShiftCount) 387 { 388 U32 MaskWidth; 389 U8 MaskBits; 390 391 MaskWidth = find_maskwidth((U32) MaxSubIDValue); 392 MaskBits = (0xff << ShiftCount) ^ 393 ((U8) (0xff << (ShiftCount + MaskWidth))); 394 395 return (FullID & MaskBits); 396 } 397 398#endif 399 400 401 // 402 // 403 // 404 EConfig CPUCount(U32& TotAvailLogical, U32& TotAvailCore, U32& PhysicalNum) 405 { 406 EConfig StatusFlag = CONFIG_UserConfigIssue; 407 408 g_s3Levels[0] = 0; 409 TotAvailCore = 1; 410 PhysicalNum = 1; 411 412 U32 numLPEnabled = 0; 413 S32 MaxLPPerCore = 1; 414 415#ifdef TORQUE_OS_MAC 416 417 //FIXME: This isn't a proper port but more or less just some sneaky cheating 418 // to get around having to mess with yet another crap UNIX-style API. Seems 419 // like there isn't a way to do this that's working across all OSX incarnations 420 // and machine configurations anyway. 421 422 S32 numCPUs; 423 S32 numPackages; 424 425 // Get the number of CPUs. 426 427 size_t len = sizeof( numCPUs ); 428 if( sysctlbyname( "hw.ncpu", &numCPUs, &len, 0, 0 ) == -1 ) 429 return CONFIG_UserConfigIssue; 430 431 // Get the number of packages. 432 len = sizeof( numPackages ); 433 if( sysctlbyname( "hw.packages", &numPackages, &len, 0, 0 ) == -1 ) 434 return CONFIG_UserConfigIssue; 435 436 TotAvailCore = numCPUs; 437 TotAvailLogical = numCPUs; 438 PhysicalNum = numPackages; 439#else 440 441 U32 dwAffinityMask; 442 S32 j = 0; 443 U8 apicID, PackageIDMask; 444 U8 tblPkgID[256], tblCoreID[256], tblSMTID[256]; 445 char tmp[256]; 446 447#ifdef TORQUE_OS_LINUX 448 //we need to make sure that this process is allowed to run on 449 //all of the logical processors that the OS itself can run on. 450 //A process could acquire/inherit affinity settings that restricts the 451 // current process to run on a subset of all logical processor visible to OS. 452 453 // Linux doesn't easily allow us to look at the Affinity Bitmask directly, 454 // but it does provide an API to test affinity maskbits of the current process 455 // against each logical processor visible under OS. 456 S32 sysNumProcs = sysconf(_SC_NPROCESSORS_CONF); //This will tell us how many 457 //CPUs are currently enabled. 458 459 //this will tell us which processors this process can run on. 460 cpu_set_t allowedCPUs; 461 sched_getaffinity(0, sizeof(allowedCPUs), &allowedCPUs); 462 463 for (S32 i = 0; i < sysNumProcs; i++ ) 464 { 465 if ( CPU_ISSET(i, &allowedCPUs) == 0 ) 466 return CONFIG_UserConfigIssue; 467 } 468#elif defined( TORQUE_OS_WIN ) 469 DWORD dwProcessAffinity, dwSystemAffinity; 470 GetProcessAffinityMask(GetCurrentProcess(), 471 &dwProcessAffinity, 472 &dwSystemAffinity); 473 if (dwProcessAffinity != dwSystemAffinity) // not all CPUs are enabled 474 return CONFIG_UserConfigIssue; 475#else 476# error Not implemented. 477#endif 478 479 // Assume that cores within a package have the SAME number of 480 // logical processors. Also, values returned by 481 // MaxLogicalProcPerPhysicalProc and MaxCorePerPhysicalProc do not have 482 // to be power of 2. 483 484 MaxLPPerCore = MaxLogicalProcPerPhysicalProc() / MaxCorePerPhysicalProc(); 485 dwAffinityMask = 1; 486 487#ifdef TORQUE_OS_LINUX 488 cpu_set_t currentCPU; 489 while ( j < sysNumProcs ) 490 { 491 CPU_ZERO(¤tCPU); 492 CPU_SET(j, ¤tCPU); 493 if ( sched_setaffinity (0, sizeof(currentCPU), ¤tCPU) == 0 ) 494 { 495 sleep(0); // Ensure system to switch to the right CPU 496#elif defined( TORQUE_OS_WIN ) 497 while (dwAffinityMask && dwAffinityMask <= dwSystemAffinity) 498 { 499 if (SetThreadAffinityMask(GetCurrentThread(), dwAffinityMask)) 500 { 501 Sleep(0); // Ensure system to switch to the right CPU 502#else 503# error Not implemented. 504#endif 505 apicID = GetAPIC_ID(); 506 507 508 // Store SMT ID and core ID of each logical processor 509 // Shift vlaue for SMT ID is 0 510 // Shift value for core ID is the mask width for maximum logical 511 // processors per core 512 513 tblSMTID[j] = GetNzbSubID(apicID, MaxLPPerCore, 0); 514 U8 maxCorePPP = MaxCorePerPhysicalProc(); 515 U8 maskWidth = find_maskwidth(MaxLPPerCore); 516 tblCoreID[j] = GetNzbSubID(apicID, maxCorePPP, maskWidth); 517 518 // Extract package ID, assume single cluster. 519 // Shift value is the mask width for max Logical per package 520 521 PackageIDMask = (unsigned char) (0xff << 522 find_maskwidth(MaxLogicalProcPerPhysicalProc())); 523 524 tblPkgID[j] = apicID & PackageIDMask; 525 sprintf(tmp," AffinityMask = %d; Initial APIC = %d; Physical ID = %d, Core ID = %d, SMT ID = %d\n", 526 dwAffinityMask, apicID, tblPkgID[j], tblCoreID[j], tblSMTID[j]); 527 dStrcat(g_s3Levels, tmp, 2048); 528 529 numLPEnabled ++; // Number of available logical processors in the system. 530 531 } // if 532 533 j++; 534 dwAffinityMask = 1 << j; 535 } // while 536 537 // restore the affinity setting to its original state 538#ifdef TORQUE_OS_LINUX 539 sched_setaffinity (0, sizeof(allowedCPUs), &allowedCPUs); 540 sleep(0); 541#elif defined( TORQUE_OS_WIN ) 542 SetThreadAffinityMask(GetCurrentThread(), dwProcessAffinity); 543 Sleep(0); 544#else 545# error Not implemented. 546#endif 547 TotAvailLogical = numLPEnabled; 548 549 // 550 // Count available cores (TotAvailCore) in the system 551 // 552 U8 CoreIDBucket[256]; 553 DWORD ProcessorMask, pCoreMask[256]; 554 U32 i, ProcessorNum; 555 556 CoreIDBucket[0] = tblPkgID[0] | tblCoreID[0]; 557 ProcessorMask = 1; 558 pCoreMask[0] = ProcessorMask; 559 560 for (ProcessorNum = 1; ProcessorNum < numLPEnabled; ProcessorNum++) 561 { 562 ProcessorMask <<= 1; 563 for (i = 0; i < TotAvailCore; i++) 564 { 565 // Comparing bit-fields of logical processors residing in different packages 566 // Assuming the bit-masks are the same on all processors in the system. 567 if ((tblPkgID[ProcessorNum] | tblCoreID[ProcessorNum]) == CoreIDBucket[i]) 568 { 569 pCoreMask[i] |= ProcessorMask; 570 break; 571 } 572 573 } // for i 574 575 if (i == TotAvailCore) // did not match any bucket. Start a new one. 576 { 577 CoreIDBucket[i] = tblPkgID[ProcessorNum] | tblCoreID[ProcessorNum]; 578 pCoreMask[i] = ProcessorMask; 579 580 TotAvailCore++; // Number of available cores in the system 581 582 } 583 584 } // for ProcessorNum 585 586 587 // 588 // Count physical processor (PhysicalNum) in the system 589 // 590 U8 PackageIDBucket[256]; 591 DWORD pPackageMask[256]; 592 593 PackageIDBucket[0] = tblPkgID[0]; 594 ProcessorMask = 1; 595 pPackageMask[0] = ProcessorMask; 596 597 for (ProcessorNum = 1; ProcessorNum < numLPEnabled; ProcessorNum++) 598 { 599 ProcessorMask <<= 1; 600 for (i = 0; i < PhysicalNum; i++) 601 { 602 // Comparing bit-fields of logical processors residing in different packages 603 // Assuming the bit-masks are the same on all processors in the system. 604 if (tblPkgID[ProcessorNum]== PackageIDBucket[i]) 605 { 606 pPackageMask[i] |= ProcessorMask; 607 break; 608 } 609 610 } // for i 611 612 if (i == PhysicalNum) // did not match any bucket. Start a new one. 613 { 614 PackageIDBucket[i] = tblPkgID[ProcessorNum]; 615 pPackageMask[i] = ProcessorMask; 616 617 PhysicalNum++; // Total number of physical processors in the system 618 619 } 620 621 } // for ProcessorNum 622#endif 623 624 // 625 // Check to see if the system is multi-core 626 // Check if the system is hyper-threading 627 // 628 if (TotAvailCore > PhysicalNum) 629 { 630 // Multi-core 631 if (MaxLPPerCore == 1) 632 StatusFlag = CONFIG_MultiCoreAndHTNotCapable; 633 else if (numLPEnabled > TotAvailCore) 634 StatusFlag = CONFIG_MultiCoreAndHTEnabled; 635 else StatusFlag = CONFIG_MultiCoreAndHTDisabled; 636 637 } 638 else 639 { 640 // Single-core 641 if (MaxLPPerCore == 1) 642 StatusFlag = CONFIG_SingleCoreAndHTNotCapable; 643 else if (numLPEnabled > TotAvailCore) 644 StatusFlag = CONFIG_SingleCoreHTEnabled; 645 else StatusFlag = CONFIG_SingleCoreHTDisabled; 646 647 648 } 649 650 651 652 return StatusFlag; 653 } 654 655} // namespace CPUInfo 656#endif 657 658#endif 659