add: GPU specs database with auto-detection (H100/H200/B200/B300)
Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent) Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
This commit is contained in:
parent
8f7539d9b0
commit
98e4977e28
164
modules/gpu_specs.py
Normal file
164
modules/gpu_specs.py
Normal file
@ -0,0 +1,164 @@
|
||||
"""GPU specifications database for NVIDIA datacenter GPUs."""
|
||||
|
||||
import shutil
|
||||
import subprocess
|
||||
|
||||
# Upper-case substrings of the nvidia-smi product name, mapped to the internal
# GPU_SPECS key. Each key is simply the lower-cased model name. Insertion order
# is the order in which detect_gpu_type() tries the patterns.
GPU_NAME_PATTERNS = {
    model: model.lower() for model in ("H100", "H200", "B200", "B300")
}
|
||||
|
||||
# Specs database, keyed by internal GPU type.
#
# Conventions:
#   * every *_tflops figure is the DENSE (non-sparse) peak;
#   * memory_bandwidth_gbps is expressed in GB/s despite the "gbps" suffix;
#     nvlink_bandwidth_gbps presumably uses the same unit (TODO confirm);
#   * driver / CUDA minimums are stored as strings.
GPU_SPECS = {
    # ---- Hopper ---------------------------------------------------------
    "h100": {
        "full_name": "NVIDIA H100 SXM5",
        "architecture": "Hopper",
        "compute_capability": 9.0,
        "hbm_capacity_gb": 80,
        "hbm_type": "HBM3",
        "memory_bandwidth_gbps": 3400,  # GB/s (3.4 TB/s)
        "fp32_tflops": 67,
        "tf32_tflops": 495,  # dense (989 sparse)
        "fp16_tflops": 990,  # dense (1979 sparse w/ 2:4)
        "bf16_tflops": 990,  # dense
        "fp8_tflops": 1979,  # dense
        "tdp_watts": 700,
        "nvlink_gen": 4,
        "nvlink_bandwidth_gbps": 900,  # bidirectional
        "pcie_gen": 5,
        "min_driver_version": "535",
        "min_cuda_version": "12.1",
    },
    "h200": {
        "full_name": "NVIDIA H200 SXM",
        "architecture": "Hopper",
        "compute_capability": 9.0,
        "hbm_capacity_gb": 141,
        "hbm_type": "HBM3e",
        # GB/s (4.8 TB/s). Do not confuse with 989, which is the sparse TF32
        # TFLOPS figure rather than a bandwidth.
        "memory_bandwidth_gbps": 4800,
        "fp32_tflops": 67,
        "tf32_tflops": 495,  # dense
        "fp16_tflops": 990,  # dense
        "bf16_tflops": 990,  # dense
        "fp8_tflops": 1979,  # dense
        "tdp_watts": 700,
        "nvlink_gen": 4,
        "nvlink_bandwidth_gbps": 900,
        "pcie_gen": 5,
        "min_driver_version": "535",
        "min_cuda_version": "12.1",
    },
    # ---- Blackwell ------------------------------------------------------
    "b200": {
        "full_name": "NVIDIA B200 SXM",
        "architecture": "Blackwell",
        "compute_capability": 10.0,
        "hbm_capacity_gb": 180,
        "hbm_type": "HBM3e",
        "memory_bandwidth_gbps": 8000,  # GB/s (8 TB/s)
        "fp32_tflops": 90,
        "tf32_tflops": 1125,  # dense
        "fp16_tflops": 2250,  # dense
        "bf16_tflops": 2250,  # dense
        "fp8_tflops": 4500,  # dense
        "tdp_watts": 1000,
        "nvlink_gen": 5,
        "nvlink_bandwidth_gbps": 1800,
        "pcie_gen": 5,
        # NOTE(review): verify against NVIDIA release notes; Blackwell may
        # need a newer driver / CUDA toolkit than the values listed here.
        "min_driver_version": "550",
        "min_cuda_version": "12.4",
    },
    "b300": {
        "full_name": "NVIDIA B300 SXM (Blackwell Ultra)",
        "architecture": "Blackwell Ultra",
        # NOTE(review): confirm; Blackwell Ultra may report a compute
        # capability different from B200's.
        "compute_capability": 10.0,
        "hbm_capacity_gb": 288,
        "hbm_type": "HBM3e",
        "memory_bandwidth_gbps": 8000,  # GB/s (8 TB/s)
        "fp32_tflops": 125,
        "tf32_tflops": 1750,  # dense (estimated)
        "fp16_tflops": 3500,  # dense
        "bf16_tflops": 3500,  # dense
        "fp8_tflops": 7000,  # dense
        "tdp_watts": 1200,
        "nvlink_gen": 5,
        "nvlink_bandwidth_gbps": 1800,
        "pcie_gen": 5,
        # NOTE(review): verify against NVIDIA release notes (see b200).
        "min_driver_version": "550",
        "min_cuda_version": "12.4",
    },
}
|
||||
|
||||
# Fallback for unknown / unsupported GPUs
|
||||
_UNKNOWN_SPECS = {
|
||||
"full_name": "Unknown GPU",
|
||||
"architecture": "unknown",
|
||||
"compute_capability": 0.0,
|
||||
"hbm_capacity_gb": 0,
|
||||
"hbm_type": "unknown",
|
||||
"memory_bandwidth_gbps": 0,
|
||||
"fp32_tflops": 0,
|
||||
"tf32_tflops": 0,
|
||||
"fp16_tflops": 0,
|
||||
"bf16_tflops": 0,
|
||||
"fp8_tflops": 0,
|
||||
"tdp_watts": 700,
|
||||
"nvlink_gen": 0,
|
||||
"nvlink_bandwidth_gbps": 0,
|
||||
"pcie_gen": 0,
|
||||
"min_driver_version": "",
|
||||
"min_cuda_version": "",
|
||||
}
|
||||
|
||||
|
||||
def detect_gpu_type() -> str:
    """Detect the GPU type via nvidia-smi and return its internal key.

    Only the first line of nvidia-smi's output is inspected, so on a
    multi-GPU host the result reflects the first GPU listed.

    Returns:
        The GPU_NAME_PATTERNS value whose pattern occurs in the reported
        product name (e.g. 'h200'), or 'unknown' when nvidia-smi is not on
        PATH, cannot be run, times out, exits non-zero, prints nothing, or
        reports a name that matches no known pattern.
    """
    nvidia_smi = shutil.which("nvidia-smi")
    if not nvidia_smi:
        return "unknown"

    # Keep the try body to the one call that can raise the handled errors.
    try:
        result = subprocess.run(
            [nvidia_smi, "--query-gpu=name", "--format=csv,noheader"],
            capture_output=True, text=True, timeout=10,
        )
    except (subprocess.TimeoutExpired, OSError):
        # OSError also covers FileNotFoundError / PermissionError.
        return "unknown"

    if result.returncode != 0:
        return "unknown"

    # nvidia-smi can exit 0 without listing any device; indexing [0] on the
    # empty result would raise IndexError instead of reporting 'unknown'.
    lines = result.stdout.strip().splitlines()
    if not lines:
        return "unknown"

    # Patterns are upper-case, so normalise the name once before matching.
    product_name = lines[0].strip().upper()
    for pattern, key in GPU_NAME_PATTERNS.items():
        if pattern in product_name:
            return key
    return "unknown"
|
||||
|
||||
|
||||
def get_gpu_specs(gpu_type: str = None) -> dict:
    """Return the specs dict for ``gpu_type``, auto-detecting when it is None.

    Args:
        gpu_type: Internal GPU key such as 'h100'. When None, the key is
            obtained from detect_gpu_type().

    Returns:
        A new dict holding the specs for the GPU. For keys that are not in
        GPU_SPECS (including 'unknown'), the values come from the
        _UNKNOWN_SPECS fallback, whose peak figures are all zero.
    """
    if gpu_type is None:
        gpu_type = detect_gpu_type()
    # Always hand back a copy, for known GPUs too, so that a caller mutating
    # the result cannot corrupt the shared module-level tables. A shallow
    # copy is enough because every stored value is a plain scalar or string.
    return dict(GPU_SPECS.get(gpu_type, _UNKNOWN_SPECS))
|
||||
|
||||
|
||||
def get_supported_gpus() -> list:
    """Return the internal keys of every GPU in GPU_SPECS as a new list."""
    # Iterating a dict yields its keys in insertion order.
    return list(GPU_SPECS)
|
||||
|
||||
|
||||
def get_gpu_label(gpu_type: str) -> str:
    """Return a short display label for ``gpu_type``, e.g. 'H200 SXM'.

    The label is the GPU's ``full_name`` with every "NVIDIA " occurrence
    removed. Keys that are not in GPU_SPECS yield 'Unknown GPU'.
    """
    entry = GPU_SPECS.get(gpu_type)
    if not entry:
        return "Unknown GPU"
    # The vendor prefix is dropped to keep table columns narrow.
    return entry["full_name"].replace("NVIDIA ", "")
|
||||
Loading…
x
Reference in New Issue
Block a user