test_gpu_scripts/reports_stress_smoke_reasons_aikubeworker0016.json

165 lines
3.7 KiB
JSON

{
"stress": {
"source": "pytorch",
"passed": false,
"duration_sec": 45,
"elapsed_sec": 45.4,
"gpu_status": {
"0": "PASS",
"1": "PASS",
"2": "PASS",
"3": "PASS",
"4": "PASS",
"5": "PASS",
"6": "PASS",
"7": "PASS"
},
"telemetry": {
"passed": false,
"samples": 39,
"steady_samples": 31,
"warmup_sec": 9.0,
"max_temp_c": {
"0": 50.0,
"1": 56.0,
"2": 57.0,
"3": 52.0,
"4": 51.0,
"5": 58.0,
"6": 53.0,
"7": 51.0
},
"avg_power_w": {
"0": 698.3,
"1": 698.5,
"2": 697.6,
"3": 697.9,
"4": 697.8,
"5": 698.0,
"6": 697.5,
"7": 698.0
},
"temp_delta_c": 8.0,
"throttle_events": [
{
"gpu": 0,
"throttle": "0x0000000000000004",
"real_throttle": "0x4"
},
{
"gpu": 1,
"throttle": "0x0000000000000004",
"real_throttle": "0x4"
},
{
"gpu": 2,
"throttle": "0x0000000000000004",
"real_throttle": "0x4"
},
{
"gpu": 3,
"throttle": "0x0000000000000004",
"real_throttle": "0x4"
},
{
"gpu": 4,
"throttle": "0x0000000000000004",
"real_throttle": "0x4"
},
{
"gpu": 5,
"throttle": "0x0000000000000004",
"real_throttle": "0x4"
},
{
"gpu": 6,
"throttle": "0x0000000000000004",
"real_throttle": "0x4"
},
{
"gpu": 7,
"throttle": "0x0000000000000004",
"real_throttle": "0x4"
},
{
"gpu": 0,
"throttle": "0x0000000000000004",
"real_throttle": "0x4"
},
{
"gpu": 1,
"throttle": "0x0000000000000004",
"real_throttle": "0x4"
},
{
"gpu": 2,
"throttle": "0x0000000000000004",
"real_throttle": "0x4"
},
{
"gpu": 3,
"throttle": "0x0000000000000004",
"real_throttle": "0x4"
},
{
"gpu": 4,
"throttle": "0x0000000000000004",
"real_throttle": "0x4"
},
{
"gpu": 5,
"throttle": "0x0000000000000004",
"real_throttle": "0x4"
},
{
"gpu": 6,
"throttle": "0x0000000000000004",
"real_throttle": "0x4"
},
{
"gpu": 7,
"throttle": "0x0000000000000004",
"real_throttle": "0x4"
},
{
"gpu": 0,
"throttle": "0x0000000000000004",
"real_throttle": "0x4"
},
{
"gpu": 1,
"throttle": "0x0000000000000004",
"real_throttle": "0x4"
},
{
"gpu": 2,
"throttle": "0x0000000000000004",
"real_throttle": "0x4"
},
{
"gpu": 3,
"throttle": "0x0000000000000004",
"real_throttle": "0x4"
}
],
"throttle_event_count": 248,
"xid_events": [],
"tflops_jitter_pct": 3.77,
"steady_tflops_samples": 787,
"failures": [
"GPU temperature delta 8.0C exceeds 5.0C",
"non-idle throttle reasons observed in 248 samples (first: GPU 0 0x4)"
],
"thresholds": {
"max_temp_c": 80.0,
"max_temp_delta_c": 5.0,
"min_power_w": 630.0,
"max_tflops_jitter_pct": 5.0,
"warmup_sec": 10.0,
"min_steady_samples": 10
}
},
"timestamp": "2026-05-22T17:53:02.058687"
},
"timestamp": "2026-05-22T17:53:02.066792"
}