# Benchmark parameters, split into a memory section and a compute section.
benchmark:
  memory:
    # Size in MB (per the key suffix).
    # NOTE(review): presumably the test buffer size -- confirm with consumer.
    size_mb: 4096
    iterations: 10
    # NOTE(review): the nvbandwidth_* keys presumably configure runs of
    # NVIDIA's nvbandwidth tool (buffer size in MB, sample count) -- confirm.
    nvbandwidth_buffer_mb: 512
    nvbandwidth_samples: 3
  compute:
    # Data types listed for the compute benchmark.
    dtypes:
      - fp32
      - tf32
      - fp16
      - bf16
      - fp8
    # NOTE(review): presumably the dimension of square matrices -- confirm.
    matrix_size: 4096
    # NOTE(review): presumably `warmup` iterations are run before the
    # measured `iterations` and excluded from results -- confirm.
    warmup: 10
    iterations: 100

# Health-check thresholds. The warning temperature is set below the critical
# one.
health:
  # NOTE(review): temperature units are not stated; presumably degrees
  # Celsius -- confirm.
  temp_warning: 80
  temp_critical: 90
  # NOTE(review): units are not stated; presumably watts -- confirm.
  power_limit: 700

# NCCL test settings: one bandwidth threshold plus a boolean per test.
nccl:
  # Threshold in Gbps (per the key suffix).
  # NOTE(review): presumably results below this value count as a failure --
  # confirm how the consumer applies it.
  min_bandwidth_gbps: 400
  # Per-test toggles. As configured here, allreduce, alltoall and broadcast
  # are enabled; reduce_scatter, allgather and sendrecv are disabled.
  test_allreduce: true
  test_alltoall: true
  test_broadcast: true
  test_reduce_scatter: false
  test_allgather: false
  test_sendrecv: false

# Stress-test settings.
stress:
  # Duration in seconds (per the key suffix).
  duration_sec: 60
  use_doubles: false
  use_tensor_cores: true
  # Percentage value (per the key suffix).
  # NOTE(review): presumably the share of GPU memory to allocate -- confirm.
  memory_pct: 90
  # Plain string `all`.
  # NOTE(review): presumably selects every GPU; the accepted alternatives
  # (e.g. an index list) are not shown here -- confirm.
  gpus: all

# RDMA test settings: thresholds and test parameters.
rdma:
  # Bandwidth threshold in Gbps and latency threshold in microseconds
  # (units per the key suffixes).
  min_bandwidth_gbps: 50
  max_latency_us: 10
  ib_iterations: 1000
  # NOTE(review): units are not stated; presumably bytes (65536 = 64 KiB) --
  # confirm.
  msg_size: 65536
  # Explicit null: no device is named.
  # NOTE(review): presumably the consumer then auto-selects an InfiniBand
  # device -- confirm.
  ib_device: null
  ib_port: 1

# Training-test settings: model name, batch shape, step count and data type.
training:
  model: gpt2
  batch_size: 8
  # NOTE(review): presumably the sequence length in tokens -- confirm.
  seq_length: 2048
  num_steps: 50
  # bf16 also appears in benchmark.compute.dtypes.
  dtype: bf16

# Report output settings.
report:
  # Relative path.
  # NOTE(review): the base directory it resolves against (working directory
  # vs. config location) is not shown here -- confirm.
  output_dir: ./reports
  format: json

# Tool installation settings.
tools:
  # Absolute path.
  # NOTE(review): presumably where external test tools are installed or
  # looked up -- confirm.
  install_dir: /opt/h200-test-tools