# File-viewer metadata captured with the original copy: 63 lines, 1.4 KiB, YAML.
# Tooling settings.
tools:
  # Absolute directory for the GPU test tools (presumably where the harness
  # installs or looks up its binaries — confirm against the consumer).
  install_dir: /opt/gpu-test-tools

# Report output settings.
report:
  # Relative path; presumably resolved against the harness's working directory.
  output_dir: ./reports
  # Report format identifier ("md" presumably selects Markdown — confirm).
  format: md

# Multi-node NCCL benchmark settings.
# NOTE(review): the nesting below was reconstructed from a flattened copy of
# this file. Confirm it against the consuming tool's schema — in particular
# which keys belong to each `topologies` entry versus this mapping.
multinode_nccl:
  enabled: true
  mode: sweep-nccl-2.27.7

  # Hosts taking part in the run; each entry carries a name, an address and a
  # slot count.
  # NOTE(review): 172.72.0.0/16 lies outside the RFC 1918 private block
  # 172.16.0.0/12 — confirm these addresses are intended.
  hosts:
    - name: nccl-gpu-1
      addr: 172.72.8.12
      slots: 8
    - name: nccl-gpu-2
      addr: 172.72.8.16
      slots: 8
  ssh_user: root
  ssh_preflight: true

  # MPI launcher and runtime library locations.
  mpirun_path: /usr/mpi/gcc/openmpi-4.1.9a1/bin/mpirun
  mpi_ld_preload: null
  # NOTE(review): the NCCL entry lives under /tmp, which is commonly cleaned
  # on reboot — confirm it is provisioned on every host before a run.
  extra_ld_library_path:
    - /usr/mpi/gcc/openmpi-4.1.9a1/lib
    - /tmp/nccl-2.27.7-cuda12.4/usr/lib/x86_64-linux-gnu
    - /usr/local/cuda-12.4/targets/x86_64-linux/lib
  nccl_tests_dir: null

  # Benchmarks to run.
  tests:
    - all_reduce_perf
    - alltoall_perf

  # Node/GPU layouts to exercise.
  # NOTE(review): assumes each entry holds only nodes, gpus_per_node and
  # label; the keys from begin_size onward are treated as section-wide.
  topologies:
    - nodes: 2
      gpus_per_node: 8
      label: "2 nodes x 8 GPUs NCCL 2.27.7 sweep"

  # Message-size sweep and iteration controls.
  begin_size: 1M
  end_size: 4G
  step_factor: 4
  warmup_iters: 2
  iters: 5
  gpus_per_rank: 1
  timeout_sec: 1200
  debug: INFO

  # Network interface selection and SSH options for the launcher.
  socket_ifname: bond0
  oob_tcp_ifname: bond0
  plm_rsh_args: "-o StrictHostKeyChecking=accept-new -o ConnectTimeout=10 -o ServerAliveInterval=30"

  # InfiniBand / transport tuning (presumably forwarded to NCCL as the
  # corresponding NCCL_* settings — confirm against the consumer).
  ib_gid_index: 3
  ib_sl: 5
  ib_tc: 136
  ib_hca: mlx5_0,mlx5_1,mlx5_6,mlx5_7
  ib_timeout: 22
  qps_per_connection: 4
  min_nchannels: 4
  # Quoted to make explicit that this is the string "none", not a YAML null.
  net_plugin: "none"
  nvls_enable: 1
  split_data_on_qps: 1

  # Additional environment variables, keyed by variable name.
  # NOTE(review): the numeric values load as integers, not strings — confirm
  # the consumer stringifies them before exporting.
  extra_env:
    NCCL_DEBUG_SUBSYS: INIT,NET
    NCCL_NET_GDR_LEVEL: 5
    NCCL_NET_GDR_READ: 1
    NCCL_DMABUF_ENABLE: 0

  # Minimum peak bus-bandwidth thresholds (unit taken from the key name).
  # NOTE(review): these keys (allreduce, alltoall) do not match the `tests`
  # entries verbatim (all_reduce_perf, alltoall_perf) — confirm the mapping.
  min_peak_busbw_gbps:
    allreduce: 480
    alltoall: 75