test_gpu_scripts/scripts/run_cublaslt_fp8_gemm.sh

46 lines
1.2 KiB
Bash
Executable File

#!/usr/bin/env bash
#
# Compile scripts/cublaslt_fp8_gemm_bench.cu with nvcc, run the resulting
# binary, and tee its stdout into a timestamped report file.
#
# Environment overrides (all optional):
#   CUDA_HOME     CUDA install root             (default: /usr/local/cuda)
#   NVCC          path to nvcc                  (default: $CUDA_HOME/bin/nvcc)
#   CUDA_ARCH     value passed to nvcc -arch=   (default: sm_90)
#   OUT_DIR       directory for the report      (default: <project>/reports)
#   MATRIX_SIZE   forwarded as --matrix-size    (default: 8192)
#   WARMUP        forwarded as --warmup         (default: 20)
#   ITERATIONS    forwarded as --iterations     (default: 200)
#   GPU_COUNT     forwarded as --gpu-count      (default: 8)
#   FIRST_GPU     forwarded as --first-gpu      (default: 0)
#   WORKSPACE_MB  forwarded as --workspace-mb   (default: 256)
#
# Exit status: 1 on setup/compile failure; otherwise the benchmark's own exit
# status, or tee's status if the benchmark succeeded but the report could not
# be written.
set -euo pipefail

# Print a message to stderr and abort with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd)" \
  || die "cannot resolve script directory"
PROJECT_DIR="$(cd -- "$SCRIPT_DIR/.." >/dev/null 2>&1 && pwd)" \
  || die "cannot resolve project directory from: $SCRIPT_DIR"

CUDA_HOME="${CUDA_HOME:-/usr/local/cuda}"
NVCC="${NVCC:-$CUDA_HOME/bin/nvcc}"
CUDA_ARCH="${CUDA_ARCH:-sm_90}"
OUT_DIR="${OUT_DIR:-$PROJECT_DIR/reports}"
MATRIX_SIZE="${MATRIX_SIZE:-8192}"
WARMUP="${WARMUP:-20}"
ITERATIONS="${ITERATIONS:-200}"
GPU_COUNT="${GPU_COUNT:-8}"
FIRST_GPU="${FIRST_GPU:-0}"
WORKSPACE_MB="${WORKSPACE_MB:-256}"

if [[ ! -x "$NVCC" ]]; then
  die "nvcc not found: $NVCC"
fi

mkdir -p -- "$OUT_DIR" "$PROJECT_DIR/build" \
  || die "cannot create output directories: $OUT_DIR, $PROJECT_DIR/build"

HOST="$(hostname 2>/dev/null || echo unknown)"
TS="$(date +%Y%m%d_%H%M%S)"
SRC="$PROJECT_DIR/scripts/cublaslt_fp8_gemm_bench.cu"
BIN="$PROJECT_DIR/build/cublaslt_fp8_gemm_bench"
REPORT="$OUT_DIR/cublaslt_fp8_gemm_${HOST}_${TS}.json"

# Stop here on a failed build: otherwise a stale binary left in build/ by an
# earlier run would be benchmarked and reported as if it were the current code.
if ! "$NVCC" -O3 -std=c++17 "-arch=$CUDA_ARCH" \
  "$SRC" \
  -lcublasLt -lcublas -o "$BIN"; then
  die "compilation failed: $SRC"
fi

# errexit is suspended so that a failing benchmark still lets us read the
# per-stage exit codes and report where the (possibly partial) output went.
set +e
"$BIN" \
  --matrix-size "$MATRIX_SIZE" \
  --warmup "$WARMUP" \
  --iterations "$ITERATIONS" \
  --first-gpu "$FIRST_GPU" \
  --gpu-count "$GPU_COUNT" \
  --workspace-mb "$WORKSPACE_MB" \
  | tee "$REPORT"
# Copy immediately: PIPESTATUS is overwritten by the very next command.
stage_status=("${PIPESTATUS[@]}")
set -e

status=${stage_status[0]}
tee_status=${stage_status[1]}

if ((tee_status != 0)); then
  echo "failed to write report: $REPORT" >&2
  # Do not report success when the report file could not be written.
  if ((status == 0)); then
    status=$tee_status
  fi
else
  echo "Report written to: $REPORT"
fi
exit "$status"