#!/usr/bin/env bash
#
# Build and run the cuBLASLt FP8 GEMM benchmark and save its output as a report.
#
# Steps:
#   1. Compile scripts/cublaslt_fp8_gemm_bench.cu (relative to the project
#      root, i.e. the parent of this script's directory) into
#      build/cublaslt_fp8_gemm_bench using nvcc.
#   2. Run the binary, echoing its stdout to the terminal and copying it to
#      $OUT_DIR/cublaslt_fp8_gemm_<host>_<timestamp>.json.
#
# Environment overrides (default in parentheses):
#   CUDA_HOME     CUDA installation prefix            (/usr/local/cuda)
#   NVCC          nvcc executable                     ($CUDA_HOME/bin/nvcc)
#   CUDA_ARCH     value passed to nvcc -arch          (sm_90)
#   OUT_DIR       directory receiving the report      (<project>/reports)
#   MATRIX_SIZE   forwarded as --matrix-size          (8192)
#   WARMUP        forwarded as --warmup               (20)
#   ITERATIONS    forwarded as --iterations           (200)
#   GPU_COUNT     forwarded as --gpu-count            (8)
#   FIRST_GPU     forwarded as --first-gpu            (0)
#   WORKSPACE_MB  forwarded as --workspace-mb         (256)
#
# Exit status:
#   1 if nvcc is missing or compilation fails; non-zero if setup (directory
#   resolution / creation) fails; otherwise the benchmark binary's own status.

# -e is required so that setup failures (cd, mkdir, ...) abort the script
# instead of letting it continue with empty or stale state.
set -euo pipefail

SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd)"
PROJECT_DIR="$(cd -- "$SCRIPT_DIR/.." >/dev/null 2>&1 && pwd)"

CUDA_HOME="${CUDA_HOME:-/usr/local/cuda}"
NVCC="${NVCC:-$CUDA_HOME/bin/nvcc}"
CUDA_ARCH="${CUDA_ARCH:-sm_90}"
OUT_DIR="${OUT_DIR:-$PROJECT_DIR/reports}"

MATRIX_SIZE="${MATRIX_SIZE:-8192}"
WARMUP="${WARMUP:-20}"
ITERATIONS="${ITERATIONS:-200}"
GPU_COUNT="${GPU_COUNT:-8}"
FIRST_GPU="${FIRST_GPU:-0}"
WORKSPACE_MB="${WORKSPACE_MB:-256}"

if [[ ! -x "$NVCC" ]]; then
  echo "nvcc not found: $NVCC" >&2
  exit 1
fi

mkdir -p "$OUT_DIR" "$PROJECT_DIR/build"

HOST="$(hostname 2>/dev/null || echo unknown)"
TS="$(date +%Y%m%d_%H%M%S)"
BIN="$PROJECT_DIR/build/cublaslt_fp8_gemm_bench"
REPORT="$OUT_DIR/cublaslt_fp8_gemm_${HOST}_${TS}.json"

# Check the compile step explicitly: without this, a failed build would fall
# through and run whatever (possibly stale) binary a previous build left behind.
if ! "$NVCC" -O3 -std=c++17 -arch="$CUDA_ARCH" \
  "$PROJECT_DIR/scripts/cublaslt_fp8_gemm_bench.cu" \
  -lcublasLt -lcublas -o "$BIN"; then
  echo "compilation failed: $PROJECT_DIR/scripts/cublaslt_fp8_gemm_bench.cu" >&2
  exit 1
fi

bench_args=(
  --matrix-size "$MATRIX_SIZE"
  --warmup "$WARMUP"
  --iterations "$ITERATIONS"
  --first-gpu "$FIRST_GPU"
  --gpu-count "$GPU_COUNT"
  --workspace-mb "$WORKSPACE_MB"
)

# Suspend errexit around the run so a failing benchmark still reaches the
# status capture below; PIPESTATUS[0] is the binary's status, not tee's.
set +e
"$BIN" "${bench_args[@]}" | tee "$REPORT"
status=${PIPESTATUS[0]}
set -e

echo "Report written to: $REPORT"
if ((status != 0)); then
  # tee writes whatever the binary printed before failing, so flag that the
  # report may be partial rather than silently presenting it as a success.
  echo "benchmark exited with status $status; report may be incomplete" >&2
fi
exit "$status"