#!/usr/bin/env bash
#
# Launcher for the local web app (web_app_local.py).
#
# Exports the GPU / Python / PyTorch environment variables below and then
# replaces this shell with the Python process. All command-line arguments
# given to this script are forwarded unchanged to web_app_local.py.
#
# Note: web_app_local.py is referenced by a relative path, so it is looked
# up in the current working directory at the time this script is run.

# Strict mode: abort on unhandled errors (-e), on use of unset variables (-u),
# and when any stage of a pipeline fails (pipefail). Kept on its own line
# rather than in the shebang so it also applies when run as `bash <script>`.
set -euo pipefail

# Select GPUs 0 and 1 for tensor-parallel sharding.
export CUDA_VISIBLE_DEVICES=0,1

# Unbuffered Python output for real-time logs.
export PYTHONUNBUFFERED=1

# Help the PyTorch allocator avoid fragmentation (see OOM hint).
export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True

# Start the local web app. `exec` replaces this shell with the Python
# process, so signals and the exit status belong to python3 directly.
exec python3 web_app_local.py "$@"