使用v0.3.3构建具身平台镜像。
This commit is contained in:
parent
b883328e6c
commit
1e7bb40565
8
.gitignore
vendored
8
.gitignore
vendored
@ -173,3 +173,11 @@ outputs/
|
|||||||
|
|
||||||
# Dev folders
|
# Dev folders
|
||||||
.cache/*
|
.cache/*
|
||||||
|
|
||||||
|
datasets
|
||||||
|
20250901
|
||||||
|
s100
|
||||||
|
|
||||||
|
huggingface_models
|
||||||
|
docker/inputs
|
||||||
|
docker/outputs
|
||||||
33
clean_build.sh
Executable file
33
clean_build.sh
Executable file
@ -0,0 +1,33 @@
|
|||||||
|
#!/bin/bash
#
# Build the lerobot merge image tagged with today's date, run it once against
# docker/inputs and docker/outputs, then remove dangling and stale lerobot
# images.
#
# Usage: ./clean_build.sh   (may be invoked from any directory)

set -euo pipefail

# All relative paths below (docker/..., build context ".") are resolved from
# the directory containing this script.
cd "$(dirname "$0")"

VERSION="$(date +%Y%m%d)-latest"
readonly VERSION
readonly PROXY_URL="http://192.168.16.68:18000"

echo "Building: ${VERSION}"


###### Training Image ######
# docker build -t "dcloud/lerobot-train:${VERSION}" -f docker/Dockerfile.train . \
#   --build-arg "http_proxy=${PROXY_URL}" --build-arg "https_proxy=${PROXY_URL}"
# docker run -it --rm --gpus '"device=7"' \
#   -v "${PWD}/docker/inputs:/workspace/inputs" \
#   -v "${PWD}/docker/outputs:/workspace/outputs/checkpoints" \
#   -v "${PWD}/docker/train_task.json:/workspace/inputs/task.json" \
#   --shm-size=128G \
#   "dcloud/lerobot-train:${VERSION}"


###### Merge Image ######
docker build -t "dcloud/lerobot-merge:${VERSION}" -f docker/Dockerfile.merge . \
  --build-arg "http_proxy=${PROXY_URL}" \
  --build-arg "https_proxy=${PROXY_URL}"

docker run -it --rm \
  -v "${PWD}/docker/inputs:/workspace/inputs" \
  -v "${PWD}/docker/outputs:/workspace/outputs" \
  -v "${PWD}/docker/merge_task.json:/workspace/inputs/task.json" \
  --shm-size=128G \
  "dcloud/lerobot-merge:${VERSION}"


# Remove dangling images.
# `docker rmi` exits non-zero when called without arguments, which would abort
# the script under `set -e` whenever there is nothing to delete, so only call
# it when the list is non-empty.
mapfile -t dangling < <(docker images -f "dangling=true" -q)
if (( ${#dangling[@]} > 0 )); then
  docker rmi "${dangling[@]}"
fi

# Remove every lerobot image that does not carry today's tag. Untagged
# ("<none>") entries are skipped because "repo:<none>" is not a valid reference.
mapfile -t stale < <(
  docker images --format '{{.Repository}}:{{.Tag}}' \
    | awk -v keep="${VERSION}" \
        'index($0, "lerobot") && !index($0, keep) && !index($0, "<none>")'
)
if (( ${#stale[@]} > 0 )); then
  docker rmi "${stale[@]}"
fi
|
||||||
@ -12,39 +12,23 @@
|
|||||||
# See the License for the specific language governing permissions and
|
# See the License for the specific language governing permissions and
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
|
|
||||||
# This Dockerfile is designed for HuggingFace internal CI environments
|
# This Dockerfile is designed for a lerobot user who wants to
|
||||||
# that require GPU access. It starts from an NVIDIA CUDA base image.
|
# experiment with the project. It starts from a Python Slim base image.
|
||||||
|
|
||||||
# docker build -f docker/Dockerfile.internal -t lerobot-internal .
|
# docker build -f docker/Dockerfile.user -t lerobot-user .
|
||||||
|
# docker run -it --rm lerobot-user
|
||||||
|
|
||||||
# Configure the base image for CI with GPU access
|
# Configure the base image
|
||||||
# TODO(Steven): Bump these versions
|
|
||||||
ARG CUDA_VERSION=12.4.1
|
|
||||||
ARG OS_VERSION=22.04
|
|
||||||
FROM nvidia/cuda:${CUDA_VERSION}-base-ubuntu${OS_VERSION}
|
|
||||||
|
|
||||||
# Define Python version argument
|
|
||||||
ARG PYTHON_VERSION=3.10
|
ARG PYTHON_VERSION=3.10
|
||||||
|
FROM python:${PYTHON_VERSION}-slim
|
||||||
|
|
||||||
# Configure environment variables
|
# Configure environment variables
|
||||||
ENV DEBIAN_FRONTEND=noninteractive \
|
ENV DEBIAN_FRONTEND=noninteractive \
|
||||||
MUJOCO_GL=egl \
|
PATH=/lerobot/.venv/bin:$PATH
|
||||||
PATH=/lerobot/.venv/bin:$PATH \
|
|
||||||
CUDA_VISIBLE_DEVICES=0 \
|
|
||||||
TEST_TYPE=single_gpu \
|
|
||||||
DEVICE=cuda
|
|
||||||
|
|
||||||
# Install Python, system dependencies, and uv (as root)
|
# Install system dependencies and uv (as root)
|
||||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||||
software-properties-common build-essential git curl \
|
build-essential git curl ffmpeg \
|
||||||
libglib2.0-0 libgl1-mesa-glx libegl1-mesa ffmpeg \
|
|
||||||
libusb-1.0-0-dev speech-dispatcher libgeos-dev portaudio19-dev \
|
|
||||||
&& add-apt-repository -y ppa:deadsnakes/ppa \
|
|
||||||
&& apt-get update \
|
|
||||||
&& apt-get install -y --no-install-recommends \
|
|
||||||
python${PYTHON_VERSION} \
|
|
||||||
python${PYTHON_VERSION}-venv \
|
|
||||||
python${PYTHON_VERSION}-dev \
|
|
||||||
&& curl -LsSf https://astral.sh/uv/install.sh | sh \
|
&& curl -LsSf https://astral.sh/uv/install.sh | sh \
|
||||||
&& mv /root/.local/bin/uv /usr/local/bin/uv \
|
&& mv /root/.local/bin/uv /usr/local/bin/uv \
|
||||||
&& useradd --create-home --shell /bin/bash user_lerobot \
|
&& useradd --create-home --shell /bin/bash user_lerobot \
|
||||||
@ -52,9 +36,14 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
|
|||||||
&& apt-get clean && rm -rf /var/lib/apt/lists/*
|
&& apt-get clean && rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
# Create application directory and set permissions
|
# Create application directory and set permissions
|
||||||
|
RUN git clone https://github.com/huggingface/lerobot.git -b v0.3.3 /lerobot
|
||||||
WORKDIR /lerobot
|
WORKDIR /lerobot
|
||||||
RUN chown -R user_lerobot:user_lerobot /lerobot
|
RUN chown -R user_lerobot:user_lerobot /lerobot
|
||||||
|
|
||||||
|
ADD ./huggingface_models /home/user_lerobot/.cache/huggingface
|
||||||
|
RUN chown -R user_lerobot:user_lerobot /home/user_lerobot/.cache
|
||||||
|
|
||||||
|
|
||||||
# Switch to the non-root user
|
# Switch to the non-root user
|
||||||
USER user_lerobot
|
USER user_lerobot
|
||||||
|
|
||||||
@ -67,18 +56,18 @@ ENV HOME=/home/user_lerobot \
|
|||||||
|
|
||||||
# Create the virtual environment
|
# Create the virtual environment
|
||||||
# We use a virtual environment inside the container—even though the container itself \
|
# We use a virtual environment inside the container—even though the container itself \
|
||||||
# provides isolation—to ensure compatibility with the cluster and to prevent \
|
# provides isolation—to closely resemble local development and allow users to \
|
||||||
# issues with MuJoCo and OpenGL drivers.
|
# run other Python projects in the same container without dependency conflicts.
|
||||||
RUN uv venv --python python${PYTHON_VERSION}
|
RUN uv venv
|
||||||
|
|
||||||
# Install Python dependencies for caching
|
# Install Python dependencies for caching
|
||||||
COPY --chown=user_lerobot:user_lerobot pyproject.toml README.md MANIFEST.in ./
|
COPY --chown=user_lerobot:user_lerobot pyproject.toml ./
|
||||||
COPY --chown=user_lerobot:user_lerobot src/ src/
|
# COPY --chown=user_lerobot:user_lerobot src/ src/
|
||||||
RUN uv pip install --no-cache ".[all]"
|
RUN uv pip install --no-cache ".[smolvla]"
|
||||||
|
|
||||||
# Copy the rest of the application source code
|
# Cloud Helper
|
||||||
# Make sure to have the git-LFS files for testing
|
# RUN uv pip install pyzmq msgpack msgpack_numpy zstandard
|
||||||
COPY --chown=user_lerobot:user_lerobot . .
|
|
||||||
|
|
||||||
# Set the default command
|
# Set the default command - Dataset merge mode
|
||||||
CMD ["/bin/bash"]
|
ADD docker/merge.py /workspace/merge.py
|
||||||
|
CMD ["python", "/workspace/merge.py"]
|
||||||
@ -24,13 +24,11 @@ FROM python:${PYTHON_VERSION}-slim
|
|||||||
|
|
||||||
# Configure environment variables
|
# Configure environment variables
|
||||||
ENV DEBIAN_FRONTEND=noninteractive \
|
ENV DEBIAN_FRONTEND=noninteractive \
|
||||||
MUJOCO_GL=egl \
|
|
||||||
PATH=/lerobot/.venv/bin:$PATH
|
PATH=/lerobot/.venv/bin:$PATH
|
||||||
|
|
||||||
# Install system dependencies and uv (as root)
|
# Install system dependencies and uv (as root)
|
||||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||||
build-essential git curl libglib2.0-0 libegl1-mesa ffmpeg \
|
build-essential git curl ffmpeg \
|
||||||
libusb-1.0-0-dev speech-dispatcher libgeos-dev portaudio19-dev \
|
|
||||||
&& curl -LsSf https://astral.sh/uv/install.sh | sh \
|
&& curl -LsSf https://astral.sh/uv/install.sh | sh \
|
||||||
&& mv /root/.local/bin/uv /usr/local/bin/uv \
|
&& mv /root/.local/bin/uv /usr/local/bin/uv \
|
||||||
&& useradd --create-home --shell /bin/bash user_lerobot \
|
&& useradd --create-home --shell /bin/bash user_lerobot \
|
||||||
@ -38,9 +36,14 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
|
|||||||
&& apt-get clean && rm -rf /var/lib/apt/lists/*
|
&& apt-get clean && rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
# Create application directory and set permissions
|
# Create application directory and set permissions
|
||||||
|
RUN git clone https://github.com/huggingface/lerobot.git -b v0.3.3 /lerobot
|
||||||
WORKDIR /lerobot
|
WORKDIR /lerobot
|
||||||
RUN chown -R user_lerobot:user_lerobot /lerobot
|
RUN chown -R user_lerobot:user_lerobot /lerobot
|
||||||
|
|
||||||
|
ADD ./huggingface_models /home/user_lerobot/.cache/huggingface
|
||||||
|
RUN chown -R user_lerobot:user_lerobot /home/user_lerobot/.cache
|
||||||
|
|
||||||
|
|
||||||
# Switch to the non-root user
|
# Switch to the non-root user
|
||||||
USER user_lerobot
|
USER user_lerobot
|
||||||
|
|
||||||
@ -59,12 +62,9 @@ RUN uv venv
|
|||||||
|
|
||||||
# Install Python dependencies for caching
|
# Install Python dependencies for caching
|
||||||
COPY --chown=user_lerobot:user_lerobot pyproject.toml README.md MANIFEST.in ./
|
COPY --chown=user_lerobot:user_lerobot pyproject.toml README.md MANIFEST.in ./
|
||||||
COPY --chown=user_lerobot:user_lerobot src/ src/
|
# COPY --chown=user_lerobot:user_lerobot src/ src/
|
||||||
RUN uv pip install --no-cache ".[all]"
|
RUN uv pip install --no-cache ".[smolvla]"
|
||||||
|
|
||||||
# Copy the rest of the application code
|
# Set the default command - Training mode
|
||||||
# Make sure to have the git-LFS files for testing
|
ADD docker/train.py /workspace/train.py
|
||||||
COPY --chown=user_lerobot:user_lerobot . .
|
CMD ["python", "/workspace/train.py"]
|
||||||
|
|
||||||
# Set the default command
|
|
||||||
CMD ["/bin/bash"]
|
|
||||||
143
docker/cloud_helper.py
Normal file
143
docker/cloud_helper.py
Normal file
@ -0,0 +1,143 @@
|
|||||||
|
import zmq
|
||||||
|
import msgpack
|
||||||
|
import msgpack_numpy as m
|
||||||
|
|
||||||
|
from typing import Any, Callable
|
||||||
|
import logging
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
import zstandard as zstd
|
||||||
|
|
||||||
|
# Module-level zstd codec objects, shared by every _pack/_unpack call.
compresser = zstd.ZstdCompressor(level=3)
decompresser = zstd.ZstdDecompressor()


def _pack(data: Any) -> bytes:
    """Encode ``data`` with msgpack (using the msgpack_numpy encoder hook) and zstd-compress it."""
    serialized = msgpack.packb(data, default=m.encode, use_bin_type=True)
    return compresser.compress(serialized)


def _unpack(data: bytes) -> Any:
    """Inverse of ``_pack``: zstd-decompress ``data`` and decode the msgpack payload."""
    serialized = decompresser.decompress(data)
    return msgpack.unpackb(serialized, object_hook=m.decode, raw=False)
|
||||||
|
|
||||||
|
|
||||||
|
class Server:
    """ZeroMQ REP server that dispatches packed requests to registered handlers.

    Protocol:
        Request:  {"command": str, "data": Any}
        Response: {"status": "ok" | "error", "data": Any | str}

    Handlers are looked up by ``command``. A handler is called with no
    arguments when the request's ``data`` is ``None`` and with ``data``
    otherwise.
    """

    def __init__(self, host: str = "*", port: int = 5555):
        self.host = host
        self.port = port

        self.context = zmq.Context()
        self.socket = self.context.socket(zmq.REP)
        self.socket.bind(f"tcp://{self.host}:{self.port}")
        logger.info(f"Server started at tcp://{self.host}:{self.port}")

        self.endpoints: dict[str, Callable[[Any], Any]] = {}

    def register_endpoint(self, command: str, func: Callable[[Any], Any]):
        """Register ``func`` as the handler for ``command`` (replacing any previous one)."""
        self.endpoints[command] = func
        logger.info(f"Registered endpoint: {command} -> {func}")

    def return_error(self, message: str) -> None:
        """Send an error response carrying ``message``."""
        self.socket.send(_pack({"status": "error", "data": message}))

    def return_ok(self, data: Any) -> None:
        """Send a success response carrying ``data``."""
        self.socket.send(_pack({"status": "ok", "data": data}))

    def handle_once(self) -> None:
        """Receive one request, run its handler and send exactly one reply."""
        raw = self.socket.recv()

        # A REP socket has to answer every request it receives. Decode inside a
        # try block so that a corrupt or non-dict payload produces an error
        # reply instead of an exception that tears down the serving loop.
        try:
            message = _unpack(raw)
            cmd = message.get("command")
            data = message.get("data")
        except Exception as e:
            logger.error(f"Malformed request: {e}")
            self.return_error(f"Malformed request: {e}")
            return

        logger.info("Received Command: %s", cmd)

        # Non-string commands can never be registered (and may be unhashable).
        handler = self.endpoints.get(cmd) if isinstance(cmd, str) else None
        if handler is None:
            logger.warning(f"Unknown command: {cmd}")
            self.return_error(f"Unknown command: {cmd}")
            return

        # Run the handler and serialize its result before touching the socket,
        # so a handler or encoding failure is reported with a single reply.
        try:
            response = handler() if data is None else handler(data)
            payload = _pack({"status": "ok", "data": response})
        except Exception as e:
            logger.error(f"Error handling command {cmd}: {e}")
            self.return_error(str(e))
            return

        self.socket.send(payload)

    def loop_forever(self):
        """Serve requests until interrupted, then close the socket and context."""
        try:
            while True:
                self.handle_once()

        except KeyboardInterrupt:
            logger.info("Server shutting down...")

        finally:
            self.socket.close()
            self.context.term()
|
||||||
|
|
||||||
|
|
||||||
|
class Client:
    """ZeroMQ REQ client for the request/response protocol served by ``Server``.

    Can be used as a context manager so the socket and context are released:

        with Client(host, port) as client:
            client.call_endpoint("ping")
    """

    def __init__(self, host: str = "localhost", port: int = 5555):
        self.context = zmq.Context()
        self.socket = self.context.socket(zmq.REQ)
        self.socket.connect(f"tcp://{host}:{port}")
        logger.info(f"Client connected to tcp://{host}:{port}")

    def call_endpoint(self, command: str, data=None):
        """Send ``command`` with ``data`` and block until the reply arrives.

        Returns:
            The ``data`` field of the response when its status is ``"ok"``.

        Raises:
            RuntimeError: if the server answered with any other status. The
                message contains the server-side error text.
        """
        self.socket.send(_pack({"command": command, "data": data}))
        message = _unpack(self.socket.recv())

        if message.get("status") == "ok":
            return message.get("data")

        error = f"Error from server: {message.get('data')}"
        logger.error(error)
        # RuntimeError is a subclass of Exception, so existing
        # `except Exception` callers keep working.
        raise RuntimeError(error)

    def close(self) -> None:
        """Close the socket without lingering and terminate the context."""
        self.socket.close(linger=0)
        self.context.term()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc, tb) -> None:
        self.close()
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Small self-test: run one process with "server" and another with "client".
    import sys
    from time import sleep

    logging.basicConfig(
        level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
    )

    # Validate argv explicitly. An `assert` is stripped under `python -O`,
    # which would leave `mode` undefined further down.
    if len(sys.argv) != 2 or sys.argv[1] not in ("server", "client"):
        sys.exit("Usage: python cloud_helper.py [server|client]")
    mode = sys.argv[1]

    ## Protocol:
    # Request: { "command": str, "data": Any }
    # Response: { "status": "ok" | "error", "data": Any if status=="ok" else str (ErrorMsg) }

    if mode == "server":
        server = Server()
        server.register_endpoint("ping", lambda: "pong")
        server.register_endpoint("echo", lambda x: x)
        server.register_endpoint("add", lambda data: data["a"] + data["b"])
        server.loop_forever()

    elif mode == "client":
        client = Client()
        # Exercise the three demo endpoints until a call fails.
        while True:
            try:
                response = client.call_endpoint("ping")
                print(f"Response from server: {response}")
                response = client.call_endpoint("echo", "Hello, World!")
                print(f"Response from server: {response}")
                response = client.call_endpoint("add", {"a": 5, "b": 10})
                print(f"Response from server: {response}")

                sleep(0.2)

            except Exception as e:
                print(f"Error: {e}")
                break
|
||||||
108
docker/merge.py
Normal file
108
docker/merge.py
Normal file
@ -0,0 +1,108 @@
|
|||||||
|
"""Merge several LeRobot datasets into one, dropping frames where the action barely changes.

Reads /workspace/inputs/task.json, treats every sub-directory of
``train.input_data_path`` as a source dataset, and writes the merged dataset to
``train.output_data_path`` (an existing output path is deleted first).
"""

import json
import shutil

from pathlib import Path
from tqdm import tqdm

from lerobot.datasets.lerobot_dataset import LeRobotDataset


with open("/workspace/inputs/task.json", "r") as f:
    task_config = json.load(f)

# Sorted so that the merge order (and the dataset that defines `features`)
# does not depend on the filesystem's directory iteration order.
src_dataset_paths = sorted(
    i for i in Path(task_config["train"]["input_data_path"]).iterdir() if i.is_dir()
)
if not src_dataset_paths:
    raise ValueError(f"No source datasets found in {task_config['train']['input_data_path']}")

# A frame is kept only if some action component moved more than EPS since the previous frame.
EPS = 1e-2
# Episodes with fewer kept frames than this are skipped entirely.
MIN_EPISODE_FRAMES = 32

# Feature Check
# The first dataset defines the feature set; every other dataset must contain
# the same keys. (Strict equality of the feature specs is not enforced.)
features = {}
keys_to_check = ["action", "observation.state", "observation.images"]
sources = []  # (path, dataset) pairs, loaded once and reused for the merge below
for p in src_dataset_paths:
    dataset = LeRobotDataset(repo_id="O24H/Src", root=p)
    sources.append((p, dataset))
    if not features:
        features = {
            k: v for k, v in dataset.features.items() if any(k.startswith(prefix) for prefix in keys_to_check)
        }
    else:
        for k in features.keys():
            if k not in dataset.features:
                raise ValueError(f"Feature key {k} not found in dataset {p}")

# Initialize Target Dataset
target_path = Path(task_config["train"]["output_data_path"])
# Start from a clean output location. Use shutil/pathlib instead of shelling
# out to `rm -rf`, which breaks on paths containing spaces or shell metacharacters.
if target_path.is_dir() and not target_path.is_symlink():
    shutil.rmtree(target_path)
elif target_path.exists() or target_path.is_symlink():
    target_path.unlink()

### using images to store all data rather than videos:
### 35s per episode -> 20s per episode but size will be ~40x larger 6M -> 260M
# for i in features.keys():
#     if i.startswith("observation.images"):
#         if not features[i]["dtype"] == "image":
#             features[i]["dtype"] = "image"
#             try:
#                 features[i].pop("info")
#             except KeyError:
#                 pass
# target = LeRobotDataset.create(
#     repo_id="O24H/Target",
#     fps=30,
#     root=target_path,
#     robot_type="so101_follower",
#     features=features,
#     image_writer_processes=8,
#     image_writer_threads=16,
#     use_videos=False
# )

# [TODO] use the largest dataset as the base rather than creating a new one
target = LeRobotDataset.create(
    repo_id="O24H/Target",
    fps=30,
    root=target_path,
    robot_type="so101_follower",
    features=features,
    image_writer_processes=8,
    image_writer_threads=16,
)

for p, src in sources:
    for eps_idx in tqdm(range(src.num_episodes), desc=f"Processing episode in {p.name}"):
        frame_idx = range(
            src.episode_data_index["from"][eps_idx].item(),
            src.episode_data_index["to"][eps_idx].item(),
        )
        if len(frame_idx) == 0:
            # Nothing to copy; also avoids dividing by zero below.
            continue

        eps_data = [src[i] for i in frame_idx]

        # Keep frame i (i >= 1) when its action differs from frame i-1 by more
        # than EPS in any component. The first frame is never kept.
        diff_actions = [eps_data[i]["action"] - eps_data[i - 1]["action"] for i in range(1, len(eps_data))]
        keep_idx = [i + 1 for i, a in enumerate(diff_actions) if (a.abs() > EPS).any()]

        compress_ratio = len(keep_idx) / len(frame_idx)
        print(f"Episode {eps_idx}: compress ratio {compress_ratio:.2f}")

        # Skip too short episodes after compression
        if len(keep_idx) < MIN_EPISODE_FRAMES:
            continue

        for o in keep_idx:
            batch = eps_data[o]

            image_keys = [k for k in batch.keys() if k.startswith("observation.images.")]

            frame = {
                "action": batch["action"],
                "observation.state": batch["observation.state"],
            }

            for k in image_keys:
                frame[k] = batch[k].permute(1, 2, 0).contiguous()  # CHW -> HWC

            target.add_frame(frame, task=batch["task"])

        target.save_episode()
|
||||||
7
docker/merge_task.json
Normal file
7
docker/merge_task.json
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
{
|
||||||
|
"task_id": "b5c75014c1142feab3ee395b4a0bcc0",
|
||||||
|
"train": {
|
||||||
|
"input_data_path": "/workspace/inputs/",
|
||||||
|
"output_data_path": "/workspace/outputs/pick_orange_mixed"
|
||||||
|
}
|
||||||
|
}
|
||||||
131
docker/smolvla_executor.py
Normal file
131
docker/smolvla_executor.py
Normal file
@ -0,0 +1,131 @@
|
|||||||
|
from cloud_helper import Client
|
||||||
|
|
||||||
|
from collections import deque
|
||||||
|
from lerobot.cameras.opencv.configuration_opencv import OpenCVCameraConfig
|
||||||
|
from lerobot.robots import Robot
|
||||||
|
from lerobot.robots.so101_follower.so101_follower import SO101Follower
|
||||||
|
from lerobot.robots.so101_follower.config_so101_follower import SO101FollowerConfig
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import time
|
||||||
|
|
||||||
|
from lerobot.utils import buffer
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def freq_control(func=None, freq: int = 25):
    """Rate-limit a callable so that each call takes at least ``1 / freq`` seconds.

    Supported forms:
        freq_control(fn, 50)     # direct call, returns the wrapped function
        @freq_control            # bare decorator, default 25 Hz
        @freq_control(25)        # decorator factory (also ``@freq_control(freq=25)``)

    Args:
        func: The callable to wrap, or, in decorator-factory form, the target
            frequency in Hz.
        freq: Target call frequency in Hz. Must be positive.

    Returns:
        The wrapped callable, or a decorator when no callable was supplied.

    Raises:
        ValueError: if the resolved frequency is not positive.
    """
    import functools

    if not callable(func):
        # Decorator-factory form: a non-callable first argument is the
        # frequency, not the function. Previously `@freq_control(25)` bound 25
        # to `func` and failed with "'int' object is not callable" as soon as
        # the decorator was applied.
        if func is not None:
            freq = func
        if freq <= 0:
            raise ValueError(f"freq must be positive, got {freq}")
        return functools.partial(freq_control, freq=freq)

    if freq <= 0:
        raise ValueError(f"freq must be positive, got {freq}")
    period = 1.0 / freq

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        # perf_counter is monotonic, so wall-clock adjustments cannot produce
        # a negative or inflated elapsed time.
        start_time = time.perf_counter()
        result = func(*args, **kwargs)
        elapsed_time = time.perf_counter() - start_time
        # logger.info(f"'{func.__name__}' tooks {elapsed_time * 1000:.2f} ms")
        # Sleep off whatever remains of this call's time budget.
        time.sleep(max(0.0, period - elapsed_time))
        return result

    return wrapper
|
||||||
|
|
||||||
|
|
||||||
|
class SmolVLAExecutor:
    """Drive a robot with action chunks fetched from a remote inference server.

    Each control step pops one action from a local queue and sends it to the
    robot. When the queue is nearly empty, a new chunk is requested from
    ``runtime`` using the robot's current observation.
    """

    def __init__(self, robot: Robot, runtime: Client, task: str, control_freq: int = 25):
        """
        Args:
            robot: Robot providing ``get_observation()`` and ``send_action()``.
            runtime: Client used to call the ``get_actions`` endpoint.
            task: Default language instruction sent with each request.
            control_freq: Target rate of ``loop_once`` in Hz.
        """
        self.robot = robot
        self.runtime = runtime
        self._action_queue = deque()
        self._cache = {}
        self.task = task
        self.control_freq = control_freq
        # Order defines both the state vector layout and the action vector layout.
        self.joint_names = [
            "shoulder_pan",
            "shoulder_lift",
            "elbow_flex",
            "wrist_flex",
            "wrist_roll",
            "gripper",
        ]

    def get_actions(self, instruction: str = ""):
        """Request an action chunk for the current observation.

        Args:
            instruction: Overrides ``self.task`` when non-empty.

        Returns:
            Array of actions with the leading batch axis removed when the
            server returned a 3-D array.

        Raises:
            ConnectionError: if the server returned ``None``.
        """
        observation = self.robot.get_observation()

        batch = {
            "observation": {
                "images.front": observation["front"],
                "images.wrist": observation["wrist"],
                "state": np.array([observation[key + ".pos"] for key in self.joint_names], dtype="float32"),
            },
            "instruction": instruction if instruction else self.task,
        }

        actions_array = self.runtime.call_endpoint("get_actions", batch)  # (B, chunk_size, action_dim)

        if actions_array is None:
            logger.warning("Server returned None")
            raise ConnectionError("Failed to receive response from RDT server")

        # Only one observation is sent, so take the first batch entry. Indexing
        # (rather than squeeze(0)) also works if the server returns B > 1.
        if len(actions_array.shape) == 3:
            actions_array = actions_array[0]  # (chunk_size, action_dim)

        return actions_array

    def apply_filter(self, window_size: int = 3):
        """Smooth the queued actions in place with a moving average along the time axis.

        The queue is edge-padded so its length is unchanged. Does nothing when
        ``window_size <= 1`` or the queue is empty.
        """
        if window_size <= 1 or not self._action_queue:
            return

        action_buffer = np.asarray(self._action_queue, dtype=float)  # (n_steps, ...)
        if action_buffer.ndim == 1:
            action_buffer = action_buffer[:, None]
        n_steps = action_buffer.shape[0]

        # Treat every trailing component as an independent time series.
        series = action_buffer.reshape(n_steps, -1)

        # Pad by window_size - 1 in total so a "valid" convolution returns
        # exactly n_steps samples for both odd and even window sizes.
        pad_left = (window_size - 1) // 2
        pad_right = window_size // 2
        padded = np.pad(series, ((pad_left, pad_right), (0, 0)), mode="edge")

        kernel = np.ones(window_size) / window_size
        smoothed = np.empty_like(series)
        for d in range(series.shape[1]):
            smoothed[:, d] = np.convolve(padded[:, d], kernel, mode="valid")

        self._action_queue = deque(smoothed.reshape(action_buffer.shape).tolist())

    def loop_once(self):
        """Run one control step, then sleep so steps occur at ``control_freq`` Hz."""
        start_time = time.perf_counter()

        if len(self._action_queue) <= 1:
            new_actions = self.get_actions()
            # Iterating the (chunk_size, action_dim) array yields one action per step.
            self._action_queue.extend(new_actions)

            # Apply the filter
            self.apply_filter()

        action_values = self._action_queue.popleft()

        action_dict = {f"{joint}.pos": float(action_values[i]) for i, joint in enumerate(self.joint_names)}
        self.robot.send_action(action_dict)

        # Sleep off the remainder of this step's time budget.
        if self.control_freq > 0:
            elapsed_time = time.perf_counter() - start_time
            time.sleep(max(0.0, 1.0 / self.control_freq - elapsed_time))

    def run(self):
        """Run control steps forever."""
        while True:
            self.loop_once()
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Connect to a local SO101 follower arm and drive it from a remote SmolVLA server.

    logging.basicConfig(level=logging.INFO)

    # Serial port and camera indices are specific to the machine this was written on.
    robot = SO101Follower(
        SO101FollowerConfig(
            port="/dev/ttyACM1",
            cameras={
                "wrist": OpenCVCameraConfig(index_or_path=8, width=640, height=480, fps=25),
                "front": OpenCVCameraConfig(index_or_path=4, width=640, height=480, fps=30),
            },
        )
    )
    robot.connect()

    try:
        client = Client(host="120.48.81.132", port=50000)

        executor = SmolVLAExecutor(
            robot=robot,
            runtime=client,
            task="pick the red marker to the bin",
            control_freq=25,
        )
        executor.run()
    except KeyboardInterrupt:
        logger.info("Interrupted, shutting down...")
    finally:
        # Release the serial port and cameras however the loop ended.
        robot.disconnect()
|
||||||
66
docker/smolvla_server.py
Normal file
66
docker/smolvla_server.py
Normal file
@ -0,0 +1,66 @@
|
|||||||
|
import torch
|
||||||
|
import os
|
||||||
|
|
||||||
|
from cloud_helper import Server
|
||||||
|
from lerobot.policies.factory import get_policy_class
|
||||||
|
|
||||||
|
# Process-wide settings: expose only GPU 0 to CUDA and keep the Hugging Face
# hub client offline.
os.environ.update(
    {
        "CUDA_VISIBLE_DEVICES": "0",
        "HF_HUB_OFFLINE": "1",
    }
)
|
||||||
|
|
||||||
|
|
||||||
|
class LerobotInferenceServer:
    """Serve a pretrained lerobot policy over the ``cloud_helper`` request/response protocol."""

    def __init__(
        self,
        checkpoint: str,
        policy_type: str = "smolvla",
        host: str = "localhost",
        port: int = 5555,
        device="cuda",
    ):
        """
        Args:
            checkpoint: Path or id passed to the policy class's ``from_pretrained``.
            policy_type: Name resolved through ``get_policy_class``.
            host: Interface to bind. ``"localhost"`` is mapped to ``127.0.0.1``.
            port: TCP port to bind.
            device: Torch device the policy and inputs are moved to.
        """
        # libzmq does not resolve hostnames when binding (it expects "*", a
        # numeric address or an interface name), so translate the loopback
        # hostname used as the default into its numeric address.
        bind_host = "127.0.0.1" if host == "localhost" else host
        self.server = Server(bind_host, port)
        self.policy_type = policy_type
        policy_class = get_policy_class(self.policy_type)
        self.policy = policy_class.from_pretrained(checkpoint)
        self.device = device
        self.policy.to(self.device)
        # Inference only: make sure dropout/batch-norm layers are in eval mode.
        self.policy.eval()
        print(f"Loaded {self.policy_type.upper()} policy from {checkpoint}")

    def get_actions(self, batch):
        """Run the policy on one observation and return the predicted action chunk.

        Expected request layout:
            batch = {
                "observation": {
                    "state": ...,
                    "images.front": ...,  HWC uint8
                    "images.wrist": ...,
                },
                "instruction": ...,
            }

        Returns:
            numpy array of shape (B, chunk_size, action_dim).
        """
        obs = {}

        for k, v in batch["observation"].items():
            if k.startswith("images.") and v is not None:
                img = v.astype("float32") / 255.0  # scale to [0, 1]
                img = img.transpose(2, 0, 1)  # HWC -> CHW
                img = torch.from_numpy(img).unsqueeze(0).to(self.device)
                obs[f"observation.{k}"] = img
            elif k == "state":
                tensor = torch.from_numpy(v.astype("float32")).unsqueeze(0).to(self.device)
                obs[f"observation.{k}"] = tensor
        obs["task"] = batch["instruction"]

        # No gradients are needed for serving; this also guarantees that the
        # result can be converted to numpy.
        with torch.no_grad():
            action_chunk = self.policy.predict_action_chunk(obs)

        return action_chunk.detach().cpu().numpy()  # (B, chunk_size, action_dim)

    def run(self):
        """Register the ``get_actions`` endpoint and serve requests until interrupted."""
        self.server.register_endpoint("get_actions", self.get_actions)
        print(f"Lerobot {self.policy_type.upper()} Server is running...")
        self.server.loop_forever()
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Serve the most recent SmolVLA checkpoint on all interfaces, port 50000.
    smolvla_checkpoint = "./20250901/pick_red_marker_smolvla/checkpoints/last/pretrained_model"
    server_options = {
        "checkpoint": smolvla_checkpoint,
        "policy_type": "smolvla",
        "host": "0.0.0.0",
        "port": 50000,
    }
    server = LerobotInferenceServer(**server_options)
    server.run()
|
||||||
53
docker/train.py
Normal file
53
docker/train.py
Normal file
@ -0,0 +1,53 @@
|
|||||||
|
"""Container entry point: translate /workspace/inputs/task.json into a `lerobot-train` run.

The process exits with the exit status of `lerobot-train`, so a failed
training run is visible to whatever started the container.
"""

import json
import math
import os
import shlex
import subprocess
import sys

with open("/workspace/inputs/task.json") as f:
    task_configs = json.load(f)


os.environ["CUDA_VISIBLE_DEVICES"] = "0"  # Lerobot supports only one GPU for training
os.environ["HF_HUB_OFFLINE"] = "1"
os.environ["TOKENIZERS_PARALLELISM"] = "false"

assert "train" in task_configs, "Not a validate train config"
assert task_configs["train"]["model"] in ["act", "smolvla"], "Only act and smolvla are supported for training"

# smolvla is fine-tuned from the pretrained base; act is trained from scratch.
use_policy = (
    "--policy.path=lerobot/smolvla_base"
    if task_configs["train"]["model"] == "smolvla"
    else "--policy.type=act"
)
task_id = task_configs["task_id"]
data_path = task_configs["train"]["input_data_path"]
ckpt_path = task_configs["train"]["checkpoint_path"]
bs = task_configs["train"]["batch_size"]
epochs = task_configs["train"]["epochs"]

use_resume = task_configs["train"].get("resume", False)
if use_resume:
    # Start from the weights stored under the checkpoint path.
    # eg: ${checkpoint_path}/checkpoints/last
    # NOTE(review): checkpoint_path is also passed as --output_dir below;
    # confirm lerobot-train accepts an existing output directory in this mode.
    policy_arg = f"--policy.path={ckpt_path}/pretrained_model"
else:
    policy_arg = use_policy

with open(os.path.join(data_path, "meta", "info.json"), "r") as f:
    dataset_info = json.load(f)
    total_frames = dataset_info["total_frames"]

# One epoch is ceil(total_frames / batch_size) optimizer steps. The previous
# `// bs + 1` added a spurious extra step when the frame count was an exact
# multiple of the batch size.
steps_per_epoch = max(1, math.ceil(total_frames / bs))
steps = steps_per_epoch * epochs
print(
    "Lerobot only support steps, calculating steps from epochs...",
    f"Steps per epoch: {steps_per_epoch}, Total steps: {steps}",
)

# Build the command as an argument list so that paths with spaces or shell
# metacharacters are passed through unchanged.
train_cmd = [
    "lerobot-train",
    policy_arg,
    "--policy.push_to_hub=false",
    f"--dataset.repo_id=D-Robotics/{task_id}",
    f"--dataset.root={data_path}",
    f"--batch_size={bs}",
    f"--output_dir={ckpt_path}",
    f"--steps={steps}",
    f"--save_freq={steps_per_epoch}",  # checkpoint once per epoch
]

print("Executing command:\n", shlex.join(train_cmd))

# Propagate the training exit status instead of discarding it.
sys.exit(subprocess.run(train_cmd).returncode)
|
||||||
12
docker/train_task.json
Normal file
12
docker/train_task.json
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
{
|
||||||
|
"task_id": "b5c75014c1142feab3ee395b4a0bcc0",
|
||||||
|
"gpu_id": "0",
|
||||||
|
"train": {
|
||||||
|
"model":"smolvla",
|
||||||
|
"epochs":20,
|
||||||
|
"batch_size":64,
|
||||||
|
"log_path": "/workspace/logs",
|
||||||
|
"checkpoint_path": "/workspace/outputs/checkpoints",
|
||||||
|
"input_data_path": "/workspace/inputs/pick_red_marker"
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -74,9 +74,9 @@ dependencies = [
|
|||||||
"pyserial>=3.5",
|
"pyserial>=3.5",
|
||||||
"wandb>=0.20.0",
|
"wandb>=0.20.0",
|
||||||
|
|
||||||
"torch>=2.2.1,<2.8.0", # TODO: Bumb dependency
|
"torch==2.6.0", # TODO: Bumb dependency
|
||||||
"torchcodec>=0.2.1,<0.6.0; sys_platform != 'win32' and (sys_platform != 'linux' or (platform_machine != 'aarch64' and platform_machine != 'arm64' and platform_machine != 'armv7l')) and (sys_platform != 'darwin' or platform_machine != 'x86_64')", # TODO: Bumb dependency
|
"torchcodec==0.2.1; sys_platform != 'win32' and (sys_platform != 'linux' or (platform_machine != 'aarch64' and platform_machine != 'arm64' and platform_machine != 'armv7l')) and (sys_platform != 'darwin' or platform_machine != 'x86_64')", # TODO: Bumb dependency
|
||||||
"torchvision>=0.21.0,<0.23.0", # TODO: Bumb dependency
|
"torchvision==0.21.0", # TODO: Bumb dependency
|
||||||
|
|
||||||
"draccus==0.10.0", # TODO: Remove ==
|
"draccus==0.10.0", # TODO: Remove ==
|
||||||
"gymnasium>=0.29.1,<1.0.0", # TODO: Bumb dependency
|
"gymnasium>=0.29.1,<1.0.0", # TODO: Bumb dependency
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user