Lerobot/docker/smolvla_executor.py

from cloud_helper import Client

from collections import deque
from lerobot.cameras.opencv.configuration_opencv import OpenCVCameraConfig
from lerobot.robots import Robot
from lerobot.robots.so101_follower.so101_follower import SO101Follower
from lerobot.robots.so101_follower.config_so101_follower import SO101FollowerConfig
import numpy as np

import logging
import time

from lerobot.utils import buffer

logger = logging.getLogger(__name__)


def freq_control(func, freq: int = 25):
    def wrapper(*args, **kwargs):
        start_time = time.time()
        result = func(*args, **kwargs)
        end_time = time.time()
        elapsed_time = end_time - start_time
        # logger.info(f"'{func.__name__}' tooks {elapsed_time * 1000:.2f} ms")
        sleep_time = max(0, (1.0 / freq) - elapsed_time)
        time.sleep(sleep_time)
        return result

    return wrapper


class SmolVLAExecutor:
    def __init__(self, robot: Robot, runtime: Client, task: str, control_freq: int = 25):
        self.robot = robot
        self.runtime = runtime
        self._action_queue = deque()
        self._cache = {}
        self.task = task
        self.joint_names = [
            "shoulder_pan",
            "shoulder_lift",
            "elbow_flex",
            "wrist_flex",
            "wrist_roll",
            "gripper",
        ]

    def get_actions(self, instruction: str = ""):
        observation = self.robot.get_observation()

        batch = {
            "observation": {
                "images.front": observation["front"],
                "images.wrist": observation["wrist"],
                "state": np.array([observation[key + ".pos"] for key in self.joint_names], dtype="float32"),
            },
            "instruction": instruction if instruction else self.task,
        }

        actions_array = self.runtime.call_endpoint("get_actions", batch)  # (B, chunk_size, action_dim)

        if actions_array is None:
            logger.warning("Server returned None")
            raise ConnectionError("Failed to receive response from RDT server")

        actions_array = (
            actions_array.squeeze(0) if len(actions_array.shape) == 3 else actions_array
        )  # (chunk_size, action_dim)

        return actions_array

    def apply_filter(self, window_size: int = 3):
        action_buffer = np.array(self._action_queue)  # (n_steps, action_dim)
        n_steps, batch_size, action_dim = action_buffer.shape

        for b in range(batch_size):
            for d in range(action_dim):
                series = action_buffer[:, b, d]

                if window_size > 1:
                    # Apply a simple moving average filter
                    padded_series = np.pad(series, (window_size // 2, window_size // 2), mode="edge")
                    smoothed_series = np.convolve(
                        padded_series, np.ones(window_size) / window_size, mode="valid"
                    )
                    series[:] = smoothed_series

        action_buffer = self._action_queue = deque(action_buffer.tolist())

    @freq_control(25)
    def loop_once(self):
        if len(self._action_queue) <= 1:
            new_actions = self.get_actions()
            self._action_queue.extend(new_actions.transpose(0, 1))

            # Apply the filter
            self.apply_filter()

        action_values = self._action_queue.popleft()

        action_dict = {f"{joint}.pos": float(action_values[i]) for i, joint in enumerate(self.joint_names)}
        self.robot.send_action(action_dict)

    def run(self):
        while True:
            self.loop_once()

if __name__ == "__main__":

    logging.basicConfig(level=logging.INFO)

    robot = SO101Follower(
        SO101FollowerConfig(
            port="/dev/ttyACM1",
            cameras={
                "wrist": OpenCVCameraConfig(index_or_path=8, width=640, height=480, fps=25),
                "front": OpenCVCameraConfig(index_or_path=4, width=640, height=480, fps=30),
            },
        )
    )
    robot.connect()

    client = Client(host="120.48.81.132", port=50000)

    executor = SmolVLAExecutor(
        robot=robot,
        runtime=client,
        task="pick the red marker to the bin",
        control_freq=25,
    )
    executor.run()