from cloud_helper import Client from collections import deque from lerobot.cameras.opencv.configuration_opencv import OpenCVCameraConfig from lerobot.robots import Robot from lerobot.robots.so101_follower.so101_follower import SO101Follower from lerobot.robots.so101_follower.config_so101_follower import SO101FollowerConfig import numpy as np import logging import time from lerobot.utils import buffer logger = logging.getLogger(__name__) def freq_control(func, freq: int = 25): def wrapper(*args, **kwargs): start_time = time.time() result = func(*args, **kwargs) end_time = time.time() elapsed_time = end_time - start_time # logger.info(f"'{func.__name__}' tooks {elapsed_time * 1000:.2f} ms") sleep_time = max(0, (1.0 / freq) - elapsed_time) time.sleep(sleep_time) return result return wrapper class SmolVLAExecutor: def __init__(self, robot: Robot, runtime: Client, task: str, control_freq: int = 25): self.robot = robot self.runtime = runtime self._action_queue = deque() self._cache = {} self.task = task self.joint_names = [ "shoulder_pan", "shoulder_lift", "elbow_flex", "wrist_flex", "wrist_roll", "gripper", ] def get_actions(self, instruction: str = ""): observation = self.robot.get_observation() batch = { "observation": { "images.front": observation["front"], "images.wrist": observation["wrist"], "state": np.array([observation[key + ".pos"] for key in self.joint_names], dtype="float32"), }, "instruction": instruction if instruction else self.task, } actions_array = self.runtime.call_endpoint("get_actions", batch) # (B, chunk_size, action_dim) if actions_array is None: logger.warning("Server returned None") raise ConnectionError("Failed to receive response from RDT server") actions_array = ( actions_array.squeeze(0) if len(actions_array.shape) == 3 else actions_array ) # (chunk_size, action_dim) return actions_array def apply_filter(self, window_size: int = 3): action_buffer = np.array(self._action_queue) # (n_steps, action_dim) n_steps, batch_size, action_dim = action_buffer.shape for b in range(batch_size): for d in range(action_dim): series = action_buffer[:, b, d] if window_size > 1: # Apply a simple moving average filter padded_series = np.pad(series, (window_size // 2, window_size // 2), mode="edge") smoothed_series = np.convolve( padded_series, np.ones(window_size) / window_size, mode="valid" ) series[:] = smoothed_series action_buffer = self._action_queue = deque(action_buffer.tolist()) @freq_control(25) def loop_once(self): if len(self._action_queue) <= 1: new_actions = self.get_actions() self._action_queue.extend(new_actions) # Apply the filter self.apply_filter() action_values = self._action_queue.popleft() action_dict = {f"{joint}.pos": float(action_values[i]) for i, joint in enumerate(self.joint_names)} self.robot.send_action(action_dict) def run(self): while True: self.loop_once() if __name__ == "__main__": logging.basicConfig(level=logging.INFO) robot = SO101Follower( SO101FollowerConfig( port="/dev/ttyACM1", cameras={ "wrist": OpenCVCameraConfig(index_or_path=8, width=640, height=480, fps=25), "front": OpenCVCameraConfig(index_or_path=4, width=640, height=480, fps=30), }, ) ) robot.connect() client = Client(host="120.48.81.132", port=50000) executor = SmolVLAExecutor( robot=robot, runtime=client, task="pick the red marker to the bin", control_freq=25, ) executor.run()