Lerobot/docker/smolvla_executor.py

132 lines
4.1 KiB
Python

from cloud_helper import Client
from collections import deque
from lerobot.cameras.opencv.configuration_opencv import OpenCVCameraConfig
from lerobot.robots import Robot
from lerobot.robots.so101_follower.so101_follower import SO101Follower
from lerobot.robots.so101_follower.config_so101_follower import SO101FollowerConfig
import numpy as np
import logging
import time
from lerobot.utils import buffer
logger = logging.getLogger(__name__)
def freq_control(func, freq: int = 25):
def wrapper(*args, **kwargs):
start_time = time.time()
result = func(*args, **kwargs)
end_time = time.time()
elapsed_time = end_time - start_time
# logger.info(f"'{func.__name__}' tooks {elapsed_time * 1000:.2f} ms")
sleep_time = max(0, (1.0 / freq) - elapsed_time)
time.sleep(sleep_time)
return result
return wrapper
class SmolVLAExecutor:
def __init__(self, robot: Robot, runtime: Client, task: str, control_freq: int = 25):
self.robot = robot
self.runtime = runtime
self._action_queue = deque()
self._cache = {}
self.task = task
self.joint_names = [
"shoulder_pan",
"shoulder_lift",
"elbow_flex",
"wrist_flex",
"wrist_roll",
"gripper",
]
def get_actions(self, instruction: str = ""):
observation = self.robot.get_observation()
batch = {
"observation": {
"images.front": observation["front"],
"images.wrist": observation["wrist"],
"state": np.array([observation[key + ".pos"] for key in self.joint_names], dtype="float32"),
},
"instruction": instruction if instruction else self.task,
}
actions_array = self.runtime.call_endpoint("get_actions", batch) # (B, chunk_size, action_dim)
if actions_array is None:
logger.warning("Server returned None")
raise ConnectionError("Failed to receive response from RDT server")
actions_array = (
actions_array.squeeze(0) if len(actions_array.shape) == 3 else actions_array
) # (chunk_size, action_dim)
return actions_array
def apply_filter(self, window_size: int = 3):
action_buffer = np.array(self._action_queue) # (n_steps, action_dim)
n_steps, batch_size, action_dim = action_buffer.shape
for b in range(batch_size):
for d in range(action_dim):
series = action_buffer[:, b, d]
if window_size > 1:
# Apply a simple moving average filter
padded_series = np.pad(series, (window_size // 2, window_size // 2), mode="edge")
smoothed_series = np.convolve(
padded_series, np.ones(window_size) / window_size, mode="valid"
)
series[:] = smoothed_series
action_buffer = self._action_queue = deque(action_buffer.tolist())
@freq_control(25)
def loop_once(self):
if len(self._action_queue) <= 1:
new_actions = self.get_actions()
self._action_queue.extend(new_actions.transpose(0, 1))
# Apply the filter
self.apply_filter()
action_values = self._action_queue.popleft()
action_dict = {f"{joint}.pos": float(action_values[i]) for i, joint in enumerate(self.joint_names)}
self.robot.send_action(action_dict)
def run(self):
while True:
self.loop_once()
if __name__ == "__main__":
logging.basicConfig(level=logging.INFO)
robot = SO101Follower(
SO101FollowerConfig(
port="/dev/ttyACM1",
cameras={
"wrist": OpenCVCameraConfig(index_or_path=8, width=640, height=480, fps=25),
"front": OpenCVCameraConfig(index_or_path=4, width=640, height=480, fps=30),
},
)
)
robot.connect()
client = Client(host="120.48.81.132", port=50000)
executor = SmolVLAExecutor(
robot=robot,
runtime=client,
task="pick the red marker to the bin",
control_freq=25,
)
executor.run()