import functools
import logging
import time
from collections import deque

import numpy as np

from cloud_helper import Client
from lerobot.cameras.opencv.configuration_opencv import OpenCVCameraConfig
from lerobot.robots import Robot
from lerobot.robots.so101_follower.config_so101_follower import SO101FollowerConfig
from lerobot.robots.so101_follower.so101_follower import SO101Follower
from lerobot.utils import buffer
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
def freq_control(freq: int = 25):
    """Decorator factory that rate-limits a function to at most ``freq`` calls/s.

    The wrapped function runs normally; if it finishes in less than one period
    (``1 / freq`` seconds), the wrapper sleeps out the remainder before
    returning, so back-to-back calls are paced to the target frequency.

    The previous version took ``(func, freq=25)`` directly, so the existing
    ``@freq_control(25)`` usage bound ``25`` as the callable and raised a
    ``TypeError`` at class-definition time; a two-level factory matches that
    call site.

    Args:
        freq: Target maximum call frequency in Hz.

    Returns:
        A decorator that adds the pacing behaviour to a callable.
    """
    period = 1.0 / freq

    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            # perf_counter is monotonic, unlike time.time(), so the measured
            # duration cannot go negative on clock adjustments.
            start_time = time.perf_counter()
            result = func(*args, **kwargs)
            elapsed_time = time.perf_counter() - start_time
            time.sleep(max(0.0, period - elapsed_time))
            return result

        return wrapper

    return decorator
|
|
|
|
|
|
class SmolVLAExecutor:
    """Closed-loop executor: fetches action chunks from a remote SmolVLA policy
    server and streams them to a robot at a fixed control frequency.

    Args:
        robot: Connected robot exposing ``get_observation()`` / ``send_action()``.
        runtime: RPC client exposing a ``get_actions`` endpoint.
        task: Default natural-language instruction sent with each observation.
        control_freq: Target control-loop frequency in Hz.
    """

    def __init__(self, robot: "Robot", runtime: "Client", task: str, control_freq: int = 25):
        self.robot = robot
        self.runtime = runtime
        self.task = task
        # Previously accepted but ignored; now drives the pacing in loop_once().
        self.control_freq = control_freq
        # FIFO of pending per-step action vectors, each of length len(joint_names).
        self._action_queue = deque()
        self._cache = {}
        # Order must match the action_dim layout produced by the policy server.
        self.joint_names = [
            "shoulder_pan",
            "shoulder_lift",
            "elbow_flex",
            "wrist_flex",
            "wrist_roll",
            "gripper",
        ]

    def get_actions(self, instruction: str = ""):
        """Query the policy server with the current observation.

        Args:
            instruction: Overrides ``self.task`` for this request when non-empty.

        Returns:
            Action array of shape (chunk_size, action_dim).

        Raises:
            ConnectionError: If the server returns ``None``.
        """
        observation = self.robot.get_observation()

        batch = {
            "observation": {
                "images.front": observation["front"],
                "images.wrist": observation["wrist"],
                "state": np.array(
                    [observation[key + ".pos"] for key in self.joint_names], dtype="float32"
                ),
            },
            "instruction": instruction if instruction else self.task,
        }

        actions_array = self.runtime.call_endpoint("get_actions", batch)  # (B, chunk_size, action_dim)

        if actions_array is None:
            logger.warning("Server returned None")
            raise ConnectionError("Failed to receive response from RDT server")

        # Drop the batch axis if the server kept it.
        if len(actions_array.shape) == 3:
            actions_array = actions_array.squeeze(0)

        return actions_array  # (chunk_size, action_dim)

    def apply_filter(self, window_size: int = 3):
        """Smooth the queued actions along the time axis with a moving average.

        The queue holds per-step action vectors, so the stacked buffer is
        (n_steps, action_dim).  The previous implementation unpacked a
        three-axis (n_steps, batch, action_dim) shape and crashed on the
        two-axis buffer that loop_once() actually enqueues; it also produced
        an off-by-one output length for even window sizes and failed on an
        empty queue.

        Args:
            window_size: Moving-average window length; values <= 1 are a no-op.
        """
        if window_size <= 1 or len(self._action_queue) == 0:
            return

        buffer_arr = np.asarray(self._action_queue, dtype="float64")  # (n_steps, action_dim)
        if buffer_arr.ndim == 1:  # degenerate single-joint case
            buffer_arr = buffer_arr[:, None]

        # Asymmetric edge padding keeps the smoothed length == n_steps for any
        # window size (symmetric w//2 padding is one too long for even w).
        left, right = window_size // 2, (window_size - 1) // 2
        kernel = np.ones(window_size) / window_size
        for dim in range(buffer_arr.shape[1]):
            padded = np.pad(buffer_arr[:, dim], (left, right), mode="edge")
            buffer_arr[:, dim] = np.convolve(padded, kernel, mode="valid")

        self._action_queue = deque(buffer_arr.tolist())

    def loop_once(self):
        """Execute one control step, paced to ``self.control_freq`` Hz.

        Refills the action queue from the server when it is (nearly) empty,
        smooths the fresh chunk, then sends the next action to the robot.
        """
        start_time = time.perf_counter()

        if len(self._action_queue) <= 1:
            new_actions = self.get_actions()
            # Each row of the (chunk_size, action_dim) chunk becomes one queued
            # step.  (The previous .transpose(0, 1) was a numpy no-op.)
            self._action_queue.extend(np.asarray(new_actions))
            self.apply_filter()

        action_values = self._action_queue.popleft()
        action_dict = {f"{joint}.pos": float(action_values[i]) for i, joint in enumerate(self.joint_names)}
        self.robot.send_action(action_dict)

        # Rate limiting.  This replaces the broken @freq_control(25) decorator
        # (which passed 25 where a callable was expected) and finally honours
        # the control_freq constructor argument.
        elapsed = time.perf_counter() - start_time
        time.sleep(max(0.0, 1.0 / self.control_freq - elapsed))

    def run(self):
        """Run the control loop until interrupted (never returns normally)."""
        while True:
            self.loop_once()
|
|
|
|
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)

    # Two RGB streams: a wrist-mounted camera and a fixed front view.
    camera_configs = {
        "wrist": OpenCVCameraConfig(index_or_path=8, width=640, height=480, fps=25),
        "front": OpenCVCameraConfig(index_or_path=4, width=640, height=480, fps=30),
    }
    follower_config = SO101FollowerConfig(
        port="/dev/ttyACM1",
        cameras=camera_configs,
    )

    robot = SO101Follower(follower_config)
    robot.connect()

    # Remote policy server.
    client = Client(host="120.48.81.132", port=50000)

    executor = SmolVLAExecutor(
        robot=robot,
        runtime=client,
        task="pick the red marker to the bin",
        control_freq=25,
    )
    executor.run()
|