# Copyright 2024 The HuggingFace Inc. team. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. """ Provides the RealSenseCamera class for capturing frames from Intel RealSense cameras. """ import logging import time from threading import Event, Lock, Thread from typing import Any, Dict, List import cv2 import numpy as np try: import pyrealsense2 as rs except Exception as e: logging.info(f"Could not import realsense: {e}") from lerobot.common.errors import DeviceAlreadyConnectedError, DeviceNotConnectedError from ..camera import Camera from ..configs import ColorMode from ..utils import get_cv2_rotation from .configuration_realsense import RealSenseCameraConfig logger = logging.getLogger(__name__) class RealSenseCamera(Camera): """ Manages interactions with Intel RealSense cameras for frame and depth recording. This class provides an interface similar to `OpenCVCamera` but tailored for RealSense devices, leveraging the `pyrealsense2` library. It uses the camera's unique serial number for identification, offering more stability than device indices, especially on Linux. It also supports capturing depth maps alongside color frames. Use the provided utility script to find available camera indices and default profiles: ```bash python -m lerobot.find_cameras realsense ``` A `RealSenseCamera` instance requires a configuration object specifying the camera's serial number or a unique device name. If using the name, ensure only one camera with that name is connected. The camera's default settings (FPS, resolution, color mode) from the stream profile are used unless overridden in the configuration. Example: ```python from lerobot.common.cameras.realsense import RealSenseCamera, RealSenseCameraConfig from lerobot.common.cameras import ColorMode, Cv2Rotation # Basic usage with serial number config = RealSenseCameraConfig(serial_number_or_name="0123456789") # Replace with actual SN camera = RealSenseCamera(config) camera.connect() # Read 1 frame synchronously color_image = camera.read() print(color_image.shape) # Read 1 frame asynchronously async_image = camera.async_read() # When done, properly disconnect the camera using camera.disconnect() # Example with depth capture and custom settings custom_config = RealSenseCameraConfig( serial_number_or_name="0123456789", # Replace with actual SN fps=30, width=1280, height=720, color_mode=ColorMode.BGR, # Request BGR output rotation=Cv2Rotation.NO_ROTATION, use_depth=True ) depth_camera = RealSenseCamera(custom_config) depth_camera.connect() # Read 1 depth frame depth_map = depth_camera.read_depth() # Example using a unique camera name name_config = RealSenseCameraConfig(serial_number_or_name="Intel RealSense D435") # If unique name_camera = RealSenseCamera(name_config) # ... connect, read, disconnect ... ``` """ def __init__(self, config: RealSenseCameraConfig): """ Initializes the RealSenseCamera instance. Args: config: The configuration settings for the camera. """ super().__init__(config) self.config = config if config.serial_number_or_name.isdigit(): self.serial_number = config.serial_number_or_name else: self.serial_number = self._find_serial_number_from_name(config.serial_number_or_name) self.fps = config.fps self.color_mode = config.color_mode self.use_depth = config.use_depth self.warmup_s = config.warmup_s self.rs_pipeline: rs.pipeline | None = None self.rs_profile: rs.pipeline_profile | None = None self.thread: Thread | None = None self.stop_event: Event | None = None self.frame_lock: Lock = Lock() self.latest_frame: np.ndarray | None = None self.new_frame_event: Event = Event() self.rotation: int | None = get_cv2_rotation(config.rotation) if self.height and self.width: self.capture_width, self.capture_height = self.width, self.height if self.rotation in [cv2.ROTATE_90_CLOCKWISE, cv2.ROTATE_90_COUNTERCLOCKWISE]: self.capture_width, self.capture_height = self.height, self.width def __str__(self) -> str: return f"{self.__class__.__name__}({self.serial_number})" @property def is_connected(self) -> bool: """Checks if the camera pipeline is started and streams are active.""" return self.rs_pipeline is not None and self.rs_profile is not None def connect(self, warmup: bool = True): """ Connects to the RealSense camera specified in the configuration. Initializes the RealSense pipeline, configures the required streams (color and optionally depth), starts the pipeline, and validates the actual stream settings. Raises: DeviceAlreadyConnectedError: If the camera is already connected. ValueError: If the configuration is invalid (e.g., missing serial/name, name not unique). ConnectionError: If the camera is found but fails to start the pipeline or no RealSense devices are detected at all. RuntimeError: If the pipeline starts but fails to apply requested settings. """ if self.is_connected: raise DeviceAlreadyConnectedError(f"{self} is already connected.") self.rs_pipeline = rs.pipeline() rs_config = rs.config() self._configure_rs_pipeline_config(rs_config) try: self.rs_profile = self.rs_pipeline.start(rs_config) except RuntimeError as e: self.rs_profile = None self.rs_pipeline = None raise ConnectionError( f"Failed to open {self}." "Run `python -m lerobot.find_cameras realsense` to find available cameras." ) from e self._configure_capture_settings() if warmup: time.sleep( 1 ) # NOTE(Steven): RS cameras need a bit of time to warm up before the first read. If we don't wait, the first read from the warmup will raise. start_time = time.time() while time.time() - start_time < self.warmup_s: self.read() time.sleep(0.1) logger.info(f"{self} connected.") @staticmethod def find_cameras() -> List[Dict[str, Any]]: """ Detects available Intel RealSense cameras connected to the system. Returns: List[Dict[str, Any]]: A list of dictionaries, where each dictionary contains 'type', 'id' (serial number), 'name', firmware version, USB type, and other available specs, and the default profile properties (width, height, fps, format). Raises: OSError: If pyrealsense2 is not installed. ImportError: If pyrealsense2 is not installed. """ found_cameras_info = [] context = rs.context() devices = context.query_devices() for device in devices: camera_info = { "name": device.get_info(rs.camera_info.name), "type": "RealSense", "id": device.get_info(rs.camera_info.serial_number), "firmware_version": device.get_info(rs.camera_info.firmware_version), "usb_type_descriptor": device.get_info(rs.camera_info.usb_type_descriptor), "physical_port": device.get_info(rs.camera_info.physical_port), "product_id": device.get_info(rs.camera_info.product_id), "product_line": device.get_info(rs.camera_info.product_line), } # Get stream profiles for each sensor sensors = device.query_sensors() for sensor in sensors: profiles = sensor.get_stream_profiles() for profile in profiles: if profile.is_video_stream_profile() and profile.is_default(): vprofile = profile.as_video_stream_profile() stream_info = { "stream_type": vprofile.stream_name(), "format": vprofile.format().name, "width": vprofile.width(), "height": vprofile.height(), "fps": vprofile.fps(), } camera_info["default_stream_profile"] = stream_info found_cameras_info.append(camera_info) return found_cameras_info def _find_serial_number_from_name(self, name: str) -> str: """Finds the serial number for a given unique camera name.""" camera_infos = self.find_cameras() found_devices = [cam for cam in camera_infos if str(cam["name"]) == name] if not found_devices: available_names = [cam["name"] for cam in camera_infos] raise ValueError( f"No RealSense camera found with name '{name}'. Available camera names: {available_names}" ) if len(found_devices) > 1: serial_numbers = [dev["serial_number"] for dev in found_devices] raise ValueError( f"Multiple RealSense cameras found with name '{name}'. " f"Please use a unique serial number instead. Found SNs: {serial_numbers}" ) serial_number = str(found_devices[0]["serial_number"]) return serial_number def _configure_rs_pipeline_config(self, rs_config): """Creates and configures the RealSense pipeline configuration object.""" rs.config.enable_device(rs_config, self.serial_number) if self.width and self.height and self.fps: rs_config.enable_stream( rs.stream.color, self.capture_width, self.capture_height, rs.format.rgb8, self.fps ) if self.use_depth: rs_config.enable_stream( rs.stream.depth, self.capture_width, self.capture_height, rs.format.z16, self.fps ) else: rs_config.enable_stream(rs.stream.color) if self.use_depth: rs_config.enable_stream(rs.stream.depth) def _configure_capture_settings(self) -> None: """Sets fps, width, and height from device stream if not already configured. Uses the color stream profile to update unset attributes. Handles rotation by swapping width/height when needed. Original capture dimensions are always stored. Raises: DeviceNotConnectedError: If device is not connected. """ if not self.is_connected: raise DeviceNotConnectedError(f"Cannot validate settings for {self} as it is not connected.") stream = self.rs_profile.get_stream(rs.stream.color).as_video_stream_profile() if self.fps is None: self.fps = stream.fps() if self.width is None or self.height is None: actual_width = int(round(stream.width())) actual_height = int(round(stream.height())) if self.rotation in [cv2.ROTATE_90_CLOCKWISE, cv2.ROTATE_90_COUNTERCLOCKWISE]: self.width, self.height = actual_height, actual_width self.capture_width, self.capture_height = actual_width, actual_height else: self.width, self.height = actual_width, actual_height self.capture_width, self.capture_height = actual_width, actual_height def read_depth(self, timeout_ms: int = 200) -> np.ndarray: """ Reads a single frame (depth) synchronously from the camera. This is a blocking call. It waits for a coherent set of frames (depth) from the camera hardware via the RealSense pipeline. Args: timeout_ms (int): Maximum time in milliseconds to wait for a frame. Defaults to 200ms. Returns: np.ndarray: The depth map as a NumPy array (height, width) of type `np.uint16` (raw depth values in millimeters) and rotation. Raises: DeviceNotConnectedError: If the camera is not connected. RuntimeError: If reading frames from the pipeline fails or frames are invalid. """ if not self.is_connected: raise DeviceNotConnectedError(f"{self} is not connected.") if not self.use_depth: raise RuntimeError( f"Failed to capture depth frame '.read_depth()'. Depth stream is not enabled for {self}." ) start_time = time.perf_counter() ret, frame = self.rs_pipeline.try_wait_for_frames(timeout_ms=timeout_ms) if not ret or frame is None: raise RuntimeError(f"{self} read_depth failed (status={ret}).") depth_frame = frame.get_depth_frame() depth_map = np.asanyarray(depth_frame.get_data()) depth_map_processed = self._postprocess_image(depth_map, depth_frame=True) read_duration_ms = (time.perf_counter() - start_time) * 1e3 logger.debug(f"{self} read took: {read_duration_ms:.1f}ms") return depth_map_processed def read(self, color_mode: ColorMode | None = None, timeout_ms: int = 200) -> np.ndarray: """ Reads a single frame (color) synchronously from the camera. This is a blocking call. It waits for a coherent set of frames (color) from the camera hardware via the RealSense pipeline. Args: timeout_ms (int): Maximum time in milliseconds to wait for a frame. Defaults to 200ms. Returns: np.ndarray: The captured color frame as a NumPy array (height, width, channels), processed according to `color_mode` and rotation. Raises: DeviceNotConnectedError: If the camera is not connected. RuntimeError: If reading frames from the pipeline fails or frames are invalid. ValueError: If an invalid `color_mode` is requested. """ if not self.is_connected: raise DeviceNotConnectedError(f"{self} is not connected.") start_time = time.perf_counter() ret, frame = self.rs_pipeline.try_wait_for_frames(timeout_ms=timeout_ms) if not ret or frame is None: raise RuntimeError(f"{self} read failed (status={ret}).") color_frame = frame.get_color_frame() color_image_raw = np.asanyarray(color_frame.get_data()) color_image_processed = self._postprocess_image(color_image_raw, color_mode) read_duration_ms = (time.perf_counter() - start_time) * 1e3 logger.debug(f"{self} read took: {read_duration_ms:.1f}ms") return color_image_processed def _postprocess_image( self, image: np.ndarray, color_mode: ColorMode | None = None, depth_frame: bool = False ) -> np.ndarray: """ Applies color conversion, dimension validation, and rotation to a raw color frame. Args: image (np.ndarray): The raw image frame (expected RGB format from RealSense). color_mode (Optional[ColorMode]): The target color mode (RGB or BGR). If None, uses the instance's default `self.color_mode`. Returns: np.ndarray: The processed image frame according to `self.color_mode` and `self.rotation`. Raises: ValueError: If the requested `color_mode` is invalid. RuntimeError: If the raw frame dimensions do not match the configured `width` and `height`. """ if color_mode and color_mode not in (ColorMode.RGB, ColorMode.BGR): raise ValueError( f"Invalid requested color mode '{color_mode}'. Expected {ColorMode.RGB} or {ColorMode.BGR}." ) if depth_frame: h, w = image.shape else: h, w, c = image.shape if c != 3: raise RuntimeError(f"{self} frame channels={c} do not match expected 3 channels (RGB/BGR).") if h != self.capture_height or w != self.capture_width: raise RuntimeError( f"{self} frame width={w} or height={h} do not match configured width={self.capture_width} or height={self.capture_height}." ) processed_image = image if self.color_mode == ColorMode.BGR: processed_image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) if self.rotation in [cv2.ROTATE_90_CLOCKWISE, cv2.ROTATE_90_COUNTERCLOCKWISE]: processed_image = cv2.rotate(processed_image, self.rotation) return processed_image def _read_loop(self): """ Internal loop run by the background thread for asynchronous reading. On each iteration: 1. Reads a color frame with 500ms timeout 2. Stores result in latest_frame (thread-safe) 3. Sets new_frame_event to notify listeners Stops on DeviceNotConnectedError, logs other errors and continues. """ while not self.stop_event.is_set(): try: color_image = self.read(timeout_ms=500) with self.frame_lock: self.latest_frame = color_image self.new_frame_event.set() except DeviceNotConnectedError: break except Exception as e: logger.warning(f"Error reading frame in background thread for {self}: {e}") def _start_read_thread(self) -> None: """Starts or restarts the background read thread if it's not running.""" if self.thread is not None and self.thread.is_alive(): self.thread.join(timeout=0.1) if self.stop_event is not None: self.stop_event.set() self.stop_event = Event() self.thread = Thread(target=self._read_loop, args=(), name=f"{self}_read_loop") self.thread.daemon = True self.thread.start() def _stop_read_thread(self): """Signals the background read thread to stop and waits for it to join.""" if self.stop_event is not None: self.stop_event.set() if self.thread is not None and self.thread.is_alive(): self.thread.join(timeout=2.0) self.thread = None self.stop_event = None # NOTE(Steven): Missing implementation for depth for now def async_read(self, timeout_ms: float = 200) -> np.ndarray: """ Reads the latest available frame data (color) asynchronously. This method retrieves the most recent color frame captured by the background read thread. It does not block waiting for the camera hardware directly, but may wait up to timeout_ms for the background thread to provide a frame. Args: timeout_ms (float): Maximum time in milliseconds to wait for a frame to become available. Defaults to 200ms (0.2 seconds). Returns: np.ndarray: The latest captured frame data (color image), processed according to configuration. Raises: DeviceNotConnectedError: If the camera is not connected. TimeoutError: If no frame data becomes available within the specified timeout. RuntimeError: If the background thread died unexpectedly or another error occurs. """ if not self.is_connected: raise DeviceNotConnectedError(f"{self} is not connected.") if self.thread is None or not self.thread.is_alive(): self._start_read_thread() if not self.new_frame_event.wait(timeout=timeout_ms / 1000.0): thread_alive = self.thread is not None and self.thread.is_alive() raise TimeoutError( f"Timed out waiting for frame from camera {self} after {timeout_ms} ms. " f"Read thread alive: {thread_alive}." ) with self.frame_lock: frame = self.latest_frame self.new_frame_event.clear() if frame is None: raise RuntimeError(f"Internal error: Event set but no frame available for {self}.") return frame def disconnect(self): """ Disconnects from the camera, stops the pipeline, and cleans up resources. Stops the background read thread (if running) and stops the RealSense pipeline. Raises: DeviceNotConnectedError: If the camera is already disconnected (pipeline not running). """ if not self.is_connected and self.thread is None: raise DeviceNotConnectedError( f"Attempted to disconnect {self}, but it appears already disconnected." ) if self.thread is not None: self._stop_read_thread() if self.rs_pipeline is not None: self.rs_pipeline.stop() self.rs_pipeline = None self.rs_profile = None logger.info(f"{self} disconnected.")