From e5a1fcf468ec9071b718e8e05447de8aec4a960a Mon Sep 17 00:00:00 2001 From: GH Date: Thu, 25 Dec 2025 18:22:05 +0800 Subject: [PATCH] =?UTF-8?q?=E6=B7=BB=E5=8A=A0=E4=BA=86=E5=90=8E=E7=AB=AF?= =?UTF-8?q?=E7=9B=91=E6=8E=A7idle=5Ftime=E6=8E=A5=E5=8F=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Makefile | 2 +- docker/Dockerfile.infer | 2 +- docker/infer.py | 46 ++++++++++++++++++++++++----------------- 3 files changed, 29 insertions(+), 21 deletions(-) diff --git a/Makefile b/Makefile index dfa5807..a203c7a 100644 --- a/Makefile +++ b/Makefile @@ -94,7 +94,7 @@ push-visualize: .PHONY: clean clean: -docker images -f "dangling=true" -q | xargs -r docker rmi - -docker images | grep lerobot | grep -v $(VERSION) | awk '{print $$1":"$$2}' | xargs -r docker rmi + -docker images | grep lerobot | grep -v $(VERSION) | grep -v $(REMOTE_VERSION) |awk '{print $$1":"$$2}' | xargs -r docker rmi # --- Help --- .PHONY: help diff --git a/docker/Dockerfile.infer b/docker/Dockerfile.infer index 187de8c..8eddde2 100644 --- a/docker/Dockerfile.infer +++ b/docker/Dockerfile.infer @@ -64,7 +64,7 @@ RUN uv venv COPY --chown=user_lerobot:user_lerobot pyproject.toml README.md MANIFEST.in ./ # COPY --chown=user_lerobot:user_lerobot src/ src/ RUN uv pip install --no-cache ".[smolvla]" -RUN uv pip install pyzmq msgpack msgpack_numpy zstandard +RUN uv pip install pyzmq msgpack msgpack_numpy zstandard fastapi uvicorn # Set the default command - Online Inference mode ADD docker/infer.py /workspace/infer.py diff --git a/docker/infer.py b/docker/infer.py index f8b6329..0484eca 100644 --- a/docker/infer.py +++ b/docker/infer.py @@ -1,11 +1,13 @@ import json import os -import signal import threading import time import torch +from fastapi import FastAPI +import uvicorn + from cloud_helper import Server from lerobot.policies.factory import get_policy_class @@ -27,6 +29,7 @@ if not checkpoint.endswith("/pretrained_model"): server_port = 
task_configs["online_infer"].get("port", 8080) + class LerobotInferenceServer: def __init__( self, @@ -35,7 +38,7 @@ class LerobotInferenceServer: host: str = "localhost", port: int = 5555, device="cuda", - timeout: int = 3600, + http_port: int = 80, ): self.server = Server(host, port) self.policy_type = policy_type @@ -45,20 +48,9 @@ class LerobotInferenceServer: self.policy.to(self.device) print(f"Loaded {self.policy_type.upper()} policy from {checkpoint}") - self.timeout = timeout self.last_activity = time.time() - self.stop_event = threading.Event() - self.monitor_thread = threading.Thread(target=self.watchout, daemon=True) - self.monitor_thread.start() - - def watchout(self): - while not self.stop_event.is_set(): - time.sleep(6) # Check every 6 seconds - elapsed = time.time() - self.last_activity - if elapsed > self.timeout: - print(f"No activity for {elapsed:.0f} seconds. Shutting down due to timeout.") - # Force exit since loop_forever might block - os.kill(os.getpid(), signal.SIGINT) + self.fastapi = FastAPI() + self.http_port = http_port def get_actions(self, batch): # batch = { @@ -87,18 +79,36 @@ class LerobotInferenceServer: self.last_activity = time.time() - return action_chunk.cpu().numpy() # (B, chunk_size, action_dim) + return action_chunk.cpu().numpy() # (B, chunk_size, action_dim) + + def get_idle_time(self): + return time.time() - self.last_activity def run(self): self.server.register_endpoint("get_actions", self.get_actions) - print(f"Lerobot {self.policy_type.upper()} Server is running...") + @self.fastapi.get("/health") + def health_check(): + return {"status": 0} + + @self.fastapi.get("/idle_time") + def idle_time(): + return {"status": 0, "idle_time": self.get_idle_time()} + + def start_fastapi(app, port: int = 80): + """Start FastAPI in a separate thread""" + print(f"Starting FastAPI HTTP server on port {port}...") + uvicorn.run(app, host="0.0.0.0", port=port, log_level="info") + + threading.Thread(target=start_fastapi, args=(self.fastapi, self.http_port), daemon=True).start() + + print(f"Lerobot 
{self.policy_type.upper()} Server is running...") self.server.loop_forever() if __name__ == "__main__": - server = LerobotInferenceServer( - checkpoint=checkpoint, policy_type=model, host="0.0.0.0", port=server_port, timeout=3600 + server = LerobotInferenceServer( + checkpoint=checkpoint, policy_type=model, host="0.0.0.0", port=server_port ) server.run()