添加了后端监控idle_time接口
This commit is contained in:
parent
4a519db15f
commit
e5a1fcf468
2
Makefile
2
Makefile
@ -94,7 +94,7 @@ push-visualize:
|
|||||||
.PHONY: clean
|
.PHONY: clean
|
||||||
clean:
|
clean:
|
||||||
-docker images -f "dangling=true" -q | xargs -r docker rmi
|
-docker images -f "dangling=true" -q | xargs -r docker rmi
|
||||||
-docker images | grep lerobot | grep -v $(VERSION) | awk '{print $$1":"$$2}' | xargs -r docker rmi
|
-docker images | grep lerobot | grep -v $(VERSION) | grep -v $(REMOTE_VERSION) |awk '{print $$1":"$$2}' | xargs -r docker rmi
|
||||||
|
|
||||||
# --- Help ---
|
# --- Help ---
|
||||||
.PHONY: help
|
.PHONY: help
|
||||||
|
|||||||
@ -64,7 +64,7 @@ RUN uv venv
|
|||||||
COPY --chown=user_lerobot:user_lerobot pyproject.toml README.md MANIFEST.in ./
|
COPY --chown=user_lerobot:user_lerobot pyproject.toml README.md MANIFEST.in ./
|
||||||
# COPY --chown=user_lerobot:user_lerobot src/ src/
|
# COPY --chown=user_lerobot:user_lerobot src/ src/
|
||||||
RUN uv pip install --no-cache ".[smolvla]"
|
RUN uv pip install --no-cache ".[smolvla]"
|
||||||
RUN uv pip install pyzmq msgpack msgpack_numpy zstandard
|
RUN uv pip install pyzmq msgpack msgpack_numpy zstandard fastapi uvicorn
|
||||||
|
|
||||||
# Set the default command - Online Inference mode
|
# Set the default command - Online Inference mode
|
||||||
ADD docker/infer.py /workspace/infer.py
|
ADD docker/infer.py /workspace/infer.py
|
||||||
|
|||||||
@ -1,11 +1,13 @@
|
|||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
import signal
|
|
||||||
import threading
|
import threading
|
||||||
import time
|
import time
|
||||||
|
|
||||||
import torch
|
import torch
|
||||||
|
|
||||||
|
from fastapi import FastAPI
|
||||||
|
import uvicorn
|
||||||
|
|
||||||
from cloud_helper import Server
|
from cloud_helper import Server
|
||||||
from lerobot.policies.factory import get_policy_class
|
from lerobot.policies.factory import get_policy_class
|
||||||
|
|
||||||
@ -27,6 +29,7 @@ if not checkpoint.endswith("/pretrained_model"):
|
|||||||
|
|
||||||
server_port = task_configs["online_infer"].get("port", 8080)
|
server_port = task_configs["online_infer"].get("port", 8080)
|
||||||
|
|
||||||
|
|
||||||
class LerobotInferenceServer:
|
class LerobotInferenceServer:
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
@ -35,7 +38,7 @@ class LerobotInferenceServer:
|
|||||||
host: str = "localhost",
|
host: str = "localhost",
|
||||||
port: int = 5555,
|
port: int = 5555,
|
||||||
device="cuda",
|
device="cuda",
|
||||||
timeout: int = 3600,
|
http_port: int = 80,
|
||||||
):
|
):
|
||||||
self.server = Server(host, port)
|
self.server = Server(host, port)
|
||||||
self.policy_type = policy_type
|
self.policy_type = policy_type
|
||||||
@ -45,20 +48,8 @@ class LerobotInferenceServer:
|
|||||||
self.policy.to(self.device)
|
self.policy.to(self.device)
|
||||||
print(f"Loaded {self.policy_type.upper()} policy from {checkpoint}")
|
print(f"Loaded {self.policy_type.upper()} policy from {checkpoint}")
|
||||||
|
|
||||||
self.timeout = timeout
|
|
||||||
self.last_activity = time.time()
|
self.last_activity = time.time()
|
||||||
self.stop_event = threading.Event()
|
self.fastapi = FastAPI()
|
||||||
self.monitor_thread = threading.Thread(target=self.watchout, daemon=True)
|
|
||||||
self.monitor_thread.start()
|
|
||||||
|
|
||||||
def watchout(self):
|
|
||||||
while not self.stop_event.is_set():
|
|
||||||
time.sleep(6) # Check every 6 seconds
|
|
||||||
elapsed = time.time() - self.last_activity
|
|
||||||
if elapsed > self.timeout:
|
|
||||||
print(f"No activity for {elapsed:.0f} seconds. Shutting down due to timeout.")
|
|
||||||
# Force exit since loop_forever might block
|
|
||||||
os.kill(os.getpid(), signal.SIGINT)
|
|
||||||
|
|
||||||
def get_actions(self, batch):
|
def get_actions(self, batch):
|
||||||
# batch = {
|
# batch = {
|
||||||
@ -87,18 +78,35 @@ class LerobotInferenceServer:
|
|||||||
|
|
||||||
self.last_activity = time.time()
|
self.last_activity = time.time()
|
||||||
|
|
||||||
return action_chunk.cpu().numpy() # (B, chunk_size, action_dim)
|
return action_chunk.cpu().numpy() # (B, chunk_size, action_dim)
|
||||||
|
|
||||||
|
def get_idle_time(self):
|
||||||
|
return time.time() - self.last_activity
|
||||||
|
|
||||||
def run(self):
|
def run(self):
|
||||||
self.server.register_endpoint("get_actions", self.get_actions)
|
self.server.register_endpoint("get_actions", self.get_actions)
|
||||||
print(f"Lerobot {self.policy_type.upper()} Server is running...")
|
|
||||||
|
|
||||||
|
@self.fastapi.get("/health")
|
||||||
|
def health_check():
|
||||||
|
return {"status": 0}
|
||||||
|
|
||||||
|
@self.fastapi.get("/idle_time")
|
||||||
|
def idle_time():
|
||||||
|
return {"status": 0, "idle_time": self.get_idle_time()}
|
||||||
|
|
||||||
|
def start_fastapi(app, port: int = 80):
|
||||||
|
"""在独立线程中启动 FastAPI"""
|
||||||
|
print(f"Starting FastAPI HTTP server on port {port}...")
|
||||||
|
uvicorn.run(app, host="0.0.0.0", port=port, log_level="info")
|
||||||
|
|
||||||
|
threading.Thread(target=start_fastapi, args=(self.fastapi,), daemon=True).start()
|
||||||
|
|
||||||
|
print(f"Lerobot {self.policy_type.upper()} Server is running...")
|
||||||
self.server.loop_forever()
|
self.server.loop_forever()
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|
||||||
server = LerobotInferenceServer(
|
server = LerobotInferenceServer(
|
||||||
checkpoint=checkpoint, policy_type=model, host="0.0.0.0", port=server_port, timeout=3600
|
checkpoint=checkpoint, policy_type=model, host="0.0.0.0", port=server_port
|
||||||
)
|
)
|
||||||
server.run()
|
server.run()
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user