Update path prefixes for existing scalers and add HTTPScaledObjects for GroundingDINO and Selective-IGEV

This commit is contained in:
yangbin 2026-03-18 14:56:23 +08:00
parent f3528088df
commit ebef68aa2c
4 changed files with 79 additions and 2 deletions

View File

@ -7,7 +7,8 @@ spec:
hosts: hosts:
- annomidware.d-robotics.cc - annomidware.d-robotics.cc
pathPrefixes: pathPrefixes:
- /v1/predict - /classification/clip/v1/predict
- /classification/clip/v1/embedding/
# 目标 Deployment 或 StatefulSet # 目标 Deployment 或 StatefulSet
scaleTargetRef: scaleTargetRef:

View File

@ -0,0 +1,38 @@
# KEDA HTTP add-on scaler for the GroundingDINO detection service.
# Routes matching the host and path prefix below drive the replica count of
# the `groundingdino` Deployment.
# NOTE(review): metadata.name is spelled "grouding..." while the Deployment,
# Service and path prefix all use "groundingdino". Renaming creates a new
# object, so confirm whether this spelling is already deployed before fixing.
apiVersion: http.keda.sh/v1alpha1
kind: HTTPScaledObject
metadata:
  name: groudingdino-http-scaler
  namespace: bj1-dcloud
spec:
  hosts:
    - annomidware.d-robotics.cc
  pathPrefixes:
    - /detection/groundingdino/v1/predict
  # Target Deployment or StatefulSet
  scaleTargetRef:
    name: groundingdino
    kind: Deployment
    apiVersion: apps/v1
    service: groundingdino-svc  # backend service referenced by the HTTPRoute
    port: 80
  # Replica configuration
  # With min 0 / max 1 this object only toggles between zero and one replica.
  replicas:
    min: 0  # allow scale-to-zero to save resources when idle
    max: 1  # maximum replicas (adjust to actual demand)
  # Scaling strategy
  scalingMetric:
    # Alternative rate-based metric, kept for reference:
    # requestRate:
    #   granularity: 1s
    #   targetValue: 2  # scale out at 2 requests per second
    #   window: 1m  # 1-minute window
    concurrency:
      targetValue: 3
  # Scaling behaviour control
  # presumably seconds, per the KEDA HTTP add-on reference — TODO confirm
  scaledownPeriod: 300
  # NOTE(review): targetPendingRequests is deprecated in the KEDA HTTP add-on
  # in favour of scalingMetric; with concurrency.targetValue set above this
  # value is presumably ignored — confirm and remove.
  targetPendingRequests: 8  # pending-request threshold

View File

@ -7,7 +7,7 @@ spec:
hosts: hosts:
- annomidware.d-robotics.cc - annomidware.d-robotics.cc
pathPrefixes: pathPrefixes:
- /v1/predict - /segmentation/sam/v1/predict
# 目标 Deployment 或 StatefulSet # 目标 Deployment 或 StatefulSet
scaleTargetRef: scaleTargetRef:

View File

@ -0,0 +1,38 @@
# KEDA HTTP add-on scaler for the Selective-IGEV stereo service.
# Routes matching the host and path prefix below drive the replica count of
# the `selective-igev` Deployment.
apiVersion: http.keda.sh/v1alpha1
kind: HTTPScaledObject
metadata:
  name: selective-igev-http-scaler
  namespace: bj1-dcloud
spec:
  hosts:
    - annomidware.d-robotics.cc
  pathPrefixes:
    - /stereo/selective-igev/v1/predict
  # Target Deployment or StatefulSet
  scaleTargetRef:
    name: selective-igev
    kind: Deployment
    apiVersion: apps/v1
    service: selective-igev-svc  # backend service referenced by the HTTPRoute
    port: 80
  # Replica configuration
  # With min 0 / max 1 this object only toggles between zero and one replica.
  replicas:
    min: 0  # allow scale-to-zero to save resources when idle
    max: 1  # maximum replicas (adjust to actual demand)
  # Scaling strategy
  scalingMetric:
    # Alternative rate-based metric, kept for reference:
    # requestRate:
    #   granularity: 1s
    #   targetValue: 2  # scale out at 2 requests per second
    #   window: 1m  # 1-minute window
    concurrency:
      targetValue: 3
  # Scaling behaviour control
  # presumably seconds, per the KEDA HTTP add-on reference — TODO confirm
  scaledownPeriod: 300
  # NOTE(review): targetPendingRequests is deprecated in the KEDA HTTP add-on
  # in favour of scalingMetric; with concurrency.targetValue set above this
  # value is presumably ignored — confirm and remove.
  targetPendingRequests: 8  # pending-request threshold