diff --git a/inference-scaled-config/clip/http-add-on/httpscaledobject.yaml b/inference-scaled-config/clip/http-add-on/httpscaledobject.yaml
index 4d83a6a..a3a3e76 100644
--- a/inference-scaled-config/clip/http-add-on/httpscaledobject.yaml
+++ b/inference-scaled-config/clip/http-add-on/httpscaledobject.yaml
@@ -7,7 +7,8 @@ spec:
   hosts:
     - annomidware.d-robotics.cc
   pathPrefixes:
-    - /v1/predict
+    - /classification/clip/v1/predict
+    - /classification/clip/v1/embedding/
 
   # 目标 Deployment 或 StatefulSet
   scaleTargetRef:
diff --git a/inference-scaled-config/groudingdino/http-add-on/httpscaledobject.yaml b/inference-scaled-config/groudingdino/http-add-on/httpscaledobject.yaml
new file mode 100644
index 0000000..2905c5d
--- /dev/null
+++ b/inference-scaled-config/groudingdino/http-add-on/httpscaledobject.yaml
@@ -0,0 +1,38 @@
+apiVersion: http.keda.sh/v1alpha1
+kind: HTTPScaledObject
+metadata:
+  name: groudingdino-http-scaler  # NOTE(review): spelled "grouding", target is "groundingdino" - confirm
+  namespace: bj1-dcloud
+spec:
+  hosts:
+    - annomidware.d-robotics.cc
+  pathPrefixes:
+    - /detection/groundingdino/v1/predict
+
+  # Target Deployment or StatefulSet
+  scaleTargetRef:
+    name: groundingdino
+    kind: Deployment
+    apiVersion: apps/v1
+    service: groundingdino-svc  # backend service referenced by the HTTPRoute
+    port: 80
+
+  # Replica count configuration
+  replicas:
+    min: 0  # allow scale-to-zero (saves resources when idle)
+    max: 1  # maximum replicas (adjust to actual demand)
+
+  # Scaling strategy
+  scalingMetric:
+    # requestRate:
+    #   granularity: 1s
+    #   targetValue: 2  # scale out at 2 requests per second
+    #   window: 1m      # 1-minute time window
+    concurrency:
+      targetValue: 3
+
+
+  # Scaling behaviour control
+  scaledownPeriod: 300
+  # NOTE(review): believed deprecated in favour of scalingMetric in newer add-on releases - confirm
+  targetPendingRequests: 8  # threshold of pending requests
diff --git a/inference-scaled-config/segment/http-add-on/httpscaledobject.yaml b/inference-scaled-config/segment/http-add-on/httpscaledobject.yaml
index 1050186..0d8c681 100644
--- a/inference-scaled-config/segment/http-add-on/httpscaledobject.yaml
+++ b/inference-scaled-config/segment/http-add-on/httpscaledobject.yaml
@@ -7,7 +7,7 @@ spec:
   hosts:
     - annomidware.d-robotics.cc
   pathPrefixes:
-    - /v1/predict
+    - /segmentation/sam/v1/predict
 
   # 目标 Deployment 或 StatefulSet
   scaleTargetRef:
diff --git a/inference-scaled-config/selective-igev/http-add-on/httpscaledobject.yaml b/inference-scaled-config/selective-igev/http-add-on/httpscaledobject.yaml
new file mode 100644
index 0000000..d7c9572
--- /dev/null
+++ b/inference-scaled-config/selective-igev/http-add-on/httpscaledobject.yaml
@@ -0,0 +1,38 @@
+apiVersion: http.keda.sh/v1alpha1
+kind: HTTPScaledObject
+metadata:
+  name: selective-igev-http-scaler
+  namespace: bj1-dcloud
+spec:
+  hosts:
+    - annomidware.d-robotics.cc
+  pathPrefixes:
+    - /stereo/selective-igev/v1/predict
+
+  # Target Deployment or StatefulSet
+  scaleTargetRef:
+    name: selective-igev
+    kind: Deployment
+    apiVersion: apps/v1
+    service: selective-igev-svc  # backend service referenced by the HTTPRoute
+    port: 80
+
+  # Replica count configuration
+  replicas:
+    min: 0  # allow scale-to-zero (saves resources when idle)
+    max: 1  # maximum replicas (adjust to actual demand)
+
+  # Scaling strategy
+  scalingMetric:
+    # requestRate:
+    #   granularity: 1s
+    #   targetValue: 2  # scale out at 2 requests per second
+    #   window: 1m      # 1-minute time window
+    concurrency:
+      targetValue: 3
+
+
+  # Scaling behaviour control
+  scaledownPeriod: 300
+  # NOTE(review): believed deprecated in favour of scalingMetric in newer add-on releases - confirm
+  targetPendingRequests: 8  # threshold of pending requests