update
This commit is contained in:
parent
8f2e773def
commit
f3528088df
@ -0,0 +1,34 @@
|
||||
---
# HTTPScaledObject that ties the "clip" Deployment (fronted by Service
# clip-svc:80) to the host and path prefix listed under spec.
# NOTE(review): segment-http-scaler in this same change set declares the same
# host and path prefix in the same namespace -- confirm the two objects are
# really meant to share one route.
apiVersion: http.keda.sh/v1alpha1
kind: HTTPScaledObject
metadata:
  name: clip-http-scaler
  namespace: bj1-dcloud
spec:
  hosts:
    - annomidware.d-robotics.cc
  pathPrefixes:
    - /v1/predict

  # Target Deployment or StatefulSet
  scaleTargetRef:
    name: clip
    kind: Deployment
    apiVersion: apps/v1
    service: clip-svc  # backend Service referenced by the HTTPRoute
    port: 80

  # Replica bounds
  replicas:
    min: 0  # allow scale-to-zero (saves resources while idle)
    max: 1  # maximum replica count (adjust to actual demand)

  # Scaling policy
  scalingMetric:
    # Disabled alternative: scale on request rate instead of concurrency.
    # requestRate:
    #   granularity: 10s
    #   targetValue: 2  # request-rate threshold that triggers scale-out
    #   window: 1m      # 1-minute window
    concurrency:
      targetValue: 6

  # Scale-down behaviour
  scaledownPeriod: 300
|
||||
14
inference-scaled-config/clip/http-add-on/referenceGrant.yaml
Normal file
14
inference-scaled-config/clip/http-add-on/referenceGrant.yaml
Normal file
@ -0,0 +1,14 @@
|
||||
---
# ReferenceGrant placed in the "keda" namespace: it lets HTTPRoute objects in
# namespace bj1-dcloud reference the Service
# keda-add-ons-http-interceptor-proxy as a cross-namespace backend.
apiVersion: gateway.networking.k8s.io/v1beta1
kind: ReferenceGrant
metadata:
  name: allow-httproute-from-bj1-dcloud
  namespace: keda
spec:
  # Who may make the reference
  from:
    - group: gateway.networking.k8s.io
      kind: HTTPRoute
      namespace: bj1-dcloud
  # What may be referenced ("" is the core API group)
  to:
    - group: ""
      kind: Service
      name: keda-add-ons-http-interceptor-proxy
|
||||
@ -0,0 +1,38 @@
|
||||
---
# HTTPScaledObject that ties the "sam" Deployment (fronted by Service
# sam-svc:80) to the host and path prefix listed under spec.
# NOTE(review): clip-http-scaler in this same change set declares the same
# host and path prefix in the same namespace -- confirm the two objects are
# really meant to share one route.
apiVersion: http.keda.sh/v1alpha1
kind: HTTPScaledObject
metadata:
  name: segment-http-scaler
  namespace: bj1-dcloud
spec:
  hosts:
    - annomidware.d-robotics.cc
  pathPrefixes:
    - /v1/predict

  # Target Deployment or StatefulSet
  scaleTargetRef:
    name: sam
    kind: Deployment
    apiVersion: apps/v1
    service: sam-svc  # backend Service referenced by the HTTPRoute
    port: 80

  # Replica bounds
  replicas:
    min: 0  # allow scale-to-zero (saves resources while idle)
    max: 1  # maximum replica count (adjust to actual demand)

  # Scaling policy
  scalingMetric:
    # Disabled alternative: scale on request rate instead of concurrency.
    # requestRate:
    #   granularity: 1s
    #   targetValue: 2  # request-rate threshold that triggers scale-out
    #   window: 1m      # 1-minute window
    concurrency:
      targetValue: 3

  # Scale-down behaviour
  scaledownPeriod: 300
  # Threshold on the number of requests waiting to be processed.
  # NOTE(review): this field appears to be deprecated in favour of
  # scalingMetric; with concurrency.targetValue set above it is presumably
  # ignored -- confirm against the installed add-on version and drop one.
  targetPendingRequests: 8
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user