diff --git a/inference-scaled-config/clip/http-add-on/httpscaledobject.yaml b/inference-scaled-config/clip/http-add-on/httpscaledobject.yaml
new file mode 100644
index 0000000..4d83a6a
--- /dev/null
+++ b/inference-scaled-config/clip/http-add-on/httpscaledobject.yaml
@@ -0,0 +1,38 @@
+apiVersion: http.keda.sh/v1alpha1
+kind: HTTPScaledObject
+metadata:
+  name: clip-http-scaler
+  namespace: bj1-dcloud
+spec:
+  # NOTE(review): segment-http-scaler uses this same host and path prefix;
+  # confirm the two routes cannot shadow each other in the interceptor.
+  hosts:
+    - annomidware.d-robotics.cc
+  pathPrefixes:
+    - /v1/predict
+
+  # Workload to scale, plus the Service and port that receive its traffic.
+  scaleTargetRef:
+    name: clip
+    kind: Deployment
+    apiVersion: apps/v1
+    service: clip-svc
+    port: 80
+
+  # Replica bounds.
+  replicas:
+    min: 0  # allow scale-to-zero while idle
+    max: 1  # upper bound; raise if demand requires
+
+  # Scaling metric: concurrency is active; the request-rate variant is
+  # kept below, disabled, for reference.
+  scalingMetric:
+    # requestRate:
+    #   granularity: 10s
+    #   targetValue: 2
+    #   window: 1m  # one-minute window
+    concurrency:
+      targetValue: 6
+
+  # Scale-down cooldown (seconds, per the HTTPScaledObject reference).
+  scaledownPeriod: 300
diff --git a/inference-scaled-config/clip/http-add-on/referenceGrant.yaml b/inference-scaled-config/clip/http-add-on/referenceGrant.yaml
new file mode 100644
index 0000000..befdbf7
--- /dev/null
+++ b/inference-scaled-config/clip/http-add-on/referenceGrant.yaml
@@ -0,0 +1,16 @@
+apiVersion: gateway.networking.k8s.io/v1beta1
+kind: ReferenceGrant
+metadata:
+  name: allow-httproute-from-bj1-dcloud
+  namespace: keda
+spec:
+  # Allow HTTPRoutes in bj1-dcloud to reference the interceptor proxy
+  # Service that lives in this (keda) namespace.
+  from:
+    - group: gateway.networking.k8s.io
+      kind: HTTPRoute
+      namespace: bj1-dcloud
+  to:
+    - group: ""
+      kind: Service
+      name: keda-add-ons-http-interceptor-proxy
diff --git a/inference-scaled-config/segment/http-add-on/httpscaledobject.yaml b/inference-scaled-config/segment/http-add-on/httpscaledobject.yaml
new file mode 100644
index 0000000..1050186
--- /dev/null
+++ b/inference-scaled-config/segment/http-add-on/httpscaledobject.yaml
@@ -0,0 +1,39 @@
+apiVersion: http.keda.sh/v1alpha1
+kind: HTTPScaledObject
+metadata:
+  name: segment-http-scaler
+  namespace: bj1-dcloud
+spec:
+  # NOTE(review): clip-http-scaler uses this same host and path prefix;
+  # confirm the two routes cannot shadow each other in the interceptor.
+  hosts:
+    - annomidware.d-robotics.cc
+  pathPrefixes:
+    - /v1/predict
+
+  # Workload to scale, plus the Service and port that receive its traffic.
+  scaleTargetRef:
+    name: sam
+    kind: Deployment
+    apiVersion: apps/v1
+    service: sam-svc
+    port: 80
+
+  # Replica bounds.
+  replicas:
+    min: 0  # allow scale-to-zero while idle
+    max: 1  # upper bound; raise if demand requires
+
+  # Scaling metric: concurrency is active; the request-rate variant is
+  # kept below, disabled, for reference. The deprecated
+  # targetPendingRequests field is intentionally not set alongside it.
+  scalingMetric:
+    # requestRate:
+    #   granularity: 1s
+    #   targetValue: 2
+    #   window: 1m  # one-minute window
+    concurrency:
+      targetValue: 3
+
+  # Scale-down cooldown (seconds, per the HTTPScaledObject reference).
+  scaledownPeriod: 300