---
# HTTPScaledObject for the KEDA HTTP Add-on.
# Scales the `dstereox` Deployment between 0 and 2 replicas based on the
# HTTP request rate observed for the host/path listed below.
apiVersion: http.keda.sh/v1alpha1
kind: HTTPScaledObject
metadata:
  name: dstereox-http-scaler2
  namespace: bj2-dcloud
  # labels:
  #   app.kubernetes.io/name: dstereox
  #   app.kubernetes.io/instance: dcloud-dstereox
spec:
  # Requests are matched on host + path prefix.
  hosts:
    - annomidware-dev.d-robotics.cc
  pathPrefixes:
    - /v1/predict

  # Target workload (Deployment or StatefulSet) to scale.
  scaleTargetRef:
    name: dstereox
    kind: Deployment
    apiVersion: apps/v1
    service: dstereox-svc  # backend Service referenced by the HTTPRoute
    port: 80

  # Replica bounds.
  replicas:
    min: 0  # allow scale-to-zero (saves resources while idle)
    max: 2  # upper bound; adjust to actual demand

  # Scaling metric.
  scalingMetric:
    requestRate:
      granularity: 1s
      targetValue: 2  # target of 2 requests per second
      window: 1m  # 1-minute aggregation window
    # # Settings for long-running requests
    # responseTime:
    #   targetValue: 1000  # target response time: 1000 ms
    #   window: 1m

  # Scaling behaviour controls.
  # NOTE(review): scaledownPeriod is presumably in seconds (300 = 5 minutes);
  # confirm against the installed add-on version.
  scaledownPeriod: 300
  # Pending-request threshold.
  # NOTE(review): newer HTTP Add-on releases document this field as deprecated
  # in favour of `scalingMetric.concurrency.targetValue`, and it may be ignored
  # while `scalingMetric.requestRate` is set -- confirm and remove if unused.
  targetPendingRequests: 8

  # Scaling rate limits (disabled)
  # advanced:
  #   horizontalPodAutoscalerConfig:
  #     behavior:
  #       scaleDown:
  #         stabilizationWindowSeconds: 300  # 5-minute scale-down stabilization window
  #         policies:
  #           - type: Percent
  #             value: 50  # remove at most 50% of replicas per period
  #             periodSeconds: 60
  #           - type: Pods
  #             value: 1  # remove at most 1 Pod per period
  #             periodSeconds: 60
  #         selectPolicy: Min  # pick the most conservative policy
  #       scaleUp:
  #         stabilizationWindowSeconds: 0  # scale up immediately
  #         policies:
  #           - type: Percent
  #             value: 100  # add at most 100% of replicas per period
  #             periodSeconds: 15
  #           - type: Pods
  #             value: 4  # add at most 4 Pods per period
  #             periodSeconds: 15
  #         selectPolicy: Max  # pick the most aggressive policy

# Gateway API route configuration (corresponds to your HTTPRoute):
# nothing is defined under this heading in this file.

# Optional: ScaledObject (only if extra KEDA triggers such as GPU/CPU metrics
# are needed). The document separator is kept inside the comment so that the
# file does not end with an empty YAML document; restore it when enabling.
# ---
# apiVersion: keda.sh/v1alpha1
# kind: ScaledObject
# metadata:
#   name: dstereox-combined-scaler
#   namespace: bj2-dcloud
#   labels:
#     app.kubernetes.io/name: dstereox
#     app.kubernetes.io/instance: dcloud-dstereox
# spec:
#   scaleTargetRef:
#     name: dstereox  # same Deployment the HTTPScaledObject points at
#     kind: Deployment
#     apiVersion: apps/v1
#   minReplicaCount: 0
#   maxReplicaCount: 10
#   pollingInterval: 30
#   cooldownPeriod: 300
#   triggers:
#     # HTTP request-queue trigger (provided by the KEDA HTTP Add-on)
#     - type: external
#       metadata:
#         scalerAddress: keda-http-add-on-external-scaler.keda:9090
#         service: dstereox-svc
#         namespace: bj2-dcloud
#         targetPendingRequests: "10"
#     # CPU utilization trigger (optional)
#     - type: cpu
#       metricType: Utilization
#       metadata:
#         value: "80"  # scale out when CPU utilization exceeds 80%
#     # Memory utilization trigger (optional)
#     - type: memory
#       metricType: Utilization
#       metadata:
#         value: "80"  # scale out when memory utilization exceeds 80%
#   # Scaling behaviour
#   advanced:
#     horizontalPodAutoscalerConfig:
#       behavior:
#         scaleDown:
#           stabilizationWindowSeconds: 300
#           policies:
#             - type: Percent
#               value: 50
#               periodSeconds: 60
#             - type: Pods
#               value: 2
#               periodSeconds: 60
#           selectPolicy: Min
#         scaleUp:
#           stabilizationWindowSeconds: 0
#           policies:
#             - type: Percent
#               value: 100
#               periodSeconds: 15
#             - type: Pods
#               value: 4
#               periodSeconds: 15
#           selectPolicy: Max