# HTTPScaledObject (KEDA HTTP add-on) for the dstereox workload; the routing
# and scaling settings are declared under `spec`.
apiVersion: http.keda.sh/v1alpha1
kind: HTTPScaledObject
metadata:
  namespace: bj2-dcloud
  name: dstereox-http-scaler2
  # Optional labels, currently disabled:
  # labels:
  #   app.kubernetes.io/name: dstereox
  #   app.kubernetes.io/instance: dcloud-dstereox
spec:
  # Hosts this scaling rule applies to.
  hosts:
    - annomidware-dev.d-robotics.cc
  # URL path prefixes this scaling rule applies to.
  pathPrefixes:
    - /v1/predict
  
  # Scale target (a Deployment or StatefulSet) plus the Service in front of it.
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: dstereox
    # Backend Service referenced by the HTTPRoute, and the port to reach it on.
    service: dstereox-svc
    port: 80

  # Replica bounds.
  replicas:
    # 0 allows scale-to-zero, saving resources while the service is idle.
    min: 0
    # Upper bound on replicas; tune to actual demand.
    max: 2

  # Scaling policy: scale on request rate.
  scalingMetric:
    requestRate:
      # Target request rate used for the scaling decision.
      # NOTE(review): the previous comment here claimed 10 requests per second
      # while the value is 2 — confirm which one is intended.
      targetValue: 2
      # Aggregation window for the request-rate calculation (1 minute).
      window: 1m
      # Granularity of the aggregated request counts.
      granularity: 1s

    # Disabled draft for long-running requests.
    # NOTE(review): responseTime does not appear among the documented
    # scalingMetric options (requestRate, concurrency) — verify before enabling.
    # responseTime:
    #   targetValue: 1000  # target response time of 1000 ms
    #   window: 1m

  # Scale-down behaviour control.
  # Cooldown, in seconds, before the workload is scaled back down once traffic
  # stops.
  scaledownPeriod: 300
  # targetPendingRequests is deprecated in favour of
  # scalingMetric.concurrency.targetValue, and this manifest already scales on
  # scalingMetric.requestRate, so the legacy pending-request threshold is
  # disabled here rather than left as a misleading knob.
  # NOTE(review): confirm against the installed HTTP add-on version before
  # restoring it.
  # targetPendingRequests: 8  # pending-request threshold
    
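    # Scaling rate limits (disabled).
    # NOTE(review): advanced.horizontalPodAutoscalerConfig is a keda.sh
    # ScaledObject field; confirm HTTPScaledObject accepts it before uncommenting.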
    # advanced:
    #   horizontalPodAutoscalerConfig:
    #     behavior:
    #       scaleDown:
    #         stabilizationWindowSeconds: 300  # 缩容稳定窗口 5 分钟
    #         policies:
    #         - type: Percent
    #           value: 50  # 每次最多缩容 50%
    #           periodSeconds: 60
    #         - type: Pods
    #           value: 1  # scale down by at most 1 Pod per period
    #           periodSeconds: 60
    #         selectPolicy: Min  # 选择最保守的策略
    #       scaleUp:
    #         stabilizationWindowSeconds: 0  # 立即扩容
    #         policies:
    #         - type: Percent
    #           value: 100  # 每次最多扩容 100%
    #           periodSeconds: 15
    #         - type: Pods
    #           value: 4  # 每次最多扩容 4 个 Pod
    #           periodSeconds: 15
    #         selectPolicy: Max  # 选择最激进的策略

  # Gateway API routing (the matching HTTPRoute) is not configured in this document.
  

---
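# Optional: a plain ScaledObject, for when extra KEDA triggers (e.g. GPU/CPU metrics) are needed.
# NOTE(review): the HTTP add-on normally generates its own ScaledObject for an
# HTTPScaledObject; a second one targeting the same Deployment may conflict —
# verify before enabling this draft.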
# apiVersion: keda.sh/v1alpha1
# kind: ScaledObject
# metadata:
#   name: dstereox-combined-scaler
#   namespace: bj2-dcloud
#   labels:
#     app.kubernetes.io/name: dstereox
#     app.kubernetes.io/instance: dcloud-dstereox
# spec:
#   scaleTargetRef:
#     name: dstereox  # 与 HTTPScaledObject 指向同一 Deployment
#     kind: Deployment
#     apiVersion: apps/v1

#   minReplicaCount: 0
#   maxReplicaCount: 10
#   pollingInterval: 30
#   cooldownPeriod: 300

#   triggers:
#     # HTTP 请求队列触发器（由 KEDA HTTP Add-on 提供）
#     - type: external
#       metadata:
#         scalerAddress: keda-http-add-on-external-scaler.keda:9090
#         service: dstereox-svc
#         namespace: bj2-dcloud
#         targetPendingRequests: "10"

#     # CPU 利用率触发器（可选）
#     - type: cpu
#       metricType: Utilization
#       metadata:
#         value: "80"  # CPU 使用率超过 80% 时扩容

#     # 内存利用率触发器（可选）
#     - type: memory
#       metricType: Utilization
#       metadata:
#         value: "80"  # 内存使用率超过 80% 时扩容

#   # 扩缩容行为
#   advanced:
#     horizontalPodAutoscalerConfig:
#       behavior:
#         scaleDown:
#           stabilizationWindowSeconds: 300
#           policies:
#           - type: Percent
#             value: 50
#             periodSeconds: 60
#           - type: Pods
#             value: 2
#             periodSeconds: 60
#           selectPolicy: Min
#         scaleUp:
#           stabilizationWindowSeconds: 0
#           policies:
#           - type: Percent
#             value: 100
#             periodSeconds: 15
#           - type: Pods
#             value: 4
#             periodSeconds: 15
#           selectPolicy: Max