k8s-autoScale-temp/keda/http-add-on/httpscaledobject.yaml
2026-03-04 15:58:27 +08:00

138 lines
3.8 KiB
YAML
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# HTTPScaledObject for the KEDA HTTP add-on: scales the dstereox Deployment
# based on HTTP traffic for the host and path prefix listed under spec.
apiVersion: http.keda.sh/v1alpha1
kind: HTTPScaledObject
metadata:
  name: dstereox-http-scaler2
  namespace: bj2-dcloud
  # labels:
  #   app.kubernetes.io/name: dstereox
  #   app.kubernetes.io/instance: dcloud-dstereox
spec:
  hosts:
    - annomidware-dev.d-robotics.cc
  pathPrefixes:
    - /v1/predict
  # Target Deployment or StatefulSet
  scaleTargetRef:
    name: dstereox
    kind: Deployment
    apiVersion: apps/v1
    service: dstereox-svc  # backend Service referenced by the HTTPRoute
    port: 80
  # Replica bounds
  replicas:
    min: 0  # allow scale-to-zero to save resources while idle
    max: 2  # maximum replica count (tune to actual demand)
  # Scaling metric
  scalingMetric:
    requestRate:
      granularity: 1s
      targetValue: 2  # target request rate: 2 requests per second
      window: 1m  # 1-minute time window
    # # Settings for long-running requests
    # # NOTE(review): responseTime does not appear to be a documented
    # # scalingMetric option — confirm before enabling.
    # responseTime:
    #   targetValue: 1000  # target response time: 1000 ms
    #   window: 1m
  # Scaling behaviour
  scaledownPeriod: 300
  # Threshold of pending (in-flight) requests.
  # NOTE(review): believed to be deprecated in recent HTTP add-on releases in
  # favour of spec.scalingMetric; confirm how it interacts with requestRate.
  targetPendingRequests: 8
  # Scaling rate limits
  # NOTE(review): `advanced` is a ScaledObject field; confirm HTTPScaledObject
  # accepts it before uncommenting.
  # advanced:
  #   horizontalPodAutoscalerConfig:
  #     behavior:
  #       scaleDown:
  #         stabilizationWindowSeconds: 300  # 5-minute scale-down window
  #         policies:
  #           - type: Percent
  #             value: 50  # remove at most 50% per period
  #             periodSeconds: 60
  #           - type: Pods
  #             value: 1  # remove at most 1 Pod per period
  #             periodSeconds: 60
  #         selectPolicy: Min  # pick the most conservative policy
  #       scaleUp:
  #         stabilizationWindowSeconds: 0  # scale up immediately
  #         policies:
  #           - type: Percent
  #             value: 100  # add at most 100% per period
  #             periodSeconds: 15
  #           - type: Pods
  #             value: 4  # add at most 4 Pods per period
  #             periodSeconds: 15
  #         selectPolicy: Max  # pick the most aggressive policy
  # Gateway API route configuration (corresponding to the HTTPRoute).
  # NOTE(review): no route definition follows this heading.
---
# Optional ScaledObject, for use when additional KEDA triggers (such as
# GPU/CPU metrics) are needed. The whole resource is commented out, so this
# document is currently empty.
# apiVersion: keda.sh/v1alpha1
# kind: ScaledObject
# metadata:
#   name: dstereox-combined-scaler
#   namespace: bj2-dcloud
#   labels:
#     app.kubernetes.io/name: dstereox
#     app.kubernetes.io/instance: dcloud-dstereox
# spec:
#   scaleTargetRef:
#     name: dstereox  # same Deployment the HTTPScaledObject targets
#     kind: Deployment
#     apiVersion: apps/v1
#   minReplicaCount: 0
#   maxReplicaCount: 10
#   pollingInterval: 30
#   cooldownPeriod: 300
#   triggers:
#     # HTTP request queue trigger (provided by the KEDA HTTP add-on)
#     # NOTE(review): confirm the metadata keys against the installed
#     # add-on's external scaler before enabling.
#     - type: external
#       metadata:
#         scalerAddress: keda-http-add-on-external-scaler.keda:9090
#         service: dstereox-svc
#         namespace: bj2-dcloud
#         targetPendingRequests: "10"
#     # CPU utilization trigger (optional)
#     - type: cpu
#       metricType: Utilization
#       metadata:
#         value: "80"  # scale out when CPU utilization exceeds 80%
#     # Memory utilization trigger (optional)
#     - type: memory
#       metricType: Utilization
#       metadata:
#         value: "80"  # scale out when memory utilization exceeds 80%
#   # Scaling behaviour
#   advanced:
#     horizontalPodAutoscalerConfig:
#       behavior:
#         scaleDown:
#           stabilizationWindowSeconds: 300
#           policies:
#             - type: Percent
#               value: 50
#               periodSeconds: 60
#             - type: Pods
#               value: 2
#               periodSeconds: 60
#           selectPolicy: Min
#         scaleUp:
#           stabilizationWindowSeconds: 0
#           policies:
#             - type: Percent
#               value: 100
#               periodSeconds: 15
#             - type: Pods
#               value: 4
#               periodSeconds: 15
#           selectPolicy: Max