This commit is contained in:
yangbin 2026-03-04 15:58:27 +08:00
commit 8f2e773def
7 changed files with 532 additions and 0 deletions

0
README.MD Normal file
View File

View File

@ -0,0 +1,88 @@
# KEDA HTTP Add-on configuration (Helm values)
# Based on HTTPRoute: dstereox-public (namespace: bj2-dcloud)
# Backing service: dstereox-svc:80
# Request timeout: 3600s (1 hour)
# NOTE(review): key nesting is not visible in this view and the key names have
# not been checked against the chart's values.yaml - confirm before installing.
# Base settings
namespace: keda
nameOverride: keda-http-add-on
# Core: enable Gateway API support (required when routing through an HTTPRoute)
gatewayApi:
enabled: true
allowCrossNamespace: true # allow Gateway/HTTPRoute references across namespaces
# Disable Ingress support (only the Gateway API is used)
ingress:
enabled: false
# Interceptor settings - intercepts and proxies HTTP requests
interceptor:
replicaCount: 1 # at least 2 replicas are recommended in production to avoid a single point of failure
# Resource requests and limits
resources:
limits:
cpu: 1000m # adjust to the traffic volume
memory: 1Gi
requests:
cpu: 500m
memory: 512Mi
# Proxy timeout - matches the 3600s timeout of the HTTPRoute
proxy:
timeout: 3600s
# Liveness and readiness probe settings
livenessProbe:
initialDelaySeconds: 10
periodSeconds: 10
readinessProbe:
initialDelaySeconds: 5
periodSeconds: 5
# Operator settings - watches and manages HTTPScaledObject resources
operator:
replicaCount: 1 # single replica; NOTE(review): the original comment said "high-availability deployment", which needs more than one replica
# Namespaces to watch (the namespace the HTTPRoute lives in)
watchedNamespaces: ["bj2-dcloud"]
# Resource requests and limits
resources:
limits:
cpu: 500m
memory: 512Mi
requests:
cpu: 200m
memory: 256Mi
# Scaler settings - handles the scaling logic
scaler:
replicaCount: 1 # single replica; NOTE(review): the original comment said "high availability", which needs more than one replica
resources:
limits:
cpu: 500m
memory: 512Mi
requests:
cpu: 200m
memory: 256Mi
# Metrics server endpoint settings
metricsServer:
port: 9090
# RBAC settings
rbac:
create: true
# ServiceAccount settings
serviceAccount:
create: true
annotations: {}
# Global image settings (only needed when pulling from a private registry)
# images:
# pullPolicy: IfNotPresent
# pullSecrets: []

View File

@ -0,0 +1,137 @@
# HTTPScaledObject for the dstereox Deployment (KEDA HTTP add-on).
apiVersion: http.keda.sh/v1alpha1
kind: HTTPScaledObject
metadata:
  namespace: bj2-dcloud
  name: dstereox-http-scaler2
  # labels:
  #   app.kubernetes.io/name: dstereox
  #   app.kubernetes.io/instance: dcloud-dstereox
spec:
  hosts:
    - annomidware-dev.d-robotics.cc
  pathPrefixes:
    - /v1/predict
  # Target Deployment or StatefulSet
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: dstereox
    service: dstereox-svc  # backend service referenced by the HTTPRoute
    port: 80
  # Replica bounds
  replicas:
    min: 0  # scale to zero when idle to save resources
    max: 2  # upper bound; adjust to actual demand
  # Scaling metric
  scalingMetric:
    requestRate:
      granularity: 1s
      targetValue: 2  # target request rate of 2 (an earlier note said 10 per second, which did not match the value)
      window: 1m  # 1-minute window
    # # Settings for long-running requests
    # responseTime:
    #   targetValue: 1000  # target response time 1000ms
    #   window: 1m
  # Scaling behaviour
  scaledownPeriod: 300
  targetPendingRequests: 8  # threshold of requests waiting to be handled
  # Scaling rate limits
  # advanced:
  #   horizontalPodAutoscalerConfig:
  #     behavior:
  #       scaleDown:
  #         stabilizationWindowSeconds: 300  # 5-minute scale-down stabilisation window
  #         policies:
  #           - type: Percent
  #             value: 50  # remove at most 50% per period
  #             periodSeconds: 60
  #           - type: Pods
  #             value: 1  # remove at most 1 Pod per period
  #             periodSeconds: 60
  #         selectPolicy: Min  # pick the most conservative policy
  #       scaleUp:
  #         stabilizationWindowSeconds: 0  # scale up immediately
  #         policies:
  #           - type: Percent
  #             value: 100  # add at most 100% per period
  #             periodSeconds: 15
  #           - type: Pods
  #             value: 4  # add at most 4 Pods per period
  #             periodSeconds: 15
  #         selectPolicy: Max  # pick the most aggressive policy
  # Gateway API routing configuration (corresponds to the HTTPRoute)
---
# 可选:ScaledObject(如果需要额外的 KEDA 触发器,如 GPU/CPU 指标)
# apiVersion: keda.sh/v1alpha1
# kind: ScaledObject
# metadata:
# name: dstereox-combined-scaler
# namespace: bj2-dcloud
# labels:
# app.kubernetes.io/name: dstereox
# app.kubernetes.io/instance: dcloud-dstereox
# spec:
# scaleTargetRef:
# name: dstereox # 与 HTTPScaledObject 指向同一 Deployment
# kind: Deployment
# apiVersion: apps/v1
# minReplicaCount: 0
# maxReplicaCount: 10
# pollingInterval: 30
# cooldownPeriod: 300
# triggers:
# # HTTP 请求队列触发器(由 KEDA HTTP Add-on 提供)
# - type: external
# metadata:
# scalerAddress: keda-http-add-on-external-scaler.keda:9090
# service: dstereox-svc
# namespace: bj2-dcloud
# targetPendingRequests: "10"
# # CPU 利用率触发器(可选)
# - type: cpu
# metricType: Utilization
# metadata:
# value: "80" # CPU 使用率超过 80% 时扩容
# # 内存利用率触发器(可选)
# - type: memory
# metricType: Utilization
# metadata:
# value: "80" # 内存使用率超过 80% 时扩容
# # 扩缩容行为
# advanced:
# horizontalPodAutoscalerConfig:
# behavior:
# scaleDown:
# stabilizationWindowSeconds: 300
# policies:
# - type: Percent
# value: 50
# periodSeconds: 60
# - type: Pods
# value: 2
# periodSeconds: 60
# selectPolicy: Min
# scaleUp:
# stabilizationWindowSeconds: 0
# policies:
# - type: Percent
# value: 100
# periodSeconds: 15
# - type: Pods
# value: 4
# periodSeconds: 15
# selectPolicy: Max

View File

@ -0,0 +1,85 @@
# Secret holding the custom auth header that the keda-prom-creds
# TriggerAuthentication passes to the Prometheus scaler.
# SECURITY: a live bearer token used to be committed here in plain text.
# Treat that token as leaked and rotate it. The value below is a placeholder
# that must be rendered at deploy time (e.g. envsubst) or replaced by a
# Sealed Secret / External Secret; never commit the real token.
apiVersion: v1
kind: Secret
metadata:
  name: keda-prom-secret
  namespace: bj2-dcloud
stringData:
  customAuthHeader: "Authorization"
  customAuthValue: "Bearer ${CPROM_TOKEN}"  # placeholder, not a usable credential
---
# TriggerAuthentication mapping the two keys of the keda-prom-secret Secret
# onto the customAuthHeader / customAuthValue trigger parameters.
apiVersion: keda.sh/v1alpha1
kind: TriggerAuthentication
metadata:
  namespace: bj2-dcloud
  name: keda-prom-creds
spec:
  secretTargetRef:
    # Name of the HTTP header
    - name: keda-prom-secret
      key: customAuthHeader
      parameter: customAuthHeader
    # Value of the HTTP header
    - name: keda-prom-secret
      key: customAuthValue
      parameter: customAuthValue
---
# ScaledObject: scales the dstereox Deployment between 1 and 2 replicas on a
# CPU-utilisation query evaluated against Prometheus.
apiVersion: keda.sh/v1alpha1
kind: ScaledObject
metadata:
  name: gpu-metrics-scaledobject
  namespace: bj2-dcloud
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: dstereox
  minReplicaCount: 1
  maxReplicaCount: 2
  # idleReplicaCount: 0  # replicas to keep while no trigger is active
  pollingInterval: 30  # evaluate the triggers every 30 seconds
  # Seconds to wait after the last active trigger before scaling back to
  # zero/idle replicas (this is not a delay between scale-ups).
  cooldownPeriod: 300
  # initialCooldownPeriod: 0  # start checking metrics right after creation
  # fallback:  # Optional. Section to specify fallback options
  #   failureThreshold: 3  # Mandatory if fallback section is included
  #   replicas: 6  # Mandatory if fallback section is included
  #   behavior: {kind-of-behavior}  # Optional. Default: "static"
  triggers:
    # ---------------------------------------------------------
    # Trigger: CPU utilisation (Prometheus)
    # ---------------------------------------------------------
    - type: prometheus
      metadata:
        serverAddress: https://cprom.bj.baidubce.com/select/prometheus
        metricName: cpu_utilization
        # The Prometheus scaler needs a single-element result, so usage and
        # quota are each summed over all matching pods instead of being
        # grouped by pod (which yields one series per pod and fails as soon
        # as two pods match).
        # "or vector(1)" is applied after the sum so that it only serves as a
        # fallback denominator when no quota series exists; inside the sum it
        # would always be added because its label set never matches.
        query: >
          (
            sum(
              max by(pod) (
                irate(container_cpu_usage_seconds_total{
                  namespace="bj2-dcloud",
                  pod=~"dstereox.*"
                }[2m])
              )
            )
          )
          /
          (
            sum(
              max by(pod) (
                container_spec_cpu_quota{
                  namespace="bj2-dcloud",
                  pod=~"dstereox.*"
                } / 100000
              )
            )
            or vector(1)
          )
          * 100
        customHeaders: InstanceId=cprom-uz2ngrzbjr7n7
        threshold: "50"  # 50% utilisation: target value used to compute the desired replica count
        activationThreshold: "30"  # 30% utilisation: the scaler counts as active above this value
        authModes: "custom"
      authenticationRef:
        name: keda-prom-creds

View File

@ -0,0 +1,98 @@
# Secret holding the custom auth header that the keda-prom-creds
# TriggerAuthentication passes to the Prometheus scaler.
# SECURITY: a live bearer token used to be committed here in plain text.
# Treat that token as leaked and rotate it. The value below is a placeholder
# that must be rendered at deploy time (e.g. envsubst) or replaced by a
# Sealed Secret / External Secret; never commit the real token.
apiVersion: v1
kind: Secret
metadata:
  name: keda-prom-secret
  namespace: bj2-dcloud
stringData:
  customAuthHeader: "Authorization"
  customAuthValue: "Bearer ${CPROM_TOKEN}"  # placeholder, not a usable credential
---
# TriggerAuthentication mapping the two keys of the keda-prom-secret Secret
# onto the customAuthHeader / customAuthValue trigger parameters.
apiVersion: keda.sh/v1alpha1
kind: TriggerAuthentication
metadata:
  namespace: bj2-dcloud
  name: keda-prom-creds
spec:
  secretTargetRef:
    # Name of the HTTP header
    - name: keda-prom-secret
      key: customAuthHeader
      parameter: customAuthHeader
    # Value of the HTTP header
    - name: keda-prom-secret
      key: customAuthValue
      parameter: customAuthValue
---
# ScaledObject: scales the dstereox Deployment between 1 and 2 replicas from
# two Prometheus triggers (GPU utilisation and CPU utilisation).
apiVersion: keda.sh/v1alpha1
kind: ScaledObject
metadata:
  name: gpu-metrics-scaledobject
  namespace: bj2-dcloud
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: dstereox
  minReplicaCount: 1
  maxReplicaCount: 2
  # idleReplicaCount: 0  # replicas to keep while no trigger is active
  pollingInterval: 30  # evaluate the triggers every 30 seconds
  # Seconds to wait after the last active trigger before scaling back to
  # zero/idle replicas (this is not a delay between scale-ups).
  cooldownPeriod: 300
  # initialCooldownPeriod: 0  # start checking metrics right after creation
  # fallback:  # Optional. Section to specify fallback options
  #   failureThreshold: 3  # Mandatory if fallback section is included
  #   replicas: 6  # Mandatory if fallback section is included
  #   behavior: {kind-of-behavior}  # Optional. Default: "static"
  triggers:
    # ---------------------------------------------------------
    # Trigger 1: GPU utilisation (DCGM)
    # ---------------------------------------------------------
    - type: prometheus
      metadata:
        serverAddress: https://cprom.bj.baidubce.com/select/prometheus
        metricName: dcgm_gpu_utilization
        query: max(avg_over_time(DCGM_FI_DEV_GPU_UTIL{ pod_name=~"dstereox.*"}[2m]))
        customHeaders: InstanceId=cprom-uz2ngrzbjr7n7
        threshold: "80"  # 80% utilisation: target value used to compute the desired replica count
        activationThreshold: "60"  # 60% utilisation: the scaler counts as active above this value
        authModes: "custom"
      authenticationRef:
        name: keda-prom-creds
    # ---------------------------------------------------------
    # Trigger 2: CPU utilisation (Prometheus)
    # ---------------------------------------------------------
    - type: prometheus
      metadata:
        serverAddress: https://cprom.bj.baidubce.com/select/prometheus
        metricName: cpu_utilization
        # Total CPU usage of the matching pods divided by their total CPU
        # quota, as a percentage.
        # "or vector(1)" is applied after the sum so that it only serves as a
        # fallback denominator when no quota series exists; inside the sum it
        # would always be added (its empty label set never matches the
        # pod-labelled series) and inflate the denominator by one core.
        query: >
          (
            sum(
              max by(pod) (
                irate(container_cpu_usage_seconds_total{
                  namespace="bj2-dcloud",
                  pod=~"dstereox.*"
                }[10m])
              )
            )
          )
          /
          (
            sum(
              max by(pod) (
                container_spec_cpu_quota{
                  namespace="bj2-dcloud",
                  pod=~"dstereox.*"
                } / 100000
              )
            )
            or vector(1)
          )
          * 100
        customHeaders: InstanceId=cprom-uz2ngrzbjr7n7
        threshold: "80"  # 80% utilisation: target value used to compute the desired replica count
        activationThreshold: "60"  # 60% utilisation: the scaler counts as active above this value
        authModes: "custom"
      authenticationRef:
        name: keda-prom-creds

View File

@ -0,0 +1,62 @@
# Secret holding the custom auth header that the keda-prom-creds
# TriggerAuthentication passes to the Prometheus scaler.
# SECURITY: a live bearer token used to be committed here in plain text.
# Treat that token as leaked and rotate it. The value below is a placeholder
# that must be rendered at deploy time (e.g. envsubst) or replaced by a
# Sealed Secret / External Secret; never commit the real token.
apiVersion: v1
kind: Secret
metadata:
  name: keda-prom-secret
  namespace: bj2-dcloud
stringData:
  customAuthHeader: "Authorization"
  customAuthValue: "Bearer ${CPROM_TOKEN}"  # placeholder, not a usable credential
---
# TriggerAuthentication mapping the two keys of the keda-prom-secret Secret
# onto the customAuthHeader / customAuthValue trigger parameters.
apiVersion: keda.sh/v1alpha1
kind: TriggerAuthentication
metadata:
  namespace: bj2-dcloud
  name: keda-prom-creds
spec:
  secretTargetRef:
    # Name of the HTTP header
    - name: keda-prom-secret
      key: customAuthHeader
      parameter: customAuthHeader
    # Value of the HTTP header
    - name: keda-prom-secret
      key: customAuthValue
      parameter: customAuthValue
---
# ScaledObject: scales the dstereox Deployment between 1 and 2 replicas on
# GPU utilisation read from Prometheus.
apiVersion: keda.sh/v1alpha1
kind: ScaledObject
metadata:
  namespace: bj2-dcloud
  name: gpu-metrics-scaledobject
spec:
  scaleTargetRef:
    name: dstereox
    kind: Deployment
    apiVersion: apps/v1
  minReplicaCount: 1
  maxReplicaCount: 2
  # idleReplicaCount: 0  # replicas to keep while no trigger is active
  pollingInterval: 30  # evaluate the triggers every 30 seconds
  # Seconds to wait after the last active trigger before scaling back to
  # zero/idle replicas (this is not a delay between scale-ups).
  cooldownPeriod: 300
  # initialCooldownPeriod: 0  # start checking metrics right after creation
  # fallback:  # Optional. Section to specify fallback options
  #   failureThreshold: 3  # Mandatory if fallback section is included
  #   replicas: 6  # Mandatory if fallback section is included
  #   behavior: {kind-of-behavior}  # Optional. Default: "static"
  triggers:
    # ---------------------------------------------------------
    # Trigger 1: GPU utilisation (DCGM)
    # ---------------------------------------------------------
    - type: prometheus
      authenticationRef:
        name: keda-prom-creds
      metadata:
        serverAddress: https://cprom.bj.baidubce.com/select/prometheus
        metricName: dcgm_gpu_utilization
        # query: avg(avg_over_time(DCGM_FI_DEV_GPU_UTIL{namespace="bj2-dcloud",pod=~"dstereox-.*"}[3m]))
        query: max(avg_over_time(DCGM_FI_DEV_GPU_UTIL{pod_name=~"dstereox.*"}[3m]))
        customHeaders: InstanceId=cprom-uz2ngrzbjr7n7
        threshold: "50"  # 50% utilisation: target value used to compute the desired replica count
        activationThreshold: "30"  # 30% utilisation: the scaler counts as active above this value
        authModes: "custom"

View File

@ -0,0 +1,62 @@
# Secret holding the custom auth header that the keda-prom-creds
# TriggerAuthentication passes to the Prometheus scaler.
# SECURITY: a live bearer token used to be committed here in plain text.
# Treat that token as leaked and rotate it. The value below is a placeholder
# that must be rendered at deploy time (e.g. envsubst) or replaced by a
# Sealed Secret / External Secret; never commit the real token.
apiVersion: v1
kind: Secret
metadata:
  name: keda-prom-secret
  namespace: bj2-dcloud
stringData:
  customAuthHeader: "Authorization"
  customAuthValue: "Bearer ${CPROM_TOKEN}"  # placeholder, not a usable credential
---
# TriggerAuthentication mapping the two keys of the keda-prom-secret Secret
# onto the customAuthHeader / customAuthValue trigger parameters.
apiVersion: keda.sh/v1alpha1
kind: TriggerAuthentication
metadata:
  namespace: bj2-dcloud
  name: keda-prom-creds
spec:
  secretTargetRef:
    # Name of the HTTP header
    - name: keda-prom-secret
      key: customAuthHeader
      parameter: customAuthHeader
    # Value of the HTTP header
    - name: keda-prom-secret
      key: customAuthValue
      parameter: customAuthValue
---
# ScaledObject: scales the dstereox Deployment on the number of pending pod
# groups in the Volcano queue "dcloud" (0 replicas while idle, 1-2 when active).
apiVersion: keda.sh/v1alpha1
kind: ScaledObject
metadata:
  name: gpu-metrics-scaledobject
  namespace: bj2-dcloud
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: dstereox
  minReplicaCount: 1
  maxReplicaCount: 2
  idleReplicaCount: 0  # keep 0 replicas while no trigger is active
  pollingInterval: 30  # evaluate the triggers every 30 seconds
  # Seconds to wait after the last active trigger before scaling back to
  # the idle replica count (this is not a delay between scale-ups).
  cooldownPeriod: 300
  # initialCooldownPeriod: 0  # start checking metrics right after creation
  # fallback:  # Optional. Section to specify fallback options
  #   failureThreshold: 3  # Mandatory if fallback section is included
  #   replicas: 6  # Mandatory if fallback section is included
  #   behavior: {kind-of-behavior}  # Optional. Default: "static"
  triggers:
    # ---------------------------------------------------------
    # Trigger 1: pending pod groups in the Volcano queue
    # ---------------------------------------------------------
    - type: prometheus
      metadata:
        serverAddress: https://cprom.bj.baidubce.com/select/prometheus
        metricName: queue_pending_tasks
        query: sum(volcano_queue_pod_group_pending_count{queue_name="dcloud"})
        customHeaders: InstanceId=cprom-uz2ngrzbjr7n7
        threshold: "1"
        # NOTE(review): activation requires the metric to be strictly greater
        # than this value, so a single pending pod group (value 1) does not
        # wake the workload from 0 replicas - confirm this is intended, or
        # use "0".
        activationThreshold: "1"
        authModes: "custom"
      authenticationRef:
        name: keda-prom-creds