commit 8f2e773defda17db4859a4ff8a6fe1a496dc570b
Author: yangbin
Date:   Wed Mar 4 15:58:27 2026 +0800

    init

diff --git a/README.MD b/README.MD
new file mode 100644
index 0000000..e69de29
diff --git a/keda/http-add-on/charts/values.yaml b/keda/http-add-on/charts/values.yaml
new file mode 100644
index 0000000..3f04eae
--- /dev/null
+++ b/keda/http-add-on/charts/values.yaml
@@ -0,0 +1,88 @@
+# KEDA HTTP Add-on configuration
+# Based on HTTPRoute: dstereox-public (namespace: bj2-dcloud)
+# Backing service: dstereox-svc:80
+# Request timeout: 3600s (1 hour)
+
+# Basic settings. NOTE(review): confirm every key in this file against the chart's values schema; Helm does not reject unknown keys unless the chart ships a strict schema.
+namespace: keda
+nameOverride: keda-http-add-on
+
+# Core: enable Gateway API support (required when routing through an HTTPRoute)
+gatewayApi:
+  enabled: true
+  allowCrossNamespace: true  # allow Gateway/HTTPRoute references across namespaces
+
+# Disable Ingress support (Gateway API only)
+ingress:
+  enabled: false
+
+# Interceptor - intercepts and proxies HTTP requests
+interceptor:
+  replicaCount: 1  # single replica; use at least 2 in production to avoid a single point of failure
+
+  # Resource settings
+  resources:
+    limits:
+      cpu: 1000m  # tune to traffic
+      memory: 1Gi
+    requests:
+      cpu: 500m
+      memory: 512Mi
+
+  # Proxy timeout - matches the 3600s timeout of the HTTPRoute
+  proxy:
+    timeout: 3600s
+
+  # Liveness and readiness probe settings
+  livenessProbe:
+    initialDelaySeconds: 10
+    periodSeconds: 10
+  readinessProbe:
+    initialDelaySeconds: 5
+    periodSeconds: 5
+
+# Operator - watches and manages HTTPScaledObject resources
+operator:
+  replicaCount: 1  # single replica, i.e. NOT highly available; raise for HA
+
+  # Watched namespaces (same namespace as the HTTPRoute)
+  watchedNamespaces: ["bj2-dcloud"]
+
+  # Resource settings
+  resources:
+    limits:
+      cpu: 500m
+      memory: 512Mi
+    requests:
+      cpu: 200m
+      memory: 256Mi
+
+# Scaler - handles the scaling logic
+scaler:
+  replicaCount: 1  # single replica, i.e. NOT highly available; raise for HA
+
+  resources:
+    limits:
+      cpu: 500m
+      memory: 512Mi
+    requests:
+      cpu: 200m
+      memory: 256Mi
+
+  # Metrics server endpoint settings
+  metricsServer:
+    port: 9090
+
+# RBAC settings
+rbac:
+  create: true
+
+# ServiceAccount settings
+serviceAccount:
+  create: true
+  annotations: {}
+
+# Global image settings (when a private image registry is needed)
+# images:
+#   pullPolicy: IfNotPresent
+#   pullSecrets: []
\ No newline at end of file
diff --git a/keda/http-add-on/httpscaledobject.yaml b/keda/http-add-on/httpscaledobject.yaml
new file mode 100644
index 0000000..e51d188
--- /dev/null
+++ b/keda/http-add-on/httpscaledobject.yaml
@@ -0,0 +1,137 @@
+apiVersion: http.keda.sh/v1alpha1
+kind: HTTPScaledObject
+metadata:
+  name: dstereox-http-scaler2
+  namespace: bj2-dcloud
+  # labels:
+  #   app.kubernetes.io/name: dstereox
+  #   app.kubernetes.io/instance: dcloud-dstereox
+spec:
+  hosts:
+    - annomidware-dev.d-robotics.cc
+  pathPrefixes:
+    - /v1/predict
+
+  # Target Deployment or StatefulSet
+  scaleTargetRef:
+    name: dstereox
+    kind: Deployment
+    apiVersion: apps/v1
+    service: dstereox-svc  # backend service referenced by the HTTPRoute
+    port: 80
+
+  # Replica bounds
+  replicas:
+    min: 0  # allow scale-to-zero (saves resources while idle)
+    max: 2  # upper bound; adjust to actual demand
+
+  # Scaling metric
+  scalingMetric:
+    requestRate:
+      granularity: 1s
+      targetValue: 2  # target rate: 2 requests per granularity interval (1s)
+      window: 1m  # 1-minute aggregation window
+
+  # # Settings for long-running requests
+  # responseTime:
+  #   targetValue: 1000  # target response time 1000ms
+  #   window: 1m
+
+  # Scale-down behaviour
+  scaledownPeriod: 300
+  targetPendingRequests: 8  # pending-request threshold. NOTE(review): deprecated in favor of scalingMetric in newer add-on releases; confirm it still has an effect next to requestRate
+
+  # Scaling rate limits
+  # advanced:
+  #   horizontalPodAutoscalerConfig:
+  #     behavior:
+  #       scaleDown:
+  #         stabilizationWindowSeconds: 300  # 5-minute scale-down stabilization window
+  #         policies:
+  #         - type: Percent
+  #           value: 50  # remove at most 50% per period
+  #           periodSeconds: 60
+  #         - type: Pods
+  #           value: 1  # remove at most 1 Pod per period
+  #           periodSeconds: 60
+  #         selectPolicy: Min  # pick the most conservative policy
+  #       scaleUp:
+  #         stabilizationWindowSeconds: 0  # scale up immediately
+  #         policies:
+  #         - type: Percent
+  #           value: 100  # add at most 100% per period
+  #           periodSeconds: 15
+  #         - type: Pods
+  #           value: 4  # add at most 4 Pods per period
+  #           periodSeconds: 15
+  #         selectPolicy: Max  # pick the most aggressive policy
+
+  # Gateway API routing configuration (for the HTTPRoute) - NOTE(review): nothing follows this heading
+
+
+---
+# Optional: ScaledObject (if extra KEDA triggers such as GPU/CPU metrics are needed)
+# apiVersion: keda.sh/v1alpha1
+# kind: ScaledObject
+# metadata:
+#   name: dstereox-combined-scaler
+#   namespace: bj2-dcloud
+#   labels:
+#     app.kubernetes.io/name: dstereox
+#     app.kubernetes.io/instance: dcloud-dstereox
+# spec:
+#   scaleTargetRef:
+#     name: dstereox  # same Deployment as the HTTPScaledObject
+#     kind: Deployment
+#     apiVersion: apps/v1
+
+#   minReplicaCount: 0
+#   maxReplicaCount: 10
+#   pollingInterval: 30
+#   cooldownPeriod: 300
+
+#   triggers:
+#   # HTTP request queue trigger (provided by the KEDA HTTP Add-on)
+#   - type: external
+#     metadata:
+#       scalerAddress: keda-http-add-on-external-scaler.keda:9090
+#       service: dstereox-svc
+#       namespace: bj2-dcloud
+#       targetPendingRequests: "10"
+
+#   # CPU utilization trigger (optional)
+#   - type: cpu
+#     metricType: Utilization
+#     metadata:
+#       value: "80"  # scale out above 80% CPU utilization
+
+#   # Memory utilization trigger (optional)
+#   - type: memory
+#     metricType: Utilization
+#     metadata:
+#       value: "80"  # scale out above 80% memory utilization
+
+#   # Scaling behaviour
+#   advanced:
+#     horizontalPodAutoscalerConfig:
+#       behavior:
+#         scaleDown:
+#           stabilizationWindowSeconds: 300
+#           policies:
+#           - type: Percent
+#             value: 50
+#             periodSeconds: 60
+#           - type: Pods
+#             value: 2
+#             periodSeconds: 60
+#           selectPolicy: Min
+#         scaleUp:
+#           stabilizationWindowSeconds: 0
+#           policies:
+#           - type: Percent
+#             value: 100
+#             periodSeconds: 15
+#           - type: Pods
+#             value: 4
+#             periodSeconds: 15
+#           selectPolicy: Max
diff --git a/keda/scaledobject_cpu_util.yaml b/keda/scaledobject_cpu_util.yaml
new file mode 100644
index 0000000..bdb6928
--- /dev/null
+++ b/keda/scaledobject_cpu_util.yaml
@@ -0,0 +1,85 @@
+apiVersion: v1
+kind: Secret
+metadata:
+  name: keda-prom-secret
+  namespace: bj2-dcloud
+stringData:
+  customAuthHeader: "Authorization"
+  customAuthValue: "Bearer <CPROM_TOKEN>"  # placeholder: supply the real token at deploy time, never commit it; rotate the token that was previously committed here
+
+---
+apiVersion: keda.sh/v1alpha1
+kind: TriggerAuthentication
+metadata:
+  name: keda-prom-creds
+  namespace: bj2-dcloud
+spec:
+  secretTargetRef:
+  - parameter: customAuthHeader
+    name: keda-prom-secret
+    key: customAuthHeader
+  - parameter: customAuthValue
+    name: keda-prom-secret
+    key: customAuthValue
+
+---
+apiVersion: keda.sh/v1alpha1
+kind: ScaledObject
+metadata:
+  name: gpu-metrics-scaledobject  # NOTE(review): named "gpu" but scales on CPU; this name, the Secret and the TriggerAuthentication are redefined in the sibling scaledobject_*.yaml files, so applying more than one overwrites the previous
+  namespace: bj2-dcloud
+spec:
+  scaleTargetRef:
+    apiVersion: apps/v1
+    kind: Deployment
+    name: dstereox
+  minReplicaCount: 1
+  maxReplicaCount: 2
+  # idleReplicaCount: 0  # run 0 replicas while no trigger is active
+  pollingInterval: 30  # check the triggers every 30 seconds
+  cooldownPeriod: 300  # wait 5 minutes after the last active trigger before scaling to zero
+  # initialCooldownPeriod: 0  # 0 = no extra delay after the ScaledObject is created
+
+  # fallback:  # Optional. Section to specify fallback options
+  #   failureThreshold: 3  # Mandatory if fallback section is included
+  #   replicas: 6  # Mandatory if fallback section is included
+  #   behavior: {kind-of-behavior}  # Optional. Default: "static"
+  triggers:
+  # ---------------------------------------------------------
+  # Trigger: CPU utilization (Prometheus)
+  # ---------------------------------------------------------
+  - type: prometheus
+    metadata:
+      serverAddress: https://cprom.bj.baidubce.com/select/prometheus
+      metricName: cpu_utilization
+      query: >  # wrapped in max(): by(pod) yields one series per pod, but the scaler needs a single-element result
+        max((
+          sum by(pod) (
+            max by(pod) (
+              irate(container_cpu_usage_seconds_total{
+                namespace="bj2-dcloud",
+                pod=~"dstereox.*"
+              }[2m])
+            )
+          )
+        )
+        /(
+          sum by(pod) (
+            max by(pod) (
+              container_spec_cpu_quota{
+                namespace="bj2-dcloud",
+                pod=~"dstereox.*"
+              } / 100000
+            )
+            or vector(1)
+          )
+        )
+        * 100)
+      customHeaders: InstanceId=cprom-uz2ngrzbjr7n7
+      threshold: "50"  # target value (50% utilization) used to compute the desired replica count
+      activationThreshold: "30"  # the scaler becomes active above 30% utilization
+      authModes: "custom"
+    authenticationRef:
+      name: keda-prom-creds
+
+  
\ No newline at end of file
diff --git a/keda/scaledobject_gpu_cpu_util.yaml b/keda/scaledobject_gpu_cpu_util.yaml
new file mode 100644
index 0000000..bdd657e
--- /dev/null
+++ b/keda/scaledobject_gpu_cpu_util.yaml
@@ -0,0 +1,98 @@
+apiVersion: v1
+kind: Secret
+metadata:
+  name: keda-prom-secret
+  namespace: bj2-dcloud
+stringData:
+  customAuthHeader: "Authorization"
+  customAuthValue: "Bearer <CPROM_TOKEN>"  # placeholder: supply the real token at deploy time, never commit it; rotate the token that was previously committed here
+
+---
+apiVersion: keda.sh/v1alpha1
+kind: TriggerAuthentication
+metadata:
+  name: keda-prom-creds
+  namespace: bj2-dcloud
+spec:
+  secretTargetRef:
+  - parameter: customAuthHeader
+    name: keda-prom-secret
+    key: customAuthHeader
+  - parameter: customAuthValue
+    name: keda-prom-secret
+    key: customAuthValue
+
+---
+apiVersion: keda.sh/v1alpha1
+kind: ScaledObject
+metadata:
+  name: gpu-metrics-scaledobject  # NOTE(review): same name as the ScaledObject in the sibling scaledobject_*.yaml files; applying more than one overwrites the previous
+  namespace: bj2-dcloud
+spec:
+  scaleTargetRef:
+    apiVersion: apps/v1
+    kind: Deployment
+    name: dstereox
+  minReplicaCount: 1
+  maxReplicaCount: 2
+  # idleReplicaCount: 0  # run 0 replicas while no trigger is active
+  pollingInterval: 30  # check the triggers every 30 seconds
+  cooldownPeriod: 300  # wait 5 minutes after the last active trigger before scaling to zero
+  # initialCooldownPeriod: 0  # 0 = no extra delay after the ScaledObject is created
+
+  # fallback:  # Optional. Section to specify fallback options
+  #   failureThreshold: 3  # Mandatory if fallback section is included
+  #   replicas: 6  # Mandatory if fallback section is included
+  #   behavior: {kind-of-behavior}  # Optional. Default: "static"
+  triggers:
+  # ---------------------------------------------------------
+  # Trigger 1: GPU utilization (DCGM)
+  # ---------------------------------------------------------
+  - type: prometheus
+    metadata:
+      serverAddress: https://cprom.bj.baidubce.com/select/prometheus
+      metricName: dcgm_gpu_utilization
+      query: max(avg_over_time(DCGM_FI_DEV_GPU_UTIL{ pod_name=~"dstereox.*"}[2m]))
+      customHeaders: InstanceId=cprom-uz2ngrzbjr7n7
+      threshold: "80"  # target value (80% utilization) used to compute the desired replica count
+      activationThreshold: "60"  # the scaler becomes active above 60% utilization
+      authModes: "custom"
+    authenticationRef:
+      name: keda-prom-creds
+
+  # ---------------------------------------------------------
+  # Trigger 2: CPU utilization (Prometheus)
+  # ---------------------------------------------------------
+  - type: prometheus
+    metadata:
+      serverAddress: https://cprom.bj.baidubce.com/select/prometheus
+      metricName: cpu_utilization
+      query: >
+        (
+          sum (
+            max by(pod) (
+              irate(container_cpu_usage_seconds_total{
+                namespace="bj2-dcloud",
+                pod=~"dstereox.*"
+              }[10m])
+            )
+          )
+        )
+        /(
+          sum (
+            max by(pod) (
+              container_spec_cpu_quota{
+                namespace="bj2-dcloud",
+                pod=~"dstereox.*"
+              } / 100000
+            )
+            or vector(1)
+          )
+        )
+        * 100
+      customHeaders: InstanceId=cprom-uz2ngrzbjr7n7
+      threshold: "80"  # target value (80% utilization) used to compute the desired replica count
+      activationThreshold: "60"  # the scaler becomes active above 60% utilization
+      authModes: "custom"
+    authenticationRef:
+      name: keda-prom-creds
\ No newline at end of file
diff --git a/keda/scaledobject_gpu_util.yaml b/keda/scaledobject_gpu_util.yaml
new file mode 100644
index 0000000..9f8b11d
--- /dev/null
+++ b/keda/scaledobject_gpu_util.yaml
@@ -0,0 +1,62 @@
+apiVersion: v1
+kind: Secret
+metadata:
+  name: keda-prom-secret
+  namespace: bj2-dcloud
+stringData:
+  customAuthHeader: "Authorization"
+  customAuthValue: "Bearer <CPROM_TOKEN>"  # placeholder: supply the real token at deploy time, never commit it; rotate the token that was previously committed here
+
+---
+apiVersion: keda.sh/v1alpha1
+kind: TriggerAuthentication
+metadata:
+  name: keda-prom-creds
+  namespace: bj2-dcloud
+spec:
+  secretTargetRef:
+  - parameter: customAuthHeader
+    name: keda-prom-secret
+    key: customAuthHeader
+  - parameter: customAuthValue
+    name: keda-prom-secret
+    key: customAuthValue
+
+---
+apiVersion: keda.sh/v1alpha1
+kind: ScaledObject
+metadata:
+  name: gpu-metrics-scaledobject  # NOTE(review): same name as the ScaledObject in the sibling scaledobject_*.yaml files; applying more than one overwrites the previous
+  namespace: bj2-dcloud
+spec:
+  scaleTargetRef:
+    apiVersion: apps/v1
+    kind: Deployment
+    name: dstereox
+  minReplicaCount: 1
+  maxReplicaCount: 2
+  # idleReplicaCount: 0  # run 0 replicas while no trigger is active
+  pollingInterval: 30  # check the triggers every 30 seconds
+  cooldownPeriod: 300  # wait 5 minutes after the last active trigger before scaling to zero
+  # initialCooldownPeriod: 0  # 0 = no extra delay after the ScaledObject is created
+
+  # fallback:  # Optional. Section to specify fallback options
+  #   failureThreshold: 3  # Mandatory if fallback section is included
+  #   replicas: 6  # Mandatory if fallback section is included
+  #   behavior: {kind-of-behavior}  # Optional. Default: "static"
+  triggers:
+  # ---------------------------------------------------------
+  # Trigger 1: GPU utilization (DCGM)
+  # ---------------------------------------------------------
+  - type: prometheus
+    metadata:
+      serverAddress: https://cprom.bj.baidubce.com/select/prometheus
+      metricName: dcgm_gpu_utilization
+      # query: avg(avg_over_time(DCGM_FI_DEV_GPU_UTIL{namespace="bj2-dcloud",pod=~"dstereox-.*"}[3m]))
+      query: max(avg_over_time(DCGM_FI_DEV_GPU_UTIL{pod_name=~"dstereox.*"}[3m]))
+      customHeaders: InstanceId=cprom-uz2ngrzbjr7n7
+      threshold: "50"  # target value (50% utilization) used to compute the desired replica count
+      activationThreshold: "30"  # the scaler becomes active above 30% utilization
+      authModes: "custom"
+    authenticationRef:
+      name: keda-prom-creds
\ No newline at end of file
diff --git a/keda/scaledobject_volcano.yaml b/keda/scaledobject_volcano.yaml
new file mode 100644
index 0000000..274f571
--- /dev/null
+++ b/keda/scaledobject_volcano.yaml
@@ -0,0 +1,62 @@
+apiVersion: v1
+kind: Secret
+metadata:
+  name: keda-prom-secret
+  namespace: bj2-dcloud
+stringData:
+  customAuthHeader: "Authorization"
+  customAuthValue: "Bearer <CPROM_TOKEN>"  # placeholder: supply the real token at deploy time, never commit it; rotate the token that was previously committed here
+
+---
+apiVersion: keda.sh/v1alpha1
+kind: TriggerAuthentication
+metadata:
+  name: keda-prom-creds
+  namespace: bj2-dcloud
+spec:
+  secretTargetRef:
+  - parameter: customAuthHeader
+    name: keda-prom-secret
+    key: customAuthHeader
+  - parameter: customAuthValue
+    name: keda-prom-secret
+    key: customAuthValue
+
+---
+apiVersion: keda.sh/v1alpha1
+kind: ScaledObject
+metadata:
+  name: gpu-metrics-scaledobject  # NOTE(review): named "gpu" but scales on volcano queue depth; same name as the ScaledObject in the sibling scaledobject_*.yaml files, so applying more than one overwrites the previous
+  namespace: bj2-dcloud
+spec:
+  scaleTargetRef:
+    apiVersion: apps/v1
+    kind: Deployment
+    name: dstereox
+  minReplicaCount: 1
+  maxReplicaCount: 2
+  idleReplicaCount: 0  # run 0 replicas while no trigger is active
+  pollingInterval: 30  # check the triggers every 30 seconds
+  cooldownPeriod: 300  # wait 5 minutes after the last active trigger before scaling to zero
+  # initialCooldownPeriod: 0  # 0 = no extra delay after the ScaledObject is created
+
+  # fallback:  # Optional. Section to specify fallback options
+  #   failureThreshold: 3  # Mandatory if fallback section is included
+  #   replicas: 6  # Mandatory if fallback section is included
+  #   behavior: {kind-of-behavior}  # Optional. Default: "static"
+  triggers:
+  # ---------------------------------------------------------
+  # Trigger 1: number of pending pod groups in the volcano queue
+  # ---------------------------------------------------------
+  - type: prometheus
+    metadata:
+      serverAddress: https://cprom.bj.baidubce.com/select/prometheus
+      metricName: queue_pending_tasks
+      query: sum(volcano_queue_pod_group_pending_count{queue_name="dcloud"})
+      customHeaders: InstanceId=cprom-uz2ngrzbjr7n7
+      threshold: "1"
+      activationThreshold: "1"  # NOTE(review): activation needs a value strictly greater than this, i.e. 2+ pending groups; confirm "0" was not intended
+      authModes: "custom"
+    authenticationRef:
+      name: keda-prom-creds
+