diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..89a74fe --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "coremark_images/coremark-pro"] + path = coremark_images/coremark-pro + url = https://github.com/eembc/coremark-pro.git diff --git a/README.md b/README.md index e8030c7..7737667 100644 --- a/README.md +++ b/README.md @@ -11,6 +11,11 @@ - [交互式进入容器](#交互式进入容器) - [task.json 协议说明](#taskjson-协议说明) - [输出结果说明](#输出结果说明) +- [CoreMark-PRO CPU 性能测试工具](#coremark-pro-cpu-性能测试工具) + - [工具说明](#工具说明-1) + - [目录结构](#目录结构-1) + - [使用方法](#使用方法) + - [输出结果说明](#输出结果说明-1) --- @@ -157,7 +162,7 @@ bpu_model_perf_images/ └── output/ └── result.json # 运行后自动生成 ``` -## 预打包版本 +### 预打包版本 ```bash # RDK X5 / RDK X5 module @@ -354,3 +359,169 @@ hrt_model_exec infer --model_file /workspace/input/xxx.hbm --input_file input.jp } ] ``` + +--- + +## CoreMark-PRO CPU 性能测试工具 + +### 工具说明 + +基于 CoreMark-PRO 标准 CPU 性能测试套件,对 ARM 处理器进行全面性能测试。测试包含 9 个典型工作负载,自动检测 CPU 核心数并在 1 到最大核心数之间进行多线程性能测试,最终输出 CoreMark-PRO 综合得分。 + +| 硬件平台 | 架构 | 镜像名称 | +|----------|------|----------| +| 所有 ARM 平台 | ARMv8 / aarch64 | `coremark_pro_rdk_arm:latest` | + +CoreMark-PRO 包含以下 9 个工作负载: + +1. **cjpeg-rose7-preset** - JPEG 压缩 +2. **core** - 增强版 CoreMark +3. **linear_alg-mid-100x100-sp** - 线性代数(单精度浮点) +4. **loops-all-mid-10k-sp** - Livermore Loops(单精度浮点) +5. **nnet_test** - 神经网络 +6. **parser-125k** - XML 解析 +7. **radix2-big-64k** - FFT(双精度浮点) +8. **sha-test** - SHA-256 哈希 +9. 
**zip-test** - ZIP 压缩 + +### 目录结构 + +``` +coremark_images/ +├── Dockerfile # 镜像构建文件 +├── docker_build.sh # 构建脚本 +├── docker_run.sh # 运行脚本 +├── docker_test.sh # 测试脚本 +├── workspace/ +│ ├── coremark_perf.py # 性能测试主脚本 +│ └── entrypoint.sh # 容器入口脚本 +├── coremark-pro/ # CoreMark-PRO 源代码 +│ └── builds/linux64/gcc64/bin/ # 预编译的二进制文件 +└── example_fs/ + └── output/ # 默认输出目录 + └── result.json # 运行后自动生成 +``` + +### 使用方法 + +**第一步:构建镜像**(只需构建一次) + +```bash +cd coremark_images +bash docker_build.sh +``` + +**第二步:运行** + +```bash +bash docker_run.sh +``` + +运行脚本将 `example_fs/output` 挂载为容器内 `/workspace/output`。 + +**预期输出** + +```text +========================================== +CoreMark-PRO CPU Performance Benchmark +========================================== + +========================================================================================== +System Information +========================================================================================== +Hostname: xxxxxxxx +Platform: Linux-6.1.83-aarch64-with-glibc2.35 +Architecture: aarch64 +CPU Model: aarch64 +CPU Count: 8 logical, 8 physical +CPU Frequency: 1800 MHz +Memory Total: 5768 MB +Memory Used: 48.2% +Timestamp: 2026-03-20 11:37:11 +========================================================================================== + +[Benchmark] Running CoreMark-PRO with thread counts: [1, 2, 3, 4, 5, 6, 7, 8] +[Benchmark] Total workloads: 9 + +--- Running with 1 thread(s) --- + [1 threads] cjpeg-rose7-preset ... 59.88 iter/s + [1 threads] core ... 0.46 iter/s + [1 threads] linear_alg-mid-100x100-sp ... 24.74 iter/s + ... + +========================================================================================== +CoreMark-PRO Workload Results +========================================================================================== +Workload T=1 T=2 T=3 T=4 ... Scaling +------------------------------------------------------------------------------------------ +cjpeg-rose7-preset 59.88 117.65 169.49 232.56 ... 
7.26 +core 0.46 0.91 1.37 1.82 ... 6.07 +... +========================================================================================== + +========================================================================================== +CoreMark-PRO Score Summary +========================================================================================== +Threads CoreMark-PRO Score +------------------------------------------------------------------------------------------ +1 1390.51 +2 2684.16 +... +8 7027.42 +------------------------------------------------------------------------------------------ +Final CoreMark-PRO Score 7027.42 +========================================================================================== + +[OK] Results saved to: /workspace/output/result.json +[OK] CoreMark-PRO Score: 7027.42 +``` + +### 输出结果说明 + +结果保存在 `example_fs/output/result.json`,结构如下: + +```json +{ + "system_info": { + "cpu": { + "cpu_count": 8, + "cpu_count_physical": 8, + "cpu_freq_mhz": 1800.0, + "cpu_model": "aarch64", + "architecture": "aarch64" + }, + "memory": { + "total_mb": 5768, + "available_mb": 2985, + "percent_used": 48.2 + }, + "system": { + "hostname": "rdk-board", + "platform": "Linux-6.1.83-aarch64", + "timestamp": "2026-03-20 11:37:11" + } + }, + "benchmark_config": { + "workloads": ["cjpeg-rose7-preset", "core", ...], + "thread_counts": [1, 2, 3, 4, 5, 6, 7, 8], + "max_threads": 8 + }, + "results": [ + { + "workload": "cjpeg-rose7-preset", + "thread_num": 1, + "iter_per_sec": 59.88, + "returncode": 0, + "elapsed_time_sec": 0.17 + } + ], + "coremark_pro_score": 7027.42 +} +``` + +| 字段 | 说明 | +|------|------| +| `system_info` | 系统信息(CPU、内存、平台等) | +| `benchmark_config` | 测试配置(workload 列表、线程数等) | +| `results` | 每个 workload 在不同线程数下的性能数据 | +| `coremark_pro_score` | 最终 CoreMark-PRO 综合得分 | diff --git a/bpu_model_perf_images/example_fs_bayese/input/task.json b/bpu_model_perf_images/example_fs_bayese/input/task.json index 348c9aa..5f1e4f0 100644 --- 
a/bpu_model_perf_images/example_fs_bayese/input/task.json +++ b/bpu_model_perf_images/example_fs_bayese/input/task.json @@ -1,4 +1 @@ -{ - "model_relative_path": "your_bayese_model.hbm", - "frame_count": 200 -} +{"model_relative_path": "yolov8_640x640_nv12.bin", "frame_count": 20} diff --git a/coremark_images/.dockerignore b/coremark_images/.dockerignore new file mode 100644 index 0000000..9249ca7 --- /dev/null +++ b/coremark_images/.dockerignore @@ -0,0 +1,29 @@ +# Git +.git +.gitignore + +# Build artifacts (these will be created during build) +builds/ +*.o +*.obj +*.a +*.lib +*.so +*.dll + +# Documentation +docs/ +*.md +LICENSE* + +# Temporary files +*.log +*.tmp +*.swp +*.swo +*~ + +# IDE +.vscode/ +.idea/ +*.iml diff --git a/coremark_images/Dockerfile b/coremark_images/Dockerfile new file mode 100644 index 0000000..8615691 --- /dev/null +++ b/coremark_images/Dockerfile @@ -0,0 +1,57 @@ +FROM ubuntu:22.04 + +ENV DEBIAN_FRONTEND=noninteractive + +# ============================================================ +# 基础工具 + 编译工具 +# ============================================================ +RUN apt-get update && apt-get install -y \ + python3 \ + python3-pip \ + build-essential \ + gcc \ + g++ \ + make \ + locales \ + && rm -rf /var/lib/apt/lists/* + +# 安装 Python 依赖 +RUN pip3 install --no-cache-dir psutil + +# 配置中文 locale(防止终端中文乱码) +RUN locale-gen zh_CN.UTF-8 && update-locale LANG=zh_CN.UTF-8 +ENV LANG=zh_CN.UTF-8 +ENV LC_ALL=zh_CN.UTF-8 + +# ============================================================ +# CoreMark-PRO 编译 +# ============================================================ + +# 拷贝 CoreMark-PRO 源代码 +COPY coremark-pro /workspace/coremark-pro-src + +# 编译 CoreMark-PRO +WORKDIR /workspace/coremark-pro-src +RUN make TARGET=linux64 clean 2>/dev/null || true && \ + make TARGET=linux64 build + +# 创建运行目录结构 +RUN mkdir -p /workspace/coremark-pro/builds/linux64/gcc64/bin && \ + cp /workspace/coremark-pro-src/builds/linux64/gcc64/bin/*.exe 
/workspace/coremark-pro/builds/linux64/gcc64/bin/ && \ + rm -rf /workspace/coremark-pro-src + +# ============================================================ +# 设置运行环境 +# ============================================================ + +# 拷贝 benchmark 脚本和 entrypoint +COPY workspace/coremark_perf.py /workspace/coremark/coremark_perf.py +COPY workspace/entrypoint.sh /workspace/coremark/entrypoint.sh +RUN chmod +x /workspace/coremark/entrypoint.sh + +# 工作目录和挂载点 +RUN mkdir -p /workspace/input /workspace/output + +WORKDIR /workspace/coremark + +ENTRYPOINT ["/workspace/coremark/entrypoint.sh"] diff --git a/coremark_images/coremark-pro b/coremark_images/coremark-pro new file mode 160000 index 0000000..4832cc6 --- /dev/null +++ b/coremark_images/coremark-pro @@ -0,0 +1 @@ +Subproject commit 4832cc67b0926c7a80a4b7ce0ce00f4640ea6bec diff --git a/coremark_images/docker_build.sh b/coremark_images/docker_build.sh new file mode 100755 index 0000000..cecaf5d --- /dev/null +++ b/coremark_images/docker_build.sh @@ -0,0 +1,14 @@ +#!/bin/bash +# Build CoreMark-PRO benchmark Docker image + +IMAGE_NAME="coremark_pro_rdk_arm" +IMAGE_TAG="${1:-latest}" + +echo "Building CoreMark-PRO benchmark image..." +echo "Image: ${IMAGE_NAME}:${IMAGE_TAG}" +echo "" + +docker build -t ${IMAGE_NAME}:${IMAGE_TAG} -f Dockerfile . 
+ +echo "" +echo "Build complete: ${IMAGE_NAME}:${IMAGE_TAG}" diff --git a/coremark_images/docker_run.sh b/coremark_images/docker_run.sh new file mode 100755 index 0000000..c96403e --- /dev/null +++ b/coremark_images/docker_run.sh @@ -0,0 +1,29 @@ +#!/bin/bash +# Run CoreMark-PRO CPU benchmark container +# Output: /workspace/output/result.json (mounted from host) + +IMAGE_NAME="coremark_pro_rdk_arm" +IMAGE_TAG="latest" + +# Default output directory on host +HOST_OUTPUT_DIR="$(pwd)/example_fs/output" + +# Allow override via environment variable +if [ -n "$COREMARK_OUTPUT_DIR" ]; then + HOST_OUTPUT_DIR="$COREMARK_OUTPUT_DIR" +fi + +mkdir -p "$HOST_OUTPUT_DIR" + +echo "Running CoreMark-PRO benchmark..." +echo "Output will be saved to: $HOST_OUTPUT_DIR/result.json" +echo "" + +docker run --rm \ + --name coremark_pro_rdk_arm \ + --privileged \ + -v "$HOST_OUTPUT_DIR":/workspace/output \ + ${IMAGE_NAME}:${IMAGE_TAG} + +echo "" +echo "Benchmark complete. Results saved to: $HOST_OUTPUT_DIR/result.json" diff --git a/coremark_images/docker_test.sh b/coremark_images/docker_test.sh new file mode 100755 index 0000000..f95b671 --- /dev/null +++ b/coremark_images/docker_test.sh @@ -0,0 +1,31 @@ +#!/bin/bash +# Test CoreMark-PRO benchmark Docker image + +IMAGE_NAME="coremark_pro_rdk_arm" +IMAGE_TAG="${1:-latest}" + +echo "Testing CoreMark-PRO benchmark image..." +echo "Image: ${IMAGE_NAME}:${IMAGE_TAG}" +echo "" + +# Create test output directory +mkdir -p "$(pwd)/example_fs/output" + +# Run the benchmark +docker run --rm \ + --name coremark_pro_rdk_arm_test \ + --privileged \ + -v "$(pwd)/example_fs/output":/workspace/output \ + ${IMAGE_NAME}:${IMAGE_TAG} + +EXIT_CODE=$? + +echo "" +if [ $EXIT_CODE -eq 0 ]; then + echo "Test passed!" 
+ echo "Results available at: $(pwd)/example_fs/output/result.json" +else + echo "Test failed with exit code: $EXIT_CODE" +fi + +exit $EXIT_CODE diff --git a/coremark_images/example_fs/output/result.json b/coremark_images/example_fs/output/result.json new file mode 100644 index 0000000..44d5d06 --- /dev/null +++ b/coremark_images/example_fs/output/result.json @@ -0,0 +1,633 @@ +{ + "system_info": { + "cpu": { + "cpu_count": 8, + "cpu_count_physical": 8, + "cpu_freq_mhz": 1800.0, + "cpu_percent": 2.0, + "cpu_implementer": "0x41", + "cpu_arch": "8", + "cpu_model": "aarch64", + "architecture": "aarch64" + }, + "memory": { + "total_mb": 5768, + "available_mb": 2962, + "percent_used": 48.6, + "used_mb": 2805, + "free_mb": 258 + }, + "system": { + "hostname": "0f806e261978", + "platform": "Linux-6.1.83-aarch64-with-glibc2.35", + "system": "Linux", + "release": "6.1.83", + "version": "#4 SMP PREEMPT Fri Dec 19 10:54:38 CST 2025", + "python_version": "3.10.12", + "timestamp": "2026-03-20 11:59:37" + } + }, + "benchmark_config": { + "workloads": [ + "cjpeg-rose7-preset", + "core", + "linear_alg-mid-100x100-sp", + "loops-all-mid-10k-sp", + "nnet_test", + "parser-125k", + "radix2-big-64k", + "sha-test", + "zip-test" + ], + "thread_counts": [ + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8 + ], + "max_threads": 8 + }, + "results": [ + { + "workload": "cjpeg-rose7-preset", + "thread_num": 1, + "iter_per_sec": 59.8802, + "returncode": 0, + "elapsed_time_sec": 0.1709287166595459, + "raw_output": null + }, + { + "workload": "core", + "thread_num": 1, + "iter_per_sec": 0.457038, + "returncode": 0, + "elapsed_time_sec": 2.191352128982544, + "raw_output": null + }, + { + "workload": "linear_alg-mid-100x100-sp", + "thread_num": 1, + "iter_per_sec": 24.7402, + "returncode": 0, + "elapsed_time_sec": 2.027066707611084, + "raw_output": null + }, + { + "workload": "loops-all-mid-10k-sp", + "thread_num": 1, + "iter_per_sec": 1.20875, + "returncode": 0, + "elapsed_time_sec": 41.36767315864563, + 
"raw_output": null + }, + { + "workload": "nnet_test", + "thread_num": 1, + "iter_per_sec": 1.71615, + "returncode": 0, + "elapsed_time_sec": 5.830807209014893, + "raw_output": null + }, + { + "workload": "parser-125k", + "thread_num": 1, + "iter_per_sec": 10.8696, + "returncode": 0, + "elapsed_time_sec": 0.15040898323059082, + "raw_output": null + }, + { + "workload": "radix2-big-64k", + "thread_num": 1, + "iter_per_sec": 99.4827, + "returncode": 0, + "elapsed_time_sec": 10.061779260635376, + "raw_output": null + }, + { + "workload": "sha-test", + "thread_num": 1, + "iter_per_sec": 106.383, + "returncode": 0, + "elapsed_time_sec": 0.10429215431213379, + "raw_output": null + }, + { + "workload": "zip-test", + "thread_num": 1, + "iter_per_sec": 34.4828, + "returncode": 0, + "elapsed_time_sec": 3.7011659145355225, + "raw_output": null + }, + { + "workload": "cjpeg-rose7-preset", + "thread_num": 2, + "iter_per_sec": 120.482, + "returncode": 0, + "elapsed_time_sec": 0.08684968948364258, + "raw_output": null + }, + { + "workload": "core", + "thread_num": 2, + "iter_per_sec": 0.913659, + "returncode": 0, + "elapsed_time_sec": 2.1916873455047607, + "raw_output": null + }, + { + "workload": "linear_alg-mid-100x100-sp", + "thread_num": 2, + "iter_per_sec": 49.554, + "returncode": 0, + "elapsed_time_sec": 1.0147795677185059, + "raw_output": null + }, + { + "workload": "loops-all-mid-10k-sp", + "thread_num": 2, + "iter_per_sec": 2.43013, + "returncode": 0, + "elapsed_time_sec": 20.578801155090332, + "raw_output": null + }, + { + "workload": "nnet_test", + "thread_num": 2, + "iter_per_sec": 3.43171, + "returncode": 0, + "elapsed_time_sec": 2.916877031326294, + "raw_output": null + }, + { + "workload": "parser-125k", + "thread_num": 2, + "iter_per_sec": 21.2766, + "returncode": 0, + "elapsed_time_sec": 0.15271854400634766, + "raw_output": null + }, + { + "workload": "radix2-big-64k", + "thread_num": 2, + "iter_per_sec": 179.083, + "returncode": 0, + "elapsed_time_sec": 
5.594027280807495, + "raw_output": null + }, + { + "workload": "sha-test", + "thread_num": 2, + "iter_per_sec": 212.766, + "returncode": 0, + "elapsed_time_sec": 0.057444095611572266, + "raw_output": null + }, + { + "workload": "zip-test", + "thread_num": 2, + "iter_per_sec": 66.6667, + "returncode": 0, + "elapsed_time_sec": 3.864583969116211, + "raw_output": null + }, + { + "workload": "cjpeg-rose7-preset", + "thread_num": 3, + "iter_per_sec": 175.439, + "returncode": 0, + "elapsed_time_sec": 0.06096076965332031, + "raw_output": null + }, + { + "workload": "core", + "thread_num": 3, + "iter_per_sec": 1.37112, + "returncode": 0, + "elapsed_time_sec": 2.191770315170288, + "raw_output": null + }, + { + "workload": "linear_alg-mid-100x100-sp", + "thread_num": 3, + "iter_per_sec": 72.7802, + "returncode": 0, + "elapsed_time_sec": 0.6921648979187012, + "raw_output": null + }, + { + "workload": "loops-all-mid-10k-sp", + "thread_num": 3, + "iter_per_sec": 3.51519, + "returncode": 0, + "elapsed_time_sec": 14.228093385696411, + "raw_output": null + }, + { + "workload": "nnet_test", + "thread_num": 3, + "iter_per_sec": 4.29, + "returncode": 0, + "elapsed_time_sec": 2.3344318866729736, + "raw_output": null + }, + { + "workload": "parser-125k", + "thread_num": 3, + "iter_per_sec": 28.3019, + "returncode": 0, + "elapsed_time_sec": 0.16345930099487305, + "raw_output": null + }, + { + "workload": "radix2-big-64k", + "thread_num": 3, + "iter_per_sec": 177.936, + "returncode": 0, + "elapsed_time_sec": 5.634125471115112, + "raw_output": null + }, + { + "workload": "sha-test", + "thread_num": 3, + "iter_per_sec": 270.27, + "returncode": 0, + "elapsed_time_sec": 0.048152923583984375, + "raw_output": null + }, + { + "workload": "zip-test", + "thread_num": 3, + "iter_per_sec": 100.0, + "returncode": 0, + "elapsed_time_sec": 3.8439626693725586, + "raw_output": null + }, + { + "workload": "cjpeg-rose7-preset", + "thread_num": 4, + "iter_per_sec": 232.558, + "returncode": 0, + 
"elapsed_time_sec": 0.04668903350830078, + "raw_output": null + }, + { + "workload": "core", + "thread_num": 4, + "iter_per_sec": 1.81736, + "returncode": 0, + "elapsed_time_sec": 2.2041430473327637, + "raw_output": null + }, + { + "workload": "linear_alg-mid-100x100-sp", + "thread_num": 4, + "iter_per_sec": 95.057, + "returncode": 0, + "elapsed_time_sec": 0.5304605960845947, + "raw_output": null + }, + { + "workload": "loops-all-mid-10k-sp", + "thread_num": 4, + "iter_per_sec": 4.49035, + "returncode": 0, + "elapsed_time_sec": 11.139124155044556, + "raw_output": null + }, + { + "workload": "nnet_test", + "thread_num": 4, + "iter_per_sec": 5.71429, + "returncode": 0, + "elapsed_time_sec": 1.7527673244476318, + "raw_output": null + }, + { + "workload": "parser-125k", + "thread_num": 4, + "iter_per_sec": 30.303, + "returncode": 0, + "elapsed_time_sec": 0.19083714485168457, + "raw_output": null + }, + { + "workload": "radix2-big-64k", + "thread_num": 4, + "iter_per_sec": 206.954, + "returncode": 0, + "elapsed_time_sec": 4.84836220741272, + "raw_output": null + }, + { + "workload": "sha-test", + "thread_num": 4, + "iter_per_sec": 357.143, + "returncode": 0, + "elapsed_time_sec": 0.03871965408325195, + "raw_output": null + }, + { + "workload": "zip-test", + "thread_num": 4, + "iter_per_sec": 129.032, + "returncode": 0, + "elapsed_time_sec": 3.823582410812378, + "raw_output": null + }, + { + "workload": "cjpeg-rose7-preset", + "thread_num": 5, + "iter_per_sec": 294.118, + "returncode": 0, + "elapsed_time_sec": 0.03779315948486328, + "raw_output": null + }, + { + "workload": "core", + "thread_num": 5, + "iter_per_sec": 2.28311, + "returncode": 0, + "elapsed_time_sec": 2.194005012512207, + "raw_output": null + }, + { + "workload": "linear_alg-mid-100x100-sp", + "thread_num": 5, + "iter_per_sec": 123.762, + "returncode": 0, + "elapsed_time_sec": 0.4098236560821533, + "raw_output": null + }, + { + "workload": "loops-all-mid-10k-sp", + "thread_num": 5, + "iter_per_sec": 
5.65803, + "returncode": 0, + "elapsed_time_sec": 8.841012477874756, + "raw_output": null + }, + { + "workload": "nnet_test", + "thread_num": 5, + "iter_per_sec": 8.56164, + "returncode": 0, + "elapsed_time_sec": 1.1719512939453125, + "raw_output": null + }, + { + "workload": "parser-125k", + "thread_num": 5, + "iter_per_sec": 34.0136, + "returncode": 0, + "elapsed_time_sec": 0.20575261116027832, + "raw_output": null + }, + { + "workload": "radix2-big-64k", + "thread_num": 5, + "iter_per_sec": 218.436, + "returncode": 0, + "elapsed_time_sec": 4.595886468887329, + "raw_output": null + }, + { + "workload": "sha-test", + "thread_num": 5, + "iter_per_sec": 526.316, + "returncode": 0, + "elapsed_time_sec": 0.029636859893798828, + "raw_output": null + }, + { + "workload": "zip-test", + "thread_num": 5, + "iter_per_sec": 147.059, + "returncode": 0, + "elapsed_time_sec": 3.8295886516571045, + "raw_output": null + }, + { + "workload": "cjpeg-rose7-preset", + "thread_num": 6, + "iter_per_sec": 312.5, + "returncode": 0, + "elapsed_time_sec": 0.035584449768066406, + "raw_output": null + }, + { + "workload": "core", + "thread_num": 6, + "iter_per_sec": 2.72975, + "returncode": 0, + "elapsed_time_sec": 2.201174020767212, + "raw_output": null + }, + { + "workload": "linear_alg-mid-100x100-sp", + "thread_num": 6, + "iter_per_sec": 137.363, + "returncode": 0, + "elapsed_time_sec": 0.3693256378173828, + "raw_output": null + }, + { + "workload": "loops-all-mid-10k-sp", + "thread_num": 6, + "iter_per_sec": 6.13497, + "returncode": 0, + "elapsed_time_sec": 8.154258012771606, + "raw_output": null + }, + { + "workload": "nnet_test", + "thread_num": 6, + "iter_per_sec": 8.56164, + "returncode": 0, + "elapsed_time_sec": 1.1719541549682617, + "raw_output": null + }, + { + "workload": "parser-125k", + "thread_num": 6, + "iter_per_sec": 32.7869, + "returncode": 0, + "elapsed_time_sec": 0.242201566696167, + "raw_output": null + }, + { + "workload": "radix2-big-64k", + "thread_num": 6, + 
"iter_per_sec": 249.501, + "returncode": 0, + "elapsed_time_sec": 4.0301103591918945, + "raw_output": null + }, + { + "workload": "sha-test", + "thread_num": 6, + "iter_per_sec": 526.316, + "returncode": 0, + "elapsed_time_sec": 0.029955625534057617, + "raw_output": null + }, + { + "workload": "zip-test", + "thread_num": 6, + "iter_per_sec": 142.857, + "returncode": 0, + "elapsed_time_sec": 3.631044864654541, + "raw_output": null + }, + { + "workload": "cjpeg-rose7-preset", + "thread_num": 7, + "iter_per_sec": 303.03, + "returncode": 0, + "elapsed_time_sec": 0.036682844161987305, + "raw_output": null + }, + { + "workload": "core", + "thread_num": 7, + "iter_per_sec": 3.01984, + "returncode": 0, + "elapsed_time_sec": 2.3219473361968994, + "raw_output": null + }, + { + "workload": "linear_alg-mid-100x100-sp", + "thread_num": 7, + "iter_per_sec": 150.602, + "returncode": 0, + "elapsed_time_sec": 0.33828091621398926, + "raw_output": null + }, + { + "workload": "loops-all-mid-10k-sp", + "thread_num": 7, + "iter_per_sec": 6.49266, + "returncode": 0, + "elapsed_time_sec": 7.704557657241821, + "raw_output": null + }, + { + "workload": "nnet_test", + "thread_num": 7, + "iter_per_sec": 8.30565, + "returncode": 0, + "elapsed_time_sec": 1.2081749439239502, + "raw_output": null + }, + { + "workload": "parser-125k", + "thread_num": 7, + "iter_per_sec": 27.7778, + "returncode": 0, + "elapsed_time_sec": 0.31223487854003906, + "raw_output": null + }, + { + "workload": "radix2-big-64k", + "thread_num": 7, + "iter_per_sec": 278.164, + "returncode": 0, + "elapsed_time_sec": 3.6184468269348145, + "raw_output": null + }, + { + "workload": "sha-test", + "thread_num": 7, + "iter_per_sec": 500.0, + "returncode": 0, + "elapsed_time_sec": 0.030409574508666992, + "raw_output": null + }, + { + "workload": "zip-test", + "thread_num": 7, + "iter_per_sec": 152.174, + "returncode": 0, + "elapsed_time_sec": 3.6435928344726562, + "raw_output": null + }, + { + "workload": "cjpeg-rose7-preset", + 
"thread_num": 8, + "iter_per_sec": 370.37, + "returncode": 0, + "elapsed_time_sec": 0.031106233596801758, + "raw_output": null + }, + { + "workload": "core", + "thread_num": 8, + "iter_per_sec": 2.83386, + "returncode": 0, + "elapsed_time_sec": 2.8255538940429688, + "raw_output": null + }, + { + "workload": "linear_alg-mid-100x100-sp", + "thread_num": 8, + "iter_per_sec": 160.256, + "returncode": 0, + "elapsed_time_sec": 0.31850361824035645, + "raw_output": null + }, + { + "workload": "loops-all-mid-10k-sp", + "thread_num": 8, + "iter_per_sec": 7.03037, + "returncode": 0, + "elapsed_time_sec": 7.116137981414795, + "raw_output": null + }, + { + "workload": "nnet_test", + "thread_num": 8, + "iter_per_sec": 8.46024, + "returncode": 0, + "elapsed_time_sec": 1.1859660148620605, + "raw_output": null + }, + { + "workload": "parser-125k", + "thread_num": 8, + "iter_per_sec": 27.8746, + "returncode": 0, + "elapsed_time_sec": 0.34679079055786133, + "raw_output": null + }, + { + "workload": "radix2-big-64k", + "thread_num": 8, + "iter_per_sec": 277.469, + "returncode": 0, + "elapsed_time_sec": 3.6289494037628174, + "raw_output": null + }, + { + "workload": "sha-test", + "thread_num": 8, + "iter_per_sec": 526.316, + "returncode": 0, + "elapsed_time_sec": 0.03002643585205078, + "raw_output": null + }, + { + "workload": "zip-test", + "thread_num": 8, + "iter_per_sec": 145.455, + "returncode": 0, + "elapsed_time_sec": 3.640843152999878, + "raw_output": null + } + ], + "coremark_pro_score": 6507.155269914205 +} \ No newline at end of file diff --git a/coremark_images/workspace/coremark_perf.py b/coremark_images/workspace/coremark_perf.py new file mode 100755 index 0000000..e2bbef5 --- /dev/null +++ b/coremark_images/workspace/coremark_perf.py @@ -0,0 +1,436 @@ +#!/usr/bin/env python3 +""" +CoreMark-PRO CPU Performance Benchmark Tool +Runs CoreMark-PRO benchmark at different thread counts (1 to max CPU cores) + +This version follows the RDK Docker Tools protocol: +- Input: 
def get_cpu_info() -> dict:
    """Collect basic CPU facts for the benchmark report.

    Three sources are combined:

    * ``psutil`` for the logical/physical core counts, the current frequency
      and a utilisation sample,
    * ``/proc/cpuinfo`` (best effort) for the model name and the
      ``CPU implementer`` / ``CPU architecture`` fields,
    * ``platform`` for the machine architecture and as the fallback model
      name.

    Returns:
        A dict that always contains ``cpu_count``, ``cpu_count_physical``,
        ``cpu_freq_mhz`` (``None`` when psutil reports no frequency),
        ``cpu_percent``, ``cpu_model`` and ``architecture``.
        ``cpu_implementer`` and ``cpu_arch`` are added only when the matching
        lines are seen in ``/proc/cpuinfo`` before the first model line.
    """
    info = {
        "cpu_count": psutil.cpu_count(logical=True),
        "cpu_count_physical": psutil.cpu_count(logical=False),
        "cpu_freq_mhz": None,
        # interval=1 makes psutil sample utilisation over one second.
        "cpu_percent": psutil.cpu_percent(interval=1),
    }

    # Frequency is optional: keep None when psutil returns a falsy value.
    freq = psutil.cpu_freq()
    if freq:
        info["cpu_freq_mhz"] = freq.current

    # Best-effort scan of /proc/cpuinfo. Only I/O failures (file missing,
    # permission denied, ...) are ignored; undecodable bytes are replaced so
    # they cannot abort the scan, and unrelated exceptions are not swallowed.
    try:
        with open("/proc/cpuinfo", "r", encoding="utf-8", errors="replace") as f:
            for line in f:
                # partition keeps the whole value even if it contains ':' and
                # lets us skip lines that have no "key: value" shape at all.
                _, sep, value = line.partition(":")
                if not sep:
                    continue
                value = value.strip()

                if line.startswith(("model name", "Model", "Processor")):
                    info["cpu_model"] = value
                    # The first model line ends the scan.
                    break
                if line.startswith("CPU implementer"):
                    info["cpu_implementer"] = value
                if line.startswith("CPU architecture"):
                    info["cpu_arch"] = value
    except OSError:
        pass

    # Fall back to what the platform module knows when no model line exists.
    if "cpu_model" not in info:
        info["cpu_model"] = platform.processor() or "Unknown"

    info["architecture"] = platform.machine()

    return info
def calculate_coremark_pro_score(results: List[dict]) -> Optional[float]:
    """Calculate the CoreMark-PRO score for one set of workload results.

    Each usable result is normalised as
    ``(iter_per_sec / REFERENCE_SCORES[workload]) * SCALE_FACTORS[workload]``;
    the score is the geometric mean of those ratios multiplied by 1000.

    Args:
        results: Result dicts carrying a ``"workload"`` key and, optionally,
            an ``"iter_per_sec"`` value.

    Returns:
        The score as a float, or ``None`` when no result is usable. A result
        is skipped when its rate is missing, is not strictly positive, or
        its workload has no reference score.

    Raises:
        KeyError: If a result dict has no ``"workload"`` key.
    """
    import math

    log_ratios = []
    for entry in results:
        workload = entry["workload"]
        rate = entry.get("iter_per_sec")
        # A geometric mean is only defined for positive values: a zero rate
        # would collapse the whole score to 0.0 and a negative one would make
        # the fractional power return a complex number, so treat both like a
        # missing measurement.
        if rate is None or rate <= 0 or workload not in REFERENCE_SCORES:
            continue

        ratio = (rate / REFERENCE_SCORES[workload]) * SCALE_FACTORS[workload]
        log_ratios.append(math.log(ratio))

    if not log_ratios:
        return None

    # exp(mean(log(x))) is the geometric mean; summing logs with fsum avoids
    # the overflow/underflow a running product can hit.
    geometric_mean = math.exp(math.fsum(log_ratios) / len(log_ratios))
    return geometric_mean * 1000
f"{workload:<{col_width}}" + for t in thread_counts: + score = row_data.get(t) + if score is not None: + row += f"{score:>{thread_col}.2f}" + else: + row += f"{'N/A':>{thread_col}}" + + # Calculate scaling (multi-core / single-core) + max_threads = max(thread_counts) + multi_core = row_data.get(max_threads) + if single_core is not None and multi_core is not None and single_core > 0: + scaling = multi_core / single_core + row += f"{scaling:>{thread_col}.2f}" + else: + row += f"{'N/A':>{thread_col}}" + + print(row) + + print("=" * 90) + + +def print_summary(all_results: List[dict], thread_counts: List[int], coremark_pro_score: Optional[float]): + """Print summary of benchmark results.""" + print("\n" + "=" * 90) + print("CoreMark-PRO Score Summary") + print("=" * 90) + + # Group by thread count + by_thread = {} + for r in all_results: + t = r["thread_num"] + if t not in by_thread: + by_thread[t] = [] + by_thread[t].append(r) + + print(f"{'Threads':<10} {'CoreMark-PRO Score':>20}") + print("-" * 90) + + for t in thread_counts: + if t in by_thread: + score = calculate_coremark_pro_score(by_thread[t]) + if score is not None: + print(f"{t:<10} {score:>20.2f}") + else: + print(f"{t:<10} {'N/A':>20}") + + if coremark_pro_score is not None: + print("-" * 90) + print(f"{'Final CoreMark-PRO Score':<30} {coremark_pro_score:>20.2f}") + + print("=" * 90) + + +def print_system_info(info: dict): + """Print system information.""" + print("\n" + "=" * 90) + print("System Information") + print("=" * 90) + + cpu = info["cpu"] + mem = info["memory"] + sys_info = info["system"] + + print(f"Hostname: {sys_info['hostname']}") + print(f"Platform: {sys_info['platform']}") + print(f"Architecture: {cpu['architecture']}") + print(f"CPU Model: {cpu.get('cpu_model', 'Unknown')}") + print(f"CPU Count: {cpu['cpu_count']} logical, {cpu['cpu_count_physical']} physical") + if cpu.get('cpu_freq_mhz'): + print(f"CPU Frequency: {cpu['cpu_freq_mhz']:.0f} MHz") + print(f"Memory Total: {mem['total_mb']} 
MB") + print(f"Memory Used: {mem['percent_used']:.1f}%") + print(f"Timestamp: {sys_info['timestamp']}") + + print("=" * 90) + + +def main(): + parser = argparse.ArgumentParser(description="CoreMark-PRO CPU Performance Benchmark") + parser.add_argument("--input", help="Input JSON file path (optional)") + parser.add_argument("--output", required=True, help="Output JSON file path") + parser.add_argument("--build-dir", default="/workspace/coremark-pro/builds/linux64/gcc64/bin", + help="Directory containing CoreMark-PRO binaries") + args = parser.parse_args() + + build_dir = Path(args.build_dir) + + if not build_dir.exists(): + print(f"[ERROR] Build directory not found: {build_dir}", file=sys.stderr) + sys.exit(1) + + # Load optional configuration from input + config = {} + if args.input and Path(args.input).exists(): + try: + with open(args.input) as f: + config = json.load(f) + print(f"[INFO] Loaded configuration from: {args.input}") + except Exception as e: + print(f"[WARNING] Failed to load input config: {e}", file=sys.stderr) + + # Get system information + cpu_info = get_cpu_info() + memory_info = get_memory_info() + system_info = get_system_info() + + # Print system info + info = { + "cpu": cpu_info, + "memory": memory_info, + "system": system_info + } + print_system_info(info) + + # Determine thread counts to test (1 to max CPU count) + max_cpus = config.get("max_threads", cpu_info["cpu_count"]) + thread_counts = list(range(1, max_cpus + 1)) + + print(f"\n[Benchmark] Running CoreMark-PRO with thread counts: {thread_counts}") + print(f"[Benchmark] Total workloads: {len(WORKLOADS)}") + + all_results = [] + + # Run benchmarks for each thread count + for thread_num in thread_counts: + print(f"\n--- Running with {thread_num} thread(s) ---") + + for workload in WORKLOADS: + print(f" [{thread_num} threads] {workload} ...", end=" ", flush=True) + + try: + result = run_workload(workload, thread_num, build_dir) + all_results.append(result) + + if result.get("iter_per_sec") 
is not None: + print(f"{result['iter_per_sec']:.2f} iter/s") + else: + print(f"FAILED (exit code: {result.get('returncode')})") + if result.get("raw_output"): + print(f" Output: {result['raw_output'][:200]}") + except Exception as e: + print(f"EXCEPTION: {e}") + all_results.append({ + "workload": workload, + "thread_num": thread_num, + "error": str(e), + "iter_per_sec": None + }) + + # Calculate final CoreMark-PRO score for max threads + by_thread = {} + for r in all_results: + t = r["thread_num"] + if t not in by_thread: + by_thread[t] = [] + by_thread[t].append(r) + + coremark_pro_score = None + if max_cpus in by_thread: + coremark_pro_score = calculate_coremark_pro_score(by_thread[max_cpus]) + + # Print results tables + print_results_table(all_results, thread_counts) + print_summary(all_results, thread_counts, coremark_pro_score) + + # Prepare output + output = { + "system_info": info, + "benchmark_config": { + "workloads": WORKLOADS, + "thread_counts": thread_counts, + "max_threads": max_cpus + }, + "results": all_results, + "coremark_pro_score": coremark_pro_score + } + + # Save to output file + output_path = Path(args.output) + output_path.parent.mkdir(parents=True, exist_ok=True) + + with open(output_path, "w") as f: + json.dump(output, f, indent=2) + + print(f"\n[OK] Results saved to: {args.output}") + + if coremark_pro_score is not None: + print(f"[OK] CoreMark-PRO Score: {coremark_pro_score:.2f}") + return 0 + else: + print("[WARNING] Could not calculate CoreMark-PRO score") + return 1 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/coremark_images/workspace/entrypoint.sh b/coremark_images/workspace/entrypoint.sh new file mode 100755 index 0000000..69832d0 --- /dev/null +++ b/coremark_images/workspace/entrypoint.sh @@ -0,0 +1,33 @@ +#!/bin/bash +set -e + +INPUT_DIR="/workspace/input" +OUTPUT_DIR="/workspace/output" +INPUT_JSON="${INPUT_DIR}/task.json" +OUTPUT_JSON="${OUTPUT_DIR}/result.json" + +# Allow overrides via env vars 
+INPUT_JSON="${COREMARK_INPUT:-$INPUT_JSON}" +OUTPUT_JSON="${COREMARK_OUTPUT:-$OUTPUT_JSON}" + +mkdir -p "$(dirname "$OUTPUT_JSON")" + +echo "==========================================" +echo "CoreMark-PRO CPU Performance Benchmark" +echo "==========================================" +echo "" + +# Set build directory +BUILD_DIR="/workspace/coremark-pro/builds/linux64/gcc64/bin" + +# Check if binaries exist +if [ ! -f "$BUILD_DIR/core.exe" ]; then + echo "ERROR: CoreMark-PRO binaries not found in $BUILD_DIR" + exit 1 +fi + +# Run the benchmark +exec python3 /workspace/coremark/coremark_perf.py \ + --input "$INPUT_JSON" \ + --output "$OUTPUT_JSON" \ + --build-dir "$BUILD_DIR"