🎉 feat: initialize foundation docs guardrails and workspace skeleton
This commit is contained in:
commit
f41816bbd9
17
.env.example
Normal file
17
.env.example
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
NODE_ENV=development
|
||||||
|
|
||||||
|
WEB_PORT=3000
|
||||||
|
API_PORT=3001
|
||||||
|
WORKER_PORT=3002
|
||||||
|
|
||||||
|
MONGO_PORT=27017
|
||||||
|
MONGO_DB=emboflow
|
||||||
|
MONGO_ROOT_USERNAME=emboflow
|
||||||
|
MONGO_ROOT_PASSWORD=emboflow
|
||||||
|
|
||||||
|
MINIO_PORT=9000
|
||||||
|
MINIO_CONSOLE_PORT=9001
|
||||||
|
MINIO_ROOT_USER=emboflow
|
||||||
|
MINIO_ROOT_PASSWORD=emboflow123
|
||||||
|
|
||||||
|
STORAGE_PROVIDER=minio
|
||||||
7
.githooks/commit-msg
Executable file
7
.githooks/commit-msg
Executable file
@ -0,0 +1,7 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
repo_root="$(git rev-parse --show-toplevel)"
|
||||||
|
cd "$repo_root"
|
||||||
|
|
||||||
|
python3 scripts/check_commit_message.py --file "$1"
|
||||||
7
.githooks/pre-commit
Executable file
7
.githooks/pre-commit
Executable file
@ -0,0 +1,7 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
repo_root="$(git rev-parse --show-toplevel)"
|
||||||
|
cd "$repo_root"
|
||||||
|
|
||||||
|
python3 scripts/check_doc_code_sync.py . --staged --strict
|
||||||
19
.githooks/pre-push
Executable file
19
.githooks/pre-push
Executable file
@ -0,0 +1,19 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
repo_root="$(git rev-parse --show-toplevel)"
|
||||||
|
cd "$repo_root"
|
||||||
|
|
||||||
|
if git rev-parse --abbrev-ref --symbolic-full-name "@{upstream}" >/dev/null 2>&1; then
|
||||||
|
base_ref="$(git rev-parse --abbrev-ref --symbolic-full-name "@{upstream}")"
|
||||||
|
python3 scripts/check_doc_code_sync.py . --base-ref "$base_ref" --strict
|
||||||
|
python3 scripts/check_commit_message.py --rev-range "$base_ref..HEAD"
|
||||||
|
elif git rev-parse HEAD~1 >/dev/null 2>&1; then
|
||||||
|
python3 scripts/check_doc_code_sync.py . --base-ref HEAD~1 --strict
|
||||||
|
python3 scripts/check_commit_message.py --rev-range "HEAD~1..HEAD"
|
||||||
|
else
|
||||||
|
python3 scripts/check_doc_code_sync.py . --rev-range HEAD --strict
|
||||||
|
python3 scripts/check_commit_message.py --rev-range HEAD
|
||||||
|
fi
|
||||||
|
|
||||||
|
python3 -m unittest discover -s tests -p 'test_*.py'
|
||||||
40
.github/pull_request_template.md
vendored
Normal file
40
.github/pull_request_template.md
vendored
Normal file
@ -0,0 +1,40 @@
|
|||||||
|
# Summary
|
||||||
|
|
||||||
|
- Describe the change in clear English.
|
||||||
|
- Explain the user-visible or system-level impact.
|
||||||
|
|
||||||
|
# Design Sync
|
||||||
|
|
||||||
|
- [ ] I reviewed the relevant files under `design/` before implementing.
|
||||||
|
- [ ] I updated the affected design or docs files in the same change set, or I confirmed no design update was required.
|
||||||
|
- [ ] I ran `python3 scripts/check_doc_code_sync.py . --strict`.
|
||||||
|
|
||||||
|
Design files reviewed or updated:
|
||||||
|
|
||||||
|
- ``
|
||||||
|
|
||||||
|
If design and code are not fully aligned yet, explain the gap:
|
||||||
|
|
||||||
|
-
|
||||||
|
|
||||||
|
# Validation
|
||||||
|
|
||||||
|
- [ ] I ran local checks relevant to this change.
|
||||||
|
- [ ] I ran `bash scripts/install_hooks.sh` in this clone or already had the repo hooks installed.
|
||||||
|
- [ ] My commit messages in this PR are English-only and use a gitmoji prefix.
|
||||||
|
|
||||||
|
Commands run:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# paste commands here
|
||||||
|
```
|
||||||
|
|
||||||
|
# Scope Checklist
|
||||||
|
|
||||||
|
- [ ] This PR updates behavior, contracts, or runtime assumptions intentionally.
|
||||||
|
- [ ] This PR does not silently break documented architecture or workflow assumptions.
|
||||||
|
- [ ] This PR includes tests if behavior changed, or I confirmed tests were not required.
|
||||||
|
|
||||||
|
# Notes For Reviewers
|
||||||
|
|
||||||
|
- Call out any risky areas, follow-up work, or unresolved assumptions.
|
||||||
45
.github/workflows/guardrails.yml
vendored
Normal file
45
.github/workflows/guardrails.yml
vendored
Normal file
@ -0,0 +1,45 @@
|
|||||||
|
name: Guardrails
|
||||||
|
|
||||||
|
on:
|
||||||
|
pull_request:
|
||||||
|
push:
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
repository-guardrails:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- name: Check out repository
|
||||||
|
uses: actions/checkout@v4
|
||||||
|
with:
|
||||||
|
fetch-depth: 0
|
||||||
|
|
||||||
|
- name: Set up Python
|
||||||
|
uses: actions/setup-python@v5
|
||||||
|
with:
|
||||||
|
python-version: "3.11"
|
||||||
|
|
||||||
|
- name: Compute git range
|
||||||
|
id: git_range
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
if [ "${GITHUB_EVENT_NAME}" = "pull_request" ]; then
|
||||||
|
RANGE="${{ github.event.pull_request.base.sha }}..${{ github.sha }}"
|
||||||
|
elif [ "${{ github.event.before }}" != "0000000000000000000000000000000000000000" ]; then
|
||||||
|
RANGE="${{ github.event.before }}..${{ github.sha }}"
|
||||||
|
else
|
||||||
|
RANGE="${{ github.sha }}"
|
||||||
|
fi
|
||||||
|
echo "range=${RANGE}" >> "$GITHUB_OUTPUT"
|
||||||
|
|
||||||
|
- name: Validate commit messages
|
||||||
|
run: |
|
||||||
|
python3 scripts/check_commit_message.py --rev-range "${{ steps.git_range.outputs.range }}"
|
||||||
|
|
||||||
|
- name: Validate design and code sync
|
||||||
|
run: |
|
||||||
|
python3 scripts/check_doc_code_sync.py . --rev-range "${{ steps.git_range.outputs.range }}" --strict
|
||||||
|
|
||||||
|
- name: Run repository tests
|
||||||
|
run: |
|
||||||
|
python3 -m unittest discover -s tests -p 'test_*.py'
|
||||||
93
CONTRIBUTING.md
Normal file
93
CONTRIBUTING.md
Normal file
@ -0,0 +1,93 @@
|
|||||||
|
# Contributing To EmboFlow
|
||||||
|
|
||||||
|
## Core Rule
|
||||||
|
|
||||||
|
Keep `design/` and implementation aligned in the same change set.
|
||||||
|
|
||||||
|
Do not treat design files as background notes. If a code change affects product behavior, workflow behavior, data models, contracts, runtime assumptions, permissions, or deployment assumptions, update the corresponding design documents before closing the task.
|
||||||
|
|
||||||
|
## Required Workflow
|
||||||
|
|
||||||
|
1. Read the relevant files under `design/` before implementing.
|
||||||
|
2. Summarize the intended contract you are changing.
|
||||||
|
3. Implement the code change.
|
||||||
|
4. Update the affected design files in the same work session.
|
||||||
|
5. Install the local git hooks once per clone:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
bash scripts/install_hooks.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
6. Use English-only commit messages with a gitmoji prefix, for example:
|
||||||
|
|
||||||
|
```text
|
||||||
|
:sparkles: add workflow guardrails and CI checks
|
||||||
|
```
|
||||||
|
|
||||||
|
7. Run the local sync check when needed:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python3 scripts/check_doc_code_sync.py . --strict
|
||||||
|
```
|
||||||
|
|
||||||
|
8. If design and code still diverge, document that explicitly in your final summary.
|
||||||
|
|
||||||
|
## When Design Updates Are Required
|
||||||
|
|
||||||
|
Update design files when a change affects:
|
||||||
|
|
||||||
|
- user-visible behavior
|
||||||
|
- workflow nodes or execution paths
|
||||||
|
- data model or storage structure
|
||||||
|
- API or schema contracts
|
||||||
|
- plugin or executor behavior
|
||||||
|
- workspace, project, or permission rules
|
||||||
|
- deployment or runtime assumptions
|
||||||
|
|
||||||
|
## When Design Updates May Be Skipped
|
||||||
|
|
||||||
|
Design updates are usually not required for:
|
||||||
|
|
||||||
|
- pure refactors with no behavior change
|
||||||
|
- test-only changes
|
||||||
|
- formatting, comments, and naming cleanup
|
||||||
|
|
||||||
|
Even in those cases, verify that no documented statement became false indirectly.
|
||||||
|
|
||||||
|
## Primary Design Locations
|
||||||
|
|
||||||
|
- `design/00-overview/`
|
||||||
|
- `design/01-product/`
|
||||||
|
- `design/02-architecture/`
|
||||||
|
- `design/03-workflows/`
|
||||||
|
- `design/05-data/`
|
||||||
|
- `design/08-decisions/`
|
||||||
|
|
||||||
|
## Local Tooling
|
||||||
|
|
||||||
|
This repository includes:
|
||||||
|
|
||||||
|
- git hook templates under `.githooks/`
|
||||||
|
- a hook installer:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
bash scripts/install_hooks.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
- a design/code sync checker:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python3 scripts/check_doc_code_sync.py . --strict
|
||||||
|
```
|
||||||
|
|
||||||
|
- a commit message validator:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python3 scripts/check_commit_message.py --rev-range HEAD
|
||||||
|
```
|
||||||
|
|
||||||
|
The hooks and CI enforce:
|
||||||
|
|
||||||
|
- English-only commit messages with a gitmoji prefix
|
||||||
|
- design/code consistency checks
|
||||||
|
- repository unit tests before push
|
||||||
8
apps/api/package.json
Normal file
8
apps/api/package.json
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
{
|
||||||
|
"name": "@emboflow/api",
|
||||||
|
"private": true,
|
||||||
|
"version": "0.1.0",
|
||||||
|
"scripts": {
|
||||||
|
"dev": "echo 'api app scaffold pending'"
|
||||||
|
}
|
||||||
|
}
|
||||||
8
apps/web/package.json
Normal file
8
apps/web/package.json
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
{
|
||||||
|
"name": "@emboflow/web",
|
||||||
|
"private": true,
|
||||||
|
"version": "0.1.0",
|
||||||
|
"scripts": {
|
||||||
|
"dev": "echo 'web app scaffold pending'"
|
||||||
|
}
|
||||||
|
}
|
||||||
8
apps/worker/package.json
Normal file
8
apps/worker/package.json
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
{
|
||||||
|
"name": "@emboflow/worker",
|
||||||
|
"private": true,
|
||||||
|
"version": "0.1.0",
|
||||||
|
"scripts": {
|
||||||
|
"dev": "echo 'worker app scaffold pending'"
|
||||||
|
}
|
||||||
|
}
|
||||||
1
design/00-overview/.gitkeep
Normal file
1
design/00-overview/.gitkeep
Normal file
@ -0,0 +1 @@
|
|||||||
|
|
||||||
70
design/00-overview/emboflow-platform-overview.md
Normal file
70
design/00-overview/emboflow-platform-overview.md
Normal file
@ -0,0 +1,70 @@
|
|||||||
|
# EmboFlow Platform Overview
|
||||||
|
|
||||||
|
## Positioning
|
||||||
|
|
||||||
|
EmboFlow is a browser-based embodied data engineering platform for ingesting raw assets, organizing dataset workflows on a visual canvas, processing and converting data, annotating and inspecting results, exporting normalized artifacts, and generating downstream training configurations.
|
||||||
|
|
||||||
|
The platform is designed around plugin-based extensibility, but the first version should deliver a stable built-in core before opening broader extension surfaces.
|
||||||
|
|
||||||
|
## Primary Users
|
||||||
|
|
||||||
|
- Individual engineers building embodied datasets
|
||||||
|
- Team operators managing collection, preprocessing, delivery, and annotation workflows
|
||||||
|
- Data engineering teams that need repeatable conversion and packaging pipelines
|
||||||
|
- Teams preparing datasets for external training systems
|
||||||
|
|
||||||
|
## V1 Product Goal
|
||||||
|
|
||||||
|
Build a usable end-to-end platform that allows users to:
|
||||||
|
|
||||||
|
1. Log into a personal or team workspace
|
||||||
|
2. Create a project
|
||||||
|
3. Upload or import raw embodied data assets
|
||||||
|
4. Auto-detect asset structure and generate preview summaries
|
||||||
|
5. Compose processing pipelines on a canvas
|
||||||
|
6. Configure node parameters and inject code into processing nodes
|
||||||
|
7. Execute workflows asynchronously and inspect logs and outputs
|
||||||
|
8. Export normalized delivery packages, training datasets, or training config files
|
||||||
|
|
||||||
|
## Supported Input Formats in V1
|
||||||
|
|
||||||
|
- RLDS
|
||||||
|
- LeRobot v2/v3
|
||||||
|
- HDF5
|
||||||
|
- Rosbag
|
||||||
|
- Raw video folders and delivery-style directory packages
|
||||||
|
- Compressed archives containing the above
|
||||||
|
|
||||||
|
## Core Product Principles
|
||||||
|
|
||||||
|
- Raw assets are first-class objects
|
||||||
|
- Canonical semantic datasets are derived, not assumed
|
||||||
|
- Visualization can operate directly on raw assets
|
||||||
|
- Workflow execution is asynchronous and traceable
|
||||||
|
- Plugins are versioned and managed
|
||||||
|
- User-injected code is supported with strict runtime boundaries
|
||||||
|
- Training execution is out of scope for V1, but training handoff is in scope
|
||||||
|
|
||||||
|
## Major Workspaces
|
||||||
|
|
||||||
|
- Asset Workspace: upload, import, scan, probe, browse
|
||||||
|
- Canvas Workspace: build and run workflows
|
||||||
|
- Explore Workspace: inspect raw assets and processed outputs
|
||||||
|
- Label Workspace: create and review annotation tasks
|
||||||
|
- Admin Workspace: users, workspaces, plugins, storage, runtime settings
|
||||||
|
|
||||||
|
## V1 Output Types
|
||||||
|
|
||||||
|
- Standardized embodied dataset exports
|
||||||
|
- Customer delivery packages
|
||||||
|
- Validation and quality reports
|
||||||
|
- Annotation artifacts
|
||||||
|
- Training configuration packages for downstream training systems
|
||||||
|
|
||||||
|
## Non-Goals for V1
|
||||||
|
|
||||||
|
- Built-in training execution orchestration
|
||||||
|
- Real-time collaborative editing on the same canvas
|
||||||
|
- Public plugin marketplace
|
||||||
|
- Fully generalized MLOps lifecycle management
|
||||||
|
- Advanced distributed scheduling in the first deployment
|
||||||
1
design/01-product/.gitkeep
Normal file
1
design/01-product/.gitkeep
Normal file
@ -0,0 +1 @@
|
|||||||
|
|
||||||
90
design/01-product/v1-scope-and-mvp.md
Normal file
90
design/01-product/v1-scope-and-mvp.md
Normal file
@ -0,0 +1,90 @@
|
|||||||
|
# EmboFlow V1 Scope And MVP
|
||||||
|
|
||||||
|
## MVP Definition
|
||||||
|
|
||||||
|
The first release should prove that EmboFlow can turn raw embodied data assets into structured outputs through a visual workflow engine.
|
||||||
|
|
||||||
|
### MVP Success Path
|
||||||
|
|
||||||
|
1. A user signs into a workspace
|
||||||
|
2. The user creates a project
|
||||||
|
3. The user uploads or imports a raw asset
|
||||||
|
4. The platform probes the asset and generates a structure summary
|
||||||
|
5. The user previews the asset
|
||||||
|
6. The user composes a canvas workflow
|
||||||
|
7. The workflow executes asynchronously
|
||||||
|
8. The user reviews logs, outputs, and generated artifacts
|
||||||
|
9. The user exports a normalized dataset, delivery package, or training config
|
||||||
|
|
||||||
|
## In Scope For V1
|
||||||
|
|
||||||
|
- User login and workspace model
|
||||||
|
- Personal and team workspaces
|
||||||
|
- Project resource isolation
|
||||||
|
- Raw asset upload and import
|
||||||
|
- Object storage integration
|
||||||
|
- Asset probing and structure detection
|
||||||
|
- Raw asset preview
|
||||||
|
- Canvas workflow editor
|
||||||
|
- Built-in node library for ingest, transform, inspect, export
|
||||||
|
- Node configuration through schema-driven forms
|
||||||
|
- Code injection for processing nodes
|
||||||
|
- Workflow run orchestration
|
||||||
|
- Logs, status, retries, and artifact tracking
|
||||||
|
- Dataset conversion and delivery-package normalization
|
||||||
|
- Training config export
|
||||||
|
- Plugin registration skeleton
|
||||||
|
|
||||||
|
## Important Business Scenarios
|
||||||
|
|
||||||
|
### Embodied Dataset Conversion
|
||||||
|
|
||||||
|
- Import RLDS, LeRobot, HDF5, or Rosbag
|
||||||
|
- Map to canonical semantics
|
||||||
|
- Export to target dataset format
|
||||||
|
|
||||||
|
### Delivery Package Normalization
|
||||||
|
|
||||||
|
- Import customer-provided raw directory or archive
|
||||||
|
- Rename top-level folders
|
||||||
|
- Validate required file structure
|
||||||
|
- Validate metadata files
|
||||||
|
- Check video file quality and naming
|
||||||
|
- Export or upload normalized package
|
||||||
|
|
||||||
|
### Data Processing Workflow Authoring
|
||||||
|
|
||||||
|
- Drag nodes onto canvas
|
||||||
|
- Connect nodes into DAG
|
||||||
|
- Tune parameters
|
||||||
|
- Inject code into processing nodes
|
||||||
|
- Re-run pipeline with traceable history
|
||||||
|
|
||||||
|
## V1 Modules To Build Deeply
|
||||||
|
|
||||||
|
- Identity and workspace management
|
||||||
|
- Asset ingestion and probing
|
||||||
|
- Workflow editor and node model
|
||||||
|
- Execution engine
|
||||||
|
- Built-in dataset conversion nodes
|
||||||
|
- Built-in delivery normalization nodes
|
||||||
|
- Preview and inspection
|
||||||
|
- Artifact export
|
||||||
|
|
||||||
|
## V1 Modules To Keep Lightweight
|
||||||
|
|
||||||
|
- Annotation
|
||||||
|
- Collaboration
|
||||||
|
- Plugin lifecycle UX
|
||||||
|
- Advanced analytics
|
||||||
|
- Kubernetes and Volcano scheduling adapters
|
||||||
|
- Advanced multi-sensor synchronized visual analytics
|
||||||
|
|
||||||
|
## Explicit V1 Exclusions
|
||||||
|
|
||||||
|
- Platform-managed training execution
|
||||||
|
- Real-time multi-user canvas co-editing
|
||||||
|
- Full marketplace for third-party plugins
|
||||||
|
- Complex enterprise approval workflows
|
||||||
|
- Streaming data processing
|
||||||
|
- Large-scale distributed execution as a deployment requirement
|
||||||
1
design/02-architecture/.gitkeep
Normal file
1
design/02-architecture/.gitkeep
Normal file
@ -0,0 +1 @@
|
|||||||
|
|
||||||
115
design/02-architecture/deployment-architecture.md
Normal file
115
design/02-architecture/deployment-architecture.md
Normal file
@ -0,0 +1,115 @@
|
|||||||
|
# EmboFlow Deployment Architecture
|
||||||
|
|
||||||
|
## V1 Deployment Target
|
||||||
|
|
||||||
|
The first deployment target is a single public server. The platform should be deployed in a way that is operationally simple now and migration-friendly later.
|
||||||
|
|
||||||
|
## Recommended V1 Deployment Topology
|
||||||
|
|
||||||
|
- Reverse proxy
|
||||||
|
- Web frontend service
|
||||||
|
- API service
|
||||||
|
- Worker service
|
||||||
|
- MongoDB
|
||||||
|
- Optional MinIO
|
||||||
|
- Host Docker runtime for execution containers
|
||||||
|
|
||||||
|
## Deployment Principles
|
||||||
|
|
||||||
|
- Single-host deployment first
|
||||||
|
- All major services containerized
|
||||||
|
- Persistent state mounted on host volumes
|
||||||
|
- Object storage can be external or self-hosted
|
||||||
|
- Execution workers separated from API service
|
||||||
|
- Future scheduler migration should not require domain model changes
|
||||||
|
|
||||||
|
## Recommended Runtime Layout
|
||||||
|
|
||||||
|
### Edge
|
||||||
|
|
||||||
|
- Nginx or equivalent reverse proxy
|
||||||
|
- HTTPS termination
|
||||||
|
- Static web delivery or web upstream routing
|
||||||
|
|
||||||
|
### Application
|
||||||
|
|
||||||
|
- `web`
|
||||||
|
- `api`
|
||||||
|
- `worker`
|
||||||
|
|
||||||
|
### Data
|
||||||
|
|
||||||
|
- `mongo`
|
||||||
|
- `minio` optional
|
||||||
|
|
||||||
|
## Object Storage Strategy
|
||||||
|
|
||||||
|
The product should support both:
|
||||||
|
|
||||||
|
- Cloud object storage such as BOS or S3-compatible services
|
||||||
|
- Self-hosted MinIO for development, demos, or private deployment
|
||||||
|
|
||||||
|
The application should expose a unified storage abstraction instead of embedding provider-specific logic across modules.
|
||||||
|
|
||||||
|
## Local Scheduler In V1
|
||||||
|
|
||||||
|
V1 should use a local scheduler. Worker processes execute tasks on the same deployment host.
|
||||||
|
|
||||||
|
Design constraints:
|
||||||
|
|
||||||
|
- RuntimeSpec must already exist
|
||||||
|
- Scheduler abstraction must already exist
|
||||||
|
- Docker executor must already be scheduler-compatible
|
||||||
|
|
||||||
|
This keeps future migration to Kubernetes or Volcano feasible.
|
||||||
|
|
||||||
|
## Host-Level Persistent Directories
|
||||||
|
|
||||||
|
Recommended host directories:
|
||||||
|
|
||||||
|
- application config
|
||||||
|
- mongodb data
|
||||||
|
- minio data
|
||||||
|
- uploaded file staging
|
||||||
|
- execution temp workspace
|
||||||
|
- logs
|
||||||
|
- backup data
|
||||||
|
|
||||||
|
## Execution Isolation
|
||||||
|
|
||||||
|
The host Docker runtime serves two different purposes:
|
||||||
|
|
||||||
|
- Running the platform deployment stack
|
||||||
|
- Running task execution containers
|
||||||
|
|
||||||
|
These must be treated as separate concerns in configuration and security design.
|
||||||
|
|
||||||
|
## Future Migration Path
|
||||||
|
|
||||||
|
### Stage 1
|
||||||
|
|
||||||
|
- Single-host deployment
|
||||||
|
- Local scheduler
|
||||||
|
- Docker executor
|
||||||
|
|
||||||
|
### Stage 2
|
||||||
|
|
||||||
|
- Kubernetes-based service deployment
|
||||||
|
- Kubernetes scheduler adapter for workflow tasks
|
||||||
|
|
||||||
|
### Stage 3
|
||||||
|
|
||||||
|
- Volcano scheduler adapter
|
||||||
|
- Better support for large batch jobs and training-adjacent workloads
|
||||||
|
|
||||||
|
## Operational Baseline
|
||||||
|
|
||||||
|
V1 should provide basic operational support for:
|
||||||
|
|
||||||
|
- health checks
|
||||||
|
- service restart
|
||||||
|
- execution failure visibility
|
||||||
|
- disk space monitoring
|
||||||
|
- object storage connectivity checks
|
||||||
|
- MongoDB backup and restore procedures
|
||||||
|
- worker online status
|
||||||
200
design/02-architecture/system-architecture.md
Normal file
200
design/02-architecture/system-architecture.md
Normal file
@ -0,0 +1,200 @@
|
|||||||
|
# EmboFlow System Architecture
|
||||||
|
|
||||||
|
## Architecture Style
|
||||||
|
|
||||||
|
EmboFlow V1 is a browser/server platform built as:
|
||||||
|
|
||||||
|
- Web frontend
|
||||||
|
- Modular backend control plane
|
||||||
|
- Independent worker runtime
|
||||||
|
- MongoDB as the only database
|
||||||
|
- Object storage abstraction over cloud object storage or MinIO
|
||||||
|
- Local scheduler in V1 with future migration path to Kubernetes and Volcano
|
||||||
|
|
||||||
|
The architecture should preserve clear service boundaries even if V1 is implemented as a modular monolith plus workers.
|
||||||
|
|
||||||
|
## High-Level Layers
|
||||||
|
|
||||||
|
### Frontend Layer
|
||||||
|
|
||||||
|
- Asset workspace
|
||||||
|
- Canvas workspace
|
||||||
|
- Explore workspace
|
||||||
|
- Label workspace
|
||||||
|
- Admin workspace
|
||||||
|
|
||||||
|
### Control Plane
|
||||||
|
|
||||||
|
- Identity and authorization
|
||||||
|
- Workspace and project management
|
||||||
|
- Asset and dataset metadata
|
||||||
|
- Workflow definition management
|
||||||
|
- Plugin registry and activation
|
||||||
|
- Run orchestration API
|
||||||
|
- Artifact indexing
|
||||||
|
|
||||||
|
### Execution Plane
|
||||||
|
|
||||||
|
- Workflow DAG compilation
|
||||||
|
- Task queue dispatch
|
||||||
|
- Worker execution
|
||||||
|
- Executor routing
|
||||||
|
- Log and artifact collection
|
||||||
|
|
||||||
|
### Storage Layer
|
||||||
|
|
||||||
|
- MongoDB for metadata and run state
|
||||||
|
- Object storage for files and large outputs
|
||||||
|
- Temporary local working directories for execution
|
||||||
|
|
||||||
|
## Core Domain Objects
|
||||||
|
|
||||||
|
- User
|
||||||
|
- Workspace
|
||||||
|
- Project
|
||||||
|
- Asset
|
||||||
|
- Dataset
|
||||||
|
- DatasetVersion
|
||||||
|
- WorkflowDefinition
|
||||||
|
- WorkflowVersion
|
||||||
|
- WorkflowRun
|
||||||
|
- RunTask
|
||||||
|
- Artifact
|
||||||
|
- AnnotationTask
|
||||||
|
- Annotation
|
||||||
|
- Plugin
|
||||||
|
- StorageConnection
|
||||||
|
|
||||||
|
## Raw Asset And Canonical Dataset Model
|
||||||
|
|
||||||
|
The platform must distinguish between:
|
||||||
|
|
||||||
|
- Raw Asset View
|
||||||
|
- Canonical Dataset View
|
||||||
|
|
||||||
|
Raw assets preserve source structure, file paths, metadata layout, and original naming. Canonical datasets provide a normalized semantic layer for workflow nodes and export logic.
|
||||||
|
|
||||||
|
Visualization may read raw assets directly. Conversion, orchestration, and export should primarily target canonical semantics.
|
||||||
|
|
||||||
|
## Workflow Model
|
||||||
|
|
||||||
|
Workflow definitions are versioned and contain:
|
||||||
|
|
||||||
|
- Visual graph state
|
||||||
|
- Logical node and edge graph
|
||||||
|
- Runtime configuration
|
||||||
|
- Plugin references
|
||||||
|
|
||||||
|
Workflow execution produces immutable workflow runs. A run snapshots:
|
||||||
|
|
||||||
|
- Workflow version
|
||||||
|
- Node configuration
|
||||||
|
- Injected code
|
||||||
|
- Executor settings
|
||||||
|
- Input bindings
|
||||||
|
|
||||||
|
Runs compile into task DAGs.
|
||||||
|
|
||||||
|
## Node And Plugin Model
|
||||||
|
|
||||||
|
### Node Categories
|
||||||
|
|
||||||
|
- Source
|
||||||
|
- Transform
|
||||||
|
- Inspect
|
||||||
|
- Annotate
|
||||||
|
- Export
|
||||||
|
- Utility
|
||||||
|
|
||||||
|
### Node Definition Contract
|
||||||
|
|
||||||
|
Each node definition includes:
|
||||||
|
|
||||||
|
- Metadata
|
||||||
|
- Input schema
|
||||||
|
- Output schema
|
||||||
|
- Config schema
|
||||||
|
- UI schema
|
||||||
|
- Executor type
|
||||||
|
- Runtime limits
|
||||||
|
- Optional code hook contract
|
||||||
|
|
||||||
|
### Plugin Types
|
||||||
|
|
||||||
|
- Node plugins
|
||||||
|
- Reader/writer plugins
|
||||||
|
- Renderer plugins
|
||||||
|
- Executor plugins
|
||||||
|
- Integration plugins
|
||||||
|
|
||||||
|
## Execution Architecture
|
||||||
|
|
||||||
|
### Executors
|
||||||
|
|
||||||
|
- Python executor
|
||||||
|
- Docker executor
|
||||||
|
- HTTP executor
|
||||||
|
|
||||||
|
V1 should prioritize Python and Docker. HTTP executor is useful for integrating external services.
|
||||||
|
|
||||||
|
### Schedulers
|
||||||
|
|
||||||
|
- Local scheduler in V1
|
||||||
|
- Kubernetes scheduler later
|
||||||
|
- Volcano scheduler later
|
||||||
|
|
||||||
|
Executors and schedulers are separate abstractions:
|
||||||
|
|
||||||
|
- Executor defines how logic runs
|
||||||
|
- Scheduler defines where and under what scheduling policy it runs
|
||||||
|
|
||||||
|
## Storage Architecture
|
||||||
|
|
||||||
|
### MongoDB Collections
|
||||||
|
|
||||||
|
Recommended primary collections:
|
||||||
|
|
||||||
|
- users
|
||||||
|
- workspaces
|
||||||
|
- projects
|
||||||
|
- memberships
|
||||||
|
- assets
|
||||||
|
- asset_probe_reports
|
||||||
|
- datasets
|
||||||
|
- dataset_versions
|
||||||
|
- workflow_definitions
|
||||||
|
- workflow_definition_versions
|
||||||
|
- workflow_runs
|
||||||
|
- run_tasks
|
||||||
|
- artifacts
|
||||||
|
- annotation_tasks
|
||||||
|
- annotations
|
||||||
|
- plugins
|
||||||
|
- storage_connections
|
||||||
|
- audit_logs
|
||||||
|
|
||||||
|
### Object Storage Content
|
||||||
|
|
||||||
|
- Raw uploads
|
||||||
|
- Imported archives
|
||||||
|
- Normalized export packages
|
||||||
|
- Training config packages
|
||||||
|
- Preview resources
|
||||||
|
- Logs and attachments
|
||||||
|
- Large manifests and file indexes
|
||||||
|
|
||||||
|
## Security Model
|
||||||
|
|
||||||
|
User-injected code is low-trust code and must not run in web or API processes.
|
||||||
|
|
||||||
|
V1 runtime policy:
|
||||||
|
|
||||||
|
- Built-in trusted nodes may use Python executor
|
||||||
|
- Plugin code should run in controlled runtimes
|
||||||
|
- User-injected code should default to Docker executor
|
||||||
|
- Network access should be denied by default for user code
|
||||||
|
- Input and output paths should be explicitly mounted
|
||||||
|
|
||||||
|
## Deployment Direction
|
||||||
|
|
||||||
|
V1 deployment target is a single public server using containerized application services. The architecture must still preserve future migration to multi-node environments.
|
||||||
1
design/03-workflows/.gitkeep
Normal file
1
design/03-workflows/.gitkeep
Normal file
@ -0,0 +1 @@
|
|||||||
|
|
||||||
316
design/03-workflows/workflow-execution-model.md
Normal file
316
design/03-workflows/workflow-execution-model.md
Normal file
@ -0,0 +1,316 @@
|
|||||||
|
# EmboFlow Workflow Execution Model
|
||||||
|
|
||||||
|
## Goal
|
||||||
|
|
||||||
|
Define how EmboFlow represents, validates, executes, and observes canvas workflows.
|
||||||
|
|
||||||
|
The workflow system is the product core. The canvas is only the editing surface. The real system of record is the versioned workflow definition and its immutable run snapshots.
|
||||||
|
|
||||||
|
## Core Objects
|
||||||
|
|
||||||
|
- `WorkflowDefinition`
|
||||||
|
Logical workflow identity under a project
|
||||||
|
- `WorkflowVersion`
|
||||||
|
Immutable snapshot of nodes, edges, runtime defaults, and plugin references
|
||||||
|
- `NodeInstance`
|
||||||
|
Concrete node on a workflow graph
|
||||||
|
- `WorkflowRun`
|
||||||
|
One execution of one workflow version
|
||||||
|
- `RunTask`
|
||||||
|
Executable unit derived from a node during one run
|
||||||
|
- `Artifact`
|
||||||
|
Managed output from a task or run
|
||||||
|
|
||||||
|
## Workflow Layers
|
||||||
|
|
||||||
|
Each workflow version contains three layers.
|
||||||
|
|
||||||
|
### Visual Layer
|
||||||
|
|
||||||
|
Used only by the editor:
|
||||||
|
|
||||||
|
- node positions
|
||||||
|
- collapsed state
|
||||||
|
- groups
|
||||||
|
- zoom defaults
|
||||||
|
- viewport metadata
|
||||||
|
|
||||||
|
### Logic Layer
|
||||||
|
|
||||||
|
Used for graph semantics:
|
||||||
|
|
||||||
|
- nodes
|
||||||
|
- edges
|
||||||
|
- input/output ports
|
||||||
|
- branch conditions
|
||||||
|
- merge semantics
|
||||||
|
- dependency graph
|
||||||
|
|
||||||
|
### Runtime Layer
|
||||||
|
|
||||||
|
Used for execution:
|
||||||
|
|
||||||
|
- node config values
|
||||||
|
- executor settings
|
||||||
|
- runtime resource limits
|
||||||
|
- retry policy
|
||||||
|
- code hooks
|
||||||
|
- cache policy
|
||||||
|
|
||||||
|
Visual changes must not change workflow semantics. Runtime changes must produce a new workflow version.
|
||||||
|
|
||||||
|
## Node Categories
|
||||||
|
|
||||||
|
V1 node categories:
|
||||||
|
|
||||||
|
- `Source`
|
||||||
|
- `Transform`
|
||||||
|
- `Inspect`
|
||||||
|
- `Annotate`
|
||||||
|
- `Export`
|
||||||
|
- `Utility`
|
||||||
|
|
||||||
|
### V1 Built-In Node Families
|
||||||
|
|
||||||
|
- asset upload/import
|
||||||
|
- archive extract
|
||||||
|
- folder rename
|
||||||
|
- directory validation
|
||||||
|
- metadata validation
|
||||||
|
- video quality inspection
|
||||||
|
- dataset readers for RLDS, LeRobot, HDF5, Rosbag
|
||||||
|
- canonical mapping nodes
|
||||||
|
- dataset writers and exporters
|
||||||
|
- training config export
|
||||||
|
- Python processing node
|
||||||
|
|
||||||
|
## Node Definition Contract
|
||||||
|
|
||||||
|
Each node definition must expose:
|
||||||
|
|
||||||
|
- `id`
|
||||||
|
- `name`
|
||||||
|
- `category`
|
||||||
|
- `version`
|
||||||
|
- `description`
|
||||||
|
- `inputSchema`
|
||||||
|
- `outputSchema`
|
||||||
|
- `configSchema`
|
||||||
|
- `uiSchema`
|
||||||
|
- `executorType`
|
||||||
|
- `runtimeDefaults`
|
||||||
|
- `permissions`
|
||||||
|
- `capabilities`
|
||||||
|
- `codeHookSpec`
|
||||||
|
|
||||||
|
### Code Hook Spec
|
||||||
|
|
||||||
|
V1 supports user code hooks only on:
|
||||||
|
|
||||||
|
- `Transform`
|
||||||
|
- `Inspect`
|
||||||
|
- `Utility`
|
||||||
|
|
||||||
|
Hooks must use a constrained entrypoint instead of arbitrary script structure.
|
||||||
|
|
||||||
|
Example:
|
||||||
|
|
||||||
|
```python
|
||||||
|
def process(input_data, context):
|
||||||
|
return input_data
|
||||||
|
```
|
||||||
|
|
||||||
|
This keeps serialization, logging, and runtime control predictable.
|
||||||
|
|
||||||
|
## Data Flow Contract
|
||||||
|
|
||||||
|
Tasks should exchange managed references, not loose file paths.
|
||||||
|
|
||||||
|
V1 reference types:
|
||||||
|
|
||||||
|
- `assetRef`
|
||||||
|
- `datasetVersionRef`
|
||||||
|
- `artifactRef`
|
||||||
|
- `annotationTaskRef`
|
||||||
|
- `inlineConfig`
|
||||||
|
|
||||||
|
Executors may materialize files internally, but the platform-level contract must remain reference-based.
|
||||||
|
|
||||||
|
## Validation Stages
|
||||||
|
|
||||||
|
Workflow execution must validate in this order:
|
||||||
|
|
||||||
|
1. workflow version exists
|
||||||
|
2. referenced plugins exist and are enabled
|
||||||
|
3. node schemas are valid
|
||||||
|
4. edge connections are schema-compatible
|
||||||
|
5. runtime configuration is complete
|
||||||
|
6. referenced assets and datasets are accessible
|
||||||
|
7. code hooks pass static validation
|
||||||
|
8. executor and scheduler requirements are satisfiable
|
||||||
|
|
||||||
|
Validation failure must block run creation.
|
||||||
|
|
||||||
|
## Run Lifecycle
|
||||||
|
|
||||||
|
When a user executes a workflow:
|
||||||
|
|
||||||
|
1. resolve workflow version
|
||||||
|
2. snapshot all runtime-relevant inputs
|
||||||
|
3. resolve plugin versions
|
||||||
|
4. freeze node config and code hooks
|
||||||
|
5. compile graph into a DAG
|
||||||
|
6. create `WorkflowRun`
|
||||||
|
7. create `RunTask` entries
|
||||||
|
8. enqueue ready tasks
|
||||||
|
9. collect outputs, logs, and task state
|
||||||
|
10. finalize run status and summary
|
||||||
|
|
||||||
|
## Run State Model
|
||||||
|
|
||||||
|
### WorkflowRun Status
|
||||||
|
|
||||||
|
- `pending`
|
||||||
|
- `queued`
|
||||||
|
- `running`
|
||||||
|
- `success`
|
||||||
|
- `failed`
|
||||||
|
- `cancelled`
|
||||||
|
- `partial_success`
|
||||||
|
|
||||||
|
### RunTask Status
|
||||||
|
|
||||||
|
- `pending`
|
||||||
|
- `queued`
|
||||||
|
- `running`
|
||||||
|
- `success`
|
||||||
|
- `failed`
|
||||||
|
- `cancelled`
|
||||||
|
- `skipped`
|
||||||
|
|
||||||
|
`partial_success` is used for workflows where non-blocking nodes fail but the run still produces valid outputs.
|
||||||
|
|
||||||
|
## Retry And Failure Policy
|
||||||
|
|
||||||
|
Each node instance may define:
|
||||||
|
|
||||||
|
- retry count
|
||||||
|
- retry backoff policy
|
||||||
|
- fail-fast behavior
|
||||||
|
- continue-on-error behavior
|
||||||
|
- manual retry eligibility
|
||||||
|
|
||||||
|
V1 should support:
|
||||||
|
|
||||||
|
- `fail_fast`
|
||||||
|
- `continue_on_error`
|
||||||
|
- `retry_n_times`
|
||||||
|
- `manual_retry`
|
||||||
|
|
||||||
|
## Cache Model
|
||||||
|
|
||||||
|
V1 should support node-level cache reuse.
|
||||||
|
|
||||||
|
Recommended cache key inputs:
|
||||||
|
|
||||||
|
- workflow version
|
||||||
|
- node id
|
||||||
|
- upstream reference summary
|
||||||
|
- config summary
|
||||||
|
- code hook digest
|
||||||
|
- plugin version
|
||||||
|
- executor version
|
||||||
|
|
||||||
|
Cache hit behavior:
|
||||||
|
|
||||||
|
- reuse output artifact refs
|
||||||
|
- reuse output summaries
|
||||||
|
- retain previous logs reference
|
||||||
|
- mark task as cache-resolved in metadata
|
||||||
|
|
||||||
|
## Execution Context
|
||||||
|
|
||||||
|
Each task receives a normalized execution context containing:
|
||||||
|
|
||||||
|
- workspace id
|
||||||
|
- project id
|
||||||
|
- workflow run id
|
||||||
|
- task id
|
||||||
|
- actor id
|
||||||
|
- node config
|
||||||
|
- code hook content
|
||||||
|
- input references
|
||||||
|
- storage context
|
||||||
|
- temp working directory
|
||||||
|
- runtime resource limits
|
||||||
|
|
||||||
|
This context must be available across Python, Docker, and HTTP executors.
|
||||||
|
|
||||||
|
## Observability Requirements
|
||||||
|
|
||||||
|
Each task must emit:
|
||||||
|
|
||||||
|
- status transitions
|
||||||
|
- start time and finish time
|
||||||
|
- duration
|
||||||
|
- executor metadata
|
||||||
|
- resource request metadata
|
||||||
|
- stdout/stderr log stream
|
||||||
|
- structured task summary
|
||||||
|
- artifact refs
|
||||||
|
|
||||||
|
The UI must allow:
|
||||||
|
|
||||||
|
- graph-level run status
|
||||||
|
- node-level log inspection
|
||||||
|
- node-level artifact browsing
|
||||||
|
- task retry entrypoint
|
||||||
|
- direct navigation from a node to preview output
|
||||||
|
|
||||||
|
## Canvas Interaction Rules
|
||||||
|
|
||||||
|
V1 editor behavior should enforce:
|
||||||
|
|
||||||
|
- port-level connection rules
|
||||||
|
- incompatible edge blocking
|
||||||
|
- dirty-state detection
|
||||||
|
- explicit save before publish/run if graph changed
|
||||||
|
- per-node validation badges
|
||||||
|
- run from latest saved version, not unsaved draft
|
||||||
|
|
||||||
|
## Example V1 Pipelines
|
||||||
|
|
||||||
|
### Delivery Normalization
|
||||||
|
|
||||||
|
```text
|
||||||
|
Raw Folder Import
|
||||||
|
-> Archive Extract
|
||||||
|
-> Folder Rename
|
||||||
|
-> Directory Validation
|
||||||
|
-> Metadata Validation
|
||||||
|
-> Video Quality Check
|
||||||
|
-> Delivery Export
|
||||||
|
```
|
||||||
|
|
||||||
|
### Dataset Conversion
|
||||||
|
|
||||||
|
```text
|
||||||
|
Rosbag Reader
|
||||||
|
-> Canonical Mapping
|
||||||
|
-> Frame Filter
|
||||||
|
-> Metadata Normalize
|
||||||
|
-> LeRobot Writer
|
||||||
|
-> Training Config Export
|
||||||
|
```
|
||||||
|
|
||||||
|
## V1 Non-Goals
|
||||||
|
|
||||||
|
The V1 workflow engine does not need:
|
||||||
|
|
||||||
|
- loop semantics
|
||||||
|
- streaming execution
|
||||||
|
- unbounded dynamic fan-out
|
||||||
|
- event-driven triggers
|
||||||
|
- advanced distributed DAG partitioning
|
||||||
|
|
||||||
|
The V1 goal is a stable, observable DAG executor for data engineering workflows.
|
||||||
1
design/04-ui-ux/.gitkeep
Normal file
1
design/04-ui-ux/.gitkeep
Normal file
@ -0,0 +1 @@
|
|||||||
|
|
||||||
296
design/04-ui-ux/information-architecture-and-key-screens.md
Normal file
296
design/04-ui-ux/information-architecture-and-key-screens.md
Normal file
@ -0,0 +1,296 @@
|
|||||||
|
# EmboFlow Information Architecture And Key Screens
|
||||||
|
|
||||||
|
## Goal
|
||||||
|
|
||||||
|
Define the primary navigation model, main screens, and key interaction patterns for EmboFlow V1.
|
||||||
|
|
||||||
|
The UI should feel like a serious data workflow product, not a generic low-code canvas. The most important interaction is the relationship between assets, workflows, runs, and outputs.
|
||||||
|
|
||||||
|
## Information Architecture
|
||||||
|
|
||||||
|
Top-level product areas:
|
||||||
|
|
||||||
|
- Workspace switcher
|
||||||
|
- Project selector
|
||||||
|
- Asset Workspace
|
||||||
|
- Canvas Workspace
|
||||||
|
- Explore Workspace
|
||||||
|
- Label Workspace
|
||||||
|
- Admin Workspace
|
||||||
|
|
||||||
|
## Navigation Model
|
||||||
|
|
||||||
|
### Global Header
|
||||||
|
|
||||||
|
Recommended global header content:
|
||||||
|
|
||||||
|
- workspace switcher
|
||||||
|
- project switcher
|
||||||
|
- search entry
|
||||||
|
- run notifications
|
||||||
|
- user menu
|
||||||
|
|
||||||
|
### Primary Sidebar
|
||||||
|
|
||||||
|
Recommended primary navigation:
|
||||||
|
|
||||||
|
- Assets
|
||||||
|
- Workflows
|
||||||
|
- Runs
|
||||||
|
- Explore
|
||||||
|
- Labels
|
||||||
|
- Admin
|
||||||
|
|
||||||
|
This keeps the product model explicit:
|
||||||
|
|
||||||
|
- assets are inputs
|
||||||
|
- workflows define transformation logic
|
||||||
|
- runs represent execution history
|
||||||
|
- explore is where users inspect outputs and raw inputs
|
||||||
|
|
||||||
|
## Screen 1: Workspace And Project Entry
|
||||||
|
|
||||||
|
Purpose:
|
||||||
|
|
||||||
|
- choose personal or team workspace
|
||||||
|
- choose or create project
|
||||||
|
- view recent projects and recent workflow runs
|
||||||
|
|
||||||
|
V1 should emphasize project-level organization because all major resources are project-scoped.
|
||||||
|
|
||||||
|
## Screen 2: Asset Workspace
|
||||||
|
|
||||||
|
Purpose:
|
||||||
|
|
||||||
|
- upload or import raw assets
|
||||||
|
- inspect asset type and status
|
||||||
|
- review probe summary
|
||||||
|
- launch preview or workflow entrypoint
|
||||||
|
|
||||||
|
Core regions:
|
||||||
|
|
||||||
|
- asset list with filters
|
||||||
|
- import actions
|
||||||
|
- asset status and source type
|
||||||
|
- probe summary card
|
||||||
|
- recommended next actions
|
||||||
|
|
||||||
|
Key actions:
|
||||||
|
|
||||||
|
- upload file
|
||||||
|
- upload archive
|
||||||
|
- import object storage prefix
|
||||||
|
- register storage path
|
||||||
|
- open preview
|
||||||
|
- create workflow from asset
|
||||||
|
|
||||||
|
## Screen 3: Asset Detail / Explore Entry
|
||||||
|
|
||||||
|
Purpose:
|
||||||
|
|
||||||
|
- inspect one asset deeply
|
||||||
|
- browse folder structure
|
||||||
|
- inspect metadata and detected format
|
||||||
|
- preview representative files
|
||||||
|
|
||||||
|
Suggested panels:
|
||||||
|
|
||||||
|
- left: file tree or asset structure
|
||||||
|
- center: preview surface
|
||||||
|
- right: metadata, probe report, warnings, recommended nodes
|
||||||
|
|
||||||
|
This screen should support both:
|
||||||
|
|
||||||
|
- raw asset view
|
||||||
|
- canonical dataset summary view when available
|
||||||
|
|
||||||
|
## Screen 4: Canvas Workspace
|
||||||
|
|
||||||
|
This is the core authoring surface.
|
||||||
|
|
||||||
|
### Layout
|
||||||
|
|
||||||
|
Recommended layout, aligned with the Xspark reference pattern:
|
||||||
|
|
||||||
|
- left: node library and workflow tools
|
||||||
|
- center: canvas
|
||||||
|
- right: node configuration panel
|
||||||
|
|
||||||
|
### Left Panel
|
||||||
|
|
||||||
|
Contains:
|
||||||
|
|
||||||
|
- source nodes
|
||||||
|
- transform nodes
|
||||||
|
- inspect nodes
|
||||||
|
- annotate nodes
|
||||||
|
- export nodes
|
||||||
|
- utility nodes
|
||||||
|
- search/filter
|
||||||
|
|
||||||
|
### Center Canvas
|
||||||
|
|
||||||
|
Supports:
|
||||||
|
|
||||||
|
- drag-and-drop node placement
|
||||||
|
- edge creation
|
||||||
|
- zoom and pan
|
||||||
|
- mini-map
|
||||||
|
- node badges for validation status
|
||||||
|
- run-state overlays when viewing an executed version
|
||||||
|
|
||||||
|
### Right Configuration Panel
|
||||||
|
|
||||||
|
The right panel is schema-driven.
|
||||||
|
|
||||||
|
It should render:
|
||||||
|
|
||||||
|
- node title
|
||||||
|
- node description
|
||||||
|
- config fields
|
||||||
|
- input/output schema summary
|
||||||
|
- executor selection
|
||||||
|
- runtime policy
|
||||||
|
- code hook editor if supported
|
||||||
|
- validation errors
|
||||||
|
|
||||||
|
This panel is critical. It should feel like a structured system console, not a generic form dump.
|
||||||
|
|
||||||
|
## Screen 5: Workflow Run Detail
|
||||||
|
|
||||||
|
Purpose:
|
||||||
|
|
||||||
|
- inspect execution state
|
||||||
|
- view DAG progress
|
||||||
|
- open task logs
|
||||||
|
- inspect task outputs
|
||||||
|
- retry failed nodes
|
||||||
|
|
||||||
|
Recommended layout:
|
||||||
|
|
||||||
|
- top: run summary and status
|
||||||
|
- center: workflow graph with execution overlays
|
||||||
|
- bottom or side drawer: logs and artifacts for selected node
|
||||||
|
|
||||||
|
## Screen 6: Explore Workspace
|
||||||
|
|
||||||
|
Purpose:
|
||||||
|
|
||||||
|
- inspect raw or processed outputs outside the canvas authoring context
|
||||||
|
- compare source and transformed outputs
|
||||||
|
- validate whether a run produced expected results
|
||||||
|
|
||||||
|
V1 renderer set:
|
||||||
|
|
||||||
|
- directory tree renderer
|
||||||
|
- JSON renderer
|
||||||
|
- video renderer
|
||||||
|
- dataset summary renderer
|
||||||
|
- quality report renderer
|
||||||
|
|
||||||
|
This workspace should open from:
|
||||||
|
|
||||||
|
- asset detail
|
||||||
|
- workflow node output
|
||||||
|
- artifact detail
|
||||||
|
|
||||||
|
## Screen 7: Label Workspace
|
||||||
|
|
||||||
|
Purpose:
|
||||||
|
|
||||||
|
- process annotation tasks
|
||||||
|
- review results
|
||||||
|
- attach annotations to data outputs
|
||||||
|
|
||||||
|
V1 should keep this lightweight:
|
||||||
|
|
||||||
|
- frame labels
|
||||||
|
- clip labels
|
||||||
|
- temporal segment labels
|
||||||
|
- quality tags
|
||||||
|
|
||||||
|
The label workspace should be able to open from an artifact or dataset version, not only from a workflow node.
|
||||||
|
|
||||||
|
## Screen 8: Admin Workspace
|
||||||
|
|
||||||
|
Purpose:
|
||||||
|
|
||||||
|
- manage members
|
||||||
|
- manage storage connections
|
||||||
|
- manage plugin enablement
|
||||||
|
- inspect audit and runtime settings
|
||||||
|
|
||||||
|
Suggested sections:
|
||||||
|
|
||||||
|
- members and roles
|
||||||
|
- workspace settings
|
||||||
|
- storage connections
|
||||||
|
- plugin registry
|
||||||
|
- executor policies
|
||||||
|
- audit log viewer
|
||||||
|
|
||||||
|
## Key UX Principles
|
||||||
|
|
||||||
|
### 1. Separate authoring from inspection
|
||||||
|
|
||||||
|
Do not overload the canvas with deep preview or annotation workflows. The canvas is for configuring the process; the Explore and Label workspaces handle dense interaction.
|
||||||
|
|
||||||
|
### 2. Keep lineage visible
|
||||||
|
|
||||||
|
Users should be able to move across:
|
||||||
|
|
||||||
|
- asset
|
||||||
|
- workflow
|
||||||
|
- run
|
||||||
|
- task
|
||||||
|
- artifact
|
||||||
|
- annotation
|
||||||
|
|
||||||
|
without losing context.
|
||||||
|
|
||||||
|
### 3. Prefer explicit system terminology
|
||||||
|
|
||||||
|
Use consistent object names in the UI:
|
||||||
|
|
||||||
|
- Asset
|
||||||
|
- Dataset
|
||||||
|
- Workflow
|
||||||
|
- Run
|
||||||
|
- Task
|
||||||
|
- Artifact
|
||||||
|
- Plugin
|
||||||
|
|
||||||
|
Do not refer to the same concept by different names on different pages.
|
||||||
|
|
||||||
|
### 4. Make validation obvious before execution
|
||||||
|
|
||||||
|
Before users run a workflow, the editor should visibly show:
|
||||||
|
|
||||||
|
- missing config
|
||||||
|
- invalid schema connections
|
||||||
|
- unsupported executor choices
|
||||||
|
- permission or plugin issues
|
||||||
|
|
||||||
|
### 5. Keep the product usable on standard screens
|
||||||
|
|
||||||
|
The canvas and right configuration panel must work on laptop-sized displays. On narrower screens, the right panel may collapse into a drawer.
|
||||||
|
|
||||||
|
## V1 Visual Direction
|
||||||
|
|
||||||
|
The UI should communicate:
|
||||||
|
|
||||||
|
- precision
|
||||||
|
- observability
|
||||||
|
- traceability
|
||||||
|
- strong operator control
|
||||||
|
|
||||||
|
It should feel closer to a workflow control console than a consumer productivity app.
|
||||||
|
|
||||||
|
## V1 Non-Goals
|
||||||
|
|
||||||
|
V1 UI does not need:
|
||||||
|
|
||||||
|
- real-time multi-user cursor collaboration
|
||||||
|
- advanced canvas commenting systems
|
||||||
|
- highly customized renderer marketplace UX
|
||||||
|
- heavy design polish ahead of workflow clarity
|
||||||
1
design/05-data/.gitkeep
Normal file
1
design/05-data/.gitkeep
Normal file
@ -0,0 +1 @@
|
|||||||
|
|
||||||
521
design/05-data/mongodb-data-model.md
Normal file
521
design/05-data/mongodb-data-model.md
Normal file
@ -0,0 +1,521 @@
|
|||||||
|
# EmboFlow MongoDB Data Model
|
||||||
|
|
||||||
|
## Goal
|
||||||
|
|
||||||
|
Define the MongoDB-only persistence model for EmboFlow V1.
|
||||||
|
|
||||||
|
The database must support:
|
||||||
|
|
||||||
|
- user and workspace isolation
|
||||||
|
- raw asset tracking
|
||||||
|
- canonical dataset versions
|
||||||
|
- workflow versioning
|
||||||
|
- workflow execution history
|
||||||
|
- plugin registration
|
||||||
|
- auditability
|
||||||
|
|
||||||
|
## Storage Principles
|
||||||
|
|
||||||
|
- MongoDB stores metadata and execution state
|
||||||
|
- Object storage stores large binary files and large derived bundles
|
||||||
|
- MongoDB documents should have clear aggregate boundaries
|
||||||
|
- Large, fast-growing arrays should be split into separate collections
|
||||||
|
- Platform contracts should use references, not embedded file blobs
|
||||||
|
|
||||||
|
## Primary Collections
|
||||||
|
|
||||||
|
- `users`
|
||||||
|
- `workspaces`
|
||||||
|
- `projects`
|
||||||
|
- `memberships`
|
||||||
|
- `assets`
|
||||||
|
- `asset_probe_reports`
|
||||||
|
- `datasets`
|
||||||
|
- `dataset_versions`
|
||||||
|
- `workflow_definitions`
|
||||||
|
- `workflow_definition_versions`
|
||||||
|
- `workflow_runs`
|
||||||
|
- `run_tasks`
|
||||||
|
- `artifacts`
|
||||||
|
- `annotation_tasks`
|
||||||
|
- `annotations`
|
||||||
|
- `plugins`
|
||||||
|
- `storage_connections`
|
||||||
|
- `audit_logs`
|
||||||
|
|
||||||
|
## Collection Design
|
||||||
|
|
||||||
|
### users
|
||||||
|
|
||||||
|
Purpose:
|
||||||
|
|
||||||
|
- account identity
|
||||||
|
- profile
|
||||||
|
- login metadata
|
||||||
|
|
||||||
|
Core fields:
|
||||||
|
|
||||||
|
- `_id`
|
||||||
|
- `email`
|
||||||
|
- `displayName`
|
||||||
|
- `avatarUrl`
|
||||||
|
- `status`
|
||||||
|
- `lastLoginAt`
|
||||||
|
- `createdAt`
|
||||||
|
- `updatedAt`
|
||||||
|
|
||||||
|
### workspaces
|
||||||
|
|
||||||
|
Purpose:
|
||||||
|
|
||||||
|
- resource ownership boundary
|
||||||
|
|
||||||
|
Core fields:
|
||||||
|
|
||||||
|
- `_id`
|
||||||
|
- `type` as `personal` or `team`
|
||||||
|
- `name`
|
||||||
|
- `slug`
|
||||||
|
- `ownerId`
|
||||||
|
- `status`
|
||||||
|
- `settings`
|
||||||
|
- `createdAt`
|
||||||
|
- `updatedAt`
|
||||||
|
|
||||||
|
### memberships
|
||||||
|
|
||||||
|
Purpose:
|
||||||
|
|
||||||
|
- workspace and project role mapping
|
||||||
|
|
||||||
|
Core fields:
|
||||||
|
|
||||||
|
- `_id`
|
||||||
|
- `workspaceId`
|
||||||
|
- `projectId` optional
|
||||||
|
- `userId`
|
||||||
|
- `role`
|
||||||
|
- `status`
|
||||||
|
- `createdAt`
|
||||||
|
- `updatedAt`
|
||||||
|
|
||||||
|
This collection should stay independent instead of embedding large member arrays on every resource.
|
||||||
|
|
||||||
|
### projects
|
||||||
|
|
||||||
|
Purpose:
|
||||||
|
|
||||||
|
- project-scoped grouping for assets, workflows, runs, and outputs
|
||||||
|
|
||||||
|
Core fields:
|
||||||
|
|
||||||
|
- `_id`
|
||||||
|
- `workspaceId`
|
||||||
|
- `name`
|
||||||
|
- `slug`
|
||||||
|
- `description`
|
||||||
|
- `status`
|
||||||
|
- `createdBy`
|
||||||
|
- `createdAt`
|
||||||
|
- `updatedAt`
|
||||||
|
|
||||||
|
### assets
|
||||||
|
|
||||||
|
Purpose:
|
||||||
|
|
||||||
|
- represent raw uploaded or imported inputs
|
||||||
|
|
||||||
|
Supported asset types:
|
||||||
|
|
||||||
|
- `raw_file`
|
||||||
|
- `archive`
|
||||||
|
- `folder`
|
||||||
|
- `video_collection`
|
||||||
|
- `standard_dataset`
|
||||||
|
- `rosbag`
|
||||||
|
- `hdf5_dataset`
|
||||||
|
- `object_storage_prefix`
|
||||||
|
|
||||||
|
Core fields:
|
||||||
|
|
||||||
|
- `_id`
|
||||||
|
- `workspaceId`
|
||||||
|
- `projectId`
|
||||||
|
- `type`
|
||||||
|
- `sourceType`
|
||||||
|
- `displayName`
|
||||||
|
- `status`
|
||||||
|
- `storageRef`
|
||||||
|
- `sizeBytes`
|
||||||
|
- `fileCount`
|
||||||
|
- `topLevelPaths`
|
||||||
|
- `detectedFormats`
|
||||||
|
- `summary`
|
||||||
|
- `createdBy`
|
||||||
|
- `createdAt`
|
||||||
|
- `updatedAt`
|
||||||
|
|
||||||
|
Do not embed full large file listings in this document.
|
||||||
|
|
||||||
|
### asset_probe_reports
|
||||||
|
|
||||||
|
Purpose:
|
||||||
|
|
||||||
|
- retain richer structure-detection and validation output
|
||||||
|
|
||||||
|
Core fields:
|
||||||
|
|
||||||
|
- `_id`
|
||||||
|
- `assetId`
|
||||||
|
- `reportVersion`
|
||||||
|
- `detectedFormatCandidates`
|
||||||
|
- `structureSummary`
|
||||||
|
- `warnings`
|
||||||
|
- `recommendedNextNodes`
|
||||||
|
- `rawReport`
|
||||||
|
- `createdAt`
|
||||||
|
|
||||||
|
### datasets
|
||||||
|
|
||||||
|
Purpose:
|
||||||
|
|
||||||
|
- represent logical dataset identity
|
||||||
|
|
||||||
|
Core fields:
|
||||||
|
|
||||||
|
- `_id`
|
||||||
|
- `workspaceId`
|
||||||
|
- `projectId`
|
||||||
|
- `name`
|
||||||
|
- `type`
|
||||||
|
- `status`
|
||||||
|
- `latestVersionId`
|
||||||
|
- `summary`
|
||||||
|
- `createdBy`
|
||||||
|
- `createdAt`
|
||||||
|
- `updatedAt`
|
||||||
|
|
||||||
|
### dataset_versions
|
||||||
|
|
||||||
|
Purpose:
|
||||||
|
|
||||||
|
- represent immutable dataset snapshots
|
||||||
|
|
||||||
|
Core fields:
|
||||||
|
|
||||||
|
- `_id`
|
||||||
|
- `datasetId`
|
||||||
|
- `workspaceId`
|
||||||
|
- `projectId`
|
||||||
|
- `sourceAssetId`
|
||||||
|
- `parentVersionId`
|
||||||
|
- `versionTag`
|
||||||
|
- `canonicalSchemaVersion`
|
||||||
|
- `manifestRef`
|
||||||
|
- `stats`
|
||||||
|
- `summary`
|
||||||
|
- `status`
|
||||||
|
- `createdBy`
|
||||||
|
- `createdAt`
|
||||||
|
|
||||||
|
This collection is kept separate from `datasets` because the number of versions grows without bound over time.
|
||||||
|
|
||||||
|
### workflow_definitions
|
||||||
|
|
||||||
|
Purpose:
|
||||||
|
|
||||||
|
- represent logical workflow identity
|
||||||
|
|
||||||
|
Core fields:
|
||||||
|
|
||||||
|
- `_id`
|
||||||
|
- `workspaceId`
|
||||||
|
- `projectId`
|
||||||
|
- `name`
|
||||||
|
- `slug`
|
||||||
|
- `status`
|
||||||
|
- `latestVersionNumber`
|
||||||
|
- `publishedVersionNumber`
|
||||||
|
- `createdBy`
|
||||||
|
- `createdAt`
|
||||||
|
- `updatedAt`
|
||||||
|
|
||||||
|
### workflow_definition_versions
|
||||||
|
|
||||||
|
Purpose:
|
||||||
|
|
||||||
|
- represent immutable workflow snapshots
|
||||||
|
|
||||||
|
Core fields:
|
||||||
|
|
||||||
|
- `_id`
|
||||||
|
- `workflowDefinitionId`
|
||||||
|
- `workspaceId`
|
||||||
|
- `projectId`
|
||||||
|
- `versionNumber`
|
||||||
|
- `visualGraph`
|
||||||
|
- `logicGraph`
|
||||||
|
- `runtimeGraph`
|
||||||
|
- `pluginRefs`
|
||||||
|
- `summary`
|
||||||
|
- `createdBy`
|
||||||
|
- `createdAt`
|
||||||
|
|
||||||
|
Splitting versions from workflow head metadata avoids oversized documents and simplifies history queries.
|
||||||
|
|
||||||
|
### workflow_runs
|
||||||
|
|
||||||
|
Purpose:
|
||||||
|
|
||||||
|
- store execution runs
|
||||||
|
|
||||||
|
Core fields:
|
||||||
|
|
||||||
|
- `_id`
|
||||||
|
- `workflowDefinitionId`
|
||||||
|
- `workflowVersionId`
|
||||||
|
- `workspaceId`
|
||||||
|
- `projectId`
|
||||||
|
- `triggeredBy`
|
||||||
|
- `status`
|
||||||
|
- `runtimeSnapshot`
|
||||||
|
- `summary`
|
||||||
|
- `startedAt`
|
||||||
|
- `finishedAt`
|
||||||
|
- `createdAt`
|
||||||
|
|
||||||
|
### run_tasks
|
||||||
|
|
||||||
|
Purpose:
|
||||||
|
|
||||||
|
- store one execution unit per node per run
|
||||||
|
|
||||||
|
Core fields:
|
||||||
|
|
||||||
|
- `_id`
|
||||||
|
- `workflowRunId`
|
||||||
|
- `workflowVersionId`
|
||||||
|
- `nodeId`
|
||||||
|
- `nodeType`
|
||||||
|
- `status`
|
||||||
|
- `attempt`
|
||||||
|
- `executor`
|
||||||
|
- `scheduler`
|
||||||
|
- `inputRefs`
|
||||||
|
- `outputRefs`
|
||||||
|
- `logRef`
|
||||||
|
- `cacheKey`
|
||||||
|
- `cacheHit`
|
||||||
|
- `errorSummary`
|
||||||
|
- `startedAt`
|
||||||
|
- `finishedAt`
|
||||||
|
- `createdAt`
|
||||||
|
|
||||||
|
This collection should remain separate from `workflow_runs` because task volume grows quickly.
|
||||||
|
|
||||||
|
### artifacts
|
||||||
|
|
||||||
|
Purpose:
|
||||||
|
|
||||||
|
- store managed outputs and previews
|
||||||
|
|
||||||
|
Artifact types may include:
|
||||||
|
|
||||||
|
- preview bundle
|
||||||
|
- quality report
|
||||||
|
- normalized dataset package
|
||||||
|
- delivery package
|
||||||
|
- training config package
|
||||||
|
- intermediate task output
|
||||||
|
|
||||||
|
Core fields:
|
||||||
|
|
||||||
|
- `_id`
|
||||||
|
- `workspaceId`
|
||||||
|
- `projectId`
|
||||||
|
- `type`
|
||||||
|
- `producerType`
|
||||||
|
- `producerId`
|
||||||
|
- `storageRef`
|
||||||
|
- `previewable`
|
||||||
|
- `summary`
|
||||||
|
- `lineage`
|
||||||
|
- `createdBy`
|
||||||
|
- `createdAt`
|
||||||
|
|
||||||
|
### annotation_tasks
|
||||||
|
|
||||||
|
Purpose:
|
||||||
|
|
||||||
|
- track assignment and state of manual labeling work
|
||||||
|
|
||||||
|
Core fields:
|
||||||
|
|
||||||
|
- `_id`
|
||||||
|
- `workspaceId`
|
||||||
|
- `projectId`
|
||||||
|
- `targetType`
|
||||||
|
- `targetRef`
|
||||||
|
- `labelType`
|
||||||
|
- `status`
|
||||||
|
- `assigneeIds`
|
||||||
|
- `reviewerIds`
|
||||||
|
- `createdBy`
|
||||||
|
- `createdAt`
|
||||||
|
- `updatedAt`
|
||||||
|
|
||||||
|
### annotations
|
||||||
|
|
||||||
|
Purpose:
|
||||||
|
|
||||||
|
- persist annotation outputs
|
||||||
|
|
||||||
|
Core fields:
|
||||||
|
|
||||||
|
- `_id`
|
||||||
|
- `annotationTaskId`
|
||||||
|
- `workspaceId`
|
||||||
|
- `projectId`
|
||||||
|
- `targetRef`
|
||||||
|
- `payload`
|
||||||
|
- `status`
|
||||||
|
- `createdBy`
|
||||||
|
- `createdAt`
|
||||||
|
- `updatedAt`
|
||||||
|
|
||||||
|
### plugins
|
||||||
|
|
||||||
|
Purpose:
|
||||||
|
|
||||||
|
- track installable and enabled plugin versions
|
||||||
|
|
||||||
|
Core fields:
|
||||||
|
|
||||||
|
- `_id`
|
||||||
|
- `workspaceId` optional for workspace-scoped plugins
|
||||||
|
- `scope` as `platform` or `workspace`
|
||||||
|
- `name`
|
||||||
|
- `status`
|
||||||
|
- `currentVersion`
|
||||||
|
- `versions`
|
||||||
|
- `permissions`
|
||||||
|
- `metadata`
|
||||||
|
- `createdAt`
|
||||||
|
- `updatedAt`
|
||||||
|
|
||||||
|
If plugin version payloads become large, split versions into a separate collection later. V1 can keep them nested if bounded.
|
||||||
|
|
||||||
|
### storage_connections
|
||||||
|
|
||||||
|
Purpose:
|
||||||
|
|
||||||
|
- store object storage and path registration configuration
|
||||||
|
|
||||||
|
Core fields:
|
||||||
|
|
||||||
|
- `_id`
|
||||||
|
- `workspaceId`
|
||||||
|
- `type`
|
||||||
|
- `provider`
|
||||||
|
- `name`
|
||||||
|
- `status`
|
||||||
|
- `config`
|
||||||
|
- `secretRef`
|
||||||
|
- `createdBy`
|
||||||
|
- `createdAt`
|
||||||
|
- `updatedAt`
|
||||||
|
|
||||||
|
Store secrets outside plaintext document fields where possible.
|
||||||
|
|
||||||
|
### audit_logs
|
||||||
|
|
||||||
|
Purpose:
|
||||||
|
|
||||||
|
- append-only history of sensitive actions
|
||||||
|
|
||||||
|
Core fields:
|
||||||
|
|
||||||
|
- `_id`
|
||||||
|
- `workspaceId`
|
||||||
|
- `projectId`
|
||||||
|
- `actorId`
|
||||||
|
- `resourceType`
|
||||||
|
- `resourceId`
|
||||||
|
- `action`
|
||||||
|
- `beforeSummary`
|
||||||
|
- `afterSummary`
|
||||||
|
- `metadata`
|
||||||
|
- `createdAt`
|
||||||
|
|
||||||
|
## Reference Strategy
|
||||||
|
|
||||||
|
Use stable identifiers to reference documents across collections.
|
||||||
|
|
||||||
|
References should be explicit:
|
||||||
|
|
||||||
|
- asset to probe report
|
||||||
|
- dataset to dataset versions
|
||||||
|
- workflow definition to workflow versions
|
||||||
|
- workflow run to run tasks
|
||||||
|
- task to artifact
|
||||||
|
- annotation task to annotations
|
||||||
|
|
||||||
|
Do not depend on implicit path-based linkage.
|
||||||
|
|
||||||
|
## Index Recommendations
|
||||||
|
|
||||||
|
### Always index
|
||||||
|
|
||||||
|
- `workspaceId`
|
||||||
|
- `projectId`
|
||||||
|
- `status`
|
||||||
|
- `createdAt`
|
||||||
|
|
||||||
|
### Important compound indexes
|
||||||
|
|
||||||
|
- `memberships.workspaceId + memberships.userId`
|
||||||
|
- `projects.workspaceId + projects.slug`
|
||||||
|
- `assets.projectId + assets.type + assets.createdAt`
|
||||||
|
- `datasets.projectId + datasets.name`
|
||||||
|
- `dataset_versions.datasetId + dataset_versions.createdAt`
|
||||||
|
- `workflow_definitions.projectId + workflow_definitions.slug`
|
||||||
|
- `workflow_definition_versions.workflowDefinitionId + versionNumber`
|
||||||
|
- `workflow_runs.projectId + createdAt`
|
||||||
|
- `workflow_runs.workflowDefinitionId + status`
|
||||||
|
- `run_tasks.workflowRunId + nodeId`
|
||||||
|
- `artifacts.producerType + producerId`
|
||||||
|
- `annotation_tasks.projectId + status`
|
||||||
|
- `audit_logs.workspaceId + createdAt`
|
||||||
|
|
||||||
|
## Object Storage References
|
||||||
|
|
||||||
|
MongoDB should store references such as:
|
||||||
|
|
||||||
|
- bucket
|
||||||
|
- key
|
||||||
|
- uri
|
||||||
|
- checksum
|
||||||
|
- content type
|
||||||
|
- size
|
||||||
|
|
||||||
|
It should not store:
|
||||||
|
|
||||||
|
- large binary file payloads
|
||||||
|
- full raw video content
|
||||||
|
- giant archive contents
|
||||||
|
|
||||||
|
## V1 Constraints
|
||||||
|
|
||||||
|
- MongoDB is the only database
|
||||||
|
- No relational sidecar is assumed
|
||||||
|
- No GridFS-first strategy is assumed
|
||||||
|
- Large manifests may live in object storage and be referenced from MongoDB
|
||||||
|
|
||||||
|
## V1 Non-Goals
|
||||||
|
|
||||||
|
The V1 model does not need:
|
||||||
|
|
||||||
|
- cross-region data distribution
|
||||||
|
- advanced event sourcing
|
||||||
|
- fully normalized analytics warehouse modeling
|
||||||
|
- high-volume search indexing inside MongoDB itself
|
||||||
1
design/06-api/.gitkeep
Normal file
1
design/06-api/.gitkeep
Normal file
@ -0,0 +1 @@
|
|||||||
|
|
||||||
1
design/07-research/.gitkeep
Normal file
1
design/07-research/.gitkeep
Normal file
@ -0,0 +1 @@
|
|||||||
|
|
||||||
1
design/08-decisions/.gitkeep
Normal file
1
design/08-decisions/.gitkeep
Normal file
@ -0,0 +1 @@
|
|||||||
|
|
||||||
@ -0,0 +1,45 @@
|
|||||||
|
# ADR-0001: Separate Raw Assets From Canonical Datasets
|
||||||
|
|
||||||
|
## Status
|
||||||
|
|
||||||
|
Accepted
|
||||||
|
|
||||||
|
## Context
|
||||||
|
|
||||||
|
EmboFlow must support both structured embodied dataset formats and unstructured or semi-structured delivery-style raw assets, including:
|
||||||
|
|
||||||
|
- RLDS
|
||||||
|
- LeRobot v2/v3
|
||||||
|
- HDF5
|
||||||
|
- Rosbag
|
||||||
|
- Raw video directories
|
||||||
|
- Archive packages
|
||||||
|
|
||||||
|
If the platform treats every input as an already-standardized dataset, ingestion and delivery workflows become awkward and lossy.
|
||||||
|
|
||||||
|
## Decision
|
||||||
|
|
||||||
|
The platform will model:
|
||||||
|
|
||||||
|
- Raw assets as first-class resources
|
||||||
|
- Canonical datasets as derived semantic resources
|
||||||
|
|
||||||
|
Raw assets preserve original structure, paths, naming, and metadata layout. Canonical datasets provide normalized semantics for conversion, workflow execution, and export logic.
|
||||||
|
|
||||||
|
## Consequences
|
||||||
|
|
||||||
|
### Positive
|
||||||
|
|
||||||
|
- Supports customer delivery package workflows
|
||||||
|
- Supports embodied dataset conversion workflows
|
||||||
|
- Preserves original structure for inspection and debugging
|
||||||
|
- Avoids forcing visualization to depend on a lossy normalized format
|
||||||
|
|
||||||
|
### Negative
|
||||||
|
|
||||||
|
- Adds one more layer to the object model
|
||||||
|
- Requires readers and mappers instead of direct format-to-format conversion
|
||||||
|
|
||||||
|
## Notes
|
||||||
|
|
||||||
|
Visualization may operate on raw assets directly. Processing and export should primarily operate on canonical semantics where possible.
|
||||||
@ -0,0 +1,56 @@
|
|||||||
|
# ADR-0002: Separate Executors From Schedulers
|
||||||
|
|
||||||
|
## Status
|
||||||
|
|
||||||
|
Accepted
|
||||||
|
|
||||||
|
## Context
|
||||||
|
|
||||||
|
EmboFlow needs to support multiple runtime modes now and later:
|
||||||
|
|
||||||
|
- direct Python execution
|
||||||
|
- Docker-isolated execution
|
||||||
|
- HTTP-based execution
|
||||||
|
- local scheduling
|
||||||
|
- future Kubernetes scheduling
|
||||||
|
- future Volcano scheduling
|
||||||
|
|
||||||
|
If execution logic and scheduling logic are coupled together, migration from single-host operation to cluster operation becomes costly.
|
||||||
|
|
||||||
|
## Decision
|
||||||
|
|
||||||
|
The architecture will separate:
|
||||||
|
|
||||||
|
- Executor: how node logic runs
|
||||||
|
- Scheduler: where and under what dispatch policy tasks run
|
||||||
|
|
||||||
|
V1 executors:
|
||||||
|
|
||||||
|
- Python
|
||||||
|
- Docker
|
||||||
|
- HTTP
|
||||||
|
|
||||||
|
V1 scheduler:
|
||||||
|
|
||||||
|
- Local
|
||||||
|
|
||||||
|
Reserved future schedulers:
|
||||||
|
|
||||||
|
- Kubernetes
|
||||||
|
- Volcano
|
||||||
|
|
||||||
|
## Consequences
|
||||||
|
|
||||||
|
### Positive
|
||||||
|
|
||||||
|
- Cleaner evolution path
|
||||||
|
- Better runtime abstraction
|
||||||
|
- Less refactoring required for cluster migration
|
||||||
|
|
||||||
|
### Negative
|
||||||
|
|
||||||
|
- Slightly more abstraction in V1 than the immediate deployment requires
|
||||||
|
|
||||||
|
## Notes
|
||||||
|
|
||||||
|
User-injected code should default to Docker execution, while trusted platform logic may use Python execution.
|
||||||
1
design/09-assets/.gitkeep
Normal file
1
design/09-assets/.gitkeep
Normal file
@ -0,0 +1 @@
|
|||||||
|
|
||||||
21
design/README.md
Normal file
21
design/README.md
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
# EmboFlow Design Workspace
|
||||||
|
|
||||||
|
This directory stores project design materials before or alongside implementation.
|
||||||
|
|
||||||
|
## Structure
|
||||||
|
|
||||||
|
- `00-overview`: project goals, scope, milestones
|
||||||
|
- `01-product`: requirements, user stories, feature definitions
|
||||||
|
- `02-architecture`: system architecture, modules, technical constraints
|
||||||
|
- `03-workflows`: business flows, sequence diagrams, operational flows
|
||||||
|
- `04-ui-ux`: wireframes, interaction notes, UX decisions
|
||||||
|
- `05-data`: data model, entities, schema drafts
|
||||||
|
- `06-api`: API contracts, request/response drafts, integration notes
|
||||||
|
- `07-research`: competitive analysis, references, discovery notes
|
||||||
|
- `08-decisions`: ADRs and major tradeoff records
|
||||||
|
- `09-assets`: diagrams, exported images, attachments
|
||||||
|
- `templates`: reusable design document templates
|
||||||
|
|
||||||
|
## Suggested usage
|
||||||
|
|
||||||
|
Keep design artifacts in Markdown where possible so they diff cleanly in Git.
|
||||||
1
design/templates/.gitkeep
Normal file
1
design/templates/.gitkeep
Normal file
@ -0,0 +1 @@
|
|||||||
|
|
||||||
62
docker-compose.yml
Normal file
62
docker-compose.yml
Normal file
@ -0,0 +1,62 @@
|
|||||||
|
services:
|
||||||
|
web:
|
||||||
|
image: node:20-alpine
|
||||||
|
working_dir: /workspace
|
||||||
|
command: ["sh", "-c", "sleep infinity"]
|
||||||
|
ports:
|
||||||
|
- "${WEB_PORT:-3000}:3000"
|
||||||
|
volumes:
|
||||||
|
- .:/workspace
|
||||||
|
depends_on:
|
||||||
|
- api
|
||||||
|
|
||||||
|
api:
|
||||||
|
image: node:20-alpine
|
||||||
|
working_dir: /workspace
|
||||||
|
command: ["sh", "-c", "sleep infinity"]
|
||||||
|
ports:
|
||||||
|
- "${API_PORT:-3001}:3001"
|
||||||
|
volumes:
|
||||||
|
- .:/workspace
|
||||||
|
depends_on:
|
||||||
|
- mongo
|
||||||
|
|
||||||
|
worker:
|
||||||
|
image: node:20-alpine
|
||||||
|
working_dir: /workspace
|
||||||
|
command: ["sh", "-c", "sleep infinity"]
|
||||||
|
ports:
|
||||||
|
- "${WORKER_PORT:-3002}:3002"
|
||||||
|
volumes:
|
||||||
|
- .:/workspace
|
||||||
|
depends_on:
|
||||||
|
- mongo
|
||||||
|
- minio
|
||||||
|
|
||||||
|
mongo:
|
||||||
|
image: mongo:7
|
||||||
|
restart: unless-stopped
|
||||||
|
ports:
|
||||||
|
- "${MONGO_PORT:-27017}:27017"
|
||||||
|
environment:
|
||||||
|
MONGO_INITDB_ROOT_USERNAME: "${MONGO_ROOT_USERNAME:-emboflow}"
|
||||||
|
MONGO_INITDB_ROOT_PASSWORD: "${MONGO_ROOT_PASSWORD:-emboflow}"
|
||||||
|
volumes:
|
||||||
|
- mongo-data:/data/db
|
||||||
|
|
||||||
|
minio:
|
||||||
|
image: minio/minio:RELEASE.2024-10-29T16-01-48Z
|
||||||
|
restart: unless-stopped
|
||||||
|
command: ["server", "/data", "--console-address", ":9001"]
|
||||||
|
ports:
|
||||||
|
- "${MINIO_PORT:-9000}:9000"
|
||||||
|
- "${MINIO_CONSOLE_PORT:-9001}:9001"
|
||||||
|
environment:
|
||||||
|
MINIO_ROOT_USER: "${MINIO_ROOT_USER:-emboflow}"
|
||||||
|
MINIO_ROOT_PASSWORD: "${MINIO_ROOT_PASSWORD:-emboflow123}"
|
||||||
|
volumes:
|
||||||
|
- minio-data:/data
|
||||||
|
|
||||||
|
volumes:
|
||||||
|
mongo-data:
|
||||||
|
minio-data:
|
||||||
96
docs/development-workflow.md
Normal file
96
docs/development-workflow.md
Normal file
@ -0,0 +1,96 @@
|
|||||||
|
# EmboFlow Development Workflow
|
||||||
|
|
||||||
|
## Goal
|
||||||
|
|
||||||
|
Keep repository design artifacts and implementation changes aligned as EmboFlow evolves.
|
||||||
|
|
||||||
|
## Working Agreement
|
||||||
|
|
||||||
|
EmboFlow is being developed from explicit design documents under `design/`. Development should follow a doc-aware workflow instead of letting code drift ahead without recorded decisions.
|
||||||
|
|
||||||
|
## Standard Change Flow
|
||||||
|
|
||||||
|
### 1. Read Before Editing
|
||||||
|
|
||||||
|
Before changing code, review the design files that define the affected area:
|
||||||
|
|
||||||
|
- product scope
|
||||||
|
- architecture boundaries
|
||||||
|
- workflow model
|
||||||
|
- data model
|
||||||
|
- deployment model
|
||||||
|
- accepted ADRs
|
||||||
|
|
||||||
|
### 2. Identify Impact
|
||||||
|
|
||||||
|
Decide whether the change affects:
|
||||||
|
|
||||||
|
- product behavior
|
||||||
|
- object model
|
||||||
|
- workflow/run/task semantics
|
||||||
|
- node or plugin contract
|
||||||
|
- storage assumptions
|
||||||
|
- user or permission behavior
|
||||||
|
- deployment/runtime assumptions
|
||||||
|
|
||||||
|
If yes, the matching design files must be updated.
|
||||||
|
|
||||||
|
### 3. Change Code And Docs Together
|
||||||
|
|
||||||
|
Do not defer the design update. Treat design edits as part of the implementation, not follow-up cleanup.
|
||||||
|
|
||||||
|
### 4. Run The Consistency Check
|
||||||
|
|
||||||
|
From the repo root:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python3 scripts/check_doc_code_sync.py . --strict
|
||||||
|
```
|
||||||
|
|
||||||
|
Interpret warnings manually. The script is a guardrail, not a replacement for judgment.
|
||||||
|
|
||||||
|
### 5. Use The Local Hooks
|
||||||
|
|
||||||
|
Install local hooks once per clone:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
bash scripts/install_hooks.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
This enables:
|
||||||
|
|
||||||
|
- `commit-msg`: require English-only gitmoji commit messages
|
||||||
|
- `pre-commit`: block staged code/config drift without doc updates
|
||||||
|
- `pre-push`: run commit-message validation, doc/code sync checks, and repository tests
|
||||||
|
|
||||||
|
### 6. Close With Explicit Status
|
||||||
|
|
||||||
|
Every implementation summary should state one of:
|
||||||
|
|
||||||
|
- `Aligned`
|
||||||
|
- `Partially aligned`
|
||||||
|
- `Doc-first`
|
||||||
|
|
||||||
|
and name the exact design files that were reviewed or updated.
|
||||||
|
|
||||||
|
## EmboFlow-Specific Review Checklist
|
||||||
|
|
||||||
|
Before closing a non-trivial change, confirm whether any of these need updates:
|
||||||
|
|
||||||
|
- raw asset vs canonical dataset model
|
||||||
|
- workflow definition vs workflow run model
|
||||||
|
- node schema and plugin contract
|
||||||
|
- executor vs scheduler separation
|
||||||
|
- MongoDB collection or document shape
|
||||||
|
- workspace/project/user boundary
|
||||||
|
- deployment topology or storage assumptions
|
||||||
|
|
||||||
|
## Automation
|
||||||
|
|
||||||
|
This repository now uses both local and remote guardrails:
|
||||||
|
|
||||||
|
- local git hooks from `.githooks/`
|
||||||
|
- commit message validation
|
||||||
|
- CI checks in `.github/workflows/guardrails.yml`
|
||||||
|
|
||||||
|
These checks are intended to keep design documents, code changes, and commit history coherent.
|
||||||
621
docs/plans/2026-03-26-emboflow-v1-foundation-and-mvp.md
Normal file
621
docs/plans/2026-03-26-emboflow-v1-foundation-and-mvp.md
Normal file
@ -0,0 +1,621 @@
|
|||||||
|
# EmboFlow V1 Foundation And MVP Implementation Plan
|
||||||
|
|
||||||
|
> **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task.
|
||||||
|
|
||||||
|
**Goal:** Build the first usable EmboFlow increment: workspace-aware raw asset ingestion, workflow definition/versioning, local workflow execution, and the first web workflow authoring surfaces.
|
||||||
|
|
||||||
|
**Architecture:** Use a TypeScript monorepo with a React web app, a Node.js API control plane, and a separate Node.js worker. Use MongoDB as the only database, object storage abstraction for cloud storage or MinIO, and a local scheduler with Python and Docker executor contracts.
|
||||||
|
|
||||||
|
**Tech Stack:** pnpm workspace, React, TypeScript, React Flow, NestJS, Mongoose, MongoDB, Docker Compose, Python runtime hooks, unittest/Vitest/Jest-compatible project tests
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### Task 1: Bootstrap The Monorepo And Runtime Skeleton
|
||||||
|
|
||||||
|
**Files:**
|
||||||
|
- Create: `package.json`
|
||||||
|
- Create: `pnpm-workspace.yaml`
|
||||||
|
- Create: `tsconfig.base.json`
|
||||||
|
- Create: `apps/web/package.json`
|
||||||
|
- Create: `apps/api/package.json`
|
||||||
|
- Create: `apps/worker/package.json`
|
||||||
|
- Create: `docker-compose.yml`
|
||||||
|
- Create: `.env.example`
|
||||||
|
- Test: `tests/test_repo_structure.py`
|
||||||
|
|
||||||
|
**Step 1: Write the failing test**
|
||||||
|
|
||||||
|
Create `tests/test_repo_structure.py` to assert the repository contains the expected top-level app folders and root workspace files.
|
||||||
|
|
||||||
|
**Step 2: Run test to verify it fails**
|
||||||
|
|
||||||
|
Run:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python3 -m unittest tests/test_repo_structure.py -v
|
||||||
|
```
|
||||||
|
|
||||||
|
Expected: FAIL because the monorepo files and app folders do not exist yet.
|
||||||
|
|
||||||
|
**Step 3: Write minimal implementation**
|
||||||
|
|
||||||
|
Create the pnpm workspace root, app package manifests, root TypeScript config, `.env.example`, and `docker-compose.yml` with services for:
|
||||||
|
|
||||||
|
- `web`
|
||||||
|
- `api`
|
||||||
|
- `worker`
|
||||||
|
- `mongo`
|
||||||
|
- `minio`
|
||||||
|
|
||||||
|
Keep the first version minimal. Do not add extra infra services that are not required by the design.
|
||||||
|
|
||||||
|
**Step 4: Run test to verify it passes**
|
||||||
|
|
||||||
|
Run:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python3 -m unittest tests/test_repo_structure.py -v
|
||||||
|
```
|
||||||
|
|
||||||
|
Expected: PASS
|
||||||
|
|
||||||
|
**Step 5: Commit**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
git add package.json pnpm-workspace.yaml tsconfig.base.json apps docker-compose.yml .env.example tests/test_repo_structure.py
|
||||||
|
git commit -m ":tada: bootstrap workspace and runtime skeleton"
|
||||||
|
```
|
||||||
|
|
||||||
|
### Task 2: Create Shared Domain Contracts And Mongo Setup
|
||||||
|
|
||||||
|
**Files:**
|
||||||
|
- Create: `packages/contracts/package.json`
|
||||||
|
- Create: `packages/contracts/src/domain.ts`
|
||||||
|
- Create: `apps/api/src/common/mongo/mongo.module.ts`
|
||||||
|
- Create: `apps/api/src/common/mongo/schemas/workspace.schema.ts`
|
||||||
|
- Create: `apps/api/src/common/mongo/schemas/project.schema.ts`
|
||||||
|
- Create: `apps/api/src/common/mongo/schemas/asset.schema.ts`
|
||||||
|
- Create: `apps/api/src/common/mongo/schemas/workflow.schema.ts`
|
||||||
|
- Test: `apps/api/test/domain-contracts.spec.ts`
|
||||||
|
|
||||||
|
**Step 1: Write the failing test**
|
||||||
|
|
||||||
|
Create `apps/api/test/domain-contracts.spec.ts` asserting:
|
||||||
|
|
||||||
|
- workspace types include `personal` and `team`
|
||||||
|
- asset types include raw and dataset-style sources
|
||||||
|
- workflow status values match the design docs
|
||||||
|
|
||||||
|
**Step 2: Run test to verify it fails**
|
||||||
|
|
||||||
|
Run:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pnpm --filter api test domain-contracts.spec.ts
|
||||||
|
```
|
||||||
|
|
||||||
|
Expected: FAIL because contracts and schemas are missing.
|
||||||
|
|
||||||
|
**Step 3: Write minimal implementation**
|
||||||
|
|
||||||
|
Create shared domain enums and base Mongo schema definitions for:
|
||||||
|
|
||||||
|
- workspaces
|
||||||
|
- projects
|
||||||
|
- assets
|
||||||
|
- workflow definitions
|
||||||
|
|
||||||
|
Add a minimal Mongo module in the API app using environment-based connection config.
|
||||||
|
|
||||||
|
**Step 4: Run test to verify it passes**
|
||||||
|
|
||||||
|
Run:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pnpm --filter api test domain-contracts.spec.ts
|
||||||
|
```
|
||||||
|
|
||||||
|
Expected: PASS
|
||||||
|
|
||||||
|
**Step 5: Commit**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
git add packages/contracts apps/api/src/common apps/api/test/domain-contracts.spec.ts
|
||||||
|
git commit -m ":sparkles: add shared domain contracts and mongo setup"
|
||||||
|
```
|
||||||
|
|
||||||
|
### Task 3: Implement Identity, Workspace, And Project APIs
|
||||||
|
|
||||||
|
**Files:**
|
||||||
|
- Create: `apps/api/src/modules/auth/auth.module.ts`
|
||||||
|
- Create: `apps/api/src/modules/auth/auth.controller.ts`
|
||||||
|
- Create: `apps/api/src/modules/workspaces/workspaces.module.ts`
|
||||||
|
- Create: `apps/api/src/modules/workspaces/workspaces.controller.ts`
|
||||||
|
- Create: `apps/api/src/modules/projects/projects.module.ts`
|
||||||
|
- Create: `apps/api/src/modules/projects/projects.controller.ts`
|
||||||
|
- Create: `apps/api/src/modules/projects/projects.service.ts`
|
||||||
|
- Test: `apps/api/test/projects.e2e-spec.ts`
|
||||||
|
|
||||||
|
**Step 1: Write the failing test**
|
||||||
|
|
||||||
|
Create `apps/api/test/projects.e2e-spec.ts` covering:
|
||||||
|
|
||||||
|
- create personal workspace bootstrap flow
|
||||||
|
- create project under a workspace
|
||||||
|
- reject project creation without a workspace id
|
||||||
|
|
||||||
|
**Step 2: Run test to verify it fails**
|
||||||
|
|
||||||
|
Run:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pnpm --filter api test projects.e2e-spec.ts
|
||||||
|
```
|
||||||
|
|
||||||
|
Expected: FAIL because the modules and endpoints do not exist yet.
|
||||||
|
|
||||||
|
**Step 3: Write minimal implementation**
|
||||||
|
|
||||||
|
Implement:
|
||||||
|
|
||||||
|
- development-safe auth stub or local auth module
|
||||||
|
- workspace creation and listing
|
||||||
|
- project creation and listing
|
||||||
|
- basic membership checks sufficient for V1 local development
|
||||||
|
|
||||||
|
Do not build a full production auth stack before the API shape is stable.
|
||||||
|
|
||||||
|
**Step 4: Run test to verify it passes**
|
||||||
|
|
||||||
|
Run:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pnpm --filter api test projects.e2e-spec.ts
|
||||||
|
```
|
||||||
|
|
||||||
|
Expected: PASS
|
||||||
|
|
||||||
|
**Step 5: Commit**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
git add apps/api/src/modules/auth apps/api/src/modules/workspaces apps/api/src/modules/projects apps/api/test/projects.e2e-spec.ts
|
||||||
|
git commit -m ":sparkles: add workspace and project APIs"
|
||||||
|
```
|
||||||
|
|
||||||
|
### Task 4: Implement Asset Ingestion, Storage Abstraction, And Probe Metadata
|
||||||
|
|
||||||
|
**Files:**
|
||||||
|
- Create: `apps/api/src/modules/storage/storage.module.ts`
|
||||||
|
- Create: `apps/api/src/modules/storage/storage.service.ts`
|
||||||
|
- Create: `apps/api/src/modules/assets/assets.module.ts`
|
||||||
|
- Create: `apps/api/src/modules/assets/assets.controller.ts`
|
||||||
|
- Create: `apps/api/src/modules/assets/assets.service.ts`
|
||||||
|
- Create: `apps/api/src/modules/assets/probe/probe.service.ts`
|
||||||
|
- Create: `apps/api/src/common/mongo/schemas/asset-probe-report.schema.ts`
|
||||||
|
- Test: `apps/api/test/assets.e2e-spec.ts`
|
||||||
|
|
||||||
|
**Step 1: Write the failing test**
|
||||||
|
|
||||||
|
Create `apps/api/test/assets.e2e-spec.ts` covering:
|
||||||
|
|
||||||
|
- register an uploaded asset record
|
||||||
|
- create a probe report for a raw asset
|
||||||
|
- return recommended next actions from probe metadata
|
||||||
|
|
||||||
|
**Step 2: Run test to verify it fails**
|
||||||
|
|
||||||
|
Run:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pnpm --filter api test assets.e2e-spec.ts
|
||||||
|
```
|
||||||
|
|
||||||
|
Expected: FAIL because asset ingestion and probe services are missing.
|
||||||
|
|
||||||
|
**Step 3: Write minimal implementation**
|
||||||
|
|
||||||
|
Implement:
|
||||||
|
|
||||||
|
- storage abstraction interface
|
||||||
|
- MinIO/S3-compatible config contract
|
||||||
|
- asset create/list/detail endpoints
|
||||||
|
- probe-report persistence
|
||||||
|
- placeholder probe logic for directory and archive summaries
|
||||||
|
|
||||||
|
Do not build full binary upload optimization yet. First make the metadata contract stable.
|
||||||
|
|
||||||
|
**Step 4: Run test to verify it passes**
|
||||||
|
|
||||||
|
Run:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pnpm --filter api test assets.e2e-spec.ts
|
||||||
|
```
|
||||||
|
|
||||||
|
Expected: PASS
|
||||||
|
|
||||||
|
**Step 5: Commit**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
git add apps/api/src/modules/storage apps/api/src/modules/assets apps/api/src/common/mongo/schemas/asset-probe-report.schema.ts apps/api/test/assets.e2e-spec.ts
|
||||||
|
git commit -m ":truck: add asset ingestion and probe metadata flow"
|
||||||
|
```
|
||||||
|
|
||||||
|
### Task 5: Implement Workflow Definitions, Versions, Runs, And Tasks
|
||||||
|
|
||||||
|
**Files:**
|
||||||
|
- Create: `apps/api/src/modules/workflows/workflows.module.ts`
|
||||||
|
- Create: `apps/api/src/modules/workflows/workflows.controller.ts`
|
||||||
|
- Create: `apps/api/src/modules/workflows/workflows.service.ts`
|
||||||
|
- Create: `apps/api/src/modules/runs/runs.module.ts`
|
||||||
|
- Create: `apps/api/src/modules/runs/runs.controller.ts`
|
||||||
|
- Create: `apps/api/src/modules/runs/runs.service.ts`
|
||||||
|
- Create: `apps/api/src/common/mongo/schemas/workflow-definition-version.schema.ts`
|
||||||
|
- Create: `apps/api/src/common/mongo/schemas/workflow-run.schema.ts`
|
||||||
|
- Create: `apps/api/src/common/mongo/schemas/run-task.schema.ts`
|
||||||
|
- Test: `apps/api/test/workflow-runs.e2e-spec.ts`
|
||||||
|
|
||||||
|
**Step 1: Write the failing test**
|
||||||
|
|
||||||
|
Create `apps/api/test/workflow-runs.e2e-spec.ts` covering:
|
||||||
|
|
||||||
|
- create workflow definition
|
||||||
|
- save workflow version
|
||||||
|
- create workflow run from saved version
|
||||||
|
- generate initial run tasks for ready nodes
|
||||||
|
|
||||||
|
**Step 2: Run test to verify it fails**
|
||||||
|
|
||||||
|
Run:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pnpm --filter api test workflow-runs.e2e-spec.ts
|
||||||
|
```
|
||||||
|
|
||||||
|
Expected: FAIL because workflow versioning and run creation do not exist yet.
|
||||||
|
|
||||||
|
**Step 3: Write minimal implementation**
|
||||||
|
|
||||||
|
Implement:
|
||||||
|
|
||||||
|
- workflow definition head record
|
||||||
|
- immutable workflow version snapshots
|
||||||
|
- run creation from a workflow version
|
||||||
|
- initial DAG compilation for simple source-to-transform chains
|
||||||
|
- run task persistence
|
||||||
|
|
||||||
|
Keep V1 graph compilation simple. Support sequential edges first, then one-level branching.
|
||||||
|
|
||||||
|
**Step 4: Run test to verify it passes**
|
||||||
|
|
||||||
|
Run:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pnpm --filter api test workflow-runs.e2e-spec.ts
|
||||||
|
```
|
||||||
|
|
||||||
|
Expected: PASS
|
||||||
|
|
||||||
|
**Step 5: Commit**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
git add apps/api/src/modules/workflows apps/api/src/modules/runs apps/api/src/common/mongo/schemas/workflow-definition-version.schema.ts apps/api/src/common/mongo/schemas/workflow-run.schema.ts apps/api/src/common/mongo/schemas/run-task.schema.ts apps/api/test/workflow-runs.e2e-spec.ts
|
||||||
|
git commit -m ":sparkles: add workflow versioning and run records"
|
||||||
|
```
|
||||||
|
|
||||||
|
### Task 6: Add The Worker, Local Scheduler, And Executor Contracts
|
||||||
|
|
||||||
|
**Files:**
|
||||||
|
- Create: `apps/worker/src/main.ts`
|
||||||
|
- Create: `apps/worker/src/runner/task-runner.ts`
|
||||||
|
- Create: `apps/worker/src/scheduler/local-scheduler.ts`
|
||||||
|
- Create: `apps/worker/src/executors/python-executor.ts`
|
||||||
|
- Create: `apps/worker/src/executors/docker-executor.ts`
|
||||||
|
- Create: `apps/worker/src/executors/http-executor.ts`
|
||||||
|
- Create: `apps/worker/src/contracts/execution-context.ts`
|
||||||
|
- Test: `apps/worker/test/task-runner.spec.ts`
|
||||||
|
|
||||||
|
**Step 1: Write the failing test**
|
||||||
|
|
||||||
|
Create `apps/worker/test/task-runner.spec.ts` covering:
|
||||||
|
|
||||||
|
- worker loads pending tasks
|
||||||
|
- worker marks task running then success
|
||||||
|
- worker chooses executor based on node runtime config
|
||||||
|
|
||||||
|
**Step 2: Run test to verify it fails**
|
||||||
|
|
||||||
|
Run:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pnpm --filter worker test task-runner.spec.ts
|
||||||
|
```
|
||||||
|
|
||||||
|
Expected: FAIL because the worker runtime does not exist yet.
|
||||||
|
|
||||||
|
**Step 3: Write minimal implementation**
|
||||||
|
|
||||||
|
Implement:
|
||||||
|
|
||||||
|
- worker bootstrap
|
||||||
|
- polling or queue-backed local scheduler
|
||||||
|
- execution context builder
|
||||||
|
- stub Python, Docker, and HTTP executors
|
||||||
|
- task status transitions
|
||||||
|
|
||||||
|
Do not implement full Docker isolation logic in one step. First lock the runtime interfaces and transitions.
|
||||||
|
|
||||||
|
**Step 4: Run test to verify it passes**
|
||||||
|
|
||||||
|
Run:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pnpm --filter worker test task-runner.spec.ts
|
||||||
|
```
|
||||||
|
|
||||||
|
Expected: PASS
|
||||||
|
|
||||||
|
**Step 5: Commit**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
git add apps/worker apps/api/src/modules/runs apps/worker/test/task-runner.spec.ts
|
||||||
|
git commit -m ":construction_worker: add local worker and executor contracts"
|
||||||
|
```
|
||||||
|
|
||||||
|
### Task 7: Build The Web Shell, Workspace Flow, And Asset Workspace
|
||||||
|
|
||||||
|
**Files:**
|
||||||
|
- Create: `apps/web/src/main.tsx`
|
||||||
|
- Create: `apps/web/src/app/router.tsx`
|
||||||
|
- Create: `apps/web/src/features/layout/app-shell.tsx`
|
||||||
|
- Create: `apps/web/src/features/workspaces/workspace-switcher.tsx`
|
||||||
|
- Create: `apps/web/src/features/projects/project-selector.tsx`
|
||||||
|
- Create: `apps/web/src/features/assets/assets-page.tsx`
|
||||||
|
- Create: `apps/web/src/features/assets/asset-detail-page.tsx`
|
||||||
|
- Create: `apps/web/src/features/assets/components/asset-list.tsx`
|
||||||
|
- Create: `apps/web/src/features/assets/components/asset-summary-panel.tsx`
|
||||||
|
- Test: `apps/web/src/features/assets/assets-page.test.tsx`
|
||||||
|
|
||||||
|
**Step 1: Write the failing test**
|
||||||
|
|
||||||
|
Create `apps/web/src/features/assets/assets-page.test.tsx` covering:
|
||||||
|
|
||||||
|
- app shell renders primary navigation
|
||||||
|
- assets page renders asset rows from API data
|
||||||
|
- asset detail page renders probe summary
|
||||||
|
|
||||||
|
**Step 2: Run test to verify it fails**
|
||||||
|
|
||||||
|
Run:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pnpm --filter web test assets-page.test.tsx
|
||||||
|
```
|
||||||
|
|
||||||
|
Expected: FAIL because the web app shell and pages do not exist yet.
|
||||||
|
|
||||||
|
**Step 3: Write minimal implementation**
|
||||||
|
|
||||||
|
Implement:
|
||||||
|
|
||||||
|
- web app bootstrap
|
||||||
|
- primary navigation matching the design docs
|
||||||
|
- workspace/project header controls
|
||||||
|
- asset list page
|
||||||
|
- asset detail page with summary and action buttons
|
||||||
|
|
||||||
|
Defer advanced preview renderers. Start with structured metadata and simple detail views.
|
||||||
|
|
||||||
|
**Step 4: Run test to verify it passes**
|
||||||
|
|
||||||
|
Run:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pnpm --filter web test assets-page.test.tsx
|
||||||
|
```
|
||||||
|
|
||||||
|
Expected: PASS
|
||||||
|
|
||||||
|
**Step 5: Commit**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
git add apps/web apps/web/src/features/assets/assets-page.test.tsx
|
||||||
|
git commit -m ":sparkles: add web shell and asset workspace"
|
||||||
|
```
|
||||||
|
|
||||||
|
### Task 8: Build Canvas Authoring, Run Detail, And First Workflow Actions
|
||||||
|
|
||||||
|
**Files:**
|
||||||
|
- Create: `apps/web/src/features/workflows/workflows-page.tsx`
|
||||||
|
- Create: `apps/web/src/features/workflows/workflow-editor-page.tsx`
|
||||||
|
- Create: `apps/web/src/features/workflows/components/node-library.tsx`
|
||||||
|
- Create: `apps/web/src/features/workflows/components/workflow-canvas.tsx`
|
||||||
|
- Create: `apps/web/src/features/workflows/components/node-config-panel.tsx`
|
||||||
|
- Create: `apps/web/src/features/runs/run-detail-page.tsx`
|
||||||
|
- Create: `apps/web/src/features/runs/components/run-graph-view.tsx`
|
||||||
|
- Create: `apps/web/src/features/runs/components/task-log-panel.tsx`
|
||||||
|
- Test: `apps/web/src/features/workflows/workflow-editor-page.test.tsx`
|
||||||
|
|
||||||
|
**Step 1: Write the failing test**
|
||||||
|
|
||||||
|
Create `apps/web/src/features/workflows/workflow-editor-page.test.tsx` covering:
|
||||||
|
|
||||||
|
- node library renders categories
|
||||||
|
- node config panel opens when a node is selected
|
||||||
|
- run detail view shows node status badges from run data
|
||||||
|
|
||||||
|
**Step 2: Run test to verify it fails**
|
||||||
|
|
||||||
|
Run:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pnpm --filter web test workflow-editor-page.test.tsx
|
||||||
|
```
|
||||||
|
|
||||||
|
Expected: FAIL because the workflow editor and run detail pages do not exist yet.
|
||||||
|
|
||||||
|
**Step 3: Write minimal implementation**
|
||||||
|
|
||||||
|
Implement:
|
||||||
|
|
||||||
|
- workflow list page
|
||||||
|
- workflow editor page using React Flow
|
||||||
|
- left node library, center canvas, right config panel
|
||||||
|
- save workflow version action
|
||||||
|
- trigger workflow run action
|
||||||
|
- run detail page with graph and selected-node log panel
|
||||||
|
|
||||||
|
Keep the first editor scoped to V1 node categories and schema-driven config rendering.
|
||||||
|
|
||||||
|
**Step 4: Run test to verify it passes**
|
||||||
|
|
||||||
|
Run:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pnpm --filter web test workflow-editor-page.test.tsx
|
||||||
|
```
|
||||||
|
|
||||||
|
Expected: PASS
|
||||||
|
|
||||||
|
**Step 5: Commit**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
git add apps/web/src/features/workflows apps/web/src/features/runs apps/web/src/features/workflows/workflow-editor-page.test.tsx
|
||||||
|
git commit -m ":sparkles: add canvas workflow editor and run detail pages"
|
||||||
|
```
|
||||||
|
|
||||||
|
### Task 9: Add Preview Surface, Delivery Nodes, And MVP Integration
|
||||||
|
|
||||||
|
**Files:**
|
||||||
|
- Create: `apps/api/src/modules/artifacts/artifacts.module.ts`
|
||||||
|
- Create: `apps/api/src/modules/artifacts/artifacts.controller.ts`
|
||||||
|
- Create: `apps/api/src/modules/artifacts/artifacts.service.ts`
|
||||||
|
- Create: `apps/web/src/features/explore/explore-page.tsx`
|
||||||
|
- Create: `apps/web/src/features/explore/renderers/json-renderer.tsx`
|
||||||
|
- Create: `apps/web/src/features/explore/renderers/video-renderer.tsx`
|
||||||
|
- Create: `apps/web/src/features/explore/renderers/directory-renderer.tsx`
|
||||||
|
- Create: `apps/api/src/modules/plugins/builtin/delivery-nodes.ts`
|
||||||
|
- Test: `apps/api/test/artifacts.e2e-spec.ts`
|
||||||
|
- Test: `apps/web/src/features/explore/explore-page.test.tsx`
|
||||||
|
|
||||||
|
**Step 1: Write the failing tests**
|
||||||
|
|
||||||
|
Create:
|
||||||
|
|
||||||
|
- `apps/api/test/artifacts.e2e-spec.ts` for artifact retrieval by producer
|
||||||
|
- `apps/web/src/features/explore/explore-page.test.tsx` for opening and rendering supported artifact types
|
||||||
|
|
||||||
|
**Step 2: Run tests to verify they fail**
|
||||||
|
|
||||||
|
Run:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pnpm --filter api test artifacts.e2e-spec.ts
|
||||||
|
pnpm --filter web test explore-page.test.tsx
|
||||||
|
```
|
||||||
|
|
||||||
|
Expected: FAIL because artifact APIs and explore renderers do not exist yet.
|
||||||
|
|
||||||
|
**Step 3: Write minimal implementation**
|
||||||
|
|
||||||
|
Implement:
|
||||||
|
|
||||||
|
- artifact module and lookup endpoints
|
||||||
|
- explore page
|
||||||
|
- JSON, directory, and video renderers
|
||||||
|
- built-in delivery-normalization node definitions for the V1 business path
|
||||||
|
|
||||||
|
Do not implement the full renderer plugin platform yet. Start with built-ins and stable renderer contracts.
|
||||||
|
|
||||||
|
**Step 4: Run tests to verify they pass**
|
||||||
|
|
||||||
|
Run:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pnpm --filter api test artifacts.e2e-spec.ts
|
||||||
|
pnpm --filter web test explore-page.test.tsx
|
||||||
|
```
|
||||||
|
|
||||||
|
Expected: PASS
|
||||||
|
|
||||||
|
**Step 5: Commit**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
git add apps/api/src/modules/artifacts apps/api/src/modules/plugins/builtin/delivery-nodes.ts apps/api/test/artifacts.e2e-spec.ts apps/web/src/features/explore apps/web/src/features/explore/explore-page.test.tsx
|
||||||
|
git commit -m ":package: add explore surface and delivery artifacts"
|
||||||
|
```
|
||||||
|
|
||||||
|
### Task 10: Harden Guardrails, Docs, And Developer Entry Commands
|
||||||
|
|
||||||
|
**Files:**
|
||||||
|
- Modify: `CONTRIBUTING.md`
|
||||||
|
- Modify: `docs/development-workflow.md`
|
||||||
|
- Modify: `design/03-workflows/workflow-execution-model.md`
|
||||||
|
- Modify: `design/05-data/mongodb-data-model.md`
|
||||||
|
- Create: `Makefile`
|
||||||
|
- Create: `README.md`
|
||||||
|
- Test: `tests/test_dev_commands.py`
|
||||||
|
|
||||||
|
**Step 1: Write the failing test**
|
||||||
|
|
||||||
|
Create `tests/test_dev_commands.py` asserting:
|
||||||
|
|
||||||
|
- `Makefile` exposes expected local commands
|
||||||
|
- `README.md` documents bootstrap, hooks, test, and local run commands
|
||||||
|
|
||||||
|
**Step 2: Run test to verify it fails**
|
||||||
|
|
||||||
|
Run:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python3 -m unittest tests/test_dev_commands.py -v
|
||||||
|
```
|
||||||
|
|
||||||
|
Expected: FAIL because developer entry commands are not documented yet.
|
||||||
|
|
||||||
|
**Step 3: Write minimal implementation**
|
||||||
|
|
||||||
|
Add:
|
||||||
|
|
||||||
|
- `make bootstrap`
|
||||||
|
- `make test`
|
||||||
|
- `make dev-api`
|
||||||
|
- `make dev-web`
|
||||||
|
- `make dev-worker`
|
||||||
|
- `make guardrails`
|
||||||
|
|
||||||
|
Document the developer flow in `README.md` and update design docs if implementation decisions changed during Tasks 1-9.
|
||||||
|
|
||||||
|
**Step 4: Run test to verify it passes**
|
||||||
|
|
||||||
|
Run:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python3 -m unittest tests/test_dev_commands.py -v
|
||||||
|
```
|
||||||
|
|
||||||
|
Expected: PASS
|
||||||
|
|
||||||
|
**Step 5: Commit**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
git add CONTRIBUTING.md docs/development-workflow.md design/03-workflows/workflow-execution-model.md design/05-data/mongodb-data-model.md Makefile README.md tests/test_dev_commands.py
|
||||||
|
git commit -m ":memo: add developer entry commands and bootstrap docs"
|
||||||
|
```
|
||||||
|
|
||||||
|
## Exit Criteria
|
||||||
|
|
||||||
|
The first implementation pass is complete when:
|
||||||
|
|
||||||
|
- a user can create a workspace and project
|
||||||
|
- a raw asset can be registered and probed
|
||||||
|
- a workflow can be created, versioned, and executed locally
|
||||||
|
- run tasks produce observable status and artifacts
|
||||||
|
- the web app exposes assets, workflows, runs, and basic explore views
|
||||||
|
- guardrails for docs, hooks, commit messages, and CI remain green
|
||||||
|
|
||||||
|
## Notes
|
||||||
|
|
||||||
|
- Keep commits small and use the repository gitmoji + English commit policy.
|
||||||
|
- Update design files in the same task where behavior or architecture changes.
|
||||||
|
- Do not add training execution before the V1 data workflow loop is stable.
|
||||||
9
package.json
Normal file
9
package.json
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
{
|
||||||
|
"name": "emboflow",
|
||||||
|
"private": true,
|
||||||
|
"version": "0.1.0",
|
||||||
|
"packageManager": "pnpm@9.12.3",
|
||||||
|
"scripts": {
|
||||||
|
"test": "python3 -m unittest discover -s tests -p 'test_*.py'"
|
||||||
|
}
|
||||||
|
}
|
||||||
3
pnpm-workspace.yaml
Normal file
3
pnpm-workspace.yaml
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
packages:
|
||||||
|
- "apps/*"
|
||||||
|
- "packages/*"
|
||||||
126
scripts/check_commit_message.py
Executable file
126
scripts/check_commit_message.py
Executable file
@ -0,0 +1,126 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
import argparse
|
||||||
|
import re
|
||||||
|
import subprocess
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
|
||||||
|
# ":sparkles: " style gitmoji shortcode: colon-delimited lowercase name, then whitespace.
SHORTCODE_PREFIX = re.compile(r"^:[a-z0-9_+-]+:\s+")
# Literal emoji prefix: one code point from the Misc Symbols / emoji ranges, then whitespace.
EMOJI_PREFIX = re.compile(r"^[\u2600-\u27BF\U0001F300-\U0001FAFF]\s+")
|
||||||
|
|
||||||
|
|
||||||
|
def strip_prefix(message: str) -> str:
    """Return *message* with its leading gitmoji prefix removed.

    The shortcode form (":sparkles: ") is tried before the literal emoji
    form; a message carrying neither prefix is returned unchanged.
    """
    for prefix in (SHORTCODE_PREFIX, EMOJI_PREFIX):
        if prefix.match(message):
            return prefix.sub("", message, count=1)
    return message
|
||||||
|
|
||||||
|
|
||||||
|
def validate_message(message: str) -> list[str]:
    """Validate one commit message against the gitmoji + English policy.

    Comment lines (starting with "#") and blank lines are ignored. Returns
    a list of human-readable error strings; an empty list means the
    message passes.
    """
    meaningful = [
        line.rstrip("\n")
        for line in message.splitlines()
        if line.rstrip("\n") and not line.rstrip("\n").startswith("#")
    ]
    if not meaningful:
        return ["Commit message must not be empty."]

    errors: list[str] = []
    subject = meaningful[0]

    # Policy: the subject line must open with a gitmoji (either notation).
    if not (SHORTCODE_PREFIX.match(subject) or EMOJI_PREFIX.match(subject)):
        errors.append("Commit subject must start with a gitmoji shortcode or emoji.")

    # After dropping the gitmoji from the subject, everything must be ASCII.
    stripped_subject = strip_prefix(subject)
    rest = meaningful[1:]
    normalized = stripped_subject + ("\n" + "\n".join(rest) if rest else "")
    try:
        normalized.encode("ascii")
    except UnicodeEncodeError:
        errors.append("Commit message must be written in English ASCII text after the gitmoji prefix.")

    if not stripped_subject.strip():
        errors.append("Commit subject must include an English summary after the gitmoji prefix.")

    # Explicit check for CJK Unified Ideographs anywhere in the message.
    if re.search(r"[\u4e00-\u9fff]", "\n".join(meaningful)):
        errors.append("Commit message must not contain Chinese characters.")

    return errors
|
||||||
|
|
||||||
|
|
||||||
|
def read_message_file(path: Path) -> str:
    """Read a commit-message file and return its full UTF-8 text."""
    with path.open(encoding="utf-8") as handle:
        return handle.read()
|
||||||
|
|
||||||
|
|
||||||
|
def run_git(*args: str) -> list[str]:
    """Run a git command and return stdout as stripped, non-empty lines.

    Raises:
        RuntimeError: when git exits non-zero; stderr text is used as the
            error message when available.
    """
    result = subprocess.run(
        ["git", *args],
        capture_output=True,
        text=True,
        check=False,  # exit status is checked manually below
    )
    if result.returncode != 0:
        raise RuntimeError(result.stderr.strip() or "git command failed")
    return [line.strip() for line in result.stdout.splitlines() if line.strip()]
|
||||||
|
|
||||||
|
|
||||||
|
def commit_messages_from_range(rev_range: str) -> list[tuple[str, str]]:
    """Resolve *rev_range* to a list of (sha, full commit message) pairs.

    A value containing ".." is expanded via `git rev-list`; any other
    value is treated as a single commit-ish.

    Raises:
        RuntimeError: when `git log` fails for any resolved commit.
    """
    if ".." in rev_range:
        shas = run_git("rev-list", rev_range)
    else:
        shas = [rev_range]

    messages: list[tuple[str, str]] = []
    for sha in shas:
        # %B is the raw commit message (subject + body), one commit at a time.
        message = subprocess.run(
            ["git", "log", "--format=%B", "-n", "1", sha],
            capture_output=True,
            text=True,
            check=False,
        )
        if message.returncode != 0:
            raise RuntimeError(message.stderr.strip() or "git log failed")
        messages.append((sha, message.stdout.strip()))
    return messages
|
||||||
|
|
||||||
|
|
||||||
|
def parse_args() -> argparse.Namespace:
    """Parse CLI options.

    Exactly one of --file / --rev-range is expected; the mutual-exclusion
    check is performed in main(), not here.
    """
    parser = argparse.ArgumentParser(description="Validate commit message format.")
    parser.add_argument("--file", help="path to commit message file")
    parser.add_argument("--rev-range", help="git revision range or single commit")
    return parser.parse_args()
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> int:
    """CLI entry point.

    Returns 0 when every message passes, 1 when validation fails, and 2
    on usage errors.
    """
    args = parse_args()

    # XOR guard: exactly one input source must be selected.
    if bool(args.file) == bool(args.rev_range):
        print("Use exactly one of --file or --rev-range.")
        return 2

    failures: list[str] = []

    if args.file:
        # Hook mode: validate the message file git passes to commit-msg.
        message = read_message_file(Path(args.file))
        errors = validate_message(message)
        if errors:
            failures.extend(errors)
    else:
        # Range mode: validate every commit in the given revision range.
        for sha, message in commit_messages_from_range(args.rev_range):
            errors = validate_message(message)
            for error in errors:
                # Prefix each finding with the abbreviated commit sha.
                failures.append(f"{sha[:12]}: {error}")

    if failures:
        print("Commit message validation failed:")
        for failure in failures:
            print(f" - {failure}")
        print("\nExpected format example:")
        print(" :sparkles: add hook templates and CI guardrails")
        return 1

    print("Commit message validation passed.")
    return 0
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
raise SystemExit(main())
|
||||||
194
scripts/check_doc_code_sync.py
Executable file
194
scripts/check_doc_code_sync.py
Executable file
@ -0,0 +1,194 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
import argparse
|
||||||
|
import subprocess
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
|
||||||
|
# Substring markers (matched case-insensitively) that classify a path as documentation.
# NOTE(review): bare substrings like "spec"/"plan" match anywhere in a path,
# so e.g. "apps/planner.ts" would classify as docs — confirm this breadth is intended.
DOC_PATTERNS = (
    "design/",
    "docs/",
    "adr",
    "architecture",
    "prd",
    "spec",
    "plan",
)

# File extensions that always count as source code.
CODE_SUFFIXES = {
    ".py",
    ".ts",
    ".tsx",
    ".js",
    ".jsx",
    ".java",
    ".go",
    ".rs",
    ".rb",
    ".php",
    ".kt",
    ".swift",
    ".scala",
    ".sh",
}

# Directory prefixes that mark a path as first-party code.
CODE_HINTS = ("apps/", "packages/", "scripts/")
# Substrings that mark a path as test-related.
TEST_HINTS = ("test", "spec", "__tests__", "tests/")
# Extensions and substrings that mark configuration / infrastructure files.
CONFIG_SUFFIXES = {".yml", ".yaml", ".json", ".toml", ".ini", ".env"}
CONFIG_HINTS = ("docker", "compose", "k8s", "helm", "terraform", ".github/", ".githooks/", ".env")
|
||||||
|
|
||||||
|
|
||||||
|
def run_git(repo: Path, *args: str) -> list[str]:
    """Run a git command inside *repo* and return stdout as stripped, non-empty lines.

    Raises:
        RuntimeError: when git exits non-zero; stderr text is used as the
            error message when available.
    """
    result = subprocess.run(
        ["git", "-C", str(repo), *args],
        capture_output=True,
        text=True,
        check=False,  # exit status is checked manually below
    )
    if result.returncode != 0:
        raise RuntimeError(result.stderr.strip() or "git command failed")
    # NOTE: stripping each line discards the fixed-width column layout of
    # commands like `git status --short`; callers must not slice by offset.
    return [line.strip() for line in result.stdout.splitlines() if line.strip()]
|
||||||
|
|
||||||
|
|
||||||
|
def classify(path_text: str) -> str:
    """Bucket a changed path into "docs", "tests", "code", "config", or "other".

    Heuristics are checked in priority order, so a path matching several
    categories receives the first one that applies.
    """
    lower = path_text.lower()
    suffix = Path(path_text).suffix

    if suffix == ".md" or any(token in lower for token in DOC_PATTERNS):
        return "docs"
    if any(token in lower for token in TEST_HINTS):
        return "tests"
    if suffix in CODE_SUFFIXES or any(token in lower for token in CODE_HINTS):
        return "code"
    if suffix in CONFIG_SUFFIXES or any(token in lower for token in CONFIG_HINTS):
        return "config"
    return "other"
|
||||||
|
|
||||||
|
|
||||||
|
def print_group(title: str, items: list[str]) -> None:
    """Print *title* followed by one " - " bullet per item, or "- none" when empty."""
    print(f"\n{title}:")
    if items:
        for entry in items:
            print(f" - {entry}")
    else:
        print(" - none")
|
||||||
|
|
||||||
|
|
||||||
|
def assess_changes(
    docs: list[str],
    code: list[str],
    tests: list[str],
    config: list[str],
    other: list[str],
    strict: bool,
) -> dict:
    """Derive drift warnings from the grouped change lists.

    Every finding becomes a warning; the doc-drift findings additionally
    become blockers when *strict* is set. Returns a dict with "warnings",
    "blockers", and a "blocking" boolean.
    """
    warnings: list[str] = []
    blockers: list[str] = []

    def note(message: str, *, blocking: bool = False) -> None:
        # All findings warn; only doc-drift findings block, and only in strict mode.
        warnings.append(message)
        if blocking and strict:
            blockers.append(message)

    if code and not docs:
        note("Code changed but no design/doc files changed.", blocking=True)
    if config and not docs:
        note("Config or deployment files changed without any doc updates.", blocking=True)
    if docs and not (code or config or tests):
        note(
            "Docs changed without code changes. This may be intentional, but verify they still match the repository."
        )
    if code and not tests:
        note(
            "Code changed without any test-file changes. Verify whether tests should change."
        )
    if other:
        note(
            "Unclassified files changed. Confirm they do not affect documented behavior or runtime assumptions."
        )

    return {
        "warnings": warnings,
        "blockers": blockers,
        "blocking": bool(blockers),
    }
|
||||||
|
|
||||||
|
|
||||||
|
def collect_paths(repo: Path, args: argparse.Namespace) -> list[str]:
    """Return the changed file paths selected by the CLI mode.

    Selection priority: --staged, then --base-ref, then --rev-range, then
    a working-tree fallback based on `git status --short`.
    """
    if args.staged:
        return run_git(repo, "diff", "--cached", "--name-only", "--diff-filter=ACMR")
    if args.base_ref:
        # Triple-dot diff compares HEAD against the merge base with base_ref.
        return run_git(repo, "diff", "--name-only", "--diff-filter=ACMR", f"{args.base_ref}...HEAD")
    if args.rev_range:
        if ".." in args.rev_range:
            return run_git(repo, "diff", "--name-only", "--diff-filter=ACMR", args.rev_range)
        return run_git(repo, "diff-tree", "--no-commit-id", "--name-only", "-r", args.rev_range)

    # Fallback: parse `git status --short`. run_git() strips leading
    # whitespace from every line, so the fixed-width "XY " prefix cannot be
    # sliced positionally — the previous `line[3:]` corrupted paths for
    # unstaged entries (" M path" stripped to "M path" lost leading chars).
    # Split off the status token instead, and keep only the new name of a
    # rename/copy entry ("R  old -> new").
    changed = run_git(repo, "status", "--short")
    paths: set[str] = set()
    for line in changed:
        parts = line.split(maxsplit=1)
        if len(parts) != 2:
            continue
        path_part = parts[1]
        if " -> " in path_part:
            path_part = path_part.split(" -> ", 1)[1]
        paths.add(path_part)
    return sorted(paths)
|
||||||
|
|
||||||
|
|
||||||
|
def parse_args() -> argparse.Namespace:
    """Parse CLI options.

    The selection flags (--staged, --base-ref, --rev-range) are applied in
    priority order by collect_paths(); no mutual exclusion is enforced here.
    """
    parser = argparse.ArgumentParser(description="Check whether doc changes track code changes.")
    parser.add_argument("repo", nargs="?", default=".", help="git repository path")
    parser.add_argument("--strict", action="store_true", help="fail on blocking drift")
    parser.add_argument("--staged", action="store_true", help="inspect staged files only")
    parser.add_argument("--base-ref", help="compare changes from base ref to HEAD")
    parser.add_argument("--rev-range", help="inspect a git revision range or a single commit")
    return parser.parse_args()
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> int:
    """CLI entry point: classify changed paths, print a report, return an exit code.

    Returns 0 when no blocking drift is found, 1 on blocking drift (only
    possible in strict mode), and 2 when the target is not a git repository.
    """
    args = parse_args()
    repo = Path(args.repo).expanduser().resolve()

    if not (repo / ".git").exists():
        print(f"Not a git repository: {repo}")
        return 2

    paths = sorted(set(collect_paths(repo, args)))

    # Classify each path exactly once; the previous version re-ran
    # classify() for every bucket (five passes over the path list).
    groups: dict[str, list[str]] = {key: [] for key in ("docs", "code", "tests", "config", "other")}
    for path in paths:
        groups[classify(path)].append(path)
    docs = groups["docs"]
    code = groups["code"]
    tests = groups["tests"]
    config = groups["config"]
    other = groups["other"]
    assessment = assess_changes(docs, code, tests, config, other, args.strict)

    print(f"Repository: {repo}")
    print(f"Changed files: {len(paths)}")
    print_group("Design and doc files", docs)
    print_group("Code files", code)
    print_group("Test files", tests)
    print_group("Config and infra files", config)
    print_group("Other files", other)

    print("\nAssessment:")
    if not assessment["warnings"]:
        print(" - No obvious doc/code drift detected from changed-file classification.")
    else:
        for warning in assessment["warnings"]:
            print(f" - {warning}")

    print("\nNext actions:")
    if code and not docs:
        print(" - Review design/ or docs/ and update affected architecture, workflow, or API notes.")
    if docs:
        print(" - Confirm each changed doc still matches the actual implementation.")
    if code:
        print(" - Confirm changed code paths match documented workflow, schema, and runtime assumptions.")
    if other:
        print(" - Review unclassified paths and decide whether docs or tests should be updated.")

    if assessment["blocking"]:
        print("\nResult: blocking drift detected.")
        return 1

    return 0
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
raise SystemExit(main())
|
||||||
12
scripts/install_hooks.sh
Normal file
12
scripts/install_hooks.sh
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
#!/usr/bin/env bash
# Install the tracked .githooks directory as this repository's hook path.
set -euo pipefail

# Resolve the repository root relative to this script's location.
repo_root="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"

# Route git hook lookup to the tracked .githooks directory.
git -C "$repo_root" config core.hooksPath .githooks
# Hooks and the helper scripts they call must be executable.
chmod +x "$repo_root"/.githooks/*
chmod +x "$repo_root"/scripts/check_doc_code_sync.py
chmod +x "$repo_root"/scripts/check_commit_message.py

echo "Installed local git hooks from .githooks"
echo "Active hooks path: $(git -C "$repo_root" config core.hooksPath)"
|
||||||
40
tests/test_commit_message.py
Normal file
40
tests/test_commit_message.py
Normal file
@ -0,0 +1,40 @@
|
|||||||
|
import importlib.util
|
||||||
|
from pathlib import Path
|
||||||
|
import unittest
|
||||||
|
|
||||||
|
|
||||||
|
def load_module(module_name: str, path: Path):
    """Import the Python file at *path* as a module named *module_name*.

    Lets the tests exercise scripts/ files that are not on the import path.
    """
    spec = importlib.util.spec_from_file_location(module_name, path)
    loaded = importlib.util.module_from_spec(spec)
    assert spec.loader is not None
    spec.loader.exec_module(loaded)
    return loaded
|
||||||
|
|
||||||
|
|
||||||
|
# Repository root: tests/ lives one level below it.
REPO_ROOT = Path(__file__).resolve().parents[1]
# Load the hook script by file path; scripts/ is not an importable package.
MODULE = load_module(
    "check_commit_message",
    REPO_ROOT / "scripts" / "check_commit_message.py",
)
|
||||||
|
|
||||||
|
|
||||||
|
class CommitMessageValidationTests(unittest.TestCase):
    """Behavioral tests for validate_message() in scripts/check_commit_message.py."""

    def test_accepts_gitmoji_shortcode_with_english_message(self):
        errors = MODULE.validate_message(":sparkles: add local hook templates")
        self.assertEqual(errors, [])

    def test_accepts_unicode_gitmoji_with_english_message(self):
        errors = MODULE.validate_message("✨ add ci validation for hooks")
        self.assertEqual(errors, [])

    def test_rejects_message_without_gitmoji_prefix(self):
        errors = MODULE.validate_message("add local hook templates")
        # Exact wording is not pinned; any gitmoji-related error suffices.
        self.assertTrue(any("gitmoji" in error.lower() for error in errors))

    def test_rejects_non_english_message(self):
        errors = MODULE.validate_message(":sparkles: 添加本地 hook")
        self.assertTrue(any("english" in error.lower() for error in errors))
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
unittest.main()
|
||||||
55
tests/test_doc_code_sync.py
Normal file
55
tests/test_doc_code_sync.py
Normal file
@ -0,0 +1,55 @@
|
|||||||
|
import importlib.util
|
||||||
|
from pathlib import Path
|
||||||
|
import unittest
|
||||||
|
|
||||||
|
|
||||||
|
def load_module(module_name: str, path: Path):
    """Import the Python file at *path* as a module named *module_name*.

    Lets the tests exercise scripts/ files that are not on the import path.
    """
    spec = importlib.util.spec_from_file_location(module_name, path)
    module = importlib.util.module_from_spec(spec)
    assert spec.loader is not None
    spec.loader.exec_module(module)
    return module
|
||||||
|
|
||||||
|
|
||||||
|
# Repository root: tests/ lives one level below it.
REPO_ROOT = Path(__file__).resolve().parents[1]
# Load the hook script by file path; scripts/ is not an importable package.
MODULE = load_module(
    "check_doc_code_sync",
    REPO_ROOT / "scripts" / "check_doc_code_sync.py",
)
|
||||||
|
|
||||||
|
|
||||||
|
class DocCodeSyncAssessmentTests(unittest.TestCase):
    """Tests for classify() and assess_changes() in scripts/check_doc_code_sync.py."""

    def test_classifies_python_scripts_as_code(self):
        self.assertEqual(MODULE.classify("scripts/check_doc_code_sync.py"), "code")

    def test_classifies_app_paths_as_code(self):
        self.assertEqual(MODULE.classify("apps/web/package.json"), "code")

    def test_classifies_env_example_as_config(self):
        self.assertEqual(MODULE.classify(".env.example"), "config")

    def test_strict_mode_blocks_code_without_doc_updates(self):
        # A code-only change in strict mode must produce a blocking assessment.
        assessment = MODULE.assess_changes(
            docs=[],
            code=["src/app.ts"],
            tests=[],
            config=[],
            other=[],
            strict=True,
        )
        self.assertTrue(assessment["blocking"])

    def test_doc_and_code_changes_together_do_not_block(self):
        # Pairing a doc update with the code change clears the blocker.
        assessment = MODULE.assess_changes(
            docs=["design/02-architecture/system-architecture.md"],
            code=["src/app.ts"],
            tests=[],
            config=[],
            other=[],
            strict=True,
        )
        self.assertFalse(assessment["blocking"])
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
unittest.main()
|
||||||
35
tests/test_repo_structure.py
Normal file
35
tests/test_repo_structure.py
Normal file
@ -0,0 +1,35 @@
|
|||||||
|
from pathlib import Path
|
||||||
|
import unittest
|
||||||
|
|
||||||
|
|
||||||
|
# Repository root: tests/ lives one level below it.
REPO_ROOT = Path(__file__).resolve().parents[1]
|
||||||
|
|
||||||
|
|
||||||
|
class RepoStructureTests(unittest.TestCase):
    """Smoke tests asserting the monorepo workspace skeleton files exist."""

    def test_root_workspace_files_exist(self):
        # Root-level workspace and tooling manifests required by the repo.
        required_files = [
            "package.json",
            "pnpm-workspace.yaml",
            "tsconfig.base.json",
            "docker-compose.yml",
            ".env.example",
        ]

        for relative_path in required_files:
            with self.subTest(path=relative_path):
                self.assertTrue((REPO_ROOT / relative_path).is_file())

    def test_app_package_manifests_exist(self):
        # Each pnpm workspace app must ship its own package manifest.
        required_files = [
            "apps/web/package.json",
            "apps/api/package.json",
            "apps/worker/package.json",
        ]

        for relative_path in required_files:
            with self.subTest(path=relative_path):
                self.assertTrue((REPO_ROOT / relative_path).is_file())
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
unittest.main()
|
||||||
12
tsconfig.base.json
Normal file
12
tsconfig.base.json
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
{
|
||||||
|
"compilerOptions": {
|
||||||
|
"target": "ES2022",
|
||||||
|
"module": "ESNext",
|
||||||
|
"moduleResolution": "Bundler",
|
||||||
|
"strict": true,
|
||||||
|
"esModuleInterop": true,
|
||||||
|
"resolveJsonModule": true,
|
||||||
|
"skipLibCheck": true,
|
||||||
|
"baseUrl": "."
|
||||||
|
}
|
||||||
|
}
|
||||||
Loading…
x
Reference in New Issue
Block a user