🎉 feat: initialize foundation docs guardrails and workspace skeleton
This commit is contained in:
commit
f41816bbd9
17
.env.example
Normal file
17
.env.example
Normal file
@ -0,0 +1,17 @@
|
||||
NODE_ENV=development
|
||||
|
||||
WEB_PORT=3000
|
||||
API_PORT=3001
|
||||
WORKER_PORT=3002
|
||||
|
||||
MONGO_PORT=27017
|
||||
MONGO_DB=emboflow
|
||||
MONGO_ROOT_USERNAME=emboflow
|
||||
MONGO_ROOT_PASSWORD=emboflow
|
||||
|
||||
MINIO_PORT=9000
|
||||
MINIO_CONSOLE_PORT=9001
|
||||
MINIO_ROOT_USER=emboflow
|
||||
MINIO_ROOT_PASSWORD=emboflow123
|
||||
|
||||
STORAGE_PROVIDER=minio
|
||||
7
.githooks/commit-msg
Executable file
7
.githooks/commit-msg
Executable file
@ -0,0 +1,7 @@
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
|
||||
repo_root="$(git rev-parse --show-toplevel)"
|
||||
cd "$repo_root"
|
||||
|
||||
python3 scripts/check_commit_message.py --file "$1"
|
||||
7
.githooks/pre-commit
Executable file
7
.githooks/pre-commit
Executable file
@ -0,0 +1,7 @@
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
|
||||
repo_root="$(git rev-parse --show-toplevel)"
|
||||
cd "$repo_root"
|
||||
|
||||
python3 scripts/check_doc_code_sync.py . --staged --strict
|
||||
19
.githooks/pre-push
Executable file
19
.githooks/pre-push
Executable file
@ -0,0 +1,19 @@
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
|
||||
repo_root="$(git rev-parse --show-toplevel)"
|
||||
cd "$repo_root"
|
||||
|
||||
if git rev-parse --abbrev-ref --symbolic-full-name "@{upstream}" >/dev/null 2>&1; then
|
||||
base_ref="$(git rev-parse --abbrev-ref --symbolic-full-name "@{upstream}")"
|
||||
python3 scripts/check_doc_code_sync.py . --base-ref "$base_ref" --strict
|
||||
python3 scripts/check_commit_message.py --rev-range "$base_ref..HEAD"
|
||||
elif git rev-parse HEAD~1 >/dev/null 2>&1; then
|
||||
python3 scripts/check_doc_code_sync.py . --base-ref HEAD~1 --strict
|
||||
python3 scripts/check_commit_message.py --rev-range "HEAD~1..HEAD"
|
||||
else
|
||||
python3 scripts/check_doc_code_sync.py . --rev-range HEAD --strict
|
||||
python3 scripts/check_commit_message.py --rev-range HEAD
|
||||
fi
|
||||
|
||||
python3 -m unittest discover -s tests -p 'test_*.py'
|
||||
40
.github/pull_request_template.md
vendored
Normal file
40
.github/pull_request_template.md
vendored
Normal file
@ -0,0 +1,40 @@
|
||||
# Summary
|
||||
|
||||
- Describe the change in clear English.
|
||||
- Explain the user-visible or system-level impact.
|
||||
|
||||
# Design Sync
|
||||
|
||||
- [ ] I reviewed the relevant files under `design/` before implementing.
|
||||
- [ ] I updated the affected design or docs files in the same change set, or I confirmed no design update was required.
|
||||
- [ ] I ran `python3 scripts/check_doc_code_sync.py . --strict`.
|
||||
|
||||
Design files reviewed or updated:
|
||||
|
||||
- ``
|
||||
|
||||
If design and code are not fully aligned yet, explain the gap:
|
||||
|
||||
-
|
||||
|
||||
# Validation
|
||||
|
||||
- [ ] I ran local checks relevant to this change.
|
||||
- [ ] I ran `bash scripts/install_hooks.sh` in this clone or already had the repo hooks installed.
|
||||
- [ ] My commit messages in this PR are English-only and use a gitmoji prefix.
|
||||
|
||||
Commands run:
|
||||
|
||||
```bash
|
||||
# paste commands here
|
||||
```
|
||||
|
||||
# Scope Checklist
|
||||
|
||||
- [ ] This PR updates behavior, contracts, or runtime assumptions intentionally.
|
||||
- [ ] This PR does not silently break documented architecture or workflow assumptions.
|
||||
- [ ] This PR includes tests if behavior changed, or I confirmed tests were not required.
|
||||
|
||||
# Notes For Reviewers
|
||||
|
||||
- Call out any risky areas, follow-up work, or unresolved assumptions.
|
||||
45
.github/workflows/guardrails.yml
vendored
Normal file
45
.github/workflows/guardrails.yml
vendored
Normal file
@ -0,0 +1,45 @@
|
||||
name: Guardrails
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
push:
|
||||
|
||||
jobs:
|
||||
repository-guardrails:
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- name: Check out repository
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.11"
|
||||
|
||||
- name: Compute git range
|
||||
id: git_range
|
||||
shell: bash
|
||||
run: |
|
||||
if [ "${GITHUB_EVENT_NAME}" = "pull_request" ]; then
|
||||
RANGE="${{ github.event.pull_request.base.sha }}..${{ github.sha }}"
|
||||
elif [ "${{ github.event.before }}" != "0000000000000000000000000000000000000000" ]; then
|
||||
RANGE="${{ github.event.before }}..${{ github.sha }}"
|
||||
else
|
||||
RANGE="${{ github.sha }}"
|
||||
fi
|
||||
echo "range=${RANGE}" >> "$GITHUB_OUTPUT"
|
||||
|
||||
- name: Validate commit messages
|
||||
run: |
|
||||
python3 scripts/check_commit_message.py --rev-range "${{ steps.git_range.outputs.range }}"
|
||||
|
||||
- name: Validate design and code sync
|
||||
run: |
|
||||
python3 scripts/check_doc_code_sync.py . --rev-range "${{ steps.git_range.outputs.range }}" --strict
|
||||
|
||||
- name: Run repository tests
|
||||
run: |
|
||||
python3 -m unittest discover -s tests -p 'test_*.py'
|
||||
93
CONTRIBUTING.md
Normal file
93
CONTRIBUTING.md
Normal file
@ -0,0 +1,93 @@
|
||||
# Contributing To EmboFlow
|
||||
|
||||
## Core Rule
|
||||
|
||||
Keep `design/` and implementation aligned in the same change set.
|
||||
|
||||
Do not treat design files as background notes. If a code change affects product behavior, workflow behavior, data models, contracts, runtime assumptions, permissions, or deployment assumptions, update the corresponding design documents before closing the task.
|
||||
|
||||
## Required Workflow
|
||||
|
||||
1. Read the relevant files under `design/` before implementing.
|
||||
2. Summarize the intended contract you are changing.
|
||||
3. Implement the code change.
|
||||
4. Update the affected design files in the same work session.
|
||||
5. Install the local git hooks once per clone:
|
||||
|
||||
```bash
|
||||
bash scripts/install_hooks.sh
|
||||
```
|
||||
|
||||
6. Use English-only commit messages with a gitmoji prefix, for example:
|
||||
|
||||
```text
|
||||
:sparkles: add workflow guardrails and CI checks
|
||||
```
|
||||
|
||||
7. Run the local sync check when needed:
|
||||
|
||||
```bash
|
||||
python3 scripts/check_doc_code_sync.py . --strict
|
||||
```
|
||||
|
||||
8. If design and code still diverge, document that explicitly in your final summary.
|
||||
|
||||
## When Design Updates Are Required
|
||||
|
||||
Update design files when a change affects:
|
||||
|
||||
- user-visible behavior
|
||||
- workflow nodes or execution paths
|
||||
- data model or storage structure
|
||||
- API or schema contracts
|
||||
- plugin or executor behavior
|
||||
- workspace, project, or permission rules
|
||||
- deployment or runtime assumptions
|
||||
|
||||
## When Design Updates May Be Skipped
|
||||
|
||||
Design updates are usually not required for:
|
||||
|
||||
- pure refactors with no behavior change
|
||||
- test-only changes
|
||||
- formatting, comments, and naming cleanup
|
||||
|
||||
Even in those cases, verify that no documented statement became false indirectly.
|
||||
|
||||
## Primary Design Locations
|
||||
|
||||
- `design/00-overview/`
|
||||
- `design/01-product/`
|
||||
- `design/02-architecture/`
|
||||
- `design/03-workflows/`
|
||||
- `design/05-data/`
|
||||
- `design/08-decisions/`
|
||||
|
||||
## Local Tooling
|
||||
|
||||
This repository includes:
|
||||
|
||||
- git hook templates under `.githooks/`
|
||||
- a hook installer:
|
||||
|
||||
```bash
|
||||
bash scripts/install_hooks.sh
|
||||
```
|
||||
|
||||
- a design/code sync checker:
|
||||
|
||||
```bash
|
||||
python3 scripts/check_doc_code_sync.py . --strict
|
||||
```
|
||||
|
||||
- a commit message validator:
|
||||
|
||||
```bash
|
||||
python3 scripts/check_commit_message.py --rev-range HEAD
|
||||
```
|
||||
|
||||
The hooks and CI enforce:
|
||||
|
||||
- English-only commit messages with a gitmoji prefix
|
||||
- design/code consistency checks
|
||||
- repository unit tests before push
|
||||
8
apps/api/package.json
Normal file
8
apps/api/package.json
Normal file
@ -0,0 +1,8 @@
|
||||
{
|
||||
"name": "@emboflow/api",
|
||||
"private": true,
|
||||
"version": "0.1.0",
|
||||
"scripts": {
|
||||
"dev": "echo 'api app scaffold pending'"
|
||||
}
|
||||
}
|
||||
8
apps/web/package.json
Normal file
8
apps/web/package.json
Normal file
@ -0,0 +1,8 @@
|
||||
{
|
||||
"name": "@emboflow/web",
|
||||
"private": true,
|
||||
"version": "0.1.0",
|
||||
"scripts": {
|
||||
"dev": "echo 'web app scaffold pending'"
|
||||
}
|
||||
}
|
||||
8
apps/worker/package.json
Normal file
8
apps/worker/package.json
Normal file
@ -0,0 +1,8 @@
|
||||
{
|
||||
"name": "@emboflow/worker",
|
||||
"private": true,
|
||||
"version": "0.1.0",
|
||||
"scripts": {
|
||||
"dev": "echo 'worker app scaffold pending'"
|
||||
}
|
||||
}
|
||||
1
design/00-overview/.gitkeep
Normal file
1
design/00-overview/.gitkeep
Normal file
@ -0,0 +1 @@
|
||||
|
||||
70
design/00-overview/emboflow-platform-overview.md
Normal file
70
design/00-overview/emboflow-platform-overview.md
Normal file
@ -0,0 +1,70 @@
|
||||
# EmboFlow Platform Overview
|
||||
|
||||
## Positioning
|
||||
|
||||
EmboFlow is a browser-based embodied data engineering platform for ingesting raw assets, organizing dataset workflows on a visual canvas, processing and converting data, annotating and inspecting results, exporting normalized artifacts, and generating downstream training configurations.
|
||||
|
||||
The platform is designed around plugin-based extensibility, but the first version should deliver a stable built-in core before opening broader extension surfaces.
|
||||
|
||||
## Primary Users
|
||||
|
||||
- Individual engineers building embodied datasets
|
||||
- Team operators managing collection, preprocessing, delivery, and annotation workflows
|
||||
- Data engineering teams that need repeatable conversion and packaging pipelines
|
||||
- Teams preparing datasets for external training systems
|
||||
|
||||
## V1 Product Goal
|
||||
|
||||
Build a usable end-to-end platform that allows users to:
|
||||
|
||||
1. Log into a personal or team workspace
|
||||
2. Create a project
|
||||
3. Upload or import raw embodied data assets
|
||||
4. Auto-detect asset structure and generate preview summaries
|
||||
5. Compose processing pipelines on a canvas
|
||||
6. Configure node parameters and inject code into processing nodes
|
||||
7. Execute workflows asynchronously and inspect logs and outputs
|
||||
8. Export normalized delivery packages, training datasets, or training config files
|
||||
|
||||
## Supported Input Formats in V1
|
||||
|
||||
- RLDS
|
||||
- LeRobot v2/v3
|
||||
- HDF5
|
||||
- Rosbag
|
||||
- Raw video folders and delivery-style directory packages
|
||||
- Compressed archives containing the above
|
||||
|
||||
## Core Product Principles
|
||||
|
||||
- Raw assets are first-class objects
|
||||
- Canonical semantic datasets are derived, not assumed
|
||||
- Visualization can operate directly on raw assets
|
||||
- Workflow execution is asynchronous and traceable
|
||||
- Plugins are versioned and managed
|
||||
- User-injected code is supported with strict runtime boundaries
|
||||
- Training execution is out of scope for V1, but training handoff is in scope
|
||||
|
||||
## Major Workspaces
|
||||
|
||||
- Asset Workspace: upload, import, scan, probe, browse
|
||||
- Canvas Workspace: build and run workflows
|
||||
- Explore Workspace: inspect raw assets and processed outputs
|
||||
- Label Workspace: create and review annotation tasks
|
||||
- Admin Workspace: users, workspaces, plugins, storage, runtime settings
|
||||
|
||||
## V1 Output Types
|
||||
|
||||
- Standardized embodied dataset exports
|
||||
- Customer delivery packages
|
||||
- Validation and quality reports
|
||||
- Annotation artifacts
|
||||
- Training configuration packages for downstream training systems
|
||||
|
||||
## Non-Goals for V1
|
||||
|
||||
- Built-in training execution orchestration
|
||||
- Real-time collaborative editing on the same canvas
|
||||
- Public plugin marketplace
|
||||
- Fully generalized MLOps lifecycle management
|
||||
- Advanced distributed scheduling in the first deployment
|
||||
1
design/01-product/.gitkeep
Normal file
1
design/01-product/.gitkeep
Normal file
@ -0,0 +1 @@
|
||||
|
||||
90
design/01-product/v1-scope-and-mvp.md
Normal file
90
design/01-product/v1-scope-and-mvp.md
Normal file
@ -0,0 +1,90 @@
|
||||
# EmboFlow V1 Scope And MVP
|
||||
|
||||
## MVP Definition
|
||||
|
||||
The first release should prove that EmboFlow can turn raw embodied data assets into structured outputs through a visual workflow engine.
|
||||
|
||||
### MVP Success Path
|
||||
|
||||
1. A user signs into a workspace
|
||||
2. The user creates a project
|
||||
3. The user uploads or imports a raw asset
|
||||
4. The platform probes the asset and generates a structure summary
|
||||
5. The user previews the asset
|
||||
6. The user composes a canvas workflow
|
||||
7. The workflow executes asynchronously
|
||||
8. The user reviews logs, outputs, and generated artifacts
|
||||
9. The user exports a normalized dataset, delivery package, or training config
|
||||
|
||||
## In Scope For V1
|
||||
|
||||
- User login and workspace model
|
||||
- Personal and team workspaces
|
||||
- Project resource isolation
|
||||
- Raw asset upload and import
|
||||
- Object storage integration
|
||||
- Asset probing and structure detection
|
||||
- Raw asset preview
|
||||
- Canvas workflow editor
|
||||
- Built-in node library for ingest, transform, inspect, export
|
||||
- Node configuration through schema-driven forms
|
||||
- Code injection for processing nodes
|
||||
- Workflow run orchestration
|
||||
- Logs, status, retries, and artifact tracking
|
||||
- Dataset conversion and delivery-package normalization
|
||||
- Training config export
|
||||
- Plugin registration skeleton
|
||||
|
||||
## Important Business Scenarios
|
||||
|
||||
### Embodied Dataset Conversion
|
||||
|
||||
- Import RLDS, LeRobot, HDF5, or Rosbag
|
||||
- Map to canonical semantics
|
||||
- Export to target dataset format
|
||||
|
||||
### Delivery Package Normalization
|
||||
|
||||
- Import customer-provided raw directory or archive
|
||||
- Rename top-level folders
|
||||
- Validate required file structure
|
||||
- Validate metadata files
|
||||
- Check video file quality and naming
|
||||
- Export or upload normalized package
|
||||
|
||||
### Data Processing Workflow Authoring
|
||||
|
||||
- Drag nodes onto canvas
|
||||
- Connect nodes into DAG
|
||||
- Tune parameters
|
||||
- Inject code into processing nodes
|
||||
- Re-run pipeline with traceable history
|
||||
|
||||
## V1 Modules To Build Deeply
|
||||
|
||||
- Identity and workspace management
|
||||
- Asset ingestion and probing
|
||||
- Workflow editor and node model
|
||||
- Execution engine
|
||||
- Built-in dataset conversion nodes
|
||||
- Built-in delivery normalization nodes
|
||||
- Preview and inspection
|
||||
- Artifact export
|
||||
|
||||
## V1 Modules To Keep Lightweight
|
||||
|
||||
- Annotation
|
||||
- Collaboration
|
||||
- Plugin lifecycle UX
|
||||
- Advanced analytics
|
||||
- Kubernetes and Volcano scheduling adapters
|
||||
- Advanced multi-sensor synchronized visual analytics
|
||||
|
||||
## Explicit V1 Exclusions
|
||||
|
||||
- Platform-managed training execution
|
||||
- Real-time multi-user canvas co-editing
|
||||
- Full marketplace for third-party plugins
|
||||
- Complex enterprise approval workflows
|
||||
- Streaming data processing
|
||||
- Large-scale distributed execution as a deployment requirement
|
||||
1
design/02-architecture/.gitkeep
Normal file
1
design/02-architecture/.gitkeep
Normal file
@ -0,0 +1 @@
|
||||
|
||||
115
design/02-architecture/deployment-architecture.md
Normal file
115
design/02-architecture/deployment-architecture.md
Normal file
@ -0,0 +1,115 @@
|
||||
# EmboFlow Deployment Architecture
|
||||
|
||||
## V1 Deployment Target
|
||||
|
||||
The first deployment target is a single public server. The platform should be deployed in a way that is operationally simple now and migration-friendly later.
|
||||
|
||||
## Recommended V1 Deployment Topology
|
||||
|
||||
- Reverse proxy
|
||||
- Web frontend service
|
||||
- API service
|
||||
- Worker service
|
||||
- MongoDB
|
||||
- Optional MinIO
|
||||
- Host Docker runtime for execution containers
|
||||
|
||||
## Deployment Principles
|
||||
|
||||
- Single-host deployment first
|
||||
- All major services containerized
|
||||
- Persistent state mounted on host volumes
|
||||
- Object storage can be external or self-hosted
|
||||
- Execution workers separated from API service
|
||||
- Future scheduler migration should not require domain model changes
|
||||
|
||||
## Recommended Runtime Layout
|
||||
|
||||
### Edge
|
||||
|
||||
- Nginx or equivalent reverse proxy
|
||||
- HTTPS termination
|
||||
- Static web delivery or web upstream routing
|
||||
|
||||
### Application
|
||||
|
||||
- `web`
|
||||
- `api`
|
||||
- `worker`
|
||||
|
||||
### Data
|
||||
|
||||
- `mongo`
|
||||
- `minio` optional
|
||||
|
||||
## Object Storage Strategy
|
||||
|
||||
The product should support both:
|
||||
|
||||
- Cloud object storage such as BOS or S3-compatible services
|
||||
- Self-hosted MinIO for development, demos, or private deployment
|
||||
|
||||
The application should expose a unified storage abstraction instead of embedding provider-specific logic across modules.
|
||||
|
||||
## Local Scheduler In V1
|
||||
|
||||
V1 should use a local scheduler. Worker processes execute tasks on the same deployment host.
|
||||
|
||||
Design constraints:
|
||||
|
||||
- RuntimeSpec must already exist
|
||||
- Scheduler abstraction must already exist
|
||||
- Docker executor must already be scheduler-compatible
|
||||
|
||||
This keeps future migration to Kubernetes or Volcano feasible.
|
||||
|
||||
## Host-Level Persistent Directories
|
||||
|
||||
Recommended host directories:
|
||||
|
||||
- application config
|
||||
- mongodb data
|
||||
- minio data
|
||||
- uploaded file staging
|
||||
- execution temp workspace
|
||||
- logs
|
||||
- backup data
|
||||
|
||||
## Execution Isolation
|
||||
|
||||
The host Docker runtime serves two different purposes:
|
||||
|
||||
- Running the platform deployment stack
|
||||
- Running task execution containers
|
||||
|
||||
These must be treated as separate concerns in configuration and security design.
|
||||
|
||||
## Future Migration Path
|
||||
|
||||
### Stage 1
|
||||
|
||||
- Single-host deployment
|
||||
- Local scheduler
|
||||
- Docker executor
|
||||
|
||||
### Stage 2
|
||||
|
||||
- Kubernetes-based service deployment
|
||||
- Kubernetes scheduler adapter for workflow tasks
|
||||
|
||||
### Stage 3
|
||||
|
||||
- Volcano scheduler adapter
|
||||
- Better support for large batch jobs and training-adjacent workloads
|
||||
|
||||
## Operational Baseline
|
||||
|
||||
V1 should provide basic operational support for:
|
||||
|
||||
- health checks
|
||||
- service restart
|
||||
- execution failure visibility
|
||||
- disk space monitoring
|
||||
- object storage connectivity checks
|
||||
- MongoDB backup and restore procedures
|
||||
- worker online status
|
||||
200
design/02-architecture/system-architecture.md
Normal file
200
design/02-architecture/system-architecture.md
Normal file
@ -0,0 +1,200 @@
|
||||
# EmboFlow System Architecture
|
||||
|
||||
## Architecture Style
|
||||
|
||||
EmboFlow V1 is a browser/server platform built as:
|
||||
|
||||
- Web frontend
|
||||
- Modular backend control plane
|
||||
- Independent worker runtime
|
||||
- MongoDB as the only database
|
||||
- Object storage abstraction over cloud object storage or MinIO
|
||||
- Local scheduler in V1 with future migration path to Kubernetes and Volcano
|
||||
|
||||
The architecture should preserve clear service boundaries even if V1 is implemented as a modular monolith plus workers.
|
||||
|
||||
## High-Level Layers
|
||||
|
||||
### Frontend Layer
|
||||
|
||||
- Asset workspace
|
||||
- Canvas workspace
|
||||
- Explore workspace
|
||||
- Label workspace
|
||||
- Admin workspace
|
||||
|
||||
### Control Plane
|
||||
|
||||
- Identity and authorization
|
||||
- Workspace and project management
|
||||
- Asset and dataset metadata
|
||||
- Workflow definition management
|
||||
- Plugin registry and activation
|
||||
- Run orchestration API
|
||||
- Artifact indexing
|
||||
|
||||
### Execution Plane
|
||||
|
||||
- Workflow DAG compilation
|
||||
- Task queue dispatch
|
||||
- Worker execution
|
||||
- Executor routing
|
||||
- Log and artifact collection
|
||||
|
||||
### Storage Layer
|
||||
|
||||
- MongoDB for metadata and run state
|
||||
- Object storage for files and large outputs
|
||||
- Temporary local working directories for execution
|
||||
|
||||
## Core Domain Objects
|
||||
|
||||
- User
|
||||
- Workspace
|
||||
- Project
|
||||
- Asset
|
||||
- Dataset
|
||||
- DatasetVersion
|
||||
- WorkflowDefinition
|
||||
- WorkflowVersion
|
||||
- WorkflowRun
|
||||
- RunTask
|
||||
- Artifact
|
||||
- AnnotationTask
|
||||
- Annotation
|
||||
- Plugin
|
||||
- StorageConnection
|
||||
|
||||
## Raw Asset And Canonical Dataset Model
|
||||
|
||||
The platform must distinguish between:
|
||||
|
||||
- Raw Asset View
|
||||
- Canonical Dataset View
|
||||
|
||||
Raw assets preserve source structure, file paths, metadata layout, and original naming. Canonical datasets provide a normalized semantic layer for workflow nodes and export logic.
|
||||
|
||||
Visualization may read raw assets directly. Conversion, orchestration, and export should primarily target canonical semantics.
|
||||
|
||||
## Workflow Model
|
||||
|
||||
Workflow definitions are versioned and contain:
|
||||
|
||||
- Visual graph state
|
||||
- Logical node and edge graph
|
||||
- Runtime configuration
|
||||
- Plugin references
|
||||
|
||||
Workflow execution produces immutable workflow runs. A run snapshots:
|
||||
|
||||
- Workflow version
|
||||
- Node configuration
|
||||
- Injected code
|
||||
- Executor settings
|
||||
- Input bindings
|
||||
|
||||
Runs compile into task DAGs.
|
||||
|
||||
## Node And Plugin Model
|
||||
|
||||
### Node Categories
|
||||
|
||||
- Source
|
||||
- Transform
|
||||
- Inspect
|
||||
- Annotate
|
||||
- Export
|
||||
- Utility
|
||||
|
||||
### Node Definition Contract
|
||||
|
||||
Each node definition includes:
|
||||
|
||||
- Metadata
|
||||
- Input schema
|
||||
- Output schema
|
||||
- Config schema
|
||||
- UI schema
|
||||
- Executor type
|
||||
- Runtime limits
|
||||
- Optional code hook contract
|
||||
|
||||
### Plugin Types
|
||||
|
||||
- Node plugins
|
||||
- Reader/writer plugins
|
||||
- Renderer plugins
|
||||
- Executor plugins
|
||||
- Integration plugins
|
||||
|
||||
## Execution Architecture
|
||||
|
||||
### Executors
|
||||
|
||||
- Python executor
|
||||
- Docker executor
|
||||
- HTTP executor
|
||||
|
||||
V1 should prioritize Python and Docker. HTTP executor is useful for integrating external services.
|
||||
|
||||
### Schedulers
|
||||
|
||||
- Local scheduler in V1
|
||||
- Kubernetes scheduler later
|
||||
- Volcano scheduler later
|
||||
|
||||
Executors and schedulers are separate abstractions:
|
||||
|
||||
- Executor defines how logic runs
|
||||
- Scheduler defines where and under what scheduling policy it runs
|
||||
|
||||
## Storage Architecture
|
||||
|
||||
### MongoDB Collections
|
||||
|
||||
Recommended primary collections:
|
||||
|
||||
- users
|
||||
- workspaces
|
||||
- projects
|
||||
- memberships
|
||||
- assets
|
||||
- asset_probe_reports
|
||||
- datasets
|
||||
- dataset_versions
|
||||
- workflow_definitions
|
||||
- workflow_definition_versions
|
||||
- workflow_runs
|
||||
- run_tasks
|
||||
- artifacts
|
||||
- annotation_tasks
|
||||
- annotations
|
||||
- plugins
|
||||
- storage_connections
|
||||
- audit_logs
|
||||
|
||||
### Object Storage Content
|
||||
|
||||
- Raw uploads
|
||||
- Imported archives
|
||||
- Normalized export packages
|
||||
- Training config packages
|
||||
- Preview resources
|
||||
- Logs and attachments
|
||||
- Large manifests and file indexes
|
||||
|
||||
## Security Model
|
||||
|
||||
User-injected code is low-trust code and must not run in web or API processes.
|
||||
|
||||
V1 runtime policy:
|
||||
|
||||
- Built-in trusted nodes may use Python executor
|
||||
- Plugin code should run in controlled runtimes
|
||||
- User-injected code should default to Docker executor
|
||||
- Network access should be denied by default for user code
|
||||
- Input and output paths should be explicitly mounted
|
||||
|
||||
## Deployment Direction
|
||||
|
||||
V1 deployment target is a single public server using containerized application services. The architecture must still preserve future migration to multi-node environments.
|
||||
1
design/03-workflows/.gitkeep
Normal file
1
design/03-workflows/.gitkeep
Normal file
@ -0,0 +1 @@
|
||||
|
||||
316
design/03-workflows/workflow-execution-model.md
Normal file
316
design/03-workflows/workflow-execution-model.md
Normal file
@ -0,0 +1,316 @@
|
||||
# EmboFlow Workflow Execution Model
|
||||
|
||||
## Goal
|
||||
|
||||
Define how EmboFlow represents, validates, executes, and observes canvas workflows.
|
||||
|
||||
The workflow system is the product core. The canvas is only the editing surface. The real system of record is the versioned workflow definition and its immutable run snapshots.
|
||||
|
||||
## Core Objects
|
||||
|
||||
- `WorkflowDefinition`
|
||||
Logical workflow identity under a project
|
||||
- `WorkflowVersion`
|
||||
Immutable snapshot of nodes, edges, runtime defaults, and plugin references
|
||||
- `NodeInstance`
|
||||
Concrete node on a workflow graph
|
||||
- `WorkflowRun`
|
||||
One execution of one workflow version
|
||||
- `RunTask`
|
||||
Executable unit derived from a node during one run
|
||||
- `Artifact`
|
||||
Managed output from a task or run
|
||||
|
||||
## Workflow Layers
|
||||
|
||||
Each workflow version contains three layers.
|
||||
|
||||
### Visual Layer
|
||||
|
||||
Used only by the editor:
|
||||
|
||||
- node positions
|
||||
- collapsed state
|
||||
- groups
|
||||
- zoom defaults
|
||||
- viewport metadata
|
||||
|
||||
### Logic Layer
|
||||
|
||||
Used for graph semantics:
|
||||
|
||||
- nodes
|
||||
- edges
|
||||
- input/output ports
|
||||
- branch conditions
|
||||
- merge semantics
|
||||
- dependency graph
|
||||
|
||||
### Runtime Layer
|
||||
|
||||
Used for execution:
|
||||
|
||||
- node config values
|
||||
- executor settings
|
||||
- runtime resource limits
|
||||
- retry policy
|
||||
- code hooks
|
||||
- cache policy
|
||||
|
||||
Visual changes must not change workflow semantics. Runtime changes must produce a new workflow version.
|
||||
|
||||
## Node Categories
|
||||
|
||||
V1 node categories:
|
||||
|
||||
- `Source`
|
||||
- `Transform`
|
||||
- `Inspect`
|
||||
- `Annotate`
|
||||
- `Export`
|
||||
- `Utility`
|
||||
|
||||
### V1 Built-In Node Families
|
||||
|
||||
- asset upload/import
|
||||
- archive extract
|
||||
- folder rename
|
||||
- directory validation
|
||||
- metadata validation
|
||||
- video quality inspection
|
||||
- dataset readers for RLDS, LeRobot, HDF5, Rosbag
|
||||
- canonical mapping nodes
|
||||
- dataset writers and exporters
|
||||
- training config export
|
||||
- Python processing node
|
||||
|
||||
## Node Definition Contract
|
||||
|
||||
Each node definition must expose:
|
||||
|
||||
- `id`
|
||||
- `name`
|
||||
- `category`
|
||||
- `version`
|
||||
- `description`
|
||||
- `inputSchema`
|
||||
- `outputSchema`
|
||||
- `configSchema`
|
||||
- `uiSchema`
|
||||
- `executorType`
|
||||
- `runtimeDefaults`
|
||||
- `permissions`
|
||||
- `capabilities`
|
||||
- `codeHookSpec`
|
||||
|
||||
### Code Hook Spec
|
||||
|
||||
V1 supports user code hooks only on:
|
||||
|
||||
- `Transform`
|
||||
- `Inspect`
|
||||
- `Utility`
|
||||
|
||||
Hooks must use a constrained entrypoint instead of arbitrary script structure.
|
||||
|
||||
Example:
|
||||
|
||||
```python
|
||||
def process(input_data, context):
|
||||
return input_data
|
||||
```
|
||||
|
||||
This keeps serialization, logging, and runtime control predictable.
|
||||
|
||||
## Data Flow Contract
|
||||
|
||||
Tasks should exchange managed references, not loose file paths.
|
||||
|
||||
V1 reference types:
|
||||
|
||||
- `assetRef`
|
||||
- `datasetVersionRef`
|
||||
- `artifactRef`
|
||||
- `annotationTaskRef`
|
||||
- `inlineConfig`
|
||||
|
||||
Executors may materialize files internally, but the platform-level contract must remain reference-based.
|
||||
|
||||
## Validation Stages
|
||||
|
||||
Workflow execution must validate in this order:
|
||||
|
||||
1. workflow version exists
|
||||
2. referenced plugins exist and are enabled
|
||||
3. node schemas are valid
|
||||
4. edge connections are schema-compatible
|
||||
5. runtime configuration is complete
|
||||
6. referenced assets and datasets are accessible
|
||||
7. code hooks pass static validation
|
||||
8. executor and scheduler requirements are satisfiable
|
||||
|
||||
Validation failure must block run creation.
|
||||
|
||||
## Run Lifecycle
|
||||
|
||||
When a user executes a workflow:
|
||||
|
||||
1. resolve workflow version
|
||||
2. snapshot all runtime-relevant inputs
|
||||
3. resolve plugin versions
|
||||
4. freeze node config and code hooks
|
||||
5. compile graph into a DAG
|
||||
6. create `WorkflowRun`
|
||||
7. create `RunTask` entries
|
||||
8. enqueue ready tasks
|
||||
9. collect outputs, logs, and task state
|
||||
10. finalize run status and summary
|
||||
|
||||
## Run State Model
|
||||
|
||||
### WorkflowRun Status
|
||||
|
||||
- `pending`
|
||||
- `queued`
|
||||
- `running`
|
||||
- `success`
|
||||
- `failed`
|
||||
- `cancelled`
|
||||
- `partial_success`
|
||||
|
||||
### RunTask Status
|
||||
|
||||
- `pending`
|
||||
- `queued`
|
||||
- `running`
|
||||
- `success`
|
||||
- `failed`
|
||||
- `cancelled`
|
||||
- `skipped`
|
||||
|
||||
`partial_success` is used for workflows where non-blocking nodes fail but the run still produces valid outputs.
|
||||
|
||||
## Retry And Failure Policy
|
||||
|
||||
Each node instance may define:
|
||||
|
||||
- retry count
|
||||
- retry backoff policy
|
||||
- fail-fast behavior
|
||||
- continue-on-error behavior
|
||||
- manual retry eligibility
|
||||
|
||||
V1 should support:
|
||||
|
||||
- `fail_fast`
|
||||
- `continue_on_error`
|
||||
- `retry_n_times`
|
||||
- `manual_retry`
|
||||
|
||||
## Cache Model
|
||||
|
||||
V1 should support node-level cache reuse.
|
||||
|
||||
Recommended cache key inputs:
|
||||
|
||||
- workflow version
|
||||
- node id
|
||||
- upstream reference summary
|
||||
- config summary
|
||||
- code hook digest
|
||||
- plugin version
|
||||
- executor version
|
||||
|
||||
Cache hit behavior:
|
||||
|
||||
- reuse output artifact refs
|
||||
- reuse output summaries
|
||||
- retain previous logs reference
|
||||
- mark task as cache-resolved in metadata
|
||||
|
||||
## Execution Context
|
||||
|
||||
Each task receives a normalized execution context containing:
|
||||
|
||||
- workspace id
|
||||
- project id
|
||||
- workflow run id
|
||||
- task id
|
||||
- actor id
|
||||
- node config
|
||||
- code hook content
|
||||
- input references
|
||||
- storage context
|
||||
- temp working directory
|
||||
- runtime resource limits
|
||||
|
||||
This context must be available across Python, Docker, and HTTP executors.
|
||||
|
||||
## Observability Requirements
|
||||
|
||||
Each task must emit:
|
||||
|
||||
- status transitions
|
||||
- start time and finish time
|
||||
- duration
|
||||
- executor metadata
|
||||
- resource request metadata
|
||||
- stdout/stderr log stream
|
||||
- structured task summary
|
||||
- artifact refs
|
||||
|
||||
The UI must allow:
|
||||
|
||||
- graph-level run status
|
||||
- node-level log inspection
|
||||
- node-level artifact browsing
|
||||
- task retry entrypoint
|
||||
- direct navigation from a node to preview output
|
||||
|
||||
## Canvas Interaction Rules
|
||||
|
||||
V1 editor behavior should enforce:
|
||||
|
||||
- port-level connection rules
|
||||
- incompatible edge blocking
|
||||
- dirty-state detection
|
||||
- explicit save before publish/run if graph changed
|
||||
- per-node validation badges
|
||||
- run from latest saved version, not unsaved draft
|
||||
|
||||
## Example V1 Pipelines
|
||||
|
||||
### Delivery Normalization
|
||||
|
||||
```text
|
||||
Raw Folder Import
|
||||
-> Archive Extract
|
||||
-> Folder Rename
|
||||
-> Directory Validation
|
||||
-> Metadata Validation
|
||||
-> Video Quality Check
|
||||
-> Delivery Export
|
||||
```
|
||||
|
||||
### Dataset Conversion
|
||||
|
||||
```text
|
||||
Rosbag Reader
|
||||
-> Canonical Mapping
|
||||
-> Frame Filter
|
||||
-> Metadata Normalize
|
||||
-> LeRobot Writer
|
||||
-> Training Config Export
|
||||
```
|
||||
|
||||
## V1 Non-Goals
|
||||
|
||||
The V1 workflow engine does not need:
|
||||
|
||||
- loop semantics
|
||||
- streaming execution
|
||||
- unbounded dynamic fan-out
|
||||
- event-driven triggers
|
||||
- advanced distributed DAG partitioning
|
||||
|
||||
The V1 goal is a stable, observable DAG executor for data engineering workflows.
|
||||
1
design/04-ui-ux/.gitkeep
Normal file
1
design/04-ui-ux/.gitkeep
Normal file
@ -0,0 +1 @@
|
||||
|
||||
296
design/04-ui-ux/information-architecture-and-key-screens.md
Normal file
296
design/04-ui-ux/information-architecture-and-key-screens.md
Normal file
@ -0,0 +1,296 @@
|
||||
# EmboFlow Information Architecture And Key Screens
|
||||
|
||||
## Goal
|
||||
|
||||
Define the primary navigation model, main screens, and key interaction patterns for EmboFlow V1.
|
||||
|
||||
The UI should feel like a serious data workflow product, not a generic low-code canvas. The most important interaction is the relationship between assets, workflows, runs, and outputs.
|
||||
|
||||
## Information Architecture
|
||||
|
||||
Top-level product areas:
|
||||
|
||||
- Workspace switcher
|
||||
- Project selector
|
||||
- Asset Workspace
|
||||
- Canvas Workspace
|
||||
- Explore Workspace
|
||||
- Label Workspace
|
||||
- Admin Workspace
|
||||
|
||||
## Navigation Model
|
||||
|
||||
### Global Header
|
||||
|
||||
Recommended global header content:
|
||||
|
||||
- workspace switcher
|
||||
- project switcher
|
||||
- search entry
|
||||
- run notifications
|
||||
- user menu
|
||||
|
||||
### Primary Sidebar
|
||||
|
||||
Recommended primary navigation:
|
||||
|
||||
- Assets
|
||||
- Workflows
|
||||
- Runs
|
||||
- Explore
|
||||
- Labels
|
||||
- Admin
|
||||
|
||||
This keeps the product model explicit:
|
||||
|
||||
- assets are inputs
|
||||
- workflows define transformation logic
|
||||
- runs represent execution history
|
||||
- explore is where users inspect outputs and raw inputs
|
||||
|
||||
## Screen 1: Workspace And Project Entry
|
||||
|
||||
Purpose:
|
||||
|
||||
- choose personal or team workspace
|
||||
- choose or create project
|
||||
- view recent projects and recent workflow runs
|
||||
|
||||
V1 should emphasize project-level organization because all major resources are project-scoped.
|
||||
|
||||
## Screen 2: Asset Workspace
|
||||
|
||||
Purpose:
|
||||
|
||||
- upload or import raw assets
|
||||
- inspect asset type and status
|
||||
- review probe summary
|
||||
- launch preview or workflow entrypoint
|
||||
|
||||
Core regions:
|
||||
|
||||
- asset list with filters
|
||||
- import actions
|
||||
- asset status and source type
|
||||
- probe summary card
|
||||
- recommended next actions
|
||||
|
||||
Key actions:
|
||||
|
||||
- upload file
|
||||
- upload archive
|
||||
- import object storage prefix
|
||||
- register storage path
|
||||
- open preview
|
||||
- create workflow from asset
|
||||
|
||||
## Screen 3: Asset Detail / Explore Entry
|
||||
|
||||
Purpose:
|
||||
|
||||
- inspect one asset deeply
|
||||
- browse folder structure
|
||||
- inspect metadata and detected format
|
||||
- preview representative files
|
||||
|
||||
Suggested panels:
|
||||
|
||||
- left: file tree or asset structure
|
||||
- center: preview surface
|
||||
- right: metadata, probe report, warnings, recommended nodes
|
||||
|
||||
This screen should support both:
|
||||
|
||||
- raw asset view
|
||||
- canonical dataset summary view when available
|
||||
|
||||
## Screen 4: Canvas Workspace
|
||||
|
||||
This is the core authoring surface.
|
||||
|
||||
### Layout
|
||||
|
||||
Recommended layout, aligned with the Xspark reference pattern:
|
||||
|
||||
- left: node library and workflow tools
|
||||
- center: canvas
|
||||
- right: node configuration panel
|
||||
|
||||
### Left Panel
|
||||
|
||||
Contains:
|
||||
|
||||
- source nodes
|
||||
- transform nodes
|
||||
- inspect nodes
|
||||
- annotate nodes
|
||||
- export nodes
|
||||
- utility nodes
|
||||
- search/filter
|
||||
|
||||
### Center Canvas
|
||||
|
||||
Supports:
|
||||
|
||||
- drag-and-drop node placement
|
||||
- edge creation
|
||||
- zoom and pan
|
||||
- mini-map
|
||||
- node badges for validation status
|
||||
- run-state overlays when viewing an executed version
|
||||
|
||||
### Right Configuration Panel
|
||||
|
||||
The right panel is schema-driven.
|
||||
|
||||
It should render:
|
||||
|
||||
- node title
|
||||
- node description
|
||||
- config fields
|
||||
- input/output schema summary
|
||||
- executor selection
|
||||
- runtime policy
|
||||
- code hook editor if supported
|
||||
- validation errors
|
||||
|
||||
This panel is critical. It should feel like a structured system console, not a generic form dump.
|
||||
|
||||
## Screen 5: Workflow Run Detail
|
||||
|
||||
Purpose:
|
||||
|
||||
- inspect execution state
|
||||
- view DAG progress
|
||||
- open task logs
|
||||
- inspect task outputs
|
||||
- retry failed nodes
|
||||
|
||||
Recommended layout:
|
||||
|
||||
- top: run summary and status
|
||||
- center: workflow graph with execution overlays
|
||||
- bottom or side drawer: logs and artifacts for selected node
|
||||
|
||||
## Screen 6: Explore Workspace
|
||||
|
||||
Purpose:
|
||||
|
||||
- inspect raw or processed outputs outside the canvas authoring context
|
||||
- compare source and transformed outputs
|
||||
- validate whether a run produced expected results
|
||||
|
||||
V1 renderer set:
|
||||
|
||||
- directory tree renderer
|
||||
- JSON renderer
|
||||
- video renderer
|
||||
- dataset summary renderer
|
||||
- quality report renderer
|
||||
|
||||
This workspace should open from:
|
||||
|
||||
- asset detail
|
||||
- workflow node output
|
||||
- artifact detail
|
||||
|
||||
## Screen 7: Label Workspace
|
||||
|
||||
Purpose:
|
||||
|
||||
- process annotation tasks
|
||||
- review results
|
||||
- attach annotations to data outputs
|
||||
|
||||
V1 should keep this lightweight:
|
||||
|
||||
- frame labels
|
||||
- clip labels
|
||||
- temporal segment labels
|
||||
- quality tags
|
||||
|
||||
The label workspace should be able to open from an artifact or dataset version, not only from a workflow node.
|
||||
|
||||
## Screen 8: Admin Workspace
|
||||
|
||||
Purpose:
|
||||
|
||||
- manage members
|
||||
- manage storage connections
|
||||
- manage plugin enablement
|
||||
- inspect audit and runtime settings
|
||||
|
||||
Suggested sections:
|
||||
|
||||
- members and roles
|
||||
- workspace settings
|
||||
- storage connections
|
||||
- plugin registry
|
||||
- executor policies
|
||||
- audit log viewer
|
||||
|
||||
## Key UX Principles
|
||||
|
||||
### 1. Separate authoring from inspection
|
||||
|
||||
Do not overload the canvas with deep preview or annotation workflows. The canvas configures process. Explore and Label workspaces handle dense interaction.
|
||||
|
||||
### 2. Keep lineage visible
|
||||
|
||||
Users should be able to move across:
|
||||
|
||||
- asset
|
||||
- workflow
|
||||
- run
|
||||
- task
|
||||
- artifact
|
||||
- annotation
|
||||
|
||||
without losing context.
|
||||
|
||||
### 3. Prefer explicit system terminology
|
||||
|
||||
Use consistent object names in the UI:
|
||||
|
||||
- Asset
|
||||
- Dataset
|
||||
- Workflow
|
||||
- Run
|
||||
- Task
|
||||
- Artifact
|
||||
- Plugin
|
||||
|
||||
Do not name the same concept differently across pages.
|
||||
|
||||
### 4. Make validation obvious before execution
|
||||
|
||||
Before users run a workflow, the editor should visibly show:
|
||||
|
||||
- missing config
|
||||
- invalid schema connections
|
||||
- unsupported executor choices
|
||||
- permission or plugin issues
|
||||
|
||||
### 5. Keep the product usable on standard screens
|
||||
|
||||
The canvas and right configuration panel must work on laptop-sized displays. On narrower screens, the right panel may collapse into a drawer.
|
||||
|
||||
## V1 Visual Direction
|
||||
|
||||
The UI should communicate:
|
||||
|
||||
- precision
|
||||
- observability
|
||||
- traceability
|
||||
- strong operator control
|
||||
|
||||
It should feel closer to a workflow control console than a consumer productivity app.
|
||||
|
||||
## V1 Non-Goals
|
||||
|
||||
V1 UI does not need:
|
||||
|
||||
- real-time multi-user cursor collaboration
|
||||
- advanced canvas commenting systems
|
||||
- highly customized renderer marketplace UX
|
||||
- heavy design polish ahead of workflow clarity
|
||||
1
design/05-data/.gitkeep
Normal file
1
design/05-data/.gitkeep
Normal file
@ -0,0 +1 @@
|
||||
|
||||
521
design/05-data/mongodb-data-model.md
Normal file
521
design/05-data/mongodb-data-model.md
Normal file
@ -0,0 +1,521 @@
|
||||
# EmboFlow MongoDB Data Model
|
||||
|
||||
## Goal
|
||||
|
||||
Define the MongoDB-only persistence model for EmboFlow V1.
|
||||
|
||||
The database must support:
|
||||
|
||||
- user and workspace isolation
|
||||
- raw asset tracking
|
||||
- canonical dataset versions
|
||||
- workflow versioning
|
||||
- workflow execution history
|
||||
- plugin registration
|
||||
- auditability
|
||||
|
||||
## Storage Principles
|
||||
|
||||
- MongoDB stores metadata and execution state
|
||||
- Object storage stores large binary files and large derived bundles
|
||||
- MongoDB documents should have clear aggregate boundaries
|
||||
- Large, fast-growing arrays should be split into separate collections
|
||||
- Platform contracts should use references, not embedded file blobs
|
||||
|
||||
## Primary Collections
|
||||
|
||||
- `users`
|
||||
- `workspaces`
|
||||
- `projects`
|
||||
- `memberships`
|
||||
- `assets`
|
||||
- `asset_probe_reports`
|
||||
- `datasets`
|
||||
- `dataset_versions`
|
||||
- `workflow_definitions`
|
||||
- `workflow_definition_versions`
|
||||
- `workflow_runs`
|
||||
- `run_tasks`
|
||||
- `artifacts`
|
||||
- `annotation_tasks`
|
||||
- `annotations`
|
||||
- `plugins`
|
||||
- `storage_connections`
|
||||
- `audit_logs`
|
||||
|
||||
## Collection Design
|
||||
|
||||
### users
|
||||
|
||||
Purpose:
|
||||
|
||||
- account identity
|
||||
- profile
|
||||
- login metadata
|
||||
|
||||
Core fields:
|
||||
|
||||
- `_id`
|
||||
- `email`
|
||||
- `displayName`
|
||||
- `avatarUrl`
|
||||
- `status`
|
||||
- `lastLoginAt`
|
||||
- `createdAt`
|
||||
- `updatedAt`
|
||||
|
||||
### workspaces
|
||||
|
||||
Purpose:
|
||||
|
||||
- resource ownership boundary
|
||||
|
||||
Core fields:
|
||||
|
||||
- `_id`
|
||||
- `type` as `personal` or `team`
|
||||
- `name`
|
||||
- `slug`
|
||||
- `ownerId`
|
||||
- `status`
|
||||
- `settings`
|
||||
- `createdAt`
|
||||
- `updatedAt`
|
||||
|
||||
### memberships
|
||||
|
||||
Purpose:
|
||||
|
||||
- workspace and project role mapping
|
||||
|
||||
Core fields:
|
||||
|
||||
- `_id`
|
||||
- `workspaceId`
|
||||
- `projectId` optional
|
||||
- `userId`
|
||||
- `role`
|
||||
- `status`
|
||||
- `createdAt`
|
||||
- `updatedAt`
|
||||
|
||||
Keep this collection independent rather than embedding large member arrays in every resource document.
|
||||
|
||||
### projects
|
||||
|
||||
Purpose:
|
||||
|
||||
- project-scoped grouping for assets, workflows, runs, and outputs
|
||||
|
||||
Core fields:
|
||||
|
||||
- `_id`
|
||||
- `workspaceId`
|
||||
- `name`
|
||||
- `slug`
|
||||
- `description`
|
||||
- `status`
|
||||
- `createdBy`
|
||||
- `createdAt`
|
||||
- `updatedAt`
|
||||
|
||||
### assets
|
||||
|
||||
Purpose:
|
||||
|
||||
- represent raw uploaded or imported inputs
|
||||
|
||||
Supported asset types:
|
||||
|
||||
- `raw_file`
|
||||
- `archive`
|
||||
- `folder`
|
||||
- `video_collection`
|
||||
- `standard_dataset`
|
||||
- `rosbag`
|
||||
- `hdf5_dataset`
|
||||
- `object_storage_prefix`
|
||||
|
||||
Core fields:
|
||||
|
||||
- `_id`
|
||||
- `workspaceId`
|
||||
- `projectId`
|
||||
- `type`
|
||||
- `sourceType`
|
||||
- `displayName`
|
||||
- `status`
|
||||
- `storageRef`
|
||||
- `sizeBytes`
|
||||
- `fileCount`
|
||||
- `topLevelPaths`
|
||||
- `detectedFormats`
|
||||
- `summary`
|
||||
- `createdBy`
|
||||
- `createdAt`
|
||||
- `updatedAt`
|
||||
|
||||
Do not embed full large file listings in this document.
|
||||
|
||||
### asset_probe_reports
|
||||
|
||||
Purpose:
|
||||
|
||||
- retain richer structure-detection and validation output
|
||||
|
||||
Core fields:
|
||||
|
||||
- `_id`
|
||||
- `assetId`
|
||||
- `reportVersion`
|
||||
- `detectedFormatCandidates`
|
||||
- `structureSummary`
|
||||
- `warnings`
|
||||
- `recommendedNextNodes`
|
||||
- `rawReport`
|
||||
- `createdAt`
|
||||
|
||||
### datasets
|
||||
|
||||
Purpose:
|
||||
|
||||
- represent logical dataset identity
|
||||
|
||||
Core fields:
|
||||
|
||||
- `_id`
|
||||
- `workspaceId`
|
||||
- `projectId`
|
||||
- `name`
|
||||
- `type`
|
||||
- `status`
|
||||
- `latestVersionId`
|
||||
- `summary`
|
||||
- `createdBy`
|
||||
- `createdAt`
|
||||
- `updatedAt`
|
||||
|
||||
### dataset_versions
|
||||
|
||||
Purpose:
|
||||
|
||||
- represent immutable dataset snapshots
|
||||
|
||||
Core fields:
|
||||
|
||||
- `_id`
|
||||
- `datasetId`
|
||||
- `workspaceId`
|
||||
- `projectId`
|
||||
- `sourceAssetId`
|
||||
- `parentVersionId`
|
||||
- `versionTag`
|
||||
- `canonicalSchemaVersion`
|
||||
- `manifestRef`
|
||||
- `stats`
|
||||
- `summary`
|
||||
- `status`
|
||||
- `createdBy`
|
||||
- `createdAt`
|
||||
|
||||
This collection is separated because versions will grow over time.
|
||||
|
||||
### workflow_definitions
|
||||
|
||||
Purpose:
|
||||
|
||||
- represent logical workflow identity
|
||||
|
||||
Core fields:
|
||||
|
||||
- `_id`
|
||||
- `workspaceId`
|
||||
- `projectId`
|
||||
- `name`
|
||||
- `slug`
|
||||
- `status`
|
||||
- `latestVersionNumber`
|
||||
- `publishedVersionNumber`
|
||||
- `createdBy`
|
||||
- `createdAt`
|
||||
- `updatedAt`
|
||||
|
||||
### workflow_definition_versions
|
||||
|
||||
Purpose:
|
||||
|
||||
- represent immutable workflow snapshots
|
||||
|
||||
Core fields:
|
||||
|
||||
- `_id`
|
||||
- `workflowDefinitionId`
|
||||
- `workspaceId`
|
||||
- `projectId`
|
||||
- `versionNumber`
|
||||
- `visualGraph`
|
||||
- `logicGraph`
|
||||
- `runtimeGraph`
|
||||
- `pluginRefs`
|
||||
- `summary`
|
||||
- `createdBy`
|
||||
- `createdAt`
|
||||
|
||||
Splitting versions from workflow head metadata avoids oversized documents and simplifies history queries.
|
||||
|
||||
### workflow_runs
|
||||
|
||||
Purpose:
|
||||
|
||||
- store execution runs
|
||||
|
||||
Core fields:
|
||||
|
||||
- `_id`
|
||||
- `workflowDefinitionId`
|
||||
- `workflowVersionId`
|
||||
- `workspaceId`
|
||||
- `projectId`
|
||||
- `triggeredBy`
|
||||
- `status`
|
||||
- `runtimeSnapshot`
|
||||
- `summary`
|
||||
- `startedAt`
|
||||
- `finishedAt`
|
||||
- `createdAt`
|
||||
|
||||
### run_tasks
|
||||
|
||||
Purpose:
|
||||
|
||||
- store one execution unit per node per run
|
||||
|
||||
Core fields:
|
||||
|
||||
- `_id`
|
||||
- `workflowRunId`
|
||||
- `workflowVersionId`
|
||||
- `nodeId`
|
||||
- `nodeType`
|
||||
- `status`
|
||||
- `attempt`
|
||||
- `executor`
|
||||
- `scheduler`
|
||||
- `inputRefs`
|
||||
- `outputRefs`
|
||||
- `logRef`
|
||||
- `cacheKey`
|
||||
- `cacheHit`
|
||||
- `errorSummary`
|
||||
- `startedAt`
|
||||
- `finishedAt`
|
||||
- `createdAt`
|
||||
|
||||
This collection should remain separate from `workflow_runs` because task volume grows quickly.
|
||||
|
||||
### artifacts
|
||||
|
||||
Purpose:
|
||||
|
||||
- store managed outputs and previews
|
||||
|
||||
Artifact types may include:
|
||||
|
||||
- preview bundle
|
||||
- quality report
|
||||
- normalized dataset package
|
||||
- delivery package
|
||||
- training config package
|
||||
- intermediate task output
|
||||
|
||||
Core fields:
|
||||
|
||||
- `_id`
|
||||
- `workspaceId`
|
||||
- `projectId`
|
||||
- `type`
|
||||
- `producerType`
|
||||
- `producerId`
|
||||
- `storageRef`
|
||||
- `previewable`
|
||||
- `summary`
|
||||
- `lineage`
|
||||
- `createdBy`
|
||||
- `createdAt`
|
||||
|
||||
### annotation_tasks
|
||||
|
||||
Purpose:
|
||||
|
||||
- track assignment and state of manual labeling work
|
||||
|
||||
Core fields:
|
||||
|
||||
- `_id`
|
||||
- `workspaceId`
|
||||
- `projectId`
|
||||
- `targetType`
|
||||
- `targetRef`
|
||||
- `labelType`
|
||||
- `status`
|
||||
- `assigneeIds`
|
||||
- `reviewerIds`
|
||||
- `createdBy`
|
||||
- `createdAt`
|
||||
- `updatedAt`
|
||||
|
||||
### annotations
|
||||
|
||||
Purpose:
|
||||
|
||||
- persist annotation outputs
|
||||
|
||||
Core fields:
|
||||
|
||||
- `_id`
|
||||
- `annotationTaskId`
|
||||
- `workspaceId`
|
||||
- `projectId`
|
||||
- `targetRef`
|
||||
- `payload`
|
||||
- `status`
|
||||
- `createdBy`
|
||||
- `createdAt`
|
||||
- `updatedAt`
|
||||
|
||||
### plugins
|
||||
|
||||
Purpose:
|
||||
|
||||
- track installable and enabled plugin versions
|
||||
|
||||
Core fields:
|
||||
|
||||
- `_id`
|
||||
- `workspaceId` optional for workspace-scoped plugins
|
||||
- `scope` as `platform` or `workspace`
|
||||
- `name`
|
||||
- `status`
|
||||
- `currentVersion`
|
||||
- `versions`
|
||||
- `permissions`
|
||||
- `metadata`
|
||||
- `createdAt`
|
||||
- `updatedAt`
|
||||
|
||||
If plugin version payloads become large, split versions into a separate collection later. V1 can keep them nested if bounded.
|
||||
|
||||
### storage_connections
|
||||
|
||||
Purpose:
|
||||
|
||||
- store object storage and path registration configuration
|
||||
|
||||
Core fields:
|
||||
|
||||
- `_id`
|
||||
- `workspaceId`
|
||||
- `type`
|
||||
- `provider`
|
||||
- `name`
|
||||
- `status`
|
||||
- `config`
|
||||
- `secretRef`
|
||||
- `createdBy`
|
||||
- `createdAt`
|
||||
- `updatedAt`
|
||||
|
||||
Store secrets outside plaintext document fields where possible.
|
||||
|
||||
### audit_logs
|
||||
|
||||
Purpose:
|
||||
|
||||
- append-only history of sensitive actions
|
||||
|
||||
Core fields:
|
||||
|
||||
- `_id`
|
||||
- `workspaceId`
|
||||
- `projectId`
|
||||
- `actorId`
|
||||
- `resourceType`
|
||||
- `resourceId`
|
||||
- `action`
|
||||
- `beforeSummary`
|
||||
- `afterSummary`
|
||||
- `metadata`
|
||||
- `createdAt`
|
||||
|
||||
## Reference Strategy
|
||||
|
||||
Use stable ids between collections.
|
||||
|
||||
References should be explicit:
|
||||
|
||||
- asset to probe report
|
||||
- dataset to dataset versions
|
||||
- workflow definition to workflow versions
|
||||
- workflow run to run tasks
|
||||
- task to artifact
|
||||
- annotation task to annotations
|
||||
|
||||
Do not depend on implicit path-based linkage.
|
||||
|
||||
## Index Recommendations
|
||||
|
||||
### Always index
|
||||
|
||||
- `workspaceId`
|
||||
- `projectId`
|
||||
- `status`
|
||||
- `createdAt`
|
||||
|
||||
### Important compound indexes
|
||||
|
||||
- `memberships.workspaceId + memberships.userId`
|
||||
- `projects.workspaceId + projects.slug`
|
||||
- `assets.projectId + assets.type + assets.createdAt`
|
||||
- `datasets.projectId + datasets.name`
|
||||
- `dataset_versions.datasetId + dataset_versions.createdAt`
|
||||
- `workflow_definitions.projectId + workflow_definitions.slug`
|
||||
- `workflow_definition_versions.workflowDefinitionId + versionNumber`
|
||||
- `workflow_runs.projectId + createdAt`
|
||||
- `workflow_runs.workflowDefinitionId + status`
|
||||
- `run_tasks.workflowRunId + nodeId`
|
||||
- `artifacts.producerType + producerId`
|
||||
- `annotation_tasks.projectId + status`
|
||||
- `audit_logs.workspaceId + createdAt`
|
||||
|
||||
## Object Storage References
|
||||
|
||||
MongoDB should store references such as:
|
||||
|
||||
- bucket
|
||||
- key
|
||||
- uri
|
||||
- checksum
|
||||
- content type
|
||||
- size
|
||||
|
||||
It should not store:
|
||||
|
||||
- large binary file payloads
|
||||
- full raw video content
|
||||
- giant archive contents
|
||||
|
||||
## V1 Constraints
|
||||
|
||||
- MongoDB is the only database
|
||||
- No relational sidecar is assumed
|
||||
- No GridFS-first strategy is assumed
|
||||
- Large manifests may live in object storage and be referenced from MongoDB
|
||||
|
||||
## V1 Non-Goals
|
||||
|
||||
The V1 model does not need:
|
||||
|
||||
- cross-region data distribution
|
||||
- advanced event sourcing
|
||||
- fully normalized analytics warehouse modeling
|
||||
- high-volume search indexing inside MongoDB itself
|
||||
1
design/06-api/.gitkeep
Normal file
1
design/06-api/.gitkeep
Normal file
@ -0,0 +1 @@
|
||||
|
||||
1
design/07-research/.gitkeep
Normal file
1
design/07-research/.gitkeep
Normal file
@ -0,0 +1 @@
|
||||
|
||||
1
design/08-decisions/.gitkeep
Normal file
1
design/08-decisions/.gitkeep
Normal file
@ -0,0 +1 @@
|
||||
|
||||
@ -0,0 +1,45 @@
|
||||
# ADR-0001: Separate Raw Assets From Canonical Datasets
|
||||
|
||||
## Status
|
||||
|
||||
Accepted
|
||||
|
||||
## Context
|
||||
|
||||
EmboFlow must support both structured embodied dataset formats and unstructured or semi-structured delivery-style raw assets, including:
|
||||
|
||||
- RLDS
|
||||
- LeRobot v2/v3
|
||||
- HDF5
|
||||
- Rosbag
|
||||
- Raw video directories
|
||||
- Archive packages
|
||||
|
||||
If the platform treats every input as an already-standardized dataset, ingestion and delivery workflows become awkward and lossy.
|
||||
|
||||
## Decision
|
||||
|
||||
The platform will model:
|
||||
|
||||
- Raw assets as first-class resources
|
||||
- Canonical datasets as derived semantic resources
|
||||
|
||||
Raw assets preserve original structure, paths, naming, and metadata layout. Canonical datasets provide normalized semantics for conversion, workflow execution, and export logic.
|
||||
|
||||
## Consequences
|
||||
|
||||
### Positive
|
||||
|
||||
- Supports customer delivery package workflows
|
||||
- Supports embodied dataset conversion workflows
|
||||
- Preserves original structure for inspection and debugging
|
||||
- Avoids forcing visualization to depend on a lossy normalized format
|
||||
|
||||
### Negative
|
||||
|
||||
- Adds one more layer to the object model
|
||||
- Requires readers and mappers instead of direct format-to-format conversion
|
||||
|
||||
## Notes
|
||||
|
||||
Visualization may operate on raw assets directly. Processing and export should primarily operate on canonical semantics where possible.
|
||||
@ -0,0 +1,56 @@
|
||||
# ADR-0002: Separate Executors From Schedulers
|
||||
|
||||
## Status
|
||||
|
||||
Accepted
|
||||
|
||||
## Context
|
||||
|
||||
EmboFlow needs to support multiple runtime modes now and later:
|
||||
|
||||
- direct Python execution
|
||||
- Docker-isolated execution
|
||||
- HTTP-based execution
|
||||
- local scheduling
|
||||
- future Kubernetes scheduling
|
||||
- future Volcano scheduling
|
||||
|
||||
If execution logic and scheduling logic are coupled together, migration from single-host operation to cluster operation becomes costly.
|
||||
|
||||
## Decision
|
||||
|
||||
The architecture will separate:
|
||||
|
||||
- Executor: how node logic runs
|
||||
- Scheduler: where and under what dispatch policy tasks run
|
||||
|
||||
V1 executors:
|
||||
|
||||
- Python
|
||||
- Docker
|
||||
- HTTP
|
||||
|
||||
V1 scheduler:
|
||||
|
||||
- Local
|
||||
|
||||
Reserved future schedulers:
|
||||
|
||||
- Kubernetes
|
||||
- Volcano
|
||||
|
||||
## Consequences
|
||||
|
||||
### Positive
|
||||
|
||||
- Cleaner evolution path
|
||||
- Better runtime abstraction
|
||||
- Less refactoring required for cluster migration
|
||||
|
||||
### Negative
|
||||
|
||||
- Slightly more abstraction in V1 than the immediate deployment requires
|
||||
|
||||
## Notes
|
||||
|
||||
User-injected code should default to Docker execution, while trusted platform logic may use Python execution.
|
||||
1
design/09-assets/.gitkeep
Normal file
1
design/09-assets/.gitkeep
Normal file
@ -0,0 +1 @@
|
||||
|
||||
21
design/README.md
Normal file
21
design/README.md
Normal file
@ -0,0 +1,21 @@
|
||||
# EmboFlow Design Workspace
|
||||
|
||||
This directory stores project design materials before or alongside implementation.
|
||||
|
||||
## Structure
|
||||
|
||||
- `00-overview`: project goals, scope, milestones
|
||||
- `01-product`: requirements, user stories, feature definitions
|
||||
- `02-architecture`: system architecture, modules, technical constraints
|
||||
- `03-workflows`: business flows, sequence diagrams, operational flows
|
||||
- `04-ui-ux`: wireframes, interaction notes, UX decisions
|
||||
- `05-data`: data model, entities, schema drafts
|
||||
- `06-api`: API contracts, request/response drafts, integration notes
|
||||
- `07-research`: competitive analysis, references, discovery notes
|
||||
- `08-decisions`: ADRs and major tradeoff records
|
||||
- `09-assets`: diagrams, exported images, attachments
|
||||
- `templates`: reusable design document templates
|
||||
|
||||
## Suggested usage
|
||||
|
||||
Keep design artifacts in Markdown where possible so they diff cleanly in Git.
|
||||
1
design/templates/.gitkeep
Normal file
1
design/templates/.gitkeep
Normal file
@ -0,0 +1 @@
|
||||
|
||||
62
docker-compose.yml
Normal file
62
docker-compose.yml
Normal file
@ -0,0 +1,62 @@
|
||||
services:
|
||||
web:
|
||||
image: node:20-alpine
|
||||
working_dir: /workspace
|
||||
command: ["sh", "-c", "sleep infinity"]
|
||||
ports:
|
||||
- "${WEB_PORT:-3000}:3000"
|
||||
volumes:
|
||||
- .:/workspace
|
||||
depends_on:
|
||||
- api
|
||||
|
||||
api:
|
||||
image: node:20-alpine
|
||||
working_dir: /workspace
|
||||
command: ["sh", "-c", "sleep infinity"]
|
||||
ports:
|
||||
- "${API_PORT:-3001}:3001"
|
||||
volumes:
|
||||
- .:/workspace
|
||||
depends_on:
|
||||
- mongo
|
||||
|
||||
worker:
|
||||
image: node:20-alpine
|
||||
working_dir: /workspace
|
||||
command: ["sh", "-c", "sleep infinity"]
|
||||
ports:
|
||||
- "${WORKER_PORT:-3002}:3002"
|
||||
volumes:
|
||||
- .:/workspace
|
||||
depends_on:
|
||||
- mongo
|
||||
- minio
|
||||
|
||||
mongo:
|
||||
image: mongo:7
|
||||
restart: unless-stopped
|
||||
ports:
|
||||
- "${MONGO_PORT:-27017}:27017"
|
||||
environment:
|
||||
MONGO_INITDB_ROOT_USERNAME: "${MONGO_ROOT_USERNAME:-emboflow}"
|
||||
MONGO_INITDB_ROOT_PASSWORD: "${MONGO_ROOT_PASSWORD:-emboflow}"
|
||||
volumes:
|
||||
- mongo-data:/data/db
|
||||
|
||||
minio:
|
||||
image: minio/minio:RELEASE.2024-10-29T16-01-48Z
|
||||
restart: unless-stopped
|
||||
command: ["server", "/data", "--console-address", ":9001"]
|
||||
ports:
|
||||
- "${MINIO_PORT:-9000}:9000"
|
||||
- "${MINIO_CONSOLE_PORT:-9001}:9001"
|
||||
environment:
|
||||
MINIO_ROOT_USER: "${MINIO_ROOT_USER:-emboflow}"
|
||||
MINIO_ROOT_PASSWORD: "${MINIO_ROOT_PASSWORD:-emboflow123}"
|
||||
volumes:
|
||||
- minio-data:/data
|
||||
|
||||
volumes:
|
||||
mongo-data:
|
||||
minio-data:
|
||||
96
docs/development-workflow.md
Normal file
96
docs/development-workflow.md
Normal file
@ -0,0 +1,96 @@
|
||||
# EmboFlow Development Workflow
|
||||
|
||||
## Goal
|
||||
|
||||
Keep repository design artifacts and implementation changes aligned as EmboFlow evolves.
|
||||
|
||||
## Working Agreement
|
||||
|
||||
EmboFlow is being developed from explicit design documents under `design/`. Development should follow a doc-aware workflow instead of letting code drift ahead without recorded decisions.
|
||||
|
||||
## Standard Change Flow
|
||||
|
||||
### 1. Read Before Editing
|
||||
|
||||
Before changing code, review the design files that define the affected area:
|
||||
|
||||
- product scope
|
||||
- architecture boundaries
|
||||
- workflow model
|
||||
- data model
|
||||
- deployment model
|
||||
- accepted ADRs
|
||||
|
||||
### 2. Identify Impact
|
||||
|
||||
Decide whether the change affects:
|
||||
|
||||
- product behavior
|
||||
- object model
|
||||
- workflow/run/task semantics
|
||||
- node or plugin contract
|
||||
- storage assumptions
|
||||
- user or permission behavior
|
||||
- deployment/runtime assumptions
|
||||
|
||||
If yes, the matching design files must be updated.
|
||||
|
||||
### 3. Change Code And Docs Together
|
||||
|
||||
Do not defer the design update. Treat design edits as part of the implementation, not follow-up cleanup.
|
||||
|
||||
### 4. Run The Consistency Check
|
||||
|
||||
From the repo root:
|
||||
|
||||
```bash
|
||||
python3 scripts/check_doc_code_sync.py . --strict
|
||||
```
|
||||
|
||||
Interpret warnings manually. The script is a guardrail, not a replacement for judgment.
|
||||
|
||||
### 5. Use The Local Hooks
|
||||
|
||||
Install local hooks once per clone:
|
||||
|
||||
```bash
|
||||
bash scripts/install_hooks.sh
|
||||
```
|
||||
|
||||
This enables:
|
||||
|
||||
- `commit-msg`: require English-only gitmoji commit messages
|
||||
- `pre-commit`: block staged code/config drift without doc updates
|
||||
- `pre-push`: run commit-message validation, doc/code sync checks, and repository tests
|
||||
|
||||
### 6. Close With Explicit Status
|
||||
|
||||
Every implementation summary should state one of:
|
||||
|
||||
- `Aligned`
|
||||
- `Partially aligned`
|
||||
- `Doc-first`
|
||||
|
||||
and name the exact design files that were reviewed or updated.
|
||||
|
||||
## EmboFlow-Specific Review Checklist
|
||||
|
||||
Before closing a non-trivial change, confirm whether any of these need updates:
|
||||
|
||||
- raw asset vs canonical dataset model
|
||||
- workflow definition vs workflow run model
|
||||
- node schema and plugin contract
|
||||
- executor vs scheduler separation
|
||||
- MongoDB collection or document shape
|
||||
- workspace/project/user boundary
|
||||
- deployment topology or storage assumptions
|
||||
|
||||
## Automation
|
||||
|
||||
This repository now uses both local and remote guardrails:
|
||||
|
||||
- local git hooks from `.githooks/`
|
||||
- commit message validation
|
||||
- CI checks in `.github/workflows/guardrails.yml`
|
||||
|
||||
These checks are intended to keep design documents, code changes, and commit history coherent.
|
||||
621
docs/plans/2026-03-26-emboflow-v1-foundation-and-mvp.md
Normal file
621
docs/plans/2026-03-26-emboflow-v1-foundation-and-mvp.md
Normal file
@ -0,0 +1,621 @@
|
||||
# EmboFlow V1 Foundation And MVP Implementation Plan
|
||||
|
||||
> **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task.
|
||||
|
||||
**Goal:** Build the first usable EmboFlow increment: workspace-aware raw asset ingestion, workflow definition/versioning, local workflow execution, and the first web workflow authoring surfaces.
|
||||
|
||||
**Architecture:** Use a TypeScript monorepo with a React web app, a Node.js API control plane, and a separate Node.js worker. Use MongoDB as the only database, object storage abstraction for cloud storage or MinIO, and a local scheduler with Python and Docker executor contracts.
|
||||
|
||||
**Tech Stack:** pnpm workspace, React, TypeScript, React Flow, NestJS, Mongoose, MongoDB, Docker Compose, Python runtime hooks, unittest/Vitest/Jest-compatible project tests
|
||||
|
||||
---
|
||||
|
||||
### Task 1: Bootstrap The Monorepo And Runtime Skeleton
|
||||
|
||||
**Files:**
|
||||
- Create: `package.json`
|
||||
- Create: `pnpm-workspace.yaml`
|
||||
- Create: `tsconfig.base.json`
|
||||
- Create: `apps/web/package.json`
|
||||
- Create: `apps/api/package.json`
|
||||
- Create: `apps/worker/package.json`
|
||||
- Create: `docker-compose.yml`
|
||||
- Create: `.env.example`
|
||||
- Test: `tests/test_repo_structure.py`
|
||||
|
||||
**Step 1: Write the failing test**
|
||||
|
||||
Create `tests/test_repo_structure.py` to assert the repository contains the expected top-level app folders and root workspace files.
|
||||
|
||||
**Step 2: Run test to verify it fails**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
python3 -m unittest tests/test_repo_structure.py -v
|
||||
```
|
||||
|
||||
Expected: FAIL because the monorepo files and app folders do not exist yet.
|
||||
|
||||
**Step 3: Write minimal implementation**
|
||||
|
||||
Create the pnpm workspace root, app package manifests, root TypeScript config, `.env.example`, and `docker-compose.yml` with services for:
|
||||
|
||||
- `web`
|
||||
- `api`
|
||||
- `worker`
|
||||
- `mongo`
|
||||
- `minio`
|
||||
|
||||
Keep the first version minimal. Do not add extra infra services that are not required by the design.
|
||||
|
||||
**Step 4: Run test to verify it passes**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
python3 -m unittest tests/test_repo_structure.py -v
|
||||
```
|
||||
|
||||
Expected: PASS
|
||||
|
||||
**Step 5: Commit**
|
||||
|
||||
```bash
|
||||
git add package.json pnpm-workspace.yaml tsconfig.base.json apps docker-compose.yml .env.example tests/test_repo_structure.py
|
||||
git commit -m ":tada: bootstrap workspace and runtime skeleton"
|
||||
```
|
||||
|
||||
### Task 2: Create Shared Domain Contracts And Mongo Setup
|
||||
|
||||
**Files:**
|
||||
- Create: `packages/contracts/package.json`
|
||||
- Create: `packages/contracts/src/domain.ts`
|
||||
- Create: `apps/api/src/common/mongo/mongo.module.ts`
|
||||
- Create: `apps/api/src/common/mongo/schemas/workspace.schema.ts`
|
||||
- Create: `apps/api/src/common/mongo/schemas/project.schema.ts`
|
||||
- Create: `apps/api/src/common/mongo/schemas/asset.schema.ts`
|
||||
- Create: `apps/api/src/common/mongo/schemas/workflow.schema.ts`
|
||||
- Test: `apps/api/test/domain-contracts.spec.ts`
|
||||
|
||||
**Step 1: Write the failing test**
|
||||
|
||||
Create `apps/api/test/domain-contracts.spec.ts` asserting:
|
||||
|
||||
- workspace types include `personal` and `team`
|
||||
- asset types include raw and dataset-style sources
|
||||
- workflow status values match the design docs
|
||||
|
||||
**Step 2: Run test to verify it fails**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
pnpm --filter api test domain-contracts.spec.ts
|
||||
```
|
||||
|
||||
Expected: FAIL because contracts and schemas are missing.
|
||||
|
||||
**Step 3: Write minimal implementation**
|
||||
|
||||
Create shared domain enums and base Mongo schema definitions for:
|
||||
|
||||
- workspaces
|
||||
- projects
|
||||
- assets
|
||||
- workflow definitions
|
||||
|
||||
Add a minimal Mongo module in the API app using environment-based connection config.
|
||||
|
||||
**Step 4: Run test to verify it passes**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
pnpm --filter api test domain-contracts.spec.ts
|
||||
```
|
||||
|
||||
Expected: PASS
|
||||
|
||||
**Step 5: Commit**
|
||||
|
||||
```bash
|
||||
git add packages/contracts apps/api/src/common apps/api/test/domain-contracts.spec.ts
|
||||
git commit -m ":sparkles: add shared domain contracts and mongo setup"
|
||||
```
|
||||
|
||||
### Task 3: Implement Identity, Workspace, And Project APIs
|
||||
|
||||
**Files:**
|
||||
- Create: `apps/api/src/modules/auth/auth.module.ts`
|
||||
- Create: `apps/api/src/modules/auth/auth.controller.ts`
|
||||
- Create: `apps/api/src/modules/workspaces/workspaces.module.ts`
|
||||
- Create: `apps/api/src/modules/workspaces/workspaces.controller.ts`
|
||||
- Create: `apps/api/src/modules/projects/projects.module.ts`
|
||||
- Create: `apps/api/src/modules/projects/projects.controller.ts`
|
||||
- Create: `apps/api/src/modules/projects/projects.service.ts`
|
||||
- Test: `apps/api/test/projects.e2e-spec.ts`
|
||||
|
||||
**Step 1: Write the failing test**
|
||||
|
||||
Create `apps/api/test/projects.e2e-spec.ts` covering:
|
||||
|
||||
- create personal workspace bootstrap flow
|
||||
- create project under a workspace
|
||||
- reject project creation without a workspace id
|
||||
|
||||
**Step 2: Run test to verify it fails**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
pnpm --filter api test projects.e2e-spec.ts
|
||||
```
|
||||
|
||||
Expected: FAIL because the modules and endpoints do not exist yet.
|
||||
|
||||
**Step 3: Write minimal implementation**
|
||||
|
||||
Implement:
|
||||
|
||||
- development-safe auth stub or local auth module
|
||||
- workspace creation and listing
|
||||
- project creation and listing
|
||||
- basic membership checks sufficient for V1 local development
|
||||
|
||||
Do not build a full production auth stack before the API shape is stable.
|
||||
|
||||
**Step 4: Run test to verify it passes**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
pnpm --filter api test projects.e2e-spec.ts
|
||||
```
|
||||
|
||||
Expected: PASS
|
||||
|
||||
**Step 5: Commit**
|
||||
|
||||
```bash
|
||||
git add apps/api/src/modules/auth apps/api/src/modules/workspaces apps/api/src/modules/projects apps/api/test/projects.e2e-spec.ts
|
||||
git commit -m ":sparkles: add workspace and project APIs"
|
||||
```
|
||||
|
||||
### Task 4: Implement Asset Ingestion, Storage Abstraction, And Probe Metadata
|
||||
|
||||
**Files:**
|
||||
- Create: `apps/api/src/modules/storage/storage.module.ts`
|
||||
- Create: `apps/api/src/modules/storage/storage.service.ts`
|
||||
- Create: `apps/api/src/modules/assets/assets.module.ts`
|
||||
- Create: `apps/api/src/modules/assets/assets.controller.ts`
|
||||
- Create: `apps/api/src/modules/assets/assets.service.ts`
|
||||
- Create: `apps/api/src/modules/assets/probe/probe.service.ts`
|
||||
- Create: `apps/api/src/common/mongo/schemas/asset-probe-report.schema.ts`
|
||||
- Test: `apps/api/test/assets.e2e-spec.ts`
|
||||
|
||||
**Step 1: Write the failing test**
|
||||
|
||||
Create `apps/api/test/assets.e2e-spec.ts` covering:
|
||||
|
||||
- register an uploaded asset record
|
||||
- create a probe report for a raw asset
|
||||
- return recommended next actions from probe metadata
|
||||
|
||||
**Step 2: Run test to verify it fails**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
pnpm --filter api test assets.e2e-spec.ts
|
||||
```
|
||||
|
||||
Expected: FAIL because asset ingestion and probe services are missing.
|
||||
|
||||
**Step 3: Write minimal implementation**
|
||||
|
||||
Implement:
|
||||
|
||||
- storage abstraction interface
|
||||
- MinIO/S3-compatible config contract
|
||||
- asset create/list/detail endpoints
|
||||
- probe-report persistence
|
||||
- placeholder probe logic for directory and archive summaries
|
||||
|
||||
Do not build full binary upload optimization yet. First make the metadata contract stable.
|
||||
|
||||
**Step 4: Run test to verify it passes**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
pnpm --filter api test assets.e2e-spec.ts
|
||||
```
|
||||
|
||||
Expected: PASS
|
||||
|
||||
**Step 5: Commit**
|
||||
|
||||
```bash
|
||||
git add apps/api/src/modules/storage apps/api/src/modules/assets apps/api/src/common/mongo/schemas/asset-probe-report.schema.ts apps/api/test/assets.e2e-spec.ts
|
||||
git commit -m ":truck: add asset ingestion and probe metadata flow"
|
||||
```
|
||||
|
||||
### Task 5: Implement Workflow Definitions, Versions, Runs, And Tasks
|
||||
|
||||
**Files:**
|
||||
- Create: `apps/api/src/modules/workflows/workflows.module.ts`
|
||||
- Create: `apps/api/src/modules/workflows/workflows.controller.ts`
|
||||
- Create: `apps/api/src/modules/workflows/workflows.service.ts`
|
||||
- Create: `apps/api/src/modules/runs/runs.module.ts`
|
||||
- Create: `apps/api/src/modules/runs/runs.controller.ts`
|
||||
- Create: `apps/api/src/modules/runs/runs.service.ts`
|
||||
- Create: `apps/api/src/common/mongo/schemas/workflow-definition-version.schema.ts`
|
||||
- Create: `apps/api/src/common/mongo/schemas/workflow-run.schema.ts`
|
||||
- Create: `apps/api/src/common/mongo/schemas/run-task.schema.ts`
|
||||
- Test: `apps/api/test/workflow-runs.e2e-spec.ts`
|
||||
|
||||
**Step 1: Write the failing test**
|
||||
|
||||
Create `apps/api/test/workflow-runs.e2e-spec.ts` covering:
|
||||
|
||||
- create workflow definition
|
||||
- save workflow version
|
||||
- create workflow run from saved version
|
||||
- generate initial run tasks for ready nodes
|
||||
|
||||
**Step 2: Run test to verify it fails**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
pnpm --filter api test workflow-runs.e2e-spec.ts
|
||||
```
|
||||
|
||||
Expected: FAIL because workflow versioning and run creation do not exist yet.
|
||||
|
||||
**Step 3: Write minimal implementation**
|
||||
|
||||
Implement:
|
||||
|
||||
- workflow definition head record
|
||||
- immutable workflow version snapshots
|
||||
- run creation from a workflow version
|
||||
- initial DAG compilation for simple source-to-transform chains
|
||||
- run task persistence
|
||||
|
||||
Keep V1 graph compilation simple. Support sequential edges first, then one-level branching.
|
||||
|
||||
**Step 4: Run test to verify it passes**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
pnpm --filter api test workflow-runs.e2e-spec.ts
|
||||
```
|
||||
|
||||
Expected: PASS
|
||||
|
||||
**Step 5: Commit**
|
||||
|
||||
```bash
|
||||
git add apps/api/src/modules/workflows apps/api/src/modules/runs apps/api/src/common/mongo/schemas/workflow-definition-version.schema.ts apps/api/src/common/mongo/schemas/workflow-run.schema.ts apps/api/src/common/mongo/schemas/run-task.schema.ts apps/api/test/workflow-runs.e2e-spec.ts
|
||||
git commit -m ":sparkles: add workflow versioning and run records"
|
||||
```
|
||||
|
||||
### Task 6: Add The Worker, Local Scheduler, And Executor Contracts
|
||||
|
||||
**Files:**
|
||||
- Create: `apps/worker/src/main.ts`
|
||||
- Create: `apps/worker/src/runner/task-runner.ts`
|
||||
- Create: `apps/worker/src/scheduler/local-scheduler.ts`
|
||||
- Create: `apps/worker/src/executors/python-executor.ts`
|
||||
- Create: `apps/worker/src/executors/docker-executor.ts`
|
||||
- Create: `apps/worker/src/executors/http-executor.ts`
|
||||
- Create: `apps/worker/src/contracts/execution-context.ts`
|
||||
- Test: `apps/worker/test/task-runner.spec.ts`
|
||||
|
||||
**Step 1: Write the failing test**
|
||||
|
||||
Create `apps/worker/test/task-runner.spec.ts` covering:
|
||||
|
||||
- worker loads pending tasks
|
||||
- worker marks task running then success
|
||||
- worker chooses executor based on node runtime config
|
||||
|
||||
**Step 2: Run test to verify it fails**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
pnpm --filter worker test task-runner.spec.ts
|
||||
```
|
||||
|
||||
Expected: FAIL because the worker runtime does not exist yet.
|
||||
|
||||
**Step 3: Write minimal implementation**
|
||||
|
||||
Implement:
|
||||
|
||||
- worker bootstrap
|
||||
- polling or queue-backed local scheduler
|
||||
- execution context builder
|
||||
- stub Python, Docker, and HTTP executors
|
||||
- task status transitions
|
||||
|
||||
Do not implement full Docker isolation logic in one step. First lock the runtime interfaces and transitions.
|
||||
|
||||
**Step 4: Run test to verify it passes**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
pnpm --filter worker test task-runner.spec.ts
|
||||
```
|
||||
|
||||
Expected: PASS
|
||||
|
||||
**Step 5: Commit**
|
||||
|
||||
```bash
|
||||
git add apps/worker apps/api/src/modules/runs apps/worker/test/task-runner.spec.ts
|
||||
git commit -m ":construction_worker: add local worker and executor contracts"
|
||||
```
|
||||
|
||||
### Task 7: Build The Web Shell, Workspace Flow, And Asset Workspace
|
||||
|
||||
**Files:**
|
||||
- Create: `apps/web/src/main.tsx`
|
||||
- Create: `apps/web/src/app/router.tsx`
|
||||
- Create: `apps/web/src/features/layout/app-shell.tsx`
|
||||
- Create: `apps/web/src/features/workspaces/workspace-switcher.tsx`
|
||||
- Create: `apps/web/src/features/projects/project-selector.tsx`
|
||||
- Create: `apps/web/src/features/assets/assets-page.tsx`
|
||||
- Create: `apps/web/src/features/assets/asset-detail-page.tsx`
|
||||
- Create: `apps/web/src/features/assets/components/asset-list.tsx`
|
||||
- Create: `apps/web/src/features/assets/components/asset-summary-panel.tsx`
|
||||
- Test: `apps/web/src/features/assets/assets-page.test.tsx`
|
||||
|
||||
**Step 1: Write the failing test**
|
||||
|
||||
Create `apps/web/src/features/assets/assets-page.test.tsx` covering:
|
||||
|
||||
- app shell renders primary navigation
|
||||
- assets page renders asset rows from API data
|
||||
- asset detail page renders probe summary
|
||||
|
||||
**Step 2: Run test to verify it fails**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
pnpm --filter web test assets-page.test.tsx
|
||||
```
|
||||
|
||||
Expected: FAIL because the web app shell and pages do not exist yet.
|
||||
|
||||
**Step 3: Write minimal implementation**
|
||||
|
||||
Implement:
|
||||
|
||||
- web app bootstrap
|
||||
- primary navigation matching the design docs
|
||||
- workspace/project header controls
|
||||
- asset list page
|
||||
- asset detail page with summary and action buttons
|
||||
|
||||
Defer advanced preview renderers. Start with structured metadata and simple detail views.
|
||||
|
||||
**Step 4: Run test to verify it passes**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
pnpm --filter web test assets-page.test.tsx
|
||||
```
|
||||
|
||||
Expected: PASS
|
||||
|
||||
**Step 5: Commit**
|
||||
|
||||
```bash
|
||||
git add apps/web apps/web/src/features/assets/assets-page.test.tsx
|
||||
git commit -m ":sparkles: add web shell and asset workspace"
|
||||
```
|
||||
|
||||
### Task 8: Build Canvas Authoring, Run Detail, And First Workflow Actions
|
||||
|
||||
**Files:**
|
||||
- Create: `apps/web/src/features/workflows/workflows-page.tsx`
|
||||
- Create: `apps/web/src/features/workflows/workflow-editor-page.tsx`
|
||||
- Create: `apps/web/src/features/workflows/components/node-library.tsx`
|
||||
- Create: `apps/web/src/features/workflows/components/workflow-canvas.tsx`
|
||||
- Create: `apps/web/src/features/workflows/components/node-config-panel.tsx`
|
||||
- Create: `apps/web/src/features/runs/run-detail-page.tsx`
|
||||
- Create: `apps/web/src/features/runs/components/run-graph-view.tsx`
|
||||
- Create: `apps/web/src/features/runs/components/task-log-panel.tsx`
|
||||
- Test: `apps/web/src/features/workflows/workflow-editor-page.test.tsx`
|
||||
|
||||
**Step 1: Write the failing test**
|
||||
|
||||
Create `apps/web/src/features/workflows/workflow-editor-page.test.tsx` covering:
|
||||
|
||||
- node library renders categories
|
||||
- node config panel opens when a node is selected
|
||||
- run detail view shows node status badges from run data
|
||||
|
||||
**Step 2: Run test to verify it fails**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
pnpm --filter web test workflow-editor-page.test.tsx
|
||||
```
|
||||
|
||||
Expected: FAIL because the workflow editor and run detail pages do not exist yet.
|
||||
|
||||
**Step 3: Write minimal implementation**
|
||||
|
||||
Implement:
|
||||
|
||||
- workflow list page
|
||||
- workflow editor page using React Flow
|
||||
- left node library, center canvas, right config panel
|
||||
- save workflow version action
|
||||
- trigger workflow run action
|
||||
- run detail page with graph and selected-node log panel
|
||||
|
||||
Keep the first editor scoped to V1 node categories and schema-driven config rendering.
|
||||
|
||||
**Step 4: Run test to verify it passes**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
pnpm --filter web test workflow-editor-page.test.tsx
|
||||
```
|
||||
|
||||
Expected: PASS
|
||||
|
||||
**Step 5: Commit**
|
||||
|
||||
```bash
|
||||
git add apps/web/src/features/workflows apps/web/src/features/runs apps/web/src/features/workflows/workflow-editor-page.test.tsx
|
||||
git commit -m ":sparkles: add canvas workflow editor and run detail pages"
|
||||
```
|
||||
|
||||
### Task 9: Add Preview Surface, Delivery Nodes, And MVP Integration
|
||||
|
||||
**Files:**
|
||||
- Create: `apps/api/src/modules/artifacts/artifacts.module.ts`
|
||||
- Create: `apps/api/src/modules/artifacts/artifacts.controller.ts`
|
||||
- Create: `apps/api/src/modules/artifacts/artifacts.service.ts`
|
||||
- Create: `apps/web/src/features/explore/explore-page.tsx`
|
||||
- Create: `apps/web/src/features/explore/renderers/json-renderer.tsx`
|
||||
- Create: `apps/web/src/features/explore/renderers/video-renderer.tsx`
|
||||
- Create: `apps/web/src/features/explore/renderers/directory-renderer.tsx`
|
||||
- Create: `apps/api/src/modules/plugins/builtin/delivery-nodes.ts`
|
||||
- Test: `apps/api/test/artifacts.e2e-spec.ts`
|
||||
- Test: `apps/web/src/features/explore/explore-page.test.tsx`
|
||||
|
||||
**Step 1: Write the failing tests**
|
||||
|
||||
Create:
|
||||
|
||||
- `apps/api/test/artifacts.e2e-spec.ts` for artifact retrieval by producer
|
||||
- `apps/web/src/features/explore/explore-page.test.tsx` for opening and rendering supported artifact types
|
||||
|
||||
**Step 2: Run tests to verify they fail**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
pnpm --filter api test artifacts.e2e-spec.ts
|
||||
pnpm --filter web test explore-page.test.tsx
|
||||
```
|
||||
|
||||
Expected: FAIL because artifact APIs and explore renderers do not exist yet.
|
||||
|
||||
**Step 3: Write minimal implementation**
|
||||
|
||||
Implement:
|
||||
|
||||
- artifact module and lookup endpoints
|
||||
- explore page
|
||||
- JSON, directory, and video renderers
|
||||
- built-in delivery-normalization node definitions for the V1 business path
|
||||
|
||||
Do not implement the full renderer plugin platform yet. Start with built-ins and stable renderer contracts.
|
||||
|
||||
**Step 4: Run tests to verify they pass**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
pnpm --filter api test artifacts.e2e-spec.ts
|
||||
pnpm --filter web test explore-page.test.tsx
|
||||
```
|
||||
|
||||
Expected: PASS
|
||||
|
||||
**Step 5: Commit**
|
||||
|
||||
```bash
|
||||
git add apps/api/src/modules/artifacts apps/api/src/modules/plugins/builtin/delivery-nodes.ts apps/api/test/artifacts.e2e-spec.ts apps/web/src/features/explore apps/web/src/features/explore/explore-page.test.tsx
|
||||
git commit -m ":package: add explore surface and delivery artifacts"
|
||||
```
|
||||
|
||||
### Task 10: Harden Guardrails, Docs, And Developer Entry Commands
|
||||
|
||||
**Files:**
|
||||
- Modify: `CONTRIBUTING.md`
|
||||
- Modify: `docs/development-workflow.md`
|
||||
- Modify: `design/03-workflows/workflow-execution-model.md`
|
||||
- Modify: `design/05-data/mongodb-data-model.md`
|
||||
- Create: `Makefile`
|
||||
- Create: `README.md`
|
||||
- Test: `tests/test_dev_commands.py`
|
||||
|
||||
**Step 1: Write the failing test**
|
||||
|
||||
Create `tests/test_dev_commands.py` asserting:
|
||||
|
||||
- `Makefile` exposes expected local commands
|
||||
- `README.md` documents bootstrap, hooks, test, and local run commands
|
||||
|
||||
**Step 2: Run test to verify it fails**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
python3 -m unittest tests/test_dev_commands.py -v
|
||||
```
|
||||
|
||||
Expected: FAIL because developer entry commands are not documented yet.
|
||||
|
||||
**Step 3: Write minimal implementation**
|
||||
|
||||
Add:
|
||||
|
||||
- `make bootstrap`
|
||||
- `make test`
|
||||
- `make dev-api`
|
||||
- `make dev-web`
|
||||
- `make dev-worker`
|
||||
- `make guardrails`
|
||||
|
||||
Document the developer flow in `README.md` and update design docs if implementation decisions changed during Tasks 1-9.
|
||||
|
||||
**Step 4: Run test to verify it passes**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
python3 -m unittest tests/test_dev_commands.py -v
|
||||
```
|
||||
|
||||
Expected: PASS
|
||||
|
||||
**Step 5: Commit**
|
||||
|
||||
```bash
|
||||
git add CONTRIBUTING.md docs/development-workflow.md design/03-workflows/workflow-execution-model.md design/05-data/mongodb-data-model.md Makefile README.md tests/test_dev_commands.py
|
||||
git commit -m ":memo: add developer entry commands and bootstrap docs"
|
||||
```
|
||||
|
||||
## Exit Criteria
|
||||
|
||||
The first implementation pass is complete when:
|
||||
|
||||
- a user can create a workspace and project
|
||||
- a raw asset can be registered and probed
|
||||
- a workflow can be created, versioned, and executed locally
|
||||
- run tasks produce observable status and artifacts
|
||||
- the web app exposes assets, workflows, runs, and basic explore views
|
||||
- guardrails for docs, hooks, commit messages, and CI remain green
|
||||
|
||||
## Notes
|
||||
|
||||
- Keep commits small and use the repository gitmoji + English commit policy.
|
||||
- Update design files in the same task where behavior or architecture changes.
|
||||
- Do not add training execution before the V1 data workflow loop is stable.
|
||||
9
package.json
Normal file
9
package.json
Normal file
@ -0,0 +1,9 @@
|
||||
{
|
||||
"name": "emboflow",
|
||||
"private": true,
|
||||
"version": "0.1.0",
|
||||
"packageManager": "pnpm@9.12.3",
|
||||
"scripts": {
|
||||
"test": "python3 -m unittest discover -s tests -p 'test_*.py'"
|
||||
}
|
||||
}
|
||||
3
pnpm-workspace.yaml
Normal file
3
pnpm-workspace.yaml
Normal file
@ -0,0 +1,3 @@
|
||||
packages:
|
||||
- "apps/*"
|
||||
- "packages/*"
|
||||
126
scripts/check_commit_message.py
Executable file
126
scripts/check_commit_message.py
Executable file
@ -0,0 +1,126 @@
|
||||
#!/usr/bin/env python3
|
||||
import argparse
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
# A gitmoji shortcode prefix such as ":sparkles: " — a colon-delimited
# lowercase name followed by whitespace, anchored at the start of the subject.
SHORTCODE_PREFIX = re.compile(r"^:[a-z0-9_+-]+:\s+")
# A single literal emoji character followed by whitespace. The ranges cover
# Miscellaneous Symbols/Dingbats and the main emoji planes.
# NOTE(review): an emoji followed by a variation selector (U+FE0F) would not
# match this pattern — confirm whether such subjects should be accepted.
EMOJI_PREFIX = re.compile(r"^[\u2600-\u27BF\U0001F300-\U0001FAFF]\s+")
|
||||
|
||||
|
||||
def strip_prefix(message: str) -> str:
    """Return *message* with a leading gitmoji prefix removed, if present.

    The shortcode form (":sparkles: ") is tried before the literal-emoji
    form; a message with neither prefix is returned unchanged.
    """
    for pattern in (SHORTCODE_PREFIX, EMOJI_PREFIX):
        if pattern.match(message):
            return pattern.sub("", message, count=1)
    return message
|
||||
|
||||
|
||||
def validate_message(message: str) -> list[str]:
    """Validate one commit message against the gitmoji + English policy.

    Blank lines and lines starting with '#' (git's template comments) are
    ignored. Returns a list of human-readable error strings; an empty list
    means the message is valid.
    """
    content = [line for line in message.splitlines() if line and not line.startswith("#")]
    if not content:
        return ["Commit message must not be empty."]

    subject, rest = content[0], content[1:]
    errors: list[str] = []

    # The subject line must open with a gitmoji (shortcode or literal emoji).
    if not (SHORTCODE_PREFIX.match(subject) or EMOJI_PREFIX.match(subject)):
        errors.append("Commit subject must start with a gitmoji shortcode or emoji.")

    stripped_subject = strip_prefix(subject)
    # Everything after the gitmoji prefix (subject remainder plus body lines)
    # must be plain ASCII English.
    normalized = stripped_subject
    if rest:
        normalized += "\n" + "\n".join(rest)
    try:
        normalized.encode("ascii")
    except UnicodeEncodeError:
        errors.append("Commit message must be written in English ASCII text after the gitmoji prefix.")

    if not stripped_subject.strip():
        errors.append("Commit subject must include an English summary after the gitmoji prefix.")

    # CJK Unified Ideographs get their own targeted, clearer error message.
    if re.search(r"[\u4e00-\u9fff]", "\n".join(content)):
        errors.append("Commit message must not contain Chinese characters.")

    return errors
|
||||
|
||||
|
||||
def read_message_file(path: Path) -> str:
    """Read a commit-message file as UTF-8 text and return its contents."""
    with path.open(encoding="utf-8") as handle:
        return handle.read()
|
||||
|
||||
|
||||
def run_git(*args: str) -> list[str]:
    """Run a git command and return its stripped, non-empty stdout lines.

    Raises:
        RuntimeError: when git exits non-zero, carrying git's stderr text.
    """
    completed = subprocess.run(
        ["git", *args],
        capture_output=True,
        text=True,
        check=False,
    )
    if completed.returncode != 0:
        raise RuntimeError(completed.stderr.strip() or "git command failed")
    stripped = (line.strip() for line in completed.stdout.splitlines())
    return [text for text in stripped if text]
|
||||
|
||||
|
||||
def commit_messages_from_range(rev_range: str) -> list[tuple[str, str]]:
    """Resolve *rev_range* and return (sha, full commit message) pairs.

    A value containing ".." is expanded with ``git rev-list``; anything else
    is treated as a single commit-ish.
    """
    if ".." in rev_range:
        shas = run_git("rev-list", rev_range)
    else:
        shas = [rev_range]

    pairs: list[tuple[str, str]] = []
    for sha in shas:
        # git log is invoked directly (not via run_git) so that the message
        # body is captured whole rather than as per-line-stripped fragments.
        completed = subprocess.run(
            ["git", "log", "--format=%B", "-n", "1", sha],
            capture_output=True,
            text=True,
            check=False,
        )
        if completed.returncode != 0:
            raise RuntimeError(completed.stderr.strip() or "git log failed")
        pairs.append((sha, completed.stdout.strip()))
    return pairs
|
||||
|
||||
|
||||
def parse_args() -> argparse.Namespace:
    """Parse CLI options; callers expect exactly one of --file/--rev-range."""
    parser = argparse.ArgumentParser(description="Validate commit message format.")
    for flag, help_text in (
        ("--file", "path to commit message file"),
        ("--rev-range", "git revision range or single commit"),
    ):
        parser.add_argument(flag, help=help_text)
    return parser.parse_args()
|
||||
|
||||
|
||||
def main() -> int:
    """CLI entry point: validate one message file or a range of commits.

    Returns:
        0 on success, 1 when validation fails, 2 on a usage error.
    """
    args = parse_args()

    # Exactly one input source must be selected.
    if bool(args.file) == bool(args.rev_range):
        print("Use exactly one of --file or --rev-range.")
        return 2

    failures: list[str] = []
    if args.file:
        failures.extend(validate_message(read_message_file(Path(args.file))))
    else:
        for sha, message in commit_messages_from_range(args.rev_range):
            # Prefix each error with a short sha so range output is traceable.
            failures.extend(f"{sha[:12]}: {error}" for error in validate_message(message))

    if not failures:
        print("Commit message validation passed.")
        return 0

    print("Commit message validation failed:")
    for failure in failures:
        print(f"  - {failure}")
    print("\nExpected format example:")
    print("  :sparkles: add hook templates and CI guardrails")
    return 1
|
||||
|
||||
|
||||
# Script entry point: propagate main()'s return code as the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|
||||
194
scripts/check_doc_code_sync.py
Executable file
194
scripts/check_doc_code_sync.py
Executable file
@ -0,0 +1,194 @@
|
||||
#!/usr/bin/env python3
|
||||
import argparse
|
||||
import subprocess
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
# Substrings (matched case-insensitively anywhere in the path) that mark
# documentation and design files. "spec" and "plan" are deliberately loose
# keywords and overlap with TEST_HINTS below; classify() resolves the overlap
# by checking test hints before these loose keywords.
DOC_PATTERNS = (
    "design/",
    "docs/",
    "adr",
    "architecture",
    "prd",
    "spec",
    "plan",
)

# File extensions treated as source code.
CODE_SUFFIXES = {
    ".py",
    ".ts",
    ".tsx",
    ".js",
    ".jsx",
    ".java",
    ".go",
    ".rs",
    ".rb",
    ".php",
    ".kt",
    ".swift",
    ".scala",
    ".sh",
}

# Directory prefixes that imply application or tooling code.
CODE_HINTS = ("apps/", "packages/", "scripts/")
# Substrings that imply test files.
TEST_HINTS = ("test", "spec", "__tests__", "tests/")
# Extensions and substrings that imply configuration or infrastructure files.
CONFIG_SUFFIXES = {".yml", ".yaml", ".json", ".toml", ".ini", ".env"}
CONFIG_HINTS = ("docker", "compose", "k8s", "helm", "terraform", ".github/", ".githooks/", ".env")


def run_git(repo: Path, *args: str) -> list[str]:
    """Run a git command inside *repo* and return stripped, non-empty stdout lines.

    Raises:
        RuntimeError: when git exits non-zero, carrying git's stderr text.
    """
    result = subprocess.run(
        ["git", "-C", str(repo), *args],
        capture_output=True,
        text=True,
        check=False,
    )
    if result.returncode != 0:
        raise RuntimeError(result.stderr.strip() or "git command failed")
    return [line.strip() for line in result.stdout.splitlines() if line.strip()]


def classify(path_text: str) -> str:
    """Bucket a changed path into "docs", "tests", "code", "config", or "other".

    Unambiguous documentation (markdown files and the dedicated design/ and
    docs/ trees) wins first. Test hints are then checked BEFORE the loose doc
    keywords in DOC_PATTERNS: previously DOC_PATTERNS was checked first, so a
    test file such as ``apps/api/test/projects.e2e-spec.ts`` matched the
    "spec" keyword, was classified as docs, and silently satisfied the
    code-requires-docs drift rule.
    """
    lower = path_text.lower()
    path = Path(path_text)

    # Unambiguous documentation locations.
    if path.suffix == ".md" or "design/" in lower or "docs/" in lower:
        return "docs"
    if any(token in lower for token in TEST_HINTS):
        return "tests"
    # Loose doc keywords (adr, architecture, prd, spec, plan) apply only once
    # test-looking paths have been filtered out.
    if any(token in lower for token in DOC_PATTERNS):
        return "docs"
    if any(token in lower for token in CODE_HINTS):
        return "code"
    if path.suffix in CODE_SUFFIXES:
        return "code"
    if path.suffix in CONFIG_SUFFIXES or any(token in lower for token in CONFIG_HINTS):
        return "config"
    return "other"
|
||||
|
||||
|
||||
def print_group(title: str, items: list[str]) -> None:
|
||||
print(f"\n{title}:")
|
||||
if not items:
|
||||
print(" - none")
|
||||
return
|
||||
for item in items:
|
||||
print(f" - {item}")
|
||||
|
||||
|
||||
def assess_changes(
    docs: list[str],
    code: list[str],
    tests: list[str],
    config: list[str],
    other: list[str],
    strict: bool,
) -> dict:
    """Derive drift warnings (and, in strict mode, blockers) from change sets.

    Each argument is the list of changed paths in one category. Returns a
    dict with "warnings", "blockers", and a boolean "blocking" flag.
    """
    warnings: list[str] = []
    blockers: list[str] = []

    def flag(message: str, *, blocks: bool = False) -> None:
        # Every finding is a warning; only drift-class findings escalate to
        # blockers, and only when strict mode is on.
        warnings.append(message)
        if blocks and strict:
            blockers.append(message)

    if code and not docs:
        flag("Code changed but no design/doc files changed.", blocks=True)
    if config and not docs:
        flag("Config or deployment files changed without any doc updates.", blocks=True)
    if docs and not (code or config or tests):
        flag(
            "Docs changed without code changes. This may be intentional, but verify they still match the repository."
        )
    if code and not tests:
        flag(
            "Code changed without any test-file changes. Verify whether tests should change."
        )
    if other:
        flag(
            "Unclassified files changed. Confirm they do not affect documented behavior or runtime assumptions."
        )

    return {
        "warnings": warnings,
        "blockers": blockers,
        "blocking": bool(blockers),
    }
|
||||
|
||||
|
||||
def collect_paths(repo: Path, args: argparse.Namespace) -> list[str]:
    """Return the changed paths selected by the CLI flags.

    Precedence: --staged, then --base-ref (three-dot diff against HEAD),
    then --rev-range; with no selector, fall back to the working tree's
    ``git status`` output.
    """
    if args.staged:
        return run_git(repo, "diff", "--cached", "--name-only", "--diff-filter=ACMR")
    if args.base_ref:
        return run_git(repo, "diff", "--name-only", "--diff-filter=ACMR", f"{args.base_ref}...HEAD")
    if args.rev_range:
        if ".." in args.rev_range:
            return run_git(repo, "diff", "--name-only", "--diff-filter=ACMR", args.rev_range)
        return run_git(repo, "diff-tree", "--no-commit-id", "--name-only", "-r", args.rev_range)

    # run_git strips each line, so fixed-width slicing of the two-character
    # status prefix (the old ``line[3:]``) ate leading path characters for
    # entries like " M path" (stripped to "M path" -> "ath"). Split on the
    # first whitespace run instead, which works for both "XY path" and
    # stripped "X path" forms.
    paths = set()
    for line in run_git(repo, "status", "--short"):
        parts = line.split(maxsplit=1)
        if len(parts) == 2:
            paths.add(parts[1])
    return sorted(paths)
|
||||
|
||||
|
||||
def parse_args() -> argparse.Namespace:
    """Parse CLI options controlling scope and strictness of the drift check."""
    parser = argparse.ArgumentParser(description="Check whether doc changes track code changes.")
    parser.add_argument("repo", nargs="?", default=".", help="git repository path")
    for flag, options in (
        ("--strict", {"action": "store_true", "help": "fail on blocking drift"}),
        ("--staged", {"action": "store_true", "help": "inspect staged files only"}),
        ("--base-ref", {"help": "compare changes from base ref to HEAD"}),
        ("--rev-range", {"help": "inspect a git revision range or a single commit"}),
    ):
        parser.add_argument(flag, **options)
    return parser.parse_args()
|
||||
|
||||
|
||||
def main() -> int:
    """Entry point: classify changed files and report doc/code drift.

    Returns 0 when no blocking drift is found, 1 when strict mode reports
    blocking drift, and 2 when the target path is not a git repository.
    """
    args = parse_args()
    repo = Path(args.repo).expanduser().resolve()

    # `.git` is a directory in normal clones and a plain file in worktrees
    # and submodules; `.exists()` accepts both.
    if not (repo / ".git").exists():
        print(f"Not a git repository: {repo}")
        return 2

    paths = sorted(set(collect_paths(repo, args)))

    # Bucket each path with a single classify() call instead of re-scanning
    # the full list once per category (previously five passes over `paths`).
    buckets: dict[str, list[str]] = {
        "docs": [],
        "code": [],
        "tests": [],
        "config": [],
        "other": [],
    }
    for path in paths:
        # setdefault keeps this safe if classify() ever grows a new label.
        buckets.setdefault(classify(path), []).append(path)

    docs, code, tests = buckets["docs"], buckets["code"], buckets["tests"]
    config, other = buckets["config"], buckets["other"]
    assessment = assess_changes(docs, code, tests, config, other, args.strict)

    print(f"Repository: {repo}")
    print(f"Changed files: {len(paths)}")
    print_group("Design and doc files", docs)
    print_group("Code files", code)
    print_group("Test files", tests)
    print_group("Config and infra files", config)
    print_group("Other files", other)

    print("\nAssessment:")
    if not assessment["warnings"]:
        print(" - No obvious doc/code drift detected from changed-file classification.")
    else:
        for warning in assessment["warnings"]:
            print(f" - {warning}")

    # Actionable follow-ups tailored to which categories actually changed.
    print("\nNext actions:")
    if code and not docs:
        print(" - Review design/ or docs/ and update affected architecture, workflow, or API notes.")
    if docs:
        print(" - Confirm each changed doc still matches the actual implementation.")
    if code:
        print(" - Confirm changed code paths match documented workflow, schema, and runtime assumptions.")
    if other:
        print(" - Review unclassified paths and decide whether docs or tests should be updated.")

    if assessment["blocking"]:
        print("\nResult: blocking drift detected.")
        return 1

    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
raise SystemExit(main())
|
||||
12
scripts/install_hooks.sh
Normal file
12
scripts/install_hooks.sh
Normal file
@ -0,0 +1,12 @@
|
||||
#!/usr/bin/env bash
# Install the repository's local git hooks: point core.hooksPath at
# .githooks and make the hook and checker scripts executable.
set -euo pipefail

# Resolve the repo root relative to this script's own location.
script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
repo_root="$(cd "$script_dir/.." && pwd)"

git -C "$repo_root" config core.hooksPath .githooks

chmod +x "$repo_root"/.githooks/*
for helper in check_doc_code_sync.py check_commit_message.py; do
    chmod +x "$repo_root/scripts/$helper"
done

echo "Installed local git hooks from .githooks"
echo "Active hooks path: $(git -C "$repo_root" config core.hooksPath)"
|
||||
40
tests/test_commit_message.py
Normal file
40
tests/test_commit_message.py
Normal file
@ -0,0 +1,40 @@
|
||||
import importlib.util
|
||||
from pathlib import Path
|
||||
import unittest
|
||||
|
||||
|
||||
def load_module(module_name: str, path: Path):
    """Import the file at *path* as a module named *module_name*.

    Lets the tests exercise scripts that live outside any package and
    are not importable via sys.path.
    """
    loaded_spec = importlib.util.spec_from_file_location(module_name, path)
    loaded = importlib.util.module_from_spec(loaded_spec)
    assert loaded_spec.loader is not None
    loaded_spec.loader.exec_module(loaded)
    return loaded
|
||||
|
||||
|
||||
# Repository root: two levels up from this file (tests/ -> repo root).
REPO_ROOT = Path(__file__).resolve().parents[1]
# Load the hook script under test straight from its file path, since
# scripts/ is not a package on sys.path.
MODULE = load_module(
    "check_commit_message",
    REPO_ROOT / "scripts" / "check_commit_message.py",
)
|
||||
|
||||
|
||||
class CommitMessageValidationTests(unittest.TestCase):
    """Exercise the gitmoji-prefix and English-only commit-message rules."""

    def test_accepts_gitmoji_shortcode_with_english_message(self):
        problems = MODULE.validate_message(":sparkles: add local hook templates")
        self.assertEqual(problems, [])

    def test_accepts_unicode_gitmoji_with_english_message(self):
        problems = MODULE.validate_message("✨ add ci validation for hooks")
        self.assertEqual(problems, [])

    def test_rejects_message_without_gitmoji_prefix(self):
        problems = MODULE.validate_message("add local hook templates")
        gitmoji_errors = [item for item in problems if "gitmoji" in item.lower()]
        self.assertTrue(gitmoji_errors)

    def test_rejects_non_english_message(self):
        problems = MODULE.validate_message(":sparkles: 添加本地 hook")
        english_errors = [item for item in problems if "english" in item.lower()]
        self.assertTrue(english_errors)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Support running this test module directly, outside a test runner.
    unittest.main()
|
||||
55
tests/test_doc_code_sync.py
Normal file
55
tests/test_doc_code_sync.py
Normal file
@ -0,0 +1,55 @@
|
||||
import importlib.util
|
||||
from pathlib import Path
|
||||
import unittest
|
||||
|
||||
|
||||
def load_module(module_name: str, path: Path):
    """Import the file at *path* under the name *module_name*.

    Used because the scripts under test are plain files, not packages.
    """
    file_spec = importlib.util.spec_from_file_location(module_name, path)
    imported = importlib.util.module_from_spec(file_spec)
    assert file_spec.loader is not None
    file_spec.loader.exec_module(imported)
    return imported
|
||||
|
||||
|
||||
# Repository root: two levels up from this file (tests/ -> repo root).
REPO_ROOT = Path(__file__).resolve().parents[1]
# Load the checker script straight from its file path, since scripts/
# is not an importable package.
MODULE = load_module(
    "check_doc_code_sync",
    REPO_ROOT / "scripts" / "check_doc_code_sync.py",
)
|
||||
|
||||
|
||||
class DocCodeSyncAssessmentTests(unittest.TestCase):
    """Cover path classification and the strict-mode drift assessment."""

    def test_classifies_python_scripts_as_code(self):
        self.assertEqual(MODULE.classify("scripts/check_doc_code_sync.py"), "code")

    def test_classifies_app_paths_as_code(self):
        self.assertEqual(MODULE.classify("apps/web/package.json"), "code")

    def test_classifies_env_example_as_config(self):
        self.assertEqual(MODULE.classify(".env.example"), "config")

    def test_strict_mode_blocks_code_without_doc_updates(self):
        # Code-only change under --strict must be reported as blocking.
        result = MODULE.assess_changes(
            docs=[],
            code=["src/app.ts"],
            tests=[],
            config=[],
            other=[],
            strict=True,
        )
        self.assertTrue(result["blocking"])

    def test_doc_and_code_changes_together_do_not_block(self):
        # Pairing the code change with a doc change clears the blocker.
        result = MODULE.assess_changes(
            docs=["design/02-architecture/system-architecture.md"],
            code=["src/app.ts"],
            tests=[],
            config=[],
            other=[],
            strict=True,
        )
        self.assertFalse(result["blocking"])
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Support running this test module directly, outside a test runner.
    unittest.main()
|
||||
35
tests/test_repo_structure.py
Normal file
35
tests/test_repo_structure.py
Normal file
@ -0,0 +1,35 @@
|
||||
from pathlib import Path
|
||||
import unittest
|
||||
|
||||
|
||||
# Repository root: two levels up from this file (tests/ -> repo root).
REPO_ROOT = Path(__file__).resolve().parents[1]
|
||||
|
||||
|
||||
class RepoStructureTests(unittest.TestCase):
    """Smoke-check that the documented workspace skeleton exists on disk."""

    def _assert_files_exist(self, relative_paths):
        """Assert each relative path is a regular file under the repo root.

        Shared by both test methods; the original duplicated this loop.
        """
        for relative_path in relative_paths:
            with self.subTest(path=relative_path):
                self.assertTrue((REPO_ROOT / relative_path).is_file())

    def test_root_workspace_files_exist(self):
        # Top-level workspace/config files required by the monorepo layout.
        self._assert_files_exist([
            "package.json",
            "pnpm-workspace.yaml",
            "tsconfig.base.json",
            "docker-compose.yml",
            ".env.example",
        ])

    def test_app_package_manifests_exist(self):
        # One package manifest per application in apps/.
        self._assert_files_exist([
            "apps/web/package.json",
            "apps/api/package.json",
            "apps/worker/package.json",
        ])
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Support running this test module directly, outside a test runner.
    unittest.main()
|
||||
12
tsconfig.base.json
Normal file
12
tsconfig.base.json
Normal file
@ -0,0 +1,12 @@
|
||||
{
|
||||
"compilerOptions": {
|
||||
"target": "ES2022",
|
||||
"module": "ESNext",
|
||||
"moduleResolution": "Bundler",
|
||||
"strict": true,
|
||||
"esModuleInterop": true,
|
||||
"resolveJsonModule": true,
|
||||
"skipLibCheck": true,
|
||||
"baseUrl": "."
|
||||
}
|
||||
}
|
||||
Loading…
x
Reference in New Issue
Block a user