🐛 fix docker runner handling for null code hooks

2026-03-30 03:31:59 +08:00 · 2026-03-30 03:31:59 +08:00 · 93d20b230f
commit 93d20b230f
parent 93eb07d7d7
4 changed files with 77 additions and 1 deletion
--- a/README.md
+++ b/README.md
@@ -91,6 +91,7 @@ The node library now supports both click-to-append and drag-and-drop placement i
 The Runs workspace now shows project-scoped run history, run-level aggregated summaries, cancel/retry controls, and run detail views with persisted task summaries, stdout/stderr sections, result previews, and artifact links into Explore.
 Selected run tasks now expose the frozen node definition id, executor config snapshot, and code-hook metadata that were captured when the run was created.
 Most built-in delivery nodes now default to `executorType=docker`. When a node uses `executorType=docker` and provides `executorConfig.image`, the worker runs a real local Docker container with mounted `input.json` / `output.json` exchange files plus read-only mounts for bound asset paths. If no image is configured, the executor falls back to the lightweight simulated behavior used by older demo tasks.
+The Docker runner now treats missing or `null` `codeHookSpec` values as “no hook configured”, so built-in Docker nodes and custom container nodes can share the same task envelope without crashing on optional hook fields.
 Custom Docker nodes follow the same runtime contract. The container reads the task snapshot and execution context from `EMBOFLOW_INPUT_PATH`, writes `{\"result\": ...}` JSON to `EMBOFLOW_OUTPUT_PATH`, and if it declares an asset-set output contract it must return `result.assetIds` as a string array. Dockerfile-based custom nodes are built locally on first execution and then reused by tag. The Nodes page and API now share the same validation rules, including required names, valid source kinds, a mandatory `FROM` instruction for Dockerfiles, and rejection of `Source` category nodes that incorrectly declare `inputMode=multi_asset_set`. The editor also renders the standard EmboFlow input and output envelope preview for custom nodes so users can align container code to the actual runtime JSON shape.
 When a node uses the built-in Python path without a custom hook, `source-asset` now emits bound asset metadata from Mongo-backed asset records and `validate-structure` now performs a real directory validation pass against local source paths. On the current sample path `/Users/longtaowu/workspace/emboldata/data`, that validation reports `valid=false`, `videoFileCount=407`, and missing delivery files because the sample root is a mixed dataset collection rather than a delivery package.
 The worker now also carries direct upstream task results into execution context so set-operation utility nodes can compute narrowed asset sets and pass those effective asset ids to downstream tasks.
--- a/apps/worker/src/executors/docker-executor.ts
+++ b/apps/worker/src/executors/docker-executor.ts
@@ -279,8 +279,9 @@ function createDockerRunner() {
    "    output_path = pathlib.Path(sys.argv[2])",
    "    task = payload.get('task') or {}",
    "    context = payload.get('context') or {}",
+    "    hook = task.get('codeHookSpec') or {}",
    "    definition_id = task.get('nodeDefinitionId') or task.get('nodeId')",
-    "    if task.get('codeHookSpec', {}).get('source'):",
+    "    if hook.get('source'):",
    "        result = execute_hook(task, context)",
    "    elif definition_id == 'source-asset':",
    "        result = build_source_result(context)",
--- a/apps/worker/test/mongo-worker-runtime.spec.ts
+++ b/apps/worker/test/mongo-worker-runtime.spec.ts
@@ -584,6 +584,78 @@ test("worker executes a queued docker task inside a real container", {
  });
 });

+test("worker executes built-in docker source nodes when codeHookSpec is null", {
+  skip: !hasDockerRuntime(),
+}, async (t) => {
+  ensureDockerImage("python:3.11-alpine");
+  const fixture = await createRuntimeFixture("emboflow-worker-built-in-docker-source");
+  t.after(async () => {
+    await fixture.close();
+  });
+
+  await fixture.db.collection("assets").insertOne({
+    _id: "asset-built-in-docker-source",
+    workspaceId: "workspace-1",
+    projectId: "project-1",
+    type: "folder",
+    sourceType: "registered_path",
+    displayName: "Built-in Docker Source Asset",
+    sourcePath: "/tmp/built-in-docker-source",
+    status: "probed",
+    storageRef: {},
+    topLevelPaths: ["meta.json"],
+    detectedFormats: ["delivery_package"],
+    fileCount: 1,
+    summary: {},
+    createdBy: "local-user",
+    createdAt: new Date().toISOString(),
+    updatedAt: new Date().toISOString(),
+  });
+
+  await fixture.db.collection("workflow_runs").insertOne({
+    _id: "run-built-in-docker-source",
+    workflowDefinitionId: "workflow-built-in-docker-source",
+    workflowVersionId: "workflow-built-in-docker-source-v1",
+    status: "queued",
+    triggeredBy: "local-user",
+    assetIds: ["asset-built-in-docker-source"],
+    createdAt: new Date().toISOString(),
+    updatedAt: new Date().toISOString(),
+  });
+
+  await fixture.db.collection("run_tasks").insertOne({
+    _id: "task-built-in-docker-source",
+    workflowRunId: "run-built-in-docker-source",
+    workflowVersionId: "workflow-built-in-docker-source-v1",
+    nodeId: "source-asset",
+    nodeDefinitionId: "source-asset",
+    nodeType: "source",
+    executorType: "docker",
+    executorConfig: {
+      image: "python:3.11-alpine",
+      networkMode: "none",
+    },
+    codeHookSpec: null,
+    status: "queued",
+    attempt: 1,
+    assetIds: ["asset-built-in-docker-source"],
+    upstreamNodeIds: [],
+    outputArtifactIds: [],
+    createdAt: new Date().toISOString(),
+    updatedAt: new Date().toISOString(),
+  });
+
+  await fixture.runtime.runNextTask();
+
+  const task = await fixture.store.getRunTask("task-built-in-docker-source");
+  const artifact = await fixture.db.collection("artifacts").findOne({ producerId: "task-built-in-docker-source" });
+
+  assert.equal(task?.status, "success");
+  assert.deepEqual(task?.stderrLines, []);
+  assert.deepEqual(task?.stdoutLines, ["loaded 1 bound asset"]);
+  assert.deepEqual((artifact?.payload as { result?: { assetCount?: number } } | undefined)?.result?.assetCount, 1);
+});
+
 test("worker loads bound asset metadata into the execution context for built-in source nodes", async (t) => {
  let capturedContext: ExecutionContext | null = null;
  const fixture = await createRuntimeFixture("emboflow-worker-source-context", {
--- a/design/03-workflows/workflow-execution-model.md
+++ b/design/03-workflows/workflow-execution-model.md
@@ -194,6 +194,8 @@ In real container mode the worker:
 - captures container stdout and stderr from the Docker CLI process
 - parses `output.json` back into the task artifact payload when present

+Optional hook metadata must remain optional in this path. The current V1 Docker runner now treats missing or explicit `null` `codeHookSpec` values as “no hook configured” instead of attempting to execute them. This keeps built-in Docker nodes and custom nodes on the same task schema without adding fake hook payloads.
+
 The default Docker runtime policy is `--network none`. This keeps V1 safer for local processing nodes unless a later phase deliberately opens network access for containerized tasks.

 The V1 worker now also carries direct upstream task previews into the execution context. This is what makes multi-input set nodes executable instead of purely visual: