From f4be2541688f0e978b4b62dc18bd014b775f9d88 Mon Sep 17 00:00:00 2001 From: sjfhsjfh Date: Wed, 23 Jul 2025 01:59:21 +0800 Subject: [PATCH] chore: update json schema --- libraries/core/dora-schema.json | 161 ++++++++++++++++++++++++++++++-- 1 file changed, 151 insertions(+), 10 deletions(-) diff --git a/libraries/core/dora-schema.json b/libraries/core/dora-schema.json index fbaa240f3..5796c1df1 100644 --- a/libraries/core/dora-schema.json +++ b/libraries/core/dora-schema.json @@ -1,13 +1,14 @@ { "$schema": "http://json-schema.org/draft-07/schema#", "title": "dora-rs specification", - "description": "Dataflow description", + "description": "The main configuration structure for defining a Dora dataflow. Dataflows are specified through YAML files that describe the nodes, their connections, and execution parameters.\n\n## Structure\n\nA dataflow consists of: - **Nodes**: The computational units that process data - **Communication**: Optional communication configuration - **Deployment**: Optional deployment configuration (unstable) - **Debug options**: Optional development and debugging settings (unstable)\n\n## Example\n\n```yaml nodes: - id: webcam operator: python: webcam.py inputs: tick: dora/timer/millis/100 outputs: - image - id: plot operator: python: plot.py inputs: image: webcam/image ```", "type": "object", "required": [ "nodes" ], "properties": { "nodes": { + "description": "List of nodes in the dataflow\n\nThis is the most important field of the dataflow specification. Each node must be identified by a unique `id`:\n\n## Example\n\n```yaml nodes: - id: foo path: path/to/the/executable # ... (see below) - id: bar path: path/to/another/executable # ... (see below) ```\n\nFor each node, you need to specify the `path` of the executable or script that Dora should run when starting the node. 
Most of the other node fields are optional, but you typically want to specify at least some `inputs` and/or `outputs`.", "type": "array", "items": { "$ref": "#/definitions/Node" @@ -17,8 +18,10 @@ "additionalProperties": true, "definitions": { "CustomNode": { + "description": "Contains the input and output configuration of the node.", "type": "object", "required": [ + "path", "source" ], "properties": { @@ -60,6 +63,10 @@ }, "uniqueItems": true }, + "path": { + "description": "Path of the source code\n\nIf you want to use a specific `conda` environment. Provide the python path within the source.\n\nsource: /home/peter/miniconda3/bin/python\n\nargs: some_node.py\n\nSource can match any executable in PATH.", + "type": "string" + }, "send_stdout_as": { "description": "Send stdout and stderr to another node", "type": [ @@ -68,8 +75,7 @@ ] }, "source": { - "description": "Path of the source code\n\nIf you want to use a specific `conda` environment. Provide the python path within the source.\n\nsource: /home/peter/miniconda3/bin/python\n\nargs: some_node.py\n\nSource can match any executable in PATH.", - "type": "string" + "$ref": "#/definitions/NodeSource" } } }, @@ -113,6 +119,46 @@ } ] }, + "GitRepoRev": { + "oneOf": [ + { + "type": "object", + "required": [ + "Branch" + ], + "properties": { + "Branch": { + "type": "string" + } + }, + "additionalProperties": true + }, + { + "type": "object", + "required": [ + "Tag" + ], + "properties": { + "Tag": { + "type": "string" + } + }, + "additionalProperties": true + }, + { + "type": "object", + "required": [ + "Rev" + ], + "properties": { + "Rev": { + "type": "string" + } + }, + "additionalProperties": true + } + ] + }, "Input": { "type": "object", "required": [ @@ -169,25 +215,36 @@ ] }, "Node": { - "description": "Dora Node", + "title": "Dora Node Configuration", + "description": "A node represents a computational unit in a Dora dataflow. 
Each node runs as a separate process and can communicate with other nodes through inputs and outputs.", "type": "object", "required": [ "id" ], "properties": { "args": { + "description": "Command-line arguments passed to the executable.\n\nThe command-line arguments that should be passed to the executable/script specified in `path`. The arguments should be separated by space. This field is optional and defaults to an empty argument list.\n\n## Example ```yaml nodes: - id: example path: example-node args: -v --some-flag foo ```", + "type": [ + "string", + "null" + ] + }, + "branch": { + "description": "Git branch to checkout after cloning.\n\nThe `branch` field is only allowed in combination with the [`git`](#git) field. It specifies the branch that should be checked out after cloning. Only one of `branch`, `tag`, or `rev` can be specified.\n\n## Example\n\n```yaml nodes: - id: rust-node git: https://github.com/dora-rs/dora.git branch: some-branch-name ```", "type": [ "string", "null" ] }, "build": { + "description": "Build commands executed during `dora build`. Each line runs separately.\n\nThe `build` key specifies the command that should be invoked for building the node. The key expects a single- or multi-line string.\n\nEach line is run as a separate command. 
Spaces are used to separate arguments.\n\nNote that all the environment variables specified in the [`env`](Self::env) field are also applied to the build commands.\n\n## Special treatment of `pip`\n\nBuild lines that start with `pip` or `pip3` are treated in a special way: If the `--uv` argument is passed to the `dora build` command, all `pip`/`pip3` commands are run through the [`uv` package manager](https://docs.astral.sh/uv/).\n\n## Example\n\n```yaml nodes: - id: build-example build: cargo build -p receive_data --release path: target/release/receive_data - id: multi-line-example build: | pip install -r requirements.txt pip install -e some/local/package path: package ```\n\nIn the above example, the `pip` commands will be replaced by `uv pip` when run through `dora build --uv`.", "type": [ "string", "null" ] }, "custom": { + "description": "Legacy node configuration (deprecated).\n\nPlease use the top-level [`path`](Self::path), [`args`](Self::args), etc. fields instead.", "anyOf": [ { "$ref": "#/definitions/CustomNode" @@ -198,14 +255,14 @@ ] }, "description": { - "description": "Description of the node", + "description": "Detailed description of the node's functionality.\n\n## Example\n\n```yaml nodes: - id: camera_node description: \"Captures video frames from webcam\" ```", "type": [ "string", "null" ] }, "env": { - "description": "Environment variables", + "description": "Environment variables for node builds and execution.\n\nKey-value map of environment variables that should be set for both the [`build`](Self::build) operation and the node execution (i.e. 
when the node is spawned through [`path`](Self::path)).\n\nSupports strings, numbers, and booleans.\n\n## Example\n\n```yaml nodes: - id: example-node path: path/to/node env: DEBUG: true PORT: 8080 API_KEY: \"secret-key\" ```", "type": [ "object", "null" @@ -214,8 +271,15 @@ "$ref": "#/definitions/EnvValue" } }, + "git": { + "description": "Git repository URL for downloading nodes.\n\nThe `git` key allows downloading nodes (i.e. their source code) from git repositories. This can be especially useful for distributed dataflows.\n\nWhen a `git` key is specified, `dora build` automatically clones the specified repository (or reuses an existing clone). Then it checks out the specified [`branch`](Self::branch), [`tag`](Self::tag), or [`rev`](Self::rev), or the default branch if none of them are specified. Afterwards it runs the [`build`](Self::build) command if specified.\n\nNote that the git clone directory is set as the working directory for both the [`build`](Self::build) command and the specified [`path`](Self::path).\n\n## Example\n\n```yaml nodes: - id: rust-node git: https://github.com/dora-rs/dora.git build: cargo build -p rust-dataflow-example-node path: target/debug/rust-dataflow-example-node ```\n\nIn the above example, `dora build` will first clone the specified `git` repository and then run the specified `build` inside the local clone directory. When `dora run` or `dora start` is invoked, the working directory will be the git clone directory too. So a relative `path` will start from the clone directory.", + "type": [ + "string", + "null" + ] + }, "id": { - "description": "Node identifier", + "description": "Unique node identifier. Must not contain `/` characters.\n\nNode IDs can be arbitrary strings with the following limitations:\n\n- They must not contain any `/` characters (slashes). - We do not recommend using whitespace characters (e.g. 
spaces) in IDs\n\nEach node must have an ID field.\n\n## Example\n\n```yaml nodes: - id: camera_node - id: some_other_node ```", "allOf": [ { "$ref": "#/definitions/NodeId" @@ -223,18 +287,20 @@ ] }, "inputs": { + "description": "Input data connections from other nodes.\n\nDefines the inputs that this node is subscribing to.\n\nThe `inputs` field should be a key-value map of the following format:\n\n`input_id: source_node_id/source_node_output_id`\n\nThe components are defined as follows:\n\n- `input_id` is the local identifier that should be used for this input.\n\nThis will map to the `id` field of [`Event::Input`](https://docs.rs/dora-node-api/latest/dora_node_api/enum.Event.html#variant.Input) events sent to the node event loop. - `source_node_id` should be the `id` field of the node that sends the output that we want to subscribe to - `source_node_output_id` should be the identifier of the output that we want to subscribe to\n\n## Example\n\n```yaml nodes: - id: example-node outputs: - one - two - id: receiver inputs: my_input: example-node/two ```", "default": {}, "type": "object", "additionalProperties": true }, "name": { - "description": "Node name", + "description": "Human-readable node name for documentation.\n\nThis optional field can be used to define a more descriptive name in addition to a short [`id`](Self::id).\n\n## Example\n\n```yaml nodes: - id: camera_node name: \"Camera Input Handler\" ```", "type": [ "string", "null" ] }, "operator": { + "description": "Single operator configuration.\n\nThis is a convenience field for defining runtime nodes that contain only a single operator. 
This field is an alternative to the [`operators`](Self::operators) field, which can be used if there is only a single operator defined for the runtime node.\n\n## Example\n\n```yaml nodes: - id: runtime-node operator: id: processor python: script.py outputs: [data] ```", "anyOf": [ { "$ref": "#/definitions/SingleOperatorDefinition" @@ -245,6 +311,7 @@ ] }, "operators": { + "description": "Multiple operators running in a shared runtime process.\n\nOperators are an experimental, lightweight alternative to nodes. Instead of running as a separate process, operators are linked into a runtime process. This allows running multiple operators to share a single address space (not supported for Python currently).\n\nOperators are defined as part of the node list, as children of a runtime node. A runtime node is a special node that specifies no [`path`](Self::path) field, but contains an `operators` field instead.\n\n## Example\n\n```yaml nodes: - id: runtime-node operators: - id: processor python: process.py ```", "type": [ "array", "null" @@ -254,6 +321,7 @@ } }, "outputs": { + "description": "Output data identifiers produced by this node.\n\nList of output identifiers that the node sends. Must contain all `output_id` values that the node uses when sending output, e.g. through the [`send_output`](https://docs.rs/dora-node-api/latest/dora_node_api/struct.DoraNode.html#method.send_output) function.\n\n## Example\n\n```yaml nodes: - id: example-node outputs: - processed_image - metadata ```", "default": [], "type": "array", "items": { @@ -262,12 +330,28 @@ "uniqueItems": true }, "path": { + "description": "Path to executable or script that should be run.\n\nSpecifies the path of the executable or script that Dora should run when starting the dataflow. This can point to a normal executable (e.g. 
when using a compiled language such as Rust) or a Python script.\n\nDora will automatically append a `.exe` extension on Windows systems when the specified file name has no extension.\n\n## Example\n\n```yaml nodes: - id: rust-example path: target/release/rust-node - id: python-example path: ./receive_data.py ```\n\n## URL as Path\n\nThe `path` field can also point to a URL instead of a local path. In this case, Dora will download the given file when starting the dataflow.\n\nNote that this is quite an old feature and using this functionality is **not recommended** anymore. Instead, we recommend using a [`git`](Self::git) and/or [`build`](Self::build) key.", + "type": [ + "string", + "null" + ] + }, + "rev": { + "description": "Git revision (e.g. commit hash) to checkout after cloning.\n\nThe `rev` field is only allowed in combination with the [`git`](#git) field. It specifies the git revision (e.g. a commit hash) that should be checked out after cloning. Only one of `branch`, `tag`, or `rev` can be specified.\n\n## Example\n\n```yaml nodes: - id: rust-node git: https://github.com/dora-rs/dora.git rev: 64ab0d7c ```", "type": [ "string", "null" ] }, "send_stdout_as": { + "description": "Redirect stdout/stderr to a data output.\n\nThis field can be used to send all stdout and stderr output of the node as a Dora output. Each output line is sent as a separate message.\n\n## Example\n\n```yaml nodes: - id: example send_stdout_as: stdout_output - id: logger inputs: example_output: example/stdout_output ```", + "type": [ + "string", + "null" + ] + }, + "tag": { + "description": "Git tag to checkout after cloning.\n\nThe `tag` field is only allowed in combination with the [`git`](#git) field. It specifies the git tag that should be checked out after cloning. 
Only one of `branch`, `tag`, or `rev` can be specified.\n\n## Example\n\n```yaml nodes: - id: rust-node git: https://github.com/dora-rs/dora.git tag: v0.3.0 ```", "type": [ "string", "null" @@ -279,6 +363,46 @@ "NodeId": { "type": "string" }, + "NodeSource": { + "oneOf": [ + { + "type": "string", + "enum": [ + "Local" + ] + }, + { + "type": "object", + "required": [ + "GitBranch" + ], + "properties": { + "GitBranch": { + "type": "object", + "required": [ + "repo" + ], + "properties": { + "repo": { + "type": "string" + }, + "rev": { + "anyOf": [ + { + "$ref": "#/definitions/GitRepoRev" + }, + { + "type": "null" + } + ] + } + } + } + }, + "additionalProperties": true + } + ] + }, "OperatorDefinition": { "type": "object", "oneOf": [ @@ -310,32 +434,42 @@ ], "properties": { "build": { + "description": "Build commands for this operator", "type": [ "string", "null" ] }, "description": { + "description": "Detailed description of the operator", "type": [ "string", "null" ] }, "id": { - "$ref": "#/definitions/OperatorId" + "description": "Unique operator identifier within the runtime", + "allOf": [ + { + "$ref": "#/definitions/OperatorId" + } + ] }, "inputs": { + "description": "Input data connections", "default": {}, "type": "object", "additionalProperties": true }, "name": { + "description": "Human-readable operator name", "type": [ "string", "null" ] }, "outputs": { + "description": "Output data identifiers", "default": [], "type": "array", "items": { @@ -344,6 +478,7 @@ "uniqueItems": true }, "send_stdout_as": { + "description": "Redirect stdout to data output", "type": [ "string", "null" @@ -399,19 +534,21 @@ ], "properties": { "build": { + "description": "Build commands for this operator", "type": [ "string", "null" ] }, "description": { + "description": "Detailed description of the operator", "type": [ "string", "null" ] }, "id": { - "description": "ID is optional if there is only a single operator.", + "description": "Operator identifier (optional for single 
operators)", "anyOf": [ { "$ref": "#/definitions/OperatorId" @@ -422,17 +559,20 @@ ] }, "inputs": { + "description": "Input data connections", "default": {}, "type": "object", "additionalProperties": true }, "name": { + "description": "Human-readable operator name", "type": [ "string", "null" ] }, "outputs": { + "description": "Output data identifiers", "default": [], "type": "array", "items": { @@ -441,6 +581,7 @@ "uniqueItems": true }, "send_stdout_as": { + "description": "Redirect stdout to data output", "type": [ "string", "null"