Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# SQL transform — how to provide calcite_connection_properties
#
# This example shows how to provide Calcite connection properties (for
# example to enable PostgreSQL-specific functions) to a YAML pipeline.
#
# The connection properties can be provided under the top-level `options:`
# key. Most of the time you can provide them as normal YAML mappings.
# Some environments may expect a JSON-formatted string instead; both forms
# are shown below.

# A three-step pipeline: create a few rows, run a SQL query that uses a
# PostgreSQL-specific function, and log the result.
pipeline:
  transforms:
    - name: Source
      type: Create
      config:
        elements:
          - {a: "x", b: 1}
          - {a: "x", b: 2}
          - {a: "x", b: 3}
          - {a: "y", b: 10}
    - name: Transform
      type: Sql
      config:
        # STRING_TO_ARRAY is the PostgreSQL-specific function that the
        # `fun: postgresql` connection property below is meant to enable.
        query: "SELECT STRING_TO_ARRAY('abc def g', ' ') as col_name"
      input: Source
    - name: Sink
      type: LogForTesting
      input: Transform
      config:
        level: INFO

# Preferred: pass connection properties as YAML mapping
# (`options:` is a top-level key, a sibling of `pipeline:`).
options:
  calcite_connection_properties:
    fun: postgresql

# Alternative: pass as a JSON string (useful if your runner or expansion
# service expects a stringified JSON). Note the quoting.
#
# options:
#   calcite_connection_properties: '{"fun": "postgresql"}'
74 changes: 48 additions & 26 deletions sdks/python/apache_beam/yaml/generate_yaml_docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -229,34 +229,56 @@ def add_transform_links(transform, description, provider_list):


def transform_docs(transform_base, transforms, providers, extra_docs=''):
  """Render the Markdown documentation section for one transform.

  The section consists of a `## <transform_base>` heading, a description,
  any extra documentation, a `### Configuration` subsection and a
  `### Usage` subsection, joined with newlines.

  Args:
    transform_base: Name used for the section heading and passed on to
      `pretty_example`. When it equals `sql` (case-insensitively) a callout
      about Calcite connection properties is appended to the extra docs.
    transforms: Collection of transform names handed to `longest`
      (presumably the aliases that share this base name -- confirm against
      the caller).
    providers: Mapping indexed by transform name; each value is iterated as
      a collection of providers exposing `description(t)` and
      `config_schema(t)`. Its keys are passed to `add_transform_links`.
    extra_docs: Optional additional Markdown placed between the description
      and the configuration subsection. A falsy value is treated as empty.

  Returns:
    The Markdown text for the section as a single string.
  """
  # The SQL transform gets a short callout explaining how to supply Calcite
  # connection properties through the YAML `options:` section.
  extra_docs_out = extra_docs or ''
  if transform_base.lower() == 'sql':
    # Code samples are indented four spaces, matching the `indent(..., 4)`
    # applied to the usage example below, so that they render as code blocks;
    # the YAML keys inside keep their own two-space nesting.
    callout = (
        "**Note on Calcite connection properties**: Some SQL functions and "
        "dialect-specific behavior are controlled by Calcite connection "
        "properties. In Beam YAML pipelines you can provide these under the "
        "top-level `options:` key. For example (preferred as YAML mapping):\n\n"
        "    :::yaml\n\n"
        "    options:\n"
        "      calcite_connection_properties:\n"
        "        fun: postgresql\n\n"
        "If your environment expects a JSON string, you can also provide the "
        "properties as a JSON-formatted string (note the quoting):\n\n"
        "    :::yaml\n\n"
        "    options:\n"
        "      calcite_connection_properties: '{\"fun\": \"postgresql\"}'\n")
    # Separate caller-supplied docs from the callout with a blank line.
    extra_docs_out = (
        f'{extra_docs_out}\n\n{callout}' if extra_docs_out else callout)

  return '\n'.join([
      f'## {transform_base}',
      '',
      # A `::` at the end of a line in the description is rewritten into a
      # `:::yaml` code-block header.
      longest(
          lambda t: longest(
              lambda p: add_transform_links(
                  t, p.description(t), providers.keys()),
              providers[t]),
          transforms).replace('::\n', '\n\n    :::yaml\n'),
      '',
      # Use the augmented docs so the SQL callout is actually emitted.
      extra_docs_out,
      '',
      '### Configuration',
      '',
      longest(
          lambda t: longest(
              lambda p: config_docs(p.config_schema(t)), providers[t]),
          transforms),
      '',
      '### Usage',
      '',
      '    :::yaml',
      '',
      indent(
          longest(
              lambda t: longest(
                  lambda p: pretty_example(p, t, transform_base),
                  providers[t]),
              transforms),
          4),
  ])


Expand Down
Loading