Skip to content

Commit 3a1ae18

Browse files
authored
Split some dependencies out of main beam package into existing extras (#36697)
* Split some dependencies out of main beam package
* httplib used more broadly
* A few more split outs
* Try to fix linting
* Guard imports
* yaml test exclusions
* yapf
* correctly skip
* Fix annotations
1 parent 40e2b0d commit 3a1ae18

File tree

10 files changed

+42 -11 lines changed

10 files changed

+42 -11 lines changed

sdks/python/apache_beam/io/gcp/bigquery_tools.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,6 @@
4646

4747
import fastavro
4848
import numpy as np
49-
import regex
5049

5150
import apache_beam
5251
from apache_beam import coders
@@ -70,6 +69,7 @@
7069

7170
# Protect against environments where bigquery library is not available.
7271
try:
72+
import regex
7373
from apitools.base.py.exceptions import HttpError
7474
from apitools.base.py.exceptions import HttpForbiddenError
7575
from apitools.base.py.transfer import Upload

sdks/python/apache_beam/runners/interactive/display/pipeline_graph.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,14 +32,17 @@
3232
from typing import Tuple
3333
from typing import Union
3434

35-
import pydot
36-
3735
import apache_beam as beam
3836
from apache_beam.portability.api import beam_runner_api_pb2
3937
from apache_beam.runners.interactive import interactive_environment as ie
4038
from apache_beam.runners.interactive import pipeline_instrument as inst
4139
from apache_beam.runners.interactive.display import pipeline_graph_renderer
4240

41+
try:
42+
import pydot
43+
except ImportError:
44+
pass
45+
4346
# pylint does not understand context
4447
# pylint:disable=dangerous-default-value
4548

sdks/python/apache_beam/yaml/json_utils.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,12 +25,15 @@
2525
from typing import Any
2626
from typing import Optional
2727

28-
import jsonschema
29-
3028
import apache_beam as beam
3129
from apache_beam.portability.api import schema_pb2
3230
from apache_beam.typehints import schemas
3331

32+
try:
33+
import jsonschema
34+
except ImportError:
35+
pass
36+
3437
JSON_ATOMIC_TYPES_TO_BEAM = {
3538
'boolean': schema_pb2.BOOLEAN,
3639
'integer': schema_pb2.INT64,

sdks/python/apache_beam/yaml/main_test.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,11 @@
2424

2525
from apache_beam.yaml import main
2626

27+
try:
28+
import jsonschema
29+
except ImportError:
30+
jsonschema = None
31+
2732
TEST_PIPELINE = '''
2833
pipeline:
2934
type: chain
@@ -79,6 +84,7 @@
7984
'''
8085

8186

87+
@unittest.skipIf(jsonschema is None, "Yaml dependencies not installed")
8288
class MainTest(unittest.TestCase):
8389
def test_pipeline_spec_from_file(self):
8490
with tempfile.TemporaryDirectory() as tmpdir:

sdks/python/apache_beam/yaml/yaml_io_test.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,11 @@
3232
from apache_beam.typehints import schemas as schema_utils
3333
from apache_beam.yaml.yaml_transform import YamlTransform
3434

35+
try:
36+
import jsonschema
37+
except ImportError:
38+
jsonschema = None
39+
3540

3641
class FakeReadFromPubSub:
3742
def __init__(
@@ -82,6 +87,7 @@ def __call__(self, topic, *, with_attributes, id_label, timestamp_attribute):
8287
return AssertThat(equal_to(self._messages))
8388

8489

90+
@unittest.skipIf(jsonschema is None, "Yaml dependencies not installed")
8591
class YamlPubSubTest(unittest.TestCase):
8692
def test_simple_read(self):
8793
with beam.Pipeline(options=beam.options.pipeline_options.PipelineOptions(

sdks/python/apache_beam/yaml/yaml_mapping_test.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,13 +30,19 @@
3030
from apache_beam.yaml import yaml_mapping
3131
from apache_beam.yaml.yaml_transform import YamlTransform
3232

33+
try:
34+
import jsonschema
35+
except ImportError:
36+
jsonschema = None
37+
3338
DATA = [
3439
beam.Row(label='11a', conductor=11, rank=0),
3540
beam.Row(label='37a', conductor=37, rank=1),
3641
beam.Row(label='389a', conductor=389, rank=2),
3742
]
3843

3944

45+
@unittest.skipIf(jsonschema is None, "Yaml dependencies not installed")
4046
class YamlMappingTest(unittest.TestCase):
4147
def test_basic(self):
4248
with beam.Pipeline(options=beam.options.pipeline_options.PipelineOptions(

sdks/python/apache_beam/yaml/yaml_transform_test.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,11 @@
2929
from apache_beam.yaml import yaml_provider
3030
from apache_beam.yaml.yaml_transform import YamlTransform
3131

32+
try:
33+
import jsonschema
34+
except ImportError:
35+
jsonschema = None
36+
3237

3338
class CreateTimestamped(beam.PTransform):
3439
_yaml_requires_inputs = False
@@ -83,6 +88,7 @@ def raise_on_big(row):
8388
}
8489

8590

91+
@unittest.skipIf(jsonschema is None, "Yaml dependencies not installed")
8692
class YamlTransformE2ETest(unittest.TestCase):
8793
def test_composite(self):
8894
with beam.Pipeline(options=beam.options.pipeline_options.PipelineOptions(

sdks/python/apache_beam/yaml/yaml_transform_unit_test.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ def new_pipeline():
5555
pickle_library='cloudpickle'))
5656

5757

58+
@unittest.skipIf(jsonschema is None, "Yaml dependencies not installed")
5859
class MainTest(unittest.TestCase):
5960
def assertYaml(self, expected, result):
6061
result = SafeLineLoader.strip_metadata(result)

sdks/python/setup.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -379,15 +379,13 @@ def get_portability_package_data():
379379
install_requires=[
380380
'crcmod>=1.7,<2.0',
381381
'cryptography>=39.0.0,<48.0.0',
382-
'orjson>=3.9.7,<4',
383382
'fastavro>=0.23.6,<2',
384383
'fasteners>=0.3,<1.0',
385384
# TODO(https://github.com/grpc/grpc/issues/37710): Unpin grpc
386385
'grpcio>=1.33.1,<2,!=1.48.0,!=1.59.*,!=1.60.*,!=1.61.*,!=1.62.0,!=1.62.1,<1.66.0; python_version <= "3.12"', # pylint: disable=line-too-long
387386
'grpcio>=1.67.0; python_version >= "3.13"',
388387
'hdfs>=2.1.0,<3.0.0',
389388
'httplib2>=0.8,<0.23.0',
390-
'jsonschema>=4.0.0,<5.0.0',
391389
'jsonpickle>=3.0.0,<4.0.0',
392390
# numpy can have breaking changes in minor versions.
393391
# Use a strict upper bound.
@@ -407,11 +405,9 @@ def get_portability_package_data():
407405
# 3. Exclude protobuf 4 versions that leak memory, see:
408406
# https://github.com/apache/beam/issues/28246
409407
'protobuf>=3.20.3,<7.0.0.dev0,!=4.0.*,!=4.21.*,!=4.22.0,!=4.23.*,!=4.24.*', # pylint: disable=line-too-long
410-
'pydot>=1.2.0,<2',
411408
'python-dateutil>=2.8.0,<3',
412409
'pytz>=2018.3',
413410
'redis>=5.0.0,<6',
414-
'regex>=2020.6.8',
415411
'requests>=2.32.4,<3.0.0',
416412
'sortedcontainers>=2.4.0',
417413
'typing-extensions>=3.7.0',
@@ -509,7 +505,9 @@ def get_portability_package_data():
509505
# --extra-index-url or --index-url in requirements.txt in
510506
# Dataflow, which allows installing python packages from private
511507
# Python repositories in GAR.
512-
'keyrings.google-artifactregistry-auth'
508+
'keyrings.google-artifactregistry-auth',
509+
'orjson>=3.9.7,<4',
510+
'regex>=2020.6.8',
513511
],
514512
'interactive': [
515513
'facets-overview>=1.1.0,<2',
@@ -520,6 +518,7 @@ def get_portability_package_data():
520518
# Skip version 6.1.13 due to
521519
# https://github.com/jupyter/jupyter_client/issues/637
522520
'jupyter-client>=6.1.11,!=6.1.13,<8.2.1',
521+
'pydot>=1.2.0,<2',
523522
'timeloop>=1.0.2,<2',
524523
'nbformat>=5.0.5,<6',
525524
'nbconvert>=6.2.0,<8',
@@ -577,6 +576,7 @@ def get_portability_package_data():
577576
'virtualenv-clone>=0.5,<1.0',
578577
# https://github.com/PiotrDabkowski/Js2Py/issues/317
579578
'js2py>=0.74,<1; python_version<"3.12"',
579+
'jsonschema>=4.0.0,<5.0.0',
580580
] + dataframe_dependency,
581581
# Keep the following dependencies in line with what we test against
582582
# in https://github.com/apache/beam/blob/master/sdks/python/tox.ini

sdks/python/tox.ini

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ pip_pre = True
3333
# allow apps that support color to use it.
3434
passenv=TERM,CLOUDSDK_CONFIG,DOCKER_*,TESTCONTAINERS_*,TC_*,ALLOYDB_PASSWORD
3535
# Set [] options for pip installation of apache-beam tarball.
36-
extras = test,dataframe
36+
extras = test,dataframe,yaml
3737
# Don't warn that these commands aren't installed.
3838
allowlist_externals =
3939
false

0 commit comments

Comments
 (0)