Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
62 commits
Select commit Hold shift + click to select a range
b7e2cbb
update dependencies
steffnay Jun 30, 2022
951a43e
Merge branch 'main' of github.com:googleapis/python-bigquery into py-…
steffnay Jul 8, 2022
e9c57d6
deps: pyarrow extras
steffnay Jul 8, 2022
47a489b
clean up comments
steffnay Jul 8, 2022
fd0c40c
add test pyarrow skips
steffnay Jul 8, 2022
85db3e5
Merge branch 'main' into py-extra
parthea Jul 9, 2022
1fac4d2
replace storage checks
steffnay Jul 11, 2022
eaada14
Merge branch 'main' of github.com:googleapis/python-bigquery into py-…
steffnay Jul 11, 2022
61c69e9
update tests
steffnay Jul 11, 2022
95da5c7
Merge branch 'py-extra' of github.com:steffnay/python-bigquery into p…
steffnay Jul 11, 2022
e31e4ef
update tests
steffnay Jul 11, 2022
b4f7160
Update setup.py
steffnay Jul 11, 2022
2bb6461
update system tests
steffnay Jul 12, 2022
2602e4d
Merge branch 'py-extra' of github.com:steffnay/python-bigquery into p…
steffnay Jul 12, 2022
3a87275
update verify_pandas_imports
steffnay Jul 14, 2022
e0a9a2a
add pyarrow guards
steffnay Jul 14, 2022
f3dbaea
add datetime check
steffnay Jul 15, 2022
91fccef
change pyarrow import
steffnay Jul 15, 2022
ac78a33
update
steffnay Jul 15, 2022
0d89234
add pyarrow skips
steffnay Jul 21, 2022
b774b4b
merge
steffnay Jul 21, 2022
79dd4cc
fix types
steffnay Jul 21, 2022
37d7a25
lint
steffnay Jul 21, 2022
9dedf78
Update google/cloud/bigquery/client.py
steffnay Aug 1, 2022
933963e
update pyarrow version
steffnay Aug 1, 2022
93d7639
Merge branch 'py-extra' of github.com:steffnay/python-bigquery into p…
steffnay Aug 1, 2022
45eed33
update test
steffnay Aug 1, 2022
6ac7204
Merge branch 'main' into py-extra
steffnay Aug 1, 2022
af00605
lint
steffnay Aug 1, 2022
5bd1f30
Merge branch 'py-extra' of github.com:steffnay/python-bigquery into p…
steffnay Aug 1, 2022
ef20ab5
update pyarrow req
steffnay Aug 1, 2022
95aceca
update noxfile
steffnay Aug 1, 2022
12591b3
Merge branch 'main' into py-extra
steffnay Aug 5, 2022
d0e9045
remove bignum check
steffnay Aug 5, 2022
5045ead
remove comments
steffnay Aug 5, 2022
050af79
Merge branch 'main' into py-extra
steffnay Aug 18, 2022
01dd2b2
Merge branch 'main' of github.com:googleapis/python-bigquery into py-…
steffnay Sep 23, 2022
1eb5fac
add test importorskip
steffnay Sep 23, 2022
f23657b
update test
steffnay Sep 24, 2022
7138f1e
update test
steffnay Sep 24, 2022
abb9b8c
update dependency
steffnay Sep 24, 2022
d69f8ad
change version
steffnay Sep 24, 2022
caa21cb
update imports
steffnay Sep 26, 2022
17d922a
Merge branch 'main' into py-extra
steffnay Oct 3, 2022
d52b301
Merge branch 'main' into py-extra
steffnay Dec 6, 2022
21ebf7d
adjust test expectations when google-cloud-bigquery-storage is not av…
tswast Dec 8, 2022
39b173a
export pyarrow exception
tswast Dec 8, 2022
88fa115
whitespace in docstrings
tswast Dec 8, 2022
1b926aa
format minimum bqstorage version string
tswast Dec 8, 2022
d71141d
restore optional bqstorage_client
tswast Dec 8, 2022
51332d1
restore optional bqstorage_client (in table.py)
tswast Dec 8, 2022
4c296ae
synchronize constraints and setup.py
tswast Dec 8, 2022
6067f90
synchronize signatures
tswast Dec 8, 2022
6c2b8a5
remove unnecessary bignumeric_type extra
tswast Dec 8, 2022
8196a15
more constraints sync
tswast Dec 8, 2022
5bac083
remove unnecessary mock
tswast Dec 8, 2022
dafdb64
fix unittest skip
tswast Dec 8, 2022
805f5d3
synchronize constraints
tswast Dec 8, 2022
b85dcf3
adjust shapely
tswast Dec 8, 2022
bf4f218
simplify with importorskip
tswast Dec 8, 2022
794f70c
blacken
tswast Dec 8, 2022
bab28b5
Merge branch 'main' into py-extra
tswast Dec 8, 2022
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions docs/snippets.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,11 @@
except (ImportError, AttributeError):
pandas = None

try:
import pyarrow
except (ImportError, AttributeError):
pyarrow = None

from google.api_core.exceptions import InternalServerError
from google.api_core.exceptions import ServiceUnavailable
from google.api_core.exceptions import TooManyRequests
Expand Down
5 changes: 5 additions & 0 deletions google/cloud/bigquery/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@
from google.cloud.bigquery.enums import KeyResultStatementKind
from google.cloud.bigquery.enums import SqlTypeNames
from google.cloud.bigquery.enums import StandardSqlTypeNames
from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError
from google.cloud.bigquery.exceptions import LegacyPyarrowError
from google.cloud.bigquery.external_config import ExternalConfig
from google.cloud.bigquery.external_config import BigtableOptions
from google.cloud.bigquery.external_config import BigtableColumnFamily
Expand Down Expand Up @@ -195,6 +197,9 @@
"WriteDisposition",
# EncryptionConfiguration
"EncryptionConfiguration",
# Custom exceptions
"LegacyBigQueryStorageError",
"LegacyPyarrowError",
]


Expand Down
74 changes: 72 additions & 2 deletions google/cloud/bigquery/_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
import math
import re
import os
from typing import Optional, Union
from typing import Any, Optional, Union

from dateutil import relativedelta
from google.cloud._helpers import UTC # type: ignore
Expand All @@ -32,6 +32,11 @@

import packaging.version

from google.cloud.bigquery.exceptions import (
LegacyBigQueryStorageError,
LegacyPyarrowError,
)

_RFC3339_MICROS_NO_ZULU = "%Y-%m-%dT%H:%M:%S.%f"
_TIMEONLY_WO_MICROS = "%H:%M:%S"
_TIMEONLY_W_MICROS = "%H:%M:%S.%f"
Expand All @@ -50,6 +55,10 @@
r"(?P<time_sign>-?)(?P<hours>\d+):(?P<minutes>\d+):(?P<seconds>\d+)\.?(?P<fraction>\d*)?$"
)

_MIN_BQ_STORAGE_VERSION = packaging.version.Version("2.0.0")

_MIN_PYARROW_VERSION = packaging.version.Version("3.0.0")

_BQ_STORAGE_OPTIONAL_READ_SESSION_VERSION = packaging.version.Version("2.6.0")

BIGQUERY_EMULATOR_HOST = "BIGQUERY_EMULATOR_HOST"
Expand Down Expand Up @@ -83,7 +92,7 @@ def installed_version(self) -> packaging.version.Version:
getattr(bigquery_storage, "__version__", "0.0.0")
)

return self._installed_version
return self._installed_version # type: ignore

@property
def is_read_session_optional(self) -> bool:
Expand All @@ -93,6 +102,29 @@ def is_read_session_optional(self) -> bool:
"""
return self.installed_version >= _BQ_STORAGE_OPTIONAL_READ_SESSION_VERSION

def verify_version(self):
    """Check that the installed BigQuery Storage package is recent enough.

    Assumes the google-cloud-bigquery-storage extra is present; call this
    only where that assumption holds. Because ``pip`` may have installed a
    version older than the constraints in ``setup.py`` allow, this helper
    re-checks the version at runtime.

    Raises:
        LegacyBigQueryStorageError:
            If the installed google-cloud-bigquery-storage is older than
            the minimum supported version.
    """
    current = self.installed_version
    if current >= _MIN_BQ_STORAGE_VERSION:
        return
    raise LegacyBigQueryStorageError(
        "Dependency google-cloud-bigquery-storage is outdated, please upgrade "
        f"it to version >= {_MIN_BQ_STORAGE_VERSION} (version found: {current})."
    )


class PyarrowVersions:
"""Version comparisons for pyarrow package."""
Expand Down Expand Up @@ -120,6 +152,44 @@ def installed_version(self) -> packaging.version.Version:
def use_compliant_nested_type(self) -> bool:
return self.installed_version.major >= 4

def try_import(self, raise_if_error: bool = False) -> Any:
    """Import and return pyarrow if a recent enough version is installed.

    Assumes the pyarrow extra is expected to be present; call this only
    where that assumption holds. Because ``pip`` may have installed a
    version older than the constraints in ``setup.py`` allow, this helper
    re-checks the version at runtime.

    Returns:
        The ``pyarrow`` module, or ``None`` when pyarrow is missing or
        outdated and ``raise_if_error`` is ``False``.

    Raises:
        LegacyPyarrowError:
            If pyarrow is missing or outdated and ``raise_if_error`` is
            ``True``.
    """
    try:
        import pyarrow
    except ImportError as not_found:  # pragma: NO COVER
        if not raise_if_error:
            return None
        raise LegacyPyarrowError(
            f"pyarrow package not found. Install pyarrow version >= {_MIN_PYARROW_VERSION}."
        ) from not_found

    if self.installed_version >= _MIN_PYARROW_VERSION:
        return pyarrow

    if raise_if_error:
        raise LegacyPyarrowError(
            "Dependency pyarrow is outdated, please upgrade "
            f"it to version >= {_MIN_PYARROW_VERSION} (version found: {self.installed_version})."
        )
    return None


BQ_STORAGE_VERSIONS = BQStorageVersions()
PYARROW_VERSIONS = PyarrowVersions()
Expand Down
127 changes: 74 additions & 53 deletions google/cloud/bigquery/_pandas_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,11 @@
import queue
import warnings

from packaging import version

from google.cloud.bigquery import _helpers
from google.cloud.bigquery import schema

try:
import pandas # type: ignore

Expand All @@ -43,9 +48,7 @@
db_dtypes_import_exception = exc
date_dtype_name = time_dtype_name = "" # Use '' rather than None because pytype


import pyarrow # type: ignore
import pyarrow.parquet # type: ignore
pyarrow = _helpers.PYARROW_VERSIONS.try_import()

try:
# _BaseGeometry is used to detect shapely objects in `bq_to_arrow_array`
Expand Down Expand Up @@ -77,10 +80,6 @@ def _to_wkb(v):
# Having BQ Storage available implies that pyarrow >=1.0.0 is available, too.
_ARROW_COMPRESSION_SUPPORT = True

from google.cloud.bigquery import _helpers
from google.cloud.bigquery import schema


_LOGGER = logging.getLogger(__name__)

_PROGRESS_INTERVAL = 0.2 # Maximum time between download status checks, in seconds.
Expand Down Expand Up @@ -141,52 +140,65 @@ def pyarrow_timestamp():
return pyarrow.timestamp("us", tz="UTC")


# This dictionary is duplicated in bigquery_storage/test/unit/test_reader.py
# When modifying it be sure to update it there as well.
BQ_TO_ARROW_SCALARS = {
"BIGNUMERIC": pyarrow_bignumeric,
"BOOL": pyarrow.bool_,
"BOOLEAN": pyarrow.bool_,
"BYTES": pyarrow.binary,
"DATE": pyarrow.date32,
"DATETIME": pyarrow_datetime,
"FLOAT": pyarrow.float64,
"FLOAT64": pyarrow.float64,
"GEOGRAPHY": pyarrow.string,
"INT64": pyarrow.int64,
"INTEGER": pyarrow.int64,
"NUMERIC": pyarrow_numeric,
"STRING": pyarrow.string,
"TIME": pyarrow_time,
"TIMESTAMP": pyarrow_timestamp,
}
ARROW_SCALAR_IDS_TO_BQ = {
# https://arrow.apache.org/docs/python/api/datatypes.html#type-classes
pyarrow.bool_().id: "BOOL",
pyarrow.int8().id: "INT64",
pyarrow.int16().id: "INT64",
pyarrow.int32().id: "INT64",
pyarrow.int64().id: "INT64",
pyarrow.uint8().id: "INT64",
pyarrow.uint16().id: "INT64",
pyarrow.uint32().id: "INT64",
pyarrow.uint64().id: "INT64",
pyarrow.float16().id: "FLOAT64",
pyarrow.float32().id: "FLOAT64",
pyarrow.float64().id: "FLOAT64",
pyarrow.time32("ms").id: "TIME",
pyarrow.time64("ns").id: "TIME",
pyarrow.timestamp("ns").id: "TIMESTAMP",
pyarrow.date32().id: "DATE",
pyarrow.date64().id: "DATETIME", # because millisecond resolution
pyarrow.binary().id: "BYTES",
pyarrow.string().id: "STRING", # also alias for pyarrow.utf8()
# The exact scale and precision don't matter, see below.
pyarrow.decimal128(38, scale=9).id: "NUMERIC",
# The exact decimal's scale and precision are not important, as only
# the type ID matters, and it's the same for all decimal256 instances.
pyarrow.decimal256(76, scale=38).id: "BIGNUMERIC",
}
if pyarrow:
    # This dictionary is duplicated in bigquery_storage/test/unit/test_reader.py
    # When modifying it be sure to update it there as well.
    BQ_TO_ARROW_SCALARS = {
        "BOOL": pyarrow.bool_,
        "BOOLEAN": pyarrow.bool_,
        "BYTES": pyarrow.binary,
        "DATE": pyarrow.date32,
        "DATETIME": pyarrow_datetime,
        "FLOAT": pyarrow.float64,
        "FLOAT64": pyarrow.float64,
        "GEOGRAPHY": pyarrow.string,
        "INT64": pyarrow.int64,
        "INTEGER": pyarrow.int64,
        "NUMERIC": pyarrow_numeric,
        "STRING": pyarrow.string,
        "TIME": pyarrow_time,
        "TIMESTAMP": pyarrow_timestamp,
    }
    ARROW_SCALAR_IDS_TO_BQ = {
        # https://arrow.apache.org/docs/python/api/datatypes.html#type-classes
        pyarrow.bool_().id: "BOOL",
        pyarrow.int8().id: "INT64",
        pyarrow.int16().id: "INT64",
        pyarrow.int32().id: "INT64",
        pyarrow.int64().id: "INT64",
        pyarrow.uint8().id: "INT64",
        pyarrow.uint16().id: "INT64",
        pyarrow.uint32().id: "INT64",
        pyarrow.uint64().id: "INT64",
        pyarrow.float16().id: "FLOAT64",
        pyarrow.float32().id: "FLOAT64",
        pyarrow.float64().id: "FLOAT64",
        pyarrow.time32("ms").id: "TIME",
        pyarrow.time64("ns").id: "TIME",
        pyarrow.timestamp("ns").id: "TIMESTAMP",
        pyarrow.date32().id: "DATE",
        pyarrow.date64().id: "DATETIME",  # because millisecond resolution
        pyarrow.binary().id: "BYTES",
        pyarrow.string().id: "STRING",  # also alias for pyarrow.utf8()
        # The exact scale and precision don't matter, see below.
        pyarrow.decimal128(38, scale=9).id: "NUMERIC",
    }

    # BIGNUMERIC support requires pyarrow >= 3.0.0 (decimal256 type).
    if version.parse(pyarrow.__version__) >= version.parse("3.0.0"):
        BQ_TO_ARROW_SCALARS["BIGNUMERIC"] = pyarrow_bignumeric
        # The exact decimal's scale and precision are not important, as only
        # the type ID matters, and it's the same for all decimal256 instances.
        ARROW_SCALAR_IDS_TO_BQ[pyarrow.decimal256(76, scale=38).id] = "BIGNUMERIC"
        _BIGNUMERIC_SUPPORT = True
    else:
        _BIGNUMERIC_SUPPORT = False  # pragma: NO COVER

else:  # pragma: NO COVER
    # pyarrow is not installed: leave the mappings empty so callers can
    # detect the missing extra instead of failing at import time.
    BQ_TO_ARROW_SCALARS = {}  # pragma: NO COVER
    ARROW_SCALAR_IDS_TO_BQ = {}  # pragma: NO COVER
    _BIGNUMERIC_SUPPORT = False  # pragma: NO COVER


BQ_FIELD_TYPE_TO_ARROW_FIELD_METADATA = {
"GEOGRAPHY": {
b"ARROW:extension:name": b"google:sqlType:geography",
Expand Down Expand Up @@ -480,6 +492,13 @@ def dataframe_to_bq_schema(dataframe, bq_schema):
# If schema detection was not successful for all columns, also try with
# pyarrow, if available.
if unknown_type_fields:
if not pyarrow:
msg = "Could not determine the type of columns: {}".format(
", ".join(field.name for field in unknown_type_fields)
)
warnings.warn(msg)
return None # We cannot detect the schema in full.

# The augment_schema() helper itself will also issue unknown type
# warnings if detection still fails for any of the fields.
bq_schema_out = augment_schema(dataframe, bq_schema_out)
Expand Down Expand Up @@ -654,6 +673,8 @@ def dataframe_to_parquet(

This argument is ignored for ``pyarrow`` versions earlier than ``4.0.0``.
"""
pyarrow = _helpers.PYARROW_VERSIONS.try_import(raise_if_error=True)

import pyarrow.parquet # type: ignore

kwargs = (
Expand Down
Loading