Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions noxfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@
BLACK_PATHS = ["docs", "pandas_gbq", "tests", "noxfile.py", "setup.py"]

DEFAULT_PYTHON_VERSION = "3.8"
SYSTEM_TEST_PYTHON_VERSIONS = ["3.7", "3.8", "3.9"]
UNIT_TEST_PYTHON_VERSIONS = ["3.7", "3.8", "3.9"]
SYSTEM_TEST_PYTHON_VERSIONS = ["3.7", "3.8", "3.9", "3.10"]
UNIT_TEST_PYTHON_VERSIONS = ["3.7", "3.8", "3.9", "3.10"]

CURRENT_DIRECTORY = pathlib.Path(__file__).parent.absolute()

Expand Down
8 changes: 6 additions & 2 deletions owlbot.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,16 @@
# ----------------------------------------------------------------------------

extras = ["tqdm"]
extras_by_python = {
"3.9": ["tqdm", "db-dtypes"],
}
templated_files = common.py_library(
unit_test_python_versions=["3.7", "3.8", "3.9"],
system_test_python_versions=["3.7", "3.8", "3.9"],
unit_test_python_versions=["3.7", "3.8", "3.9", "3.10"],
system_test_python_versions=["3.7", "3.8", "3.9", "3.10"],
cov_level=86,
unit_test_extras=extras,
system_test_extras=extras,
system_test_extras_by_python=extras_by_python,
intersphinx_dependencies={
"pandas": "https://pandas.pydata.org/pandas-docs/stable/",
"pydata-google-auth": "https://pydata-google-auth.readthedocs.io/en/latest/",
Expand Down
5 changes: 4 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,10 @@
# https://github.com/pydata/pandas-gbq/issues/343
"google-cloud-bigquery[bqstorage,pandas]>=1.11.1,<3.0.0dev,!=2.4.*",
]
extras = {"tqdm": "tqdm>=4.23.0"}
extras = {
"tqdm": "tqdm>=4.23.0",
"db-dtypes": "db-dtypes >=0.3.0,<2.0.0",
}

# Setup boilerplate below this line.

Expand Down
108 changes: 99 additions & 9 deletions tests/system/test_to_gbq.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,20 @@
import pandas.testing
import pytest

# db-dtypes is an optional extra; when it is not installed we record None so
# that test parametrization below can skip the dbdate-specific cases.
try:
    import db_dtypes
except ImportError:
    db_dtypes = None


# Skip the whole module unless a sufficiently recent BigQuery client is
# available (1.24.0+).
pytest.importorskip("google.cloud.bigquery", minversion="1.24.0")


@pytest.fixture(params=["default", "load_parquet", "load_csv"])
def api_method(request):
    """Parametrized fixture for the ``api_method`` argument to ``to_gbq``.

    Tests using this fixture run once per upload path (default,
    parquet load job, CSV load job).
    """
    return request.param


@pytest.fixture
def method_under_test(credentials, project_id):
import pandas_gbq
Expand All @@ -23,7 +33,7 @@ def method_under_test(credentials, project_id):


@pytest.mark.parametrize(
["input_series"],
["input_series", "skip_csv"],
[
# Ensure that 64-bit floating point numbers are unchanged.
# See: https://github.com/pydata/pandas-gbq/issues/326
Expand All @@ -41,17 +51,13 @@ def method_under_test(credentials, project_id):
],
name="test_col",
),
False,
),
(
pandas.Series(
[
"abc",
"defg",
# Ensure that empty strings are written as empty string,
# not NULL. See:
# https://github.com/googleapis/python-bigquery-pandas/issues/366
"",
None,
# Ensure that unicode characters are encoded. See:
# https://github.com/googleapis/python-bigquery-pandas/issues/106
"信用卡",
Expand All @@ -60,23 +66,107 @@ def method_under_test(credentials, project_id):
],
name="test_col",
),
False,
),
(
pandas.Series(
[
"abc",
"defg",
# Ensure that empty strings are written as empty string,
# not NULL. See:
# https://github.com/googleapis/python-bigquery-pandas/issues/366
"",
None,
],
name="empty_strings",
),
True,
),
],
)
def test_series_round_trip(
method_under_test, random_dataset_id, bigquery_client, input_series
method_under_test,
random_dataset_id,
bigquery_client,
input_series,
api_method,
skip_csv,
):
if api_method == "load_csv" and skip_csv:
pytest.skip("Loading with CSV not supported.")
table_id = f"{random_dataset_id}.round_trip_{random.randrange(1_000_000)}"
input_series = input_series.sort_values().reset_index(drop=True)
df = pandas.DataFrame(
# Some errors only occur in multi-column dataframes. See:
# https://github.com/googleapis/python-bigquery-pandas/issues/366
{"test_col": input_series, "test_col2": input_series}
)
method_under_test(df, table_id)
method_under_test(df, table_id, api_method=api_method)

round_trip = bigquery_client.list_rows(table_id).to_dataframe()
round_trip_series = round_trip["test_col"].sort_values().reset_index(drop=True)
pandas.testing.assert_series_equal(
round_trip_series, input_series, check_exact=True,
round_trip_series, input_series, check_exact=True, check_names=False,
)


# Cases for test_dataframe_round_trip_with_table_schema.
# Each entry is a tuple: (input DataFrame, table_schema, skip_csv).
#
# Ensure that a DATE column can be written with datetime64[ns] dtype
# data. See:
# https://github.com/googleapis/python-bigquery-pandas/issues/362
_DATETIME64_DATE_DF = pandas.DataFrame(
    {
        "date_col": pandas.Series(
            ["2021-04-17", "1999-12-31", "2038-01-19"],
            dtype="datetime64[ns]",
        ),
    }
)
DATAFRAME_ROUND_TRIPS = [
    (_DATETIME64_DATE_DF, [{"name": "date_col", "type": "DATE"}], True),
]
if db_dtypes is not None:
    # Only exercise the "dbdate" extension dtype when the optional
    # db-dtypes package is importable.
    _DBDATE_DATE_DF = pandas.DataFrame(
        {
            "date_col": pandas.Series(
                ["2021-04-17", "1999-12-31", "2038-01-19"],
                dtype="dbdate",
            ),
        }
    )
    DATAFRAME_ROUND_TRIPS.append(
        (_DBDATE_DATE_DF, [{"name": "date_col", "type": "DATE"}], False)
    )


@pytest.mark.parametrize(
    ["input_df", "table_schema", "skip_csv"], DATAFRAME_ROUND_TRIPS
)
def test_dataframe_round_trip_with_table_schema(
    method_under_test,
    random_dataset_id,
    bigquery_client,
    input_df,
    table_schema,
    api_method,
    skip_csv,
):
    """Round-trip a DataFrame through BigQuery with an explicit schema.

    Uploads ``input_df`` using ``table_schema`` via the API method under
    test, downloads the table again, and checks the data survives intact.
    """
    if api_method == "load_csv" and skip_csv:
        pytest.skip("Loading with CSV not supported.")
    table_id = f"{random_dataset_id}.round_trip_w_schema_{random.randrange(1_000_000)}"
    method_under_test(
        input_df, table_id, table_schema=table_schema, api_method=api_method
    )
    # Download with the upload dtypes so assert_frame_equal compares
    # like-for-like instead of whatever dtypes to_dataframe would infer.
    round_trip = bigquery_client.list_rows(table_id).to_dataframe(
        dtypes=dict(zip(input_df.columns, input_df.dtypes))
    )
    # TODO: Need to sort by row number before comparing.
    pandas.testing.assert_frame_equal(input_df, round_trip)