From 76d88f43b66caf0c9edfb1b2806e8836163f514b Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 20 Jul 2021 14:39:29 -0500 Subject: [PATCH 01/12] feat!: use nullable types like float and Int64 by default in `to_dataframe` To override this behavior, specify the types for the desired columns with the `dtype` argument. --- tests/unit/test_table_pandas.py | 69 +++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) create mode 100644 tests/unit/test_table_pandas.py diff --git a/tests/unit/test_table_pandas.py b/tests/unit/test_table_pandas.py new file mode 100644 index 000000000..a2da48343 --- /dev/null +++ b/tests/unit/test_table_pandas.py @@ -0,0 +1,69 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from unittest import mock + +import pytest + +from google.cloud import bigquery + +pandas = pytest.importorskip("pandas") + + +TEST_PATH = "/v1/project/test-proj/dataset/test-dset/table/test-tbl/data" + + +@pytest.fixture +def class_under_test(): + from google.cloud.bigquery.table import RowIterator + + return RowIterator + + +def test_to_dataframe_defaults_to_nullable_dtypes(class_under_test): + nullable_schema = [ + bigquery.SchemaField("date_col", "DATE"), + bigquery.SchemaField("datetime_col", "DATETIME"), + bigquery.SchemaField("float_col", "FLOAT"), + bigquery.SchemaField("float64_col", "FLOAT64"), + bigquery.SchemaField("integer_col", "INTEGER"), + bigquery.SchemaField("int64_col", "INT64"), + bigquery.SchemaField( + "time_col", "TIME" + ), # TODO: use timedelta64 dtype for this? + bigquery.SchemaField("timestamp_col", "TIMESTAMP"), + ] + mock_client = mock.create_autospec(bigquery.Client) + mock_client.project = "test-proj" + mock_api_request = mock.Mock() + rows = class_under_test(mock_client, mock_api_request, TEST_PATH, nullable_schema,) + rows.to_dataframe() # TODO: if we are always using BQ Storage API for + # to_dataframe, maybe wait to implement until after required? 
+ # TODO: behavior is based on schema (and data rows) + assert False + + +def test_to_dataframe_bqstorage_defaults_to_nullable_dtypes(class_under_test): + # TODO: behavior is based on schema (and data rows) + assert False + + +def test_to_dataframe_overrides_nullable_dtypes(class_under_test): + """Passing in explicit dtypes is merged with default behavior.""" + assert False + + +def test_to_dataframe_bqstorage_overrides_nullable_dtypes(class_under_test): + """Passing in explicit dtypes is merged with default behavior.""" + assert False From f2223e97d2c83c64ae1a49de40dc5af9300e3e91 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 21 Jul 2021 15:53:27 -0500 Subject: [PATCH 02/12] add test data for all scalar columns --- tests/data/scalars.jsonl | 2 + tests/data/scalars_extreme.jsonl | 4 ++ tests/data/scalars_schema.json | 62 ++++++++++++++++++++++++++++ tests/system/conftest.py | 69 ++++++++++++++++++++++++++++++-- tests/system/test_arrow.py | 56 ++++++++++++++++++++++++++ 5 files changed, 189 insertions(+), 4 deletions(-) create mode 100644 tests/data/scalars.jsonl create mode 100644 tests/data/scalars_extreme.jsonl create mode 100644 tests/data/scalars_schema.json create mode 100644 tests/system/test_arrow.py diff --git a/tests/data/scalars.jsonl b/tests/data/scalars.jsonl new file mode 100644 index 000000000..4419a6e9a --- /dev/null +++ b/tests/data/scalars.jsonl @@ -0,0 +1,2 @@ +{"bool_col": true, "bytes_col": "abcd", "date_col": "2021-07-21", "datetime_col": "2021-07-21 11:39:45", "geography_col": "POINT(-122.0838511 37.3860517)", "int64_col": "123456789", "numeric_col": "1.23456789", "bignumeric_col": "10.111213141516171819", "float64_col": "1.25", "string_col": "Hello, World", "time_col": "11:41:43.07616", "timestamp_col": "2021-07-21T17:43:43.945289Z"} +{"bool_col": null, "bytes_col": null, "date_col": null, "datetime_col": null, "geography_col": null, "int64_col": null, "numeric_col": null, "bignumeric_col": null, "float64_col": null, "string_col": null, "time_col": null, "timestamp_col": null} diff --git a/tests/data/scalars_extreme.jsonl b/tests/data/scalars_extreme.jsonl new file mode 100644 index 000000000..c2a923366 --- /dev/null +++ b/tests/data/scalars_extreme.jsonl @@ -0,0 +1,4 @@ +{"bool_col": true, "bytes_col": "abcd", "date_col": "9999-12-31", "datetime_col": "9999-12-31 23:59:59.999999", "geography_col": "POINT(-122.0838511 37.3860517)", "int64_col": "9223372036854775807", "numeric_col": "9.9999999999999999999999999999999999999E+28", "bignumeric_col": "5.7896044618658097711785492504343953926634992332820282019728792003956564819967E+38", "float64_col": "+inf", "string_col": "Hello, World", "time_col": "23:59:59.99999", "timestamp_col": "9999-12-31T23:59:59.999999Z"} +{"bool_col": false, "bytes_col": "abcd", "date_col": "0001-01-01", "datetime_col": "0001-01-01 00:00:00", "geography_col": "POINT(-122.0838511 37.3860517)", "int64_col": "-9223372036854775808", "numeric_col": "-9.9999999999999999999999999999999999999E+28", "bignumeric_col": "-5.7896044618658097711785492504343953926634992332820282019728792003956564819968E+38", "float64_col": "-inf", "string_col": "Hello, World", "time_col": "00:00:00", "timestamp_col": "0001-01-01T00:00:00.000000Z"} +{"bool_col": false, "bytes_col": "", "date_col": "1970-01-01", "datetime_col": "1970-01-01 00:00:00", "geography_col": "POINT(0 0)", "int64_col": "0", "numeric_col": "0.0", "bignumeric_col": "0.0", "float64_col": 0.0, "string_col": "", "time_col": "12:00:00", "timestamp_col": "1970-01-01T00:00:00.000000Z"} +{"bool_col": null, 
"bytes_col": null, "date_col": null, "datetime_col": null, "geography_col": null, "int64_col": null, "numeric_col": null, "bignumeric_col": null, "float64_col": null, "string_col": null, "time_col": null, "timestamp_col": null} diff --git a/tests/data/scalars_schema.json b/tests/data/scalars_schema.json new file mode 100644 index 000000000..00bd150fd --- /dev/null +++ b/tests/data/scalars_schema.json @@ -0,0 +1,62 @@ +[ + { + "mode": "NULLABLE", + "name": "timestamp_col", + "type": "TIMESTAMP" + }, + { + "mode": "NULLABLE", + "name": "time_col", + "type": "TIME" + }, + { + "mode": "NULLABLE", + "name": "float64_col", + "type": "FLOAT" + }, + { + "mode": "NULLABLE", + "name": "datetime_col", + "type": "DATETIME" + }, + { + "mode": "NULLABLE", + "name": "bignumeric_col", + "type": "BIGNUMERIC" + }, + { + "mode": "NULLABLE", + "name": "numeric_col", + "type": "NUMERIC" + }, + { + "mode": "NULLABLE", + "name": "geography_col", + "type": "GEOGRAPHY" + }, + { + "mode": "NULLABLE", + "name": "date_col", + "type": "DATE" + }, + { + "mode": "NULLABLE", + "name": "string_col", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "bool_col", + "type": "BOOLEAN" + }, + { + "mode": "NULLABLE", + "name": "bytes_col", + "type": "BYTES" + }, + { + "mode": "NULLABLE", + "name": "int64_col", + "type": "INTEGER" + } +] diff --git a/tests/system/conftest.py b/tests/system/conftest.py index 4eef60e92..e7d8200bb 100644 --- a/tests/system/conftest.py +++ b/tests/system/conftest.py @@ -12,11 +12,31 @@ # See the License for the specific language governing permissions and # limitations under the License. +import pathlib + import pytest +import test_utils.prefixer +from google.cloud import bigquery +from google.cloud.bigquery import enums from . import helpers +prefixer = test_utils.prefixer.Prefixer("python-bigquery", "tests/system") + + +DATA_DIR = pathlib.Path(__file__).parent.parent / "data" + + +@pytest.fixture(scope="session", autouse=True) +def cleanup_datasets(bigquery_client: bigquery.Client): + for dataset in bigquery_client.list_datasets(): + if prefixer.should_cleanup(dataset.dataset_id): + bigquery_client.delete_dataset( + dataset, delete_contents=True, not_found_ok=True + ) + + @pytest.fixture(scope="session") def bigquery_client(): from google.cloud import bigquery @@ -24,6 +44,11 @@ def bigquery_client(): return bigquery.Client() +@pytest.fixture(scope="session") +def project_id(bigquery_client: bigquery.Client): + return bigquery_client.project + + @pytest.fixture(scope="session") def bqstorage_client(bigquery_client): from google.cloud import bigquery_storage @@ -32,13 +57,49 @@ def bqstorage_client(bigquery_client): @pytest.fixture(scope="session") -def dataset_id(bigquery_client): - dataset_id = f"bqsystem_{helpers.temp_suffix()}" - bigquery_client.create_dataset(dataset_id) +def dataset_id(bigquery_client: bigquery.Client, project_id: str): + dataset_id = prefixer.create_prefix() + full_dataset_id = f"{project_id}.{dataset_id}" + dataset = bigquery.Dataset(full_dataset_id) + bigquery_client.create_dataset(dataset) yield dataset_id - bigquery_client.delete_dataset(dataset_id, delete_contents=True) + bigquery_client.delete_dataset(dataset, delete_contents=True, not_found_ok=True) @pytest.fixture def table_id(dataset_id): return f"{dataset_id}.table_{helpers.temp_suffix()}" + + +@pytest.fixture(scope="session") +def scalars_table(bigquery_client: bigquery.Client, project_id: str, dataset_id: str): + schema = bigquery_client.schema_from_json(DATA_DIR / "scalars_schema.json") + job_config = 
bigquery.LoadJobConfig() + job_config.schema = schema + job_config.source_format = enums.SourceFormat.NEWLINE_DELIMITED_JSON + full_table_id = f"{project_id}.{dataset_id}.scalars" + with open(DATA_DIR / "scalars.jsonl", "rb") as data_file: + job = bigquery_client.load_table_from_file( + data_file, full_table_id, job_config=job_config + ) + job.result() + yield full_table_id + bigquery_client.delete_table(full_table_id) + + +@pytest.fixture(scope="session") +def scalars_extreme_table( + bigquery_client: bigquery.Client, project_id: str, dataset_id: str +): + schema = bigquery_client.schema_from_json(DATA_DIR / "scalars_schema.json") + job_config = bigquery.LoadJobConfig() + job_config.schema = schema + job_config.source_format = enums.SourceFormat.NEWLINE_DELIMITED_JSON + full_table_id = f"{project_id}.{dataset_id}.scalars_extreme" + with open(DATA_DIR / "scalars_extreme.jsonl", "rb") as data_file: + job = bigquery_client.load_table_from_file( + data_file, full_table_id, job_config=job_config + ) + job.result() + yield full_table_id + bigquery_client.delete_table(full_table_id) diff --git a/tests/system/test_arrow.py b/tests/system/test_arrow.py new file mode 100644 index 000000000..5c0104b59 --- /dev/null +++ b/tests/system/test_arrow.py @@ -0,0 +1,56 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""System tests for Arrow connector.""" + + +def test_bqstorage(bigquery_client, scalars_table): + arrow_table = bigquery_client.list_rows(scalars_table).to_arrow() + assert arrow_table.schema is None + + # timestamp_col: timestamp[us, tz=UTC] + # time_col: time64[us] + # float64_col: double + # datetime_col: timestamp[us] + # -- field metadata -- + # ARROW:extension:name: 'google:sqlType:datetime' + # bignumeric_col: decimal256(76, 38) + # numeric_col: decimal128(38, 9) + # geography_col: string + # -- field metadata -- + # ARROW:extension:name: 'google:sqlType:geography' + # ARROW:extension:metadata: '{"encoding": "WKT"}' + # date_col: date32[day] + # string_col: string + # bool_col: bool + # bytes_col: binary + # int64_col: int64 + + +def test_rest(bigquery_client, scalars_table): + arrow_table = bigquery_client.list_rows(scalars_table, max_results=10).to_arrow() + assert arrow_table.schema is None + + # timestamp_col: timestamp[us, tz=UTC] + # time_col: time64[us] + # float64_col: double + # datetime_col: timestamp[us] + # bignumeric_col: decimal256(76, 38) + # numeric_col: decimal128(38, 9) + # geography_col: string + # date_col: date32[day] + # string_col: string + # bool_col: bool + # bytes_col: binary + # int64_col: int64 From 07ed8717370db4659f23c561d6e7637cc37f98c5 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 21 Jul 2021 15:53:27 -0500 Subject: [PATCH 03/12] add test data for all scalar columns --- tests/data/scalars.jsonl | 2 + tests/data/scalars_extreme.jsonl | 4 ++ tests/data/scalars_schema.json | 62 ++++++++++++++++++++++++++ tests/system/conftest.py | 69 +++++++++++++++++++++++++++-- tests/system/test_arrow.py | 56 +++++++++++++++++++++++ tests/system/test_pandas.py | 76 ++++++++++++++++++++++++++++++++ 6 files changed, 265 insertions(+), 4 deletions(-) create mode 100644 tests/data/scalars.jsonl create mode 100644 tests/data/scalars_extreme.jsonl create mode 100644 tests/data/scalars_schema.json create mode 100644 tests/system/test_arrow.py diff --git a/tests/data/scalars.jsonl b/tests/data/scalars.jsonl new file mode 100644 index 000000000..4419a6e9a --- /dev/null +++ b/tests/data/scalars.jsonl @@ -0,0 +1,2 @@ +{"bool_col": true, "bytes_col": "abcd", "date_col": "2021-07-21", "datetime_col": "2021-07-21 11:39:45", "geography_col": "POINT(-122.0838511 37.3860517)", "int64_col": "123456789", "numeric_col": "1.23456789", "bignumeric_col": "10.111213141516171819", "float64_col": "1.25", "string_col": "Hello, World", "time_col": "11:41:43.07616", "timestamp_col": "2021-07-21T17:43:43.945289Z"} +{"bool_col": null, "bytes_col": null, "date_col": null, "datetime_col": null, "geography_col": null, "int64_col": null, "numeric_col": null, "bignumeric_col": null, "float64_col": null, "string_col": null, "time_col": null, "timestamp_col": null} diff --git a/tests/data/scalars_extreme.jsonl b/tests/data/scalars_extreme.jsonl new file mode 100644 index 000000000..c2a923366 --- /dev/null +++ b/tests/data/scalars_extreme.jsonl @@ -0,0 +1,4 @@ +{"bool_col": true, "bytes_col": "abcd", "date_col": "9999-12-31", "datetime_col": "9999-12-31 23:59:59.999999", "geography_col": "POINT(-122.0838511 37.3860517)", "int64_col": "9223372036854775807", "numeric_col": "9.9999999999999999999999999999999999999E+28", "bignumeric_col": "5.7896044618658097711785492504343953926634992332820282019728792003956564819967E+38", "float64_col": "+inf", "string_col": "Hello, World", "time_col": "23:59:59.99999", "timestamp_col": "9999-12-31T23:59:59.999999Z"} +{"bool_col": false, "bytes_col": "abcd", 
"date_col": "0001-01-01", "datetime_col": "0001-01-01 00:00:00", "geography_col": "POINT(-122.0838511 37.3860517)", "int64_col": "-9223372036854775808", "numeric_col": "-9.9999999999999999999999999999999999999E+28", "bignumeric_col": "-5.7896044618658097711785492504343953926634992332820282019728792003956564819968E+38", "float64_col": "-inf", "string_col": "Hello, World", "time_col": "00:00:00", "timestamp_col": "0001-01-01T00:00:00.000000Z"} +{"bool_col": false, "bytes_col": "", "date_col": "1970-01-01", "datetime_col": "1970-01-01 00:00:00", "geography_col": "POINT(0 0)", "int64_col": "0", "numeric_col": "0.0", "bignumeric_col": "0.0", "float64_col": 0.0, "string_col": "", "time_col": "12:00:00", "timestamp_col": "1970-01-01T00:00:00.000000Z"} +{"bool_col": null, "bytes_col": null, "date_col": null, "datetime_col": null, "geography_col": null, "int64_col": null, "numeric_col": null, "bignumeric_col": null, "float64_col": null, "string_col": null, "time_col": null, "timestamp_col": null} diff --git a/tests/data/scalars_schema.json b/tests/data/scalars_schema.json new file mode 100644 index 000000000..00bd150fd --- /dev/null +++ b/tests/data/scalars_schema.json @@ -0,0 +1,62 @@ +[ + { + "mode": "NULLABLE", + "name": "timestamp_col", + "type": "TIMESTAMP" + }, + { + "mode": "NULLABLE", + "name": "time_col", + "type": "TIME" + }, + { + "mode": "NULLABLE", + "name": "float64_col", + "type": "FLOAT" + }, + { + "mode": "NULLABLE", + "name": "datetime_col", + "type": "DATETIME" + }, + { + "mode": "NULLABLE", + "name": "bignumeric_col", + "type": "BIGNUMERIC" + }, + { + "mode": "NULLABLE", + "name": "numeric_col", + "type": "NUMERIC" + }, + { + "mode": "NULLABLE", + "name": "geography_col", + "type": "GEOGRAPHY" + }, + { + "mode": "NULLABLE", + "name": "date_col", + "type": "DATE" + }, + { + "mode": "NULLABLE", + "name": "string_col", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "bool_col", + "type": "BOOLEAN" + }, + { + "mode": "NULLABLE", + "name": "bytes_col", + "type": "BYTES" + }, + { + "mode": "NULLABLE", + "name": "int64_col", + "type": "INTEGER" + } +] diff --git a/tests/system/conftest.py b/tests/system/conftest.py index 4eef60e92..e7d8200bb 100644 --- a/tests/system/conftest.py +++ b/tests/system/conftest.py @@ -12,11 +12,31 @@ # See the License for the specific language governing permissions and # limitations under the License. +import pathlib + import pytest +import test_utils.prefixer +from google.cloud import bigquery +from google.cloud.bigquery import enums from . 
import helpers +prefixer = test_utils.prefixer.Prefixer("python-bigquery", "tests/system") + + +DATA_DIR = pathlib.Path(__file__).parent.parent / "data" + + +@pytest.fixture(scope="session", autouse=True) +def cleanup_datasets(bigquery_client: bigquery.Client): + for dataset in bigquery_client.list_datasets(): + if prefixer.should_cleanup(dataset.dataset_id): + bigquery_client.delete_dataset( + dataset, delete_contents=True, not_found_ok=True + ) + + @pytest.fixture(scope="session") def bigquery_client(): from google.cloud import bigquery @@ -24,6 +44,11 @@ def bigquery_client(): return bigquery.Client() +@pytest.fixture(scope="session") +def project_id(bigquery_client: bigquery.Client): + return bigquery_client.project + + @pytest.fixture(scope="session") def bqstorage_client(bigquery_client): from google.cloud import bigquery_storage @@ -32,13 +57,49 @@ def bqstorage_client(bigquery_client): @pytest.fixture(scope="session") -def dataset_id(bigquery_client): - dataset_id = f"bqsystem_{helpers.temp_suffix()}" - bigquery_client.create_dataset(dataset_id) +def dataset_id(bigquery_client: bigquery.Client, project_id: str): + dataset_id = prefixer.create_prefix() + full_dataset_id = f"{project_id}.{dataset_id}" + dataset = bigquery.Dataset(full_dataset_id) + bigquery_client.create_dataset(dataset) yield dataset_id - bigquery_client.delete_dataset(dataset_id, delete_contents=True) + bigquery_client.delete_dataset(dataset, delete_contents=True, not_found_ok=True) @pytest.fixture def table_id(dataset_id): return f"{dataset_id}.table_{helpers.temp_suffix()}" + + +@pytest.fixture(scope="session") +def scalars_table(bigquery_client: bigquery.Client, project_id: str, dataset_id: str): + schema = bigquery_client.schema_from_json(DATA_DIR / "scalars_schema.json") + job_config = bigquery.LoadJobConfig() + job_config.schema = schema + job_config.source_format = enums.SourceFormat.NEWLINE_DELIMITED_JSON + full_table_id = f"{project_id}.{dataset_id}.scalars" + with open(DATA_DIR / "scalars.jsonl", "rb") as data_file: + job = bigquery_client.load_table_from_file( + data_file, full_table_id, job_config=job_config + ) + job.result() + yield full_table_id + bigquery_client.delete_table(full_table_id) + + +@pytest.fixture(scope="session") +def scalars_extreme_table( + bigquery_client: bigquery.Client, project_id: str, dataset_id: str +): + schema = bigquery_client.schema_from_json(DATA_DIR / "scalars_schema.json") + job_config = bigquery.LoadJobConfig() + job_config.schema = schema + job_config.source_format = enums.SourceFormat.NEWLINE_DELIMITED_JSON + full_table_id = f"{project_id}.{dataset_id}.scalars_extreme" + with open(DATA_DIR / "scalars_extreme.jsonl", "rb") as data_file: + job = bigquery_client.load_table_from_file( + data_file, full_table_id, job_config=job_config + ) + job.result() + yield full_table_id + bigquery_client.delete_table(full_table_id) diff --git a/tests/system/test_arrow.py b/tests/system/test_arrow.py new file mode 100644 index 000000000..5c0104b59 --- /dev/null +++ b/tests/system/test_arrow.py @@ -0,0 +1,56 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +"""System tests for Arrow connector.""" + + +def test_bqstorage(bigquery_client, scalars_table): + arrow_table = bigquery_client.list_rows(scalars_table).to_arrow() + assert arrow_table.schema is None + + # timestamp_col: timestamp[us, tz=UTC] + # time_col: time64[us] + # float64_col: double + # datetime_col: timestamp[us] + # -- field metadata -- + # ARROW:extension:name: 'google:sqlType:datetime' + # bignumeric_col: decimal256(76, 38) + # numeric_col: decimal128(38, 9) + # geography_col: string + # -- field metadata -- + # ARROW:extension:name: 'google:sqlType:geography' + # ARROW:extension:metadata: '{"encoding": "WKT"}' + # date_col: date32[day] + # string_col: string + # bool_col: bool + # bytes_col: binary + # int64_col: int64 + + +def test_rest(bigquery_client, scalars_table): + arrow_table = bigquery_client.list_rows(scalars_table, max_results=10).to_arrow() + assert arrow_table.schema is None + + # timestamp_col: timestamp[us, tz=UTC] + # time_col: time64[us] + # float64_col: double + # datetime_col: timestamp[us] + # bignumeric_col: decimal256(76, 38) + # numeric_col: decimal128(38, 9) + # geography_col: string + # date_col: date32[day] + # string_col: string + # bool_col: bool + # bytes_col: binary + # int64_col: int64 diff --git a/tests/system/test_pandas.py b/tests/system/test_pandas.py index ddf5eaf43..3a0896e80 100644 --- a/tests/system/test_pandas.py +++ b/tests/system/test_pandas.py @@ -792,3 +792,79 @@ def test_list_rows_max_results_w_bqstorage(bigquery_client): dataframe = row_iterator.to_dataframe(bqstorage_client=bqstorage_client) assert len(dataframe.index) == 100 + + +def test_list_rows_nullable_scalars_dtypes(bigquery_client, scalars_table): + df = bigquery_client.list_rows(scalars_table).to_dataframe() + + # timestamp_col: timestamp[us, tz=UTC] + # time_col: time64[us] + # float64_col: double + # datetime_col: timestamp[us] + # -- field metadata -- + # ARROW:extension:name: 'google:sqlType:datetime' + # bignumeric_col: decimal256(76, 38) + # numeric_col: decimal128(38, 9) + # geography_col: string + # -- field metadata -- + # ARROW:extension:name: 'google:sqlType:geography' + # ARROW:extension:metadata: '{"encoding": "WKT"}' + # date_col: date32[day] + # string_col: string + # bool_col: bool + # bytes_col: binary + # int64_col: int64 + + assert df.dtypes is None + + # timestamp_col datetime64[ns, UTC] + # time_col object <-- use Period? 
+ # float64_col float64 + # datetime_col datetime64[ns] + # bignumeric_col object <-- probably correct + # numeric_col object <-- probably correct + # geography_col object <-- https://github.com/googleapis/python-bigquery/issues/792 + # date_col object <-- per https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#overview, should be datetime64[ns] (where possible) + # string_col object + # bool_col object <-- maybe should be "boolean" (added in pandas 1.0.0) + # bytes_col object + # int64_col float64 <-- https://github.com/googleapis/python-bigquery/issues/793 + + +def test_list_rows_nullable_scalars_extreme_dtypes( + bigquery_client, scalars_extreme_table +): + df = bigquery_client.list_rows(scalars_extreme_table).to_dataframe() + + # timestamp_col: timestamp[us, tz=UTC] + # time_col: time64[us] + # float64_col: double + # datetime_col: timestamp[us] + # -- field metadata -- + # ARROW:extension:name: 'google:sqlType:datetime' + # bignumeric_col: decimal256(76, 38) + # numeric_col: decimal128(38, 9) + # geography_col: string + # -- field metadata -- + # ARROW:extension:name: 'google:sqlType:geography' + # ARROW:extension:metadata: '{"encoding": "WKT"}' + # date_col: date32[day] + # string_col: string + # bool_col: bool + # bytes_col: binary + # int64_col: int64 + + assert df.dtypes is None + + # timestamp_col object + # time_col object + # float64_col float64 + # datetime_col object <-- correct, since extreme values are out-of-bounds + # bignumeric_col object + # numeric_col object + # geography_col object + # date_col object + # string_col object + # bool_col object + # bytes_col object + # int64_col float64 From 21d43698aa44db23ae3d3b4e9c93cc1cec877b17 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 22 Jul 2021 17:07:42 -0500 Subject: [PATCH 04/12] update tests with expected dtypes --- tests/system/test_pandas.py | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/tests/system/test_pandas.py b/tests/system/test_pandas.py index 3a0896e80..da6241171 100644 --- a/tests/system/test_pandas.py +++ b/tests/system/test_pandas.py @@ -795,7 +795,9 @@ def test_list_rows_max_results_w_bqstorage(bigquery_client): def test_list_rows_nullable_scalars_dtypes(bigquery_client, scalars_table): - df = bigquery_client.list_rows(scalars_table).to_dataframe() + df = bigquery_client.list_rows( + scalars_table + ).to_dataframe() # dtypes={"int64_col": "Int64"}) # timestamp_col: timestamp[us, tz=UTC] # time_col: time64[us] @@ -815,7 +817,12 @@ def test_list_rows_nullable_scalars_dtypes(bigquery_client, scalars_table): # bytes_col: binary # int64_col: int64 - assert df.dtypes is None + assert df.dtypes["datetime_col"].name == "datetime64[ns]" + assert df.dtypes["timestamp_col"].name == "datetime64[ns, UTC]" + assert df.dtypes["float64_col"].name == "float64" + assert df.dtypes["bool_col"].name == "boolean" + assert df.dtypes["date_col"].name == "datetime64[ns]" + assert df.dtypes["int64_col"].name == "Int64" # timestamp_col datetime64[ns, UTC] # time_col object <-- use Period? @@ -854,8 +861,6 @@ def test_list_rows_nullable_scalars_extreme_dtypes( # bytes_col: binary # int64_col: int64 - assert df.dtypes is None - # timestamp_col object # time_col object # float64_col float64 @@ -868,3 +873,16 @@ def test_list_rows_nullable_scalars_extreme_dtypes( # bool_col object # bytes_col object # int64_col float64 + + # Extreme values are out-of-bounds for pandas datetime64 values, which use + # nanosecond precision. 
Values before 1677-09-21 and after 2262-04-11 must + # be represented with object. + # https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#timestamp-limitations + assert df.dtypes["date_col"].name == "object" + assert df.dtypes["datetime_col"].name == "object" + assert df.dtypes["timestamp_col"].name == "object" + + # These pandas dtypes can handle the same ranges as BigQuery. + assert df.dtypes["float64_col"].name == "float64" + assert df.dtypes["bool_col"].name == "boolean" + assert df.dtypes["int64_col"].name == "Int64" From 69a747f2fa45c12029aa54eea79dd5e95299107d Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 23 Jul 2021 15:19:26 -0500 Subject: [PATCH 05/12] add expected types, REST test --- tests/system/test_pandas.py | 110 +++++++++++++----------------------- 1 file changed, 40 insertions(+), 70 deletions(-) diff --git a/tests/system/test_pandas.py b/tests/system/test_pandas.py index da6241171..06bf03c6b 100644 --- a/tests/system/test_pandas.py +++ b/tests/system/test_pandas.py @@ -794,85 +794,46 @@ def test_list_rows_max_results_w_bqstorage(bigquery_client): assert len(dataframe.index) == 100 -def test_list_rows_nullable_scalars_dtypes(bigquery_client, scalars_table): +@pytest.mark.parametrize( + ("max_results",), ((None,), (10,),) # Use BQ Storage API. # Use REST API. +) +def test_list_rows_nullable_scalars_dtypes(bigquery_client, scalars_table, max_results): df = bigquery_client.list_rows( - scalars_table - ).to_dataframe() # dtypes={"int64_col": "Int64"}) - - # timestamp_col: timestamp[us, tz=UTC] - # time_col: time64[us] - # float64_col: double - # datetime_col: timestamp[us] - # -- field metadata -- - # ARROW:extension:name: 'google:sqlType:datetime' - # bignumeric_col: decimal256(76, 38) - # numeric_col: decimal128(38, 9) - # geography_col: string - # -- field metadata -- - # ARROW:extension:name: 'google:sqlType:geography' - # ARROW:extension:metadata: '{"encoding": "WKT"}' - # date_col: date32[day] - # string_col: string - # bool_col: bool - # bytes_col: binary - # int64_col: int64 + scalars_table, max_results=max_results, + ).to_dataframe( + dtypes={ + "bool_col": "boolean", + "date_col": "datetime64[ns]", + "int64_col": "Int64", + } + ) - assert df.dtypes["datetime_col"].name == "datetime64[ns]" - assert df.dtypes["timestamp_col"].name == "datetime64[ns, UTC]" - assert df.dtypes["float64_col"].name == "float64" assert df.dtypes["bool_col"].name == "boolean" assert df.dtypes["date_col"].name == "datetime64[ns]" + assert df.dtypes["datetime_col"].name == "datetime64[ns]" + assert df.dtypes["float64_col"].name == "float64" assert df.dtypes["int64_col"].name == "Int64" + assert df.dtypes["time_col"].name == "timedelta64[ns]" + assert df.dtypes["timestamp_col"].name == "datetime64[ns, UTC]" + + # decimal.Decimal is used to avoid loss of precision. + assert df.dtypes["bignumeric_col"].name == "object" + assert df.dtypes["numeric_col"].name == "object" - # timestamp_col datetime64[ns, UTC] - # time_col object <-- use Period? 
- # float64_col float64 - # datetime_col datetime64[ns] - # bignumeric_col object <-- probably correct - # numeric_col object <-- probably correct - # geography_col object <-- https://github.com/googleapis/python-bigquery/issues/792 - # date_col object <-- per https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#overview, should be datetime64[ns] (where possible) - # string_col object - # bool_col object <-- maybe should be "boolean" (added in pandas 1.0.0) - # bytes_col object - # int64_col float64 <-- https://github.com/googleapis/python-bigquery/issues/793 + # pandas uses Python string and bytes objects. + assert df.dtypes["bytes_col"].name == "object" + assert df.dtypes["string_col"].name == "object" +@pytest.mark.parametrize( + ("max_results",), ((None,), (10,),) # Use BQ Storage API. # Use REST API. +) def test_list_rows_nullable_scalars_extreme_dtypes( - bigquery_client, scalars_extreme_table + bigquery_client, scalars_extreme_table, max_results ): - df = bigquery_client.list_rows(scalars_extreme_table).to_dataframe() - - # timestamp_col: timestamp[us, tz=UTC] - # time_col: time64[us] - # float64_col: double - # datetime_col: timestamp[us] - # -- field metadata -- - # ARROW:extension:name: 'google:sqlType:datetime' - # bignumeric_col: decimal256(76, 38) - # numeric_col: decimal128(38, 9) - # geography_col: string - # -- field metadata -- - # ARROW:extension:name: 'google:sqlType:geography' - # ARROW:extension:metadata: '{"encoding": "WKT"}' - # date_col: date32[day] - # string_col: string - # bool_col: bool - # bytes_col: binary - # int64_col: int64 - - # timestamp_col object - # time_col object - # float64_col float64 - # datetime_col object <-- correct, since extreme values are out-of-bounds - # bignumeric_col object - # numeric_col object - # geography_col object - # date_col object - # string_col object - # bool_col object - # bytes_col object - # int64_col float64 + df = bigquery_client.list_rows( + scalars_extreme_table, max_results=max_results + ).to_dataframe() # Extreme values are out-of-bounds for pandas datetime64 values, which use # nanosecond precision. Values before 1677-09-21 and after 2262-04-11 must @@ -883,6 +844,15 @@ def test_list_rows_nullable_scalars_extreme_dtypes( assert df.dtypes["timestamp_col"].name == "object" # These pandas dtypes can handle the same ranges as BigQuery. - assert df.dtypes["float64_col"].name == "float64" assert df.dtypes["bool_col"].name == "boolean" + assert df.dtypes["float64_col"].name == "float64" assert df.dtypes["int64_col"].name == "Int64" + assert df.dtypes["time_col"].name == "timedelta64[ns]" + + # decimal.Decimal is used to avoid loss of precision. + assert df.dtypes["numeric_col"].name == "object" + assert df.dtypes["bignumeric_col"].name == "object" + + # pandas uses Python string and bytes objects. 
+ assert df.dtypes["bytes_col"].name == "object" + assert df.dtypes["string_col"].name == "object" From 4f78e6d0e4619005581cb9447e992842ca7d1f62 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 23 Jul 2021 16:29:51 -0500 Subject: [PATCH 06/12] use dtype defaults for "easy" cases --- google/cloud/bigquery/_pandas_helpers.py | 31 ++++++++++++++++++++++++ google/cloud/bigquery/table.py | 10 ++++++++ tests/system/test_pandas.py | 12 +++------ 3 files changed, 44 insertions(+), 9 deletions(-) diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index 2ff96da4d..a9eb7546d 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -18,6 +18,7 @@ import functools import logging import queue +from typing import Sequence import warnings from packaging import version @@ -56,6 +57,14 @@ _MAX_QUEUE_SIZE_DEFAULT = object() # max queue size sentinel for BQ Storage downloads +_BQ_TO_PANDAS_DTYPE_NULLSAFE = { + "BOOL": "boolean", + "BOOLEAN": "boolean", + "FLOAT": "float64", + "FLOAT64": "float64", + "INT64": "Int64", + "INTEGER": "Int64", +} _PANDAS_DTYPE_TO_BQ = { "bool": "BOOLEAN", "datetime64[ns, UTC]": "TIMESTAMP", @@ -231,6 +240,28 @@ def bq_to_arrow_schema(bq_schema): return pyarrow.schema(arrow_fields) +def bq_schema_to_nullsafe_pandas_dtypes(bq_schema: Sequence[schema.SchemaField]): + """Return the default dtypes to use for columns in a BigQuery schema. + + Only returns default dtypes which are safe to have NULL values. This + includes Int64, which has pandas.NA values and does not result in + loss-of-precision. + + # TODO: document dtype mapping. + + Returns: + Dict[str, str]: mapping from column names to dtypes + """ + dtypes = {} + for bq_field in bq_schema: + if bq_field.mode.upper() not in {"NULLABLE", "REQUIRED"}: + continue + field_type = bq_field.field_type.upper() + if field_type in _BQ_TO_PANDAS_DTYPE_NULLSAFE: + dtypes[bq_field.name] = _BQ_TO_PANDAS_DTYPE_NULLSAFE[field_type] + return dtypes + + def bq_to_arrow_array(series, bq_field): arrow_type = bq_to_arrow_data_type(bq_field) diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 18d969a3f..1f1ea34bc 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -1906,6 +1906,14 @@ def to_dataframe( create_bqstorage_client=create_bqstorage_client, ) + # Let the user-defined dtypes override the default ones. + # https://stackoverflow.com/a/26853961/101923 + # TODO: test that this actually doesn't override + default_dtypes = _pandas_helpers.bq_schema_to_nullsafe_pandas_dtypes( + self.schema + ) + dtypes = {**default_dtypes, **dtypes} + # When converting timestamp values to nanosecond precision, the result # can be out of pyarrow bounds. To avoid the error when converting to # Pandas, we set the timestamp_as_object parameter to True, if necessary. @@ -1931,6 +1939,8 @@ def to_dataframe( for column in dtypes: df[column] = pandas.Series(df[column], dtype=dtypes[column]) + # TODO: convert TIME columns, maybe TIMESTAMP too? Only if dtypes was not set. 
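# [Editor's aside - illustrative sketch, not part of the patch] The
# `dtypes = {**default_dtypes, **dtypes}` merge added above gives any
# caller-supplied dtypes precedence over the nullsafe defaults derived from
# the BigQuery schema by bq_schema_to_nullsafe_pandas_dtypes. With
# hypothetical column names:
default_dtypes = {"int64_col": "Int64", "bool_col": "boolean"}
user_dtypes = {"int64_col": "int8"}  # the caller's choice wins for int64_col
merged = {**default_dtypes, **user_dtypes}
assert merged == {"int64_col": "int8", "bool_col": "boolean"}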
+ return df diff --git a/tests/system/test_pandas.py b/tests/system/test_pandas.py index 06bf03c6b..62704e326 100644 --- a/tests/system/test_pandas.py +++ b/tests/system/test_pandas.py @@ -800,20 +800,14 @@ def test_list_rows_max_results_w_bqstorage(bigquery_client): def test_list_rows_nullable_scalars_dtypes(bigquery_client, scalars_table, max_results): df = bigquery_client.list_rows( scalars_table, max_results=max_results, - ).to_dataframe( - dtypes={ - "bool_col": "boolean", - "date_col": "datetime64[ns]", - "int64_col": "Int64", - } - ) + ).to_dataframe() assert df.dtypes["bool_col"].name == "boolean" - assert df.dtypes["date_col"].name == "datetime64[ns]" + # TODO: assert df.dtypes["date_col"].name == "datetime64[ns]" assert df.dtypes["datetime_col"].name == "datetime64[ns]" assert df.dtypes["float64_col"].name == "float64" assert df.dtypes["int64_col"].name == "Int64" - assert df.dtypes["time_col"].name == "timedelta64[ns]" + # TODO: assert df.dtypes["time_col"].name == "timedelta64[ns]" assert df.dtypes["timestamp_col"].name == "datetime64[ns, UTC]" # decimal.Decimal is used to avoid loss of precision. From d53aa689b905af7135e78ac824d1620fde25e6d1 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 28 Jul 2021 11:49:44 -0500 Subject: [PATCH 07/12] add interval --- google/cloud/bigquery/_pandas_helpers.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index f7658c194..73266befa 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -59,6 +59,8 @@ "FLOAT64": "float64", "INT64": "Int64", "INTEGER": "Int64", + "INTERVAL": "timedelta64[ns]", # TODO: What happens when an interval is outside of ns range? + "TIME": "timedelta64[ns]", } _PANDAS_DTYPE_TO_BQ = { "bool": "BOOLEAN", From 6ceff2cfe29c47893fc16a01d3b3a4ba228e10cb Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 6 Aug 2021 17:01:21 -0500 Subject: [PATCH 08/12] WIP: split TIME and DATE into separate issues --- docs/conf.py | 1 + docs/usage/pandas.rst | 25 ++++++++++++++++++++++-- google/cloud/bigquery/_pandas_helpers.py | 7 ------- google/cloud/bigquery/table.py | 6 +++--- tests/system/test_pandas.py | 20 ++++++++++++++----- 5 files changed, 42 insertions(+), 17 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index cb347160d..09f7ea414 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -110,6 +110,7 @@ # directories to ignore when looking for source files. exclude_patterns = [ "_build", + "**/.nox/**/*", "samples/AUTHORING_GUIDE.md", "samples/CONTRIBUTING.md", "samples/snippets/README.rst", diff --git a/docs/usage/pandas.rst b/docs/usage/pandas.rst index 9db98dfbb..b08aebdd6 100644 --- a/docs/usage/pandas.rst +++ b/docs/usage/pandas.rst @@ -14,12 +14,12 @@ First, ensure that the :mod:`pandas` library is installed by running: pip install --upgrade pandas -Alternatively, you can install the BigQuery python client library with +Alternatively, you can install the BigQuery Python client library with :mod:`pandas` by running: .. code-block:: bash - pip install --upgrade google-cloud-bigquery[pandas] + pip install --upgrade 'google-cloud-bigquery[pandas]' To retrieve query results as a :class:`pandas.DataFrame`: @@ -37,6 +37,27 @@ To retrieve table rows as a :class:`pandas.DataFrame`: :start-after: [START bigquery_list_rows_dataframe] :end-before: [END bigquery_list_rows_dataframe] +The following data types are used when creating a pandas DataFrame. + +.. 
list-table:: Pandas Data Type Mapping + :header-rows: 1 + + * - BigQuery + - pandas + - Notes + * - BOOL + - boolean + - + * - DATETIME + - datetime64[ns], object + - object is used when there are values not representable in pandas + * - FLOAT64 + - float64 + - + * - INT64 + - Int64 + - + Load a Pandas DataFrame to a BigQuery Table ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index 73266befa..a8f491cac 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -43,11 +43,6 @@ _LOGGER = logging.getLogger(__name__) -_NO_BQSTORAGE_ERROR = ( - "The google-cloud-bigquery-storage library is not installed, " - "please install google-cloud-bigquery-storage to use bqstorage features." -) - _PROGRESS_INTERVAL = 0.2 # Maximum time between download status checks, in seconds. _MAX_QUEUE_SIZE_DEFAULT = object() # max queue size sentinel for BQ Storage downloads @@ -59,8 +54,6 @@ "FLOAT64": "float64", "INT64": "Int64", "INTEGER": "Int64", - "INTERVAL": "timedelta64[ns]", # TODO: What happens when an interval is outside of ns range? - "TIME": "timedelta64[ns]", } _PANDAS_DTYPE_TO_BQ = { "bool": "BOOLEAN", diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 9bee896bf..42761e7f4 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -1924,13 +1924,13 @@ def to_dataframe( extra_kwargs = {"timestamp_as_object": timestamp_as_object} - df = record_batch.to_pandas(date_as_object=date_as_object, **extra_kwargs) + df = record_batch.to_pandas( + date_as_object=date_as_object, integer_object_nulls=True, **extra_kwargs + ) for column in dtypes: df[column] = pandas.Series(df[column], dtype=dtypes[column]) - # TODO: convert TIME columns, maybe TIMESTAMP too? Only if dtypes was not set. - return df diff --git a/tests/system/test_pandas.py b/tests/system/test_pandas.py index 54347f4d6..411c9bed0 100644 --- a/tests/system/test_pandas.py +++ b/tests/system/test_pandas.py @@ -567,7 +567,7 @@ def test_query_results_to_dataframe(bigquery_client): for _, row in df.iterrows(): for col in column_names: # all the schema fields are nullable, so None is acceptable - if not row[col] is None: + if not pandas.isna(row[col]): assert isinstance(row[col], exp_datatypes[col]) @@ -597,7 +597,7 @@ def test_query_results_to_dataframe_w_bqstorage(bigquery_client): for index, row in df.iterrows(): for col in column_names: # all the schema fields are nullable, so None is acceptable - if not row[col] is None: + if not pandas.isna(row[col]): assert isinstance(row[col], exp_datatypes[col]) @@ -806,13 +806,20 @@ def test_list_rows_nullable_scalars_dtypes(bigquery_client, scalars_table, max_r ).to_dataframe() assert df.dtypes["bool_col"].name == "boolean" - # TODO: assert df.dtypes["date_col"].name == "datetime64[ns]" assert df.dtypes["datetime_col"].name == "datetime64[ns]" assert df.dtypes["float64_col"].name == "float64" assert df.dtypes["int64_col"].name == "Int64" - # TODO: assert df.dtypes["time_col"].name == "timedelta64[ns]" assert df.dtypes["timestamp_col"].name == "datetime64[ns, UTC]" + # object is used by default, but we can use "datetime64[ns]" automatically + # when data is within the supported range. 
+ # https://github.com/googleapis/python-bigquery/issues/861 + assert df.dtypes["date_col"].name == "object" + + # object is used by default, but we can use "timedelta64[ns]" automatically + # https://github.com/googleapis/python-bigquery/issues/862 + assert df.dtypes["time_col"].name == "object" + # decimal.Decimal is used to avoid loss of precision. assert df.dtypes["bignumeric_col"].name == "object" assert df.dtypes["numeric_col"].name == "object" @@ -844,7 +851,10 @@ def test_list_rows_nullable_scalars_extreme_dtypes( assert df.dtypes["bool_col"].name == "boolean" assert df.dtypes["float64_col"].name == "float64" assert df.dtypes["int64_col"].name == "Int64" - assert df.dtypes["time_col"].name == "timedelta64[ns]" + + # object is used by default, but we can use "timedelta64[ns]" automatically + # https://github.com/googleapis/python-bigquery/issues/862 + assert df.dtypes["time_col"].name == "object" # decimal.Decimal is used to avoid loss of precision. assert df.dtypes["numeric_col"].name == "object" From 18152d9a2af71b5c1828a830001579f3c4d69cce Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 9 Aug 2021 09:35:08 -0500 Subject: [PATCH 09/12] WIP: unit tests --- google/cloud/bigquery/_pandas_helpers.py | 3 +- tests/unit/test_table_pandas.py | 44 ++++++++++++++---------- 2 files changed, 26 insertions(+), 21 deletions(-) diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index a8f491cac..a3f6c0468 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -47,6 +47,7 @@ _MAX_QUEUE_SIZE_DEFAULT = object() # max queue size sentinel for BQ Storage downloads +# If you update the default dtypes, also update the docs at docs/usage/pandas.rst. _BQ_TO_PANDAS_DTYPE_NULLSAFE = { "BOOL": "boolean", "BOOLEAN": "boolean", @@ -228,8 +229,6 @@ def bq_schema_to_nullsafe_pandas_dtypes(bq_schema: Sequence[schema.SchemaField]) includes Int64, which has pandas.NA values and does not result in loss-of-precision. - # TODO: document dtype mapping. - Returns: Dict[str, str]: mapping from column names to dtypes """ diff --git a/tests/unit/test_table_pandas.py b/tests/unit/test_table_pandas.py index a2da48343..a6e320036 100644 --- a/tests/unit/test_table_pandas.py +++ b/tests/unit/test_table_pandas.py @@ -12,8 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. +import decimal from unittest import mock +import pyarrow import pytest from google.cloud import bigquery @@ -31,31 +33,35 @@ def class_under_test(): return RowIterator -def test_to_dataframe_defaults_to_nullable_dtypes(class_under_test): +def test_to_dataframe_defaults_to_nullable_dtypes(monkeypatch, class_under_test): + arrow_schema = pyarrow.schema( + [pyarrow.field("bignumeric_col", pyarrow.decimal256(76, scale=38))] + ) + arrow_table = pyarrow.Table.from_pydict( + {"bignumeric_col": [decimal.Decimal("123.456")]}, schema=arrow_schema, + ) + nullable_schema = [ - bigquery.SchemaField("date_col", "DATE"), - bigquery.SchemaField("datetime_col", "DATETIME"), - bigquery.SchemaField("float_col", "FLOAT"), - bigquery.SchemaField("float64_col", "FLOAT64"), - bigquery.SchemaField("integer_col", "INTEGER"), - bigquery.SchemaField("int64_col", "INT64"), - bigquery.SchemaField( - "time_col", "TIME" - ), # TODO: use timedelta64 dtype for this? 
- bigquery.SchemaField("timestamp_col", "TIMESTAMP"), + bigquery.SchemaField("bignumeric_col", "BIGNUMERIC"), + # bigquery.SchemaField("date_col", "DATE"), + # bigquery.SchemaField("datetime_col", "DATETIME"), + # bigquery.SchemaField("float_col", "FLOAT"), + # bigquery.SchemaField("float64_col", "FLOAT64"), + # bigquery.SchemaField("integer_col", "INTEGER"), + # bigquery.SchemaField("int64_col", "INT64"), + # bigquery.SchemaField( "time_col", "TIME"), + # bigquery.SchemaField("timestamp_col", "TIMESTAMP"), ] mock_client = mock.create_autospec(bigquery.Client) mock_client.project = "test-proj" mock_api_request = mock.Mock() - rows = class_under_test(mock_client, mock_api_request, TEST_PATH, nullable_schema,) - rows.to_dataframe() # TODO: if we are always using BQ Storage API for - # to_dataframe, maybe wait to implement until after required? - # TODO: behavior is based on schema (and data rows) - assert False - + mock_to_arrow = mock.Mock() + mock_to_arrow.return_value = arrow_table + rows = class_under_test(mock_client, mock_api_request, TEST_PATH, nullable_schema) + monkeypatch.setattr(rows, "to_arrow", mock_to_arrow) + rows.to_dataframe() -def test_to_dataframe_bqstorage_defaults_to_nullable_dtypes(class_under_test): - # TODO: behavior is based on schema (and data rows) + # TODO: check dtypes, check values assert False From 2e957cda82f4dafad92eafe48cbd12d2c46957f2 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 9 Aug 2021 15:03:56 -0500 Subject: [PATCH 10/12] add tests, update minimum pandas version --- google/cloud/bigquery/table.py | 7 +- setup.py | 2 +- testing/constraints-3.6.txt | 2 +- tests/unit/job/test_query_pandas.py | 22 +---- tests/unit/test_table.py | 8 +- tests/unit/test_table_pandas.py | 133 +++++++++++++++++++++++----- 6 files changed, 125 insertions(+), 49 deletions(-) diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 42761e7f4..d2e992dca 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -1895,13 +1895,12 @@ def to_dataframe( bqstorage_client=bqstorage_client, create_bqstorage_client=create_bqstorage_client, ) - - # Let the user-defined dtypes override the default ones. - # https://stackoverflow.com/a/26853961/101923 - # TODO: test that this actually doesn't override default_dtypes = _pandas_helpers.bq_schema_to_nullsafe_pandas_dtypes( self.schema ) + + # Let the user-defined dtypes override the default ones. + # https://stackoverflow.com/a/26853961/101923 dtypes = {**default_dtypes, **dtypes} # When converting timestamp values to nanosecond precision, the result diff --git a/setup.py b/setup.py index 5205b5365..6fa619d37 100644 --- a/setup.py +++ b/setup.py @@ -50,7 +50,7 @@ # Keep the no-op bqstorage extra for backward compatibility. 
# See: https://github.com/googleapis/python-bigquery/issues/757 "bqstorage": [], - "pandas": ["pandas>=0.23.0"], + "pandas": ["pandas>=1.0.0"], "tqdm": ["tqdm >= 4.7.4, <5.0.0dev"], "opentelemetry": [ "opentelemetry-api >= 0.11b0", diff --git a/testing/constraints-3.6.txt b/testing/constraints-3.6.txt index ce012f0d7..bf1f89f58 100644 --- a/testing/constraints-3.6.txt +++ b/testing/constraints-3.6.txt @@ -13,7 +13,7 @@ grpcio==1.38.1 opentelemetry-api==0.11b0 opentelemetry-instrumentation==0.11b0 opentelemetry-sdk==0.11b0 -pandas==0.23.0 +pandas==1.0.0 proto-plus==1.10.0 protobuf==3.12.0 pyarrow==3.0.0 diff --git a/tests/unit/job/test_query_pandas.py b/tests/unit/job/test_query_pandas.py index e5105974f..c3a9d2d1a 100644 --- a/tests/unit/job/test_query_pandas.py +++ b/tests/unit/job/test_query_pandas.py @@ -20,11 +20,6 @@ import pyarrow import pytest -try: - import pandas -except (ImportError, AttributeError): # pragma: NO COVER - pandas = None - from google.cloud import bigquery_storage try: @@ -36,6 +31,8 @@ from .helpers import _make_connection from .helpers import _make_job_resource +pandas = pytest.importorskip("pandas") + @pytest.fixture def table_read_options_kwarg(): @@ -78,7 +75,6 @@ def test__contains_order_by(query, expected): assert not mut._contains_order_by(query) -@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") @pytest.mark.parametrize( "query", ( @@ -413,7 +409,6 @@ def test_to_arrow_w_tqdm_wo_query_plan(): result_patch_tqdm.assert_called() -@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test_to_dataframe(): from google.cloud.bigquery.job import QueryJob as target_class @@ -452,7 +447,6 @@ def test_to_dataframe(): assert list(df) == ["name", "age"] # verify the column names -@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test_to_dataframe_ddl_query(): from google.cloud.bigquery.job import QueryJob as target_class @@ -472,7 +466,6 @@ def test_to_dataframe_ddl_query(): assert len(df) == 0 -@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test_to_dataframe_bqstorage(table_read_options_kwarg): from google.cloud.bigquery.job import QueryJob as target_class @@ -522,7 +515,6 @@ def test_to_dataframe_bqstorage(table_read_options_kwarg): ) -@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test_to_dataframe_bqstorage_no_pyarrow_compression(): from google.cloud.bigquery.job import QueryJob as target_class @@ -565,7 +557,6 @@ def test_to_dataframe_bqstorage_no_pyarrow_compression(): ) -@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test_to_dataframe_column_dtypes(): from google.cloud.bigquery.job import QueryJob as target_class @@ -617,15 +608,14 @@ def test_to_dataframe_column_dtypes(): assert list(df) == exp_columns # verify the column names assert df.start_timestamp.dtype.name == "datetime64[ns, UTC]" - assert df.seconds.dtype.name == "int64" + assert df.seconds.dtype.name == "Int64" assert df.miles.dtype.name == "float64" assert df.km.dtype.name == "float16" assert df.payment_type.dtype.name == "object" - assert df.complete.dtype.name == "bool" + assert df.complete.dtype.name == "boolean" assert df.date.dtype.name == "object" -@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test_to_dataframe_column_date_dtypes(): from google.cloud.bigquery.job import QueryJob as target_class @@ -657,7 +647,6 @@ def test_to_dataframe_column_date_dtypes(): assert df.date.dtype.name == "datetime64[ns]" -@pytest.mark.skipif(pandas is None, 
reason="Requires `pandas`") @pytest.mark.skipif(tqdm is None, reason="Requires `tqdm`") @mock.patch("tqdm.tqdm") def test_to_dataframe_with_progress_bar(tqdm_mock): @@ -685,7 +674,6 @@ def test_to_dataframe_with_progress_bar(tqdm_mock): tqdm_mock.assert_called() -@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") @pytest.mark.skipif(tqdm is None, reason="Requires `tqdm`") def test_to_dataframe_w_tqdm_pending(): from google.cloud.bigquery import table @@ -741,7 +729,6 @@ def test_to_dataframe_w_tqdm_pending(): ) -@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") @pytest.mark.skipif(tqdm is None, reason="Requires `tqdm`") def test_to_dataframe_w_tqdm(): from google.cloud.bigquery import table @@ -801,7 +788,6 @@ def test_to_dataframe_w_tqdm(): ) -@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") @pytest.mark.skipif(tqdm is None, reason="Requires `tqdm`") def test_to_dataframe_w_tqdm_max_results(): from google.cloud.bigquery import table diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index 20336b227..bd1bdad29 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -2578,7 +2578,7 @@ def test_to_dataframe(self): self.assertEqual(len(df), 4) # verify the number of rows self.assertEqual(list(df), ["name", "age"]) # verify the column names self.assertEqual(df.name.dtype.name, "object") - self.assertEqual(df.age.dtype.name, "int64") + self.assertEqual(df.age.dtype.name, "Int64") @unittest.skipIf(pandas is None, "Requires `pandas`") def test_to_dataframe_timestamp_out_of_pyarrow_bounds(self): @@ -2821,7 +2821,7 @@ def test_to_dataframe_w_various_types_nullable(self): self.assertTrue(row.isnull().all()) else: self.assertIsInstance(row.start_timestamp, pandas.Timestamp) - self.assertIsInstance(row.seconds, float) + self.assertIsInstance(row.seconds, int) self.assertIsInstance(row.payment_type, str) self.assertIsInstance(row.complete, bool) self.assertIsInstance(row.date, datetime.date) @@ -2867,11 +2867,11 @@ def test_to_dataframe_column_dtypes(self): self.assertEqual(list(df), exp_columns) # verify the column names self.assertEqual(df.start_timestamp.dtype.name, "datetime64[ns, UTC]") - self.assertEqual(df.seconds.dtype.name, "int64") + self.assertEqual(df.seconds.dtype.name, "Int64") self.assertEqual(df.miles.dtype.name, "float64") self.assertEqual(df.km.dtype.name, "float16") self.assertEqual(df.payment_type.dtype.name, "object") - self.assertEqual(df.complete.dtype.name, "bool") + self.assertEqual(df.complete.dtype.name, "boolean") self.assertEqual(df.date.dtype.name, "object") @mock.patch("google.cloud.bigquery.table.pandas", new=None) diff --git a/tests/unit/test_table_pandas.py b/tests/unit/test_table_pandas.py index a6e320036..a9f7ed58a 100644 --- a/tests/unit/test_table_pandas.py +++ b/tests/unit/test_table_pandas.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import datetime import decimal from unittest import mock @@ -34,23 +35,57 @@ def class_under_test(): def test_to_dataframe_defaults_to_nullable_dtypes(monkeypatch, class_under_test): + # See tests/system/test_arrow.py for the actual types we get from the API. 
arrow_schema = pyarrow.schema( - [pyarrow.field("bignumeric_col", pyarrow.decimal256(76, scale=38))] + [ + pyarrow.field("bignumeric_col", pyarrow.decimal256(76, scale=38)), + pyarrow.field("bool_col", pyarrow.bool_()), + pyarrow.field("bytes_col", pyarrow.binary()), + pyarrow.field("date_col", pyarrow.date32()), + pyarrow.field("datetime_col", pyarrow.timestamp("us", tz=None)), + pyarrow.field("float64_col", pyarrow.float64()), + pyarrow.field("int64_col", pyarrow.int64()), + pyarrow.field("numeric_col", pyarrow.decimal128(38, scale=9)), + pyarrow.field("string_col", pyarrow.string()), + pyarrow.field("time_col", pyarrow.time64("us")), + pyarrow.field( + "timestamp_col", pyarrow.timestamp("us", tz=datetime.timezone.utc) + ), + ] ) arrow_table = pyarrow.Table.from_pydict( - {"bignumeric_col": [decimal.Decimal("123.456")]}, schema=arrow_schema, + { + "bignumeric_col": [decimal.Decimal("123.456789101112131415")], + "bool_col": [True], + "bytes_col": [b"Hello,\x00World!"], + "date_col": [datetime.date(2021, 8, 9)], + "datetime_col": [datetime.datetime(2021, 8, 9, 13, 30, 44, 123456)], + "float64_col": [1.25], + "int64_col": [-7], + "numeric_col": [decimal.Decimal("-123.456789")], + "string_col": ["abcdefg"], + "time_col": [datetime.time(14, 21, 17, 123456)], + "timestamp_col": [ + datetime.datetime( + 2021, 8, 9, 13, 30, 44, 123456, tzinfo=datetime.timezone.utc + ) + ], + }, + schema=arrow_schema, ) nullable_schema = [ bigquery.SchemaField("bignumeric_col", "BIGNUMERIC"), - # bigquery.SchemaField("date_col", "DATE"), - # bigquery.SchemaField("datetime_col", "DATETIME"), - # bigquery.SchemaField("float_col", "FLOAT"), - # bigquery.SchemaField("float64_col", "FLOAT64"), - # bigquery.SchemaField("integer_col", "INTEGER"), - # bigquery.SchemaField("int64_col", "INT64"), - # bigquery.SchemaField( "time_col", "TIME"), - # bigquery.SchemaField("timestamp_col", "TIMESTAMP"), + bigquery.SchemaField("bool_col", "BOOLEAN"), + bigquery.SchemaField("bytes_col", "BYTES"), + bigquery.SchemaField("date_col", "DATE"), + bigquery.SchemaField("datetime_col", "DATETIME"), + bigquery.SchemaField("float64_col", "FLOAT"), + bigquery.SchemaField("int64_col", "INT64"), + bigquery.SchemaField("numeric_col", "NUMERIC"), + bigquery.SchemaField("string_col", "STRING"), + bigquery.SchemaField("time_col", "TIME"), + bigquery.SchemaField("timestamp_col", "TIMESTAMP"), ] mock_client = mock.create_autospec(bigquery.Client) mock_client.project = "test-proj" @@ -59,17 +94,73 @@ def test_to_dataframe_defaults_to_nullable_dtypes(monkeypatch, class_under_test) mock_to_arrow.return_value = arrow_table rows = class_under_test(mock_client, mock_api_request, TEST_PATH, nullable_schema) monkeypatch.setattr(rows, "to_arrow", mock_to_arrow) - rows.to_dataframe() - - # TODO: check dtypes, check values - assert False - - -def test_to_dataframe_overrides_nullable_dtypes(class_under_test): + df = rows.to_dataframe() + + # Check for expected dtypes. 
+ # Keep these in sync with tests/system/test_pandas.py + assert df.dtypes["bignumeric_col"].name == "object" + assert df.dtypes["bool_col"].name == "boolean" + assert df.dtypes["bytes_col"].name == "object" + assert df.dtypes["date_col"].name == "object" + assert df.dtypes["datetime_col"].name == "datetime64[ns]" + assert df.dtypes["float64_col"].name == "float64" + assert df.dtypes["int64_col"].name == "Int64" + assert df.dtypes["numeric_col"].name == "object" + assert df.dtypes["string_col"].name == "object" + assert df.dtypes["time_col"].name == "object" + assert df.dtypes["timestamp_col"].name == "datetime64[ns, UTC]" + + # Check for expected values. + assert df["bignumeric_col"][0] == decimal.Decimal("123.456789101112131415") + assert df["bool_col"][0] # True + assert df["bytes_col"][0] == b"Hello,\x00World!" + + # object is used by default, but we can use "datetime64[ns]" automatically + # when data is within the supported range. + # https://github.com/googleapis/python-bigquery/issues/861 + assert df["date_col"][0] == datetime.date(2021, 8, 9) + + assert df["datetime_col"][0] == pandas.to_datetime("2021-08-09 13:30:44.123456") + assert df["float64_col"][0] == 1.25 + assert df["int64_col"][0] == -7 + assert df["numeric_col"][0] == decimal.Decimal("-123.456789") + assert df["string_col"][0] == "abcdefg" + + # Pandas timedelta64 might be a better choice for pandas time columns. Then + # they can more easily be combined with date columns to form datetimes. + # https://github.com/googleapis/python-bigquery/issues/862 + assert df["time_col"][0] == datetime.time(14, 21, 17, 123456) + + assert df["timestamp_col"][0] == pandas.to_datetime("2021-08-09 13:30:44.123456Z") + + +def test_to_dataframe_overrides_nullable_dtypes(monkeypatch, class_under_test): """Passing in explicit dtypes is merged with default behavior.""" - assert False + arrow_schema = pyarrow.schema( + [ + pyarrow.field("int64_col", pyarrow.int64()), + pyarrow.field("other_int_col", pyarrow.int64()), + ] + ) + arrow_table = pyarrow.Table.from_pydict( + {"int64_col": [1000], "other_int_col": [-7]}, schema=arrow_schema, + ) + + nullable_schema = [ + bigquery.SchemaField("int64_col", "INT64"), + bigquery.SchemaField("other_int_col", "INT64"), + ] + mock_client = mock.create_autospec(bigquery.Client) + mock_client.project = "test-proj" + mock_api_request = mock.Mock() + mock_to_arrow = mock.Mock() + mock_to_arrow.return_value = arrow_table + rows = class_under_test(mock_client, mock_api_request, TEST_PATH, nullable_schema) + monkeypatch.setattr(rows, "to_arrow", mock_to_arrow) + df = rows.to_dataframe(dtypes={"other_int_col": "int8"}) + assert df.dtypes["int64_col"].name == "Int64" + assert df["int64_col"][0] == 1000 -def test_to_dataframe_bqstorage_overrides_nullable_dtypes(class_under_test): - """Passing in explicit dtypes is merged with default behavior.""" - assert False + assert df.dtypes["other_int_col"].name == "int8" + assert df["other_int_col"][0] == -7 From 8f90c511d9e97c0341352b71ee4499d886c9c78d Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 11 Aug 2021 09:58:15 -0500 Subject: [PATCH 11/12] add unit test for repeated fields --- tests/unit/test_table_pandas.py | 30 ++++++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/tests/unit/test_table_pandas.py b/tests/unit/test_table_pandas.py index a9f7ed58a..a223e6652 100644 --- a/tests/unit/test_table_pandas.py +++ b/tests/unit/test_table_pandas.py @@ -34,7 +34,7 @@ def class_under_test(): return 
RowIterator -def test_to_dataframe_defaults_to_nullable_dtypes(monkeypatch, class_under_test): +def test_to_dataframe_nullable_scalars(monkeypatch, class_under_test): # See tests/system/test_arrow.py for the actual types we get from the API. arrow_schema = pyarrow.schema( [ @@ -134,7 +134,9 @@ def test_to_dataframe_defaults_to_nullable_dtypes(monkeypatch, class_under_test) assert df["timestamp_col"][0] == pandas.to_datetime("2021-08-09 13:30:44.123456Z") -def test_to_dataframe_overrides_nullable_dtypes(monkeypatch, class_under_test): +def test_to_dataframe_nullable_scalars_with_custom_dtypes( + monkeypatch, class_under_test +): """Passing in explicit dtypes is merged with default behavior.""" arrow_schema = pyarrow.schema( [ @@ -164,3 +166,27 @@ def test_to_dataframe_overrides_nullable_dtypes(monkeypatch, class_under_test): assert df.dtypes["other_int_col"].name == "int8" assert df["other_int_col"][0] == -7 + + +def test_to_dataframe_arrays(monkeypatch, class_under_test): + arrow_schema = pyarrow.schema( + [pyarrow.field("int64_repeated", pyarrow.list_(pyarrow.int64()))] + ) + arrow_table = pyarrow.Table.from_pydict( + {"int64_repeated": [[-1, 0, 2]]}, schema=arrow_schema, + ) + + nullable_schema = [ + bigquery.SchemaField("int64_repeated", "INT64", mode="REPEATED"), + ] + mock_client = mock.create_autospec(bigquery.Client) + mock_client.project = "test-proj" + mock_api_request = mock.Mock() + mock_to_arrow = mock.Mock() + mock_to_arrow.return_value = arrow_table + rows = class_under_test(mock_client, mock_api_request, TEST_PATH, nullable_schema) + monkeypatch.setattr(rows, "to_arrow", mock_to_arrow) + df = rows.to_dataframe() + + assert df.dtypes["int64_repeated"].name == "object" + assert tuple(df["int64_repeated"][0]) == (-1, 0, 2) From 3155dab5e8a8e6c126152399e5ba5c50dc7ef4e9 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 16 Aug 2021 10:07:25 -0500 Subject: [PATCH 12/12] Address docs nits --- docs/usage/pandas.rst | 2 +- google/cloud/bigquery/_pandas_helpers.py | 8 +++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/docs/usage/pandas.rst b/docs/usage/pandas.rst index b08aebdd6..40732a298 100644 --- a/docs/usage/pandas.rst +++ b/docs/usage/pandas.rst @@ -66,7 +66,7 @@ As of version 1.3.0, you can use the to load data from a :class:`pandas.DataFrame` to a :class:`~google.cloud.bigquery.table.Table`. To use this function, in addition to :mod:`pandas`, you will need to install the :mod:`pyarrow` library. You can -install the BigQuery python client library with :mod:`pandas` and +install the BigQuery Python client library with :mod:`pandas` and :mod:`pyarrow` by running: .. code-block:: bash diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index a3f6c0468..88759bd18 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -18,7 +18,7 @@ import functools import logging import queue -from typing import Sequence +from typing import Dict, Sequence import warnings try: @@ -222,7 +222,9 @@ def bq_to_arrow_schema(bq_schema): return pyarrow.schema(arrow_fields) -def bq_schema_to_nullsafe_pandas_dtypes(bq_schema: Sequence[schema.SchemaField]): +def bq_schema_to_nullsafe_pandas_dtypes( + bq_schema: Sequence[schema.SchemaField], +) -> Dict[str, str]: """Return the default dtypes to use for columns in a BigQuery schema. Only returns default dtypes which are safe to have NULL values. 
This @@ -230,7 +232,7 @@ def bq_schema_to_nullsafe_pandas_dtypes(bq_schema: Sequence[schema.SchemaField]) loss-of-precision. Returns: - Dict[str, str]: mapping from column names to dtypes + A mapping from column names to pandas dtypes. """ dtypes = {} for bq_field in bq_schema:
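
Note on the helper touched in the last hunk: based on the unit tests earlier in this series, only columns whose pandas representation can hold NULLs without loss receive a default dtype (BOOLEAN -> "boolean", INTEGER/INT64 -> "Int64", FLOAT/FLOAT64 -> "float64"); DATE, TIME, NUMERIC, BYTES, STRING, and REPEATED fields keep the existing object/datetime behavior. The following is a minimal sketch of that idea only, assuming the public `SchemaField` attributes `name`, `field_type`, and `mode`; the names `_NULLSAFE_DTYPES` and `nullsafe_dtypes_sketch` are illustrative and this is not the actual body of `bq_schema_to_nullsafe_pandas_dtypes`.

.. code-block:: python

    from typing import Dict, Sequence

    from google.cloud.bigquery import schema

    # Hypothetical mapping for illustration; the library's real table may differ.
    _NULLSAFE_DTYPES = {
        "BOOLEAN": "boolean",
        "BOOL": "boolean",
        "INTEGER": "Int64",
        "INT64": "Int64",
        "FLOAT": "float64",
        "FLOAT64": "float64",
    }


    def nullsafe_dtypes_sketch(
        bq_schema: Sequence[schema.SchemaField],
    ) -> Dict[str, str]:
        """Map scalar columns to pandas dtypes that can represent NULL."""
        dtypes = {}
        for bq_field in bq_schema:
            if (bq_field.mode or "NULLABLE").upper() == "REPEATED":
                # Repeated fields stay as object-dtype arrays of values.
                continue
            dtype = _NULLSAFE_DTYPES.get(bq_field.field_type.upper())
            if dtype is not None:
                dtypes[bq_field.name] = dtype
        return dtypes

`RowIterator.to_dataframe` then merges these defaults with any user-supplied `dtypes`, and the explicit argument wins, which is what the `int8` override in `test_to_dataframe_nullable_scalars_with_custom_dtypes` exercises.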