Merged
bigquery_storage/to_dataframe/main_test.py — 57 changes: 7 additions & 50 deletions
@@ -12,8 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import uuid
-
 import pytest
 
 
@@ -72,48 +70,11 @@ def test_table_to_dataframe(capsys, clients):
assert "country_name" in out


@pytest.fixture
def temporary_dataset(clients):
from google.cloud import bigquery

bqclient, _ = clients

# [START bigquerystorage_pandas_tutorial_all]
# [START bigquerystorage_pandas_tutorial_create_dataset]
# Set the dataset_id to the dataset used to store temporary results.
dataset_id = "query_results_dataset"
# [END bigquerystorage_pandas_tutorial_create_dataset]
# [END bigquerystorage_pandas_tutorial_all]

dataset_id = "bqstorage_to_dataset_{}".format(uuid.uuid4().hex)

# [START bigquerystorage_pandas_tutorial_all]
# [START bigquerystorage_pandas_tutorial_create_dataset]
dataset_ref = bqclient.dataset(dataset_id)
dataset = bigquery.Dataset(dataset_ref)

# Remove tables after 24 hours.
dataset.default_table_expiration_ms = 1000 * 60 * 60 * 24

bqclient.create_dataset(dataset) # API request.
# [END bigquerystorage_pandas_tutorial_create_dataset]
# [END bigquerystorage_pandas_tutorial_all]
yield dataset_ref
# [START bigquerystorage_pandas_tutorial_cleanup]
bqclient.delete_dataset(dataset_ref, delete_contents=True)
# [END bigquerystorage_pandas_tutorial_cleanup]


def test_query_to_dataframe(capsys, clients, temporary_dataset):
from google.cloud import bigquery

def test_query_to_dataframe(capsys, clients):
bqclient, bqstorageclient = clients
dataset_ref = temporary_dataset

# [START bigquerystorage_pandas_tutorial_all]
# [START bigquerystorage_pandas_tutorial_read_query_results]
import uuid

# Download query results.
query_string = """
SELECT
@@ -125,19 +86,15 @@ def test_query_to_dataframe(capsys, clients, temporary_dataset):
         WHERE tags like '%google-bigquery%'
         ORDER BY view_count DESC
     """
-    # Use a random table name to avoid overwriting existing tables.
-    table_id = "queryresults_" + uuid.uuid4().hex
-    table = dataset_ref.table(table_id)
-    query_config = bigquery.QueryJobConfig(
-        # Due to a known issue in the BigQuery Storage API, small query result
-        # sets cannot be downloaded. To workaround this issue, write results to
-        # a destination table.
-        destination=table
-    )
-
     dataframe = (
-        bqclient.query(query_string, job_config=query_config)
+        bqclient.query(query_string)
         .result()
+
+        # Note: The BigQuery Storage API cannot be used to download small query
+        # results, but as of google-cloud-bigquery version 1.11.1, the
+        # to_dataframe method will fall back to the tabledata.list API when the
+        # BigQuery Storage API fails to read the query results.
         .to_dataframe(bqstorage_client=bqstorageclient)
     )
     print(dataframe.head())
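For readers following along, here is a minimal sketch of the simplified flow this change enables, separate from the test itself. It assumes google-cloud-bigquery 1.11.1 and google-cloud-bigquery-storage 0.3.0 (as pinned in requirements.txt below), plus application-default credentials with a billable project. The client setup mirrors what the test's clients fixture presumably provides, and the query is an illustrative stand-in, not the test's own query.

# A sketch of the workaround-free flow, not the test itself. Assumes
# application-default credentials and a billing project are configured.
import google.auth
from google.cloud import bigquery
from google.cloud import bigquery_storage_v1beta1

# Create both clients with the same scoped credentials.
credentials, project_id = google.auth.default(
    scopes=["https://www.googleapis.com/auth/cloud-platform"]
)
bqclient = bigquery.Client(credentials=credentials, project=project_id)
bqstorageclient = bigquery_storage_v1beta1.BigQueryStorageClient(
    credentials=credentials
)

# Illustrative query against a public dataset (an assumption for this sketch).
query_string = """
    SELECT name, SUM(number) AS total
    FROM `bigquery-public-data.usa_names.usa_1910_current`
    GROUP BY name
    ORDER BY total DESC
    LIMIT 10
"""

# No destination table or temporary dataset is needed: when the BigQuery
# Storage API cannot read a small result set, to_dataframe falls back to
# the tabledata.list API (google-cloud-bigquery >= 1.11.1).
dataframe = (
    bqclient.query(query_string)
    .result()
    .to_dataframe(bqstorage_client=bqstorageclient)
)
print(dataframe.head())

This is why the PR can delete the temporary_dataset fixture and the destination-table job config outright: the fallback makes small result sets downloadable without routing them through an intermediate table.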
bigquery_storage/to_dataframe/requirements.txt — 4 changes: 2 additions & 2 deletions
@@ -1,5 +1,5 @@
 google-auth==1.6.2
-google-cloud-bigquery-storage==0.2.0
-google-cloud-bigquery==1.8.1
+google-cloud-bigquery-storage==0.3.0
+google-cloud-bigquery==1.11.1
 fastavro==0.21.17
 pandas==0.24.0
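As a quick sanity check that the pinned versions are what is actually installed (a convenience sketch, not part of the sample):

# Print the installed versions of the pinned BigQuery packages; expect
# 1.11.1 and 0.3.0 per requirements.txt. Uses setuptools' pkg_resources.
import pkg_resources

for package in ("google-cloud-bigquery", "google-cloud-bigquery-storage"):
    print(package, pkg_resources.get_distribution(package).version)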