Skip to content

Commit 3284a0d

Browse files
authored
BQ: client.extract_table starts extract job (#3991)
* BQ: client.extract_table starts extract job. Add system tests for extract_table.
* BigQuery: client.extract_table: use `**kwargs` for Python 2.7.
* BQ: extract_table: use dict.get for kwargs; use job_id instead of job_name.
1 parent bb4bc6c commit 3284a0d

8 files changed

Lines changed: 484 additions & 100 deletions

File tree

bigquery/google/cloud/bigquery/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
from google.cloud.bigquery.client import Client
3333
from google.cloud.bigquery.dataset import AccessEntry
3434
from google.cloud.bigquery.dataset import Dataset
35+
from google.cloud.bigquery.job import ExtractJobConfig
3536
from google.cloud.bigquery.schema import SchemaField
3637
from google.cloud.bigquery.table import Table
3738

@@ -41,6 +42,7 @@
4142
'ArrayQueryParameter',
4243
'Client',
4344
'Dataset',
45+
'ExtractJobConfig',
4446
'ScalarQueryParameter',
4547
'SchemaField',
4648
'StructQueryParameter',

bigquery/google/cloud/bigquery/_helpers.py

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -299,6 +299,82 @@ def _time_to_json(value):
299299
_SCALAR_VALUE_TO_JSON_PARAM['TIMESTAMP'] = _timestamp_to_json_parameter
300300

301301

302+
class _ApiResourceProperty(object):
    """Base property implementation.

    This is a data descriptor: values are stored on a ``_properties``
    helper attribute of the instance that owns the property, keyed by
    ``resource_name``.

    :type name: str
    :param name: name of the property

    :type resource_name: str
    :param resource_name: name of the property in the resource dictionary
    """

    def __init__(self, name, resource_name):
        self.name = name
        self.resource_name = resource_name

    def __get__(self, instance, owner):
        """Descriptor protocol: accessor

        :rtype: object
        :returns: the descriptor itself when accessed on the class
                  (``instance`` is ``None``); otherwise the value stored
                  under ``resource_name``, or ``None`` if nothing is stored.
        """
        if instance is None:
            # Class-level access (e.g. ``Owner.prop``): expose the descriptor.
            return self
        return instance._properties.get(self.resource_name)

    def _validate(self, value):
        """Subclasses override to impose validation policy.

        The base implementation accepts any value.
        """
        pass

    def __set__(self, instance, value):
        """Descriptor protocol: mutator

        Validation runs first, so a rejected value is never stored.
        """
        self._validate(value)
        instance._properties[self.resource_name] = value

    def __delete__(self, instance):
        """Descriptor protocol: deleter

        Deleting a property that was never set propagates the lookup error
        raised by ``_properties`` (``KeyError`` for a plain ``dict``).
        """
        del instance._properties[self.resource_name]
337+
338+
339+
class _TypedApiResourceProperty(_ApiResourceProperty):
    """Property implementation: validates based on value type.

    :type name: str
    :param name: name of the property

    :type resource_name: str
    :param resource_name: name of the property in the resource dictionary

    :type property_type: type or tuple of types
    :param property_type: type to be validated. The value is handed
                          directly to :func:`isinstance`, so several
                          accepted types must be given as a tuple.
    """

    def __init__(self, name, resource_name, property_type):
        super(_TypedApiResourceProperty, self).__init__(
            name, resource_name)
        self.property_type = property_type

    def _validate(self, value):
        """Ensure that 'value' is of the appropriate type.

        ``None`` is always accepted, whatever ``property_type`` is.

        :raises: ValueError on a type mismatch.
        """
        if value is None:
            return
        if not isinstance(value, self.property_type):
            # The 1-tuple keeps ``%`` formatting correct when
            # ``property_type`` is itself a tuple of types.
            raise ValueError('Required type: %s' % (self.property_type,))
365+
366+
367+
class _EnumApiResourceProperty(_ApiResourceProperty):
    """Pseudo-enumeration class.

    Defines no behavior of its own here: storage and the (permissive)
    validation are inherited unchanged from the base property class.

    :type name: str
    :param name: name of the property.

    :type resource_name: str
    :param resource_name: name of the property in the resource dictionary
    """
376+
377+
302378
class _ConfigurationProperty(object):
303379
"""Base property implementation.
304380

bigquery/google/cloud/bigquery/client.py

Lines changed: 29 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@
1616

1717
from __future__ import absolute_import
1818

19+
import uuid
20+
1921
from google.api.core import page_iterator
2022
from google.cloud.client import ClientWithProject
2123
from google.cloud.bigquery._http import Connection
@@ -385,27 +387,44 @@ def copy_table(self, job_id, destination, *sources):
385387
"""
386388
return CopyJob(job_id, destination, sources, client=self)
387389

388-
def extract_table_to_storage(self, job_id, source, *destination_uris):
389-
"""Construct a job for extracting a table into Cloud Storage files.
390+
def extract_table(self, source, *destination_uris, **kwargs):
    """Start a job to extract a table into Cloud Storage files.

    See
    https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.extract

    :type source: :class:`google.cloud.bigquery.table.TableReference`
    :param source: table to be extracted.

    :type destination_uris: sequence of string
    :param destination_uris:
        URIs of Cloud Storage file(s) into which table data is to be
        extracted; in format ``gs://<bucket_name>/<object_name_or_glob>``.

    :type kwargs: dict
    :param kwargs: Additional keyword arguments.

    :Keyword Arguments:
        * *job_config*
          (:class:`google.cloud.bigquery.job.ExtractJobConfig`) --
          (Optional) Extra configuration options for the extract job.
        * *job_id* (``str``) --
          (Optional) The ID of the job. A random UUID4 string is used
          when it is omitted or ``None``.

    :rtype: :class:`google.cloud.bigquery.job.ExtractJob`
    :returns: a new ``ExtractJob`` instance, on which ``begin()`` has
              already been called.
    :raises: TypeError if an unsupported keyword argument is passed.
    """
    # ``**kwargs`` stands in for keyword-only arguments, which Python 2.7
    # lacks. Pop the supported names so that leftovers can be rejected
    # instead of being silently ignored (e.g. a misspelled ``job_config``).
    job_config = kwargs.pop('job_config', None)
    job_id = kwargs.pop('job_id', None)
    if kwargs:
        raise TypeError(
            'extract_table() got unexpected keyword argument(s): %s'
            % (', '.join(sorted(kwargs)),))

    if job_id is None:
        job_id = str(uuid.uuid4())

    job = ExtractJob(
        job_id, source, list(destination_uris), client=self,
        job_config=job_config)
    job.begin()
    return job
409428

410429
def run_async_query(self, job_id, query,
411430
udf_resources=(), query_parameters=()):

0 commit comments

Comments
 (0)