Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion bigquery/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,10 @@
"dependencies": {
"@google-cloud/bigquery": "^0.1.1",
"@google-cloud/resource": "^0.1.1",
"@google-cloud/storage": "^0.1.1",
"async": "^2.0.1",
"request": "^2.72.0"
"request": "^2.72.0",
"yargs": "^5.0.0"
},
"devDependencies": {
"mocha": "^3.0.2"
Expand Down
59 changes: 59 additions & 0 deletions bigquery/system-test/tables.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
// Copyright 2016, Google, Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

'use strict';

// Module under test: the table-export sample that lives next to this directory.
// NOTE(review): `assert` is used throughout this file but is not required here —
// presumably it is provided as a global by the shared test setup; confirm.
var example = require('../tables');
// Arguments handed to example.exportTableToGCS() by the export test below.
var options = {
bucket: 'sample-bigquery-export',
file: 'data.json',
dataset: 'github_samples',
table: 'natality'
};
// ID of the job created by the export test; the polling test reads it, so the
// two tests depend on running in declaration order.
var jobId = null;

// System tests for the table-export sample. The first test submits an export
// job and records its ID; the second polls that job until it reports DONE.
describe('bigquery:tables', function () {
  describe('export_table_to_gcs', function () {
    it('should export data to GCS', function (done) {
      example.exportTableToGCS(options, function (err, job) {
        assert.ifError(err);
        assert(job, 'job is not null');
        assert(job.id, 'job has an id');
        assert(job.id.length > 5, 'job id is 5 characters or more');

        // Remember the job so the polling test can look it up.
        jobId = job.id;
        setTimeout(done, 100); // Wait for export job to be submitted
      });
    });
  });

  describe('export_poll', function () {
    it('should fetch job status', function (done) {
      assert(jobId);

      // Poll once; on error, retry after one second until the attempts run out.
      function attempt (remaining) {
        example.pollExportJob(jobId, function (err, metadata) {
          var shouldRetry = err && remaining !== 0;
          if (shouldRetry) {
            setTimeout(function () {
              attempt(remaining - 1);
            }, 1000);
            return;
          }

          // Either the poll succeeded or the last attempt failed: assert and finish.
          assert.ifError(err, 'no error occurred');
          assert.equal(metadata.status.state, 'DONE', 'export job is finished');
          done();
        });
      }

      attempt(60);
    });
  });
});
145 changes: 145 additions & 0 deletions bigquery/tables.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
// Copyright 2016, Google, Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// [START complete]
/**
* Command-line application to export a table from BigQuery to Google Cloud Storage.
*
* This sample is used on this page:
*
* https://cloud.google.com/bigquery/exporting-data-from-bigquery
* For more information, see the README.md under /bigquery.
*/

'use strict';

// [START auth]
// By default, gcloud will authenticate using the service account file specified
// by the GOOGLE_APPLICATION_CREDENTIALS environment variable and use the
// project specified by the GCLOUD_PROJECT environment variable. See
// https://googlecloudplatform.github.io/gcloud-node/#/docs/guides/authentication
var BigQuery = require('@google-cloud/bigquery');
var Storage = require('@google-cloud/storage');
var projectId = process.env.GCLOUD_PROJECT;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Delete this line. gcloud-node picks up that env var automatically.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed


// Instantiate the BigQuery and Storage clients
var bigquery = BigQuery();
var storage = Storage({
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Change to var storage = Storage();

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed

projectId: projectId
});
// [END auth]

// [START export_table_to_gcs]
/**
* Export a table from BigQuery to Google Cloud Storage.
*
* @param {object} options Configuration options.
* @param {string} options.bucket A Google Cloud Storage bucket to use for storage.
* @param {string} options.file The file to save results to within Google Cloud Storage.
* @param {string} options.dataset The ID of the dataset to use.
* @param {string} options.table The ID of the table to export.
* @param {string} options.format Format to export as - either 'CSV', 'JSON', or 'AVRO'.
* @param {boolean} [options.gzip] Optional. Whether or not data should be compressed using GZIP.
* @param {function} callback Callback function to receive the submitted export job.
*/
function exportTableToGCS (options, callback) {
var gcsFileObj = storage.bucket(options.bucket).file(options.file);

// Export table
// See https://googlecloudplatform.github.io/gcloud-node/#/docs/google-cloud/latest/bigquery/table?method=export
var table = bigquery.dataset(options.dataset).table(options.table);
table.export(
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a little hard to read, can we do:

var config = {
  format: options.format,
  gzip: options.gzip
};
table.export(gcsFileObj, config, function (err, job) {

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Or better yet

var gcsFileObj = storage.bucket(options.bucket).file(options.file);
var table = bigquery.dataset(options.dataset).table(options.table);

var config = {
  format: options.format,
  gzip: options.gzip
};

See https://googlecloudplatform.github.io/gcloud-node/#/docs/google-cloud/latest/bigquery/table?method=export
table.export(gcsFileObj, config, function (err, job) {

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed.

gcsFileObj,
{ format: options.format, gzip: options.gzip },
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we extract this out onto its own line? That way the user can easily see the configuration of the export method. e.g.

var table = bigquery.dataset(options.dataset).table(options.table);
var config = {
  format: options.format,
  gzip: options.gzip
};

// Export table
// See https://googlecloudplatform.github.io/gcloud-node/#/docs/google-cloud/latest/bigquery/table?method=export
table.export(gcsFileObj, config, function (err, job) {

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed

function (err, job) {
if (err) {
return callback(err);
}

console.log('ExportTableToGCS: submitted job %s!', job.id);
return callback(null, job);
}
);
}
// [END export_table_to_gcs]

// [START poll_export_job]
/**
 * Check the status of a BigQuery table export job.
 *
 * Fetches the job's metadata once and logs the reported state. This function
 * does not retry by itself; callers that want to wait for completion should
 * call it again when they receive the "not done" error.
 *
 * @param {string} jobId The ID of the export job to poll.
 * @param {function} callback Invoked as callback(err) when the metadata
 *     request fails or the job state is not 'DONE', and as
 *     callback(null, metadata) once the job state is 'DONE'.
 */
function pollExportJob (jobId, callback) {
  var job = bigquery.job(jobId);
  job.getMetadata(function (err, metadata) {
    if (err) {
      return callback(err);
    }
    console.log('PollExportJob: job status: %s', metadata.status.state);

    // A job that has not reached 'DONE' is reported as an error so that
    // callers can tell "finished" apart from "try again later".
    if (metadata.status.state !== 'DONE') {
      // Error() does not expand printf-style placeholders (unlike console.log),
      // so the job ID has to be spliced into the message explicitly.
      return callback(new Error('Job ' + jobId + ' is not done'));
    }

    return callback(null, metadata);
  });
}
// [END poll_export_job]
// [END complete]

// The command-line program
var cli = require('yargs');

var program = module.exports = {
exportTableToGCS: exportTableToGCS,
pollExportJob: pollExportJob,
main: function (args) {
// Run the command-line program
cli.help().strict().parse(args).argv;
}
};

cli
.command('export <bucket> <file> <dataset> <table> [--format] [--gzip]', 'Export a table from BigQuery to Google Cloud Storage.', {}, function (options) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure you want [--format] [--gzip] here. I think yargs might interpret them as optional positional arguments or as boolean options. node tables export --help will generate help text that describes the command's options.

program.exportTableToGCS(options, console.log);
})
.command('poll <jobId>', 'Check the status of a BigQuery table export job.', {}, function (options) {
program.pollExportJob(options.jobId, console.log);
})
.example('node $0 export sample-bigquery-export data.json github_samples natality JSON', 'Export github_samples:natality to gcs://sample-bigquery-export/data.json as JSON')
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe show an example that uses the format and gzip options.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should I add a test as well?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, somehow we want to make sure they work.

.example('node $0 poll job_12345ABCDE', 'Check the status of BigQuery job 12345ABCDE')
.options({
format: {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These aren't global options, so you should move them into the export command. Here's an example of how: https://github.com/GoogleCloudPlatform/nodejs-docs-samples/blob/master/logging/sinks.js#L168

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed

alias: 'f',
global: true,
requiresArg: true,
type: 'string',
choices: ['JSON', 'CSV', 'AVRO']
},
gzip: {
global: true,
type: 'boolean',
description: 'Whether to compress the exported table using gzip. Defaults to false.'
}
})
.wrap(100)
.recommendCommands()
.epilogue('For more information, see https://cloud.google.com/bigquery/exporting-data-from-bigquery');
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Might want to change the link to just https://cloud.google.com/bigquery/docs as this file will contain other examples unrelated to exporting data from BigQuery.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed.


if (module === require.main) {
program.main(process.argv.slice(2), console.log);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Remove the console.log from this line, it's unused in main.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed

}

module.exports = program;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Delete this line, the program has already been exported above.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed

Loading