diff --git a/googleapiclient/discovery.py b/googleapiclient/discovery.py index ea01a023fac..b7c884eccee 100644 --- a/googleapiclient/discovery.py +++ b/googleapiclient/discovery.py @@ -149,7 +149,9 @@ def build(serviceName, developerKey=None, model=None, requestBuilder=HttpRequest, - credentials=None): + credentials=None, + cache_discovery=True, + cache=None): """Construct a Resource for interacting with an API. Construct a Resource object for interacting with an API. The serviceName and @@ -171,6 +173,9 @@ def build(serviceName, request. credentials: oauth2client.Credentials, credentials to be used for authentication. + cache_discovery: Boolean, whether or not to cache the discovery doc. + cache: googleapiclient.discovery_cache.base.CacheBase, an optional + cache object for the discovery documents. Returns: A Resource object with methods for interacting with the service. @@ -185,22 +190,53 @@ def build(serviceName, requested_url = uritemplate.expand(discoveryServiceUrl, params) + content = _retrieve_discovery_doc(requested_url, http, cache_discovery, cache) + + return build_from_document(content, base=discoveryServiceUrl, http=http, + developerKey=developerKey, model=model, requestBuilder=requestBuilder, + credentials=credentials) + + +def _retrieve_discovery_doc(url, http, cache_discovery, cache=None): + """Retrieves the discovery_doc from cache or the internet. + + Args: + url: string, the URL of the discovery document. + http: httplib2.Http, An instance of httplib2.Http or something that acts + like it through which HTTP requests will be made. + cache_discovery: Boolean, whether or not to cache the discovery doc. + cache: googleapiclient.discovery_cache.base.Cache, an optional cache + object for the discovery documents. + + Returns: + A unicode string representation of the discovery document. + """ + if cache_discovery: + from . 
import discovery_cache + from .discovery_cache import base + if cache is None: + cache = discovery_cache.autodetect() + if cache: + content = cache.get(url) + if content: + return content + + actual_url = url # REMOTE_ADDR is defined by the CGI spec [RFC3875] as the environment # variable that contains the network address of the client sending the # request. If it exists then add that to the request for the discovery # document to avoid exceeding the quota on discovery requests. if 'REMOTE_ADDR' in os.environ: - requested_url = _add_query_parameter(requested_url, 'userIp', - os.environ['REMOTE_ADDR']) - logger.info('URL being requested: GET %s' % requested_url) + actual_url = _add_query_parameter(url, 'userIp', os.environ['REMOTE_ADDR']) + logger.info('URL being requested: GET %s', actual_url) - resp, content = http.request(requested_url) + resp, content = http.request(actual_url) if resp.status == 404: raise UnknownApiNameOrVersion("name: %s version: %s" % (serviceName, - version)) + version)) if resp.status >= 400: - raise HttpError(resp, content, uri=requested_url) + raise HttpError(resp, content, uri=actual_url) try: content = content.decode('utf-8') @@ -212,10 +248,9 @@ def build(serviceName, except ValueError as e: logger.error('Failed to parse as JSON: ' + content) raise InvalidJsonError() - - return build_from_document(content, base=discoveryServiceUrl, http=http, - developerKey=developerKey, model=model, requestBuilder=requestBuilder, - credentials=credentials) + if cache_discovery and cache: + cache.set(url, content) + return content @positional(1) diff --git a/googleapiclient/discovery_cache/__init__.py b/googleapiclient/discovery_cache/__init__.py new file mode 100644 index 00000000000..c56fd659f98 --- /dev/null +++ b/googleapiclient/discovery_cache/__init__.py @@ -0,0 +1,42 @@ +# Copyright 2014 Google Inc. All Rights Reserved. 
def autodetect():
    """Choose a discovery-document cache backend for this environment.

    The App Engine memcache backend is tried first; if anything goes wrong
    while loading it, the file based backend is tried instead.

    Returns:
      googleapiclient.discovery_cache.base.Cache, the module-level ``cache``
      object of the first backend that could be loaded, or None if neither
      backend is usable (the failure of the file backend is logged as a
      warning).
    """
    detected = None
    try:
        # Imported only as an availability probe for the App Engine runtime.
        from google.appengine.api import memcache
        from . import appengine_memcache
        detected = appengine_memcache.cache
    except Exception:
        try:
            from . import file_cache
            detected = file_cache.cache
        except Exception as error:
            logging.warning(error, exc_info=True)
    return detected
class Cache(base.Cache):
    """Discovery-document cache stored in App Engine memcache.

    Every entry is read and written under the ``NAMESPACE`` memcache
    namespace. Failures of the memcache calls are logged as warnings and
    never propagated to the caller.
    """

    def __init__(self, max_age):
        """Constructor.

        Args:
          max_age: Cache expiration in seconds.
        """
        self._max_age = max_age

    def get(self, url):
        """Looks up the cached document for a URL.

        Args:
          url: string, the key for the cache.

        Returns:
          Whatever memcache returns for the key, or None if the memcache
          call raised.
        """
        cached = None
        try:
            cached = memcache.get(url, namespace=NAMESPACE)
        except Exception as error:
            logging.warning(error, exc_info=True)
        return cached

    def set(self, url, content):
        """Stores a document in memcache with ``max_age`` as its lifetime.

        Args:
          url: string, the key for the cache.
          content: string, the discovery document.
        """
        try:
            memcache.set(
                url,
                content,
                time=int(self._max_age),
                namespace=NAMESPACE,
            )
        except Exception as error:
            logging.warning(error, exc_info=True)
# Calling ABCMeta directly produces a base class whose metaclass is ABCMeta on
# both Python 2 and Python 3. A ``__metaclass__`` attribute in the class body
# is only honored by Python 2, which left the abstract methods unenforced on
# Python 3.
_AbstractCacheBase = abc.ABCMeta('_AbstractCacheBase', (object,), {})


class Cache(_AbstractCacheBase):
    """A base abstract cache class.

    Concrete caches must implement both ``get`` and ``set``; instantiating a
    subclass that leaves either one abstract raises TypeError.
    """

    @abc.abstractmethod
    def get(self, url):
        """Gets the content from the cache with a given key.

        Args:
          url: string, the key for the cache.

        Returns:
          object, the value in the cache for the given key, or None if the
          key is not in the cache.
        """
        raise NotImplementedError()

    @abc.abstractmethod
    def set(self, url, content):
        """Sets the given key and content in the cache.

        Args:
          url: string, the key for the cache.
          content: string, the discovery document.
        """
        raise NotImplementedError()
def _to_timestamp(date):
    """Converts a datetime into seconds elapsed since ``EPOCH``.

    Args:
      date: datetime.datetime, the moment to convert.

    Returns:
      The number of seconds between ``EPOCH`` and ``date``.
    """
    delta = date - EPOCH
    try:
        return delta.total_seconds()
    except AttributeError:
        # The following is the equivalent of total_seconds() in Python2.6.
        # See also: https://docs.python.org/2/library/datetime.html
        return ((delta.microseconds + (delta.seconds + delta.days * 24 * 3600)
                 * 10**6) / 10**6)


def _read_or_initialize_cache(f):
    """Loads the cache dict from a locked file, resetting it if unusable.

    The file is reset to an empty JSON object when it cannot be read or
    parsed, or when it parses to something other than a JSON object. The
    latter would otherwise be handed back to callers that index it by URL,
    and the file would never be repaired.

    Args:
      f: an object exposing ``file_handle()``, which returns the open,
        seekable cache file.

    Returns:
      dict, the cache content (empty if the file had to be reset).
    """
    handle = f.file_handle()
    handle.seek(0)
    try:
        cache = json.load(handle)
    except Exception:
        # This means it opens the file for the first time, or the cache is
        # corrupted.
        cache = None
    if not isinstance(cache, dict):
        # Initialize the file with an empty dict.
        cache = {}
        handle.truncate(0)
        handle.seek(0)
        json.dump(cache, handle)
    return cache
class Cache(base.Cache):
    """A file based cache for the discovery documents.

    All entries live in one JSON file inside the system temporary directory.
    Each entry maps a URL to a ``(content, timestamp)`` pair, where the
    timestamp is produced by ``_to_timestamp`` at the time of the ``set``
    call. Every access opens the file through ``LockedFile`` and is skipped
    when the lock cannot be obtained. Errors are logged as warnings and never
    propagated to the caller.
    """

    def __init__(self, max_age):
        """Constructor.

        Args:
          max_age: Cache expiration in seconds.
        """
        self._max_age = max_age
        self._file = os.path.join(tempfile.gettempdir(), FILENAME)
        f = LockedFile(self._file, 'a+', 'r')
        try:
            f.open_and_lock()
            if f.is_locked():
                _read_or_initialize_cache(f)
            # If we can not obtain the lock, other process or thread must
            # have initialized the file.
        except Exception as e:
            # Use the module logger, like get() and set() do.
            logger.warning(e, exc_info=True)
        finally:
            f.unlock_and_close()

    def get(self, url):
        """Returns the cached document for a URL if it has not expired.

        Args:
          url: string, the key for the cache.

        Returns:
          The cached content, or None when the URL is not cached, the entry
          is older than ``max_age``, the lock could not be obtained, or an
          error occurred.
        """
        f = LockedFile(self._file, 'r+', 'r')
        try:
            f.open_and_lock()
            if not f.is_locked():
                logger.debug('Could not obtain a lock for the cache file.')
                return None
            cache = _read_or_initialize_cache(f)
            if url not in cache:
                return None
            content, timestamp = cache[url]
            now = _to_timestamp(datetime.datetime.now())
            if now < timestamp + self._max_age:
                return content
            return None
        except Exception as e:
            logger.warning(e, exc_info=True)
            return None
        finally:
            f.unlock_and_close()

    def set(self, url, content):
        """Stores a document and drops every entry older than ``max_age``.

        Args:
          url: string, the key for the cache.
          content: string, the discovery document.
        """
        f = LockedFile(self._file, 'r+', 'r')
        try:
            f.open_and_lock()
            if not f.is_locked():
                logger.debug('Could not obtain a lock for the cache file.')
                return
            cache = _read_or_initialize_cache(f)
            # One snapshot of the current time serves both the new entry and
            # the staleness check below.
            now = _to_timestamp(datetime.datetime.now())
            cache[url] = (content, now)
            # Remove stale cache.
            for key, (_, timestamp) in list(cache.items()):
                if now >= timestamp + self._max_age:
                    del cache[key]
            f.file_handle().truncate(0)
            f.file_handle().seek(0)
            json.dump(cache, f.file_handle())
        except Exception as e:
            logger.warning(e, exc_info=True)
        finally:
            f.unlock_and_close()
class DiscoveryFromAppEngineCache(unittest.TestCase):
    """Checks that build() uses memcache when the App Engine API imports."""

    def test_appengine_memcache(self):
        # Hack module import: route imports of 'google.appengine.api' to a
        # MagicMock and delegate every other import to the real __import__.
        self.orig_import = __import__
        self.mocked_api = mock.MagicMock()

        def import_mock(name, *args):
            if name == 'google.appengine.api':
                return self.mocked_api
            return self.orig_import(name, *args)

        # The builtins module is named differently on Python 2 and Python 3.
        import_fullname = '__builtin__.__import__'
        if sys.version_info[0] >= 3:
            import_fullname = 'builtins.__import__'

        with mock.patch(import_fullname, side_effect=import_mock):
            namespace = 'google-api-client'
            self.http = HttpMock(datafile('plus.json'), {'status': '200'})

            # First build: the cache lookup misses.
            self.mocked_api.memcache.get.return_value = None

            plus = build('plus', 'v1', http=self.http)

            # memcache.get is called once
            url = 'https://www.googleapis.com/discovery/v1/apis/plus/v1/rest'
            self.mocked_api.memcache.get.assert_called_once_with(
                url, namespace=namespace)

            # memcache.set is called once
            with open(datafile('plus.json')) as f:
                content = f.read()
            self.mocked_api.memcache.set.assert_called_once_with(
                url, content, time=DISCOVERY_DOC_MAX_AGE, namespace=namespace)

            # Returns the cached content this time.
            self.mocked_api.memcache.get.return_value = content

            # Make sure the contents are returned from the cache.
            # (Otherwise it should throw an error)
            self.http = HttpMock(None, {'status': '200'})

            plus = build('plus', 'v1', http=self.http)

            # memcache.get is called twice
            self.mocked_api.memcache.get.assert_has_calls(
                [mock.call(url, namespace=namespace),
                 mock.call(url, namespace=namespace)])

            # memcache.set is called just once
            self.mocked_api.memcache.set.assert_called_once_with(
                url, content, time=DISCOVERY_DOC_MAX_AGE,namespace=namespace)


class DictCache(Cache):
    """In-memory Cache implementation backed by a dict, for tests."""

    def __init__(self):
        self.d = {}

    def get(self, url):
        return self.d.get(url, None)

    def set(self, url, content):
        self.d[url] = content

    def contains(self, url):
        return url in self.d


class DiscoveryFromFileCache(unittest.TestCase):
    """Checks that build() reads and writes the file cache module's cache."""

    def test_file_based_cache(self):
        # Wrap a DictCache in a Mock so calls are recorded while still being
        # forwarded to the real dict-backed implementation.
        cache = mock.Mock(wraps=DictCache())
        with mock.patch('googleapiclient.discovery_cache.file_cache.cache',
                        new=cache):
            self.http = HttpMock(datafile('plus.json'), {'status': '200'})

            plus = build('plus', 'v1', http=self.http)

            # cache.get is called once
            url = 'https://www.googleapis.com/discovery/v1/apis/plus/v1/rest'
            cache.get.assert_called_once_with(url)

            # cache.set is called once
            with open(datafile('plus.json')) as f:
                content = f.read()
            cache.set.assert_called_once_with(url, content)

            # Make sure there is a cache entry for the plus v1 discovery doc.
            self.assertTrue(cache.contains(url))

            # Make sure the contents are returned from the cache.
            # (Otherwise it should throw an error)
            self.http = HttpMock(None, {'status': '200'})

            plus = build('plus', 'v1', http=self.http)

            # cache.get is called twice
            cache.get.assert_has_calls([mock.call(url), mock.call(url)])

            # cache.set is called just once
            cache.set.assert_called_once_with(url, content)
class FileCacheTest(unittest.TestCase):
    """Tests for the file based discovery document cache."""

    # FILENAME is patched so the cache built in this test uses its own file
    # name instead of the default one.
    @mock.patch('googleapiclient.discovery_cache.file_cache.FILENAME',
                new='google-api-python-client-discovery-doc-tests.cache')
    def test_expiration(self):
        # A clock that always reads DISCOVERY_DOC_MAX_AGE seconds ahead of
        # the real current time.
        def future_now():
            return datetime.datetime.now() + datetime.timedelta(
                seconds=DISCOVERY_DOC_MAX_AGE)
        mocked_datetime = mock.MagicMock()
        mocked_datetime.datetime.now.side_effect = future_now
        cache = FileCache(max_age=DISCOVERY_DOC_MAX_AGE)
        first_url = 'url-1'
        first_url_content = 'url-1-content'
        cache.set(first_url, first_url_content)

        # Make sure the content is cached.
        self.assertEqual(first_url_content, cache.get(first_url))

        # Simulate another `set` call in the future date. The first entry is
        # at least max_age old from that call's point of view.
        with mock.patch('googleapiclient.discovery_cache.file_cache.datetime',
                        new=mocked_datetime):
            cache.set('url-2', 'url-2-content')

        # Make sure the content is expired
        self.assertEqual(None, cache.get(first_url))