Skip to content

Commit 0c4f4c6

Browse files
committed
Adding one-time RPC to find unaliased / true dataset ID.
Also removing helper functions that are no longer needed, since the dataset ID no longer has to be stripped from key protobufs before each request.
1 parent 50f0684 commit 0c4f4c6

9 files changed

Lines changed: 176 additions & 119 deletions

File tree

gcloud/datastore/__init__.py

Lines changed: 47 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,50 @@
6969
_DATASET_ENV_VAR_NAME = 'GCLOUD_DATASET_ID'
7070

7171

72+
def _find_true_dataset_id(dataset_id, connection=None):
73+
"""Find the true (unaliased) dataset ID.
74+
75+
If the given ID already has a 's~' or 'e~' prefix, does nothing.
76+
Otherwise, looks up a bogus Key('__MissingLookupKind', 1) and reads the
77+
true prefixed dataset ID from the response (either from found or from
78+
missing).
79+
80+
For some context, see:
81+
github.com/GoogleCloudPlatform/gcloud-python/pull/528
82+
github.com/GoogleCloudPlatform/google-cloud-datastore/issues/59
83+
84+
:type dataset_id: string
85+
:param dataset_id: The dataset ID to un-alias / prefix.
86+
87+
:type connection: :class:`gcloud.datastore.connection.Connection`
88+
:param connection: Optional. A connection provided to be the default.
89+
90+
:rtype: string
91+
:returns: The true / prefixed / un-aliased dataset ID.
92+
"""
93+
if dataset_id.startswith('s~') or dataset_id.startswith('e~'):
94+
return dataset_id
95+
96+
connection = connection or _implicit_environ.CONNECTION
97+
98+
# Create the bogus Key protobuf to be looked up and remove
99+
# the dataset ID so the backend won't complain.
100+
bogus_key_pb = Key('__MissingLookupKind', 1,
101+
dataset_id=dataset_id).to_protobuf()
102+
bogus_key_pb.partition_id.ClearField('dataset_id')
103+
104+
missing_pbs = []
105+
found_pbs = connection.lookup(dataset_id, [bogus_key_pb],
106+
missing=missing_pbs)
107+
# By not passing in `deferred`, lookup will continue until
108+
# all results are `found` or `missing`.
109+
all_pbs = missing_pbs + found_pbs
110+
# We only asked for one, so should only receive one.
111+
returned_pb, = all_pbs
112+
113+
return returned_pb.key.partition_id.dataset_id
114+
115+
72116
def set_default_dataset_id(dataset_id=None):
73117
"""Set default dataset ID either explicitly or implicitly as fall-back.
74118
@@ -91,6 +135,7 @@ def set_default_dataset_id(dataset_id=None):
91135
dataset_id = _implicit_environ.compute_engine_id()
92136

93137
if dataset_id is not None:
138+
dataset_id = _find_true_dataset_id(dataset_id)
94139
_implicit_environ.DATASET_ID = dataset_id
95140

96141

@@ -120,8 +165,9 @@ def set_defaults(dataset_id=None, connection=None):
120165
:type connection: :class:`gcloud.datastore.connection.Connection`
121166
:param connection: A connection provided to be the default.
122167
"""
123-
set_default_dataset_id(dataset_id=dataset_id)
168+
# Set CONNECTION first in case _find_true_dataset_id needs it.
124169
set_default_connection(connection=connection)
170+
set_default_dataset_id(dataset_id=dataset_id)
125171

126172

127173
def get_connection():

gcloud/datastore/batch.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -299,7 +299,6 @@ def _assign_entity_to_mutation(mutation_pb, entity, auto_id_entities):
299299
auto_id = entity.key.is_partial
300300

301301
key_pb = entity.key.to_protobuf()
302-
key_pb = helpers._prepare_key_for_request(key_pb)
303302

304303
if auto_id:
305304
insert = mutation_pb.insert_auto_id.add()

gcloud/datastore/helpers.py

Lines changed: 0 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,6 @@
2626
import pytz
2727
import six
2828

29-
from gcloud.datastore import _datastore_v1_pb2 as datastore_pb
3029
from gcloud.datastore.entity import Entity
3130
from gcloud.datastore.key import Key
3231

@@ -280,33 +279,6 @@ def _set_protobuf_value(value_pb, val):
280279
setattr(value_pb, attr, val)
281280

282281

283-
def _prepare_key_for_request(key_pb):
284-
"""Add protobuf keys to a request object.
285-
286-
:type key_pb: :class:`gcloud.datastore._datastore_v1_pb2.Key`
287-
:param key_pb: A key to be added to a request.
288-
289-
:rtype: :class:`gcloud.datastore._datastore_v1_pb2.Key`
290-
:returns: A key which will be added to a request. It will be the
291-
original if nothing needs to be changed.
292-
"""
293-
if key_pb.partition_id.HasField('dataset_id'):
294-
# We remove the dataset_id from the protobuf. This is because
295-
# the backend fails a request if the key contains un-prefixed
296-
# dataset ID. The backend fails because requests to
297-
# /datastore/.../datasets/foo/...
298-
# and
299-
# /datastore/.../datasets/s~foo/...
300-
# both go to the datastore given by 's~foo'. So if the key
301-
# protobuf in the request body has dataset_id='foo', the
302-
# backend will reject since 'foo' != 's~foo'.
303-
new_key_pb = datastore_pb.Key()
304-
new_key_pb.CopyFrom(key_pb)
305-
new_key_pb.partition_id.ClearField('dataset_id')
306-
key_pb = new_key_pb
307-
return key_pb
308-
309-
310282
def _add_keys_to_request(request_field_pb, key_pbs):
311283
"""Add protobuf keys to a request object.
312284
@@ -317,5 +289,4 @@ def _add_keys_to_request(request_field_pb, key_pbs):
317289
:param key_pbs: The keys to add to a request.
318290
"""
319291
for key_pb in key_pbs:
320-
key_pb = _prepare_key_for_request(key_pb)
321292
request_field_pb.add().CopyFrom(key_pb)

gcloud/datastore/query.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -449,8 +449,7 @@ def _pb_from_query(query):
449449
composite_filter.operator = datastore_pb.CompositeFilter.AND
450450

451451
if query.ancestor:
452-
ancestor_pb = helpers._prepare_key_for_request(
453-
query.ancestor.to_protobuf())
452+
ancestor_pb = query.ancestor.to_protobuf()
454453

455454
# Filter on __key__ HAS_ANCESTOR == ancestor.
456455
ancestor_filter = composite_filter.filter.add().property_filter
@@ -469,8 +468,7 @@ def _pb_from_query(query):
469468
# Set the value to filter on based on the type.
470469
if property_name == '__key__':
471470
key_pb = value.to_protobuf()
472-
property_filter.value.key_value.CopyFrom(
473-
helpers._prepare_key_for_request(key_pb))
471+
property_filter.value.key_value.CopyFrom(key_pb)
474472
else:
475473
helpers._set_protobuf_value(property_filter.value, value)
476474

gcloud/datastore/test___init__.py

Lines changed: 108 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -45,22 +45,22 @@ def test_no_env_var_set(self):
4545

4646
def test_set_from_env_var(self):
4747
from gcloud.datastore import _implicit_environ
48-
IMPLICIT_DATASET_ID = 'IMPLICIT'
48+
IMPLICIT_DATASET_ID = 's~IMPLICIT'
4949
with self._monkey(IMPLICIT_DATASET_ID):
5050
self._callFUT()
5151
self.assertEqual(_implicit_environ.DATASET_ID, IMPLICIT_DATASET_ID)
5252

5353
def test_set_explicit_w_env_var_set(self):
5454
from gcloud.datastore import _implicit_environ
55-
EXPLICIT_DATASET_ID = 'EXPLICIT'
55+
EXPLICIT_DATASET_ID = 's~EXPLICIT'
5656
with self._monkey(None):
5757
self._callFUT(EXPLICIT_DATASET_ID)
5858
self.assertEqual(_implicit_environ.DATASET_ID, EXPLICIT_DATASET_ID)
5959

6060
def test_set_explicit_no_env_var_set(self):
6161
from gcloud.datastore import _implicit_environ
62-
IMPLICIT_DATASET_ID = 'IMPLICIT'
63-
EXPLICIT_DATASET_ID = 'EXPLICIT'
62+
IMPLICIT_DATASET_ID = 's~IMPLICIT'
63+
EXPLICIT_DATASET_ID = 's~EXPLICIT'
6464
with self._monkey(IMPLICIT_DATASET_ID):
6565
self._callFUT(EXPLICIT_DATASET_ID)
6666
self.assertEqual(_implicit_environ.DATASET_ID, EXPLICIT_DATASET_ID)
@@ -73,7 +73,7 @@ def test_set_explicit_None_wo_env_var_set(self):
7373

7474
def test_set_explicit_None_w_env_var_set(self):
7575
from gcloud.datastore import _implicit_environ
76-
IMPLICIT_DATASET_ID = 'IMPLICIT'
76+
IMPLICIT_DATASET_ID = 's~IMPLICIT'
7777
with self._monkey(IMPLICIT_DATASET_ID):
7878
self._callFUT(None)
7979
self.assertEqual(_implicit_environ.DATASET_ID, IMPLICIT_DATASET_ID)
@@ -82,7 +82,7 @@ def test_set_implicit_from_appengine(self):
8282
from gcloud._testing import _Monkey
8383
from gcloud.datastore import _implicit_environ
8484

85-
APP_ENGINE_ID = 'GAE'
85+
APP_ENGINE_ID = 's~GAE'
8686
APP_IDENTITY = _AppIdentity(APP_ENGINE_ID)
8787

8888
with self._monkey(None):
@@ -95,8 +95,8 @@ def test_set_implicit_both_env_and_appengine(self):
9595
from gcloud._testing import _Monkey
9696
from gcloud.datastore import _implicit_environ
9797

98-
IMPLICIT_DATASET_ID = 'IMPLICIT'
99-
APP_IDENTITY = _AppIdentity('GAE')
98+
IMPLICIT_DATASET_ID = 's~IMPLICIT'
99+
APP_IDENTITY = _AppIdentity('s~GAE')
100100

101101
with self._monkey(IMPLICIT_DATASET_ID):
102102
with _Monkey(_implicit_environ, app_identity=APP_IDENTITY):
@@ -108,7 +108,7 @@ def _implicit_compute_engine_helper(self, status):
108108
from gcloud._testing import _Monkey
109109
from gcloud.datastore import _implicit_environ
110110

111-
COMPUTE_ENGINE_ID = 'GCE'
111+
COMPUTE_ENGINE_ID = 's~GCE'
112112
HTTPLIB2 = _Httplib2(COMPUTE_ENGINE_ID, status=status)
113113
if status == '200':
114114
EXPECTED_ID = COMPUTE_ENGINE_ID
@@ -148,9 +148,9 @@ def test_set_implicit_both_appengine_and_compute(self):
148148
from gcloud._testing import _Monkey
149149
from gcloud.datastore import _implicit_environ
150150

151-
APP_ENGINE_ID = 'GAE'
151+
APP_ENGINE_ID = 's~GAE'
152152
APP_IDENTITY = _AppIdentity(APP_ENGINE_ID)
153-
HTTPLIB2 = _Httplib2('GCE')
153+
HTTPLIB2 = _Httplib2('s~GCE')
154154

155155
with self._monkey(None):
156156
with _Monkey(_implicit_environ, app_identity=APP_IDENTITY,
@@ -164,9 +164,9 @@ def test_set_implicit_three_env_appengine_and_compute(self):
164164
from gcloud._testing import _Monkey
165165
from gcloud.datastore import _implicit_environ
166166

167-
IMPLICIT_DATASET_ID = 'IMPLICIT'
168-
APP_IDENTITY = _AppIdentity('GAE')
169-
HTTPLIB2 = _Httplib2('GCE')
167+
IMPLICIT_DATASET_ID = 's~IMPLICIT'
168+
APP_IDENTITY = _AppIdentity('s~GAE')
169+
HTTPLIB2 = _Httplib2('s~GCE')
170170

171171
with self._monkey(IMPLICIT_DATASET_ID):
172172
with _Monkey(_implicit_environ, app_identity=APP_IDENTITY,
@@ -177,6 +177,71 @@ def test_set_implicit_three_env_appengine_and_compute(self):
177177
self.assertEqual(len(HTTPLIB2._http_instances), 0)
178178

179179

180+
class Test__find_true_dataset_id(unittest2.TestCase):
    """Tests for ``gcloud.datastore._find_true_dataset_id``."""

    def _callFUT(self, dataset_id, connection=None):
        # Import lazily so a broken import shows up as a test failure.
        from gcloud.datastore import _find_true_dataset_id
        return _find_true_dataset_id(dataset_id, connection=connection)

    def _verify_bogus_key(self, connection):
        """Check that exactly one bogus key with the expected path was sent.

        :param connection: The ``_Connection`` stub whose recorded
                           ``_called_key_pbs`` should be inspected.
        """
        # Make sure just one.
        called_key_pb, = connection._called_key_pbs
        path_element = called_key_pb.path_element
        self.assertEqual(len(path_element), 1)
        self.assertEqual(path_element[0].kind, '__MissingLookupKind')
        self.assertEqual(path_element[0].id, 1)
        self.assertFalse(path_element[0].HasField('name'))

    def test_prefixed(self):
        # An already-prefixed ID is returned untouched; no connection needed.
        PREFIXED = 's~DATASET'
        result = self._callFUT(PREFIXED)
        self.assertEqual(PREFIXED, result)

    def test_unprefixed_no_connection(self):
        from gcloud.datastore import _implicit_environ

        UNPREFIXED = 'DATASET'
        self.assertEqual(_implicit_environ.CONNECTION, None)
        # With no explicit or implicit connection, the lookup call is
        # attempted on ``None``.
        with self.assertRaises(AttributeError):
            self._callFUT(UNPREFIXED)

    def test_unprefixed_bogus_key_hit(self):
        # The stub returns the entity via the ``found`` list (a "hit").
        UNPREFIXED = 'DATASET'
        PREFIX = 's~'
        CONNECTION = _Connection(PREFIX, from_missing=False)
        result = self._callFUT(UNPREFIXED, connection=CONNECTION)

        self.assertEqual(CONNECTION._called_dataset_id, UNPREFIXED)

        self.assertEqual(len(CONNECTION._lookup_result), 1)
        self.assertEqual(CONNECTION._called_missing, [])

        self._verify_bogus_key(CONNECTION)

        PREFIXED = PREFIX + UNPREFIXED
        self.assertEqual(result, PREFIXED)

    def test_unprefixed_bogus_key_miss(self):
        # The stub returns the entity via the ``missing`` list (a "miss").
        UNPREFIXED = 'DATASET'
        PREFIX = 'e~'
        CONNECTION = _Connection(PREFIX, from_missing=True)
        result = self._callFUT(UNPREFIXED, connection=CONNECTION)

        self.assertEqual(CONNECTION._called_dataset_id, UNPREFIXED)

        self.assertEqual(CONNECTION._lookup_result, [])
        # Though an empty list was passed as ``missing``, the stub fills
        # that same list in place, so it now holds the single response.
        self.assertEqual(len(CONNECTION._called_missing), 1)

        self._verify_bogus_key(CONNECTION)

        PREFIXED = PREFIX + UNPREFIXED
        self.assertEqual(result, PREFIXED)
243+
244+
180245
class Test_set_default_connection(unittest2.TestCase):
181246

182247
def setUp(self):
@@ -304,3 +369,32 @@ def Http(self, timeout=None):
304369
result = _Http(self)
305370
self._http_instances.append((timeout, result))
306371
return result
372+
373+
374+
class _Connection(object):
375+
376+
def __init__(self, prefix, from_missing=False):
377+
self.prefix = prefix
378+
self.from_missing = from_missing
379+
380+
def lookup(self, dataset_id, key_pbs, missing=None):
381+
from gcloud.datastore import _datastore_v1_pb2 as datastore_pb
382+
383+
# Store the arguments called with.
384+
self._called_dataset_id = dataset_id
385+
self._called_key_pbs = key_pbs
386+
self._called_missing = missing
387+
388+
key_pb, = key_pbs
389+
390+
response = datastore_pb.Entity()
391+
response.key.CopyFrom(key_pb)
392+
response.key.partition_id.dataset_id = self.prefix + dataset_id
393+
394+
if self.from_missing:
395+
missing[:] = [response]
396+
self._lookup_result = []
397+
else:
398+
self._lookup_result = [response]
399+
400+
return self._lookup_result

0 commit comments

Comments
 (0)