Skip to content

Commit 8b2e716

Browse files
committed
Add Vision text detection.
1 parent 3ea5a40 commit 8b2e716

5 files changed

Lines changed: 141 additions & 8 deletions

File tree

docs/vision-usage.rst

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -210,10 +210,12 @@ Detecting text with OCR from an image.
210210
>>> client = vision.Client()
211211
>>> image = client.image('./image.jpg')
212212
>>> text = image.detect_text()
213-
>>> text.locale
213+
>>> text[0].locale
214214
'en'
215-
>>> text.description
216-
'the full text of the image.'
215+
>>> text[0].description
216+
'some text in the image'
217+
>>> text[1].description
218+
'some other text in the image'
217219
218220
Image Properties
219221
~~~~~~~~~~~~~~~~

google/cloud/vision/entity.py

Lines changed: 21 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,10 @@ class EntityAnnotation(object):
2828
:type description: str
2929
:param description: Description of entity detected in an image.
3030
31+
:type locale: str
32+
:param locale: The language code for the locale in which the entity's
33+
textual ``description`` is expressed.
34+
3135
:type locations: list of
3236
:class:`~google.cloud.vision.geometry.LocationInformation`.
3337
:param locations: List of ``LocationInformation`` instances.
@@ -38,9 +42,10 @@ class EntityAnnotation(object):
3842
:type score: float
3943
:param score: Overall score of the result. Range [0, 1].
4044
"""
41-
def __init__(self, bounds, description, locations, mid, score):
45+
def __init__(self, bounds, description, locale, locations, mid, score):
4246
self._bounds = bounds
4347
self._description = description
48+
self._locale = locale
4449
self._locations = locations
4550
self._mid = mid
4651
self._score = score
@@ -52,19 +57,21 @@ def from_api_repr(cls, response):
5257
:type response: dict
5358
:param response: Dictionary response from Vision API with entity data.
5459
55-
:rtype: :class:`~google.cloud.vision.entiy.EntityAnnotation`
60+
:rtype: :class:`~google.cloud.vision.entity.EntityAnnotation`
5661
:returns: Instance of ``EntityAnnotation``.
5762
"""
5863
bounds = []
5964
if 'boundingPoly' in response:
6065
bounds = Bounds.from_api_repr(response['boundingPoly'])
6166
description = response['description']
67+
68+
locale = response.get('locale', None)
6269
locations = [LocationInformation.from_api_repr(location)
6370
for location in response.get('locations', [])]
64-
mid = response['mid']
65-
score = response['score']
71+
mid = response.get('mid', None)
72+
score = response.get('score', None)
6673

67-
return cls(bounds, description, locations, mid, score)
74+
return cls(bounds, description, locale, locations, mid, score)
6875

6976
@property
7077
def bounds(self):
@@ -84,6 +91,15 @@ def description(self):
8491
"""
8592
return self._description
8693

94+
@property
95+
def locale(self):
96+
"""The language code for text discovered in an image.
97+
98+
:rtype: str
99+
:returns: String language code of text found in the image.
100+
"""
101+
return self._locale
102+
87103
@property
88104
def locations(self):
89105
"""Location coordinates landmarks detected.

google/cloud/vision/image.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,7 @@ def _detect_annotation(self, feature):
9797
'LABEL_DETECTION': 'labelAnnotations',
9898
'LANDMARK_DETECTION': 'landmarkAnnotations',
9999
'LOGO_DETECTION': 'logoAnnotations',
100+
'TEXT_DETECTION': 'textAnnotations',
100101
}
101102
detected_objects = []
102103
result = self.client.annotate(self, [feature])
@@ -160,3 +161,16 @@ def detect_logos(self, limit=10):
160161
"""
161162
feature = Feature(FeatureTypes.LOGO_DETECTION, limit)
162163
return self._detect_annotation(feature)
164+
165+
def detect_text(self, limit=10):
166+
"""Detect text in an image.
167+
168+
:type limit: int
169+
:param limit: The maximum number of text instances to find.
170+
171+
:rtype: list
172+
:returns: List of
173+
:class:`~google.cloud.vision.entity.EntityAnnotation`.
174+
"""
175+
feature = Feature(FeatureTypes.TEXT_DETECTION, limit)
176+
return self._detect_annotation(feature)

unit_tests/vision/_fixtures.py

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1568,3 +1568,83 @@
15681568
}]
15691569
}]
15701570
}
1571+
1572+
1573+
TEXT_DETECTION_RESPONSE = {
1574+
'responses': [
1575+
{
1576+
'textAnnotations': [
1577+
{
1578+
'locale': 'en',
1579+
'description': 'Google CloudPlatform\n',
1580+
'boundingPoly': {
1581+
'vertices': [
1582+
{
1583+
'x': 129,
1584+
'y': 694
1585+
},
1586+
{
1587+
'x': 1375,
1588+
'y': 694
1589+
},
1590+
{
1591+
'x': 1375,
1592+
'y': 835
1593+
},
1594+
{
1595+
'x': 129,
1596+
'y': 835
1597+
}
1598+
]
1599+
}
1600+
},
1601+
{
1602+
'description': 'Google',
1603+
'boundingPoly': {
1604+
'vertices': [
1605+
{
1606+
'x': 129,
1607+
'y': 694
1608+
},
1609+
{
1610+
'x': 535,
1611+
'y': 694
1612+
},
1613+
{
1614+
'x': 535,
1615+
'y': 835
1616+
},
1617+
{
1618+
'x': 129,
1619+
'y': 835
1620+
}
1621+
]
1622+
}
1623+
},
1624+
{
1625+
'description': 'CloudPlatform',
1626+
'boundingPoly': {
1627+
'vertices': [
1628+
{
1629+
'x': 567,
1630+
'y': 694
1631+
},
1632+
{
1633+
'x': 1375,
1634+
'y': 694
1635+
},
1636+
{
1637+
'x': 1375,
1638+
'y': 835
1639+
},
1640+
{
1641+
'x': 567,
1642+
'y': 835
1643+
}
1644+
]
1645+
}
1646+
}
1647+
]
1648+
}
1649+
]
1650+
}

unit_tests/vision/test_client.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -207,6 +207,27 @@ def test_logo_detection_from_content(self):
207207
image_request['image']['content'])
208208
self.assertEqual(5, image_request['features'][0]['maxResults'])
209209

210+
def test_text_detection_from_source(self):
211+
from google.cloud.vision.entity import EntityAnnotation
212+
from unit_tests.vision._fixtures import (TEXT_DETECTION_RESPONSE as
213+
RETURNED)
214+
credentials = _Credentials()
215+
client = self._makeOne(project=self.PROJECT, credentials=credentials)
216+
client.connection = _Connection(RETURNED)
217+
218+
image = client.image(source_uri=_IMAGE_SOURCE)
219+
text = image.detect_text(limit=3)
220+
self.assertEqual(3, len(text))
221+
self.assertTrue(isinstance(text[0], EntityAnnotation))
222+
image_request = client.connection._requested[0]['data']['requests'][0]
223+
self.assertEqual(_IMAGE_SOURCE,
224+
image_request['image']['source']['gcs_image_uri'])
225+
self.assertEqual(3, image_request['features'][0]['maxResults'])
226+
self.assertEqual('en', text[0].locale)
227+
self.assertEqual('Google CloudPlatform\n', text[0].description)
228+
self.assertEqual('Google', text[1].description)
229+
self.assertEqual(694, text[0].bounds.vertices[0].y_coordinate)
230+
210231

211232
class TestVisionRequest(unittest.TestCase):
212233
def _getTargetClass(self):

0 commit comments

Comments
 (0)