diff --git a/babel/messages/extract.py b/babel/messages/extract.py index 8fe3f606c..f8495b02b 100644 --- a/babel/messages/extract.py +++ b/babel/messages/extract.py @@ -461,6 +461,8 @@ def extract_javascript(fileobj, keywords, comment_tags, options): :param comment_tags: a list of translator tags to search for and include in the results :param options: a dictionary of additional options (optional) + Supported options are: + * `jsx` -- set to false to disable JSX/E4X support. """ from babel.messages.jslexer import tokenize, unquote_string funcname = message_lineno = None @@ -472,7 +474,7 @@ def extract_javascript(fileobj, keywords, comment_tags, options): last_token = None call_stack = -1 - for token in tokenize(fileobj.read().decode(encoding)): + for token in tokenize(fileobj.read().decode(encoding), jsx=options.get("jsx", True)): if token.type == 'operator' and token.value == '(': if funcname: message_lineno = token.lineno diff --git a/babel/messages/jslexer.py b/babel/messages/jslexer.py index 22c6e1f9c..c00d8d40b 100644 --- a/babel/messages/jslexer.py +++ b/babel/messages/jslexer.py @@ -36,6 +36,7 @@ ([eE][-+]?\d+)? | (0x[a-fA-F0-9]+) )''')), + ('jsx_tag', re.compile(r'<(?:/?)\w+.+?>', re.I)), ('operator', re.compile(r'(%s)' % '|'.join(map(re.escape, operators)))), ('string', re.compile(r'''(?xs)( '(?:[^'\\]*(?:\\.[^'\\]*)*)' | @@ -127,8 +128,11 @@ def unquote_string(string): return u''.join(result) -def tokenize(source): - """Tokenize a JavaScript source. Returns a generator of tokens. +def tokenize(source, jsx=True): + """ + Tokenize JavaScript/JSX source. Returns a generator of tokens. + + :param jsx: Enable (limited) JSX parsing. """ may_divide = False pos = 0 @@ -138,6 +142,8 @@ def tokenize(source): while pos < end: # handle regular rules first for token_type, rule in rules: + if not jsx and token_type and 'jsx' in token_type: + continue match = rule.match(source, pos) if match is not None: break diff --git a/tests/messages/test_extract.py b/tests/messages/test_extract.py index fa03207c4..cf6162d15 100644 --- a/tests/messages/test_extract.py +++ b/tests/messages/test_extract.py @@ -388,97 +388,6 @@ def test_extract_strip_comment_tags(self): u'a prefix too'], messages[1][2]) -class ExtractJavaScriptTestCase(unittest.TestCase): - - def test_simple_extract(self): - buf = BytesIO(b"""\ -msg1 = _('simple') -msg2 = gettext('simple') -msg3 = ngettext('s', 'p', 42) - """) - messages = \ - list(extract.extract('javascript', buf, extract.DEFAULT_KEYWORDS, - [], {})) - - self.assertEqual([(1, 'simple', [], None), - (2, 'simple', [], None), - (3, ('s', 'p'), [], None)], messages) - - def test_various_calls(self): - buf = BytesIO(b"""\ -msg1 = _(i18n_arg.replace(/"/, '"')) -msg2 = ungettext(i18n_arg.replace(/"/, '"'), multi_arg.replace(/"/, '"'), 2) -msg3 = ungettext("Babel", multi_arg.replace(/"/, '"'), 2) -msg4 = ungettext(i18n_arg.replace(/"/, '"'), "Babels", 2) -msg5 = ungettext('bunny', 'bunnies', parseInt(Math.random() * 2 + 1)) -msg6 = ungettext(arg0, 'bunnies', rparseInt(Math.random() * 2 + 1)) -msg7 = _(hello.there) -msg8 = gettext('Rabbit') -msg9 = dgettext('wiki', model.addPage()) -msg10 = dngettext(domain, 'Page', 'Pages', 3) -""") - messages = \ - list(extract.extract('javascript', buf, extract.DEFAULT_KEYWORDS, [], - {})) - self.assertEqual([(5, (u'bunny', u'bunnies'), [], None), - (8, u'Rabbit', [], None), - (10, (u'Page', u'Pages'), [], None)], messages) - - def test_message_with_line_comment(self): - buf = BytesIO(u"""\ -// NOTE: hello -msg = _('Bonjour à tous') -""".encode('utf-8')) - messages = list(extract.extract_javascript(buf, ('_',), ['NOTE:'], {})) - self.assertEqual(u'Bonjour à tous', messages[0][2]) - self.assertEqual([u'NOTE: hello'], messages[0][3]) - - def test_message_with_multiline_comment(self): - buf = BytesIO(u"""\ -/* NOTE: hello - and bonjour - and servus */ -msg = _('Bonjour à tous') -""".encode('utf-8')) - messages = list(extract.extract_javascript(buf, ('_',), ['NOTE:'], {})) - self.assertEqual(u'Bonjour à tous', messages[0][2]) - self.assertEqual([u'NOTE: hello', 'and bonjour', ' and servus'], messages[0][3]) - - def test_ignore_function_definitions(self): - buf = BytesIO(b"""\ -function gettext(value) { - return translations[language][value] || value; -}""") - - messages = list(extract.extract_javascript(buf, ('gettext',), [], {})) - self.assertEqual(messages, []) - - def test_misplaced_comments(self): - buf = BytesIO(b"""\ -/* NOTE: this won't show up */ -foo() - -/* NOTE: this will */ -msg = _('Something') - -// NOTE: this will show up -// too. -msg = _('Something else') - -// NOTE: but this won't -bar() - -_('no comment here') -""") - messages = list(extract.extract_javascript(buf, ('_',), ['NOTE:'], {})) - self.assertEqual(u'Something', messages[0][2]) - self.assertEqual([u'NOTE: this will'], messages[0][3]) - self.assertEqual(u'Something else', messages[1][2]) - self.assertEqual([u'NOTE: this will show up', 'too.'], messages[1][3]) - self.assertEqual(u'no comment here', messages[2][2]) - self.assertEqual([], messages[2][3]) - - class ExtractTestCase(unittest.TestCase): def test_invalid_filter(self): diff --git a/tests/messages/test_js_extract.py b/tests/messages/test_js_extract.py new file mode 100644 index 000000000..ae6d277b3 --- /dev/null +++ b/tests/messages/test_js_extract.py @@ -0,0 +1,124 @@ +# -- encoding: UTF-8 -- +import pytest +from babel._compat import BytesIO +from babel.messages import extract + + +def test_simple_extract(): + buf = BytesIO(b"""\ +msg1 = _('simple') +msg2 = gettext('simple') +msg3 = ngettext('s', 'p', 42) + """) + messages = \ + list(extract.extract('javascript', buf, extract.DEFAULT_KEYWORDS, + [], {})) + + assert messages == [(1, 'simple', [], None), + (2, 'simple', [], None), + (3, ('s', 'p'), [], None)] + + +def test_various_calls(): + buf = BytesIO(b"""\ +msg1 = _(i18n_arg.replace(/"/, '"')) +msg2 = ungettext(i18n_arg.replace(/"/, '"'), multi_arg.replace(/"/, '"'), 2) +msg3 = ungettext("Babel", multi_arg.replace(/"/, '"'), 2) +msg4 = ungettext(i18n_arg.replace(/"/, '"'), "Babels", 2) +msg5 = ungettext('bunny', 'bunnies', parseInt(Math.random() * 2 + 1)) +msg6 = ungettext(arg0, 'bunnies', rparseInt(Math.random() * 2 + 1)) +msg7 = _(hello.there) +msg8 = gettext('Rabbit') +msg9 = dgettext('wiki', model.addPage()) +msg10 = dngettext(domain, 'Page', 'Pages', 3) +""") + messages = \ + list(extract.extract('javascript', buf, extract.DEFAULT_KEYWORDS, [], + {})) + assert messages == [ + (5, (u'bunny', u'bunnies'), [], None), + (8, u'Rabbit', [], None), + (10, (u'Page', u'Pages'), [], None) + ] + + +def test_message_with_line_comment(): + buf = BytesIO(u"""\ +// NOTE: hello +msg = _('Bonjour à tous') +""".encode('utf-8')) + messages = list(extract.extract_javascript(buf, ('_',), ['NOTE:'], {})) + assert messages[0][2] == u'Bonjour à tous' + assert messages[0][3] == [u'NOTE: hello'] + + +def test_message_with_multiline_comment(): + buf = BytesIO(u"""\ +/* NOTE: hello +and bonjour + and servus */ +msg = _('Bonjour à tous') +""".encode('utf-8')) + messages = list(extract.extract_javascript(buf, ('_',), ['NOTE:'], {})) + assert messages[0][2] == u'Bonjour à tous' + assert messages[0][3] == [u'NOTE: hello', 'and bonjour', ' and servus'] + + +def test_ignore_function_definitions(): + buf = BytesIO(b"""\ +function gettext(value) { +return translations[language][value] || value; +}""") + + messages = list(extract.extract_javascript(buf, ('gettext',), [], {})) + assert not messages + + +def test_misplaced_comments(): + buf = BytesIO(b"""\ +/* NOTE: this won't show up */ +foo() + +/* NOTE: this will */ +msg = _('Something') + +// NOTE: this will show up +// too. +msg = _('Something else') + +// NOTE: but this won't +bar() + +_('no comment here') +""") + messages = list(extract.extract_javascript(buf, ('_',), ['NOTE:'], {})) + assert messages[0][2] == u'Something' + assert messages[0][3] == [u'NOTE: this will'] + assert messages[1][2] == u'Something else' + assert messages[1][3] == [u'NOTE: this will show up', 'too.'] + assert messages[2][2] == u'no comment here' + assert messages[2][3] == [] + + +JSX_SOURCE = b""" +class Foo { + render() { + const value = gettext("hello"); + return ( + + + + ); + } +""" +EXPECTED_JSX_MESSAGES = ["hello", "String1", "String 2", "String 3"] + + +@pytest.mark.parametrize("jsx_enabled", (False, True)) +def test_jsx_extraction(jsx_enabled): + buf = BytesIO(JSX_SOURCE) + messages = [m[2] for m in extract.extract_javascript(buf, ('_', 'gettext'), [], {"jsx": jsx_enabled})] + if jsx_enabled: + assert messages == EXPECTED_JSX_MESSAGES + else: + assert messages != EXPECTED_JSX_MESSAGES