diff --git a/babel/plural.py b/babel/plural.py index b5ce9ba65..9cb1d5cc6 100644 --- a/babel/plural.py +++ b/babel/plural.py @@ -86,14 +86,14 @@ def __init__(self, rules): found = set() self.abstract = [] for key, expr in sorted(list(rules)): - if key == 'other': - continue if key not in _plural_tags: raise ValueError('unknown tag %r' % key) elif key in found: raise ValueError('tag %r defined twice' % key) found.add(key) - self.abstract.append((key, _Parser(expr).ast)) + ast = _Parser(expr).ast + if ast: + self.abstract.append((key, ast)) def __repr__(self): rules = self.rules @@ -303,7 +303,7 @@ class RuleError(Exception): .format(_VARS))), ('value', re.compile(r'\d+')), ('symbol', re.compile(r'%|,|!=|=')), - ('ellipsis', re.compile(r'\.\.')) + ('ellipsis', re.compile(r'\.{2,3}|\u2026', re.UNICODE)) # U+2026: ELLIPSIS ] @@ -388,6 +388,11 @@ class _Parser(object): def __init__(self, string): self.tokens = tokenize_rule(string) + if not self.tokens: + # If the pattern is only samples, it's entirely possible + # no stream of tokens whatsoever is generated. + self.ast = None + return self.ast = self.condition() if self.tokens: raise RuleError('Expected end of rule, got %r' % @@ -480,6 +485,9 @@ def _unary_compiler(tmpl): return lambda self, x: tmpl % self.compile(x) +compile_zero = lambda x: '0' + + class _Compiler(object): """The compilers are able to transform the expressions into multiple output formats. @@ -526,6 +534,12 @@ def compile_relation(self, method, expr, range_list): class _GettextCompiler(_Compiler): """Compile into a gettext plural expression.""" + compile_i = _Compiler.compile_n + compile_v = compile_zero + compile_w = compile_zero + compile_f = compile_zero + compile_t = compile_zero + def compile_relation(self, method, expr, range_list): rv = [] expr = self.compile(expr) @@ -552,10 +566,10 @@ class _JavaScriptCompiler(_GettextCompiler): # XXX: presently javascript does not support any of the # fraction support and basically only deals with integers. compile_i = lambda x: 'parseInt(n, 10)' - compile_v = lambda x: '0' - compile_w = lambda x: '0' - compile_f = lambda x: '0' - compile_t = lambda x: '0' + compile_v = compile_zero + compile_w = compile_zero + compile_f = compile_zero + compile_t = compile_zero def compile_relation(self, method, expr, range_list): code = _GettextCompiler.compile_relation( diff --git a/tests/test_plural.py b/tests/test_plural.py index fce1b8e0e..d51efef1e 100644 --- a/tests/test_plural.py +++ b/tests/test_plural.py @@ -13,7 +13,7 @@ import unittest import pytest -from babel import plural +from babel import plural, localedata from babel._compat import Decimal @@ -254,3 +254,17 @@ def test_extract_operands(source, n, i, v, w, f, t): source = Decimal(source) if isinstance(source, str) else source assert (plural.extract_operands(source) == Decimal(n), i, v, w, f, t) + + +@pytest.mark.parametrize('locale', ('ru', 'pl')) +def test_gettext_compilation(locale): + # Test that new plural form elements introduced in recent CLDR versions + # are compiled "down" to `n` when emitting Gettext rules. + ru_rules = localedata.load(locale)['plural_form'].rules + chars = 'ivwft' + # Test that these rules are valid for this test; i.e. that they contain at least one + # of the gettext-unsupported characters. + assert any((" " + ch + " ") in rule for ch in chars for rule in ru_rules.values()) + # Then test that the generated value indeed does not contain these. + ru_rules_gettext = plural.to_gettext(ru_rules) + assert not any(ch in ru_rules_gettext for ch in chars)