Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions babel/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -919,6 +919,20 @@ def text_direction(self):
"""
return ''.join(word[0] for word in self.character_order.split('-'))

@property
def unit_display_names(self):
    """Display names for units of measurement.

    The value is read straight from this locale's data under the
    ``'unit_display_names'`` key.

    .. seealso::

       You may want to use :py:func:`babel.units.get_unit_name` instead.

    .. note:: The format of the value returned may change between
              Babel versions.

    """
    display_names = self._data['unit_display_names']
    return display_names


def default_locale(category=None, aliases=LOCALE_ALIASES):
"""Returns the system default locale for a given category, based on
Expand Down
3 changes: 1 addition & 2 deletions babel/dates.py
Original file line number Diff line number Diff line change
Expand Up @@ -915,8 +915,7 @@ def _iter_patterns(a_unit):
else:
yield unit_rel_patterns['past']
a_unit = 'duration-' + a_unit
yield locale._data['unit_patterns'].get(a_unit + ':' + format)
yield locale._data['unit_patterns'].get(a_unit)
yield locale._data['unit_patterns'].get(a_unit, {}).get(format)

for unit, secs_per_unit in TIMEDELTA_UNITS:
value = abs(seconds) / secs_per_unit
Expand Down
276 changes: 276 additions & 0 deletions babel/units.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,276 @@
# -- encoding: UTF-8 --

from babel._compat import string_types
from babel.core import Locale
from babel.numbers import format_decimal, LC_NUMERIC


class UnknownUnitError(ValueError):
    """Raised when a unit identifier cannot be resolved for a locale.

    :param unit: the unit identifier that was looked up
    :param locale: the locale the lookup was performed in
    """

    def __init__(self, unit, locale):
        message = "%s is not a known unit in %s" % (unit, locale)
        super(UnknownUnitError, self).__init__(message)


def get_unit_name(measurement_unit, length='long', locale=LC_NUMERIC):
    """
    Get the display name for a measurement unit in the given locale.

    >>> get_unit_name("radian", locale="en")
    'radians'

    Unknown units will raise exceptions:

    >>> get_unit_name("battery", locale="fi")
    Traceback (most recent call last):
        ...
    UnknownUnitError: battery is not a known unit in fi

    :param measurement_unit: the code of a measurement unit.
                             Known units can be found in the CLDR Unit Validity XML file:
                             http://unicode.org/repos/cldr/tags/latest/common/validity/unit.xml

    :param length: "short", "long" or "narrow"
    :param locale: the `Locale` object or locale identifier
    :return: The unit display name, or None if the unit is known but the
             locale has no display name for it at the requested length.
    :raises UnknownUnitError: if the unit cannot be resolved in the locale.
    """
    locale = Locale.parse(locale)
    # Accept partial identifiers such as "radian" by resolving them to the
    # qualified key ("angle-radian") used by the locale data.
    qualified_unit = _find_unit_pattern(measurement_unit, locale=locale)
    if not qualified_unit:
        raise UnknownUnitError(unit=measurement_unit, locale=locale)
    return locale.unit_display_names.get(qualified_unit, {}).get(length)


def _find_unit_pattern(unit_id, locale=LC_NUMERIC):
    """
    Expand a unit into a qualified form.

    Known units can be found in the CLDR Unit Validity XML file:
    http://unicode.org/repos/cldr/tags/latest/common/validity/unit.xml

    >>> _find_unit_pattern("radian", locale="en")
    'angle-radian'

    Unknown values will return None.

    >>> _find_unit_pattern("horse", locale="en")

    :param unit_id: the code of a measurement unit.
    :return: A key to the `unit_patterns` mapping, or None.
    """
    known_patterns = Locale.parse(locale)._data["unit_patterns"]

    # An exact key wins outright.
    if unit_id in known_patterns:
        return unit_id

    # Otherwise take the shortest key that ends with the given identifier.
    # NOTE(review): this is a plain suffix test, not a match on a "-"
    # boundary, so partial words can match as well -- confirm this is intended.
    suffix_matches = (
        key for key in sorted(known_patterns, key=len) if key.endswith(unit_id)
    )
    return next(suffix_matches, None)


def format_unit(value, measurement_unit, length='long', format=None, locale=LC_NUMERIC):
    """Format a value of a given unit.

    Values are formatted according to the locale's usual pluralization rules
    and number formats.

    >>> format_unit(12, 'length-meter', locale='ro_RO')
    u'12 metri'
    >>> format_unit(15.5, 'length-mile', locale='fi_FI')
    u'15,5 mailia'
    >>> format_unit(1200, 'pressure-inch-hg', locale='nb')
    u'1\\xa0200 tommer kvikks\\xf8lv'

    Number formats may be overridden with the ``format`` parameter.

    >>> from babel._compat import Decimal
    >>> format_unit(Decimal("-42.774"), 'temperature-celsius', 'short', format='#.0', locale='fr')
    u'-42,8 \\xb0C'

    The locale's usual pluralization rules are respected.

    >>> format_unit(1, 'length-meter', locale='ro_RO')
    u'1 metru'
    >>> format_unit(0, 'length-picometer', locale='cy')
    u'0 picometr'
    >>> format_unit(2, 'length-picometer', locale='cy')
    u'2 bicometr'
    >>> format_unit(3, 'length-picometer', locale='cy')
    u'3 phicometr'

    >>> format_unit(15, 'length-horse', locale='fi')
    Traceback (most recent call last):
        ...
    UnknownUnitError: length-horse is not a known unit in fi

    .. versionadded:: 2.2.0

    :param value: the value to format. If this is a string, no number formatting will be attempted.
    :param measurement_unit: the code of a measurement unit.
                             Known units can be found in the CLDR Unit Validity XML file:
                             http://unicode.org/repos/cldr/tags/latest/common/validity/unit.xml
    :param length: "short", "long" or "narrow"
    :param format: An optional format, as accepted by `format_decimal`.
    :param locale: the `Locale` object or locale identifier
    :raises UnknownUnitError: if the unit cannot be resolved in the locale.
    """
    locale = Locale.parse(locale)

    qualified_unit = _find_unit_pattern(measurement_unit, locale=locale)
    if not qualified_unit:
        raise UnknownUnitError(unit=measurement_unit, locale=locale)

    # NOTE(review): when the unit has no patterns for `length`, no other
    # length is tried. TR35 appears to describe length fallbacks, see
    # http://www.unicode.org/reports/tr35/tr35-general.html#perUnitPatterns
    patterns_by_count = locale._data["unit_patterns"][qualified_unit].get(length, {})

    if isinstance(value, string_types):
        # Assume the value is a preformatted singular.
        number_text, count_category = value, "one"
    else:
        number_text = format_decimal(value, format, locale)
        count_category = locale.plural_form(value)

    if count_category not in patterns_by_count:  # pragma: no cover
        # Fall back to a somewhat bad representation.
        # nb: This is marked as no-cover, as the current CLDR seemingly has no way for this to happen.
        unit_label = get_unit_name(measurement_unit, length=length, locale=locale) or measurement_unit
        return '%s %s' % (number_text, unit_label)

    return patterns_by_count[count_category].format(number_text)


def _find_compound_unit(numerator_unit, denominator_unit, locale=LC_NUMERIC):
    """
    Find a predefined compound unit pattern.

    Used internally by format_compound_unit.

    >>> _find_compound_unit("kilometer", "hour", locale="en")
    'speed-kilometer-per-hour'

    >>> _find_compound_unit("mile", "gallon", locale="en")
    'consumption-mile-per-gallon'

    If no predefined compound pattern can be found, `None` is returned.

    >>> _find_compound_unit("gallon", "mile", locale="en")

    >>> _find_compound_unit("horse", "purple", locale="en")

    :param numerator_unit: The numerator unit's identifier
    :param denominator_unit: The denominator unit's identifier
    :param locale: the `Locale` object or locale identifier
    :return: A key to the `unit_patterns` mapping, or None.
    :rtype: str|None
    """
    locale = Locale.parse(locale)

    # Qualify both sides first: possibly partial identifiers such as
    # "kilometer" or "hour" become "length-kilometer" and "duration-hour".
    qualified_numerator = _find_unit_pattern(numerator_unit, locale=locale)
    qualified_denominator = _find_unit_pattern(denominator_unit, locale=locale)

    # Without both halves there is no compound unit to look for.
    if not qualified_numerator or not qualified_denominator:
        return None

    # Compound units are keyed like "speed-kilometer-per-hour", so strip the
    # leading quantity ("length", "duration") from each qualified unit,
    # leaving e.g. "kilometer" and "hour".
    bare_units = tuple(
        qualified.split("-", 1)[-1]
        for qualified in (qualified_numerator, qualified_denominator)
    )

    # Rebuild a compound specifier from the bare parts and qualify that.
    return _find_unit_pattern("%s-per-%s" % bare_units, locale=locale)


def format_compound_unit(
    numerator_value, numerator_unit=None,
    denominator_value=1, denominator_unit=None,
    length='long', format=None, locale=LC_NUMERIC
):
    """
    Format a compound number value, i.e. "kilometers per hour" or similar.

    Both unit specifiers are optional to allow for formatting of arbitrary values still according
    to the locale's general "per" formatting specifier.

    >>> format_compound_unit(7, denominator_value=11, length="short", locale="pt")
    '7/11'

    >>> format_compound_unit(150, "kilometer", denominator_unit="hour", locale="sv")
    '150 kilometer per timme'

    >>> format_compound_unit(150, "kilowatt", denominator_unit="year", locale="fi")
    '150 kilowattia vuodessa'

    >>> format_compound_unit(32.5, "ton", 15, denominator_unit="hour", locale="en")
    '32.5 tons per 15 hours'

    >>> format_compound_unit(160, denominator_unit="square-meter", locale="fr")
    '160 par m\\xe8tre carr\\xe9'

    >>> format_compound_unit(4, "meter", "ratakisko", length="short", locale="fi")
    '4 m/ratakisko'

    >>> format_compound_unit(35, "minute", denominator_unit="fathom", locale="sv")
    '35 minuter per famn'

    >>> from babel.numbers import format_currency
    >>> format_compound_unit(format_currency(35, "JPY", locale="de"), denominator_unit="liter", locale="de")
    '35\\xa0\\xa5 pro Liter'

    See http://www.unicode.org/reports/tr35/tr35-general.html#perUnitPatterns

    :param numerator_value: The numerator value. This may be a string,
                            in which case it is considered preformatted and the unit is ignored.
    :param numerator_unit: The numerator unit. See `format_unit`.
    :param denominator_value: The denominator value. This may be a string,
                              in which case it is considered preformatted and the unit is ignored.
    :param denominator_unit: The denominator unit. See `format_unit`.
    :param length: The formatting length. "short", "long" or "narrow"
    :param format: An optional format, as accepted by `format_decimal`.
    :param locale: the `Locale` object or locale identifier
    :return: A formatted compound value.
    :raises UnknownUnitError: if a unit that has to be formatted cannot be
                              resolved in the locale.
    """
    locale = Locale.parse(locale)

    # Look for a specific compound unit first...

    if numerator_unit and denominator_unit and denominator_value == 1:
        compound_unit = _find_compound_unit(numerator_unit, denominator_unit, locale=locale)
        if compound_unit:
            return format_unit(numerator_value, compound_unit, length=length, format=format, locale=locale)

    # ... failing that, construct one "by hand".

    if isinstance(numerator_value, string_types):  # Numerator is preformatted
        formatted_numerator = numerator_value
    elif numerator_unit:  # Numerator has unit
        formatted_numerator = format_unit(
            numerator_value, numerator_unit, length=length, format=format, locale=locale
        )
    else:  # Unitless numerator
        formatted_numerator = format_decimal(numerator_value, format=format, locale=locale)

    if isinstance(denominator_value, string_types):  # Denominator is preformatted
        formatted_denominator = denominator_value
    elif denominator_unit:  # Denominator has unit
        if denominator_value == 1:  # support perUnitPatterns when the denominator is 1
            # Resolve into a separate name: overwriting `denominator_unit`
            # with None for an unknown unit would make the `format_unit`
            # call below fail with a TypeError instead of UnknownUnitError.
            qualified_denominator = _find_unit_pattern(denominator_unit, locale=locale)
            per_pattern = (
                locale._data["unit_patterns"]
                .get(qualified_denominator, {})
                .get(length, {})
                .get("per")
            )
            if per_pattern:
                return per_pattern.format(formatted_numerator)
            # See TR-35's per-unit pattern algorithm, point 3.2.
            # For denominator 1, we replace the value to be formatted with the empty string;
            # this will make `format_unit` return " second" instead of "1 second".
            denominator_value = ""

        formatted_denominator = format_unit(
            denominator_value, denominator_unit, length=length, format=format, locale=locale
        ).strip()
    else:  # Bare denominator
        formatted_denominator = format_decimal(denominator_value, format=format, locale=locale)

    # NOTE(review): no other length is tried when the locale has no generic
    # "per" pattern for `length`; the plain "{0}/{1}" form is used instead.
    per_pattern = locale._data["compound_unit_patterns"].get("per", {}).get(length, "{0}/{1}")

    return per_pattern.format(formatted_numerator, formatted_denominator)
3 changes: 2 additions & 1 deletion docs/api/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,10 @@ public API of Babel.

core
dates
lists
languages
lists
messages/index
numbers
plural
support
units
13 changes: 13 additions & 0 deletions docs/api/units.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
Units
=====

.. module:: babel.units

The unit module provides functionality to format measurement units for different
locales.

.. autofunction:: format_unit

.. autofunction:: format_compound_unit

.. autofunction:: get_unit_name
23 changes: 20 additions & 3 deletions scripts/import_cldr.py
Original file line number Diff line number Diff line change
Expand Up @@ -757,14 +757,30 @@ def parse_currency_names(data, tree):

def parse_unit_patterns(data, tree):
    """Collect unit patterns, compound patterns and unit display names.

    Fills three mappings in ``data`` (created if missing):

    * ``unit_patterns``: unit type -> length type -> plural count -> pattern;
      a unit's ``perUnitPattern``, when present, is stored under the
      ``'per'`` key next to the plural counts.
    * ``compound_unit_patterns``: compound type -> length type -> pattern.
    * ``unit_display_names``: unit type -> length type -> display name.

    :param data: the locale data dictionary to update in place
    :param tree: the parsed XML tree; ``unitLength`` elements are looked up
                 with ``.//units/unitLength``
    """
    unit_patterns = data.setdefault('unit_patterns', {})
    compound_patterns = data.setdefault('compound_unit_patterns', {})
    unit_display_names = data.setdefault('unit_display_names', {})

    for elem in tree.findall('.//units/unitLength'):
        unit_length_type = elem.attrib['type']

        for unit in elem.findall('unit'):
            unit_type = unit.attrib['type']
            # Only the nested unit -> length layout is written; the legacy
            # colon-separated "unit:length" keys are no longer emitted.
            unit_and_length_patterns = unit_patterns.setdefault(unit_type, {}).setdefault(unit_length_type, {})
            for pattern in unit.findall('unitPattern'):
                unit_and_length_patterns[pattern.attrib['count']] = _text(pattern)

            per_unit_pat = unit.find('perUnitPattern')
            if per_unit_pat is not None:
                unit_and_length_patterns['per'] = _text(per_unit_pat)

            display_name = unit.find('displayName')
            if display_name is not None:
                unit_display_names.setdefault(unit_type, {})[unit_length_type] = _text(display_name)

        for unit in elem.findall('compoundUnit'):
            unit_type = unit.attrib['type']
            compound_patterns.setdefault(unit_type, {})[unit_length_type] = (
                _text(unit.find('compoundUnitPattern'))
            )


def parse_date_fields(data, tree):
Expand Down Expand Up @@ -804,6 +820,7 @@ def parse_currency_formats(data, tree):
type = elem.attrib.get('type')
if curr_length_type:
# Handle `<currencyFormatLength type="short">`, etc.
# TODO(3.x): use nested dicts instead of colon-separated madness
type = '%s:%s' % (type, curr_length_type)
if _should_skip_elem(elem, type, currency_formats):
continue
Expand Down