Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions babel/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,24 @@ def get_global(key):
>>> get_global('zone_territories')['Europe/Berlin']
u'DE'

The keys available are:

- ``currency_fractions``
- ``language_aliases``
- ``likely_subtags``
- ``parent_exceptions``
- ``script_aliases``
- ``territory_aliases``
- ``territory_currencies``
- ``territory_languages``
- ``territory_zones``
- ``variant_aliases``
- ``win_mapping``
- ``zone_aliases``
- ``zone_territories``

.. note:: The internal structure of the data may change between versions.

.. versionadded:: 0.9

:param key: the data key
Expand Down
72 changes: 72 additions & 0 deletions babel/languages.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
# -- encoding: UTF-8 --
from babel.core import get_global


def get_official_languages(territory, regional=False, de_facto=False):
    """
    Return the language(s) that hold official status in a territory.

    Known language codes are ordered from the largest to the smallest share
    of the territory's population.

    With `regional` set, languages that are only regionally official are
    included as well.

    With `de_facto` set, languages that are "de facto" official are included
    as well.

    .. warning:: The data is only as current as the CLDR version used by
                 Babel. If you need scientifically accurate information,
                 use another source!

    :param territory: Territory code
    :type territory: str
    :param regional: Whether to return regionally official languages too
    :type regional: bool
    :param de_facto: Whether to return de-facto official languages too
    :type de_facto: bool
    :return: Tuple of language codes
    :rtype: tuple[str]
    """

    territory_code = str(territory).upper()

    # Collect every official-status value the caller is willing to accept.
    accepted_statuses = set(["official"])
    if regional:
        accepted_statuses.add("official_regional")
    if de_facto:
        accepted_statuses.add("de_facto_official")

    territory_data = get_global("territory_languages").get(territory_code, {})

    # Build (population percent, language code) tuples so that sorting orders
    # by percentage first. The name `pairs` is used by the return statement
    # that follows.
    pairs = []
    for code, details in territory_data.items():
        if details.get('official_status') in accepted_statuses:
            pairs.append((details['population_percent'], code))
    pairs = sorted(pairs, reverse=True)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would be inclined to write:

languages = get_global("territory_languages").get(territory, {})
pairs = [(info['population_percent'], language)
             for language, info in languages.iteritems()
                 if info['official_status'] in allowed_stati]
pairs.sort(reverse=True)

It should be a little faster and a bit more Pythonic to make more extensive use of comprehensions.

return tuple(lang for _, lang in pairs)



def get_territory_language_info(territory):
    """
    Get a dictionary of language information for a territory.

    The dictionary is keyed by language code; the values are dicts with more
    information. Both the outer dictionary and the per-language dicts are
    fresh copies, so callers may modify the result freely.

    The following keys are currently known for the values:

    * `population_percent`: The percentage of the territory's population
      speaking the language.
    * `official_status`: An optional string describing the officiality status
      of the language. Known values are "official", "official_regional" and
      "de_facto_official".

    .. warning:: The data is only as current as the CLDR version used by
                 Babel. If you need scientifically accurate information,
                 use another source!

    .. note:: The format of the dict returned may change between Babel
              versions.

    See http://www.unicode.org/cldr/charts/latest/supplemental/territory_language_information.html

    :param territory: Territory code
    :type territory: str
    :return: Language information dictionary; empty if the territory is
             unknown
    :rtype: dict[str, dict]
    """
    territory = str(territory).upper()
    languages = get_global("territory_languages").get(territory, {})
    # Copy each per-language dict as well: a shallow copy of the outer mapping
    # alone would still share the inner dicts with the mapping returned by
    # get_global, so a caller mutating them would change that shared data.
    return dict((language, dict(info)) for language, info in languages.items())
1 change: 1 addition & 0 deletions docs/api/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ public API of Babel.
core
dates
lists
languages
messages/index
numbers
plural
Expand Down
14 changes: 14 additions & 0 deletions docs/api/languages.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
Languages
=========

.. module:: babel.languages

The languages module provides functionality to access data about
languages that is not bound to a given locale.

Official Languages
------------------

.. autofunction:: get_official_languages

.. autofunction:: get_territory_language_info
11 changes: 11 additions & 0 deletions scripts/import_cldr.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,7 @@ def main():
territory_currencies = global_data.setdefault('territory_currencies', {})
parent_exceptions = global_data.setdefault('parent_exceptions', {})
currency_fractions = global_data.setdefault('currency_fractions', {})
territory_languages = global_data.setdefault('territory_languages', {})

# create auxiliary zone->territory map from the windows zones (we don't set
# the 'zone_territories' map directly here, because there are some zones
Expand Down Expand Up @@ -276,6 +277,16 @@ def main():
cur_crounding = int(fraction.attrib.get('cashRounding', cur_rounding))
currency_fractions[cur_code] = (cur_digits, cur_rounding, cur_cdigits, cur_crounding)

# Languages in territories
for territory in sup.findall('.//territoryInfo/territory'):
languages = {}
for language in territory.findall('./languagePopulation'):
languages[language.attrib['type']] = {
'population_percent': float(language.attrib['populationPercent']),
'official_status': language.attrib.get('officialStatus'),
}
territory_languages[territory.attrib['type']] = languages

write_datafile(global_path, global_data, dump_json=dump_json)

# build a territory containment mapping for inheritance
Expand Down
14 changes: 14 additions & 0 deletions tests/test_languages.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# -- encoding: UTF-8 --
from babel.languages import get_official_languages, get_territory_language_info


def test_official_languages():
    # Each case: (territory code, keyword flags, expected language codes).
    cases = [
        ("FI", {}, ("fi", "sv")),
        ("SE", {}, ("sv",)),
        ("CH", {}, ("de", "fr", "it")),
        ("CH", {"de_facto": True}, ("de", "gsw", "fr", "it")),
        ("CH", {"regional": True}, ("de", "fr", "it", "rm")),
    ]
    for territory, flags, expected in cases:
        assert get_official_languages(territory, **flags) == expected


def test_get_language_info():
    # The info dict for Hungary should be keyed by exactly these languages.
    expected_codes = set(["hu", "en", "de", "ro", "hr", "sk", "sl"])
    info = get_territory_language_info("HU")
    assert set(info.keys()) == expected_codes