This page contains a dynamically built list of all corpus repositories. For each language, there are two corpora, corpus-lang-orig and corpus-lang, where the former contains original files and metadata and the latter contains the corpus in text (xml) format.
Private repositories are not listed.
{% comment %} Serialize site.github.public_repositories to a JSON string with the jsonify filter and keep it in lang_repos; the inline scripts below interpolate lang_repos as a JavaScript literal argument. {% endcomment %}
{% assign lang_repos = site.github.public_repositories|jsonify %}
<script src="/assets/js/langtable.js"></script>
<script>
  // Each inline script below looks up one placeholder element by id and appends
  // the DOM node returned by a list/table builder. The builders (addCorpusTable,
  // addUnorderedList, addNegUnorderedDictList) are not defined on this page;
  // presumably they come from langtable.js loaded above - confirm there.
  // Every call receives the repository JSON, the name prefix 'corpus-' and an
  // array of topic strings.
  // Scripts are kept separate on purpose: a missing placeholder (querySelector
  // returns null) only aborts its own script, not the ones after it.
  //
  // NOTE(review): the id is spelled 'corp_languges'; it has to match the id of
  // the placeholder element defined elsewhere, so it is left unchanged here.
  const domProdLangs = document.querySelector('#corp_languges');
  domProdLangs.appendChild(addCorpusTable({{lang_repos}}, 'corpus-', []));
</script>
<script>
  // Geographic grouping: topic 'geo-nordic'.
  const domNordLangs = document.querySelector('#geo_nordic');
  domNordLangs.appendChild(addUnorderedList({{lang_repos}}, 'corpus-', ['geo-nordic']));
</script>
<script>
  // Geographic grouping: topic 'geo-europe'.
  const domEuroLangs = document.querySelector('#geo_europe');
  domEuroLangs.appendChild(addUnorderedList({{lang_repos}}, 'corpus-', ['geo-europe']));
</script>
<script>
  // Geographic grouping: topic 'geo-russia'.
  const domRussLangs = document.querySelector('#geo_russia');
  domRussLangs.appendChild(addUnorderedList({{lang_repos}}, 'corpus-', ['geo-russia']));
</script>
<script>
  // Geographic grouping: topic 'geo-northamerica'.
  const domNorALangs = document.querySelector('#geo_northamerica');
  domNorALangs.appendChild(addUnorderedList({{lang_repos}}, 'corpus-', ['geo-northamerica']));
</script>
<script>
  // Geographic grouping: topic 'geo-africa'.
  const domAfricaLangs = document.querySelector('#geo_africa');
  domAfricaLangs.appendChild(addUnorderedList({{lang_repos}}, 'corpus-', ['geo-africa']));
</script>
<script>
  // Geographic grouping: topic 'geo-asian'.
  const domAsiaLangs = document.querySelector('#geo_asian');
  domAsiaLangs.appendChild(addUnorderedList({{lang_repos}}, 'corpus-', ['geo-asian']));
</script>
<script>
  // "Other" geography: the topic array names every geo-* group listed above.
  // Presumably the builder lists repositories carrying none of these topics -
  // confirm against addNegUnorderedDictList.
  const domOthrLangs = document.querySelector('#geo_other');
  domOthrLangs.appendChild(addNegUnorderedDictList({{lang_repos}}, 'corpus-', ['geo-nordic', 'geo-europe', 'geo-russia', 'geo-northamerica', 'geo-asian', 'geo-africa']));
</script>
<script>
  // "Undefined" geography. The topic literal was previously missing its closing
  // quote, which made this whole script a syntax error so the list was never
  // rendered. 'geo-' mirrors the 'langfam-' argument used for #fam_undef;
  // presumably it is matched as a prefix - confirm in the builder.
  const domUndefLangs = document.querySelector('#geo_undef');
  domUndefLangs.appendChild(addNegUnorderedDictList({{lang_repos}}, 'corpus-', ['geo-']));
</script>
<script>
  // Language-family grouping: topic 'langfam-eskimo-aleut'.
  const domEskAleutLangs = document.querySelector('#fam_eskimo_aleut');
  domEskAleutLangs.appendChild(addUnorderedList({{lang_repos}}, 'corpus-', ['langfam-eskimo-aleut']));
</script>
<script>
  // Language-family grouping: topic 'langfam-indoeuropean'.
  const domIndEurLangs = document.querySelector('#fam_indoeuropean');
  domIndEurLangs.appendChild(addUnorderedList({{lang_repos}}, 'corpus-', ['langfam-indoeuropean']));
</script>
<script>
  // Language-family lists. Each script finds its placeholder element by id and
  // appends the node produced by a builder that is not defined on this page
  // (presumably provided by langtable.js - confirm). Arguments are the
  // repository JSON, the name prefix 'corpus-' and an array of topic strings.
  //
  // Family: topic 'langfam-niger-congo'.
  const domNigerCongoLangs = document.querySelector('#fam_nigercongo');
  domNigerCongoLangs.appendChild(
    addUnorderedList({{lang_repos}}, 'corpus-', ['langfam-niger-congo'])
  );
</script>
<script>
  // Family: topic 'langfam-turkic'.
  const domTurkicLangs = document.querySelector('#fam_turkic');
  domTurkicLangs.appendChild(
    addUnorderedList({{lang_repos}}, 'corpus-', ['langfam-turkic'])
  );
</script>
<script>
  // Family: topic 'langfam-uralic'.
  const domUralicLangs = document.querySelector('#fam_uralic');
  domUralicLangs.appendChild(
    addUnorderedList({{lang_repos}}, 'corpus-', ['langfam-uralic'])
  );
</script>
<script>
  // "Other" family: the topic array names every langfam-* group that has its
  // own list. Presumably the builder lists repositories carrying none of these
  // topics - confirm against addNegUnorderedDictList.
  const domOthrFamLangs = document.querySelector('#fam_other');
  domOthrFamLangs.appendChild(
    addNegUnorderedDictList({{lang_repos}}, 'corpus-', ['langfam-uralic', 'langfam-indoeuropean', 'langfam-eskimo-aleut', 'langfam-turkic', 'langfam-niger-congo'])
  );
</script>
<script>
  // "Undefined" family: called with the bare 'langfam-' string; presumably it
  // is matched as a prefix, i.e. repositories without any langfam-* topic -
  // confirm in the builder.
  const domUndefFamLangs = document.querySelector('#fam_undef');
  domUndefFamLangs.appendChild(
    addNegUnorderedDictList({{lang_repos}}, 'corpus-', ['langfam-'])
  );
</script>