diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..6e631b0e --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +inflection/tools/dictionary-parser/bin/* +fst/__pycache__/* +.vscode/settings.json diff --git a/data/tools/new_lexeme_sample.json b/data/tools/new_lexeme_sample.json index 5edaa33f..e6e479ae 100644 --- a/data/tools/new_lexeme_sample.json +++ b/data/tools/new_lexeme_sample.json @@ -1,271 +1,5 @@ [ - { - "lemma": "дворац", - "grammaticalCategory": "noun", - "language": "sr", - "grammaticalGender": "masculine", - "forms": [ - { - "value": "дворац", - "grammaticalFeatures": [ - "singular", - "nominative" - ] - }, - { - "value": "двораца", - "grammaticalFeatures": [ - "singular", - "genitive" - ] - }, - { - "value": "дворцу", - "grammaticalFeatures": [ - "singular", - "dative" - ] - }, - { - "value": "дворац", - "grammaticalFeatures": [ - "singular", - "accusative" - ] - }, - { - "value": "дворцу", - "grammaticalFeatures": [ - "singular", - "vocative" - ] - }, - { - "value": "дворцом", - "grammaticalFeatures": [ - "singular", - "instrumental" - ] - }, - { - "value": "дворцу", - "grammaticalFeatures": [ - "singular", - "locative" - ] - }, - { - "value": "дворци", - "grammaticalFeatures": [ - "plural", - "nominative" - ] - }, - { - "value": "двораца", - "grammaticalFeatures": [ - "plural", - "genitive" - ] - }, - { - "value": "дворцима", - "grammaticalFeatures": [ - "plural", - "dative" - ] - }, - { - "value": "дворце", - "grammaticalFeatures": [ - "plural", - "accusative" - ] - }, - { - "value": "дворци", - "grammaticalFeatures": [ - "plural", - "vocative" - ] - }, - { - "value": "дворцима", - "grammaticalFeatures": [ - "plural", - "instrumental" - ] - }, - { - "value": "дворцима", - "grammaticalFeatures": [ - "plural", - "locative" - ] - } - ] - }, - { - "lemma": "април", - "grammaticalCategory": "noun", - "language": "sr", - "grammaticalGender": "masculine", - "forms": [ - { - "value": "април", - "grammaticalFeatures": [ - "singular", - 
"nominative" - ] - }, - { - "value": "априла", - "grammaticalFeatures": [ - "singular", - "genitive" - ] - }, - { - "value": "априлу", - "grammaticalFeatures": [ - "singular", - "dative" - ] - }, - { - "value": "април", - "grammaticalFeatures": [ - "singular", - "accusative" - ] - }, - { - "value": "априлу", - "grammaticalFeatures": [ - "singular", - "vocative" - ] - }, - { - "value": "априлом", - "grammaticalFeatures": [ - "singular", - "instrumental" - ] - }, - { - "value": "априлу", - "grammaticalFeatures": [ - "singular", - "locative" - ] - }, - { - "value": "априли", - "grammaticalFeatures": [ - "plural", - "nominative" - ] - }, - { - "value": "априла", - "grammaticalFeatures": [ - "plural", - "genitive" - ] - }, - { - "value": "априлима", - "grammaticalFeatures": [ - "plural", - "dative" - ] - }, - { - "value": "априле", - "grammaticalFeatures": [ - "plural", - "accusative" - ] - }, - { - "value": "априли", - "grammaticalFeatures": [ - "plural", - "vocative" - ] - }, - { - "value": "априлима", - "grammaticalFeatures": [ - "plural", - "instrumental" - ] - }, - { - "value": "априлима", - "grammaticalFeatures": [ - "plural", - "locative" - ] - } - ] - }, - { - "lemma": "коњ", - "grammaticalCategory": "noun", - "language": "sr", - "grammaticalGender": "masculine", - "forms": [ - { - "value": "коњ", - "grammaticalFeatures": [ - "singular", - "nominative" - ] - }, - { - "value": "коњ", - "grammaticalFeatures": [ - "singular", - "genitive" - ] - }, - { - "value": "коњ", - "grammaticalFeatures": [ - "singular", - "dative" - ] - }, - { - "value": "коњ", - "grammaticalFeatures": [ - "singular", - "accusative" - ] - }, - { - "value": "коњ", - "grammaticalFeatures": [ - "singular", - "vocative" - ] - }, - { - "value": "коњ", - "grammaticalFeatures": [ - "singular", - "instrumental" - ] - }, - { - "value": "коњ", - "grammaticalFeatures": [ - "singular", - "locative" - ] - } - ] - } 
+{"lemma":"дворац","grammaticalCategory":"noun","language":"sr","grammaticalGender":"masculine","forms":[{"value":"дворац","grammaticalFeatures":["singular","nominative"]},{"value":"двораца","grammaticalFeatures":["singular","genitive"]},{"value":"дворцу","grammaticalFeatures":["singular","dative"]},{"value":"дворац","grammaticalFeatures":["singular","accusative"]},{"value":"дворцу","grammaticalFeatures":["singular","vocative"]},{"value":"дворцом","grammaticalFeatures":["singular","instrumental"]},{"value":"дворцу","grammaticalFeatures":["singular","locative"]},{"value":"дворци","grammaticalFeatures":["plural","nominative"]},{"value":"двораца","grammaticalFeatures":["plural","genitive"]},{"value":"дворцима","grammaticalFeatures":["plural","dative"]},{"value":"дворце","grammaticalFeatures":["plural","accusative"]},{"value":"дворци","grammaticalFeatures":["plural","vocative"]},{"value":"дворцима","grammaticalFeatures":["plural","instrumental"]},{"value":"дворцима","grammaticalFeatures":["plural","locative"]}],"descriptions":[{"language":"sr","value":"Средњевековно пребивалиште владара, поседује и одбрамбене особине."},{"language":"en","value":"Castle"}]}, 
+{"lemma":"април","grammaticalCategory":"noun","language":"sr","grammaticalGender":"masculine","forms":[{"value":"април","grammaticalFeatures":["singular","nominative"]},{"value":"априла","grammaticalFeatures":["singular","genitive"]},{"value":"априлу","grammaticalFeatures":["singular","dative"]},{"value":"април","grammaticalFeatures":["singular","accusative"]},{"value":"априлу","grammaticalFeatures":["singular","vocative"]},{"value":"априлом","grammaticalFeatures":["singular","instrumental"]},{"value":"априлу","grammaticalFeatures":["singular","locative"]},{"value":"априли","grammaticalFeatures":["plural","nominative"]},{"value":"априла","grammaticalFeatures":["plural","genitive"]},{"value":"априлима","grammaticalFeatures":["plural","dative"]},{"value":"априле","grammaticalFeatures":["plural","accusative"]},{"value":"априли","grammaticalFeatures":["plural","vocative"]},{"value":"априлима","grammaticalFeatures":["plural","instrumental"]},{"value":"априлима","grammaticalFeatures":["plural","locative"]}],"descriptions":[{"language":"sr","value":"Четврти месец у години"},{"language":"en","value":"April"}]}, +{"lemma":"коњ","grammaticalCategory":"noun","language":"sr","grammaticalGender":"masculine","forms":[{"value":"коњ","grammaticalFeatures":["singular","nominative"]},{"value":"коњ","grammaticalFeatures":["singular","genitive"]},{"value":"коњ","grammaticalFeatures":["singular","dative"]},{"value":"коњ","grammaticalFeatures":["singular","accusative"]},{"value":"коњ","grammaticalFeatures":["singular","vocative"]},{"value":"коњ","grammaticalFeatures":["singular","instrumental"]},{"value":"коњ","grammaticalFeatures":["singular","locative"]}],"descriptions":[{"language":"sr","value":"Четвороножна животиња, која се често користи за јахање или пренос терета."},{"language":"en","value":"Horse"}]} ] \ No newline at end of file diff --git a/data/tools/wikidata_upload.py b/data/tools/wikidata_upload.py index a08da05c..34957aa5 100644 --- a/data/tools/wikidata_upload.py +++ 
b/data/tools/wikidata_upload.py @@ -190,6 +190,16 @@ def build_tfsl_lexemes(new_lexemes): features.append(WIKI_TYPES[feature]) lexeme += tfsl.LexemeForm(form["value"] @ wiki_lang, features) + if "descriptions" in new_lexeme: + glosses = [] + for gloss in new_lexeme["descriptions"]: + glosses.append( + gloss["value"] @ tfsl.languages.get_first_lang(gloss["language"]) + ) + lexeme += tfsl.LexemeSense(glosses) + else: + print(f'WARNING: Description missing for {new_lexeme["lemma"]}.') + tfsl_lexemes.append(lexeme) return tfsl_lexemes