Skip to content

Commit 53b14ee

Browse files
authored
Landing sr support for dictionary words (#111)
* Adding more tests - currently failing. * Reorg code to use dictionary inflector, tests still fail. Gitignore updates. * Tests passing locally. Some similar words are not passing when not in dictionary.
1 parent f925ad7 commit 53b14ee

File tree

6 files changed

+2307
-1880
lines changed

6 files changed

+2307
-1880
lines changed

.gitignore

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1-
inflection/tools/dictionary-parser/bin/*
2-
fst/__pycache__/*
1+
build/*
2+
.vscode/launch.json
33
.vscode/settings.json
4+
fst/__pycache__/*
5+
inflection/tools/dictionary-parser/bin/*

inflection/resources/org/unicode/inflection/dictionary/dictionary_sr.lst

Lines changed: 1113 additions & 1054 deletions
Large diffs are not rendered by default.

inflection/resources/org/unicode/inflection/dictionary/inflectional_sr.xml

Lines changed: 1121 additions & 795 deletions
Large diffs are not rendered by default.

inflection/src/inflection/grammar/synthesis/SrGrammarSynthesizer_SrDisplayFunction.cpp

Lines changed: 48 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -25,13 +25,13 @@ namespace inflection::grammar::synthesis {
2525
SrGrammarSynthesizer_SrDisplayFunction::SrGrammarSynthesizer_SrDisplayFunction(const ::inflection::dialog::SemanticFeatureModel& model)
2626
: super()
2727
, dictionary(*npc(::inflection::dictionary::DictionaryMetaData::createDictionary(::inflection::util::LocaleUtils::SERBIAN())))
28-
, countFeature(model.getFeature(GrammemeConstants::NUMBER))
29-
, genderFeature(model.getFeature(GrammemeConstants::GENDER))
30-
, partOfSpeechFeature(model.getFeature(GrammemeConstants::POS))
28+
, caseFeature(*npc(model.getFeature(GrammemeConstants::CASE)))
29+
, numberFeature(*npc(model.getFeature(GrammemeConstants::NUMBER)))
30+
, genderFeature(*npc(model.getFeature(GrammemeConstants::GENDER)))
31+
, inflector(::inflection::dictionary::Inflector::getInflector(::inflection::util::LocaleUtils::SERBIAN()))
3132
, tokenizer(::inflection::tokenizer::TokenizerFactory::createTokenizer(::inflection::util::LocaleUtils::SERBIAN()))
3233
, dictionaryInflector(::inflection::util::LocaleUtils::SERBIAN(),{
33-
{GrammemeConstants::POS_NOUN(), GrammemeConstants::POS_ADJECTIVE(), GrammemeConstants::POS_VERB()},
34-
{GrammemeConstants::PERSON_THIRD(), GrammemeConstants::PERSON_FIRST(), GrammemeConstants::PERSON_SECOND()},
34+
{GrammemeConstants::POS_NOUN(), GrammemeConstants::POS_ADJECTIVE()},
3535
{GrammemeConstants::NUMBER_SINGULAR(), GrammemeConstants::NUMBER_PLURAL()},
3636
{GrammemeConstants::GENDER_MASCULINE(), GrammemeConstants::GENDER_FEMININE(), GrammemeConstants::GENDER_NEUTER()}
3737
}, {}, true)
@@ -42,23 +42,54 @@ SrGrammarSynthesizer_SrDisplayFunction::~SrGrammarSynthesizer_SrDisplayFunction(
4242
{
4343
}
4444

45-
::inflection::dialog::DisplayValue * SrGrammarSynthesizer_SrDisplayFunction::getDisplayValue(const dialog::SemanticFeatureModel_DisplayData &displayData, const ::std::map<::inflection::dialog::SemanticFeature, ::std::u16string> &constraints, bool enableInflectionGuess) const
45+
::std::u16string SrGrammarSynthesizer_SrDisplayFunction::inflectString(const ::std::map<::inflection::dialog::SemanticFeature, ::std::u16string>& constraints, const ::std::u16string& lemma) const
4646
{
47-
const auto displayValue = GrammarSynthesizerUtil::getTheBestDisplayValue(displayData, constraints);
48-
if (displayValue == nullptr) {
49-
return nullptr;
47+
::std::u16string countString(GrammarSynthesizerUtil::getFeatureValue(constraints, numberFeature));
48+
::std::u16string caseString(GrammarSynthesizerUtil::getFeatureValue(constraints, caseFeature));
49+
auto genderString = GrammarSynthesizerUtil::getFeatureValue(constraints, genderFeature);
50+
51+
::std::u16string inflection;
52+
53+
// The nominative (or an absent case) is unmarked in the inflection patterns, so it is left out of the constraint list built below.
54+
::std::vector<::std::u16string> string_constraints;
55+
if (!countString.empty()) {
56+
string_constraints.emplace_back(countString);
5057
}
51-
::std::u16string displayString = displayValue->getDisplayString();
52-
if (displayString.empty()) {
53-
return nullptr;
58+
if (!caseString.empty() && caseString != GrammemeConstants::CASE_NOMINATIVE()) {
59+
string_constraints.emplace_back(caseString);
5460
}
61+
if (!genderString.empty()) {
62+
string_constraints.emplace_back(genderString);
63+
}
64+
// Fetch the dictionary's combined binary type for the lemma; it is passed to the dictionary inflector together with the constraints.
65+
int64_t wordGrammemes = 0;
66+
dictionary.getCombinedBinaryType(&wordGrammemes, lemma);
5567

56-
// To make compiler quiet about unused variable.
57-
if (enableInflectionGuess)
58-
return nullptr;
68+
auto inflectionResult = dictionaryInflector.inflect(lemma, wordGrammemes, string_constraints, {});
69+
if (inflectionResult) {
70+
inflection = *inflectionResult;
71+
}
72+
73+
if (inflection.empty()) {
74+
return lemma;
75+
}
76+
77+
return inflection;
78+
}
5979

60-
// TODO Implement the rest
61-
return nullptr;
80+
::inflection::dialog::DisplayValue * SrGrammarSynthesizer_SrDisplayFunction::getDisplayValue(const dialog::SemanticFeatureModel_DisplayData &displayData, const ::std::map<::inflection::dialog::SemanticFeature, ::std::u16string> &constraints, bool /* enableInflectionGuess */) const
81+
{
82+
::std::u16string displayString;
83+
if (!displayData.getValues().empty()) {
84+
displayString = displayData.getValues()[0].getDisplayString();
85+
}
86+
if (displayString.empty()) {
87+
return nullptr;
88+
}
89+
if (dictionary.isKnownWord(displayString)) {
90+
displayString = inflectString(constraints, displayString);
91+
}
92+
return new ::inflection::dialog::DisplayValue(displayString, constraints);
6293
}
6394

6495
} // namespace inflection::grammar::synthesis

inflection/src/inflection/grammar/synthesis/SrGrammarSynthesizer_SrDisplayFunction.hpp

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -21,22 +21,22 @@ class inflection::grammar::synthesis::SrGrammarSynthesizer_SrDisplayFunction
2121
public:
2222
typedef ::inflection::dialog::DefaultDisplayFunction super;
2323

24-
private:
25-
const ::inflection::dictionary::DictionaryMetaData& dictionary;
26-
const ::inflection::dialog::SemanticFeature* countFeature { };
27-
const ::inflection::dialog::SemanticFeature* genderFeature { };
28-
const ::inflection::dialog::SemanticFeature* partOfSpeechFeature { };
29-
const ::std::unique_ptr<::inflection::tokenizer::Tokenizer> tokenizer;
30-
const ::inflection::dialog::DictionaryLookupInflector dictionaryInflector;
31-
32-
public:
3324
::inflection::dialog::DisplayValue * getDisplayValue(const dialog::SemanticFeatureModel_DisplayData &displayData, const ::std::map<::inflection::dialog::SemanticFeature, ::std::u16string> &constraints, bool enableInflectionGuess) const override;
3425

35-
public:
3626
explicit SrGrammarSynthesizer_SrDisplayFunction(const ::inflection::dialog::SemanticFeatureModel& model);
3727
~SrGrammarSynthesizer_SrDisplayFunction() override;
3828

39-
public:
4029
SrGrammarSynthesizer_SrDisplayFunction(SrGrammarSynthesizer_SrDisplayFunction&) = delete;
4130
SrGrammarSynthesizer_SrDisplayFunction& operator=(const SrGrammarSynthesizer_SrDisplayFunction&) = delete;
31+
32+
private:
33+
::std::u16string inflectString(const ::std::map<::inflection::dialog::SemanticFeature, ::std::u16string>& constraints, const ::std::u16string& lemma) const;
34+
35+
const ::inflection::dictionary::DictionaryMetaData& dictionary;
36+
const ::inflection::dialog::SemanticFeature& caseFeature;
37+
const ::inflection::dialog::SemanticFeature& numberFeature;
38+
const ::inflection::dialog::SemanticFeature& genderFeature;
39+
const ::inflection::dictionary::Inflector &inflector;
40+
const ::std::unique_ptr<::inflection::tokenizer::Tokenizer> tokenizer;
41+
::inflection::dialog::DictionaryLookupInflector dictionaryInflector;
4242
};

inflection/test/resources/inflection/dialog/inflection/sr.xml

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,18 @@
44
License & terms of use: http://www.unicode.org/copyright.html
55
-->
66
<inflectionTest locale="sr">
7-
<!-- Some nominative case tests where we don't expect changes -->
7+
<!-- Tests of dictionary words -->
88
<test><source case="nominative" number="singular">једро</source><result>једро</result></test>
9+
<test><source case="nominative" number="plural">једро</source><result>једра</result></test>
910
<test><source case="nominative" number="singular">жена</source><result>жена</result></test>
1011
<test><source case="nominative" number="singular">камен</source><result>камен</result></test>
1112
<test><source case="nominative" pos="proper-noun">Петар</source><result>Петар</result></test>
13+
<test><source case="vocative" number="singular" gender="feminine" pos="proper-noun">Љубица</source><result>Љубице</result></test>
14+
<test><source case="vocative" number="singular" gender="feminine" pos="noun">Србија</source><result>Србијо</result></test>
15+
<test><source case="instrumental" number="plural" gender="masculine" pos="noun">становник</source><result>становницима</result></test>
16+
<test><source case="genitive" number="plural" gender="neuter" pos="adjective">плава</source><result>плавe</result></test>
17+
<!-- Words not in the dictionary but similar in shape -->
18+
<!-- test><source case="vocative" number="singular" gender="masculine" pos="noun">уранак</source><result>уранче</result></test -->
19+
<!-- test><source case="vocative" number="singular" gender="masculine" pos="noun">игроказ</source><result>игрокаже</result></test -->
20+
<!-- test><source case="vocative" number="singular" gender="masculine" pos="noun">пашњак</source><result>пашњаче</result></test -->
1221
</inflectionTest>

0 commit comments

Comments
 (0)