Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion inflection/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ inflection_debug_vars(NUM_PROCESSORS_VAL)
# Unicode Inflection cache variables
set(NUM_PROCESSORS ${NUM_PROCESSORS_VAL} CACHE STRING "Number of cores to be used in make")

# Morphun options
option(PROFILING "Turn on code profiling" OFF)

add_compile_options(${CXX_STD_LIB_FLAG})
Expand Down
22 changes: 21 additions & 1 deletion inflection/resources/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,26 @@ foreach (LOCALE IN LISTS BINARY_DICT_LOCALES)
endforeach ()
# -------- End Dictionary files section

# ======== Start tok dictionary files section
# Compiles every tokenizer.dictionary source found below the tokenizer source
# directory into a .tokd file at the same relative location below the generated
# data root, by running the buildTokDictionary tool on it.
#
# Variables extended for the rest of this file:
#   RESOURCE_DIRS    - gains the output directory of each generated .tokd file
#   BINARY_TOK_DICTS - gains the path of each generated .tokd file
set(BINARY_TOK_DICT_SRC_DIR "${CMAKE_CURRENT_SOURCE_DIR}/org/unicode/inflection/tokenizer")
set(BINARY_TOK_DICT_DIR_ROOT "${INFLECTION_DATA_ROOT}/inflection/tokenizer")
file(MAKE_DIRECTORY "${BINARY_TOK_DICT_DIR_ROOT}")

file(GLOB_RECURSE BINARY_TOK_DICT_SRCS CONFIGURE_DEPENDS "${BINARY_TOK_DICT_SRC_DIR}/*/tokenizer.dictionary")
foreach (BINARY_TOK_DICT_SRC IN LISTS BINARY_TOK_DICT_SRCS)
    # Map <src dir>/<sub path>.dictionary to <data root>/<sub path>.tokd.
    # The source directory is stripped with file(RELATIVE_PATH) rather than
    # being interpolated into a regular expression, so regex metacharacters in
    # the checkout path (e.g. '+' or '.') cannot corrupt the match; only the
    # trailing extension is rewritten.
    file(RELATIVE_PATH BINARY_TOK_DICT_REL "${BINARY_TOK_DICT_SRC_DIR}" "${BINARY_TOK_DICT_SRC}")
    string(REGEX REPLACE "\\.dictionary$" ".tokd" BINARY_TOK_DICT_REL "${BINARY_TOK_DICT_REL}")
    set(BINARY_TOK_DICT "${BINARY_TOK_DICT_DIR_ROOT}/${BINARY_TOK_DICT_REL}")

    get_filename_component(BINARY_TOK_DICT_DIR "${BINARY_TOK_DICT}" DIRECTORY)
    list(APPEND RESOURCE_DIRS "${BINARY_TOK_DICT_DIR}")
    list(APPEND BINARY_TOK_DICTS "${BINARY_TOK_DICT}")

    # The tool is run with ${LIBRARY_PATH_NAME} set to the ICU library
    # directory. "cmake -E env" sets the variable portably and stays correct
    # under VERBATIM quoting, unlike a shell-style "VAR=value cmd" prefix.
    # NOTE(review): the tool path assumes the buildTokDictionary target's
    # binary lives in ../tools/buildTokDictionary of the build tree, as in the
    # original command — confirm before switching to $<TARGET_FILE:...>.
    add_custom_command(
        OUTPUT "${BINARY_TOK_DICT}"
        COMMAND "${CMAKE_COMMAND}" -E env "${LIBRARY_PATH_NAME}=${ICU_LIB_DIRECTORY}"
                "${CMAKE_CURRENT_BINARY_DIR}/../tools/buildTokDictionary/buildTokDictionary"
                "${BINARY_TOK_DICT_SRC}"
                "${BINARY_TOK_DICT}"
        DEPENDS buildTokDictionary "${BINARY_TOK_DICT_SRC}"
        COMMENT "Building tokenizer dictionary ${BINARY_TOK_DICT}"
        VERBATIM
    )
endforeach()
# -------- End tok dictionary section

file(GLOB_RECURSE RESOURCE_BINARIES ${CMAKE_CURRENT_SOURCE_DIR}/share/*)
install_build_resources(
RESOURCE_BINARIES
Expand All @@ -155,7 +175,7 @@ install_build_resources(
RESOURCE_BINARIES_DIST
)

add_custom_target(inflection-data ALL DEPENDS ${BINARY_DICTS} ${RESOURCE_BINARIES_DIST})
add_custom_target(inflection-data ALL DEPENDS ${BINARY_DICTS} ${BINARY_TOK_DICTS} ${RESOURCE_BINARIES_DIST})

#Make directories for all generated resource files
list(REMOVE_DUPLICATES RESOURCE_DIRS)
Expand Down
166 changes: 102 additions & 64 deletions inflection/resources/org/unicode/inflection/dictionary/dictionary_ar.lst

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ Est: singular masculine vowel-start noun proper-noun inflection=40
Eurockéennes de Belfort: plural feminine vowel-start noun inflection=b0
FIAC: singular feminine abbreviation noun
Fashion week: singular feminine noun inflection=28
France: singular plural feminine noun proper-noun inflection=7
France: singular feminine noun proper-noun inflection=7
FrancoFolies: plural masculine noun proper-noun inflection=36
Francofolies: plural feminine noun proper-noun inflection=67
Félix: singular plural masculine noun proper-noun inflection=5
Expand Down
229 changes: 195 additions & 34 deletions inflection/resources/org/unicode/inflection/dictionary/dictionary_he.lst

Large diffs are not rendered by default.

295 changes: 70 additions & 225 deletions inflection/resources/org/unicode/inflection/dictionary/dictionary_it.lst

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ bluetooth: vowel-end noun
abby: vowel-end
c&c: vowel-end
cgv: vowel-end
chatgpt: vowel-end
cj: vowel-end
cnh: vowel-end
cs: vowel-end
Expand Down
Original file line number Diff line number Diff line change
@@ -1,63 +1,40 @@
Ås: noun proper-noun inflection=6
absurd: singular indefinite adjective
administrasjon: singular indefinite masculine noun inflection=2
administrasjonen: singular definite masculine noun inflection=2
administrasjonene: plural definite masculine noun inflection=2
administrasjoner: plural indefinite masculine noun inflection=2
ås: singular genitive indefinite masculine feminine noun inflection=314
avtale: singular indefinite masculine noun verb inflection=7
belta: plural definite neuter noun inflection=16
beltas: plural genitive definite neuter noun inflection=16
belte: singular indefinite neuter noun inflection=16
belter: plural indefinite neuter noun inflection=1 inflection=16
belters: plural genitive indefinite neuter noun inflection=16
beltes: singular genitive indefinite neuter noun inflection=16
beltet: singular definite neuter noun inflection=1 inflection=16
beltets: singular genitive definite neuter noun inflection=16
bokmerke: singular indefinite neuter noun inflection=f
fast: singular indefinite adjective verb
administrasjon: singular indefinite masculine noun inflection=1
all: determiner
Apple: singular masculine noun proper-noun inflection=7
ås: singular genitive indefinite masculine feminine noun inflection=17e
avtale: singular indefinite masculine noun verb inflection=4
belta: plural definite neuter noun inflection=d
belte: singular indefinite neuter noun inflection=d
beltene: plural definite neuter noun inflection=d
bildøra: singular definite feminine noun inflection=64
bokmerke: singular indefinite neuter noun inflection=d
budskap: singular plural indefinite neuter noun inflection=6
ekte: singular plural definite indefinite adjective
fast: singular indefinite adjective
gammel: singular indefinite adjective
god: singular indefinite adjective
høyest: singular plural indefinite adjective
høyeste: singular plural definite adjective
jazz: singular indefinite masculine noun verb inflection=12
lampa: singular definite feminine noun inflection=78
lampe: singular indefinite masculine feminine noun inflection=78
lunken: singular indefinite masculine adjective noun inflection=1
møte: singular indefinite neuter noun verb inflection=16
møter: plural indefinite neuter noun verb inflection=16
øvinga: singular definite feminine noun inflection=3
vogntoga: plural definite neuter noun inflection=4
budskap: singular plural indefinite neuter noun inflection=9
budskapa: plural definite neuter noun inflection=9
budskapas: plural genitive definite neuter noun inflection=9
budskapene: plural definite neuter noun inflection=9
budskapenes: plural genitive definite neuter noun inflection=9
budskaper: plural indefinite neuter noun inflection=9
budskapers: plural genitive indefinite neuter noun inflection=9
budskapet: singular definite neuter noun inflection=9
budskapets: singular genitive definite neuter noun inflection=9
budskaps: singular plural genitive indefinite neuter noun inflection=9
mann: singular indefinite masculine noun verb inflection=87
menn: plural indefinite masculine noun inflection=87
bildør: singular indefinite masculine feminine noun inflection=d2
bildøra: singular definite feminine noun inflection=d2
bildøras: singular genitive definite feminine noun inflection=d2
bildøren: singular definite masculine noun inflection=d2
bildørene: plural definite masculine feminine noun inflection=d2
bildørenes: plural genitive definite masculine feminine noun inflection=d2
bildørens: singular genitive definite masculine noun inflection=d2
bildører: plural indefinite masculine feminine noun inflection=d2
bildørers: plural genitive indefinite masculine feminine noun inflection=d2
bildørs: singular genitive indefinite masculine feminine noun inflection=d2
jazz: singular indefinite masculine noun verb inflection=b
kjør: verb
lampa: singular definite feminine noun inflection=43
lampe: singular indefinite masculine feminine noun inflection=43
lunken: singular indefinite adjective
makaber: singular indefinite adjective
mor: singular indefinite masculine feminine noun verb inflection=30 inflection=3b
moren: singular definite masculine noun inflection=30
mann: singular indefinite masculine noun verb inflection=4c
mor: singular indefinite masculine feminine noun verb inflection=31 inflection=3b
moren: singular definite masculine noun inflection=31
møte: singular indefinite neuter noun verb inflection=d
møter: plural indefinite neuter noun verb inflection=d
norsk: singular indefinite masculine adjective noun verb inflection=1f
opp: adverb
øvinga: singular definite feminine noun inflection=2
praktisk: singular indefinite adjective
rosa: singular plural definite indefinite adjective
tet: singular indefinite masculine noun inflection=1
tykk: singular indefinite adjective
vennlig: singular indefinite adjective
praktisk: singular indefinite adjective
norsk: singular indefinite masculine adjective noun verb
god: singular indefinite adjective
skjørtet: noun inflection=1
vogntoga: plural definite neuter noun inflection=3
==============================================
Manually curated for tests to pass
Copyright 2024-2024 Apple Inc. All rights reserved.
Loading