Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion inflection/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ inflection_debug_vars(NUM_PROCESSORS_VAL)
# Unicode Inflection cache variables
set(NUM_PROCESSORS ${NUM_PROCESSORS_VAL} CACHE STRING "Number of cores to be used in make")

# Morphun options
option(PROFILING "Turn on code profiling" OFF)

add_compile_options(${CXX_STD_LIB_FLAG})
Expand Down
22 changes: 21 additions & 1 deletion inflection/resources/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,26 @@ foreach (LOCALE IN LISTS BINARY_DICT_LOCALES)
endforeach ()
# -------- End Dictionary files section

# ======== Start tok dictionary files section
# For every tokenizer.dictionary source found below BINARY_TOK_DICT_SRC_DIR, register a
# build rule that runs buildTokDictionary to produce a .tokd file at the same relative
# location under BINARY_TOK_DICT_DIR_ROOT. The generated paths are collected in
# BINARY_TOK_DICTS and their directories are appended to RESOURCE_DIRS.
set(BINARY_TOK_DICT_SRC_DIR "${CMAKE_CURRENT_SOURCE_DIR}/org/unicode/inflection/tokenizer")
set(BINARY_TOK_DICT_DIR_ROOT "${INFLECTION_DATA_ROOT}/inflection/tokenizer")
file(MAKE_DIRECTORY "${BINARY_TOK_DICT_DIR_ROOT}")

file(GLOB_RECURSE BINARY_TOK_DICT_SRCS CONFIGURE_DEPENDS "${BINARY_TOK_DICT_SRC_DIR}/*/tokenizer.dictionary")
foreach (BINARY_TOK_DICT_SRC IN LISTS BINARY_TOK_DICT_SRCS)
    # Derive the output path from the path relative to the source directory instead of
    # embedding the absolute source directory in a regular expression: a checkout path
    # containing regex metacharacters (e.g. "+" or "(") would otherwise fail to match and
    # leave the output path equal to the input path.
    file(RELATIVE_PATH BINARY_TOK_DICT_REL "${BINARY_TOK_DICT_SRC_DIR}" "${BINARY_TOK_DICT_SRC}")
    string(REGEX REPLACE "\\.dictionary$" ".tokd" BINARY_TOK_DICT_REL "${BINARY_TOK_DICT_REL}")
    set(BINARY_TOK_DICT "${BINARY_TOK_DICT_DIR_ROOT}/${BINARY_TOK_DICT_REL}")

    get_filename_component(BINARY_TOK_DICT_DIR "${BINARY_TOK_DICT}" DIRECTORY)
    list(APPEND RESOURCE_DIRS "${BINARY_TOK_DICT_DIR}")
    list(APPEND BINARY_TOK_DICTS "${BINARY_TOK_DICT}")

    # LIBRARY_PATH_NAME and ICU_LIB_DIRECTORY are defined outside this section; presumably
    # they name the dynamic-library search path variable and the ICU library directory
    # (confirm against the parent CMakeLists). "cmake -E env" sets the variable for the
    # tool without relying on POSIX-shell "VAR=value cmd" syntax, and VERBATIM makes the
    # argument escaping identical across generators and platforms.
    add_custom_command(
        OUTPUT "${BINARY_TOK_DICT}"
        COMMAND "${CMAKE_COMMAND}" -E env "${LIBRARY_PATH_NAME}=${ICU_LIB_DIRECTORY}"
                "${CMAKE_CURRENT_BINARY_DIR}/../tools/buildTokDictionary/buildTokDictionary"
                "${BINARY_TOK_DICT_SRC}"
                "${BINARY_TOK_DICT}"
        DEPENDS buildTokDictionary "${BINARY_TOK_DICT_SRC}"
        VERBATIM
    )
endforeach()
# -------- End tok dictionary section

file(GLOB_RECURSE RESOURCE_BINARIES ${CMAKE_CURRENT_SOURCE_DIR}/share/*)
install_build_resources(
RESOURCE_BINARIES
Expand All @@ -155,7 +175,7 @@ install_build_resources(
RESOURCE_BINARIES_DIST
)

add_custom_target(inflection-data ALL DEPENDS ${BINARY_DICTS} ${RESOURCE_BINARIES_DIST})
add_custom_target(inflection-data ALL DEPENDS ${BINARY_DICTS} ${BINARY_TOK_DICTS} ${RESOURCE_BINARIES_DIST})

#Make directories for all generated resource files
list(REMOVE_DUPLICATES RESOURCE_DIRS)
Expand Down
166 changes: 102 additions & 64 deletions inflection/resources/org/unicode/inflection/dictionary/dictionary_ar.lst

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ Est: singular masculine vowel-start noun proper-noun inflection=40
Eurockéennes de Belfort: plural feminine vowel-start noun inflection=b0
FIAC: singular feminine abbreviation noun
Fashion week: singular feminine noun inflection=28
France: singular plural feminine noun proper-noun inflection=7
France: singular feminine noun proper-noun inflection=7
FrancoFolies: plural masculine noun proper-noun inflection=36
Francofolies: plural feminine noun proper-noun inflection=67
Félix: singular plural masculine noun proper-noun inflection=5
Expand Down
229 changes: 195 additions & 34 deletions inflection/resources/org/unicode/inflection/dictionary/dictionary_he.lst

Large diffs are not rendered by default.

295 changes: 70 additions & 225 deletions inflection/resources/org/unicode/inflection/dictionary/dictionary_it.lst

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ bluetooth: vowel-end noun
abby: vowel-end
c&c: vowel-end
cgv: vowel-end
chatgpt: vowel-end
cj: vowel-end
cnh: vowel-end
cs: vowel-end
Expand Down
Original file line number Diff line number Diff line change
@@ -1,63 +1,40 @@
Ås: noun proper-noun inflection=6
absurd: singular indefinite adjective
administrasjon: singular indefinite masculine noun inflection=2
administrasjonen: singular definite masculine noun inflection=2
administrasjonene: plural definite masculine noun inflection=2
administrasjoner: plural indefinite masculine noun inflection=2
ås: singular genitive indefinite masculine feminine noun inflection=314
avtale: singular indefinite masculine noun verb inflection=7
belta: plural definite neuter noun inflection=16
beltas: plural genitive definite neuter noun inflection=16
belte: singular indefinite neuter noun inflection=16
belter: plural indefinite neuter noun inflection=1 inflection=16
belters: plural genitive indefinite neuter noun inflection=16
beltes: singular genitive indefinite neuter noun inflection=16
beltet: singular definite neuter noun inflection=1 inflection=16
beltets: singular genitive definite neuter noun inflection=16
bokmerke: singular indefinite neuter noun inflection=f
fast: singular indefinite adjective verb
administrasjon: singular indefinite masculine noun inflection=1
all: determiner
Apple: singular masculine noun proper-noun inflection=7
ås: singular genitive indefinite masculine feminine noun inflection=17e
avtale: singular indefinite masculine noun verb inflection=4
belta: plural definite neuter noun inflection=d
belte: singular indefinite neuter noun inflection=d
beltene: plural definite neuter noun inflection=d
bildøra: singular definite feminine noun inflection=64
bokmerke: singular indefinite neuter noun inflection=d
budskap: singular plural indefinite neuter noun inflection=6
ekte: singular plural definite indefinite adjective
fast: singular indefinite adjective
gammel: singular indefinite adjective
god: singular indefinite adjective
høyest: singular plural indefinite adjective
høyeste: singular plural definite adjective
jazz: singular indefinite masculine noun verb inflection=12
lampa: singular definite feminine noun inflection=78
lampe: singular indefinite masculine feminine noun inflection=78
lunken: singular indefinite masculine adjective noun inflection=1
møte: singular indefinite neuter noun verb inflection=16
møter: plural indefinite neuter noun verb inflection=16
øvinga: singular definite feminine noun inflection=3
vogntoga: plural definite neuter noun inflection=4
budskap: singular plural indefinite neuter noun inflection=9
budskapa: plural definite neuter noun inflection=9
budskapas: plural genitive definite neuter noun inflection=9
budskapene: plural definite neuter noun inflection=9
budskapenes: plural genitive definite neuter noun inflection=9
budskaper: plural indefinite neuter noun inflection=9
budskapers: plural genitive indefinite neuter noun inflection=9
budskapet: singular definite neuter noun inflection=9
budskapets: singular genitive definite neuter noun inflection=9
budskaps: singular plural genitive indefinite neuter noun inflection=9
mann: singular indefinite masculine noun verb inflection=87
menn: plural indefinite masculine noun inflection=87
bildør: singular indefinite masculine feminine noun inflection=d2
bildøra: singular definite feminine noun inflection=d2
bildøras: singular genitive definite feminine noun inflection=d2
bildøren: singular definite masculine noun inflection=d2
bildørene: plural definite masculine feminine noun inflection=d2
bildørenes: plural genitive definite masculine feminine noun inflection=d2
bildørens: singular genitive definite masculine noun inflection=d2
bildører: plural indefinite masculine feminine noun inflection=d2
bildørers: plural genitive indefinite masculine feminine noun inflection=d2
bildørs: singular genitive indefinite masculine feminine noun inflection=d2
jazz: singular indefinite masculine noun verb inflection=b
kjør: verb
lampa: singular definite feminine noun inflection=43
lampe: singular indefinite masculine feminine noun inflection=43
lunken: singular indefinite adjective
makaber: singular indefinite adjective
mor: singular indefinite masculine feminine noun verb inflection=30 inflection=3b
moren: singular definite masculine noun inflection=30
mann: singular indefinite masculine noun verb inflection=4c
mor: singular indefinite masculine feminine noun verb inflection=31 inflection=3b
moren: singular definite masculine noun inflection=31
møte: singular indefinite neuter noun verb inflection=d
møter: plural indefinite neuter noun verb inflection=d
norsk: singular indefinite masculine adjective noun verb inflection=1f
opp: adverb
øvinga: singular definite feminine noun inflection=2
praktisk: singular indefinite adjective
rosa: singular plural definite indefinite adjective
tet: singular indefinite masculine noun inflection=1
tykk: singular indefinite adjective
vennlig: singular indefinite adjective
praktisk: singular indefinite adjective
norsk: singular indefinite masculine adjective noun verb
god: singular indefinite adjective
skjørtet: noun inflection=1
vogntoga: plural definite neuter noun inflection=3
==============================================
Manually curated for tests to pass
Copyright 2024-2024 Apple Inc. All rights reserved.
Loading
Loading