Skip to content

Commit 76eaf77

Browse files
committed
Added missing defaults in Tokenizer
1 parent 51f1f19 commit 76eaf77

File tree

1 file changed

+6
-0
lines changed

1 file changed

+6
-0
lines changed

src/main/scala/com/johnsnowlabs/nlp/annotators/Tokenizer.scala

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,12 @@ class Tokenizer(override val uid: String) extends AnnotatorApproach[TokenizerMod
9696
$(splitChars)
9797
}
9898

99+
// Registers default values for three of this annotator's params in a single call:
//   - targetPattern: the regex \S+ (one or more consecutive non-whitespace characters)
//   - contextChars: twelve single-character punctuation strings
//     . , ; : ! ? * - ( ) " '
//   - caseSensitiveExceptions: true
// NOTE(review): how each param is consumed is not visible in this hunk; presumably
// buildRuleFactory below reads them when assembling the tokenization rules - confirm.
setDefault(
100+
targetPattern -> "\\S+",
101+
contextChars -> Array(".", ",", ";", ":", "!", "?", "*", "-", "(", ")", "\"", "'"),
102+
caseSensitiveExceptions -> true
103+
)
104+
99105
def buildRuleFactory: RuleFactory = {
100106
val rules = ArrayBuffer.empty[String]
101107

0 commit comments

Comments
 (0)