Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ name := getPackageName(is_silicon, is_gpu, is_aarch64)

organization := "com.johnsnowlabs.nlp"

version := "6.0.5"
version := "6.1.0-rc1"

(ThisBuild / scalaVersion) := scalaVer

Expand Down
10 changes: 5 additions & 5 deletions project/Dependencies.scala
Original file line number Diff line number Diff line change
Expand Up @@ -128,11 +128,11 @@ object Dependencies {
val azureIdentity = "com.azure" % "azure-identity" % azureIdentityVersion % Provided
val azureStorage = "com.azure" % "azure-storage-blob" % azureStorageVersion % Provided

val llamaCppVersion = "0.1.6"
val llamaCppCPU = "com.johnsnowlabs.nlp" %% "jsl-llamacpp-cpu" % llamaCppVersion
val llamaCppGPU = "com.johnsnowlabs.nlp" %% "jsl-llamacpp-gpu" % llamaCppVersion
val llamaCppSilicon = "com.johnsnowlabs.nlp" %% "jsl-llamacpp-silicon" % llamaCppVersion
val llamaCppAarch64 = "com.johnsnowlabs.nlp" %% "jsl-llamacpp-aarch64" % llamaCppVersion
val llamaCppVersion = "1.0.1"
val llamaCppCPU = "com.johnsnowlabs.nlp" % "jsl-llamacpp-cpu" % llamaCppVersion
val llamaCppGPU = "com.johnsnowlabs.nlp" % "jsl-llamacpp-gpu" % llamaCppVersion
val llamaCppSilicon = "com.johnsnowlabs.nlp" % "jsl-llamacpp-silicon" % llamaCppVersion
val llamaCppAarch64 = "com.johnsnowlabs.nlp" % "jsl-llamacpp-aarch64" % llamaCppVersion

val jsoupVersion = "1.18.2"

Expand Down
4 changes: 3 additions & 1 deletion python/sparknlp/annotator/seq2seq/auto_gguf_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -253,7 +253,9 @@ def __init__(self, classname="com.johnsnowlabs.nlp.annotators.seq2seq.AutoGGUFMo
nCtx=4096,
nBatch=512,
embedding=False,
nPredict=100
nPredict=100,
nGpuLayers=99,
systemPrompt="You are a helpful assistant."
)

@staticmethod
Expand Down
166 changes: 100 additions & 66 deletions python/sparknlp/common/properties.py

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions python/test/annotator/embeddings/auto_gguf_embeddings_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,8 +153,8 @@ def runTest(self):
.setInputCols("document")
.setOutputCol("embeddings")
.setBatchSize(4)
.setNUbatch(2048)
.setNBatch(2048)
.setNUbatch(4096)
.setNBatch(4096)
)
pipeline = Pipeline().setStages([self.document_assembler, model])
results = pipeline.fit(self.long_data).transform(self.long_data)
Expand Down
30 changes: 15 additions & 15 deletions python/test/annotator/seq2seq/auto_gguf_model_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def runTest(self):
.setOutputCol("completions")
.setBatchSize(4)
.setNPredict(20)
.setNGpuLayers(5)
.setNGpuLayers(99)
.setTemperature(0.4)
.setTopK(40)
.setTopP(0.9)
Expand Down Expand Up @@ -78,7 +78,7 @@ def runTest(self):
DocumentAssembler().setInputCol("text").setOutputCol("document")
)

model = (
model: AutoGGUFModel = (
AutoGGUFModel.pretrained()
.setInputCols("document")
.setOutputCol("completions")
Expand All @@ -87,23 +87,23 @@ def runTest(self):

# Model Parameters
model.setNThreads(8)
model.setNThreadsDraft(8)
# model.setNThreadsDraft(8)
model.setNThreadsBatch(8)
model.setNThreadsBatchDraft(8)
# model.setNThreadsBatchDraft(8)
model.setNCtx(512)
model.setNBatch(32)
model.setNUbatch(32)
model.setNDraft(5)
model.setNChunks(-1)
model.setNSequences(1)
model.setPSplit(0.1)
# model.setNChunks(-1)
# model.setNSequences(1)
# model.setPSplit(0.1)
model.setNGpuLayers(99)
model.setNGpuLayersDraft(99)
model.setGpuSplitMode("NONE")
model.setMainGpu(0)
model.setTensorSplit([])
model.setGrpAttnN(1)
model.setGrpAttnW(512)
# model.setTensorSplit([])
# model.setGrpAttnN(1)
# model.setGrpAttnW(512)
model.setRopeFreqBase(1.0)
model.setRopeFreqScale(1.0)
model.setYarnExtFactor(1.0)
Expand All @@ -113,14 +113,14 @@ def runTest(self):
model.setYarnOrigCtx(0)
model.setDefragmentationThreshold(-1.0)
model.setNumaStrategy("DISTRIBUTE")
model.setRopeScalingType("UNSPECIFIED")
model.setRopeScalingType("NONE")
model.setPoolingType("NONE")
model.setModelDraft("")
model.setLookupCacheStaticFilePath("/tmp/sparknlp-llama-cpp-cache")
model.setLookupCacheDynamicFilePath("/tmp/sparknlp-llama-cpp-cache")
# model.setLookupCacheStaticFilePath("/tmp/sparknlp-llama-cpp-cache")
# model.setLookupCacheDynamicFilePath("/tmp/sparknlp-llama-cpp-cache")
model.setEmbedding(False)
model.setFlashAttention(False)
model.setInputPrefixBos(False)
# model.setInputPrefixBos(False)
model.setUseMmap(False)
model.setUseMlock(False)
model.setNoKvOffload(False)
Expand Down Expand Up @@ -164,7 +164,7 @@ def runTest(self):
# Special PySpark Parameters (Scala StructFeatures)
model.setTokenIdBias({0: 0.0, 1: 0.0})
model.setTokenBias({"!": 0.0, "?": 0.0})
model.setLoraAdapters({" ": 0.0})
# model.setLoraAdapters({" ": 0.0})

pipeline = Pipeline().setStages([document_assembler, model])
results = pipeline.fit(data).transform(data)
Expand Down
4 changes: 2 additions & 2 deletions src/main/scala/com/johnsnowlabs/ml/gguf/GGUFWrapper.scala
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@
*/
package com.johnsnowlabs.ml.gguf

import com.johnsnowlabs.nlp.llama.{LlamaModel, ModelParameters}
import com.johnsnowlabs.nlp.util.io.ResourceHelper
import de.kherud.llama.{LlamaModel, ModelParameters}
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.spark.SparkFiles
import org.apache.spark.sql.SparkSession
Expand All @@ -42,7 +42,7 @@ class GGUFWrapper(var modelFileName: String, var modelFolder: String) extends Se
val modelFilePath = SparkFiles.get(modelFileName)

if (Paths.get(modelFilePath).toFile.exists()) {
modelParameters.setModelFilePath(modelFilePath)
modelParameters.setModel(modelFilePath)
llamaModel = GGUFWrapper.withSafeGGUFModelLoader(modelParameters)
} else
throw new IllegalStateException(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
*/
package com.johnsnowlabs.ml.gguf

import com.johnsnowlabs.nlp.llama.{LlamaModel, ModelParameters}
import de.kherud.llama.{LlamaModel, ModelParameters}
import com.johnsnowlabs.nlp.util.io.ResourceHelper
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.spark.SparkFiles
Expand Down Expand Up @@ -44,8 +44,8 @@ class GGUFWrapperMultiModal(var modelFileName: String, var mmprojFileName: Strin
Paths.get(modelFilePath).toFile.exists() && Paths.get(mmprojFilePath).toFile.exists()

if (filesExist) {
modelParameters.setModelFilePath(modelFilePath)
modelParameters.setMMProj(mmprojFilePath)
modelParameters.setModel(modelFilePath)
// modelParameters.setMMProj(mmprojFilePath) // TODO: Vision models implementation
llamaModel = GGUFWrapperMultiModal.withSafeGGUFModelLoader(modelParameters)
} else
throw new IllegalStateException(
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
package com.johnsnowlabs.nlp

import com.johnsnowlabs.nlp.annotators.seq2seq.AutoGGUFModel
import com.johnsnowlabs.nlp.llama.InferenceParameters
import com.johnsnowlabs.nlp.llama.args._
import de.kherud.llama.InferenceParameters
import de.kherud.llama.args._
import com.johnsnowlabs.nlp.serialization.StructFeature
import org.apache.spark.ml.param._

Expand Down
Loading
Loading