Changes from 3 commits
2 changes: 1 addition & 1 deletion README.md
@@ -230,7 +230,7 @@ For data synthesis, we utilized [GraphRAG](https://github.com/microsoft/graphrag

For model deployment, we utilized [llama.cpp](https://github.com/ggml-org/llama.cpp), which provides efficient inference capabilities.

-Our base models primarily come from the [Qwen2.5](https://huggingface.co/Qwen) series.
+Our base models primarily come from the [Qwen3](https://huggingface.co/Qwen) series.
I am not sure what Second Me's model update policy is. As a community user I definitely want to use Qwen3 given its SOTA capabilities (though I haven't tested it yet, so there may be some ins and outs).

Huge thanks to your work!

On top of that, it would be nice to add Qwen 3 alongside the existing Qwen 2.5 support, i.e. as an additional supported model rather than a replacement for the existing Qwen 2.5 models.

Contributor Author



I was wondering that as well. I'm testing right now to make sure Qwen 3 doesn't break anything, but I don't know whether the Second Me team actually wants to update. I can also keep the 2.5 models and add the option for Qwen 3 if that's preferable?
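Supporting both families side by side, as suggested, could look something like the following Python sketch. The model names and RAM figures mirror the ones changed in this PR; the registry structure and helper are hypothetical, not code from the repo:

```python
# Hypothetical registry keeping both Qwen 2.5 and Qwen 3 selectable as base models.
SUPPORTED_BASE_MODELS = {
    # family -> list of (model_name, recommended RAM in GB)
    "qwen2.5": [
        ("Qwen2.5-0.5B-Instruct", 8),
        ("Qwen2.5-1.5B-Instruct", 16),
        ("Qwen2.5-3B-Instruct", 32),
        ("Qwen2.5-7B-Instruct", 64),
    ],
    "qwen3": [
        ("Qwen3-0.6B", 8),
        ("Qwen3-1.7B", 16),
        ("Qwen3-4B", 32),
        ("Qwen3-8B", 64),
    ],
}


def all_options():
    """Flatten the registry into (value, label) pairs for a model picker."""
    return [
        (name, f"{name} ({ram}GB+ RAM Recommended)")
        for family in SUPPORTED_BASE_MODELS.values()
        for name, ram in family
    ]
```

With this shape, adding a new family is a one-entry change and the frontend options list can be generated rather than hand-edited.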


We also want to extend our sincere gratitude to all users who have experienced Second Me. We recognize that there is significant room for optimization throughout the entire pipeline, and we are fully committed to iterative improvements to ensure everyone can enjoy the best possible experience locally.

2 changes: 1 addition & 1 deletion README_ja.md
@@ -198,7 +198,7 @@ Made with [contrib.rocks](https://contrib.rocks).

For model deployment, we used [llama.cpp](https://github.com/ggml-org/llama.cpp), which provides efficient inference capabilities.

-Our base models primarily come from the [Qwen2.5](https://huggingface.co/Qwen) series.
+Our base models primarily come from the [Qwen3](https://huggingface.co/Qwen) series.

We also sincerely thank every user who has tried Second Me. We recognize that there is substantial room for optimization across the entire pipeline, and we are fully committed to iterative improvements so that everyone can enjoy the best possible experience locally.

Binary file modified dependencies/llama.cpp.zip
Binary file not shown.
6 changes: 3 additions & 3 deletions docs/Custom Model Config(Ollama).md
zpitroda marked this conversation as resolved.
@@ -33,7 +33,7 @@ First, download and install Ollama from the official website:

```bash
curl http://127.0.0.1:11434/v1/chat/completions -H "Content-Type: application/json" -d '{
-  "model": "qwen2.5:0.5b",
+  "model": "qwen3:0.6b",
"messages": [
{"role": "user", "content": "Why is the sky blue?"}
  ]
}'
```
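The same smoke test can be run from Python against Ollama's OpenAI-compatible endpoint. A minimal stdlib-only sketch, assuming Ollama is serving `qwen3:0.6b` on the default local port (the helper names are hypothetical):

```python
import json
import urllib.request


def build_payload(prompt, model="qwen3:0.6b"):
    """Assemble an OpenAI-style chat completion request body."""
    return {"model": model, "messages": [{"role": "user", "content": prompt}]}


def chat_once(prompt, model="qwen3:0.6b", base="http://127.0.0.1:11434/v1"):
    """POST one chat turn to a local Ollama server and return the reply text."""
    req = urllib.request.Request(
        f"{base}/chat/completions",
        data=json.dumps(build_payload(prompt, model)).encode("utf-8"),
        headers={"Content-Type": "application/json"},
    )
    with urllib.request.urlopen(req) as resp:
        body = json.load(resp)
    # OpenAI-compatible servers put the generated text at choices[0].message.content
    return body["choices"][0]["message"]["content"]
```

For example, `chat_once("Why is the sky blue?")` mirrors the curl call above.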
@@ -84,7 +84,7 @@ EMBEDDING_MAX_TEXT_LENGTH=embedding_model_context_length

```
Chat:
-Model Name: qwen2.5:0.5b
+Model Name: qwen3:0.6b
API Key: ollama
API Endpoint: http://127.0.0.1:11434/v1

@@ -98,7 +98,7 @@ API Endpoint: http://127.0.0.1:11434/v1

```
Chat:
-Model Name: qwen2.5:0.5b
+Model Name: qwen3:0.6b
API Key: ollama
API Endpoint: http://host.docker.internal:11434/v1

16 changes: 8 additions & 8 deletions lpm_frontend/src/app/dashboard/train/training/page.tsx
@@ -49,20 +49,20 @@ interface TrainingDetail {

const baseModelOptions = [
{
-    value: 'Qwen2.5-0.5B-Instruct',
-    label: 'Qwen2.5-0.5B-Instruct (8GB+ RAM Recommended)'
+    value: 'Qwen3-0.6B',
+    label: 'Qwen3-0.6B (8GB+ RAM Recommended)'
},
{
-    value: 'Qwen2.5-1.5B-Instruct',
-    label: 'Qwen2.5-1.5B-Instruct (16GB+ RAM Recommended)'
+    value: 'Qwen3-1.7B',
+    label: 'Qwen3-1.7B (16GB+ RAM Recommended)'
},
{
-    value: 'Qwen2.5-3B-Instruct',
-    label: 'Qwen2.5-3B-Instruct (32GB+ RAM Recommended)'
+    value: 'Qwen3-4B',
+    label: 'Qwen3-4B (32GB+ RAM Recommended)'
},
{
-    value: 'Qwen2.5-7B-Instruct',
-    label: 'Qwen2.5-7B-Instruct (64GB+ RAM Recommended)'
+    value: 'Qwen3-8B',
+    label: 'Qwen3-8B (64GB+ RAM Recommended)'
}
];
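The RAM guidance encoded in these labels could also drive an automatic default. A hedged Python sketch (the thresholds mirror the options in this file; the picker function is hypothetical, and detecting total RAM is left to the caller):

```python
# (model_name, minimum recommended RAM in GB), smallest first -- mirrors the options above
BASE_MODELS = [
    ("Qwen3-0.6B", 8),
    ("Qwen3-1.7B", 16),
    ("Qwen3-4B", 32),
    ("Qwen3-8B", 64),
]


def pick_base_model(total_ram_gb):
    """Return the largest base model whose RAM recommendation fits."""
    chosen = None
    for name, min_ram in BASE_MODELS:
        if total_ram_gb >= min_ram:
            chosen = name
    return chosen  # None if even the smallest recommendation isn't met
```

For example, a 16GB machine would default to `Qwen3-1.7B` rather than the larger variants.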

2 changes: 1 addition & 1 deletion lpm_frontend/src/store/useTrainingStore.ts
@@ -151,7 +151,7 @@ export const useTrainingStore = create<ModelState>((set, get) => ({

try {
const res = await getTrainProgress({
-        model_name: config.model_name || 'Qwen2.5-0.5B-Instruct'
+        model_name: config.model_name || 'Qwen3-0.6B'
});

if (res.data.code === 0) {
2 changes: 2 additions & 0 deletions lpm_kernel/L2/gguf-py/gguf/constants.py
@@ -247,6 +247,7 @@ class MODEL_ARCH(IntEnum):
STABLELM = auto()
QWEN = auto()
QWEN2 = auto()
+QWEN3 = auto()
QWEN2MOE = auto()
QWEN2VL = auto()
PHI2 = auto()
@@ -434,6 +435,7 @@ class MODEL_TENSOR(IntEnum):
MODEL_ARCH.STABLELM: "stablelm",
MODEL_ARCH.QWEN: "qwen",
MODEL_ARCH.QWEN2: "qwen2",
+MODEL_ARCH.QWEN3: "qwen3",
MODEL_ARCH.QWEN2MOE: "qwen2moe",
MODEL_ARCH.QWEN2VL: "qwen2vl",
MODEL_ARCH.PHI2: "phi2",
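Registering a new architecture in gguf-py follows the same two-step pattern the hunks above show: add an enum member, then a matching row in the name table. A minimal self-contained sketch of that pattern (heavily abbreviated; the real tables in constants.py are much longer):

```python
from enum import IntEnum, auto


class MODEL_ARCH(IntEnum):
    QWEN = auto()
    QWEN2 = auto()
    QWEN3 = auto()  # step 1: new enum member


MODEL_ARCH_NAMES = {
    MODEL_ARCH.QWEN: "qwen",
    MODEL_ARCH.QWEN2: "qwen2",
    MODEL_ARCH.QWEN3: "qwen3",  # step 2: matching GGUF architecture string
}


def arch_name(arch):
    """Look up the string written into a GGUF file for a given architecture."""
    return MODEL_ARCH_NAMES[arch]
```

Keeping the two tables in sync matters: an enum member without a name-table entry would fail at lookup time rather than at import time.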
2 changes: 1 addition & 1 deletion lpm_kernel/L2/mlx_training/convert_and_serve.sh
@@ -1,4 +1,4 @@
-mlx_lm.fuse --model mlx-community/Qwen2.5-7B-Instruct-4bit \
+mlx_lm.fuse --model mlx-community/Qwen3-8B-4bit \
--adapter-path "resources/model/output/mlx/adapters" \
--save-path "resources/model/output/mlx"

2 changes: 1 addition & 1 deletion lpm_kernel/L2/mlx_training/lora_config.yaml
@@ -1,5 +1,5 @@
# The path to the local model directory or Hugging Face repo.
-model: "mlx-community/Qwen2.5-7B-Instruct-4bit"
+model: "mlx-community/Qwen3-8B-4bit"

# Whether or not to train (boolean)
train: true
8 changes: 4 additions & 4 deletions lpm_kernel/L2/utils.py
@@ -645,11 +645,11 @@ def save_hf_model(model_name=None, log_file_path=None) -> str:
config = Config()
model_name = config.get("training", {}).get("model_name")
if not model_name:
-        logger.warning("No model name provided and none found in config. Using Qwen2.5-0.5B-Instruct as fallback.")
-        model_name = "Qwen2.5-0.5B-Instruct"
+        logger.warning("No model name provided and none found in config. Using Qwen3-0.6B as fallback.")
+        model_name = "Qwen3-0.6B"
except Exception as e:
-        logger.warning(f"Failed to get model name from config: {str(e)}. Using Qwen2.5-0.5B-Instruct as fallback.")
-        model_name = "Qwen2.5-0.5B-Instruct"
+        logger.warning(f"Failed to get model name from config: {str(e)}. Using Qwen3-0.6B as fallback.")
+        model_name = "Qwen3-0.6B"

base_dir = os.path.join(os.getcwd(), "resources/L2/base_models")
# Normalize model name and check for path traversal attempts
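The path-traversal check mentioned in the comment above matters here because `model_name` is joined into a filesystem path under `resources/L2/base_models`. One defensive sketch of such a check (a hypothetical helper, not the repo's actual implementation):

```python
import os


def safe_model_dir(base_dir, model_name):
    """Join model_name under base_dir, rejecting path-traversal attempts."""
    candidate = os.path.normpath(os.path.join(base_dir, model_name))
    # After normalization, the result must still lie strictly inside base_dir.
    if not candidate.startswith(os.path.normpath(base_dir) + os.sep):
        raise ValueError(f"unsafe model name: {model_name!r}")
    return candidate
```

A name like `"../../etc/passwd"` normalizes to a path outside `base_dir` and is rejected, while `"Qwen3-0.6B"` passes through unchanged.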
2 changes: 1 addition & 1 deletion lpm_kernel/api/domains/kernel2/routes_l2.py
@@ -76,7 +76,7 @@ def downloadModel():

Request body:
{
-        "model_name": str # Model name, e.g. "Qwen/Qwen2.5-0.5B-Instruct"
+        "model_name": str # Model name, e.g. "Qwen/Qwen3-0.6B"
}

Returns:
@@ -18,7 +18,7 @@ class TrainingParamsManager:

# Default training parameters
_default_training_params = {
-        "model_name": "Qwen2.5-0.5B-Instruct",
+        "model_name": "Qwen3-0.6B",
"learning_rate": 1e-4,
"number_of_epochs": 3,
"concurrency_threads": 2,
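Defaults like these are typically overlaid with whatever the user supplies, with user keys winning. A minimal sketch of that merge (the parameter names and values mirror the hunk above; the merge helper itself is hypothetical):

```python
DEFAULT_TRAINING_PARAMS = {
    "model_name": "Qwen3-0.6B",
    "learning_rate": 1e-4,
    "number_of_epochs": 3,
    "concurrency_threads": 2,
}


def resolve_params(user_params=None):
    """Start from the defaults and let user-supplied keys override them."""
    merged = dict(DEFAULT_TRAINING_PARAMS)
    merged.update(user_params or {})
    return merged
```

So `resolve_params({"model_name": "Qwen3-8B"})` keeps the default learning rate while swapping in the larger base model.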
4 changes: 2 additions & 2 deletions pyproject.toml
@@ -42,8 +42,8 @@ pytest = "7.4.4"
ruff = "0.1.15"
pandas = "2.2.3"
fnllm = {extras = ["azure", "openai"], version = "0.1.2"}
-transformers = "4.47.1"
-torch = "2.5.1"
+transformers = "4.51.3"
+torch = "2.7.0"
peft = "0.14.0"
trl = "0.13.0"
gguf = "0.10.0"
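Since Qwen3 checkpoints require a newer transformers than the previous pin, a startup guard that fails fast on stale installs can save confusing load errors. A hedged sketch (the minimum versions are taken from the bump above; the guard itself is hypothetical):

```python
from importlib.metadata import PackageNotFoundError, version

# Minimum versions matching the pyproject.toml bump for Qwen3 support.
MINIMUM = {"transformers": (4, 51, 3), "torch": (2, 7, 0)}


def parse(v):
    """'4.51.3' -> (4, 51, 3); drops any local suffix like '+cu121'."""
    return tuple(int(p) for p in v.split("+")[0].split(".")[:3])


def check_versions(minimum=MINIMUM):
    """Return a list of human-readable problems; empty means all good."""
    problems = []
    for pkg, need in minimum.items():
        try:
            have = parse(version(pkg))
        except PackageNotFoundError:
            problems.append(f"{pkg} not installed")
            continue
        if have < need:
            problems.append(f"{pkg} {have} < required {need}")
    return problems
```

Calling `check_versions()` at startup and logging the returned list makes a too-old transformers or torch visible before model loading fails deeper in the stack.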