diff --git a/pyproject.toml b/pyproject.toml
index 523794ee1..f9a33a861 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -37,10 +37,10 @@ triton = [
 ]
 
 huggingface = [
-    "unsloth_zoo>=2025.5.8",
+    "unsloth_zoo>=2025.5.10",
     "packaging",
     "tyro",
-    "transformers==4.51.3,!=4.47.0",
+    "transformers>=4.51.3,!=4.47.0,!=4.52.0,!=4.52.1,!=4.52.2",
     "datasets>=3.4.1",
     "sentencepiece>=0.2.0",
     "tqdm",
@@ -48,7 +48,7 @@ huggingface = [
     "wheel>=0.42.0",
     "numpy",
     "accelerate>=0.34.1",
-    "trl>=0.7.9,!=0.9.0,!=0.9.1,!=0.9.2,!=0.9.3,!=0.15.0,<=0.15.2",
+    "trl>=0.7.9,!=0.9.0,!=0.9.1,!=0.9.2,!=0.9.3,!=0.15.0",
    "peft>=0.7.1,!=0.11.0",
     "protobuf<4.0.0",
     "huggingface_hub",
@@ -381,10 +381,10 @@ colab-ampere-torch220 = [
     "flash-attn>=2.6.3",
 ]
 colab-new = [
-    "unsloth_zoo>=2025.5.8",
+    "unsloth_zoo>=2025.5.9",
     "packaging",
     "tyro",
-    "transformers==4.51.3,!=4.47.0",
+    "transformers>=4.51.3,!=4.47.0,!=4.52.0,!=4.52.1,!=4.52.2",
     "datasets>=3.4.1",
     "sentencepiece>=0.2.0",
     "tqdm",
@@ -399,7 +399,7 @@ colab-new = [
 ]
 colab-no-deps = [
     "accelerate>=0.34.1",
-    "trl>=0.7.9,!=0.9.0,!=0.9.1,!=0.9.2,!=0.9.3,!=0.15.0,<=0.15.2",
+    "trl>=0.7.9,!=0.9.0,!=0.9.1,!=0.9.2,!=0.9.3,!=0.15.0",
     "peft>=0.7.1",
     "xformers",
     "bitsandbytes>=0.45.5",
diff --git a/unsloth/models/_utils.py b/unsloth/models/_utils.py
index 964e874c5..932542806 100644
--- a/unsloth/models/_utils.py
+++ b/unsloth/models/_utils.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-__version__ = "2025.5.7"
+__version__ = "2025.5.8"
 
 __all__ = [
     "SUPPORTS_BFLOAT16",
diff --git a/unsloth/models/rl.py b/unsloth/models/rl.py
index b385dba2e..e5cb22643 100644
--- a/unsloth/models/rl.py
+++ b/unsloth/models/rl.py
@@ -395,7 +395,7 @@ def _patch_trl_rl_trainers(trainer_file = "grpo_trainer"):
     if trainer_file in RL_METRICS_CHANGES:
         process_extra_args = RL_METRICS_CHANGES[trainer_file]
         for process_extra_arg in process_extra_args:
-            other_metrics_processor += process_extra_arg(call_args, extra_args)
+            other_metrics_processor += process_extra_arg(old_RLTrainer_source, old_RLConfig_source)
     pass
 
     # Add statistics as well!
@@ -481,6 +481,39 @@ def _patch_trl_rl_trainers(trainer_file = "grpo_trainer"):
         extra_args += num_proc_check
     pass
 
+    # Check for loss_type = dr_grpo and scale_rewards for GRPO
+    if "loss_type" in call_args and "scale_rewards" in call_args:
+        check_dr_grpo = \
+            "if loss_type.lower() == 'dr_grpo':\n"\
+            "    loss_type = 'dr_grpo'\n"\
+            "elif loss_type.lower() == 'dapo':\n"\
+            "    loss_type = 'dapo'\n"\
+            "if loss_type.lower() == 'dr_grpo':\n"\
+            "    if scale_rewards == None:\n"\
+            "        scale_rewards = True\n"\
+            "    elif scale_rewards == True:\n"\
+            "        print('The Dr GRPO paper recommends setting `scale_rewards` to False! Will override. Set it to `None` to force False.')\n"\
+            "        scale_rewards = False\n"\
+            "elif loss_type.lower() == 'dapo':\n"\
+            "    print('The DAPO paper recommends `mask_truncated_completions = True`')\n"\
+            "    print('The DAPO paper recommends `epsilon_high = 0.28`')\n"\
+            "    mask_truncated_completions = True\n"\
+            "    epsilon_high = 0.28\n"\
+            "\n"
+        extra_args += check_dr_grpo
+    pass
+
+    # Check GRPO num_generations mismatch
+    if "per_device_train_batch_size" in call_args and "num_generations" in call_args:
+        check_num_generations = \
+            "if (per_device_train_batch_size // num_generations) * num_generations != per_device_train_batch_size:\n"\
+            "    print('Unsloth: We now expect `per_device_train_batch_size` to be a multiple of `num_generations`.\\n"\
+            "We will change the batch size of ' + str(per_device_train_batch_size) + ' to the `num_generations` of ' + str(num_generations))\n"\
+            "    per_device_train_batch_size = num_generations\n"\
+            "\n"
+        extra_args += check_num_generations
+    pass
+
     # Edit config with anything extra
     if trainer_file in RL_CONFIG_CHANGES:
         process_extra_args = RL_CONFIG_CHANGES[trainer_file]
diff --git a/unsloth/models/rl_replacements.py b/unsloth/models/rl_replacements.py
index 2ff0e253e..171e75d19 100644
--- a/unsloth/models/rl_replacements.py
+++ b/unsloth/models/rl_replacements.py
@@ -363,13 +363,27 @@ def grpo_trainer_fix_batch_size(RLTrainer_source, RLConfig_source):
 def grpo_trainer_metrics(RLTrainer_source, RLConfig_source):
     if "reward_funcs" not in RLTrainer_source: return ""
 
+    # For new TRL we have /mean and /std
+    use_mean = "rewards/{reward_func_name}/mean" in RLTrainer_source
+    use_std = "rewards/{reward_func_name}/std" in RLTrainer_source
+    if not use_mean:
+        use_normal = "rewards/{reward_func_name}" in RLTrainer_source
+    else:
+        use_normal = False
+    pass
+
     log_metrics = \
         "if not isinstance(reward_funcs, list): _reward_funcs = [reward_funcs]\n"\
         "else: _reward_funcs = reward_funcs\n"\
         "for reward_func in _reward_funcs:\n"\
         "    try:\n"\
         "        reward_func_name = reward_func.__name__\n"\
-        "        other_metrics.append(f'rewards/{reward_func_name}')\n"\
+        f"        if {use_mean}:\n"\
+        "            other_metrics.append(f'rewards/{reward_func_name}/mean')\n"\
+        f"        if {use_std}:\n"\
+        "            other_metrics.append(f'rewards/{reward_func_name}/std')\n"\
+        f"        if {use_normal}:\n"\
+        "            other_metrics.append(f'rewards/{reward_func_name}')\n"\
         "    except: pass\n"
     return log_metrics
 pass
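
Note (not part of the diff): the check_dr_grpo and check_num_generations strings added in rl.py are source fragments that Unsloth splices into the patched GRPOConfig constructor. As a rough, self-contained sketch of what that injected code does once rendered, with the config arguments written as plain function parameters (the wrapper function below is hypothetical; inside the real patch these names are the constructor's local arguments):

# Hypothetical helper, only for illustration.
def _grpo_config_checks(loss_type, scale_rewards,
                        per_device_train_batch_size, num_generations,
                        mask_truncated_completions, epsilon_high):
    # check_dr_grpo: normalize the loss_type casing first.
    if loss_type.lower() == 'dr_grpo':
        loss_type = 'dr_grpo'
    elif loss_type.lower() == 'dapo':
        loss_type = 'dapo'
    # Then apply the Dr GRPO / DAPO recommendations
    # (comparisons kept exactly as in the injected string).
    if loss_type.lower() == 'dr_grpo':
        if scale_rewards == None:
            scale_rewards = True
        elif scale_rewards == True:
            print('The Dr GRPO paper recommends setting `scale_rewards` to False! '
                  'Will override. Set it to `None` to force False.')
            scale_rewards = False
    elif loss_type.lower() == 'dapo':
        print('The DAPO paper recommends `mask_truncated_completions = True`')
        print('The DAPO paper recommends `epsilon_high = 0.28`')
        mask_truncated_completions = True
        epsilon_high = 0.28

    # check_num_generations: force the batch size to be a multiple of num_generations.
    if (per_device_train_batch_size // num_generations) * num_generations != per_device_train_batch_size:
        print('Unsloth: We now expect `per_device_train_batch_size` to be a multiple of `num_generations`.\n'
              'We will change the batch size of ' + str(per_device_train_batch_size) +
              ' to the `num_generations` of ' + str(num_generations))
        per_device_train_batch_size = num_generations

    return (loss_type, scale_rewards, per_device_train_batch_size,
            mask_truncated_completions, epsilon_high)

# Example: an explicit scale_rewards=True with Dr GRPO is overridden to False,
# and a batch size of 5 is changed to num_generations=4.
print(_grpo_config_checks('Dr_GRPO', True, 5, 4, False, None))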
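
Similarly (also not part of the diff), because grpo_trainer_metrics interpolates use_mean, use_std, and use_normal with f-strings at patch time, those booleans land in the generated trainer source as literals. For a TRL version whose GRPO trainer logs rewards/<name>/mean and rewards/<name>/std (so use_mean = use_std = True and use_normal = False), the log_metrics string expands to roughly the snippet below; match_format_exactly is a made-up reward function used only to make the sketch runnable:

# Made-up reward function so the expanded snippet can run stand-alone.
def match_format_exactly(completions, **kwargs):
    return [0.0 for _ in completions]

reward_funcs = match_format_exactly   # may be a single callable or a list
other_metrics = []                    # list the patched trainer appends metric names to

# Expansion of log_metrics with use_mean = True, use_std = True, use_normal = False.
if not isinstance(reward_funcs, list): _reward_funcs = [reward_funcs]
else: _reward_funcs = reward_funcs
for reward_func in _reward_funcs:
    try:
        reward_func_name = reward_func.__name__
        if True:
            other_metrics.append(f'rewards/{reward_func_name}/mean')
        if True:
            other_metrics.append(f'rewards/{reward_func_name}/std')
        if False:
            other_metrics.append(f'rewards/{reward_func_name}')
    except: pass

print(other_metrics)  # ['rewards/match_format_exactly/mean', 'rewards/match_format_exactly/std']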