 import importlib_resources
 from langtest.errors import Errors, Warnings
 from langtest.modelhandler.modelhandler import ModelAPI
+from langtest.tasks.task import TaskManager
 from langtest.transform.base import ITests, TestFactory
 from langtest.transform.utils import GENERIC2BRAND_TEMPLATE, filter_unique_samples
 from langtest.utils.custom_types.helpers import (
@@ -924,3 +925,155 @@ def evaluate_responses( |
     )
 
     return evaluator.aggregate_results(data_retriever, results)
+
+
+class MedFuzz(BaseClinical):
+    """MedFuzz-style robustness test: an attacker LLM perturbs each benchmark
+    question, and the target LLM is then re-evaluated on the perturbed version."""
+
+    alias_name = "medfuzz"
+    supported_tasks = ["question-answering", "text-generation"]
+
+    @staticmethod
+    def transform(sample_list: List[Sample], *args, **kwargs):
+        from langtest.transform.utils import AttackerLLM, TargetLLM
+        from langtest.utils.custom_types.sample import MedFuzzSample
+        from tqdm.auto import tqdm
+
+        try:
+            # Resolve the attacker model through TaskManager so any supported
+            # hub can serve as the attacker.
+            attacker_model_info = kwargs.get("attacker_llm", None)
+            if attacker_model_info is not None:
+                task = TaskManager("question-answering")
+                model = task.model(
+                    model_path=attacker_model_info["model"],
+                    model_hub=attacker_model_info["hub"],
+                    model_type=attacker_model_info["type"],
+                )
+            else:
+                from textwrap import dedent
+
+                error_message = dedent(
+                    """
+                    Attacker model information was not provided in the configuration. Please provide it, e.g.:
+                    {
+                        "medfuzz": {
+                            "attacker_llm": {
+                                "model": "<model_name>",
+                                "hub": "<model_hub>",
+                                "type": "<chat | completion>"
+                            }
+                        }
+                    }
+                    """
+                ).strip()
+
+                raise ValueError(error_message)
+
+            samples = tqdm(
+                sample_list,
+                desc="Transforming the samples",
+                unit="samples",
+                position=1,
+            )
+
+            transformed_samples = []
+            for sample in samples:
+                # Wrap the attacker model in both roles: the attacker crafts
+                # the perturbation, the target answers the question.
+                llm_attacker = AttackerLLM(model)
+                llm_target = TargetLLM(model)
+
+                # Promote the sample to a MedFuzzSample and tag the test.
+                med_sample = MedFuzzSample(**sample.dict())
+                med_sample.test_type = "medfuzz"
+                med_sample.category = "clinical"
+
+                # Fold multiple-choice options into the question text so the
+                # attacker and target both see the full benchmark item.
+                if med_sample.options not in [None, ""]:
+                    med_sample.original_question = (
+                        f"{med_sample.original_question}\n{med_sample.options}"
+                    )
+                    med_sample.options = None
+
+                # Baseline pass: capture the target's reasoning and confidence
+                # on the unmodified question to seed the attack plan.
+                ot = llm_target.process_user_text(med_sample.original_question)
+
+                llm_attacker.generate_attack_plan(
+                    benchmark_item=med_sample.original_question,
+                    correct_answer="".join(med_sample.expected_results),
+                    reasoning=ot["reasoning"],
+                    confidence=ot["confidence_scores"],
+                )
+
+                med_sample.perturbed_question = llm_attacker.generate_modified_question(
+                    med_sample.original_question
+                )
+
+                # Keep only the leading option letter (e.g. "A") as the
+                # expected result for evaluation.
+                med_sample.expected_results = "".join(
+                    map(str, med_sample.expected_results)
+                )[:1]
+
+                transformed_samples.append(med_sample)
+
+            return transformed_samples
+        except Exception:
+            import traceback
+
+            traceback.print_exc()
+            raise
+
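+    # NOTE (assumed contract): TargetLLM.process_user_text returns a dict with
+    # "reasoning", "confidence_scores", and "final_answer" keys; transform()
+    # consumes the first two and run() reads the last.
+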
+    @staticmethod
+    async def run(sample_list: List[Sample], model: ModelAPI, *args, **kwargs):
+        from langtest.transform.utils import TargetLLM
+
+        progress_bar = kwargs.get("progress_bar", False)
+
+        for sample in sample_list:
+            if sample.state != "done":
+                target_llm = TargetLLM(model)
+
+                # Ask the target model the perturbed question and record its
+                # final answer.
+                response = target_llm.process_user_text(sample.perturbed_question)
+                sample.actual_results = response.get("final_answer", "")
+
+                # Release the wrapper before moving on to the next sample.
+                del target_llm
+
+                sample.state = "done"
+
+            if progress_bar:
+                progress_bar.update(1)
+
+        return sample_list
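+
+    # The helpers below are thin, interchangeable chat clients (a local Ollama
+    # server vs. the OpenAI API); each takes a model name plus a list of
+    # {"role": ..., "content": ...} messages and returns the reply text.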
+
+    @staticmethod
+    def ollama_model_client(model, messages):
+        from ollama import Client
+
+        client = Client()
+
+        res = client.chat(
+            model=model,
+            messages=messages,
+            options={
+                # Sample with a high temperature for more varied generations.
+                "temperature": 0.9,
+            },
+        )
+        return res.message.content
+
+    @staticmethod
+    def openai_model_client(model, messages):
+        import openai
+
+        client = openai.Client()
+
+        res = (
+            client.chat.completions.create(model=model, messages=messages)
+            .choices[0]
+            .message.content
+        )
+        return res
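
As a usage sketch (hedged: the wiring below assumes langtest's standard Harness
config layout; the model names and data_source are placeholders, not values
taken from this commit), the new test would be enabled roughly like this:

    from langtest import Harness

    harness = Harness(
        task="question-answering",
        model={"model": "<target_model>", "hub": "openai"},
        data={"data_source": "<benchmark_name>", "split": "test-tiny"},
        config={
            "tests": {
                "defaults": {"min_pass_rate": 0.65},
                "clinical": {
                    "medfuzz": {
                        "min_pass_rate": 0.70,
                        # Required: MedFuzz.transform raises ValueError without it.
                        "attacker_llm": {
                            "model": "<attacker_model>",
                            "hub": "<model_hub>",
                            "type": "chat",
                        },
                    }
                },
            }
        },
    )
    harness.generate().run().report()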