Separate LLM invoke into a standalone function for tracing

louie-tsai · louie-tsai · commit 8a85e0677658 · 2025-04-02T08:16:43.000-07:00
Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
diff --git a/comps/agent/src/integrations/strategy/planexec/planner.py b/comps/agent/src/integrations/strategy/planexec/planner.py
@@ -69,10 +69,15 @@ class grade(BaseModel):
         output_parser = PydanticToolsParser(tools=[grade], first_tool_only=True)
         self.chain = plan_check_prompt | llm | output_parser
 
+    @opea_telemetry
+    def __llm_invoke__(self, state):
+        scored_result = self.chain.invoke(state)
+        return scored_result
+
     @opea_telemetry
     def __call__(self, state):
         # print("---CALL PlanStepChecker---")
-        scored_result = self.chain.invoke(state)
+        scored_result = self.__llm_invoke__(state)
         score = scored_result.binary_score
         print(f"Task is {state['context']}, Score is {score}")
         if score.startswith("yes"):
@@ -93,6 +98,11 @@ def __init__(self, llm, plan_checker=None, is_vllm=False):
         self.llm = planner_prompt | llm | output_parser
         self.plan_checker = plan_checker
 
+    @opea_telemetry
+    def __llm_invoke__(self, messages):
+        plan = self.llm.invoke(messages)
+        return plan
+
     @opea_telemetry
     def __call__(self, state):
         print("---CALL Planner---")
@@ -102,7 +112,7 @@ def __call__(self, state):
         while not success:
             while not success:
                 try:
-                    plan = self.llm.invoke({"messages": [("user", state["messages"][-1].content)]})
+                    plan = self.__llm_invoke__({"messages": [("user", state["messages"][-1].content)]})
                     print("Generated plan: ", plan)
                     success = True
                 except OutputParserException as e:
@@ -168,14 +178,19 @@ def __init__(self, llm, is_vllm=False):
         output_parser = PydanticToolsParser(tools=[Response], first_tool_only=True)
         self.llm = answer_make_prompt | llm | output_parser
 
+    @opea_telemetry
+    def __llm_invoke__(self, state):
+        output = self.llm.invoke(state)
+        return output
+
     @opea_telemetry
     def __call__(self, state):
         print("---CALL AnswerMaker---")
         success = False
         # sometime, LLM will not provide accurate steps per ask, try more than one time until success
         while not success:
             try:
-                output = self.llm.invoke(state)
+                output = self.__llm_invoke__(state)
                 print("Generated response: ", output.response)
                 success = True
             except OutputParserException as e:
@@ -205,10 +220,15 @@ class grade(BaseModel):
         output_parser = PydanticToolsParser(tools=[grade], first_tool_only=True)
         self.chain = answer_check_prompt | llm | output_parser
 
+    @opea_telemetry
+    def __llm_invoke__(self, state):
+        output = self.chain.invoke(state)
+        return output
+
     @opea_telemetry
     def __call__(self, state):
         print("---CALL FinalAnswerChecker---")
-        scored_result = self.chain.invoke(state)
+        scored_result = self.__llm_invoke__(state)
         score = scored_result.binary_score
         print(f"Answer is {state['response']}, Grade of good response is {score}")
         if score.startswith("yes"):
@@ -225,14 +245,19 @@ def __init__(self, llm, answer_checker=None):
         self.llm = replanner_prompt | llm | output_parser
         self.answer_checker = answer_checker
 
+    @opea_telemetry
+    def __llm_invoke__(self, state):
+        output = self.llm.invoke(state)
+        return output
+
     @opea_telemetry
     def __call__(self, state):
         print("---CALL Replanner---")
         success = False
         # sometime, LLM will not provide accurate steps per ask, try more than one time until success
         while not success:
             try:
-                output = self.llm.invoke(state)
+                output = self.__llm_invoke__(state)
                 success = True
                 print("Replan: ", output)
             except OutputParserException as e:
diff --git a/comps/agent/src/integrations/strategy/ragagent/planner.py b/comps/agent/src/integrations/strategy/ragagent/planner.py
@@ -43,12 +43,17 @@ class QueryWriter:
     def __init__(self, llm, tools):
         self.llm = llm.bind_tools(tools)
 
+    @opea_telemetry
+    def __llm_invoke__(self, messages):
+        response = self.llm.invoke(messages)
+        return response
+
     @opea_telemetry
     def __call__(self, state):
         print("---CALL QueryWriter---")
         messages = state["messages"]
 
-        response = self.llm.invoke(messages)
+        response = self.__llm_invoke__(messages)
         # We return a list, because this will get added to the existing list
         return {"messages": [response], "output": response}
 
@@ -195,6 +200,11 @@ def __init__(self, args, tools):
         self.tools = tools
         self.chain = prompt | llm | output_parser
 
+    @opea_telemetry
+    def __llm_invoke__(self, question, history, feedback):
+        response = self.chain.invoke({"question": question, "history": history, "feedback": feedback})
+        return response
+
     @opea_telemetry
     def __call__(self, state):
         from .utils import assemble_history, convert_json_to_tool_call
@@ -206,7 +216,7 @@ def __call__(self, state):
         history = assemble_history(messages)
         feedback = instruction
 
-        response = self.chain.invoke({"question": question, "history": history, "feedback": feedback})
+        response = self.__llm_invoke__(question, history, feedback)
         print("Response from query writer llm: ", response)
 
         ############ allow multiple tool calls in one AI message ############
@@ -244,6 +254,11 @@ def __init__(self, args):
         llm = setup_chat_model(args)
         self.chain = prompt | llm
 
+    @opea_telemetry
+    def __llm_invoke__(self, question, docs):
+        scored_result = self.chain.invoke({"question": question, "context": docs})
+        return scored_result
+
     @opea_telemetry
     def __call__(self, state) -> Literal["generate", "rewrite"]:
         from .utils import aggregate_docs
@@ -255,7 +270,7 @@ def __call__(self, state) -> Literal["generate", "rewrite"]:
         docs = aggregate_docs(messages)
         print("@@@@ Docs: ", docs)
 
-        scored_result = self.chain.invoke({"question": question, "context": docs})
+        scored_result = self.__llm_invoke__(question, docs)
 
         score = scored_result.content
         print("@@@@ Score: ", score)
@@ -287,6 +302,11 @@ def __init__(self, args):
         llm = setup_chat_model(args)
         self.rag_chain = prompt | llm
 
+    @opea_telemetry
+    def __llm_invoke__(self, docs, question, query_time):
+        response = self.rag_chain.invoke({"context": docs, "question": question, "time": query_time})
+        return response
+
     @opea_telemetry
     def __call__(self, state):
         from .utils import aggregate_docs
@@ -299,7 +319,7 @@ def __call__(self, state):
         question = messages[0].content
         docs = aggregate_docs(messages)
 
-        response = self.rag_chain.invoke({"context": docs, "question": question, "time": query_time})
+        response = self.__llm_invoke__(docs, question, query_time)
         print("@@@@ Used this doc for generation:\n", docs)
         print("@@@@ Generated response: ", response)
         return {"messages": [response], "output": response}
diff --git a/comps/agent/src/integrations/strategy/react/planner.py b/comps/agent/src/integrations/strategy/react/planner.py
@@ -213,6 +213,14 @@ def __init__(self, tools, args, store=None, **kwargs):
         self.memory_type = args.memory_type
         self.store = store
 
+    @opea_telemetry
+    def __llm_invoke__(self, query, history, tools_descriptions, thread_history):
+        # invoke chain: raw output from llm
+        response = self.chain.invoke(
+            {"input": query, "history": history, "tools": tools_descriptions, "thread_history": thread_history}
+        )
+        return response
+
     @opea_telemetry
     def __call__(self, state, config):
 
@@ -245,9 +253,7 @@ def __call__(self, state, config):
         print("@@@ Tools description: ", tools_descriptions)
 
         # invoke chain: raw output from llm
-        response = self.chain.invoke(
-            {"input": query, "history": history, "tools": tools_descriptions, "thread_history": thread_history}
-        )
+        response = self.__llm_invoke__(query, history, tools_descriptions, thread_history)
         response = response.content
 
         # parse tool calls or answers from raw output: result is a list
diff --git a/comps/agent/src/integrations/strategy/sqlagent/planner.py b/comps/agent/src/integrations/strategy/sqlagent/planner.py
@@ -60,6 +60,11 @@ def __init__(self, args, tools):
             self.column_embeddings = self.embed_model.encode(self.values_descriptions)
             print("Done embedding column descriptions")
 
+    @opea_telemetry
+    def __llm_invoke__(self, prompt):
+        output = self.chain.invoke(prompt)
+        return output
+
     @opea_telemetry
     def __call__(self, state):
         print("----------Call Agent Node----------")
@@ -88,7 +93,7 @@ def __call__(self, state):
             history=history,
         )
 
-        output = self.chain.invoke(prompt)
+        output = self.__llm_invoke__(prompt)
         output = self.output_parser.parse(
             output.content, history, table_schema, hints, question, state["messages"]
         )  # text: str, history: str, db_schema: str, hint: str
@@ -195,6 +200,11 @@ def __init__(self, args, llm, tools):
             self.embed_model = SentenceTransformer("BAAI/bge-large-en-v1.5")
             self.column_embeddings = self.embed_model.encode(self.values_descriptions)
 
+    @opea_telemetry
+    def __llm_invoke__(self, chain, state):
+        response = chain.invoke(state)
+        return response
+
     @opea_telemetry
     def __call__(self, state):
         print("----------Call Agent Node----------")
@@ -216,7 +226,7 @@ def __call__(self, state):
         )
 
         chain = state_modifier_runnable | self.llm
-        response = chain.invoke(state)
+        response = self.__llm_invoke__(chain, state)
 
         return {"messages": [response], "hint": hints}
 
@@ -248,12 +258,7 @@ def get_sql_query_and_result(self, state):
         return query, result
 
     @opea_telemetry
-    def __call__(self, state):
-        print("----------Call Query Fixer Node----------")
-        table_schema, _ = get_table_schema(self.args.db_path)
-        question = state["messages"][0].content
-        hint = state["hint"]
-        query, result = self.get_sql_query_and_result(state)
+    def __llm_invoke__(self, table_schema, question, hint, query, result):
         response = self.chain.invoke(
             {
                 "DATABASE_SCHEMA": table_schema,
@@ -263,6 +268,16 @@ def __call__(self, state):
                 "RESULT": result,
             }
         )
+        return response
+
+    @opea_telemetry
+    def __call__(self, state):
+        print("----------Call Query Fixer Node----------")
+        table_schema, _ = get_table_schema(self.args.db_path)
+        question = state["messages"][0].content
+        hint = state["hint"]
+        query, result = self.get_sql_query_and_result(state)
+        response = self.__llm_invoke__(table_schema, question, hint, query, result)
         # print("@@@@@ Query fixer output:\n", response.content)
         return {"messages": [response]}