diff --git a/.github/workflows/docker/compose/text2graph-compose.yaml b/.github/workflows/docker/compose/text2graph-compose.yaml
new file mode 100644
index 0000000000..7b0d21b044
--- /dev/null
+++ b/.github/workflows/docker/compose/text2graph-compose.yaml
@@ -0,0 +1,9 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# this file should be run in the root of the repo
+services:
+ text2graph:
+ build:
+ dockerfile: comps/text2graph/src/Dockerfile
+ image: ${REGISTRY:-opea}/text2graph:${TAG:-latest}
diff --git a/comps/cores/mega/constants.py b/comps/cores/mega/constants.py
index c90aff0df9..e36693f5ac 100644
--- a/comps/cores/mega/constants.py
+++ b/comps/cores/mega/constants.py
@@ -34,6 +34,7 @@ class ServiceType(Enum):
ANIMATION = 17
IMAGE2IMAGE = 18
TEXT2SQL = 19
+ TEXT2GRAPH = 20
class MegaServiceEndpoint(Enum):
diff --git a/comps/text2graph/deployment/docker_compose/README.md b/comps/text2graph/deployment/docker_compose/README.md
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/comps/text2graph/deployment/docker_compose/compose.yaml b/comps/text2graph/deployment/docker_compose/compose.yaml
new file mode 100644
index 0000000000..9ed116e83f
--- /dev/null
+++ b/comps/text2graph/deployment/docker_compose/compose.yaml
@@ -0,0 +1,29 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+services:
+ text2graph:
+ image: opea/text2graph:latest
+ container_name: text2graph
+ ports:
+ - "${TEXT2GRAPH_PORT:-8090}:8090"
+ environment:
+ - no_proxy=${no_proxy}
+ - https_proxy=${https_proxy}
+ - http_proxy=${http_proxy}
+ - LLM_MODEL_ID=${LLM_MODEL_ID:-Babelscape/rebel-large}
+ - HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+ ipc: host
+ restart: always
+
+ text2graph-gaudi:
+ image: opea/text2graph:${TAG:-latest}
+ container_name: text2graph-gaudi-server
+ ports:
+ - "${TEXT2GRAPH_PORT:-9090}:8080"
+ environment:
+ - TGI_LLM_ENDPOINT=${TGI_LLM_ENDPOINT:-http://localhost:8080}
+
+networks:
+ default:
+ driver: bridge
diff --git a/comps/text2graph/src/Dockerfile b/comps/text2graph/src/Dockerfile
new file mode 100644
index 0000000000..3f26b79540
--- /dev/null
+++ b/comps/text2graph/src/Dockerfile
@@ -0,0 +1,47 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+FROM ubuntu:22.04
+
+WORKDIR /home/graph_extract
+
+FROM python:3.11-slim
+ENV LANG=C.UTF-8
+ARG ARCH=cpu
+
+RUN apt-get update -y && apt-get install vim -y && apt-get install -y --no-install-recommends --fix-missing \
+ build-essential
+
+RUN useradd -m -s /bin/bash user && \
+ mkdir -p /home/user && \
+ chown -R user /home/user/
+
+COPY comps /home/user/comps
+
+RUN pip install --no-cache-dir --upgrade pip setuptools && \
+ if [ ${ARCH} = "cpu" ]; then \
+ pip install --no-cache-dir --extra-index-url https://download.pytorch.org/whl/cpu -r /home/user/comps/text2graph/src/requirements.txt; \
+ else \
+ pip install --no-cache-dir -r /home/user/comps/text2graph/src/requirements.txt; \
+ fi
+
+ENV https_proxy=${https_proxy}
+ENV http_proxy=${http_proxy}
+ENV no_proxy=${no_proxy}
+ENV LLM_ID=${LLM_ID:-"Babelscape/rebel-large"}
+ENV SPAN_LENGTH=${SPAN_LENGTH:-"1024"}
+ENV OVERLAP=${OVERLAP:-"100"}
+ENV MAX_LENGTH=${MAX_NEW_TOKENS:-"256"}
+ENV HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
+ENV HF_TOKEN=${HF_TOKEN}
+ENV LLM_MODEL_ID=${LLM_ID}
+ENV TGI_PORT=8008
+ENV PYTHONPATH="/home/user/":$PYTHONPATH
+
+USER user
+
+WORKDIR /home/user/comps/text2graph/src/
+
+RUN bash -c 'source /home/user/comps/text2graph/src/setup_service_env.sh'
+
+ENTRYPOINT ["python", "opea_text2graph_microservice.py"]
diff --git a/comps/text2graph/src/README.md b/comps/text2graph/src/README.md
new file mode 100644
index 0000000000..205fa8a418
--- /dev/null
+++ b/comps/text2graph/src/README.md
@@ -0,0 +1,118 @@
+# Text to graph triplet extractor
+
+Creating graphs from text means converting unstructured text into structured data, which is challenging.
+The task has gained significant traction with the advent of Large Language Models (LLMs), bringing it more into the mainstream. There are two main approaches to extracting graph triplets, depending on the LLM architecture: decoder-only and encoder-decoder models.
+
+## Decoder Models
+
+Decoder-only models are faster during inference as they skip the encoding. This is ideal for tasks where the
+input-output mapping is simpler or where multitasking is required. It is suitable for generating outputs based on
+prompts or when computational efficiency is a priority. In certain cases, the decoder only models struggle with
+tasks requiring deep contextual understanding or when input-output structures are highly heterogeneous.
+
+## Encoder-decoder models
+
+This microservice provides an encoder decoder architecture approach to graph triplet extraction. Models like REBEL, is based on the BART family/like model and fine-tuned for relation extraction and classification tasks. The approach works better when handling complex relations applications and data source. Encoder decoder models often achieve high performance on benchmarks due to their ability to encode contextual information effectively. It is suitable for tasks requiring detailed parsing of text into structured formats, such as knowledge graph construction from unstructured data.
+
+# Features
+
+Input text from a document or string(s) in text format and the graph triplets and nodes are identified.
+Subsequent processing needs to be done such as performing entity disambiguation to merge duplicate entities
+before generating cypher code
+
+## Implementation
+
+The text-to-graph microservice is able to extract triplets from unstructured text in document, text file, or string formats.
+The service is hosted in a Docker container. The text2graph extraction requires extraction logic and a hosted LLM.
+LLM hosting is done with TGI on Gaudi hardware and natively on CPUs.
+
+# 🚀1. Start Microservice with Docker
+
+Option 1 running on CPUs
+
+## Install Requirements
+
+```bash
+ pip install -r requirements.txt
+```
+
+## Environment variables : Configure LLM Parameters based on the model selected.
+
+```
+export LLM_ID=${LLM_ID:-"Babelscape/rebel-large"}
+export SPAN_LENGTH=${SPAN_LENGTH:-"1024"}
+export OVERLAP=${OVERLAP:-"100"}
+export MAX_LENGTH=${MAX_NEW_TOKENS:-"256"}
+export HUGGINGFACEHUB_API_TOKEN=""
+export LLM_MODEL_ID=${LLM_ID}
+export TGI_PORT=8008
+```
+
+## Echo env variables
+
+```
+echo "Extractor details"
+echo LLM_ID=${LLM_ID}
+echo SPAN_LENGTH=${SPAN_LENGTH}
+echo OVERLAP=${OVERLAP}
+echo MAX_LENGTH=${MAX_LENGTH}
+```
+
+### Start TGI Service
+
+```bash
+export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+export LLM_MODEL_ID="mistralai/Mistral-7B-Instruct-v0.3"
+export TGI_PORT=8008
+
+docker run -d --name="text2graph-tgi-endpoint" --ipc=host -p $TGI_PORT:80 -v ./data:/data --shm-size 1g -e HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN} -e model=${LLM_MODEL_ID} ghcr.io/huggingface/text-generation-inference:2.1.0 --model-id $LLM_MODEL_ID
+```
+
+### Verify the TGI Service
+
+```bash
+export your_ip=$(hostname -I | awk '{print $1}')
+curl http://${your_ip}:${TGI_PORT}/generate \
+ -X POST \
+ -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' \
+ -H 'Content-Type: application/json'
+```
+
+### Setup Environment Variables to host TGI
+
+```bash
+export TGI_LLM_ENDPOINT="http://${your_ip}:${TGI_PORT}"
+```
+
+### Start Text2Graph Microservice with Docker
+
+Command to build text2graph microservice
+
+```bash
+docker build -f Dockerfile -t user_name:graph_extractor ../../../
+```
+
+Command to launch text2graph microservice
+
+```bash
+docker run -i -t --net=host --ipc=host -p 8090 user_name:graph_extractor
+```
+
+This launches the text2graph microservice in interactive mode.
+
+# Validation and testing
+
+## Text to triplets
+
+Test directory is under GenAIComps/tests/text2graph/
+There are two files in this directory.
+
+- example_from_file.py : Example python script that downloads a text file and extracts triplets
+
+- test_text2graph_opea.sh : The main script that checks for health and builds docker, extracts and generates triplets.
+
+## Check if services are up
+
+### Setup validation process
+
+For set up use http://localhost:8090/docs for swagger documentation, list of commands, interactive GUI.
diff --git a/comps/text2graph/src/integrations/graph_agent.py b/comps/text2graph/src/integrations/graph_agent.py
new file mode 100644
index 0000000000..1c70498ece
--- /dev/null
+++ b/comps/text2graph/src/integrations/graph_agent.py
@@ -0,0 +1,189 @@
+# Copyright (C) 2025 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+import csv
+import math
+import os
+import re
+from typing import List, Tuple
+
+import pandas as pd
+import torch
+from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
+
+
+class TripletExtractor:
+ def triplet_extractor(self, text):
+ triplets = []
+ subject, relation, object_ = "", "", ""
+ text = text.strip()
+ current = "x"
+ for token in text.replace("<s>", "").replace("<pad>", "").replace("</s>", "").split():
+ if token == "<triplet>":
+ current = "t"
+ if relation != "":
+ triplets.append({"head": subject.strip(), "type": relation.strip(), "tail": object_.strip()})
+ relation = ""
+ subject = ""
+ elif token == "<subj>":
+ current = "s"
+ if relation != "":
+ triplets.append({"head": subject.strip(), "type": relation.strip(), "tail": object_.strip()})
+ object_ = ""
+ elif token == "<obj>":
+ current = "o"
+ relation = ""
+ else:
+ if current == "t":
+ subject += " " + token
+ elif current == "s":
+ object_ += " " + token
+ elif current == "o":
+ relation += " " + token
+ if subject != "" and relation != "" and object_ != "":
+ triplets.append({"head": subject.strip(), "type": relation.strip(), "tail": object_.strip()})
+ return triplets
+
+
+class TripletBuilder:
+ def __init__(self):
+ # Load model and tokenizer
+ MODEL_NAME = os.environ.get("LLM_MODEL_ID", "Babelscape/rebel-large")
+ tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+ model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)
+
+ ## Defines
+ self.span_length = int(os.environ.get("SPAN_LENGTH", "1024"))
+ self.overlap = int(os.environ.get("OVERLAP", "100"))
+ self.model = model
+ self.tokenizer = tokenizer
+
+ async def cal_index_span(self, total_tokens, span_length, overlap):
+ num_spans = math.ceil(total_tokens / span_length) + 1 # Calculate number of spans and assign to num_spans
+ spans = [] # Initialize an empty list to store the spans
+ start = 0
+ for i in range(num_spans): # Iterate using the calculated num_spans
+ start = i * (span_length - overlap)
+ end = min(start + span_length, total_tokens) # Calculate end
+ if end >= total_tokens:
+ end = total_tokens
+ start = end - span_length
+ if span_length <= overlap:
+ raise ValueError("Indexing is incorrect something is wrong")
+
+ spans.append([start, end]) # Append the span to the list
+ return spans
+
+ async def gen_tokenize(self, text: str) -> List[str]:
+ # print(f'entering tokenizer {text[:100]}')
+ tensor_tokens = self.tokenizer([text], return_tensors="pt")
+ # print(f'done entering tokenizer {tensor_tokens}')
+ return tensor_tokens
+
+ ## code
+ async def extract_graph(self, text):
+ # print(f'Entering graph extraction')
+ tokenize_input = await self.gen_tokenize(text)
+ total_tokens = len(tokenize_input["input_ids"][0])
+ span_index_gen = await self.cal_index_span(total_tokens, self.span_length, self.overlap)
+ tensor_ids = [torch.tensor(tokenize_input["input_ids"][0][start:end]) for start, end in span_index_gen]
+ tensor_masks = [torch.tensor(tokenize_input["attention_mask"][0][start:end]) for start, end in span_index_gen]
+ rearrange_inputs = {"input_ids": torch.stack(tensor_ids), "attention_mask": torch.stack(tensor_masks)}
+
+ # generate relations
+ MAX_LENGTH = int(os.environ.get("MAX_LENGTH", "256"))
+ num_return_sequences = 3
+ gen_kwargs = {
+ "max_length": MAX_LENGTH,
+ "length_penalty": 0,
+ "num_beams": 3,
+ "num_return_sequences": num_return_sequences,
+ }
+
+ generated_tokens = self.model.generate(**rearrange_inputs, **gen_kwargs)
+
+ # decode relations
+ decoded_preds = self.tokenizer.batch_decode(generated_tokens, skip_special_tokens=False)
+
+ # create kb
+ tripmgr = TripletManager()
+ tripext = TripletExtractor()
+ i = 0
+
+ for sentence_pred in decoded_preds:
+ current_span_index = i // num_return_sequences
+ relations = tripext.triplet_extractor(sentence_pred)
+ for relation in relations:
+ tripmgr.add_relation(relation)
+ i += 1
+ return tripmgr
+
+
+class TripletManager:
+ def __init__(self):
+ self.entities = {} # { entity_title: {...} }
+ self.relations = [] # [ head: entity_title, type: category, tail: entity_title]
+
+ def are_relations_equal(self, relation1, relation2):
+ """Check if two relations are equal."""
+ head_match = relation1["head"] == relation2["head"]
+ type_match = relation1["type"] == relation2["type"]
+ tail_match = relation1["tail"] == relation2["tail"]
+ all_match = head_match and type_match and tail_match
+ return all_match
+
+ def exists_relation(self, relation1):
+ """Check if relation exists."""
+ return any(self.are_relations_equal(relation1, relation2) for relation2 in self.relations)
+
+ def merge_relations(self, relation2):
+ """Merge two relations."""
+ pass  # relations carry no extra metadata, so duplicates need no merging
+
+ def exists_entity(self, entity_title):
+ return entity_title in self.entities
+
+ def add_entity(self, entity):
+ """Check if entry exists and add if not."""
+ if self.exists_entity(entity): # Directly check if the entity exists
+ return
+ self.entities[entity] = {"title": entity} # Create a dictionary for the entity
+ return
+
+ def add_relation(self, relation):
+ """Add entry checking to see if it needs merge or create a new entry."""
+ candidate_entities = [relation["head"], relation["tail"]]
+
+ # manage new entities
+ for entity in candidate_entities:
+ self.add_entity(entity)
+
+ # manage new relation
+ if not self.exists_relation(relation):
+ self.relations.append(relation)
+ else:
+ self.merge_relations(relation)
+
+ def write_to_csv(self, WRITE_TO_CSV=False):
+ """Saves the entities and relations to a CSV file."""
+ struct_entity = {"entity": [], "details": []}
+ struct_triplets = {"head": [], "type": [], "tail": []}
+
+ # Instead of appending, build lists of entities and relations
+ entity_data = []
+ for entity in self.entities.items():
+ entity_data.append(entity)
+
+ relation_data = []
+ for relation in self.relations:
+ relation_data.append(relation)
+
+ # Create DataFrames from the collected data
+ df_entity = pd.DataFrame(entity_data, columns=["entity", "details"])
+ df_relation = pd.DataFrame(relation_data)
+
+ # Write to CSV if requested
+ if WRITE_TO_CSV:
+ df_entity.to_csv("entities.csv", index=True)
+ df_relation.to_csv("relations.csv", index=True)
+ return df_entity, df_relation
diff --git a/comps/text2graph/src/integrations/opea.py b/comps/text2graph/src/integrations/opea.py
new file mode 100644
index 0000000000..bc23b9d076
--- /dev/null
+++ b/comps/text2graph/src/integrations/opea.py
@@ -0,0 +1,73 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+from __future__ import annotations
+
+import os
+import time
+from typing import Annotated, Optional
+
+from langchain.agents.agent_types import AgentType
+from langchain_huggingface import HuggingFaceEndpoint
+from pydantic import BaseModel, Field
+
+from comps import CustomLogger, OpeaComponent, OpeaComponentRegistry, ServiceType
+from comps.text2graph.src.integrations.graph_agent import TripletBuilder, TripletExtractor, TripletManager
+
+logger = CustomLogger("comps-text2graph")
+logflag = os.getenv("LOGFLAG", False)
+
+graph_params = {
+ "max_string_length": 3600,
+}
+
+generation_params = {
+ "max_new_tokens": 1024,
+ "top_k": 10,
+ "top_p": 0.95,
+ "temperature": 0.01,
+ "repetition_penalty": 1.03,
+ "streaming": True,
+}
+
+
+class Input(BaseModel):
+ input_text: str
+
+
+@OpeaComponentRegistry.register("OPEA_TEXT2GRAPH")
+class OpeaText2GRAPH(OpeaComponent):
+ """A specialized text to graph triplet converter."""
+
+ def __init__(self, name: str, description: str, config: dict = None):
+ super().__init__(name, ServiceType.TEXT2GRAPH.name.lower(), description, config)
+ health_status = self.check_health()
+ if not health_status:
+ logger.error("OpeaText2GRAPH health check failed.")
+
+ def check_health(self) -> bool:
+ """Checks the health of the TGI service.
+
+ Returns:
+ bool: True if the service is reachable and healthy, False otherwise.
+ """
+ try:
+ return True
+ except Exception as e:
+ return False
+
+ async def invoke(self, input_text: str):
+ """Invokes the text2graph service to generate graph(s) for the provided input.
+
+ input:
+ input: text document
+ Returns:
+ text : dict
+ """
+
+ tb = TripletBuilder()
+ graph_triplets = await tb.extract_graph(input_text)
+
+ result = {"graph_triplets": graph_triplets}
+
+ return result
diff --git a/comps/text2graph/src/opea_text2graph_microservice.py b/comps/text2graph/src/opea_text2graph_microservice.py
new file mode 100644
index 0000000000..96b5d90058
--- /dev/null
+++ b/comps/text2graph/src/opea_text2graph_microservice.py
@@ -0,0 +1,54 @@
+# Copyright (C) 2025 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+import os
+import pathlib
+import sys
+
+from fastapi.exceptions import HTTPException
+
+from comps import CustomLogger, OpeaComponentLoader, opea_microservices, register_microservice
+from comps.text2graph.src.integrations.opea import Input, OpeaText2GRAPH
+
+cur_path = pathlib.Path(__file__).parent.resolve()
+comps_path = os.path.join(cur_path, "../../../")
+sys.path.append(comps_path)
+
+logger = CustomLogger("text2graph")
+logflag = os.getenv("LOGFLAG", False)
+
+text2graph_component_name = os.getenv("TEXT2GRAPH_COMPONENT_NAME", "OPEA_TEXT2GRAPH")
+
+# Initialize OpeaComponentLoader
+loader = OpeaComponentLoader(
+ text2graph_component_name,
+ description=f"OPEA text2graph Component: {text2graph_component_name}",
+)
+
+
+@register_microservice(
+ name="opea_service@text2graph",
+ endpoint="/v1/text2graph",
+ host="0.0.0.0",
+ port=8090,
+)
+async def execute_agent(input_text: str):
+ """Execute triplet extraction from text file.
+
+ This function takes an Input object containing the input text and database connection information.
+ It uses the execute function from the text2graph module to execute the graph query and returns the result.
+ Args:
+ input (Input): An Input object with the input text
+ Returns:
+ dict: A dictionary with head, tail and type linking head and tail
+ """
+ print("===============================================================")
+ print("===================ENTERING THIS EXECUTE AGENT=================")
+ print("===============================================================")
+ results = await loader.invoke(input_text)
+ return {"result": results}
+
+
+if __name__ == "__main__":
+ logger.info("OPEA Text2GRAPH Microservice is starting...")
+ opea_microservices["opea_service@text2graph"].start()
diff --git a/comps/text2graph/src/requirements.txt b/comps/text2graph/src/requirements.txt
new file mode 100644
index 0000000000..45ff5d4379
--- /dev/null
+++ b/comps/text2graph/src/requirements.txt
@@ -0,0 +1,18 @@
+docarray[full]
+fastapi
+hanging_threads
+langchain==0.2.9
+langchain-huggingface
+langchain_community==0.2.7
+numpy
+opentelemetry-api
+opentelemetry-exporter-otlp
+opentelemetry-sdk
+pandas
+prometheus_fastapi_instrumentator
+psycopg2-binary
+pyarrow
+pydantic
+shortuuid
+sqlalchemy
+uvicorn
diff --git a/comps/text2graph/src/setup_service_env.sh b/comps/text2graph/src/setup_service_env.sh
new file mode 100755
index 0000000000..68e271926a
--- /dev/null
+++ b/comps/text2graph/src/setup_service_env.sh
@@ -0,0 +1,29 @@
+# Copyright (C) 2025 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+#######################################################################
+# Proxy
+#######################################################################
+export https_proxy=${https_proxy}
+export http_proxy=${http_proxy}
+export no_proxy=${no_proxy}
+################################################################
+# Configure LLM Parameters based on the model selected.
+################################################################
+export LLM_ID=${LLM_ID:-"Babelscape/rebel-large"}
+export SPAN_LENGTH=${SPAN_LENGTH:-"1024"}
+export OVERLAP=${OVERLAP:-"100"}
+export MAX_LENGTH=${MAX_NEW_TOKENS:-"256"}
+export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
+export HF_TOKEN=${HF_TOKEN}
+export LLM_MODEL_ID=${LLM_ID}
+export TGI_PORT=8008
+export PYTHONPATH="/home/user/"
+################################################################
+### Echo env variables
+################################################################
+echo "Extractor details"
+echo LLM_ID=${LLM_ID}
+echo SPAN_LENGTH=${SPAN_LENGTH}
+echo OVERLAP=${OVERLAP}
+echo MAX_LENGTH=${MAX_LENGTH}
diff --git a/tests/text2graph/example_from_file.py b/tests/text2graph/example_from_file.py
new file mode 100755
index 0000000000..fd5d5d861f
--- /dev/null
+++ b/tests/text2graph/example_from_file.py
@@ -0,0 +1,63 @@
+# Copyright (C) 2025 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+import os
+import subprocess
+import sys
+from urllib.parse import quote
+
+import requests
+
+################################################################
+# Download the text file to extract graph from
+################################################################
+# Define the input data : big text file and feed it
+
+TEMP_DIR = os.path.join(os.getcwd(), "tmpdata")
+FILE_URL = "https://gist.githubusercontent.com/wey-gu/75d49362d011a0f0354d39e396404ba2/raw/0844351171751ebb1ce54ea62232bf5e59445bb7/paul_graham_essay.txt"
+command = ["wget", "-P", TEMP_DIR, FILE_URL]
+try:
+ result = subprocess.run(command, check=True, capture_output=True, text=True)
+ print(f"Download successful. Output:\n{result.stdout}")
+except subprocess.CalledProcessError as e:
+ print(f"Download failed. Error:\n{e.stderr}")
+
+
+text = open(f"{TEMP_DIR}/paul_graham_essay.txt").read()
+encoded_data2 = quote(text)
+
+
+##################################################################
+# Function to parse the output to decipher if
+# triplets head->relation->tail was extracted
+##################################################################
+def run_check_keywords(response):
+ # Check for keywords in the response
+ if all(key in response.text.lower() for key in ["head", "tail", "type"]):
+ print("TEST PASS :: All three keys (head, tail, type) exist in the response.")
+ return True
+
+ print("TEST FAIL: No keyword found")
+ return False
+
+
+##################################################################
+# Extract graph from text2graph
+##################################################################
+PORT = 8090
+BASE_URL = f"http://localhost:{PORT}/v1/text2graph"
+headers = {"accept": "application/json"}
+
+# Send the text as a query parameter
+response = requests.post(url=BASE_URL, params={"input_text": text}, headers=headers)
+print(f"{response.json()}")
+if response.status_code == 200:
+ print(f"Microservice response code: {response.status_code}")
+else:
+ print(f"Error: {response.status_code}")
+ print(response.text)
+
+# Check to make sure all works
+success = run_check_keywords(response)
+# Exit with appropriate status code
+sys.exit(0 if success else 1)
diff --git a/tests/text2graph/test_text2graph_opea.sh b/tests/text2graph/test_text2graph_opea.sh
new file mode 100755
index 0000000000..ffe701bbf6
--- /dev/null
+++ b/tests/text2graph/test_text2graph_opea.sh
@@ -0,0 +1,66 @@
+#!/bin/bash
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+set -x
+WORKPATH=$(git rev-parse --show-toplevel)
+TAG='latest'
+LOG_PATH="$WORKPATH/comps/text2graph/deployment/docker_compose"
+source $WORKPATH/comps/text2graph/src/setup_service_env.sh
+
+
+echo $WORKPATH
+ip_address=$(hostname -I | awk '{print $1}')
+service_name="text2graph"
+
+function build_docker_graph() {
+ echo "=================== START BUILD DOCKER ========================"
+ cd $WORKPATH
+ echo $(pwd)
+ docker build --no-cache -t opea/text2graph:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/text2graph/src/Dockerfile .
+ if [ $? -ne 0 ]; then
+ echo "opea/text2graph built fail"
+ exit 1
+ else
+ echo "opea/text2graph built successful"
+ fi
+ echo "=================== END BUILD DOCKER ========================"
+}
+
+function start_service() {
+ echo "=================== START SERVICE ========================"
+ cd $WORKPATH/comps/text2graph/deployment/docker_compose
+ docker compose up ${service_name} -d > ${LOG_PATH}/start_services_with_compose.log
+
+ sleep 10s
+ echo "=================== END SERVICE ========================"
+}
+
+function validate_microservice() {
+ echo "=================== START VALIDATE ========================"
+ cd $WORKPATH/tests/text2graph
+ python3 example_from_file.py
+ echo "=================== END VALIDATE ========================"
+}
+
+function stop_docker() {
+ echo "=================== START STOP DOCKER ========================"
+ cd $WORKPATH/comps/text2graph/deployment/docker_compose
+ docker compose -f compose.yaml down ${service_name} --remove-orphans
+ echo "=================== END STOP DOCKER ========================"
+}
+
+function main() {
+
+ stop_docker
+
+ build_docker_graph
+ start_service
+ validate_microservice
+
+ stop_docker
+ echo y | docker system prune
+
+}
+
+main