diff --git a/.github/workflows/docker/compose/text2kg-compose.yaml b/.github/workflows/docker/compose/text2kg-compose.yaml new file mode 100644 index 0000000000..8c8e1de03b --- /dev/null +++ b/.github/workflows/docker/compose/text2kg-compose.yaml @@ -0,0 +1,9 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# this file should be run in the root of the repo +services: + text2kg: + build: + dockerfile: comps/text2kg/src/Dockerfile + image: ${REGISTRY:-opea}/text2kg:${TAG:-latest} diff --git a/comps/cores/mega/constants.py b/comps/cores/mega/constants.py index c978049c69..0723bbd12a 100644 --- a/comps/cores/mega/constants.py +++ b/comps/cores/mega/constants.py @@ -36,6 +36,7 @@ class ServiceType(Enum): TEXT2SQL = 19 TEXT2GRAPH = 20 TEXT2CYPHER = 21 + TEXT2KG = 22 STRUCT2GRAPH = 23 diff --git a/comps/text2kg/deployment/docker_compose/README.md b/comps/text2kg/deployment/docker_compose/README.md new file mode 100644 index 0000000000..e69de29bb2 diff --git a/comps/text2kg/deployment/docker_compose/compose.yaml b/comps/text2kg/deployment/docker_compose/compose.yaml new file mode 100644 index 0000000000..799e9f4a4a --- /dev/null +++ b/comps/text2kg/deployment/docker_compose/compose.yaml @@ -0,0 +1,29 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +include: + - ../../../third_parties/tgi/deployment/docker_compose/compose.yaml + - ../../../third_parties/neo4j/deployment/docker_compose/compose.yaml + +services: + text2kg: + image: ${REGISTRY:-opea}/text2kg:${TAG:-latest} + container_name: text2kg + ports: + - ${TEXT2KG_PORT:-8090}:8090 + environment: + - no_proxy=${no_proxy} + - https_proxy=${https_proxy} + - http_proxy=${http_proxy} + - LLM_MODEL_ID=${LLM_MODEL_ID:-"HuggingFaceH4/zephyr-7b-alpha"} + - HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} + depends_on: + - tgi-server + - neo4j-apoc + ipc: host + network_mode: "host" + restart: always + +networks: + default: + driver: bridge diff --git 
a/comps/text2kg/deployment/docker_compose/custom-override.yml b/comps/text2kg/deployment/docker_compose/custom-override.yml new file mode 100644 index 0000000000..ef80090341 --- /dev/null +++ b/comps/text2kg/deployment/docker_compose/custom-override.yml @@ -0,0 +1,5 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +services: + tgi-server: + runtime: runc diff --git a/comps/text2kg/src/Dockerfile b/comps/text2kg/src/Dockerfile new file mode 100644 index 0000000000..0cd826cf69 --- /dev/null +++ b/comps/text2kg/src/Dockerfile @@ -0,0 +1,51 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +FROM ubuntu:22.04 + +WORKDIR /home/graph_extract + +FROM python:3.11-slim +ENV LANG=C.UTF-8 +ARG ARCH=cpu + +RUN apt-get update -y && apt-get install vim wget -y && apt-get install -y --no-install-recommends --fix-missing \ + build-essential + +RUN useradd -m -s /bin/bash user && \ + mkdir -p /home/user && \ + chown -R user /home/user/ + +COPY comps /home/user/comps + +RUN mkdir -p /home/users/comps/text2kg/src/tmpddata && cd /home/users/comps/text2kg/src/tmpddata + +RUN pip install --no-cache-dir --upgrade pip setuptools && \ + if [ ${ARCH} = "cpu" ]; then \ + pip install --no-cache-dir --extra-index-url https://download.pytorch.org/whl/cpu -r /home/user/comps/text2kg/src/requirements.txt; \ + else \ + pip install --no-cache-dir -r /home/user/comps/text2kg/src/requirements.txt; \ + fi + +ENV https_proxy=${https_proxy} +ENV http_proxy=${http_proxy} +ENV no_proxy=${no_proxy} +ENV LLM_ID=${LLM_ID:-"HuggingFaceH4/zephyr-7b-alpha"} +ENV SPAN_LENGTH=${SPAN_LENGTH:-"1024"} +ENV OVERLAP=${OVERLAP:-"100"} +ENV MAX_LENGTH=${MAX_NEW_TOKENS:-"256"} +ENV HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN} +ENV HF_TOKEN=${HF_TOKEN} +ENV TGI_PORT=8008 +ENV PYTHONPATH="/home/user/":$PYTHONPATH + +USER user + +WORKDIR /home/user/comps/text2kg/src/ + + +WORKDIR /home/user/comps/text2kg/src/ + +RUN bash -c 'source 
/home/user/comps/text2kg/src/environment_setup.sh' + +ENTRYPOINT ["python", "opea_text2kg_microservice.py"] diff --git a/comps/text2kg/src/README.md b/comps/text2kg/src/README.md new file mode 100644 index 0000000000..d2223cd696 --- /dev/null +++ b/comps/text2kg/src/README.md @@ -0,0 +1,99 @@ +# Text to knowledge graph (text2kg) microservice + +Text to Knowledge Graph (text2kg) Microservice enables the conversion of unstructured text into structured data by generating graph triplets. This process, which can be complex, has become more accessible with the rise of Large Language Models (LLMs), making it a mainstream solution for data extraction tasks. We are using a decoder-only model for this application's purpose. +This microservice can be run on cpu or hpu and instructions for the same are mentioned below. + +## Decoder-Only Models + +Decoder-only models are optimized for fast inference by skipping the encoding step. They work well for tasks where input-output mappings are relatively simple, or when multitasking is required. These models are ideal when computational efficiency and prompt-based output generation are priorities. However, decoder-only models may struggle with tasks that require deep contextual understanding or when input-output structures are highly complex or varied. + +## Features + +Input Formats: Accepts text from documents, text files, or strings\*. + +Output: Answer to the query asked by the user. + +## 🚀 1. Start individual microservices using docker cli (Option 1) + +Update the environment_setup.sh file with your device and user information, and source it using - + +```bash +source comps/text2kg/src/environment_setup.sh +``` + +If you skip this step, you can export variables related to individual services as mentioned in each of the microservices. + +### 1. TGI + +Refer to [this link](https://github.com/opea-project/GenAIComps/blob/main/comps/third_parties/tgi/README.md) to start and verify the TGI microservice. + +### 2. 
Neo4J + +Refer to [this link](https://github.com/opea-project/GenAIComps/blob/main/comps/third_parties/neo4j/src/README.md) to start and verify the neo4j microservice. + +```bash +export DATA_DIRECTORY=$(pwd) +export ENTITIES="PERSON,PLACE,ORGANIZATION" +export RELATIONS="HAS,PART_OF,WORKED_ON,WORKED_WITH,WORKED_AT" +export VALIDATION_SCHEMA='{ + "PERSON": ["HAS", "PART_OF", "WORKED_ON", "WORKED_WITH", "WORKED_AT"], + "PLACE": ["HAS", "PART_OF", "WORKED_AT"], + "ORGANIZATION": ["HAS", "PART_OF", "WORKED_WITH"] +}' +``` + +### 3. Text2kg + +```bash +cd comps/text2kg/src/ +export TEXT2KG_PORT=8090 +``` + +Build the text2kg docker image + +```bash +docker build -f Dockerfile -t opea/text2kg:latest ../../../ +``` + +Launch the docker container + +```bash +docker run -i -t --net=host --ipc=host -p TEXT2KG_PORT -e HF_TOKEN=${HF_TOKEN} -e LLM_MODEL_ID=${LLM_MODEL_ID} opea/text2kg:latest -v data:/home/user/comps/text2kg/src/data /bin/bash +``` + +## 🚀 2. Start text2kg and dependent microservices with docker-compose (Option 2) + +```bash +cd comps/text2kg/deployment/docker_compose/ +``` + +Export service name and log path + +```bash +export service_name="text2kg" +export LOG_PATH=$PWD +``` + +Export NEO4J variables - refer to section 1.2.b. +Launch using the following command to run on cpu + +```bash +docker compose -f compose.yaml -f custom-override.yml up ${service_name} -d > ${LOG_PATH}/start_services_with_compose.log +``` + +Launch using the following command to run on gaudi + +```bash +docker compose -f compose.yaml up ${service_name} -d > ${LOG_PATH}/start_services_with_compose.log +``` + +## 3. Check the service using API endpoint + +```bash +curl -X 'POST' \ + 'http://localhost:TEXT2KG_PORT/v1/text2kg?input_text=Who%20is%20paul%20graham%3F' \ + -H 'accept: application/json' \ + -d '' +``` + +- Make sure your input document/string has the necessary information that can be extracted. 
diff --git a/comps/text2kg/src/environment_setup.sh b/comps/text2kg/src/environment_setup.sh
new file mode 100644
index 0000000000..e7a08bd0fe
--- /dev/null
+++ b/comps/text2kg/src/environment_setup.sh
@@ -0,0 +1,42 @@
+#!/bin/bash
+# Copyright (C) 2025 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+#######################################################################
+# Proxy
+#######################################################################
+export https_proxy=${https_proxy}
+export http_proxy=${http_proxy}
+export no_proxy=${no_proxy}
+export your_ip=${your_ip}
+################################################################
+# Configure LLM Parameters based on the model selected.
+################################################################
+
+export HF_TOKEN=${HF_TOKEN}
+
+export LLM_ID=${LLM_ID:-"HuggingFaceH4/zephyr-7b-alpha"}
+export LLM_MODEL_ID=${LLM_MODEL_ID:-"HuggingFaceH4/zephyr-7b-alpha"}
+export LLM_ENDPOINT_PORT=${LLM_ENDPOINT_PORT:-"9001"}
+
+export TGI_PORT=8008
+export PYTHONPATH="/home/user/"
+export TGI_LLM_ENDPOINT="http://${your_ip}:${TGI_PORT}"
+
+export NEO4J_USERNAME=${NEO4J_USERNAME:-"neo4j"}
+export NEO4J_PASSWORD=${NEO4J_PASSWORD:-"neo4j_password"}
+export NEO4J_URL=${NEO4J_URL:-"neo4j://localhost:7687"}
+export NEO4J_URI=${NEO4J_URI:-"neo4j://localhost:7687"}
+# Host ports only; the ":<container-port>" mapping suffix belongs in compose,
+# and the original "{$VAR:-7474}" brace order expanded to the literal "{:-7474}".
+export NEO4J_PORT1=${NEO4J_PORT1:-7474}
+export NEO4J_PORT2=${NEO4J_PORT2:-7687}
+# Keep container auth consistent with the credentials clients will use.
+export NEO4J_AUTH=${NEO4J_USERNAME}/${NEO4J_PASSWORD}
+export NEO4J_PLUGINS=\[\"apoc\"\]
+
+export DATA_DIRECTORY=$(pwd)
+export ENTITIES="PERSON,PLACE,ORGANIZATION"
+export RELATIONS="HAS,PART_OF,WORKED_ON,WORKED_WITH,WORKED_AT"
+export VALIDATION_SCHEMA='{
+    "PERSON": ["HAS", "PART_OF", "WORKED_ON", "WORKED_WITH", "WORKED_AT"],
+    "PLACE": ["HAS", "PART_OF", "WORKED_AT"],
+    "ORGANIZATION": ["HAS", "PART_OF", "WORKED_WITH"]
+}'
+export TEXT2KG_PORT=8090
diff --git a/comps/text2kg/src/integrations/kg_graph_agent.py b/comps/text2kg/src/integrations/kg_graph_agent.py
new file mode 100644
index 0000000000..50b04744d9
--- /dev/null
+++ b/comps/text2kg/src/integrations/kg_graph_agent.py
@@ -0,0 +1,95 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+import json
+import os
+import subprocess
+from typing import Literal
+
+import neo4j
+import nest_asyncio
+from llama_index.core import KnowledgeGraphIndex, Settings, SimpleDirectoryReader, StorageContext
+from llama_index.core.prompts import PromptTemplate
+from llama_index.embeddings.huggingface import HuggingFaceEmbedding
+from llama_index.graph_stores.neo4j import Neo4jGraphStore, Neo4jPropertyGraphStore
+from llama_index.llms.huggingface import HuggingFaceLLM
+
+nest_asyncio.apply()
+
+
+class GenerateKG:
+    def __init__(self, llm, embedding_model, data_directory):
+        self.data_directory = data_directory
+        self.llm = llm
+        self.embed_model = embedding_model
+        Settings.llm = self.llm
+        Settings.embed_model = self.embed_model
+        self.NEO4J_URL = os.environ["NEO4J_URL"]
+        self.NEO4J_URI = os.environ["NEO4J_URI"]
+        self.NEO4J_USERNAME = os.environ["NEO4J_USERNAME"]
+        self.NEO4J_PASSWORD = os.environ["NEO4J_PASSWORD"]
+        self.NEO4J_DATABASE = os.environ.get("NEO4J_DATABASE", "neo4j")
+        print(" loading and preparing llm and embedding models")
+
+    def __load_docs(self):
+
+        DATA_DIRECTORY = os.environ.get("DATA_DIRECTORY", self.data_directory)
+        reader = SimpleDirectoryReader(input_dir=DATA_DIRECTORY)
+        documents = reader.load_data()
+        print("loading documents")
+
+        return documents
+
+    # -------------------------------------------------------------------------------
+    # Link up to Neo4j
+    # -------------------------------------------------------------------------------
+    def __neo4j_link(self):
+
+        graph_store = Neo4jGraphStore(
+            username=self.NEO4J_USERNAME,
+            password=self.NEO4J_PASSWORD,
+            url=self.NEO4J_URL,
+            database=self.NEO4J_DATABASE,
+        )
+        return graph_store
+
+    def __graph_index(self, documents, llm, embed_model, graph_store):
+
+        entities = os.environ.get("ENTITIES", "").split(",")
+        relations = os.environ.get("RELATIONS", "").split(",")
+
+        # Get and parse validation schema
+        validation_schema_str = os.environ.get("VALIDATION_SCHEMA", "{}")
+        try:
+            validation_schema = json.loads(validation_schema_str)
+        except json.JSONDecodeError:
+            print("Warning: Could not parse VALIDATION_SCHEMA")
+            validation_schema = {}
+
+        storage_context = StorageContext.from_defaults(graph_store=graph_store)
+        neo4j_index = KnowledgeGraphIndex.from_documents(
+            documents=documents,
+            max_triplets_per_chunk=3,
+            storage_context=storage_context,
+            embed_model=embed_model,
+            include_embeddings=True,
+        )
+        return neo4j_index
+
+    def __create_index(self, documents, embed_model, llm):
+        """Creates index in neo4j database."""
+        # __neo4j_link() takes no arguments: it reads the Neo4j connection
+        # settings stored on self. Passing them positionally raised TypeError.
+        graph_store = self.__neo4j_link()
+        neo4j_index = self.__graph_index(documents, llm, embed_model, graph_store)
+        print(f" neo4j index {neo4j_index.index_struct}")
+        print("creating graph index for documents")
+        return neo4j_index
+
+    def prepare_and_save_graphdb(self):
+        """Load, chunk, and create graph and load it into neo4j database."""
+        print("entering prepare and save for structured data")
+        docs = self.__load_docs()
+        neo4j_index = self.__create_index(docs, self.embed_model, self.llm)
+        print("Preparing graphdb...")
+        print("GraphDB is created and saved.")
+        return neo4j_index
diff --git a/comps/text2kg/src/integrations/opea.py b/comps/text2kg/src/integrations/opea.py
new file mode 100644
index 0000000000..0a65a15c98
--- /dev/null
+++ b/comps/text2kg/src/integrations/opea.py
@@ -0,0 +1,93 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+from __future__ import annotations
+
+import os
+import time
+from typing import Annotated, Optional
+
+import requests
+from langchain.agents.agent_types import AgentType
+from langchain_huggingface import HuggingFaceEndpoint
+from pydantic import BaseModel, Field
+
+from
comps import CustomLogger, OpeaComponent, OpeaComponentRegistry, ServiceType
+from comps.text2kg.src.integrations.kg_graph_agent import GenerateKG
+
+logger = CustomLogger("comps-text2kg")
+logflag = os.getenv("LOGFLAG", False)
+
+graph_params = {
+    "max_string_length": 3600,
+}
+
+generation_params = {
+    "max_new_tokens": 1024,
+    "top_k": 10,
+    "top_p": 0.95,
+    "temperature": 0.01,
+    "repetition_penalty": 1.03,
+    "streaming": True,
+}
+
+TGI_LLM_ENDPOINT = os.environ.get("TGI_LLM_ENDPOINT")
+
+llm = HuggingFaceEndpoint(
+    endpoint_url=TGI_LLM_ENDPOINT,
+    task="text-generation",
+    **generation_params,
+)
+
+
+class Input(BaseModel):
+    input_text: str
+
+
+@OpeaComponentRegistry.register("OPEA_TEXT2KG")
+class OpeaText2KG(OpeaComponent):
+    """A specialized text to graph triplet converter."""
+
+    def __init__(self, name: str, description: str, config: dict = None):
+        super().__init__(name, ServiceType.TEXT2KG.name.lower(), description, config)
+        global neo4j_index
+        health_status = self.check_health()
+        if not health_status:
+            logger.error("OpeaText2KG health check failed.")
+        gdb = GenerateKG(
+            data_directory="data/", embedding_model="BAAI/bge-small-en-v1.5", llm="HuggingFaceH4/zephyr-7b-alpha"
+        )
+        neo4j_index = gdb.prepare_and_save_graphdb()
+
+    def check_health(self) -> bool:
+        """Checks the health of connection to the neo4j service.
+
+        Returns:
+            bool: True if the service is reachable and healthy, False otherwise.
+        """
+        try:
+            response = requests.get("http://localhost:7474", timeout=5)
+            if response.status_code == 200:
+                return True
+            else:
+                logger.error(f"Health check failed with status code: {response.status_code}")
+                return False
+        except Exception as e:
+            logger.error(f"Health check failed: {e}")
+            return False
+
+    async def invoke(self, input_text: str):
+        """Invokes the text2kg service to generate graph(s) for the provided input.
+ + input: + input: text document + Returns: + text : dict + """ + + query_engine = neo4j_index.as_query_engine(include_text=False, response_mode="tree_summarize") + + result = query_engine.query(input_text) + print(result) + + return result diff --git a/comps/text2kg/src/opea_text2kg_microservice.py b/comps/text2kg/src/opea_text2kg_microservice.py new file mode 100644 index 0000000000..a0955f2b99 --- /dev/null +++ b/comps/text2kg/src/opea_text2kg_microservice.py @@ -0,0 +1,54 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import os +import pathlib +import sys + +from fastapi.exceptions import HTTPException + +from comps import CustomLogger, OpeaComponentLoader, opea_microservices, register_microservice +from comps.text2kg.src.integrations.opea import Input, OpeaText2KG + +cur_path = pathlib.Path(__file__).parent.resolve() +comps_path = os.path.join(cur_path, "../../../") +sys.path.append(comps_path) + +logger = CustomLogger("text2kg") +logflag = os.getenv("LOGFLAG", False) + +text2kg_component_name = os.getenv("TEXT2KG_COMPONENT_NAME", "OPEA_TEXT2KG") + +# Initialize OpeaComponentLoader +loader = OpeaComponentLoader( + text2kg_component_name, + description=f"OPEA text2kg Component: {text2kg_component_name}", +) + + +@register_microservice( + name="opea_service@text2kg", + endpoint="/v1/text2kg", + host="0.0.0.0", + port=os.getenv("TEXT2KG_PORT"), +) +async def execute_agent(input_text: str): + """Execute triplet extraction from text file. + + This function takes an Input object containing the input text and database connection information. + It uses the execute function from the text2kg module to execute the graph query and returns the result. 
+ Args: + input (Input): An Input object with the input text + Returns: + dict: A dictionary with head, tail and type linking head and tail + """ + print("===============================================================") + print("===================ENTERING THIS EXECUTE AGENT=================") + print("===============================================================") + result = await loader.invoke(input_text) + return {"result": result} + + +if __name__ == "__main__": + logger.info("OPEA Text2KG Microservice is starting...") + opea_microservices["opea_service@text2kg"].start() diff --git a/comps/text2kg/src/requirements.txt b/comps/text2kg/src/requirements.txt new file mode 100644 index 0000000000..1861e1a6f4 --- /dev/null +++ b/comps/text2kg/src/requirements.txt @@ -0,0 +1,20 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +docarray[full] +fastapi +langchain_huggingface +langchain_neo4j +llama-index-graph-stores-neo4j +llama_index +llama_index-embeddings-huggingface +llama_index-llms-huggingface +opentelemetry-api +opentelemetry-exporter-otlp +opentelemetry-sdk +pandas +prometheus_fastapi_instrumentator +pydantic +pyprojroot +shortuuid +transformers +uvicorn diff --git a/comps/third_parties/neo4j/deployment/docker_compose/compose.yaml b/comps/third_parties/neo4j/deployment/docker_compose/compose.yaml index fe36e6e936..d040a565dc 100644 --- a/comps/third_parties/neo4j/deployment/docker_compose/compose.yaml +++ b/comps/third_parties/neo4j/deployment/docker_compose/compose.yaml @@ -25,6 +25,7 @@ services: - NEO4J_apoc_import_file_use__neo4j__config=true - NEO4J_dbms_security_procedures_unrestricted=apoc.\* - NEO4J_server_bolt_advertised__address=localhost:${NEO4J_PORT2} + - NEO4J_server_config_strict__validation_enabled=false restart: always healthcheck: test: wget http://localhost:7474 || exit 1 diff --git a/tests/text2kg/test_text2kg.sh b/tests/text2kg/test_text2kg.sh new file mode 100644 index 0000000000..2ba6b830de --- /dev/null 
+++ b/tests/text2kg/test_text2kg.sh @@ -0,0 +1,92 @@ +#!/bin/bash +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +set -x +WORKPATH=$(git rev-parse --show-toplevel) +TAG='comps' +LOG_PATH="$WORKPATH/comps/text2kg/deployment/docker_compose" +export DATA_PATH=${model_cache} +source $WORKPATH/comps/text2kg/src/environment_setup.sh + + +echo $WORKPATH +ip_address=$(hostname -I | awk '{print $1}') +service_name="text2kg" + +function build_docker() { + echo "=================== START BUILD DOCKER ========================" + cd $WORKPATH + echo $(pwd) + docker build --no-cache -t opea/text2kg:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/text2kg/src/Dockerfile . + if [ $? -ne 0 ]; then + echo "opea/text2kg built fail" + exit 1 + else + echo "opea/text2kg built successful" + fi + echo "=================== END BUILD DOCKER ========================" +} + +function start_service() { + echo "=================== START SERVICE ========================" + cd $WORKPATH/comps/text2kg/deployment/docker_compose + docker compose -f compose.yaml -f custom-override.yml up ${service_name} -d > ${LOG_PATH}/start_services_with_compose.log + + sleep 10s + echo "=================== END SERVICE ========================" +} + +function validate_microservice() { + echo "=================== START VALIDATE ========================" + cd $WORKPATH/tests/text2kg + + # Download test file + FILE_URL="https://gist.githubusercontent.com/wey-gu/75d49362d011a0f0354d39e396404ba2/raw/0844351171751ebb1ce54ea62232bf5e59445bb7/paul_graham_essay.txt" + wget -P "$DATA_DIRECTORY" "$FILE_URL" + + if wget -P "$DATA_DIRECTORY" "$FILE_URL"; then + echo "Download successful" + else + echo "Download failed" + return 1 + fi + + # Test API endpoint + result=$(curl -X POST \ + -H "accept: application/json" \ + -d "" \ + http://localhost:8090/v1/text2kg?input_text=Who%20is%20paul%20graham%3F) + + if [[ $result == *"output"* ]]; then + echo $result 
+ echo "Result correct." + else + echo "Result wrong. Received was $result" + docker logs text2kg > ${LOG_PATH}/text2kg.log + return 1 + fi + + echo "=================== END VALIDATE ========================" +} + +function stop_docker() { + echo "=================== START STOP DOCKER ========================" + cd $WORKPATH/comps/text2kg/deployment/docker_compose + docker compose -f compose.yaml down ${service_name} --remove-orphans + echo "=================== END STOP DOCKER ========================" +} + +function main() { + + stop_docker + + build_docker + start_service + validate_microservice + + stop_docker + +} + +main diff --git a/tests/text2kg/test_text2kg_on_intel_hpu.sh b/tests/text2kg/test_text2kg_on_intel_hpu.sh new file mode 100644 index 0000000000..a0572d4da2 --- /dev/null +++ b/tests/text2kg/test_text2kg_on_intel_hpu.sh @@ -0,0 +1,93 @@ +#!/bin/bash +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +set -x +WORKPATH=$(git rev-parse --show-toplevel) +TAG='comps' +LOG_PATH="$WORKPATH/comps/text2kg/deployment/docker_compose" +export DATA_PATH=${model_cache} +source $WORKPATH/comps/text2kg/src/environment_setup.sh + + +echo $WORKPATH +ip_address=$(hostname -I | awk '{print $1}') +service_name="text2kg-gaudi" + +function build_docker() { + echo "=================== START BUILD DOCKER ========================" + cd $WORKPATH + echo $(pwd) + docker build --no-cache -t opea/text2kg:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/text2kg/src/Dockerfile . + if [ $? 
-ne 0 ]; then + echo "opea/text2kg built fail" + exit 1 + else + echo "opea/text2kg built successful" + fi + echo "=================== END BUILD DOCKER ========================" +} + +function start_service() { + echo "=================== START SERVICE ========================" + cd $WORKPATH/comps/text2kg/deployment/docker_compose + docker compose -f compose.yaml -f custom-override.yml up ${service_name} -d > ${LOG_PATH}/start_services_with_compose.log + + sleep 10s + echo "=================== END SERVICE ========================" +} + +function validate_microservice() { + echo "=================== START VALIDATE ========================" + cd $WORKPATH/tests/text2kg + + # Download test file + FILE_URL="https://gist.githubusercontent.com/wey-gu/75d49362d011a0f0354d39e396404ba2/raw/0844351171751ebb1ce54ea62232bf5e59445bb7/paul_graham_essay.txt" + wget -P "$DATA_DIRECTORY" "$FILE_URL" + + if wget -P "$DATA_DIRECTORY" "$FILE_URL"; then + echo "Download successful" + else + echo "Download failed" + return 1 + fi + + # Test API endpoint + result=$(curl -X POST \ + -H "accept: application/json" \ + -d "" \ + http://localhost:8090/v1/text2kg?input_text=Who%20is%20paul%20graham%3F) + + if [[ $result == *"output"* ]]; then + echo $result + echo "Result correct." + else + echo "Result wrong. Received was $result" + docker logs text2kg > ${LOG_PATH}/text2kg.log + return 1 + fi + + + echo "=================== END VALIDATE ========================" +} + +function stop_docker() { + echo "=================== START STOP DOCKER ========================" + cd $WORKPATH/comps/text2kg/deployment/docker_compose + docker compose -f compose.yaml down ${service_name} --remove-orphans + echo "=================== END STOP DOCKER ========================" +} + +function main() { + + stop_docker + + build_docker + start_service + validate_microservice + + stop_docker + +} + +main