Merged
59 commits
f0c7a02
updates needed for demo
ctao456 Oct 7, 2024
81a5943
Merge branch 'ctao/demo' of https://github.com/ctao456/GenAIComps int…
ctao456 Oct 7, 2024
dbf8ad3
original pr content
ctao456 Oct 8, 2024
98cbb1c
Merge branch 'opea-project:main' into ctao/opea
ctao456 Oct 8, 2024
d85f33a
Revert "updates needed for demo"
ctao456 Oct 8, 2024
a8363b7
remove improper images
ctao456 Oct 8, 2024
4c22b40
Addressed some comments on previous pr
ctao456 Oct 8, 2024
dc59a77
Add Dockerfile for cpu support
ctao456 Oct 9, 2024
bf38204
CODEOWNER: Update comp CODEOWNER (#757)
hteeyeoh Oct 8, 2024
b932b94
Add stable diffusion microservice (#729)
XinyuYe-Intel Oct 8, 2024
7fb8c6b
Compatible with different platforms. (#766)
ZePan110 Oct 8, 2024
817c0f1
Optimize path and link validity check. (#745)
ZePan110 Oct 9, 2024
a14c76f
Add timeout for ut test (#773)
chensuyue Oct 9, 2024
8fae482
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 9, 2024
b9420c7
Merge branch 'opea-project:main' into ctao/opea
ctao456 Oct 9, 2024
1879a1d
test hyperlink
ctao456 Oct 10, 2024
cc938fb
test hyperlink
ctao456 Oct 10, 2024
697fdbd
test hyperlink issue
ctao456 Oct 10, 2024
120edae
test hyperlink issue
ctao456 Oct 10, 2024
4bfd1f3
put back hyperlinks in readme
ctao456 Oct 10, 2024
10b3b34
remove possible error hyperlink
ctao456 Oct 10, 2024
09ab934
put hyperlink back
ctao456 Oct 10, 2024
901adcc
Merge branch 'opea-project:main' into ctao/opea
ctao456 Oct 14, 2024
303eb1f
major update to use FastAPI for wav2lip, and structure component format
ctao456 Oct 15, 2024
d058496
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 15, 2024
d6b3508
Merge branch 'opea-project:main' into ctao/opea
ctao456 Oct 15, 2024
ff0ec2d
Add dockerfiles in animation-compose-cd.yaml
ctao456 Oct 16, 2024
87f51e6
Fix end of file issue in animation-compose-cd.yaml
ctao456 Oct 16, 2024
3520b9e
Merge branch 'main' into ctao/opea
chensuyue Oct 17, 2024
c4f43da
Fix Docker deployment on Xeon
ctao456 Oct 18, 2024
f41f974
Merge branch 'ctao/opea' of https://github.com/ctao456/GenAIComps int…
ctao456 Oct 18, 2024
b7752b5
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 18, 2024
22cc67a
add versioning for all pip packages
ctao456 Oct 18, 2024
fd214e7
e2e test script for animation
ctao456 Oct 18, 2024
48a23f6
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 18, 2024
a75bcc6
update e2e test script
ctao456 Oct 18, 2024
46d293e
Merge branch 'ctao/opea' of https://github.com/ctao456/GenAIComps int…
ctao456 Oct 18, 2024
f637e1e
update e2e test script
ctao456 Oct 18, 2024
a3e2148
Merge branch 'opea-project:main' into ctao/opea
ctao456 Oct 18, 2024
d9f152b
update readme
ctao456 Oct 18, 2024
e08f0fb
Merge branch 'ctao/opea' of https://github.com/ctao456/GenAIComps int…
ctao456 Oct 18, 2024
f2e6b34
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 18, 2024
72a6527
update
ctao456 Oct 19, 2024
dd849ef
update
ctao456 Oct 19, 2024
fecedab
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 19, 2024
f3f3343
update gateway
ctao456 Oct 19, 2024
8790139
udpate gateway
ctao456 Oct 19, 2024
26deb88
Fix AVATAR_CHATBOT
ctao456 Oct 19, 2024
5d8fef8
update gateway
ctao456 Oct 19, 2024
a0b4d13
update
ctao456 Oct 19, 2024
9a705ca
test
ctao456 Oct 19, 2024
e177ba2
update
ctao456 Oct 19, 2024
d36d0fe
update
ctao456 Oct 19, 2024
e90f5cf
update gateway
ctao456 Oct 19, 2024
dda2e31
fix max_tokens in AvatarChatbot gateway
ctao456 Oct 19, 2024
83186de
test
ctao456 Oct 19, 2024
38e4d46
update
ctao456 Oct 19, 2024
7aa56f4
update
ctao456 Oct 19, 2024
155ef8e
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 19, 2024
18 changes: 18 additions & 0 deletions .github/workflows/docker/compose/animation-compose-cd.yaml
@@ -0,0 +1,18 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# this file should be run in the root of the repo
# images used by GenAIExamples: animation, wav2lip, wav2lip-gaudi
services:
animation:
build:
dockerfile: comps/animation/wav2lip/Dockerfile
image: ${REGISTRY:-opea}/animation:${TAG:-latest}
wav2lip:
build:
dockerfile: comps/animation/wav2lip/dependency/Dockerfile
image: ${REGISTRY:-opea}/wav2lip:${TAG:-latest}
wav2lip-gaudi:
build:
dockerfile: comps/animation/wav2lip/dependency/Dockerfile.intel_hpu
image: ${REGISTRY:-opea}/wav2lip-gaudi:${TAG:-latest}
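For local testing, the images defined in this compose file can also be built directly with Docker Compose. A minimal sketch, assuming Docker Compose v2 and that the command is run from the repository root (as the comment above requires):

```bash
# Sketch: build the animation images locally (assumes Docker Compose v2, run from the repo root)
docker compose --project-directory . \
  -f .github/workflows/docker/compose/animation-compose-cd.yaml \
  build animation wav2lip

# Gaudi variant
docker compose --project-directory . \
  -f .github/workflows/docker/compose/animation-compose-cd.yaml \
  build wav2lip-gaudi
```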
1 change: 1 addition & 0 deletions comps/__init__.py
@@ -58,6 +58,7 @@
VideoQnAGateway,
VisualQnAGateway,
MultimodalQnAGateway,
AvatarChatbotGateway,
)

# Telemetry
24 changes: 24 additions & 0 deletions comps/animation/wav2lip/Dockerfile
@@ -0,0 +1,24 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# Use a base image
FROM python:3.11-slim

RUN useradd -m -s /bin/bash user && \
mkdir -p /home/user && \
chown -R user /home/user/
USER user

ENV LANG=C.UTF-8
ARG ARCH=cpu

COPY comps /home/user/comps

RUN pip install --no-cache-dir --upgrade pip && \
pip install --no-cache-dir -r /home/user/comps/animation/wav2lip/requirements.txt ;

ENV PYTHONPATH=$PYTHONPATH:/home/user

WORKDIR /home/user/comps/animation/wav2lip

ENTRYPOINT ["python3", "animation.py"]
128 changes: 128 additions & 0 deletions comps/animation/wav2lip/README.md
@@ -0,0 +1,128 @@
# Avatar Animation Microservice

The avatar animation model combines two models: Wav2Lip and the GAN-based face restoration model GFPGAN. The Wav2Lip model generates lip movements from an audio file, and the GFPGAN model restores a high-quality face image from a low-quality one. The avatar animation microservice takes an audio clip and a low-quality face image/video as input, fuses the mel-spectrogram of the audio with frame(s) from the image/video, and generates a high-quality video of the face with lip movements synchronized to the audio.

# 🚀1. Start Microservice with Docker (option 1)

## 1.1 Build the Docker images

### 1.1.1 Wav2Lip Server image

```bash
git clone https://github.com/opea-project/GenAIComps.git
cd GenAIComps
```

- Xeon CPU

```bash
docker build -t opea/wav2lip:latest -f comps/animation/wav2lip/dependency/Dockerfile .
```

- Gaudi2 HPU

```bash
docker build -t opea/wav2lip-gaudi:latest -f comps/animation/wav2lip/dependency/Dockerfile.intel_hpu .
```

### 1.1.2 Animation server image

```bash
docker build -t opea/animation:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/animation/wav2lip/Dockerfile .
```

## 1.2 Set environment variables

- Xeon CPU

```bash
export ip_address=$(hostname -I | awk '{print $1}')
export DEVICE="cpu"
export WAV2LIP_PORT=7860
export ANIMATION_PORT=9066
export INFERENCE_MODE='wav2lip+gfpgan'
export CHECKPOINT_PATH='/usr/local/lib/python3.11/site-packages/Wav2Lip/checkpoints/wav2lip_gan.pth'
export FACE="comps/animation/wav2lip/assets/img/avatar1.jpg"
# export AUDIO='assets/audio/eg3_ref.wav' # optional audio file path; the base64 string in the POST request is used as input when AUDIO is 'None'
export AUDIO='None'
export FACESIZE=96
export OUTFILE="comps/animation/wav2lip/assets/outputs/result.mp4"
export GFPGAN_MODEL_VERSION=1.4 # latest version, can roll back to v1.3 if needed
export UPSCALE_FACTOR=1
export FPS=10
```

- Gaudi2 HPU

```bash
export ip_address=$(hostname -I | awk '{print $1}')
export DEVICE="hpu"
export WAV2LIP_PORT=7860
export ANIMATION_PORT=9066
export INFERENCE_MODE='wav2lip+gfpgan'
export CHECKPOINT_PATH='/usr/local/lib/python3.10/dist-packages/Wav2Lip/checkpoints/wav2lip_gan.pth'
export FACE="comps/animation/wav2lip/assets/img/avatar1.jpg"
# export AUDIO='assets/audio/eg3_ref.wav' # optional audio file path; the base64 string in the POST request is used as input when AUDIO is 'None'
export AUDIO='None'
export FACESIZE=96
export OUTFILE="comps/animation/wav2lip/assets/outputs/result.mp4"
export GFPGAN_MODEL_VERSION=1.4 # latest version, can roll back to v1.3 if needed
export UPSCALE_FACTOR=1
export FPS=10
```

# 🚀2. Run the Docker container

## 2.1 Run Wav2Lip Microservice

- Xeon CPU

```bash
docker run --privileged -d --name "wav2lip-service" -p 7860:7860 --ipc=host -w /home/user/comps/animation/wav2lip -e PYTHON=/usr/bin/python3.11 -e DEVICE=$DEVICE -e INFERENCE_MODE=$INFERENCE_MODE -e CHECKPOINT_PATH=$CHECKPOINT_PATH -e FACE=$FACE -e AUDIO=$AUDIO -e FACESIZE=$FACESIZE -e OUTFILE=$OUTFILE -e GFPGAN_MODEL_VERSION=$GFPGAN_MODEL_VERSION -e UPSCALE_FACTOR=$UPSCALE_FACTOR -e FPS=$FPS -e WAV2LIP_PORT=$WAV2LIP_PORT opea/wav2lip:latest
```

- Gaudi2 HPU

```bash
docker run --privileged -d --name "wav2lip-gaudi-service" -p 7860:7860 --runtime=habana --cap-add=sys_nice --net=host --ipc=host -w /home/user/comps/animation/wav2lip -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none -e PYTHON=/usr/bin/python3.10 -e DEVICE=$DEVICE -e INFERENCE_MODE=$INFERENCE_MODE -e CHECKPOINT_PATH=$CHECKPOINT_PATH -e FACE=$FACE -e AUDIO=$AUDIO -e FACESIZE=$FACESIZE -e OUTFILE=$OUTFILE -e GFPGAN_MODEL_VERSION=$GFPGAN_MODEL_VERSION -e UPSCALE_FACTOR=$UPSCALE_FACTOR -e FPS=$FPS -e WAV2LIP_PORT=$WAV2LIP_PORT opea/wav2lip-gaudi:latest
```

## 2.2 Run Animation Microservice

```bash
docker run -d -p 9066:9066 --ipc=host --name "animation-service" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e WAV2LIP_ENDPOINT=http://$ip_address:7860 opea/animation:latest
```

# 🚀3. Validate Microservice

Once the microservices are running, use the scripts below to validate them.

## 3.1 Validate Wav2Lip service

```bash
cd GenAIComps/comps/animation/wav2lip
python3 dependency/check_wav2lip_server.py
```

## 3.2 Validate Animation service

```bash
cd GenAIComps
export ip_address=$(hostname -I | awk '{print $1}')
curl http://${ip_address}:9066/v1/animation -X POST -H "Content-Type: application/json" -d @comps/animation/wav2lip/assets/audio/sample_question.json
```

or

```bash
cd GenAIComps/comps/animation/wav2lip
python3 check_animation_server.py
```

The expected output is a message similar to the following:

```bash
{'wav2lip_result': '.../GenAIComps/comps/animation/wav2lip/assets/outputs/result.mp4'}
```

The generated video is saved to `comps/animation/wav2lip/assets/outputs/result.mp4` for reference.
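
To test with your own audio instead of the bundled sample, note that the request body is a JSON object carrying a base64-encoded audio string. A minimal sketch for building such a payload, assuming GNU coreutils `base64` and using the `byte_str` field name from the service's `Base64ByteStrDoc` input type:

```bash
# Sketch: build a payload from a local WAV file and post it to the animation service
# (field name "byte_str" follows the Base64ByteStrDoc input type; base64 -w 0 assumes GNU coreutils)
cd GenAIComps/comps/animation/wav2lip
echo "{\"byte_str\": \"$(base64 -w 0 assets/audio/eg3_ref.wav)\"}" > my_payload.json
curl http://${ip_address}:9066/v1/animation -X POST -H "Content-Type: application/json" -d @my_payload.json
```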
62 changes: 62 additions & 0 deletions comps/animation/wav2lip/animation.py
@@ -0,0 +1,62 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
# Copyright (c) 2013--2023, librosa development team.
# Copyright 1999-2003 The OpenLDAP Foundation, Redwood City, California, USA. All Rights Reserved.
# Copyright (c) 2012, Anaconda, Inc. All rights reserved.

import json
import os
import time

import requests

# GenAIComps
from comps import CustomLogger

logger = CustomLogger("animation")
logflag = os.getenv("LOGFLAG", False)
from comps import (
Base64ByteStrDoc,
ServiceType,
VideoPath,
opea_microservices,
register_microservice,
register_statistics,
statistics_dict,
)


# Register the microservice
@register_microservice(
name="opea_service@animation",
service_type=ServiceType.ANIMATION,
endpoint="/v1/animation",
host="0.0.0.0",
port=9066,
input_datatype=Base64ByteStrDoc,
output_datatype=VideoPath,
)
@register_statistics(names=["opea_service@animation"])
async def animate(audio: Base64ByteStrDoc):
start = time.time()

byte_str = audio.byte_str
inputs = {"audio": byte_str}
if logflag:
logger.info(inputs)

response = requests.post(url=f"{wav2lip_endpoint}/v1/wav2lip", data=json.dumps(inputs), proxies={"http": None})

outfile = response.json()["wav2lip_result"]
if logflag:
logger.info(response)
logger.info(f"Video generated successfully, check {outfile} for the result.")

statistics_dict["opea_service@animation"].append_latency(time.time() - start, None)
return VideoPath(video_path=outfile)


if __name__ == "__main__":
wav2lip_endpoint = os.getenv("WAV2LIP_ENDPOINT", "http://localhost:7860")
logger.info("[animation - router] Animation initialized.")
opea_microservices["opea_service@animation"].start()
3 changes: 3 additions & 0 deletions comps/animation/wav2lip/assets/audio/sample_question.json

Large diffs are not rendered by default.

Binary file added comps/animation/wav2lip/assets/img/avatar3.png
Binary file added comps/animation/wav2lip/assets/img/avatar5.png
Binary file added comps/animation/wav2lip/assets/img/avatar6.png
Binary file added comps/animation/wav2lip/assets/img/gaudi.png
Binary file added comps/animation/wav2lip/assets/img/opea_qr.png
Binary file added comps/animation/wav2lip/assets/img/xeon.jpg
21 changes: 21 additions & 0 deletions comps/animation/wav2lip/check_animation_server.py
@@ -0,0 +1,21 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

import json
import os

import requests

ip_address = os.environ.get("ip_address")
endpoint = f"http://{ip_address}:9066/v1/animation"
outfile = os.environ.get("OUTFILE")

# Read the JSON file
with open("assets/audio/sample_question.json", "r") as file:
data = json.load(file)

response = requests.post(url=endpoint, json=data, headers={"Content-Type": "application/json"}, proxies={"http": None})
print(f"Status code: {response.status_code}")
if response.status_code == 200:
print(f"Check {outfile} for the result.")
print(response.json())
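The script reads `ip_address` and `OUTFILE` from the environment and opens `assets/audio/sample_question.json` relative to the working directory, so it should be run from the microservice directory. A minimal usage sketch, mirroring the README setup:

```bash
# Sketch: run the client from the microservice directory with the env vars it expects
cd GenAIComps/comps/animation/wav2lip
export ip_address=$(hostname -I | awk '{print $1}')
export OUTFILE="comps/animation/wav2lip/assets/outputs/result.mp4"  # only echoed back on success
python3 check_animation_server.py
```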
76 changes: 76 additions & 0 deletions comps/animation/wav2lip/dependency/Dockerfile
@@ -0,0 +1,76 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# Use a base image
FROM python:3.11-slim

# Set environment variables
ENV LANG=en_US.UTF-8
ENV PYTHONPATH=/usr/local/lib/python3.11/site-packages:/home:/home/user
ENV PYTHON=/usr/bin/python3.11

# Install dependencies
RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
yasm \
build-essential \
pkg-config \
libx264-dev \
git \
nasm \
wget \
libgl1-mesa-glx \
libglib2.0-0 \
&& rm -rf /var/lib/apt/lists/*

# Install GenAIComps
RUN mkdir -p /home/user/comps
COPY comps /home/user/comps
COPY comps/animation/wav2lip/dependency/entrypoint.sh /usr/local/bin/entrypoint.sh

# Install ffmpeg with x264 software codec
RUN git clone https://github.com/FFmpeg/FFmpeg.git /home/user/comps/animation/wav2lip/FFmpeg
WORKDIR /home/user/comps/animation/wav2lip/FFmpeg
RUN ./configure --enable-gpl --enable-libx264 --enable-cross-compile && \
make -j$(nproc) && \
make install && \
hash -r
RUN chmod +x $(which ffmpeg)

# Upgrade pip
RUN python3 -m pip install --upgrade pip

# Install Wav2Lip from pip
RUN pip install --no-deps Wav2Lipy
RUN rm /usr/local/lib/python3.11/site-packages/Wav2Lip/__init__.py && touch /usr/local/lib/python3.11/site-packages/Wav2Lip/__init__.py
ENV PYTHONPATH="$PYTHONPATH:/usr/local/lib/python3.11/site-packages/Wav2Lip"

# Install GFPGAN from pip
RUN pip install --no-deps gfpgan
RUN touch /usr/local/lib/python3.11/site-packages/gfpgan/__init__.py
ENV PYTHONPATH="$PYTHONPATH:/usr/local/lib/python3.11/site-packages/gfpgan"

# Download pre-trained models
WORKDIR /usr/local/lib/python3.11/site-packages
RUN chmod +x /home/user/comps/animation/wav2lip/dependency/download_ckpts.sh
RUN /home/user/comps/animation/wav2lip/dependency/download_ckpts.sh

# Install pip dependencies
RUN pip install -r /home/user/comps/animation/wav2lip/requirements.txt

# Custom patches
# Modify the degradations.py file to import rgb_to_grayscale from torchvision.transforms.functional
RUN sed -i 's/from torchvision.transforms.functional_tensor import rgb_to_grayscale/from torchvision.transforms.functional import rgb_to_grayscale/' /usr/local/lib/python3.11/site-packages/basicsr/data/degradations.py

# Modify the core.py file to include 'hpu' in the device check
RUN sed -i "s/if 'cpu' not in device and 'cuda' not in device:/if 'cpu' not in device and 'cuda' not in device and 'hpu' not in device:/" /usr/local/lib/python3.11/site-packages/Wav2Lip/face_detection/detection/core.py

# To be compatible with librosa==0.10.2, instead of librosa==0.7.0 because the co-dependency numba==0.48 cannot be pip installed
RUN sed -i 's/hp.sample_rate, hp.n_fft/sr=hp.sample_rate, n_fft=hp.n_fft/' /usr/local/lib/python3.11/site-packages/Wav2Lip/audio.py

# Set the working directory
WORKDIR /home/user/comps/animation/wav2lip/

# Define the command to run when the container starts
RUN chmod +x /usr/local/bin/entrypoint.sh
ENV DEVICE="cpu"
ENTRYPOINT ["/usr/local/bin/entrypoint.sh"]