Merged
59 commits
f0c7a02
updates needed for demo
ctao456 Oct 7, 2024
81a5943
Merge branch 'ctao/demo' of https://github.com/ctao456/GenAIComps int…
ctao456 Oct 7, 2024
dbf8ad3
original pr content
ctao456 Oct 8, 2024
98cbb1c
Merge branch 'opea-project:main' into ctao/opea
ctao456 Oct 8, 2024
d85f33a
Revert "updates needed for demo"
ctao456 Oct 8, 2024
a8363b7
remove improper images
ctao456 Oct 8, 2024
4c22b40
Addressed some comments on previous pr
ctao456 Oct 8, 2024
dc59a77
Add Dockerfile for cpu support
ctao456 Oct 9, 2024
bf38204
CODEOWNER: Update comp CODEOWNER (#757)
hteeyeoh Oct 8, 2024
b932b94
Add stable diffusion microservice (#729)
XinyuYe-Intel Oct 8, 2024
7fb8c6b
Compatible with different platforms. (#766)
ZePan110 Oct 8, 2024
817c0f1
Optimize path and link validity check. (#745)
ZePan110 Oct 9, 2024
a14c76f
Add timeout for ut test (#773)
chensuyue Oct 9, 2024
8fae482
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 9, 2024
b9420c7
Merge branch 'opea-project:main' into ctao/opea
ctao456 Oct 9, 2024
1879a1d
test hyperlink
ctao456 Oct 10, 2024
cc938fb
test hyperlink
ctao456 Oct 10, 2024
697fdbd
test hyperlink issue
ctao456 Oct 10, 2024
120edae
test hyperlink issue
ctao456 Oct 10, 2024
4bfd1f3
put back hyperlinks in readme
ctao456 Oct 10, 2024
10b3b34
remove possible error hyperlink
ctao456 Oct 10, 2024
09ab934
put hyperlink back
ctao456 Oct 10, 2024
901adcc
Merge branch 'opea-project:main' into ctao/opea
ctao456 Oct 14, 2024
303eb1f
major update to use FastAPI for wav2lip, and structure component format
ctao456 Oct 15, 2024
d058496
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 15, 2024
d6b3508
Merge branch 'opea-project:main' into ctao/opea
ctao456 Oct 15, 2024
ff0ec2d
Add dockerfiles in animation-compose-cd.yaml
ctao456 Oct 16, 2024
87f51e6
Fix end of file issue in animation-compose-cd.yaml
ctao456 Oct 16, 2024
3520b9e
Merge branch 'main' into ctao/opea
chensuyue Oct 17, 2024
c4f43da
Fix Docker deployment on Xeon
ctao456 Oct 18, 2024
f41f974
Merge branch 'ctao/opea' of https://github.com/ctao456/GenAIComps int…
ctao456 Oct 18, 2024
b7752b5
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 18, 2024
22cc67a
add versioning for all pip packages
ctao456 Oct 18, 2024
fd214e7
e2e test script for animation
ctao456 Oct 18, 2024
48a23f6
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 18, 2024
a75bcc6
update e2e test script
ctao456 Oct 18, 2024
46d293e
Merge branch 'ctao/opea' of https://github.com/ctao456/GenAIComps int…
ctao456 Oct 18, 2024
f637e1e
update e2e test script
ctao456 Oct 18, 2024
a3e2148
Merge branch 'opea-project:main' into ctao/opea
ctao456 Oct 18, 2024
d9f152b
update readme
ctao456 Oct 18, 2024
e08f0fb
Merge branch 'ctao/opea' of https://github.com/ctao456/GenAIComps int…
ctao456 Oct 18, 2024
f2e6b34
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 18, 2024
72a6527
update
ctao456 Oct 19, 2024
dd849ef
update
ctao456 Oct 19, 2024
fecedab
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 19, 2024
f3f3343
update gateway
ctao456 Oct 19, 2024
8790139
udpate gateway
ctao456 Oct 19, 2024
26deb88
Fix AVATAR_CHATBOT
ctao456 Oct 19, 2024
5d8fef8
update gateway
ctao456 Oct 19, 2024
a0b4d13
update
ctao456 Oct 19, 2024
9a705ca
test
ctao456 Oct 19, 2024
e177ba2
update
ctao456 Oct 19, 2024
d36d0fe
update
ctao456 Oct 19, 2024
e90f5cf
update gateway
ctao456 Oct 19, 2024
dda2e31
fix max_tokens in AvatarChatbot gateway
ctao456 Oct 19, 2024
83186de
test
ctao456 Oct 19, 2024
38e4d46
update
ctao456 Oct 19, 2024
7aa56f4
update
ctao456 Oct 19, 2024
155ef8e
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 19, 2024
18 changes: 18 additions & 0 deletions .github/workflows/docker/compose/animation-compose-cd.yaml
@@ -0,0 +1,18 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# this file should be run in the root of the repo
# images used by GenAIExamples: animation, wav2lip, wav2lip-gaudi
services:
animation:
build:
dockerfile: comps/animation/wav2lip/Dockerfile
image: ${REGISTRY:-opea}/animation:${TAG:-latest}
wav2lip:
build:
dockerfile: comps/animation/wav2lip/dependency/Dockerfile
image: ${REGISTRY:-opea}/wav2lip:${TAG:-latest}
wav2lip-gaudi:
build:
dockerfile: comps/animation/wav2lip/dependency/Dockerfile.intel_hpu
image: ${REGISTRY:-opea}/wav2lip-gaudi:${TAG:-latest}
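For local testing, the images defined in this compose file can also be built directly with Docker Compose. A minimal sketch, assuming Docker Compose v2 and that the command is run from the repository root (as the comment above requires):

```bash
# Sketch: build the animation images locally (assumes Docker Compose v2, run from the repo root)
docker compose --project-directory . \
  -f .github/workflows/docker/compose/animation-compose-cd.yaml \
  build animation wav2lip

# Gaudi variant
docker compose --project-directory . \
  -f .github/workflows/docker/compose/animation-compose-cd.yaml \
  build wav2lip-gaudi
```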
1 change: 1 addition & 0 deletions comps/__init__.py
@@ -58,6 +58,7 @@
VideoQnAGateway,
VisualQnAGateway,
MultimodalQnAGateway,
AvatarChatbotGateway,
)

# Telemetry
24 changes: 24 additions & 0 deletions comps/animation/wav2lip/Dockerfile
@@ -0,0 +1,24 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# Use a base image
FROM python:3.11-slim

RUN useradd -m -s /bin/bash user && \
mkdir -p /home/user && \
chown -R user /home/user/
USER user

ENV LANG=C.UTF-8
ARG ARCH=cpu

COPY comps /home/user/comps

RUN pip install --no-cache-dir --upgrade pip && \
pip install --no-cache-dir -r /home/user/comps/animation/wav2lip/requirements.txt ;

ENV PYTHONPATH=$PYTHONPATH:/home/user

WORKDIR /home/user/comps/animation/wav2lip

ENTRYPOINT ["python3", "animation.py"]
128 changes: 128 additions & 0 deletions comps/animation/wav2lip/README.md
@@ -0,0 +1,128 @@
# Avatar Animation Microservice

The avatar animation model combines two models: Wav2Lip and the GAN-based face restoration model GFPGAN. The Wav2Lip model generates lip movements from an audio file, and the GFPGAN model restores a high-quality face image from a low-quality one. The avatar animation microservice takes an audio clip and a low-quality face image/video as input, fuses the mel-spectrogram of the audio with frame(s) from the image/video, and generates a high-quality video of the face with lip movements synchronized to the audio.

# 🚀1. Start Microservice with Docker (option 1)

## 1.1 Build the Docker images

### 1.1.1 Wav2Lip Server image

```bash
git clone https://github.com/opea-project/GenAIComps.git
cd GenAIComps
```

- Xeon CPU

```bash
docker build -t opea/wav2lip:latest -f comps/animation/wav2lip/dependency/Dockerfile .
```

- Gaudi2 HPU

```bash
docker build -t opea/wav2lip-gaudi:latest -f comps/animation/wav2lip/dependency/Dockerfile.intel_hpu .
```

### 1.1.2 Animation server image

```bash
docker build -t opea/animation:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/animation/wav2lip/Dockerfile .
```

## 1.2 Set environment variables

- Xeon CPU

```bash
export ip_address=$(hostname -I | awk '{print $1}')
export DEVICE="cpu"
export WAV2LIP_PORT=7860
export ANIMATION_PORT=9066
export INFERENCE_MODE='wav2lip+gfpgan'
export CHECKPOINT_PATH='/usr/local/lib/python3.11/site-packages/Wav2Lip/checkpoints/wav2lip_gan.pth'
export FACE="comps/animation/wav2lip/assets/img/avatar1.jpg"
# export AUDIO='assets/audio/eg3_ref.wav' # optional audio file path; the base64 string in the POST request is used as input when AUDIO is 'None'
export AUDIO='None'
export FACESIZE=96
export OUTFILE="comps/animation/wav2lip/assets/outputs/result.mp4"
export GFPGAN_MODEL_VERSION=1.4 # latest version, can roll back to v1.3 if needed
export UPSCALE_FACTOR=1
export FPS=10
```

- Gaudi2 HPU

```bash
export ip_address=$(hostname -I | awk '{print $1}')
export DEVICE="hpu"
export WAV2LIP_PORT=7860
export ANIMATION_PORT=9066
export INFERENCE_MODE='wav2lip+gfpgan'
export CHECKPOINT_PATH='/usr/local/lib/python3.10/dist-packages/Wav2Lip/checkpoints/wav2lip_gan.pth'
export FACE="comps/animation/wav2lip/assets/img/avatar1.jpg"
# export AUDIO='assets/audio/eg3_ref.wav' # optional audio file path; the base64 string in the POST request is used as input when AUDIO is 'None'
export AUDIO='None'
export FACESIZE=96
export OUTFILE="comps/animation/wav2lip/assets/outputs/result.mp4"
export GFPGAN_MODEL_VERSION=1.4 # latest version, can roll back to v1.3 if needed
export UPSCALE_FACTOR=1
export FPS=10
```

# 🚀2. Run the Docker container

## 2.1 Run Wav2Lip Microservice

- Xeon CPU

```bash
docker run --privileged -d --name "wav2lip-service" -p 7860:7860 --ipc=host -w /home/user/comps/animation/wav2lip -e PYTHON=/usr/bin/python3.11 -e DEVICE=$DEVICE -e INFERENCE_MODE=$INFERENCE_MODE -e CHECKPOINT_PATH=$CHECKPOINT_PATH -e FACE=$FACE -e AUDIO=$AUDIO -e FACESIZE=$FACESIZE -e OUTFILE=$OUTFILE -e GFPGAN_MODEL_VERSION=$GFPGAN_MODEL_VERSION -e UPSCALE_FACTOR=$UPSCALE_FACTOR -e FPS=$FPS -e WAV2LIP_PORT=$WAV2LIP_PORT opea/wav2lip:latest
```

- Gaudi2 HPU

```bash
docker run --privileged -d --name "wav2lip-gaudi-service" -p 7860:7860 --runtime=habana --cap-add=sys_nice --net=host --ipc=host -w /home/user/comps/animation/wav2lip -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none -e PYTHON=/usr/bin/python3.10 -e DEVICE=$DEVICE -e INFERENCE_MODE=$INFERENCE_MODE -e CHECKPOINT_PATH=$CHECKPOINT_PATH -e FACE=$FACE -e AUDIO=$AUDIO -e FACESIZE=$FACESIZE -e OUTFILE=$OUTFILE -e GFPGAN_MODEL_VERSION=$GFPGAN_MODEL_VERSION -e UPSCALE_FACTOR=$UPSCALE_FACTOR -e FPS=$FPS -e WAV2LIP_PORT=$WAV2LIP_PORT opea/wav2lip-gaudi:latest
```

## 2.2 Run Animation Microservice

```bash
docker run -d -p 9066:9066 --ipc=host --name "animation-service" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e WAV2LIP_ENDPOINT=http://$ip_address:7860 opea/animation:latest
```

# 🚀3. Validate Microservice

Once the microservices are running, use the scripts below to validate them.

## 3.1 Validate Wav2Lip service

```bash
cd GenAIComps/comps/animation/wav2lip
python3 dependency/check_wav2lip_server.py
```

## 3.2 Validate Animation service

```bash
cd GenAIComps
export ip_address=$(hostname -I | awk '{print $1}')
curl http://${ip_address}:9066/v1/animation -X POST -H "Content-Type: application/json" -d @comps/animation/wav2lip/assets/audio/sample_question.json
```

or

```bash
cd GenAIComps/comps/animation/wav2lip
python3 check_animation_server.py
```

The expected output is a message similar to the following:

```bash
{'wav2lip_result': '.../GenAIComps/comps/animation/wav2lip/assets/outputs/result.mp4'}
```

The generated video is saved to `comps/animation/wav2lip/assets/outputs/result.mp4` for reference.
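
To test with your own audio instead of the bundled sample, note that the request body is a JSON object carrying a base64-encoded audio string. A minimal sketch for building such a payload, assuming GNU coreutils `base64` and using the `byte_str` field name from the service's `Base64ByteStrDoc` input type:

```bash
# Sketch: build a payload from a local WAV file and post it to the animation service
# (field name "byte_str" follows the Base64ByteStrDoc input type; base64 -w 0 assumes GNU coreutils)
cd GenAIComps/comps/animation/wav2lip
echo "{\"byte_str\": \"$(base64 -w 0 assets/audio/eg3_ref.wav)\"}" > my_payload.json
curl http://${ip_address}:9066/v1/animation -X POST -H "Content-Type: application/json" -d @my_payload.json
```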
62 changes: 62 additions & 0 deletions comps/animation/wav2lip/animation.py
@@ -0,0 +1,62 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
# Copyright (c) 2013--2023, librosa development team.
# Copyright 1999-2003 The OpenLDAP Foundation, Redwood City, California, USA. All Rights Reserved.
# Copyright (c) 2012, Anaconda, Inc. All rights reserved.

import json
import os
import time

import requests

# GenAIComps
from comps import CustomLogger

logger = CustomLogger("animation")
logflag = os.getenv("LOGFLAG", False)
from comps import (
Base64ByteStrDoc,
ServiceType,
VideoPath,
opea_microservices,
register_microservice,
register_statistics,
statistics_dict,
)


# Register the microservice
@register_microservice(
name="opea_service@animation",
service_type=ServiceType.ANIMATION,
endpoint="/v1/animation",
host="0.0.0.0",
port=9066,
input_datatype=Base64ByteStrDoc,
output_datatype=VideoPath,
)
@register_statistics(names=["opea_service@animation"])
async def animate(audio: Base64ByteStrDoc):
start = time.time()

byte_str = audio.byte_str
inputs = {"audio": byte_str}
if logflag:
logger.info(inputs)

response = requests.post(url=f"{wav2lip_endpoint}/v1/wav2lip", data=json.dumps(inputs), proxies={"http": None})

outfile = response.json()["wav2lip_result"]
if logflag:
logger.info(response)
logger.info(f"Video generated successfully, check {outfile} for the result.")

statistics_dict["opea_service@animation"].append_latency(time.time() - start, None)
return VideoPath(video_path=outfile)


if __name__ == "__main__":
wav2lip_endpoint = os.getenv("WAV2LIP_ENDPOINT", "http://localhost:7860")
logger.info("[animation - router] Animation initialized.")
opea_microservices["opea_service@animation"].start()
3 changes: 3 additions & 0 deletions comps/animation/wav2lip/assets/audio/sample_question.json

Large diffs are not rendered by default.

Binary file added comps/animation/wav2lip/assets/img/avatar3.png
Binary file added comps/animation/wav2lip/assets/img/avatar5.png
Binary file added comps/animation/wav2lip/assets/img/avatar6.png
Binary file added comps/animation/wav2lip/assets/img/gaudi.png
Binary file added comps/animation/wav2lip/assets/img/opea_qr.png
Binary file added comps/animation/wav2lip/assets/img/xeon.jpg
21 changes: 21 additions & 0 deletions comps/animation/wav2lip/check_animation_server.py
@@ -0,0 +1,21 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

import json
import os

import requests

ip_address = os.environ.get("ip_address")
endpoint = f"http://{ip_address}:9066/v1/animation"
outfile = os.environ.get("OUTFILE")

# Read the JSON file
with open("assets/audio/sample_question.json", "r") as file:
data = json.load(file)

response = requests.post(url=endpoint, json=data, headers={"Content-Type": "application/json"}, proxies={"http": None})
print(f"Status code: {response.status_code}")
if response.status_code == 200:
print(f"Check {outfile} for the result.")
print(response.json())
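The script reads `ip_address` and `OUTFILE` from the environment and opens `assets/audio/sample_question.json` relative to the working directory, so it should be run from the microservice directory. A minimal usage sketch, mirroring the README setup:

```bash
# Sketch: run the client from the microservice directory with the env vars it expects
cd GenAIComps/comps/animation/wav2lip
export ip_address=$(hostname -I | awk '{print $1}')
export OUTFILE="comps/animation/wav2lip/assets/outputs/result.mp4"  # only echoed back on success
python3 check_animation_server.py
```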
76 changes: 76 additions & 0 deletions comps/animation/wav2lip/dependency/Dockerfile
@@ -0,0 +1,76 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# Use a base image
FROM python:3.11-slim

# Set environment variables
ENV LANG=en_US.UTF-8
ENV PYTHONPATH=/usr/local/lib/python3.11/site-packages:/home:/home/user
ENV PYTHON=/usr/bin/python3.11

# Install dependencies
RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
yasm \
build-essential \
pkg-config \
libx264-dev \
git \
nasm \
wget \
libgl1-mesa-glx \
libglib2.0-0 \
&& rm -rf /var/lib/apt/lists/*

# Install GenAIComps
RUN mkdir -p /home/user/comps
COPY comps /home/user/comps
COPY comps/animation/wav2lip/dependency/entrypoint.sh /usr/local/bin/entrypoint.sh

# Install ffmpeg with x264 software codec
RUN git clone https://github.com/FFmpeg/FFmpeg.git /home/user/comps/animation/wav2lip/FFmpeg
WORKDIR /home/user/comps/animation/wav2lip/FFmpeg
RUN ./configure --enable-gpl --enable-libx264 --enable-cross-compile && \
make -j$(nproc) && \
make install && \
hash -r
RUN chmod +x $(which ffmpeg)

# Upgrade pip
RUN python3 -m pip install --upgrade pip

# Install Wav2Lip from pip
RUN pip install --no-deps Wav2Lipy
RUN rm /usr/local/lib/python3.11/site-packages/Wav2Lip/__init__.py && touch /usr/local/lib/python3.11/site-packages/Wav2Lip/__init__.py
ENV PYTHONPATH="$PYTHONPATH:/usr/local/lib/python3.11/site-packages/Wav2Lip"

# Install GFPGAN from pip
RUN pip install --no-deps gfpgan
RUN touch /usr/local/lib/python3.11/site-packages/gfpgan/__init__.py
ENV PYTHONPATH="$PYTHONPATH:/usr/local/lib/python3.11/site-packages/gfpgan"

# Download pre-trained models
WORKDIR /usr/local/lib/python3.11/site-packages
RUN chmod +x /home/user/comps/animation/wav2lip/dependency/download_ckpts.sh
RUN /home/user/comps/animation/wav2lip/dependency/download_ckpts.sh

# Install pip dependencies
RUN pip install -r /home/user/comps/animation/wav2lip/requirements.txt

# Custom patches
# Modify the degradations.py file to import rgb_to_grayscale from torchvision.transforms.functional
RUN sed -i 's/from torchvision.transforms.functional_tensor import rgb_to_grayscale/from torchvision.transforms.functional import rgb_to_grayscale/' /usr/local/lib/python3.11/site-packages/basicsr/data/degradations.py

# Modify the core.py file to include 'hpu' in the device check
RUN sed -i "s/if 'cpu' not in device and 'cuda' not in device:/if 'cpu' not in device and 'cuda' not in device and 'hpu' not in device:/" /usr/local/lib/python3.11/site-packages/Wav2Lip/face_detection/detection/core.py

# To be compatible with librosa==0.10.2, instead of librosa==0.7.0 because the co-dependency numba==0.48 cannot be pip installed
RUN sed -i 's/hp.sample_rate, hp.n_fft/sr=hp.sample_rate, n_fft=hp.n_fft/' /usr/local/lib/python3.11/site-packages/Wav2Lip/audio.py

# Set the working directory
WORKDIR /home/user/comps/animation/wav2lip/

# Define the command to run when the container starts
RUN chmod +x /usr/local/bin/entrypoint.sh
ENV DEVICE="cpu"
ENTRYPOINT ["/usr/local/bin/entrypoint.sh"]