opea-project · lvliang-intel · Oct 12, 2024 · Oct 11, 2024 · Oct 11, 2024 · Oct 11, 2024
@@ -0,0 +1,32 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# Base image
+FROM python:3.10-slim
+
+# Create the account "user" with bash as its login shell and a home directory
+# at /home/user, then make sure that directory tree is owned by it.
+RUN useradd --create-home --shell /bin/bash user \
+    && mkdir --parents /home/user \
+    && chown --recursive user /home/user/
+
+# Install system dependencies.
+# --no-install-recommends plus removing the apt lists in the same layer keeps
+# the image small. ca-certificates is listed explicitly so the HTTPS clones
+# and downloads later in the build do not depend on it being pulled in as a
+# "recommended" package.
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+        build-essential \
+        ca-certificates \
+        ffmpeg \
+        git \
+        git-lfs \
+        vim \
+        wget && \
+    rm -rf /var/lib/apt/lists/* && \
+    pip install --no-cache-dir --upgrade pip
+
+# Fetch the GPT-SoVITS sources and the pre-trained weights directly into their
+# final location (/home/user/GPT-SoVITS). Cloning, moving and cleaning up in a
+# single layer prevents the weights and the discarded clone from being stored
+# again in separate image layers; shallow clones skip unneeded git history.
+# NOTE(review): neither clone is pinned to a tag or commit, so rebuilds can
+# pick up different upstream code/weights - consider pinning.
+WORKDIR /home/user
+RUN git clone --depth 1 https://github.com/RVC-Boss/GPT-SoVITS.git && \
+    git clone --depth 1 https://huggingface.co/lj1995/GPT-SoVITS pretrained_models && \
+    mv pretrained_models/* GPT-SoVITS/GPT_SoVITS/pretrained_models/ && \
+    rm -rf pretrained_models
+
+# Install the Python requirements of GPT-SoVITS and download the NLTK data sets
+RUN pip install --no-cache-dir -r GPT-SoVITS/requirements.txt && \
+    python -m nltk.downloader averaged_perceptron_tagger_eng cmudict
+
+# NOTE(review): the image still runs as root because the USER switch below is
+# disabled; enabling it presumably also requires the application directory to
+# be writable by "user" - confirm before turning it on.
+# USER user
+# ENV LANG=C.UTF-8
+
+WORKDIR /home/user/GPT-SoVITS
+
+# Download the default reference audio. -O fixes the local file name so it
+# always matches the path given to --default_refer_path in the ENTRYPOINT.
+RUN wget -O welcome_cn.wav "https://github.com/intel/intel-extension-for-transformers/raw/refs/heads/main/intel_extension_for_transformers/neural_chat/assets/audio/welcome_cn.wav"
+
+# Port published in the README's `docker run -p 9880:9880` example
+# (EXPOSE is documentation only; it does not publish the port by itself).
+EXPOSE 9880
+
+# Start the API server with the downloaded audio as the default reference.
+ENTRYPOINT ["python", "api.py", "--default_refer_path", "./welcome_cn.wav", "--default_refer_text", "欢迎使用", "--default_refer_language", "zh"]
@@ -0,0 +1,56 @@
+# GPT-SoVITS Microservice
+
+[GPT-SoVITS](https://github.com/RVC-Boss/GPT-SoVITS) allows you to do zero-shot voice cloning and text-to-speech in multiple languages, such as English, Japanese, Korean, Cantonese, and Chinese.
+
+This microservice is validated on Xeon/CUDA. HPU support is under development.
+
+## Build the Image
+
+```bash
+docker build -t opea/gpt-sovits:latest --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy -f comps/tts/gpt-sovits/Dockerfile .
+```
+
+## Start the Service
+
+```bash
+docker run  -itd -p 9880:9880 -e http_proxy=$http_proxy -e https_proxy=$https_proxy opea/gpt-sovits:latest
+```
+
+## Test
+
+- Chinese only
+
+```bash
+curl localhost:9880/ -XPOST -d '{
+    "text": "先帝创业未半而中道崩殂，今天下三分，益州疲弊，此诚危急存亡之秋也。",
+    "text_language": "zh"
+}' --output out.wav
+```
+
+- English only
+
+```bash
+curl localhost:9880/ -XPOST -d '{
+    "text": "Discuss the evolution of text-to-speech (TTS) technology from its early beginnings to the present day. Highlight the advancements in natural language processing that have contributed to more realistic and human-like speech synthesis. Also, explore the various applications of TTS in education, accessibility, and customer service, and predict future trends in this field. Write a comprehensive overview of text-to-speech (TTS) technology.",
+    "text_language": "en"
+}' --output out.wav
+```
+
+- Automatic language detection
+
+```bash
+curl localhost:9880/ -XPOST -d '{
+    "text": "Hi 你好，这里是一个 cross-lingual 的例子。",
+    "text_language": "auto"
+}' --output out.wav
+```
+
+- Change reference audio
+
+```bash
+curl localhost:9880/change_refer -d '{
+    "refer_wav_path": "path_to_your_audio.wav",
+    "prompt_text": "transcription_of_your_audio",
+    "prompt_language": "language_of_your_audio"
+}'
+```
@@ -0,0 +1,2 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		# Copyright (C) 2024 Intel Corporation
		# SPDX-License-Identifier: Apache-2.0