我们提供一套兼容 OpenAI API 的使用方式,只需要安装 StackFlow 包即可。
sudo apt install lib-llm llm-sys llm-cosy-voice llm-openai-api
sudo apt install llm-model-cosyvoice2-0.5b-axcl

curl http://127.0.0.1:8000/v1/audio/speech \
-H "Content-Type: application/json" \
-d '{
"model": "CosyVoice2-0.5B-axcl",
"response_format": "wav",
"input": "君不见黄河之水天上来,奔流到海不复回。君不见高堂明镜悲白发,朝如青丝暮成雪。人生得意须尽欢,莫使金樽空对月。天生我材必有用,千金散尽还复来。"
}' \
-o output.wav

from pathlib import Path
from openai import OpenAI
client = OpenAI(
api_key="sk-",
base_url="http://127.0.0.1:8000/v1"
)
speech_file_path = Path(__file__).parent / "output.wav"
with client.audio.speech.with_streaming_response.create(
model="CosyVoice2-0.5B-axcl",
voice="prompt_data",
response_format="wav",
input='君不见黄河之水天上来,奔流到海不复回。君不见高堂明镜悲白发,朝如青丝暮成雪。人生得意须尽欢,莫使金樽空对月。天生我材必有用,千金散尽还复来。',
) as response:
    response.stream_to_file(speech_file_path)

git clone --recurse-submodules https://huggingface.co/M5Stack/CosyVoice2-scripts

文件说明
m5stack@raspberrypi:~/rsp/CosyVoice2-scripts $ ls -lh
total 28K
drwxrwxr-x 2 m5stack m5stack 4.0K Nov 6 15:18 asset
drwxrwxr-x 2 m5stack m5stack 4.0K Nov 6 15:18 CosyVoice-BlankEN
drwxrwxr-x 2 m5stack m5stack 4.0K Nov 6 15:19 frontend-onnx
drwxrwxr-x 3 m5stack m5stack 4.0K Nov 6 15:18 pengzhendong
-rw-rw-r-- 1 m5stack m5stack 24 Nov 6 15:18 README.md
-rw-rw-r-- 1 m5stack m5stack 103 Nov 6 15:18 requirements.txt
drwxrwxr-x 3 m5stack m5stack 4.0K Nov 6 15:18 scripts

python -m venv cosyvoice
source cosyvoice/bin/activate
pip install -r requirements.txt

python3 scripts/process_prompt.py --prompt_text asset/zh_woman1.txt --prompt_speech asset/zh_woman1.wav --output zh_woman1

成功生成音频特征文件
(cosyvoice) m5stack@raspberrypi:~/rsp/CosyVoice2-scripts $ python3 scripts/process_prompt.py --prompt_text asset/zh_woman1.txt --prompt_speech asset/zh_woman1.wav --output zh_woman1
2025-11-06 15:54:43.619688866 [W:onnxruntime:Default, device_discovery.cc:164 DiscoverDevicesForPlatform] GPU device discovery failed: device_discovery.cc:89 ReadFileContents Failed to open file: "/sys/class/drm/card1/device/vendor"
prompt_text 希望你以后能够做的比我还好呦。
fmax 8000
prompt speech token size: torch.Size([1, 87])

cp -r zh_woman1 /opt/m5stack/data/CosyVoice2-0.5B-axcl/
sudo systemctl restart llm-sys # 重置模型配置

curl http://127.0.0.1:8000/v1/audio/speech \
-H "Content-Type: application/json" \
-d '{
"model": "CosyVoice2-0.5B-axcl",
"voice": "zh_woman1",
"response_format": "wav",
"input": "君不见黄河之水天上来,奔流到海不复回。君不见高堂明镜悲白发,朝如青丝暮成雪。人生得意须尽欢,莫使金樽空对月。天生我材必有用,千金散尽还复来。"
}' \
-o output.wav

from pathlib import Path
from openai import OpenAI
client = OpenAI(
api_key="sk-",
base_url="http://127.0.0.1:8000/v1"
)
speech_file_path = Path(__file__).parent / "output.wav"
with client.audio.speech.with_streaming_response.create(
model="CosyVoice2-0.5B-axcl",
voice="zh_woman1",
response_format="wav",
input='君不见黄河之水天上来,奔流到海不复回。君不见高堂明镜悲白发,朝如青丝暮成雪。人生得意须尽欢,莫使金樽空对月。天生我材必有用,千金散尽还复来。',
) as response:
    response.stream_to_file(speech_file_path)