git clone https://huggingface.co/AXERA-TECH/CosyVoice2
Contents of the cloned repository:
m5stack@raspberrypi:~/rsp/CosyVoice2 $ ls -lh
total 26M
drwxrwxr-x 2 m5stack m5stack 4.0K Sep 27 15:54 asset
-rwxrwxr-x 1 m5stack m5stack 0 Sep 18 14:32 config.json
drwxrwxr-x 2 m5stack m5stack 4.0K Sep 5 19:12 CosyVoice-BlankEN-Ax650-prefill_512
drwxrwxr-x 2 m5stack m5stack 4.0K Sep 5 19:11 frontend-onnx
-rwxrwxr-x 1 m5stack m5stack 9.3M Sep 27 15:33 main_api_ax650
-rwxrwxr-x 1 m5stack m5stack 1.9M Sep 27 15:33 main_api_axcl_aarch64
-rwxrwxr-x 1 m5stack m5stack 2.0M Sep 27 15:33 main_api_axcl_x86
-rwxrwxr-x 1 m5stack m5stack 9.2M Sep 18 09:35 main_ax650
-rwxrwxr-x 1 m5stack m5stack 1.8M Sep 27 15:33 main_axcl_aarch64
-rwxrwxr-x 1 m5stack m5stack 1.9M Sep 27 15:33 main_axcl_x86
drwxrwxr-x 3 m5stack m5stack 4.0K Sep 27 15:33 onnxruntime-linux-aarch64-1.23.0
drwxrwxr-x 3 m5stack m5stack 4.0K Sep 27 15:33 onnxruntime-linux-x64-1.23.0
drwxrwxr-x 2 m5stack m5stack 4.0K Sep 15 10:07 prompt_files
-rwxrwxr-x 1 m5stack m5stack 7.9K Sep 27 15:33 README.md
-rwxrwxr-x 1 m5stack m5stack 895 Sep 27 15:33 run_api_ax650.sh
-rwxrwxr-x 1 m5stack m5stack 997 Sep 27 15:33 run_api_axcl_aarch64.sh
-rwxrwxr-x 1 m5stack m5stack 989 Sep 27 15:33 run_api_axcl_x86.sh
-rwxrwxr-x 1 m5stack m5stack 865 Sep 27 15:33 run_ax650.sh
-rwxrwxr-x 1 m5stack m5stack 967 Sep 27 15:33 run_axcl_aarch64.sh
-rwxrwxr-x 1 m5stack m5stack 959 Sep 27 15:33 run_axcl_x86.sh
drwxrwxr-x 5 m5stack m5stack 4.0K Sep 27 15:33 scripts
drwxrwxr-x 2 m5stack m5stack 4.0K Sep 27 15:33 token2wav-axmodels
python -m venv cosyvoice
source cosyvoice/bin/activate
pip install -r scripts/requirements.txt
cd scripts
python cosyvoice2_tokenizer.py
(cosyvoice) m5stack@raspberrypi:~/rsp/CosyVoice2/scripts $ python cosyvoice2_tokenizer.py
[14990, 1879]
http://localhost:12345
./run_axcl_aarch64.sh
On a successful start, the output looks like this:
m5stack@raspberrypi:~/rsp/CosyVoice2 $ ./run_axcl_aarch64.sh
rm: cannot remove 'output*.wav': No such file or directory
[I][ main][ 291]: device: 0
[I][ run][ 30]: AXCLWorker start with devid 0
[I][ Init][ 135]: LLM init start
[I][ Init][ 34]: connect http://127.0.0.1:12345 ok
bos_id: 0, eos_id: 1773
...
Successfully saved audio to output.wav (32-bit Float PCM).
[I][ tts][ 225]: tts total use time: 21.555 s
Voice generation pipeline completed.
[I][ run][ 80]: AXCLWorker exit with devid 0
m5stack@raspberrypi:~/rsp/CosyVoice2 $ cat ./run_axcl_aarch64.sh
# Runs the CosyVoice2 text-to-speech demo binary (main_axcl_aarch64) once with
# a fixed input sentence. Every path below is relative, so the script has to be
# launched from the repository root.

# Put the bundled aarch64 ONNX Runtime libraries ahead of anything already on
# the dynamic-loader search path.
export LD_LIBRARY_PATH=onnxruntime-linux-aarch64-1.23.0/lib:$LD_LIBRARY_PATH
# Model directories handed to the binary. Both values end in "/", and the
# LLM_DIR uses below add another "/", so those paths contain "//".
# NOTE(review): presumably harmless, since Linux path lookup treats repeated
# slashes as one.
LLM_DIR=CosyVoice-BlankEN-Ax650-prefill_512/
TOKEN2WAV_DIR=token2wav-axmodels/
# Delete WAV files left by a previous run. When none exist the glob stays
# unexpanded and rm prints "cannot remove 'output*.wav'"; nothing here aborts
# on that error, so the script carries on.
rm output*.wav
# Launch the demo. Comments cannot be interleaved with the backslash-continued
# lines, so the notable options are described here:
#   --template_filename_axmodel  printf-style pattern; "%d" is a placeholder
#                                (presumably the layer index, 24 files in total
#                                per --axmodel_num -- TODO confirm).
#   --filename_tokenizer_model   an HTTP URL rather than a file: the binary
#                                connects to a tokenizer service on port 12345
#                                (presumably the one started by
#                                scripts/cosyvoice2_tokenizer.py -- verify).
#   --devices "0,"               device list; the startup log reports device 0.
#   --text                       the sentence to synthesize.
./main_axcl_aarch64 \
--template_filename_axmodel "${LLM_DIR}/qwen2_p128_l%d_together.axmodel" \
--token2wav_axmodel_dir $TOKEN2WAV_DIR \
--n_timesteps 10 \
--axmodel_num 24 \
--bos 0 --eos 0 \
--filename_tokenizer_model "http://127.0.0.1:12345" \
--filename_post_axmodel "${LLM_DIR}/qwen2_post.axmodel" \
--filename_decoder_axmodel "${LLM_DIR}/llm_decoder.axmodel" \
--filename_tokens_embed "${LLM_DIR}/model.embed_tokens.weight.bfloat16.bin" \
--filename_llm_embed "${LLM_DIR}/llm.llm_embedding.float16.bin" \
--filename_speech_embed "${LLM_DIR}/llm.speech_embedding.float16.bin" \
--continue 1 \
--devices "0," \
--prompt_files prompt_files \
--text "The Yellow River comes from the sky, rushing to the sea, never to return. Before the mirror in the hall, grieving the white hair; in the morning like black silk, by night becomes snow."
# Grant read/write/execute on the generated WAV files to every user.
chmod 777 output*.wav
On a successful start, the output looks like this:
m5stack@raspberrypi:~/rsp/CosyVoice2 $ ./run_axcl_aarch64.sh
[I][ main][ 291]: device: 0
[I][ run][ 30]: AXCLWorker start with devid 0
[I][ Init][ 135]: LLM init start
[I][ Init][ 34]: connect http://127.0.0.1:12345 ok
bos_id: 0, eos_id: 1773
...
Voice generation pipeline completed.
Type "q" to exit, Ctrl+c to stop current running
text >> Nice to meet you
...
Voice generation pipeline completed.
text >>