pdf-icon

Module LLM - Voice Assistant

This example demonstrates how to use the M5ModuleLLM library on the Arduino platform to implement a voice assistant using Whisper, LLM, and MeloTTS.

Preparation

  1. Refer to Module LLM Arduino Quick Start to set up the environment and install the M5ModuleLLM driver library.

  2. Refer to Module LLM Software Package Update Guide to install the following model packages.

apt install llm-whisper llm-kws llm-vad llm-llm llm-melotts
apt install llm-model-qwen2.5-0.5b-prefill-20e llm-model-melotts-en-default llm-model-whisper-tiny llm-model-silero-vad llm-model-sherpa-onnx-kws-zipformer-gigaspeech-3.3m-2024-01-01
  3. The hardware used in the following example includes:

VoiceAssistant CoreS3

/*
 * SPDX-FileCopyrightText: 2024 M5Stack Technology CO LTD
 *
 * SPDX-License-Identifier: MIT
 */
#include <Arduino.h>
#include <M5Unified.h>
#include <M5ModuleLLM.h>

/* Driver object for the LLM module; bound to Serial2 in setup() */
M5ModuleLLM module_llm;

/* Wake-up keyword handed to the KWS unit. Must be capitalized */
String wake_up_keyword("HELLO");
// String wake_up_keyword("你好你好");

/* Locale string, assigned in setup() and passed to the KWS and MeloTTS setup calls */
String language;

/* Work ids returned by the setup() call of each unit; loop() compares
 * incoming messages against them to find out which unit sent a message */
String kws_work_id;
String vad_work_id;
String whisper_work_id;
String llm_work_id;
String melotts_work_id;

void setup()
{
    M5.begin();
    M5.Display.setTextSize(2);
    M5.Display.setTextScroll(true);
    // M5.Display.setFont(&fonts::efontCN_12);  // Support Chinese display
    // M5.Display.setFont(&fonts::efontJA_12);  // Support Japanese display

    language = "en_US";
    // language = "zh_CN";
    // language = "ja_JP";

    /* Init module serial port */
    int rxd = M5.getPin(m5::pin_name_t::port_c_rxd);
    int txd = M5.getPin(m5::pin_name_t::port_c_txd);
    Serial2.begin(115200, SERIAL_8N1, rxd, txd);

    /* Init module */
    module_llm.begin(&Serial2);

    /* Make sure module is connected */
    M5.Display.printf(">> Check ModuleLLM connection..\n");
    while (1) {
        if (module_llm.checkConnection()) {
            break;
        }
    }

    /* Reset ModuleLLM */
    M5.Display.printf(">> Reset ModuleLLM..\n");
    module_llm.sys.reset();

    /* Setup Audio module */
    M5.Display.printf(">> Setup audio..\n");
    module_llm.audio.setup();

    /* Setup KWS module and save returned work id */
    M5.Display.printf(">> Setup kws..\n");
    m5_module_llm::ApiKwsSetupConfig_t kws_config;
    kws_config.kws = wake_up_keyword;
    kws_work_id    = module_llm.kws.setup(kws_config, "kws_setup", language);

    /* Setup VAD module and save returned work id */
    M5.Display.printf(">> Setup vad..\n");
    m5_module_llm::ApiVadSetupConfig_t vad_config;
    vad_config.input = {"sys.pcm", kws_work_id};
    vad_work_id      = module_llm.vad.setup(vad_config, "vad_setup");

    /* Setup Whisper module and save returned work id */
    M5.Display.printf(">> Setup whisper..\n");
    m5_module_llm::ApiWhisperSetupConfig_t whisper_config;
    whisper_config.input    = {"sys.pcm", kws_work_id, vad_work_id};
    whisper_config.language = "en";
    // whisper_config.language = "zh";
    // whisper_config.language = "ja";
    whisper_work_id = module_llm.whisper.setup(whisper_config, "whisper_setup");

    M5.Display.printf(">> Setup llm..\n");
    llm_work_id = module_llm.llm.setup();

    M5.Display.printf(">> Setup melotts..\n\n");
    m5_module_llm::ApiMelottsSetupConfig_t melotts_config;
    melotts_config.input = {"tts.utf-8.stream", llm_work_id};
    melotts_work_id      = module_llm.melotts.setup(melotts_config, "melotts_setup", language);

    M5.Display.printf(">> Setup ok\n>> Say \"%s\" to wakeup\n", wake_up_keyword.c_str());
}

void loop()
{
    /* Update ModuleLLM */
    module_llm.update();

    /* Handle module response messages */
    for (auto& msg : module_llm.msg.responseMsgList) {
        /* If KWS module message */
        if (msg.work_id == kws_work_id) {
            M5.Display.setTextColor(TFT_GREENYELLOW);
            M5.Display.printf(">> Keyword detected\n");
        }

        if (msg.work_id == vad_work_id) {
            M5.Display.setTextColor(TFT_GREENYELLOW);
            M5.Display.printf(">> vad detected\n");
        }
        /* If ASR module message */
        if (msg.work_id == whisper_work_id) {
            /* Check message object type */
            if (msg.object == "asr.utf-8") {
                /* Parse message json and get ASR result */
                JsonDocument doc;
                deserializeJson(doc, msg.raw_msg);
                String asr_result = doc["data"].as<String>();

                M5.Display.setTextColor(TFT_YELLOW);
                M5.Display.printf(">> %s\n", asr_result.c_str());

                module_llm.llm.inferenceAndWaitResult(llm_work_id, asr_result.c_str(), [](String& result) {
                    /* Show result on screen */
                    handleLLMResult(result);
                });
            }
        }
    }

    /* Clear handled messages */
    module_llm.msg.responseMsgList.clear();
}

/* Print the LLM result text on the display exactly as received; no newline is added */
void handleLLMResult(String& result)
{
    const char* text = result.c_str();
    M5.Display.printf("%s", text);
}
  4. Upload the code to CoreS3.
  1. After initialization is complete, say the wake word "HELLO" to wake up the voice assistant. The screen will display "Keyword detected".

  2. After waking up, you can ask any question. The screen will show the ASR result. Once recognition is complete, it will display the LLM output, and TTS will play the synthesized voice.

Note: The models used in this example only support English recognition. To use other languages, refer to the language configuration instructions below.

More Languages

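The language of the wake word determines which KWS model package must be installed; set `language` and `wake_up_keyword` in the code to match it. The ASR language is not derived from `language` and must be set separately through `whisper_config.language`. TTS requires the MeloTTS model package for the target language, selected through `melotts_config.model`.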

Chinese KWS model

apt install llm-model-sherpa-onnx-kws-zipformer-wenetspeech-3.3m-2024-01-01
language = "zh_CN";
String wake_up_keyword = "你好你好";

English KWS model

apt install llm-model-sherpa-onnx-kws-zipformer-gigaspeech-3.3m-2024-01-01
language = "en_US";
String wake_up_keyword = "HELLO";

ASR Chinese Configuration

whisper_config.language = "zh";

ASR English Configuration

whisper_config.language = "en";

ASR Japanese Configuration

whisper_config.language = "ja";

Chinese TTS Model

apt install llm-model-melotts-zh-cn
melotts_config.model = "melotts-zh-cn";

English TTS Model

apt install llm-model-melotts-en-us
melotts_config.model = "melotts-en-us";
apt install llm-model-melotts-en-default
melotts_config.model = "melotts-en-default";

Japanese TTS Model

apt install llm-model-melotts-ja-jp
melotts_config.model = "melotts-ja-jp";
On This Page