```// SPDX-FileCopyrightText: 2024 LiveKit, Inc. /...
# ask-ai
w
Copy code
// SPDX-FileCopyrightText: 2024 LiveKit, Inc.
//
// SPDX-License-Identifier: Apache-2.0
import {
  type JobContext,
  type JobProcess,
  WorkerOptions,
  cli,
  defineAgent,
  llm,
  pipeline,
} from '@livekit/agents';
import * as deepgram from '@livekit/agents-plugin-deepgram';
import * as elevenlabs from '@livekit/agents-plugin-elevenlabs';
import * as openai from '@livekit/agents-plugin-openai';
import * as silero from '@livekit/agents-plugin-silero';
import dotenv from 'dotenv';
import path from 'node:path';
import { fileURLToPath } from 'node:url';
import { z } from 'zod';

// Resolve the directory containing this module (ESM has no built-in __dirname).
const moduleDir = path.dirname(fileURLToPath(import.meta.url));
// Load API keys/config from ../.env.local before any plugin reads process.env.
dotenv.config({ path: path.join(moduleDir, '../.env.local') });

export default defineAgent({
  // Runs once per worker process, before any job is accepted: preload the
  // Silero VAD model so individual jobs don't pay the model-load cost.
  prewarm: async (proc: JobProcess) => {
    proc.userData.vad = await silero.VAD.load();
  },
  // Entry point invoked for each dispatched job (one per room).
  entry: async (ctx: JobContext) => {
    // Set by prewarm above; `!` is safe because prewarm always runs first.
    const vad = ctx.proc.userData.vad! as silero.VAD;

    // System prompt for the LLM. The string pieces are concatenated directly,
    // so each segment must carry its own separating whitespace.
    // FIX: the original read "...agent.You should..." (missing space) and
    // used the ungrammatical "avoiding usage".
    const initialContext = new llm.ChatContext().append({
      role: llm.ChatRole.SYSTEM,
      text:
        'You are Grace, a friendly, professional, and highly competent AI customer support agent. ' +
        'You should use short and concise responses, and avoid usage of unpronounceable ' +
        'punctuation.',
    });

    await ctx.connect();
    console.log('waiting for participant');
    const participant = await ctx.waitForParticipant();
    console.log(`starting assistant example agent for ${participant.identity}`);
    console.log(participant.name);

    // LLM-callable tools; `weather` fetches current conditions from wttr.in.
    const fncCtx: llm.FunctionContext = {
      weather: {
        description: 'Get the weather in a location',
        parameters: z.object({
          location: z.string().describe('The location to get the weather for'),
        }),
        execute: async ({ location }) => {
          console.debug(`executing weather function for ${location}`);
          // FIX: the URL was wrapped in literal '<' and '>' (a markdown paste
          // artifact), so every request targeted an invalid URL and the tool
          // always failed. Also encode the location so spaces and special
          // characters are safe in the path segment.
          const response = await fetch(
            `https://wttr.in/${encodeURIComponent(location)}?format=%C+%t`,
          );
          if (!response.ok) {
            throw new Error(`Weather API returned status: ${response.status}`);
          }
          const weather = await response.text();
          return `The weather in ${location} right now is ${weather}.`;
        },
      },
    };

    // Assemble the voice pipeline: VAD gates audio into STT, transcripts go
    // to the LLM, and responses are synthesized by TTS.
    const agent = new pipeline.VoicePipelineAgent(
      vad,
      new deepgram.STT(),
      new openai.LLM(),
      new elevenlabs.TTS(),
      { chatCtx: initialContext, fncCtx },
    );
    console.log('starting agent');
    agent.start(ctx.room, participant);
    console.log('agent started');

    console.log('saying hello');
    // Second argument presumably allows the greeting to be interrupted —
    // NOTE(review): confirm against VoicePipelineAgent.say() docs.
    await agent.say('Hey, how can I help you today', true);
    console.log('said hello');
  },
});

cli.runApp(new WorkerOptions({ agent: fileURLToPath(import.meta.url) }));
So I have that code there and it seems to work completely fine — it logs when it says hello — but when I hook it up to my frontend, nothing happens. It does the logging in the console, but I can't hear anything. The frontend worked completely fine with just the multimodal agent, but now with the VoicePipelineAgent it doesn't work, and all it says is "Agent connected but did not complete initializing."
t
Thanks for reaching out, Christian. I'm an AI assistant and I'm here to help. Give me a moment while I research this topic.
The issue where the VoicePipelineAgent logs "Agent connected but did not complete initializing" and fails to produce audio output typically occurs when the agent gets stuck in the
initializing
state and does not transition to a ready state within 10 seconds. Here are the common causes and steps to resolve this: • Audio Track Publishing Issues: Ensure that the audio track is successfully published. If track publishing fails, the agent remains stuck in the
initializing
state. • Component Initialization Problems: Verify that all required components (VAD, STT, LLM, TTS) are properly initialized. The agent needs to set up audio sources, playout, and participant linking correctly. • Room Connection Issues: Confirm that the agent is properly connected to the room and that participants are present. The agent waits for participants before completing initialization. • Missing Dependencies: Ensure all required dependencies are correctly configured and available. Debugging Steps: 1. Check the agent logs for any initialization errors. 2. Verify that the room connection is established and participants are detected. 3. Ensure audio permissions are granted and track publishing is successful. 4. Confirm that all pipeline components (VAD, STT, LLM, TTS) are properly configured and initialized. 5. Check for any network connectivity issues that might prevent the agent from functioning correctly. If the issue persists after following these steps, further investigation into the specific configuration and logs may be required. Sources: components/session-view.tsx | AI voice agents | LiveKit Docs | agents/src/pipeline/pipeline_agent.ts | livekit-agents/livekit/agents/voice/room_io/room_io.py
w
[111524.014] INFO (51308): starting worker version: "0.1.0" [111524.038] INFO (51308): Server is listening on port 63277 [111524.192] INFO (51308): registered worker version: "0.1.0" id: "AW_VNgoFxNxzevZ" server_info: { "edition": "Cloud", "version": "1.9.0", "protocol": 16, "region": "US West B", "nodeId": "NC_OPHOENIX1B_sDT9PAwXHpyv", "debugInfo": "", "agentProtocol": 0 } [111600.908] INFO (51308): received job request version: "0.1.0" job: { "id": "AJ_xmnoannxn7MQ", "type": "JT_ROOM", "room": { "sid": "RM_vFfU57dFs7Fy", "name": "voice_assistant_room_7633", "emptyTimeout": 300, "maxParticipants": 0, "creationTime": "1753636560", "turnPassword": "", "enabledCodecs": [ { "mime": "video/H264", "fmtpLine": "" }, { "mime": "video/VP8", "fmtpLine": "" }, { "mime": "video/VP9", "fmtpLine": "" }, { "mime": "video/AV1", "fmtpLine": "" }, { "mime": "audio/red", "fmtpLine": "" }, { "mime": "audio/opus", "fmtpLine": "" } ], "metadata": "", "numParticipants": 0, "activeRecording": false, "numPublishers": 0, "version": { "unixMicro": "1753636560796077", "ticks": 0 }, "departureTimeout": 20, "creationTimeMs": "1753636560720" }, "namespace": "", "metadata": "", "agentName": "", "state": { "status": "JS_RUNNING", "error": "", "startedAt": "1753636560922452030", "endedAt": "0", "updatedAt": "1753636560922452030", "participantIdentity": "", "workerId": "AW_VNgoFxNxzevZ" }, "dispatchId": "" } resuming: false agentName: "" waiting for participant starting assistant example agent for voice_assistant_user_5199 Christian agent started Christian [111601.649] DEBUG (lk-rtc/51378): Connect callback received [111647.520] INFO (51378): job completed. jobID: "AJ_xmnoannxn7MQ" [111647.520] INFO (51378): Job process shutdown jobID: "AJ_xmnoannxn7MQ"
t
Hey there! 👋 I see you're experiencing some audio output challenges with the VoicePipelineAgent. Don't worry—we'll work through this together and get things running smoothly. • Audio Track Publication Issues: Let's first make sure your audio track is being shared and received properly. The agent needs this to kick off speech synthesis. • Browser Audio Playback Restrictions: Browsers can be a bit protective about audio. We'll want to check that your browser is giving the green light for audio playback and look out for any notifications about
RoomEvent.AudioPlaybackStatusChanged
. • Speech Synthesis Pipeline Issues: Sometimes the Text-to-Speech (TTS) component can get a bit temperamental. We'll verify that it's generating audio frames and that everything in the audio pipeline is connected. • Configuration Issues: We'll double-check that the agent is properly linked to a participant and that your room connection is active. If the participant linking isn't working, the agent won't be able to process input or generate responses. Debugging Steps: 1. Let's confirm the audio track is being published and subscribed to correctly. 2. Check those browser audio playback permissions using
Room.canPlaybackAudio
. 3. Keep an eye on the agent's state changes and speech handle lifecycle. 4. Make sure the TTS component is generating audio frames as it should. 5. Verify that the participant is connected and the agent is linked up. Pro tip: Keep track of events like
VPAEvent.AGENT_STARTED_SPEAKING
and
VPAEvent.AGENT_STOPPED_SPEAKING
to monitor the agent's speech activity. If you're still scratching your head, I'm here to help you troubleshoot! Let me know how it goes. 😊 Sources: agents/src/pipeline/pipeline_agent.ts | README.md