wooden-megabyte-91316
07/27/2025, 5:11 PM
// SPDX-FileCopyrightText: 2024 LiveKit, Inc.
//
// SPDX-License-Identifier: Apache-2.0
import {
  type JobContext,
  type JobProcess,
  WorkerOptions,
  cli,
  defineAgent,
  llm,
  pipeline,
} from '@livekit/agents';
import * as deepgram from '@livekit/agents-plugin-deepgram';
import * as elevenlabs from '@livekit/agents-plugin-elevenlabs';
import * as openai from '@livekit/agents-plugin-openai';
import * as silero from '@livekit/agents-plugin-silero';
import dotenv from 'dotenv';
import path from 'node:path';
import { fileURLToPath } from 'node:url';
import { z } from 'zod';

const __dirname = path.dirname(fileURLToPath(import.meta.url));
const envPath = path.join(__dirname, '../.env.local');
dotenv.config({ path: envPath });

export default defineAgent({
  prewarm: async (proc: JobProcess) => {
    proc.userData.vad = await silero.VAD.load();
  },
  entry: async (ctx: JobContext) => {
    const vad = ctx.proc.userData.vad! as silero.VAD;
    const initialContext = new llm.ChatContext().append({
      role: llm.ChatRole.SYSTEM,
      text:
        'You are Grace, a friendly, professional, and highly competent AI customer support agent. ' +
        'Use short, concise responses, and avoid unpronounceable punctuation.',
    });

    await ctx.connect();
    console.log('waiting for participant');
    const participant = await ctx.waitForParticipant();
    console.log(`starting assistant example agent for ${participant.identity}`);
    console.log(participant.name);

    const fncCtx: llm.FunctionContext = {
      weather: {
        description: 'Get the weather in a location',
        parameters: z.object({
          location: z.string().describe('The location to get the weather for'),
        }),
        execute: async ({ location }) => {
          console.debug(`executing weather function for ${location}`);
          const response = await fetch(`https://wttr.in/${location}?format=%C+%t`);
          if (!response.ok) {
            throw new Error(`Weather API returned status: ${response.status}`);
          }
          const weather = await response.text();
          return `The weather in ${location} right now is ${weather}.`;
        },
      },
    };

    const agent = new pipeline.VoicePipelineAgent(
      vad,
      new deepgram.STT(),
      new openai.LLM(),
      new elevenlabs.TTS(),
      { chatCtx: initialContext, fncCtx },
    );

    console.log('starting agent');
    agent.start(ctx.room, participant);
    console.log('agent started');
    console.log('saying hello');
    await agent.say('Hey, how can I help you today?', true);
    console.log('said hello');
  },
});

cli.runApp(new WorkerOptions({ agent: fileURLToPath(import.meta.url) }));
So I have that code there and it seems to work completely fine, and it logs when it says hello. But when I hook it up to my frontend nothing happens: it does the logging in the console, but I can't hear anything.
The frontend worked completely fine with just the multimodal agent, but now with the VoicePipelineAgent it doesn't work, and all it says is "Agent connected but did not complete initializing."
tall-airline-16756
07/27/2025, 5:11 PM
tall-airline-16756
07/27/2025, 5:12 PM
The "Agent connected but did not complete initializing" message appears when the agent remains in the initializing state and does not transition to a ready state within 10 seconds. Here are the common causes and steps to resolve this:
• Audio Track Publishing Issues: Ensure that the audio track is successfully published. If track publishing fails, the agent remains stuck in the initializing state.
• Component Initialization Problems: Verify that all required components (VAD, STT, LLM, TTS) are properly initialized. The agent needs to set up audio sources, playout, and participant linking correctly.
• Room Connection Issues: Confirm that the agent is properly connected to the room and that participants are present. The agent waits for participants before completing initialization.
• Missing Dependencies: Ensure all required dependencies are correctly configured and available.
Debugging Steps:
1. Check the agent logs for any initialization errors.
2. Verify that the room connection is established and participants are detected.
3. Ensure audio permissions are granted and track publishing is successful (a frontend sketch follows below).
4. Confirm that all pipeline components (VAD, STT, LLM, TTS) are properly configured and initialized.
5. Check for any network connectivity issues that might prevent the agent from functioning correctly.
If the issue persists after following these steps, further investigation into the specific configuration and logs may be required.
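For steps 2 and 3, one way to sanity-check from the frontend is to log the agent's track subscriptions and attach its audio to the page. A minimal sketch using livekit-client, where serverUrl and token are placeholders for your own connection details:

  import { Room, RoomEvent, Track } from 'livekit-client';

  // Placeholders: supply your own LiveKit server URL and access token.
  declare const serverUrl: string;
  declare const token: string;

  const room = new Room();
  room.on(RoomEvent.TrackSubscribed, (track, _publication, participant) => {
    console.log(`subscribed to ${track.kind} track from ${participant.identity}`);
    if (track.kind === Track.Kind.Audio) {
      // The agent's audio must be attached to the DOM before it can play.
      document.body.appendChild(track.attach());
    }
  });
  await room.connect(serverUrl, token);

If the TrackSubscribed log never fires for the agent participant, the problem is on the publishing side; if it fires but you still hear nothing, it's likely a playback issue in the browser.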
Sources: components/session-view.tsx | AI voice agents | LiveKit Docs | agents/src/pipeline/pipeline_agent.ts | livekit-agents/livekit/agents/voice/room_io/room_io.py
wooden-megabyte-91316
07/27/2025, 5:18 PM
tall-airline-16756
07/27/2025, 5:19 PM
If the agent logs that it's speaking but you can't hear anything, here are the usual culprits:
• Browser Audio Playback Permissions: Browsers often block audio playback until the user interacts with the page. You can watch for playback status changes via RoomEvent.AudioPlaybackStatusChanged.
• Speech Synthesis Pipeline Issues: Sometimes the Text-to-Speech (TTS) component can get a bit temperamental. We'll verify that it's generating audio frames and that everything in the audio pipeline is connected.
• Configuration Issues: We'll double-check that the agent is properly linked to a participant and that your room connection is active. If the participant linking isn't working, the agent won't be able to process input or generate responses.
Debugging Steps:
1. Let's confirm the audio track is being published and subscribed to correctly.
2. Check those browser audio playback permissions using Room.canPlaybackAudio (see the sketch after this list).
3. Keep an eye on the agent's state changes and speech handle lifecycle.
4. Make sure the TTS component is generating audio frames as it should.
5. Verify that the participant is connected and the agent is linked up.
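For step 2, here's a minimal sketch of the autoplay check with livekit-client, assuming a Room instance named room:

  import { Room, RoomEvent } from 'livekit-client';

  const room = new Room();
  // canPlaybackAudio flips to false when the browser blocks autoplay.
  room.on(RoomEvent.AudioPlaybackStatusChanged, () => {
    if (!room.canPlaybackAudio) {
      // Audio must be resumed from a user gesture, e.g. a button click.
      const button = document.createElement('button');
      button.textContent = 'Enable audio';
      button.onclick = () => room.startAudio();
      document.body.appendChild(button);
    }
  });

This is a very common cause of "the agent says it spoke but I heard nothing": the track arrives fine, but the browser refuses to play it until startAudio() runs inside a user gesture.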
Pro tip: Keep track of events like VPAEvent.AGENT_STARTED_SPEAKING and VPAEvent.AGENT_STOPPED_SPEAKING to monitor the agent's speech activity (a sketch follows below). If you're still scratching your head, I'm here to help you troubleshoot! Let me know how it goes. 😊
Sources: agents/src/pipeline/pipeline_agent.ts | README.md