chilly-nest-87883
07/27/2025, 11:01 PM
tall-airline-16756
07/27/2025, 11:01 PM
1. Enable Interim Results in the Deepgram STT Configuration
Configure the Deepgram STT plugin with interim_results=True so that partial transcripts are emitted while the user is still speaking:
07/27/2025, 11:01 PMfrom livekit.plugins.deepgram import stt
deepgram_stt = stt.STT(
    model="nova-2-general",
    interim_results=True,  # Enable interim transcripts
    smart_format=True,
    punctuate=True,
    language="en-US",
)
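If you are building on the AgentSession API, a minimal sketch of wiring this STT in could look like the following; the other components (LLM, TTS, VAD) are placeholders and not part of the original answer:
from livekit.agents import AgentSession

# A minimal sketch, assuming an AgentSession-based agent; only the stt
# argument is relevant here. Swap in your own LLM/TTS/VAD plugins.
session = AgentSession(
    stt=deepgram_stt,
    # llm=..., tts=..., vad=...  # your other components
)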
2. Process STT Events for Custom Interruption Logic
Access interim transcripts by processing the STT event stream. The Deepgram implementation emits INTERIM_TRANSCRIPT events for results that are not yet final:
import asyncio
from typing import AsyncIterable

from livekit import agents, rtc
from livekit.agents.stt import SpeechEvent, SpeechEventType
from livekit.plugins import deepgram

async def process_track(ctx: agents.JobContext, track: rtc.Track):
    stt_impl = deepgram.STT(interim_results=True)
    stt_stream = stt_impl.stream()
    audio_stream = rtc.AudioStream(track)

    # Consume STT events concurrently while audio frames are pushed below
    process_task = asyncio.create_task(process_text_from_speech(stt_stream))

    async for audio_event in audio_stream:
        stt_stream.push_frame(audio_event.frame)

    # Signal that no more audio will be pushed, then wait for the consumer
    stt_stream.end_input()
    await process_task

async def process_text_from_speech(stream: AsyncIterable[SpeechEvent]):
    async for event in stream:
        if event.type == SpeechEventType.INTERIM_TRANSCRIPT:
            interim_text = event.alternatives[0].text
            # Implement your custom interruption logic here
            # (should_interrupt and trigger_interruption are user-defined)
            if should_interrupt(interim_text):
                trigger_interruption()
        elif event.type == SpeechEventType.FINAL_TRANSCRIPT:
            final_text = event.alternatives[0].text
            # Handle the final transcript
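should_interrupt and trigger_interruption above are placeholders for your own logic. Purely as an illustration, a minimal heuristic might look like this; the keywords and threshold are assumptions, not part of the original answer:
# Hypothetical heuristic: interrupt on explicit stop words, or once the
# interim transcript is long enough to suggest intentional speech.
STOP_WORDS = {"stop", "wait", "hold on"}  # assumed keywords, tune for your app
MIN_WORDS = 3  # assumed threshold

def should_interrupt(interim_text: str) -> bool:
    text = interim_text.lower().strip()
    if any(word in text for word in STOP_WORDS):
        return True
    return len(text.split()) >= MIN_WORDS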
3. Use Agent Activity Handlers
The agent activity system provides access to the current transcript, including interim results:
# In your agent activity handler
def on_interim_transcript(self, ev: stt.SpeechEvent) -> None:
    # Access the interim transcript text
    interim_text = ev.alternatives[0].text

    # Implement custom interruption logic
    if self.should_interrupt_based_on_interim(interim_text):
        # Interrupt the current speech only if it is interruptible
        if (
            self._current_speech is not None
            and not self._current_speech.interrupted
            and self._current_speech.allow_interruptions
        ):
            self._current_speech.interrupt()
4. Access Current Transcript Property
The audio recognition system maintains a current_transcript property that combines final and interim transcripts:
# Access the current transcript, including interim results
current_text = self._audio_recognition.current_transcript

# Use this for interruption decisions
if self.should_interrupt(current_text):
    # Implement interruption logic
    pass
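Here, should_interrupt is again user-defined. As one illustration only (the filler list is an assumption, not from the original answer), it could filter out backchannels so short acknowledgements do not interrupt the agent:
# Hypothetical: ignore common backchannel fillers so "uh-huh" or "ok"
# does not interrupt the agent, while substantive speech does.
FILLERS = {"uh", "um", "uh-huh", "mm-hmm", "ok", "okay", "yeah"}

def should_interrupt(self, current_text: str) -> bool:
    words = [w.strip(".,!?").lower() for w in current_text.split()]
    meaningful = [w for w in words if w and w not in FILLERS]
    return len(meaningful) > 0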
These steps should let you access interim transcripts and drive your custom interruption logic from them.
Sources: livekit-plugins/livekit-plugins-google/livekit/plugins/google/stt.py | Deepgram integration guide | LiveKit Docs