jolly-eve-3916
07/26/2025, 9:15 PM
tall-airline-16756
07/26/2025, 9:16 PM
To stop the agent in your agent.py from reading out stage directions like *thoughtful pause* coming from the Claude LLM, you can filter them out before the text reaches the TTS engine. Here are two approaches you can use:
1. Using before_tts_cb in VoicePipelineAgent
import re
from typing import AsyncIterable

from livekit.agents.pipeline import VoicePipelineAgent
from livekit.plugins import openai

async def _before_tts_cb(agent: VoicePipelineAgent, text: str | AsyncIterable[str]):
    # Pattern to match stage directions like *thoughtful pause*
    stage_direction_pattern = r'\*[^*]+\*'

    if isinstance(text, str):
        # Handle non-streaming text
        result = re.sub(stage_direction_pattern, '', text).strip()
        return result
    else:
        # Handle streaming text: filter each chunk before it reaches TTS
        async def process_stream():
            async for chunk in text:
                processed = re.sub(stage_direction_pattern, '', chunk)
                if processed:  # Only yield non-empty chunks
                    yield processed

        return process_stream()

# Initialize the agent with the callback (ctx and initial_ctx come from your entrypoint)
agent = VoicePipelineAgent(
    vad=ctx.proc.userdata["vad"],
    stt=openai.STT(),
    llm=openai.LLM.with_groq(model="deepseek-r1-distill-llama-70b"),
    tts=openai.TTS(),
    before_tts_cb=_before_tts_cb,
    chat_ctx=initial_ctx,
)
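One caveat with the streaming branch above: it filters each chunk on its own, so a stage direction split across two chunks (for example *thoughtful in one chunk and pause* in the next) will slip through. If that matters for your model, here is a minimal sketch of a chunk-boundary-safe helper; the filter_stage_directions name and the assumption that a lone asterisk only ever appears as part of a stage direction are mine, not from the original files:

import re
from typing import AsyncIterable, AsyncIterator

STAGE_DIRECTION = re.compile(r'\*[^*]+\*')

async def filter_stage_directions(chunks: AsyncIterable[str]) -> AsyncIterator[str]:
    buffer = ""
    async for chunk in chunks:
        buffer += chunk
        # Only filter and flush once every opening * has a closing *
        if buffer.count('*') % 2 == 0:
            cleaned = STAGE_DIRECTION.sub('', buffer)
            if cleaned:
                yield cleaned
            buffer = ""
    # Flush whatever is left when the stream ends
    cleaned = STAGE_DIRECTION.sub('', buffer)
    if cleaned:
        yield cleaned

You could call this from the streaming branch of _before_tts_cb instead of the per-chunk re.sub.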
2. Overriding llm_node in a Custom Agent
For more control, you can override the llm_node method in a custom Agent class to filter out stage directions:
import re

from livekit.agents.voice import Agent
from livekit.plugins import deepgram, openai, silero

class FilteredAgent(Agent):
    def __init__(self):
        super().__init__(
            instructions="You are a helpful agent.",
            stt=deepgram.STT(),
            llm=openai.LLM.with_groq(model="deepseek-r1-distill-llama-70b"),
            tts=openai.TTS(),
            vad=silero.VAD.load(),
        )

    async def llm_node(self, chat_ctx, tools, model_settings=None):
        stage_direction_pattern = r'\*[^*]+\*'

        async def process_stream():
            async with self.llm.chat(chat_ctx=chat_ctx, tools=tools, tool_choice=None) as stream:
                async for chunk in stream:
                    if chunk is None:
                        continue

                    content = getattr(chunk.delta, 'content', None) if hasattr(chunk, 'delta') else str(chunk)
                    if content is None:
                        yield chunk
                        continue

                    # Remove stage directions before the chunk reaches TTS
                    processed_content = re.sub(stage_direction_pattern, '', content)
                    if processed_content != content:
                        if hasattr(chunk, 'delta') and hasattr(chunk.delta, 'content'):
                            chunk.delta.content = processed_content
                        else:
                            chunk = processed_content
                    yield chunk

        return process_stream()
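If you want to see how FilteredAgent plugs into a worker, here is a minimal sketch of an entrypoint using the 1.x AgentSession API; the entrypoint name and the bare AgentSession() with no extra options are my assumptions, not something taken from the referenced files:

from livekit import agents
from livekit.agents import AgentSession, JobContext, WorkerOptions

async def entrypoint(ctx: JobContext):
    # Connect to the room, then let the filtering agent drive the session
    await ctx.connect()
    session = AgentSession()
    await session.start(agent=FilteredAgent(), room=ctx.room)

if __name__ == "__main__":
    agents.cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint))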
Both methods use the regular expression r'\*[^*]+\*' to match and remove text enclosed in asterisks. The first approach is simpler and works well with VoicePipelineAgent, while the second provides more granular control for custom implementations.
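As a quick sanity check of that pattern (the sample sentence here is made up, not from your logs):

import re

sample = "Sure, I can help. *thoughtful pause* Let's look at your setup."
print(re.sub(r'\*[^*]+\*', '', sample))
# prints: "Sure, I can help.  Let's look at your setup." (note the leftover double space)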
Sources: chain-of-thought-tts/agent.py | pipeline-tts/tts_node.py