chilly-ghost-74326
08/19/2025, 10:19 AM
tall-airline-16756
08/19/2025, 10:24 AM
tall-airline-16756
08/19/2025, 10:24 AM
chilly-ghost-74326
08/19/2025, 10:25 AM
tall-airline-16756
08/19/2025, 10:27 AM
Here are a few ways to measure audio-to-audio latency when running the OpenAI Realtime model in a LiveKit agent:
1. Tracking Audio Input and Output Events:
Listen for the session's client and server events and timestamp when audio input is committed and when the first audio response arrives:
import time
from livekit.agents import MultimodalAgent, AutoSubscribe, JobContext
from livekit.plugins.openai.realtime import RealtimeModel
class LatencyTrackingAgent:
    def __init__(self):
        self.audio_input_timestamp = None
        self.first_audio_output_timestamp = None
        self.latency_measurements = []

    async def setup_agent(self, ctx: JobContext):
        model = RealtimeModel()
        session = model.session()

        # Keep track of when audio input gets committed
        @session.on("openai_client_event_queued")
        def on_client_event(event):
            if event.get("type") == "input_audio_buffer.commit":
                self.audio_input_timestamp = time.time()

        # Keep track of when the first audio response comes through
        @session.on("openai_server_event_received")
        def on_server_event(event):
            if event.get("type") == "response.audio.delta":
                if self.first_audio_output_timestamp is None:
                    self.first_audio_output_timestamp = time.time()
                    if self.audio_input_timestamp:
                        latency = self.first_audio_output_timestamp - self.audio_input_timestamp
                        self.latency_measurements.append(latency)
                        print(f"Audio-to-audio latency: {latency*1000:.2f}ms")

        return session
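For reference, here's one way this might be wired into a job entrypoint. This is a minimal sketch, assuming setup_agent above is the only session setup you need; the entrypoint name and structure are illustrative, so adapt them to however your agent already starts:

async def entrypoint(ctx: JobContext):
    # Connect to the room, then create the realtime session with latency tracking attached
    await ctx.connect()
    tracker = LatencyTrackingAgent()
    session = await tracker.setup_agent(ctx)
    # tracker.latency_measurements fills up as tracked responses arrive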
2. Accessing Internal Timing Data:
You can also peek inside the session's current response generation object to grab timing info and see how long the model takes to produce its first token. Keep in mind that these underscore-prefixed attributes are private internals and may change between plugin versions:
async def track_response_latency(session):
    # Check if there's a current generation object
    if hasattr(session, '_current_generation') and session._current_generation:
        generation = session._current_generation
        if generation._first_token_timestamp and generation._created_timestamp:
            processing_latency = generation._first_token_timestamp - generation._created_timestamp
            print(f"Model processing latency: {processing_latency*1000:.2f}ms")
3. End-to-End Latency Measurement:
If you want the full picture from audio input all the way to output, try this:
class ComprehensiveLatencyTracker:
    def __init__(self):
        self.input_start_time = None
        self.commit_time = None
        self.response_start_time = None
        self.first_audio_time = None

    def track_input_audio_buffer_events(self, session):
        @session.on("openai_client_event_queued")
        def on_client_event(event):
            event_type = event.get("type")
            if event_type == "input_audio_buffer.append" and self.input_start_time is None:
                self.input_start_time = time.time()
            elif event_type == "input_audio_buffer.commit":
                self.commit_time = time.time()

        @session.on("openai_server_event_received")
        def on_server_event(event):
            event_type = event.get("type")
            if event_type == "response.created":
                self.response_start_time = time.time()
            elif event_type == "response.audio.delta" and self.first_audio_time is None:
                self.first_audio_time = time.time()
                self.calculate_latencies()

    def calculate_latencies(self):
        if all([self.input_start_time, self.commit_time, self.response_start_time, self.first_audio_time]):
            input_to_commit = (self.commit_time - self.input_start_time) * 1000
            commit_to_response = (self.response_start_time - self.commit_time) * 1000
            response_to_audio = (self.first_audio_time - self.response_start_time) * 1000
            total_latency = (self.first_audio_time - self.input_start_time) * 1000
            print(f"Input buffering: {input_to_commit:.2f}ms")
            print(f"Processing latency: {commit_to_response:.2f}ms")
            print(f"Audio generation: {response_to_audio:.2f}ms")
            print(f"Total audio-to-audio latency: {total_latency:.2f}ms")
Hopefully, these tips help you measure and fine-tune latency throughout your audio pipeline. If you have any questions or want to dive deeper, just let me know—I'm here to help!