|
1 | 1 | """Recording-related utility functions for cleanup and download handling.""" |
2 | 2 |
|
3 | | -import fractions |
4 | 3 | import logging |
5 | 4 | import os |
6 | 5 | import shutil |
7 | 6 | import tempfile |
8 | 7 | import threading |
9 | | -import time |
10 | 8 | from pathlib import Path |
11 | 9 |
|
12 | 10 | from aiortc import MediaStreamTrack |
13 | 11 | from aiortc.contrib.media import MediaRecorder, MediaRelay |
14 | | -from aiortc.mediastreams import VIDEO_CLOCK_RATE, VIDEO_TIME_BASE |
| 12 | +from av import VideoFrame |
15 | 13 |
|
16 | 14 | logger = logging.getLogger(__name__) |
17 | 15 |
|
|
21 | 19 | "download": "scope_download_", |
22 | 20 | } |
23 | 21 |
|
24 | | -# Environment variables |
25 | | -RECORDING_ENABLED = os.getenv("RECORDING_ENABLED", "false").lower() == "true" |
26 | | -RECORDING_STARTUP_CLEANUP_ENABLED = ( |
27 | | - os.getenv("RECORDING_STARTUP_CLEANUP_ENABLED", "true").lower() == "true" |
28 | | -) |
29 | | - |
30 | 22 | RECORDING_MAX_FPS = 30.0 # Must match MediaRecorder's hardcoded rate=30 |
31 | 23 |
|
32 | 24 |
|
33 | | -class TimestampNormalizingTrack(MediaStreamTrack): |
34 | | - """Wraps a track and assigns wall-clock timestamps starting from 0. |
35 | | -
|
36 | | - Uses monotonic wall-clock time to compute PTS so that the recorded |
37 | | - MP4 plays back at real-time speed regardless of the source track's |
38 | | - own PTS cadence. This is critical for cloud-relay recordings where |
39 | | - frames may arrive slower than the source track's nominal rate (e.g. |
40 | | - CloudTrack stamps every frame at 1/30 s intervals even when network |
41 | | - round-trips deliver them at 10-15 FPS). |
42 | | -
|
43 | | - Important: We must create a copy of the frame rather than modifying it |
44 | | - in place, because the relay shares frame objects across all subscribers. |
45 | | - Modifying in place would affect the WebRTC sender and cause encoding errors. |
46 | | - """ |
47 | | - |
48 | | - def __init__(self, source_track: MediaStreamTrack): |
49 | | - super().__init__() |
50 | | - self.kind = source_track.kind |
51 | | - self._source = source_track |
52 | | - self._start_time: float | None = None |
53 | | - self._last_frame_time: float | None = None |
54 | | - self._min_frame_interval = 1.0 / RECORDING_MAX_FPS |
55 | | - |
56 | | - async def recv(self): |
57 | | - import av |
58 | | - |
59 | | - while True: |
60 | | - frame = await self._source.recv() |
61 | | - |
62 | | - # Frame rate limiting - skip frames arriving faster than MAX_RECORDING_FPS |
63 | | - current_time = time.monotonic() |
64 | | - if self._last_frame_time is not None: |
65 | | - elapsed = current_time - self._last_frame_time |
66 | | - if elapsed < self._min_frame_interval: |
67 | | - continue # Skip this frame |
68 | | - self._last_frame_time = current_time |
69 | | - |
70 | | - if self._start_time is None: |
71 | | - self._start_time = current_time |
72 | | - |
73 | | - # Create a new frame with wall-clock-based timestamp. |
74 | | - # Pad to even dimensions — libx264 requires width and height divisible by 2. |
75 | | - arr = frame.to_ndarray(format="rgb24") |
76 | | - h, w = arr.shape[:2] |
77 | | - pad_w = w % 2 |
78 | | - pad_h = h % 2 |
79 | | - if pad_w or pad_h: |
80 | | - import numpy as np |
81 | | - |
82 | | - arr = np.pad(arr, ((0, pad_h), (0, pad_w), (0, 0)), mode="edge") |
83 | | - new_frame = av.VideoFrame.from_ndarray(arr, format="rgb24") |
84 | | - new_frame.pts = int((current_time - self._start_time) * VIDEO_CLOCK_RATE) |
85 | | - new_frame.time_base = VIDEO_TIME_BASE |
86 | | - return new_frame |
87 | | - |
88 | | - def stop(self): |
89 | | - self._source.stop() |
90 | | - super().stop() |
91 | | - |
92 | | - |
93 | | -class AudioTimestampNormalizingTrack(MediaStreamTrack): |
94 | | - """Wraps an audio track and assigns wall-clock timestamps starting from 0. |
95 | | -
|
96 | | - Analogous to TimestampNormalizingTrack but for AudioFrame objects. |
97 | | - Uses wall-clock time for PTS to stay in sync with the video track's |
98 | | - wall-clock timestamps. Unlike video, audio frames are not rate-limited |
99 | | - here because the source AudioProcessingTrack already paces at 20ms |
100 | | - intervals. |
101 | | - """ |
102 | | - |
103 | | - kind = "audio" |
104 | | - |
105 | | - def __init__(self, source_track: MediaStreamTrack): |
106 | | - super().__init__() |
107 | | - self._source = source_track |
108 | | - self._start_time: float | None = None |
109 | | - |
110 | | - async def recv(self): |
111 | | - from av import AudioFrame as AvAudioFrame |
112 | | - |
113 | | - frame = await self._source.recv() |
114 | | - |
115 | | - current_time = time.monotonic() |
116 | | - if self._start_time is None: |
117 | | - self._start_time = current_time |
def ensure_even_video_frame(frame: VideoFrame) -> VideoFrame:
    """Pad odd-dimension video frames so encoders like libx264 accept them.

    Frames whose width and height are both even are returned unchanged
    (the very same object, with no pixel conversion). Otherwise a new RGB
    frame is built with one extra column and/or row replicated from the
    right/bottom edge, and the source frame's ``pts`` and ``time_base``
    are carried over to it.

    Args:
        frame: Source video frame. It is only read, never modified.

    Returns:
        ``frame`` itself when both dimensions are even, otherwise a new
        ``rgb24`` frame whose width and height are even.
    """
    # Check the frame's reported size first so the common even-sized case
    # skips the RGB conversion and array copy entirely.
    pad_w = frame.width % 2
    pad_h = frame.height % 2
    if not (pad_w or pad_h):
        return frame

    import numpy as np

    arr = frame.to_ndarray(format="rgb24")
    # mode="edge" repeats the last row/column instead of adding a black border.
    padded = np.pad(arr, ((0, pad_h), (0, pad_w), (0, 0)), mode="edge")
    even_frame = VideoFrame.from_ndarray(padded, format="rgb24")
    even_frame.pts = frame.pts
    # Only copy the time base when the source has one.
    if frame.time_base is not None:
        even_frame.time_base = frame.time_base
    return even_frame
136 | 44 |
|
137 | 45 |
|
138 | 46 | class RecordingManager: |
@@ -182,42 +90,34 @@ def _stop_track_safe(track: MediaStreamTrack | None) -> None: |
182 | 90 | logger.warning(f"Error stopping recording track: {e}") |
183 | 91 |
|
184 | 92 | def _create_recording_track(self) -> MediaStreamTrack | None: |
185 | | - """Create a video recording track. |
186 | | -
|
187 | | - Returns None if no video track is configured. The track is wrapped |
188 | | - in TimestampNormalizingTrack to ensure frame timestamps start from 0 |
189 | | - for each new recording. |
190 | | - """ |
| 93 | + """Create a video recording track, preserving source timestamps.""" |
191 | 94 | if self.video_track is None: |
192 | 95 | return None |
193 | 96 | if self.relay: |
194 | | - relay_track = self.relay.subscribe(self.video_track) |
195 | | - return TimestampNormalizingTrack(relay_track) |
196 | | - else: |
197 | | - logger.warning("No relay available for recording, using track directly") |
198 | | - return TimestampNormalizingTrack(self.video_track) |
| 97 | + return self.relay.subscribe(self.video_track) |
| 98 | + logger.warning("No relay available for recording, using track directly") |
| 99 | + return self.video_track |
199 | 100 |
|
200 | 101 | def _create_audio_recording_track(self) -> MediaStreamTrack | None: |
201 | | - """Create an audio recording track. |
202 | | -
|
203 | | - Returns None if no audio track is configured. |
204 | | - """ |
| 102 | + """Create an audio recording track, preserving source timestamps.""" |
205 | 103 | if self.audio_track is None: |
206 | 104 | return None |
207 | 105 | if self.audio_relay: |
208 | | - relay_track = self.audio_relay.subscribe(self.audio_track) |
209 | | - return AudioTimestampNormalizingTrack(relay_track) |
210 | | - else: |
211 | | - logger.warning( |
212 | | - "No audio relay available for recording, using track directly" |
213 | | - ) |
214 | | - return AudioTimestampNormalizingTrack(self.audio_track) |
| 106 | + return self.audio_relay.subscribe(self.audio_track) |
| 107 | + logger.warning("No audio relay available for recording, using track directly") |
| 108 | + return self.audio_track |
215 | 109 |
|
216 | 110 | def _create_media_recorder(self, file_path: str) -> MediaRecorder: |
217 | 111 | """Create a MediaRecorder instance with standard settings.""" |
218 | 112 | return MediaRecorder( |
219 | 113 | file_path, |
220 | 114 | format="mp4", |
| 115 | + options={ |
| 116 | + # force timestamps to start at zero |
| 117 | + "use_editlist": "0", |
| 118 | + # allows playback before file is fully loaded, eg over http |
| 119 | + "movflags": "+faststart", |
| 120 | + }, |
221 | 121 | ) |
222 | 122 |
|
223 | 123 | async def start_recording(self): |
@@ -450,12 +350,6 @@ def cleanup_recording_files(): |
450 | 350 | Clean up all recording files from previous sessions. |
451 | 351 | This handles cases where the process crashed and files weren't cleaned up. |
452 | 352 | """ |
453 | | - if not RECORDING_STARTUP_CLEANUP_ENABLED: |
454 | | - logger.info( |
455 | | - "Recording startup cleanup disabled via RECORDING_STARTUP_CLEANUP_ENABLED" |
456 | | - ) |
457 | | - return |
458 | | - |
459 | 353 | temp_dir = Path(tempfile.gettempdir()) |
460 | 354 | if not temp_dir.exists(): |
461 | 355 | return |
|
0 commit comments