From 25e267778b0a0a5228c9a8dde0eccfa80a081f4f Mon Sep 17 00:00:00 2001 From: Igor Loskutov Date: Fri, 9 Jan 2026 15:55:49 -0500 Subject: [PATCH 01/20] brady bunch PRD/tasks --- PRD.md | 1198 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ TASKS.md | 538 ++++++++++++++++++++++++ 2 files changed, 1736 insertions(+) create mode 100644 PRD.md create mode 100644 TASKS.md diff --git a/PRD.md b/PRD.md new file mode 100644 index 00000000..8265b8f3 --- /dev/null +++ b/PRD.md @@ -0,0 +1,1198 @@ +# PRD: Dual Recording Support (Cloud + Raw-Tracks) + +## Overview + +Enable simultaneous Daily.co cloud recording (Brady Bunch grid MP4) and raw-tracks recording (per-participant WebM audio files) for meetings. Cloud recording provides instant playback with video layout, raw-tracks enable high-quality per-speaker transcription. + +## Goals + +1. Store both cloud MP4 and raw-tracks WebM files from Daily.co meetings +2. Enable users to play back original cloud recording alongside processed transcription +3. Maintain existing transcription quality (raw-tracks only, no changes) +4. 
Keep implementation simple - validate approach works, add complexity only if needed + +## Non-Goals + +- Video player implementation (audio playback only in MVP) +- Cloud recording as transcription fallback (if raw-tracks fails, transcript fails) +- Transcribing cloud MP4 audio +- Room-level toggle (always enable both recordings for all meetings) +- Unit tests (manual validation only) + +--- + +## Technical Background + +### Daily.co Recording Configuration + +**Room `enable_recording` property:** +- **Purpose:** Allows manual recording start (does NOT auto-start) +- **Values:** Single string - `"cloud"` OR `"raw-tracks"` OR `"local"` (NOT array) +- **Current code:** Set to `"raw-tracks"` when Reflector's `room.recording_type == "cloud"` (line 61-62 of `server/reflector/video_platforms/daily.py`) + +**JWT `start_cloud_recording` property:** +- **Purpose:** Auto-starts cloud recording when participant joins +- **Current code:** Set to `true` but **DEAD CODE** - room has `enable_recording: "raw-tracks"` so JWT setting ignored (line 189 in `daily.py`, called at line 589 in `views/rooms.py`) + +**Frontend `startRecording()` call:** +- **Current code:** Every participant calls `startRecording({ type: "raw-tracks" })` on join (line 236 of `www/app/[roomName]/components/DailyRoom.tsx`) +- **Behavior:** Multiple participants = multiple calls to Daily.co + +**instanceId:** +- **Purpose:** UUID identifying a recording session (per Daily.co docs) +- **Per-user:** Each participant joining generates their own instanceId (ephemeral, not stored) +- **Question:** Can cloud + raw-tracks share same instanceId? Docs unclear - needs validation + +### Daily.co Support Guidance + +From Discord conversation (Kyle + rajneesh, Sept 2024): + +> **Kyle:** "We need both raw-tracks and cloud recording. When we start recording from JS and backend it doesn't work." +> +> **rajneesh (Daily.co support):** "It is possible to get both 'raw-tracks' and 'cloud-recording' at the same time. 
Start the cloud recording using **daily-js API** and then start raw-tracks using the **REST API endpoint**. Please note you need to pass a unique instanceId in the startRecording call." + +**Interpretation:** +- Cloud recording: Start via `frame.startRecording()` (daily-js, frontend) +- Raw-tracks recording: Start via `POST /rooms/:name/recordings/start` (REST API, backend) +- Both need an instanceId parameter + +--- + +## Solution Architecture + +### Recording Trigger Flow + +``` +User joins meeting + ↓ +Frontend: Generate instanceId (useState, per component mount) + ↓ +Frontend: Start cloud recording via daily-js + ↓ (instanceId: <uuid>) +Frontend: Call backend endpoint + ↓ +Backend: Start raw-tracks via Daily.co REST API + ↓ (instanceId: <uuid> - SAME as cloud) + ↓ +Daily.co: Process recordings (assumption: handles duplicate start calls) + ↓ +Webhook: recording.ready-to-download (type: "cloud") + ↓ +Backend: Store s3_key in meeting table + ↓ +Webhook: recording.ready-to-download (type: "raw-tracks") + ↓ +Backend: Queue multitrack processing (existing pipeline) +``` + +**Key assumptions:** +1. **Every participant** calls both APIs (no first-participant detection) +2. **Daily.co handles idempotency** - multiple start calls to same room don't create duplicate recordings +3. **Same instanceId works** for both cloud and raw-tracks + +**Validation required:** Test that multiple participants starting recordings doesn't cause issues. + +--- + +## Implementation Steps + +### Phase 0: Preparation & Validation + +#### Step 0.1: Prototype instanceId approach + +**Before implementing**, validate Daily.co behavior: + +```bash +# Test script: server/scripts/test_daily_dual_recording.py + +# Test 1: Same instanceId +# 1. Create Daily room with enable_recording: "raw-tracks" +# 2. POST /rooms/:name/recordings/start { type: "cloud", instance_id: "test-123" } +# 3. 
POST /rooms/:name/recordings/start { type: "raw-tracks", instance_id: "test-123" } +# Expected: Both succeed, webhooks arrive with same/different instance_id + +# Test 2: Multiple start calls (simulate multiple participants) +# 1. POST /rooms/:name/recordings/start { type: "cloud", instance_id: "test-123" } +# 2. POST /rooms/:name/recordings/start { type: "cloud", instance_id: "test-123" } # duplicate +# Expected: Second call is idempotent (no error, no duplicate recording) + +# Test 3: Different instanceIds (fallback if same fails) +# 1. POST /rooms/:name/recordings/start { type: "cloud", instance_id: "cloud-123" } +# 2. POST /rooms/:name/recordings/start { type: "raw-tracks", instance_id: "raw-456" } +# Expected: Both succeed independently +``` + +**Document findings:** +- Add results to `server/DAILYCO_TEST.md` +- Update PRD if different instanceId approach required + +**Acceptance criteria:** +- Know which instanceId strategy works +- Know if Daily.co handles duplicate start calls gracefully + +--- + +#### Step 0.2: Remove dead JWT start_cloud_recording + +**File:** `server/reflector/video_platforms/daily.py` + +**Current code (lines 177-200):** +```python +async def create_meeting_token( + self, + room_name: DailyRoomName, + start_cloud_recording: bool, # ← Dead parameter + enable_recording_ui: bool, + user_id: NonEmptyString | None = None, + is_owner: bool = False, + max_recording_duration_seconds: int | None = None, +) -> NonEmptyString: + start_cloud_recording_opts = None + if start_cloud_recording and max_recording_duration_seconds: + start_cloud_recording_opts = {"maxDuration": max_recording_duration_seconds} + + properties = MeetingTokenProperties( + room_name=room_name, + user_id=user_id, + start_cloud_recording=start_cloud_recording, # ← Dead code + start_cloud_recording_opts=start_cloud_recording_opts, # ← Dead code + enable_recording_ui=enable_recording_ui, + is_owner=is_owner, + ) + request = CreateMeetingTokenRequest(properties=properties) + result 
= await self._api_client.create_meeting_token(request) + return result.token +``` + +**Changes:** +```python +async def create_meeting_token( + self, + room_name: DailyRoomName, + enable_recording_ui: bool, + user_id: NonEmptyString | None = None, + is_owner: bool = False, +) -> NonEmptyString: + # Removed: start_cloud_recording, max_recording_duration_seconds, start_cloud_recording_opts + + properties = MeetingTokenProperties( + room_name=room_name, + user_id=user_id, + enable_recording_ui=enable_recording_ui, + is_owner=is_owner, + ) + request = CreateMeetingTokenRequest(properties=properties) + result = await self._api_client.create_meeting_token(request) + return result.token +``` + +**File:** `server/reflector/views/rooms.py` + +**Update call site (around line 587-593):** +```python +# Before +token = await client.create_meeting_token( + meeting.room_name, + start_cloud_recording=meeting.recording_type == "cloud", # ← Remove + enable_recording_ui=enable_recording_ui, + user_id=user_id, + is_owner=user_id == room.user_id, + max_recording_duration_seconds=remaining_seconds, # ← Remove +) + +# After +token = await client.create_meeting_token( + meeting.room_name, + enable_recording_ui=enable_recording_ui, + user_id=user_id, + is_owner=user_id == room.user_id, +) +``` + +**Verification:** +```bash +cd server && uv run mypy reflector/video_platforms/daily.py reflector/views/rooms.py +``` + +**Expected:** No type errors + +--- + +#### Step 0.3: Update frontend recording start + +**File:** `www/app/[roomName]/components/DailyRoom.tsx` + +**Current code (lines 231-243):** +```typescript +const handleFrameJoinMeeting = useCallback( + (startRecording: (args: { type: "raw-tracks" }) => void) => { + try { + if (meeting.recording_type === "cloud") { + console.log("Starting cloud recording"); + startRecording({ type: "raw-tracks" }); // ← Wrong type, every participant calls + } + } catch (error) { + console.error("Failed to start recording:", error); + } + }, + 
[meeting.recording_type], +); +``` + +**Changes:** +```typescript +const [recordingInstanceId] = useState(() => crypto.randomUUID()); + +const handleFrameJoinMeeting = useCallback( + (startRecording: (args: { type: "raw-tracks" | "cloud", instanceId: string }) => void) => { + try { + if (meeting.recording_type === "cloud") { + console.log("Starting dual recording", { instanceId: recordingInstanceId }); + + // 1. Start cloud recording via daily-js (frontend) + startRecording({ + type: "cloud", + instanceId: recordingInstanceId + }); + + // 2. Start raw-tracks via backend REST API + fetch(`/v1/meetings/${meeting.id}/recordings/start`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ + type: "raw-tracks", + instanceId: recordingInstanceId // SAME instanceId + }) + }) + .then(res => { + if (!res.ok) throw new Error(`HTTP ${res.status}`); + console.log("Raw-tracks recording started via backend"); + }) + .catch(err => { + console.error("Failed to start raw-tracks recording:", err); + }); + } + } catch (error) { + console.error("Failed to start recordings:", error); + } + }, + [meeting.recording_type, recordingInstanceId, meeting.id], +); +``` + +**Verification:** +```bash +cd www && pnpm tsc --noEmit +``` + +**Expected:** No type errors + +--- + +### Phase 1: Database Schema + +#### Step 1.1: Add cloud recording fields to Meeting table + +**File:** `server/reflector/db/migrations/versions/YYYYMMDD_HHMM_add_cloud_recording.py` (NEW) + +**Migration:** +```python +"""add cloud recording support + +Revision ID: +Revises: +Create Date: 2026-01-09 12:00:00.000000 + +""" +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. 
+revision = '' +down_revision = '' +branch_labels = None +depends_on = None + + +def upgrade(): + op.add_column('meeting', sa.Column('cloud_recording_s3_key', sa.String(), nullable=True)) + op.add_column('meeting', sa.Column('cloud_recording_duration', sa.Integer(), nullable=True)) + + +def downgrade(): + op.drop_column('meeting', 'cloud_recording_duration') + op.drop_column('meeting', 'cloud_recording_s3_key') +``` + +**Run migration:** +```bash +cd server && uv run alembic revision --autogenerate -m "add cloud recording support" +cd server && uv run alembic upgrade head +``` + +**Verification:** +```bash +docker compose exec postgres psql -U reflector -d reflector -c "\d meeting" | grep cloud_recording +``` + +**Expected output:** +``` + cloud_recording_s3_key | character varying | | | + cloud_recording_duration | integer | | | +``` + +--- + +#### Step 1.2: Update DB models + +**File:** `server/reflector/db/meetings.py` + +**Changes (around line 94):** +```python +class Meeting(BaseModel): + id: str + room_name: str + room_url: str + host_room_url: str + start_date: datetime + end_date: datetime + room_id: str | None + is_locked: bool = False + room_mode: Literal["normal", "group"] = "normal" + recording_type: Literal["none", "local", "cloud"] = "cloud" + recording_trigger: Literal[ + "none", "prompt", "automatic", "automatic-2nd-participant" + ] = "automatic-2nd-participant" + num_clients: int = 0 + is_active: bool = True + calendar_event_id: str | None = None + calendar_metadata: dict[str, Any] | None = None + platform: Platform = WHEREBY_PLATFORM + # NEW FIELDS: + cloud_recording_s3_key: str | None = None + cloud_recording_duration: int | None = None +``` + +**Add to table definition (around line 14):** +```python +meetings = sa.Table( + "meeting", + metadata, + sa.Column("id", sa.String, primary_key=True), + # ... existing columns ... 
+ sa.Column("cloud_recording_s3_key", sa.String, nullable=True), + sa.Column("cloud_recording_duration", sa.Integer, nullable=True), + # ... rest of columns ... +) +``` + +**Verification:** +```bash +cd server && uv run mypy reflector/db/meetings.py +``` + +**Expected:** No type errors + +--- + +### Phase 2: Daily.co API Client Extension + +#### Step 2.1: Add start_recording method + +**File:** `server/reflector/dailyco_api/client.py` + +**Add method after `list_recordings`:** +```python +async def start_recording( + self, + room_name: str, + recording_type: Literal["cloud", "raw-tracks"], + instance_id: str, +) -> dict: + """Start recording via REST API. + + Reference: https://docs.daily.co/reference/rest-api/rooms/recordings/start + + Args: + room_name: Daily.co room name + recording_type: "cloud" (Brady Bunch MP4) or "raw-tracks" (per-participant WebM) + instance_id: UUID for this recording session (same ID can be used for both types) + + Returns: + Recording start confirmation from Daily.co API + """ + client = await self._get_client() + response = await client.post( + f"/rooms/{room_name}/recordings/start", + json={ + "type": recording_type, + "instance_id": instance_id, + }, + ) + return await self._handle_response(response, "start_recording") +``` + +**Verification:** +```bash +cd server && uv run mypy reflector/dailyco_api/client.py +``` + +**Expected:** No type errors + +--- + +#### Step 2.2: Expose method in DailyClient wrapper + +**File:** `server/reflector/video_platforms/daily.py` + +**Add method after `create_meeting_token`:** +```python +async def start_recording( + self, + room_name: str, + recording_type: Literal["cloud", "raw-tracks"], + instance_id: str, +) -> dict: + """Start recording via Daily.co REST API. + + Proxies call to Daily.co REST API endpoint. 
+ """ + return await self._api_client.start_recording( + room_name=room_name, + recording_type=recording_type, + instance_id=instance_id, + ) +``` + +**Verification:** +```bash +cd server && uv run mypy reflector/video_platforms/daily.py +``` + +**Expected:** No type errors + +--- + +### Phase 3: Backend Endpoint for Raw-Tracks Start + +#### Step 3.1: Create meetings API endpoint + +**File:** `server/reflector/views/meetings.py` (NEW) + +```python +import logging +from fastapi import APIRouter, HTTPException +from fastapi.responses import RedirectResponse +from pydantic import BaseModel + +from reflector.db.meetings import meetings_controller +from reflector.storage import get_dailyco_storage +from reflector.video_platforms.factory import create_platform_client + +logger = logging.getLogger(__name__) + +router = APIRouter() + + +class StartRecordingRequest(BaseModel): + type: str # "raw-tracks" (cloud started from frontend) + instanceId: str + + +@router.post("/meetings/{meeting_id}/recordings/start") +async def start_recording(meeting_id: str, body: StartRecordingRequest): + """Start raw-tracks recording via Daily.co REST API. + + Called by frontend after starting cloud recording via daily-js. + Uses same instanceId to link both recordings. + + Note: No authentication required - anonymous users supported. 
+ """ + meeting = await meetings_controller.get_by_id(meeting_id) + if not meeting: + raise HTTPException(status_code=404, detail="Meeting not found") + + if body.type != "raw-tracks": + raise HTTPException( + status_code=400, + detail="Only raw-tracks can be started via this endpoint (cloud uses daily-js)", + ) + + try: + client = create_platform_client("daily") + result = await client.start_recording( + room_name=meeting.room_name, + recording_type=body.type, + instance_id=body.instanceId, + ) + + logger.info( + "Started raw-tracks recording via REST API", + extra={ + "meeting_id": meeting_id, + "room_name": meeting.room_name, + "instance_id": body.instanceId, + } + ) + + return {"status": "ok", "result": result} + + except Exception as e: + logger.error( + "Failed to start raw-tracks recording", + extra={"meeting_id": meeting_id, "error": str(e)} + ) + raise HTTPException(status_code=500, detail=f"Failed to start recording: {str(e)}") + + +@router.get("/meetings/{meeting_id}/cloud-recording") +async def get_cloud_recording(meeting_id: str): + """Serve cloud recording MP4 file. + + Returns redirect to S3 presigned URL from DAILYCO_STORAGE bucket. + Daily.co writes MP4 there, we read via presigned URL. + + Note: No authentication required - anonymous users supported. 
+ """ + meeting = await meetings_controller.get_by_id(meeting_id) + if not meeting: + raise HTTPException(status_code=404, detail="Meeting not found") + + if not meeting.cloud_recording_s3_key: + raise HTTPException(status_code=404, detail="Cloud recording not available") + + # Generate presigned URL for Daily.co S3 bucket (where Daily writes MP4) + storage = get_dailyco_storage() + presigned_url = await storage.get_file_url( + meeting.cloud_recording_s3_key, + expires_in=3600, # 1 hour + ) + + return RedirectResponse(url=presigned_url) +``` + +**File:** `server/reflector/app.py` + +**Register router** (the route paths above already begin with `/meetings`, so mount the router under `/v1` to get `/v1/meetings/{meeting_id}/...`; a `/v1/meetings` prefix would produce `/v1/meetings/meetings/...`): +```python +from reflector.views.meetings import router as meetings_router + +app.include_router(meetings_router, prefix="/v1", tags=["meetings"]) +``` + +**Verification:** +```bash +cd server && uv run mypy reflector/views/meetings.py reflector/app.py +``` + +**Expected:** No type errors + +--- + +### Phase 4: Webhook Handler Updates + +#### Step 4.1: Update recording.ready-to-download handler + +**File:** `server/reflector/views/daily.py` + +**Replace `_handle_recording_ready` function (around line 174):** +```python +async def _handle_recording_ready(event: RecordingReadyEvent): + room_name = event.payload.room_name + recording_id = event.payload.recording_id + recording_type = event.payload.type # "cloud" or "raw-tracks" + + logger.info( + "Recording ready for download", + extra={ + "room_name": room_name, + "recording_id": recording_id, + "recording_type": recording_type, + "platform": "daily", + } + ) + + bucket_name = settings.DAILYCO_STORAGE_AWS_BUCKET_NAME + if not bucket_name: + logger.error("DAILYCO_STORAGE_AWS_BUCKET_NAME not configured") + return + + if recording_type == "cloud": + # Cloud recording: single MP4 file written by Daily.co to DAILYCO_STORAGE bucket + s3_key = event.payload.s3_key + + # Store cloud recording reference in meeting table + meeting = await meetings_controller.get_by_room_name(room_name) + if not meeting: + 
logger.warning( + "Cloud recording: meeting not found", + extra={"room_name": room_name, "recording_id": recording_id} + ) + return + + await meetings_controller.update_meeting( + meeting.id, + cloud_recording_s3_key=s3_key, + cloud_recording_duration=event.payload.duration, + ) + + logger.info( + "Cloud recording stored", + extra={ + "meeting_id": meeting.id, + "s3_key": s3_key, + "duration": event.payload.duration, + } + ) + + elif recording_type == "raw-tracks": + # Existing multi-track processing (unchanged) + tracks = event.payload.tracks + if not tracks: + logger.warning( + "raw-tracks recording: missing tracks array", + extra={"room_name": room_name, "recording_id": recording_id} + ) + return + + track_keys = [t.s3Key for t in tracks if t.type == "audio"] + + logger.info( + "Raw-tracks recording queuing processing", + extra={ + "recording_id": recording_id, + "room_name": room_name, + "num_tracks": len(track_keys), + } + ) + + process_multitrack_recording.delay( + bucket_name=bucket_name, + daily_room_name=room_name, + recording_id=recording_id, + track_keys=track_keys, + ) + + else: + logger.warning( + "Unknown recording type", + extra={"recording_type": recording_type, "recording_id": recording_id} + ) +``` + +**Verification:** +```bash +cd server && uv run mypy reflector/views/daily.py +``` + +**Expected:** No type errors + +--- + +### Phase 5: Meeting API Updates + +#### Step 5.1: Add cloud recording info to Meeting response + +**File:** `server/reflector/views/rooms.py` + +**Update Meeting schema (around line 55):** +```python +class Meeting(BaseModel): + id: str + room_name: str + room_url: str + host_room_url: str + start_date: datetime + end_date: datetime + user_id: str | None = None + room_id: str | None = None + is_locked: bool = False + room_mode: Literal["normal", "group"] = "normal" + recording_type: Literal["none", "local", "cloud"] = "cloud" + recording_trigger: Literal[ + "none", "prompt", "automatic", "automatic-2nd-participant" + ] = 
"automatic-2nd-participant" + num_clients: int = 0 + is_active: bool = True + calendar_event_id: str | None = None + calendar_metadata: dict[str, Any] | None = None + platform: Platform + # NEW FIELDS: + cloud_recording_available: bool = False + cloud_recording_duration: int | None = None +``` + +**Update `rooms_join_meeting` handler to include cloud recording info:** +```python +@router.post("/rooms/{room_name}/meeting/{meeting_id}/join", response_model=Meeting) +async def rooms_join_meeting( + room_name: str, + meeting_id: str, + user: Annotated[Optional[auth.UserInfo], Depends(auth.current_user_optional)], +): + # ... existing logic ... + + # Build response with cloud recording info + meeting_dict = meeting.__dict__.copy() + meeting_dict["cloud_recording_available"] = bool(meeting.cloud_recording_s3_key) + meeting_dict["cloud_recording_duration"] = meeting.cloud_recording_duration + + return Meeting(**meeting_dict) +``` + +**Verification:** +```bash +cd server && uv run mypy reflector/views/rooms.py +``` + +**Expected:** No type errors + +--- + +### Phase 6: Frontend Display + +#### Step 6.1: Update transcript page to show cloud recording + +**File:** `www/app/(app)/transcripts/[transcriptId]/page.tsx` + +**Add after existing audio player section:** +```typescript +{transcript.meeting?.cloud_recording_available && ( + + + + + Original Cloud Recording + + + + Brady Bunch grid layout (MP4 with video and mixed audio from Daily.co) + + + {transcript.meeting.cloud_recording_duration && ( + + Duration: {Math.floor(transcript.meeting.cloud_recording_duration / 60)}m{' '} + {transcript.meeting.cloud_recording_duration % 60}s + + )} + + ⚠️ Large file (~38 MB/minute). May take time to load. 
+ + +)} +``` + +**Verification:** +```bash +cd www && pnpm tsc --noEmit +``` + +**Expected:** No type errors + +--- + +### Phase 7: Manual Testing & Validation + +#### Step 7.1: End-to-end validation + +**Prerequisites:** +```bash +# Ensure services running +docker compose up -d postgres redis server worker + +# Verify env vars +grep -E "DAILY_API_KEY|DAILYCO_STORAGE" server/.env +``` + +**Test procedure:** + +**1. Create room:** +```bash +curl -X POST http://localhost:1250/v1/rooms \ + -H "Content-Type: application/json" \ + -d '{ + "name": "dual-recording-test", + "platform": "daily", + "recording_type": "cloud", + "recording_trigger": "automatic-2nd-participant", + "zulip_auto_post": false, + "zulip_stream": "", + "zulip_topic": "", + "is_locked": false, + "room_mode": "normal", + "is_shared": true, + "webhook_url": "", + "webhook_secret": "", + "skip_consent": false + }' +``` + +**2. Create meeting:** +```bash +curl -X POST http://localhost:1250/v1/rooms/dual-recording-test/meeting \ + -H "Content-Type: application/json" \ + -d '{"allow_duplicated": false}' +``` + +**3. Join meeting (single user):** +- Open `http://localhost:3000/rooms/dual-recording-test` +- Click meeting link +- Allow camera/microphone + +**4. Monitor browser console:** +``` +Starting dual recording { instanceId: "abc-def-..." } +Raw-tracks recording started via backend +``` + +**5. Monitor server logs:** +```bash +docker compose logs server --tail 50 --follow | grep recording +``` + +**Expected:** +``` +[info] Started raw-tracks recording via REST API | meeting_id=... | instance_id=abc-def-... +``` + +**6. Speak for 20-30 seconds, then leave meeting** + +**7. Wait 2-5 minutes for Daily.co processing** + +**8. 
Monitor webhooks:** +```bash +docker compose logs server --tail 100 --follow | grep "recording.ready" +``` + +**Expected (2 separate webhooks):** +``` +[info] Recording ready for download | recording_type=cloud +[info] Cloud recording stored | s3_key=monadical/.../....mp4 + +[info] Recording ready for download | recording_type=raw-tracks +[info] Raw-tracks recording queuing processing | num_tracks=1 +``` + +**9. Verify database:** +```bash +docker compose exec postgres psql -U reflector -d reflector -c " + SELECT + m.id, + m.cloud_recording_s3_key, + m.cloud_recording_duration, + t.status, + t.title + FROM meeting m + LEFT JOIN transcript t ON t.meeting_id = m.id + WHERE m.room_name LIKE 'dual-recording-test-%' + ORDER BY m.created_at DESC + LIMIT 1; +" +``` + +**Expected:** +``` + id | cloud_recording_s3_key | cloud_recording_duration | status | title +--------------------------------------+-------------------------------------------+--------------------------+--------+--------------------- + <meeting-id> | monadical/dual-recording-test-.../....mp4 | 23 | ended | Test Recording +``` + +**10. Test cloud recording endpoint:** +```bash +MEETING_ID="<meeting-id>" # id returned by the query in step 9 +curl -I "http://localhost:1250/v1/meetings/$MEETING_ID/cloud-recording" +``` + +**Expected:** +``` +HTTP/1.1 307 Temporary Redirect +location: https://reflector-dailyco-local.s3.amazonaws.com/.../recording.mp4?X-Amz-... +``` + +**11. Test frontend display:** +- Navigate to `http://localhost:3000/transcripts/<transcript-id>` +- Verify "Original Cloud Recording" section appears +- Click play button +- Verify audio plays + +**Expected:** Both audio players functional + +--- + +#### Step 7.2: Test multiple participants (validate Daily.co behavior) + +**Test:** Do multiple participants starting recordings cause issues? + +**Setup:** +1. Create new meeting (same room or new) +2. Open 2 browser windows (or 1 normal + 1 incognito) + +**Procedure:** +1. Join meeting from both windows **nearly simultaneously** (within 1 second) +2. 
Check browser console in both - both should call startRecording +3. Check server logs - should see 2x backend recording start calls +4. Check Daily.co dashboard - how many recording instances created? + +**Expected outcomes:** + +**Best case:** Daily.co handles idempotency +- Only 1 cloud recording created +- Only 1 raw-tracks recording created +- Both webhooks arrive once + +**Acceptable case:** Duplicate recordings but no errors +- 2 cloud recordings created (can delete extra later) +- 2 raw-tracks recordings created (can delete extra later) +- Processing succeeds + +**Bad case:** Errors or corruption +- Daily.co returns error on duplicate start +- Recordings fail +- Transcription broken + +**Action based on results:** +- **Best/Acceptable:** Ship as-is, document behavior +- **Bad:** Implement lock mechanism (see Alternative Solutions below) + +--- + +## Alternative Solutions + +### If Multiple Participants Cause Issues (Implement ONLY If Needed) + +**Symptom:** Daily.co returns error when multiple participants start recordings, OR duplicate recordings cause problems + +**Solution A: Database Lock (Simple)** + +Add first-participant detection with DB-level locking: + +```python +# server/reflector/db/meetings.py - add new field +class Meeting(BaseModel): + # ... existing fields ... + recording_started: bool = False # NEW + +# Add column migration +op.add_column('meeting', sa.Column('recording_started', sa.Boolean(), nullable=False, server_default=sa.false())) + +# In views/rooms.py - rooms_join_meeting +async def rooms_join_meeting(...): + # ... existing logic ... 
+ + # Check and set recording_started atomically + async with get_database().transaction(): + meeting = await meetings_controller.get_by_id(meeting_id) + is_first_participant = not meeting.recording_started + + if is_first_participant: + await meetings_controller.update_meeting( + meeting.id, + recording_started=True + ) + + meeting_dict["is_first_participant"] = is_first_participant + return Meeting(**meeting_dict) +``` + +**Frontend change:** +```typescript +// Only start if is_first_participant +if (joinedMeeting.is_first_participant) { + startRecording({ type: "cloud", instanceId }); + fetch(`/v1/meetings/${meeting.id}/recordings/start`, ...); +} +``` + +**Pro:** Simple, uses existing DB infrastructure +**Con:** Extra DB roundtrip on join + +--- + +**Solution B: Redis Lock (Better Performance)** + +Use Redis for distributed locking: + +```python +# In views/rooms.py +from reflector.redis_cache import RedisAsyncLock + +async def rooms_join_meeting(...): + # ... existing logic ... + + is_first_participant = False + lock_key = f"meeting:{meeting_id}:recording-start" + + try: + async with RedisAsyncLock(lock_key, timeout=5, blocking_timeout=0): + # Check if recording already started + if not await redis.get(f"meeting:{meeting_id}:recording-started"): + await redis.set(f"meeting:{meeting_id}:recording-started", "1") + is_first_participant = True + except LockError: + # Another participant is starting recording right now + is_first_participant = False + + meeting_dict["is_first_participant"] = is_first_participant + return Meeting(**meeting_dict) +``` + +**Pro:** Fast, no DB changes needed +**Con:** Depends on Redis being available (already required) + +--- + +### If Same instanceId Causes Conflicts + +**Symptom:** Daily.co returns error when starting raw-tracks with same instanceId as cloud + +**Solution:** Use different instanceIds for cloud vs raw-tracks + +**Frontend changes:** +```typescript +const [cloudInstanceId] = useState(() => crypto.randomUUID()); +const 
[rawInstanceId] = useState(() => crypto.randomUUID()); + +// Use different IDs +startRecording({ type: "cloud", instanceId: cloudInstanceId }); + +fetch(`/v1/meetings/${meeting.id}/recordings/start`, { + body: JSON.stringify({ + type: "raw-tracks", + instanceId: rawInstanceId // DIFFERENT + }) +}); +``` + +**Backend:** No changes needed (accepts any instanceId) + +**Correlation:** Rely on `type` field in webhook payload only (both have same `room_name`) + +**Documentation:** Add note to DAILYCO_TEST.md explaining different instanceId requirement + +--- + +## Success Criteria + +### Functional Requirements + +- [ ] Both cloud and raw-tracks recordings start when user joins meeting +- [ ] Cloud recording webhook stores S3 key in meeting table (DAILYCO_STORAGE bucket) +- [ ] Raw-tracks webhook triggers existing multitrack pipeline (unchanged) +- [ ] Cloud recording accessible via `/v1/meetings/{id}/cloud-recording` endpoint (presigned URL) +- [ ] Transcript page displays cloud recording audio player when available +- [ ] Existing transcription quality unchanged (raw-tracks only) +- [ ] Dead code removed (JWT start_cloud_recording) + +### Validation Requirements + +- [ ] Prototype test confirms same instanceId works for both cloud and raw-tracks +- [ ] Multi-participant test confirms Daily.co handles duplicate starts gracefully (or lock implemented) +- [ ] End-to-end test shows both webhooks arriving and data stored correctly +- [ ] Cloud recording playback works in frontend + +### Non-Functional Requirements + +- [ ] No performance degradation in webhook handling +- [ ] Database migration runs without errors +- [ ] Type checking passes (mypy, tsc) + +### Rollback Plan + +If issues detected in production: + +**Immediate mitigation:** +1. Revert frontend DailyRoom.tsx to remove dual recording start +2. 
This stops cloud recordings (raw-tracks continue normally) + +**Database rollback:** +```bash +docker compose exec server uv run alembic downgrade -1 +``` + +**Code rollback:** +1. Revert frontend changes +2. Revert backend webhook handler to only handle raw-tracks +3. Keep API endpoints (harmless if unused) + +--- + +## Storage Impact Estimation + +**Scenario:** 10 meetings/day, 30 minutes average, 2 participants + +**Before (raw-tracks only):** +- Raw tracks: 2 participants × 30 min × 0.1 MB/min = 6 MB/meeting +- Processed MP3: ~2 MB/meeting +- Total: 8 MB/meeting × 10 = **80 MB/day** = 2.4 GB/month + +**After (with cloud recording enabled):** +- Raw tracks: 6 MB/meeting +- Processed MP3: 2 MB/meeting +- Cloud MP4: 30 min × 38 MB/min = 1,140 MB/meeting +- Total: 1,148 MB/meeting × 10 = **11.5 GB/day** = 345 GB/month + +**S3 Cost (us-east-1 standard):** +- Storage: $0.023/GB/month +- Before: 2.4 GB/month = **$0.06/month** +- After: 345 GB/month = **$7.94/month** + +**Daily.co cost:** Check pricing page for cloud recording charges (separate from raw-tracks) + +**Recommendation:** +- Consider lifecycle policy for cloud MP4s (e.g., delete after 90 days if not accessed) +- Monitor actual usage and adjust retention as needed + +--- + +## Timeline Estimate + +**Phase 0 (Preparation):** 2-4 hours +- Prototype instanceId approach +- Remove dead code +- Update frontend recording start + +**Phase 1 (Database):** 1 hour +- Migration, model updates, verification + +**Phase 2 (API Client):** 1 hour +- Add start_recording methods, type checks + +**Phase 3 (Backend Endpoint):** 2 hours +- Create meetings router, endpoint implementation + +**Phase 4 (Webhook Handler):** 2 hours +- Update _handle_recording_ready, type discrimination + +**Phase 5 (Meeting API):** 1 hour +- Schema updates, response updates + +**Phase 6 (Frontend Display):** 2 hours +- Transcript page updates (cloud recording player) + +**Phase 7 (Testing):** 4 hours +- End-to-end manual test +- Multi-participant 
validation +- Alternative approach if needed + +**Total:** ~15-17 hours (2 days) + +**Buffer for issues:** +4 hours (lock implementation if needed, debugging) + +**Realistic estimate:** 2-3 days + +--- + +## Appendix: Key Daily.co Documentation + +- [Recording calls with the Daily API](https://docs.daily.co/guides/products/live-streaming-recording/recording-calls-with-the-daily-api) +- [startRecording() - Daily.js](https://docs.daily.co/reference/daily-js/instance-methods/start-recording) +- [POST /rooms/:name/recordings/start](https://docs.daily.co/reference/rest-api/rooms/recordings/start) +- [Multi-instance recording](https://docs.daily.co/guides/products/live-streaming-recording/multi-instance-live-streaming-recording) +- [Webhooks - recording.ready-to-download](https://docs.daily.co/reference/rest-api/webhooks/events/recording-ready-to-download) diff --git a/TASKS.md b/TASKS.md new file mode 100644 index 00000000..a2042dba --- /dev/null +++ b/TASKS.md @@ -0,0 +1,538 @@ +# TASKS: Dual Recording Support (Cloud + Raw-Tracks) + +## Overview + +Tasks extracted from PRD.md for implementing simultaneous Daily.co cloud recording (MP4) and raw-tracks recording (per-participant WebM). + +--- + +## Phase 0: Preparation & Validation + +### Task 0.1: Prototype instanceId approach ⚠️ BLOCKER + +**Priority:** MUST DO FIRST (blocks all implementation) + +**Goal:** Validate Daily.co behavior with same/different instanceId for cloud + raw-tracks + +**Steps:** +1. Create test script `server/scripts/test_daily_dual_recording.py` +2. Test 1: Same instanceId for both cloud and raw-tracks +3. Test 2: Multiple start calls (simulate multiple participants) +4. Test 3: Different instanceIds (fallback if same fails) +5. Document findings in `server/DAILYCO_TEST.md` +6. 
Update PRD if different approach needed + +**Acceptance Criteria:** +- [ ] Know which instanceId strategy works +- [ ] Know if Daily.co handles duplicate start calls gracefully +- [ ] Documented in DAILYCO_TEST.md with recommendation +- [ ] Decision made: proceed with same instanceId OR different instanceIds + +**Files:** `server/scripts/test_daily_dual_recording.py`, `server/DAILYCO_TEST.md` + +**References:** PRD lines 100-132 + +--- + +### Task 0.2: Remove dead JWT start_cloud_recording code + +**Priority:** High (cleanup before adding new features) + +**Goal:** Remove unused JWT `start_cloud_recording` parameter that's been dead code + +**Changes:** +1. `server/reflector/video_platforms/daily.py`: + - Remove `start_cloud_recording` parameter from `create_meeting_token()` + - Remove `max_recording_duration_seconds` parameter + - Remove `start_cloud_recording_opts` logic + - Update `MeetingTokenProperties` instantiation + +2. `server/reflector/views/rooms.py`: + - Update `create_meeting_token()` call site (around line 589) + - Remove `start_cloud_recording=meeting.recording_type == "cloud"` argument + - Remove `max_recording_duration_seconds` argument + +**Acceptance Criteria:** +- [ ] `start_cloud_recording` parameter removed from method signature +- [ ] `max_recording_duration_seconds` parameter removed +- [ ] `start_cloud_recording_opts` logic removed +- [ ] Call site in `rooms.py` updated +- [ ] Type checking passes: `cd server && uv run mypy reflector/video_platforms/daily.py reflector/views/rooms.py` +- [ ] No errors + +**Files:** `server/reflector/video_platforms/daily.py`, `server/reflector/views/rooms.py` + +**References:** PRD lines 134-218 + +--- + +### Task 0.3: Update frontend recording start logic + +**Priority:** High (foundation for dual recording) + +**Goal:** Change frontend to start cloud recording via daily-js and trigger backend for raw-tracks + +**Changes:** +1. 
`www/app/[roomName]/components/DailyRoom.tsx`: + - Add `useState` for `recordingInstanceId` (crypto.randomUUID()) + - Update `handleFrameJoinMeeting` callback: + - Change `type: "raw-tracks"` to `type: "cloud"` + - Add `instanceId` parameter to startRecording + - Add fetch call to backend `/v1/meetings/${meeting.id}/recordings/start` + - Pass same instanceId to backend + - Update dependencies array + +**Acceptance Criteria:** +- [ ] recordingInstanceId generated once per component mount +- [ ] Cloud recording started via daily-js with instanceId +- [ ] Backend endpoint called with type="raw-tracks" and same instanceId +- [ ] Error handling for backend call +- [ ] Console logging shows both actions +- [ ] Type checking passes: `cd www && pnpm tsc --noEmit` + +**Files:** `www/app/[roomName]/components/DailyRoom.tsx` + +**References:** PRD lines 220-289 + +--- + +## Phase 1: Database Schema + +### Task 1.1: Add cloud recording fields to Meeting table + +**Priority:** High (required for data storage) + +**Goal:** Add database columns to store cloud recording S3 key and duration + +**Steps:** +1. Generate migration: `cd server && uv run alembic revision --autogenerate -m "add cloud recording support"` +2. Verify migration adds: + - `meeting.cloud_recording_s3_key` (String, nullable) + - `meeting.cloud_recording_duration` (Integer, nullable) +3. Run migration: `cd server && uv run alembic upgrade head` +4. 
Verify columns exist in database + +**Acceptance Criteria:** +- [ ] Migration file created in `server/reflector/db/migrations/versions/` +- [ ] Migration adds `cloud_recording_s3_key` column +- [ ] Migration adds `cloud_recording_duration` column +- [ ] Migration runs without errors +- [ ] Verification query shows columns: `docker compose exec postgres psql -U reflector -d reflector -c "\d meeting" | grep cloud_recording` +- [ ] Both columns visible in output + +**Files:** `server/reflector/db/migrations/versions/<revision_id>_add_cloud_recording_support.py` + +**References:** PRD lines 291-343 + +--- + +### Task 1.2: Update DB models with cloud recording fields + +**Priority:** High (required after migration) + +**Goal:** Add cloud recording fields to SQLAlchemy table and Pydantic model + +**Changes:** +1. `server/reflector/db/meetings.py`: + - Add columns to `meetings` table definition (~line 14): + - `sa.Column("cloud_recording_s3_key", sa.String, nullable=True)` + - `sa.Column("cloud_recording_duration", sa.Integer, nullable=True)` + - Add fields to `Meeting` model (~line 94): + - `cloud_recording_s3_key: str | None = None` + - `cloud_recording_duration: int | None = None` + +**Acceptance Criteria:** +- [ ] Table definition includes both new columns +- [ ] Model includes both new fields with proper types +- [ ] Type checking passes: `cd server && uv run mypy reflector/db/meetings.py` + +**Files:** `server/reflector/db/meetings.py` + +**References:** PRD lines 345-395 + +--- + +## Phase 2: Daily.co API Client Extension + +### Task 2.1: Add start_recording method to Daily.co API client + +**Priority:** Medium (foundation for backend endpoint) + +**Goal:** Add REST API method to start recordings via Daily.co API + +**Changes:** +1. 
`server/reflector/dailyco_api/client.py`: + - Add `start_recording()` method after `list_recordings()` + - Parameters: `room_name`, `recording_type` (Literal["cloud", "raw-tracks"]), `instance_id` + - POST to `/rooms/{room_name}/recordings/start` + - Return response via `_handle_response()` + - Add docstring with reference to Daily.co docs + +**Acceptance Criteria:** +- [ ] Method signature: `async def start_recording(self, room_name: str, recording_type: Literal["cloud", "raw-tracks"], instance_id: str) -> dict` +- [ ] POSTs to correct endpoint with JSON body +- [ ] Docstring includes Daily.co docs link +- [ ] Type checking passes: `cd server && uv run mypy reflector/dailyco_api/client.py` + +**Files:** `server/reflector/dailyco_api/client.py` + +**References:** PRD lines 397-443 + +--- + +### Task 2.2: Expose start_recording in DailyClient wrapper + +**Priority:** Medium (required for views layer) + +**Goal:** Add proxy method in DailyClient to call Daily.co API client + +**Changes:** +1. `server/reflector/video_platforms/daily.py`: + - Add `start_recording()` method after `create_meeting_token()` + - Proxy call to `self._api_client.start_recording()` + - Same parameters and return type + +**Acceptance Criteria:** +- [ ] Method added with same signature as Task 2.1 +- [ ] Proxies call to `_api_client.start_recording()` +- [ ] Docstring explains purpose +- [ ] Type checking passes: `cd server && uv run mypy reflector/video_platforms/daily.py` + +**Files:** `server/reflector/video_platforms/daily.py` + +**References:** PRD lines 445-475 + +--- + +## Phase 3: Backend Endpoint for Raw-Tracks Start + +### Task 3.1: Create meetings API endpoint for recording start + +**Priority:** High (required for frontend integration) + +**Goal:** Create backend endpoint for frontend to trigger raw-tracks recording + +**Steps:** +1. Create new file `server/reflector/views/meetings.py` +2. 
Implement two endpoints: + - `POST /meetings/{meeting_id}/recordings/start` - Start raw-tracks via Daily.co REST API + - `GET /meetings/{meeting_id}/cloud-recording` - Serve cloud recording MP4 (presigned URL) +3. Register router in `server/reflector/app.py` + +**Acceptance Criteria:** +- [ ] POST endpoint validates meeting exists (404 if not) +- [ ] POST endpoint validates type="raw-tracks" (400 if cloud) +- [ ] POST endpoint calls Daily.co API with instanceId +- [ ] POST endpoint logs success/failure +- [ ] GET endpoint returns 307 redirect to S3 presigned URL +- [ ] GET endpoint returns 404 if no cloud recording +- [ ] Router registered with prefix `/v1/meetings` +- [ ] Type checking passes: `cd server && uv run mypy reflector/views/meetings.py reflector/app.py` +- [ ] No authentication required (anonymous users supported) + +**Files:** `server/reflector/views/meetings.py` (NEW), `server/reflector/app.py` + +**References:** PRD lines 477-590 + +--- + +## Phase 4: Webhook Handler Updates + +### Task 4.1: Update recording.ready-to-download webhook handler + +**Priority:** High (required for data persistence) + +**Goal:** Handle both cloud and raw-tracks webhooks, store cloud recording metadata + +**Changes:** +1. 
`server/reflector/views/daily.py`: + - Replace `_handle_recording_ready()` function (~line 174) + - Add logic to discriminate by `event.payload.type`: + - `"cloud"`: Store S3 key and duration in meeting table + - `"raw-tracks"`: Queue multitrack processing (existing behavior) + - Add logging for both paths + +**Acceptance Criteria:** +- [ ] Cloud recording: stores `cloud_recording_s3_key` and `cloud_recording_duration` in meeting table +- [ ] Cloud recording: logs meeting_id, s3_key, duration +- [ ] Raw-tracks: queues `process_multitrack_recording` task (unchanged from existing code) +- [ ] Raw-tracks: logs recording_id, room_name, num_tracks (unchanged from existing code) +- [ ] Unknown types: log warning +- [ ] Type checking passes: `cd server && uv run mypy reflector/views/daily.py` + +**Files:** `server/reflector/views/daily.py` + +**References:** PRD lines 592-690 + +--- + +## Phase 5: Meeting API Updates + +### Task 5.1: Add cloud recording info to Meeting API response + +**Priority:** Medium (required for frontend display) + +**Goal:** Expose cloud recording availability and duration in meeting response + +**Changes:** +1. 
`server/reflector/views/rooms.py`: + - Update `Meeting` response schema (~line 55): + - Add `cloud_recording_available: bool = False` + - Add `cloud_recording_duration: int | None = None` + - Update `rooms_join_meeting` handler: + - Set `cloud_recording_available = bool(meeting.cloud_recording_s3_key)` + - Set `cloud_recording_duration = meeting.cloud_recording_duration` + +**Acceptance Criteria:** +- [ ] Schema includes `cloud_recording_available` field +- [ ] Schema includes `cloud_recording_duration` field +- [ ] Handler computes availability from S3 key presence +- [ ] Handler includes duration +- [ ] Type checking passes: `cd server && uv run mypy reflector/views/rooms.py` + +**Files:** `server/reflector/views/rooms.py` + +**References:** PRD lines 692-751 + +--- + +## Phase 6: Frontend Display + +### Task 6.1: Display cloud recording player on transcript page + +**Priority:** Medium (user-facing feature) + +**Goal:** Show cloud recording audio player when available + +**Changes:** +1. 
`www/app/(app)/transcripts/[transcriptId]/page.tsx`: + - Add conditional section after existing audio player + - Check `transcript.meeting?.cloud_recording_available` + - Render audio element with src pointing to `/v1/meetings/${transcript.meeting_id}/cloud-recording` + - Show duration if available (formatted as minutes and seconds, e.g., "5m 23s") + - Display warning about large file size (~38 MB/minute) + - Add icon and labels ("Original Cloud Recording", "Brady Bunch grid layout") + +**Acceptance Criteria:** +- [ ] Section only shows when `cloud_recording_available` is true +- [ ] Audio element src uses backend endpoint +- [ ] Duration displayed in human-readable format (e.g., "5m 23s") +- [ ] Warning text about large file size visible +- [ ] Descriptive text explains "Brady Bunch grid layout" +- [ ] Type checking passes: `cd www && pnpm tsc --noEmit` +- [ ] Styling consistent with existing audio player + +**Files:** `www/app/(app)/transcripts/[transcriptId]/page.tsx` + +**References:** PRD lines 753-799 + +--- + +## Phase 7: Manual Testing & Validation + +### Task 7.1: End-to-end validation (single participant) + +**Priority:** BLOCKER (must pass before deployment) + +**Goal:** Validate full flow from meeting creation to cloud recording playback + +**Test Procedure:** +1. Start services: `docker compose up -d postgres redis server worker` +2. Create test room via API (name: "dual-recording-test") +3. Create meeting +4. Join meeting from frontend +5. Verify browser console shows dual recording start +6. Verify server logs show raw-tracks start call +7. Speak for 20-30 seconds, leave meeting +8. Wait 2-5 minutes for Daily.co processing +9. Verify webhooks received (cloud + raw-tracks) +10. Check database for cloud_recording_s3_key and transcript status +11. Test cloud recording endpoint returns 307 redirect +12. Test frontend displays cloud recording player +13. 
Test audio playback works + +**Acceptance Criteria:** +- [ ] Cloud recording webhook stores S3 key in meeting table +- [ ] Raw-tracks webhook triggers transcription pipeline +- [ ] Database query shows both cloud_recording_s3_key and transcript.title populated +- [ ] Cloud recording endpoint returns 307 with S3 presigned URL +- [ ] Frontend shows "Original Cloud Recording" section +- [ ] Audio player loads and plays cloud recording +- [ ] Processed transcript shows correct transcription (from raw-tracks) + +**Test Artifacts:** +- Screenshot of browser console showing dual recording start +- Server log snippet showing webhook handling +- Database query results +- Screenshot of transcript page with cloud recording player + +**References:** PRD lines 801-926 + +--- + +### Task 7.2: Multi-participant validation (idempotency test) + +**Priority:** BLOCKER (determines if lock mechanism needed) + +**Goal:** Validate Daily.co behavior with multiple participants starting recordings + +**Test Procedure:** +1. Create new meeting +2. Open 2 browser windows (normal + incognito) +3. Join meeting from both windows nearly simultaneously (within 1 second) +4. Check browser consoles in both windows +5. Check server logs for 2x backend recording start calls +6. Check Daily.co dashboard for number of recordings created +7. Wait for webhooks +8. 
Analyze results + +**Expected Outcomes:** +- **Best case:** Daily.co handles idempotency (1 cloud + 1 raw-tracks recording) +- **Acceptable:** Duplicate recordings but no errors +- **Bad:** Errors or corruption + +**Acceptance Criteria:** +- [ ] Documented which outcome occurred +- [ ] If Best/Acceptable: proceed to deployment +- [ ] If Bad: implement Alternative Solution A or B from PRD +- [ ] Decision documented in DAILYCO_TEST.md + +**Action Items Based on Results:** +- Best/Acceptable → Ship as-is +- Bad → Add Task 7.3 (Implement lock mechanism) + +**References:** PRD lines 928-963 + +--- + +### Task 7.3: Implement lock mechanism (CONDITIONAL - only if Task 7.2 fails) + +**Priority:** High (only if multi-participant test shows errors) + +**Goal:** Prevent duplicate recording starts using database or Redis lock + +**Two Options:** + +**Option A: Database Lock** +- Add `recording_started: bool` field to Meeting model +- Add migration for new column +- Use database transaction to atomically check and set flag +- Return `is_first_participant` in join response +- Frontend only starts recordings if first participant + +**Option B: Redis Lock** +- Use `RedisAsyncLock` from existing codebase +- Lock key: `meeting:{meeting_id}:recording-start` +- Check/set `meeting:{meeting_id}:recording-started` flag +- Return `is_first_participant` in join response +- Frontend only starts recordings if first participant + +**Acceptance Criteria:** +- [ ] Multi-participant test (Task 7.2 rerun) shows only 1 recording created +- [ ] No race conditions under simultaneous joins +- [ ] Type checking passes +- [ ] Test with 3+ participants shows consistent behavior + +**Files:** +- Option A: `server/reflector/db/meetings.py`, `server/reflector/views/rooms.py`, `www/app/[roomName]/components/DailyRoom.tsx` +- Option B: `server/reflector/views/rooms.py`, `www/app/[roomName]/components/DailyRoom.tsx` + +**References:** PRD lines 966-1048 + +--- + +## Success Criteria (Overall Project) + +### 
Functional Requirements +- [ ] Both cloud and raw-tracks recordings start when user joins meeting +- [ ] Cloud recording webhook stores S3 key in meeting table (DAILYCO_STORAGE bucket) +- [ ] Raw-tracks webhook triggers existing multitrack pipeline (unchanged) +- [ ] Cloud recording accessible via `/v1/meetings/{id}/cloud-recording` endpoint +- [ ] Transcript page displays cloud recording audio player when available +- [ ] Existing transcription quality unchanged (raw-tracks only) +- [ ] Dead code removed (JWT start_cloud_recording) + +### Validation Requirements +- [ ] Prototype test (Task 0.1) confirms instanceId strategy +- [ ] Multi-participant test (Task 7.2) completed and observed Daily.co behavior documented +- [ ] End-to-end test (Task 7.1) shows both webhooks arriving and data stored correctly +- [ ] Cloud recording playback works in frontend + +### Non-Functional Requirements +- [ ] No performance degradation in webhook handling +- [ ] Database migration runs without errors +- [ ] Type checking passes (mypy, tsc) + +--- + +## Dependencies + +**Critical Path:** +``` +Task 0.1 (instanceId validation) + ↓ +Task 0.2 + Task 0.3 (cleanup + frontend setup) + ↓ +Task 1.1 + Task 1.2 (database schema) + ↓ +Task 2.1 + Task 2.2 (API client) + ↓ +Task 3.1 (backend endpoint) + ↓ +Task 4.1 (webhook handler) + ↓ +Task 5.1 + Task 6.1 (API response + frontend display) + ↓ +Task 7.1 (E2E test) + ↓ +Task 7.2 (multi-participant test) + ↓ +[Task 7.3 if needed] (lock mechanism) +``` + +**Parallel Work Possible:** +- Tasks 1.1 + 1.2 can be done together (schema + models) +- Tasks 2.1 + 2.2 can be done together (API client layers) +- Tasks 5.1 + 6.1 can be done together (backend response + frontend display) + +--- + +## Rollback Plan + +**Immediate Mitigation (Production Issues):** +1. Revert frontend `DailyRoom.tsx` to remove dual recording start +2. 
This stops cloud recordings, raw-tracks continue normally + +**Database Rollback:** +```bash +docker compose exec server uv run alembic downgrade -1 +``` + +**Code Rollback:** +1. Revert frontend changes (Task 0.3, Task 6.1) +2. Revert webhook handler (Task 4.1) to only handle raw-tracks +3. Keep API endpoints (Task 3.1) - harmless if unused + +--- + +## Storage Impact + +**Current (raw-tracks only):** 80 MB/day = 2.4 GB/month = $0.06/month S3 +**With cloud recording:** 11.5 GB/day = 345 GB/month = $7.94/month S3 + +**Recommendation:** Consider S3 lifecycle policy for cloud MP4s (delete after 90 days) + +--- + +## Estimated Effort + +**Phase 0:** 2-4 hours (prototype + cleanup) +**Phase 1:** 1 hour (database) +**Phase 2:** 1 hour (API client) +**Phase 3:** 2 hours (backend endpoint) +**Phase 4:** 2 hours (webhook handler) +**Phase 5:** 1 hour (meeting API) +**Phase 6:** 2 hours (frontend display) +**Phase 7:** 4 hours (testing) + +**Total:** 15-17 hours base + 4 hours buffer = 19-21 hours (2-3 days) From cbf8058901ad633fc822f57f01273afa3b78a26d Mon Sep 17 00:00:00 2001 From: Igor Loskutov Date: Fri, 9 Jan 2026 17:08:10 -0500 Subject: [PATCH 02/20] clean dead daily.co code --- server/reflector/video_platforms/daily.py | 9 ++------- server/reflector/views/rooms.py | 1 - 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/server/reflector/video_platforms/daily.py b/server/reflector/video_platforms/daily.py index f71e959b..80962300 100644 --- a/server/reflector/video_platforms/daily.py +++ b/server/reflector/video_platforms/daily.py @@ -179,21 +179,16 @@ def verify_webhook_signature( async def create_meeting_token( self, room_name: DailyRoomName, - start_cloud_recording: bool, enable_recording_ui: bool, user_id: NonEmptyString | None = None, is_owner: bool = False, max_recording_duration_seconds: int | None = None, ) -> NonEmptyString: - start_cloud_recording_opts = None - if start_cloud_recording and max_recording_duration_seconds: - 
start_cloud_recording_opts = {"maxDuration": max_recording_duration_seconds} - properties = MeetingTokenProperties( room_name=room_name, user_id=user_id, - start_cloud_recording=start_cloud_recording, - start_cloud_recording_opts=start_cloud_recording_opts, + start_cloud_recording=False, + start_cloud_recording_opts=None, enable_recording_ui=enable_recording_ui, is_owner=is_owner, ) diff --git a/server/reflector/views/rooms.py b/server/reflector/views/rooms.py index 278235b4..6d538841 100644 --- a/server/reflector/views/rooms.py +++ b/server/reflector/views/rooms.py @@ -586,7 +586,6 @@ async def rooms_join_meeting( ) token = await client.create_meeting_token( meeting.room_name, - start_cloud_recording=meeting.recording_type == "cloud", enable_recording_ui=enable_recording_ui, user_id=user_id, is_owner=user_id == room.user_id, From d8ba9da14cce863bfff21dfd0a2f3a316c8b1092 Mon Sep 17 00:00:00 2001 From: Igor Loskutov Date: Sat, 10 Jan 2026 10:52:31 -0500 Subject: [PATCH 03/20] brady bunch prototype (no-mistakes) --- .gitleaksignore | 1 + PRD.md | 1198 ----------------- TASKS.md | 538 -------- server/reflector/dailyco_api/__init__.py | 3 +- server/reflector/dailyco_api/client.py | 36 +- server/reflector/dailyco_api/responses.py | 7 + server/reflector/db/meetings.py | 21 +- server/reflector/video_platforms/daily.py | 27 +- server/reflector/views/daily.py | 87 +- server/reflector/views/meetings.py | 58 +- server/reflector/worker/app.py | 2 +- server/reflector/worker/process.py | 161 ++- .../[transcriptId]/finalSummary.tsx | 5 +- www/app/(app)/transcripts/createTranscript.ts | 5 +- www/app/(app)/transcripts/shareAndPrivacy.tsx | 5 +- www/app/(app)/transcripts/shareZulip.tsx | 5 +- www/app/(app)/transcripts/transcriptTitle.tsx | 5 +- www/app/[roomName]/components/DailyRoom.tsx | 109 +- www/app/lib/apiHooks.ts | 14 + www/app/lib/transcript.ts | 3 +- www/app/lib/types.ts | 3 + www/app/reflector-api.d.ts | 75 ++ www/package.json | 1 + www/pnpm-lock.yaml | 12 + 24 files 
changed, 568 insertions(+), 1813 deletions(-) delete mode 100644 PRD.md delete mode 100644 TASKS.md diff --git a/.gitleaksignore b/.gitleaksignore index 8eb80bd5..141c82d5 100644 --- a/.gitleaksignore +++ b/.gitleaksignore @@ -3,3 +3,4 @@ docs/docs/installation/auth-setup.md:curl-auth-header:250 docs/docs/installation/daily-setup.md:curl-auth-header:277 gpu/self_hosted/DEV_SETUP.md:curl-auth-header:74 gpu/self_hosted/DEV_SETUP.md:curl-auth-header:83 +server/reflector/worker/process.py:generic-api-key:465 diff --git a/PRD.md b/PRD.md deleted file mode 100644 index 8265b8f3..00000000 --- a/PRD.md +++ /dev/null @@ -1,1198 +0,0 @@ -# PRD: Dual Recording Support (Cloud + Raw-Tracks) - -## Overview - -Enable simultaneous Daily.co cloud recording (Brady Bunch grid MP4) and raw-tracks recording (per-participant WebM audio files) for meetings. Cloud recording provides instant playback with video layout, raw-tracks enable high-quality per-speaker transcription. - -## Goals - -1. Store both cloud MP4 and raw-tracks WebM files from Daily.co meetings -2. Enable users to play back original cloud recording alongside processed transcription -3. Maintain existing transcription quality (raw-tracks only, no changes) -4. 
Keep implementation simple - validate approach works, add complexity only if needed - -## Non-Goals - -- Video player implementation (audio playback only in MVP) -- Cloud recording as transcription fallback (if raw-tracks fails, transcript fails) -- Transcribing cloud MP4 audio -- Room-level toggle (always enable both recordings for all meetings) -- Unit tests (manual validation only) - ---- - -## Technical Background - -### Daily.co Recording Configuration - -**Room `enable_recording` property:** -- **Purpose:** Allows manual recording start (does NOT auto-start) -- **Values:** Single string - `"cloud"` OR `"raw-tracks"` OR `"local"` (NOT array) -- **Current code:** Set to `"raw-tracks"` when Reflector's `room.recording_type == "cloud"` (line 61-62 of `server/reflector/video_platforms/daily.py`) - -**JWT `start_cloud_recording` property:** -- **Purpose:** Auto-starts cloud recording when participant joins -- **Current code:** Set to `true` but **DEAD CODE** - room has `enable_recording: "raw-tracks"` so JWT setting ignored (line 189 in `daily.py`, called at line 589 in `views/rooms.py`) - -**Frontend `startRecording()` call:** -- **Current code:** Every participant calls `startRecording({ type: "raw-tracks" })` on join (line 236 of `www/app/[roomName]/components/DailyRoom.tsx`) -- **Behavior:** Multiple participants = multiple calls to Daily.co - -**instanceId:** -- **Purpose:** UUID identifying a recording session (per Daily.co docs) -- **Per-user:** Each participant joining generates their own instanceId (ephemeral, not stored) -- **Question:** Can cloud + raw-tracks share same instanceId? Docs unclear - needs validation - -### Daily.co Support Guidance - -From Discord conversation (Kyle + rajneesh, Sept 2024): - -> **Kyle:** "We need both raw-tracks and cloud recording. When we start recording from JS and backend it doesn't work." -> -> **rajneesh (Daily.co support):** "It is possible to get both 'raw-tracks' and 'cloud-recording' at the same time. 
Start the cloud recording using **daily-js API** and then start raw-tracks using the **REST API endpoint**. Please note you need to pass a unique instanceId in the startRecording call." - -**Interpretation:** -- Cloud recording: Start via `frame.startRecording()` (daily-js, frontend) -- Raw-tracks recording: Start via `POST /rooms/:name/recordings/start` (REST API, backend) -- Both need instanceId parameter - ---- - -## Solution Architecture - -### Recording Trigger Flow - -``` -User joins meeting - ↓ -Frontend: Generate instanceId (useState, per component mount) - ↓ -Frontend: Start cloud recording via daily-js - ↓ (instanceId: ) -Frontend: Call backend endpoint - ↓ -Backend: Start raw-tracks via Daily.co REST API - ↓ (instanceId: - SAME as cloud) - ↓ -Daily.co: Process recordings (assumption: handles duplicate start calls) - ↓ -Webhook: recording.ready-to-download (type: "cloud") - ↓ -Backend: Store s3_key in meeting table - ↓ -Webhook: recording.ready-to-download (type: "raw-tracks") - ↓ -Backend: Queue multitrack processing (existing pipeline) -``` - -**Key assumptions:** -1. **Every participant** calls both APIs (no first-participant detection) -2. **Daily.co handles idempotency** - multiple start calls to same room don't create duplicate recordings -3. **Same instanceId works** for both cloud and raw-tracks - -**Validation required:** Test that multiple participants starting recordings doesn't cause issues. - ---- - -## Implementation Steps - -### Phase 0: Preparation & Validation - -#### Step 0.1: Prototype instanceId approach - -**Before implementing**, validate Daily.co behavior: - -```bash -# Test script: server/scripts/test_daily_dual_recording.py - -# Test 1: Same instanceId -# 1. Create Daily room with enable_recording: "raw-tracks" -# 2. POST /rooms/:name/recordings/start { type: "cloud", instance_id: "test-123" } -# 3. 
POST /rooms/:name/recordings/start { type: "raw-tracks", instance_id: "test-123" } -# Expected: Both succeed, webhooks arrive with same/different instance_id - -# Test 2: Multiple start calls (simulate multiple participants) -# 1. POST /rooms/:name/recordings/start { type: "cloud", instance_id: "test-123" } -# 2. POST /rooms/:name/recordings/start { type: "cloud", instance_id: "test-123" } # duplicate -# Expected: Second call is idempotent (no error, no duplicate recording) - -# Test 3: Different instanceIds (fallback if same fails) -# 1. POST /rooms/:name/recordings/start { type: "cloud", instance_id: "cloud-123" } -# 2. POST /rooms/:name/recordings/start { type: "raw-tracks", instance_id: "raw-456" } -# Expected: Both succeed independently -``` - -**Document findings:** -- Add results to `server/DAILYCO_TEST.md` -- Update PRD if different instanceId approach required - -**Acceptance criteria:** -- Know which instanceId strategy works -- Know if Daily.co handles duplicate start calls gracefully - ---- - -#### Step 0.2: Remove dead JWT start_cloud_recording - -**File:** `server/reflector/video_platforms/daily.py` - -**Current code (lines 177-200):** -```python -async def create_meeting_token( - self, - room_name: DailyRoomName, - start_cloud_recording: bool, # ← Dead parameter - enable_recording_ui: bool, - user_id: NonEmptyString | None = None, - is_owner: bool = False, - max_recording_duration_seconds: int | None = None, -) -> NonEmptyString: - start_cloud_recording_opts = None - if start_cloud_recording and max_recording_duration_seconds: - start_cloud_recording_opts = {"maxDuration": max_recording_duration_seconds} - - properties = MeetingTokenProperties( - room_name=room_name, - user_id=user_id, - start_cloud_recording=start_cloud_recording, # ← Dead code - start_cloud_recording_opts=start_cloud_recording_opts, # ← Dead code - enable_recording_ui=enable_recording_ui, - is_owner=is_owner, - ) - request = CreateMeetingTokenRequest(properties=properties) - result 
= await self._api_client.create_meeting_token(request) - return result.token -``` - -**Changes:** -```python -async def create_meeting_token( - self, - room_name: DailyRoomName, - enable_recording_ui: bool, - user_id: NonEmptyString | None = None, - is_owner: bool = False, -) -> NonEmptyString: - # Removed: start_cloud_recording, max_recording_duration_seconds, start_cloud_recording_opts - - properties = MeetingTokenProperties( - room_name=room_name, - user_id=user_id, - enable_recording_ui=enable_recording_ui, - is_owner=is_owner, - ) - request = CreateMeetingTokenRequest(properties=properties) - result = await self._api_client.create_meeting_token(request) - return result.token -``` - -**File:** `server/reflector/views/rooms.py` - -**Update call site (around line 587-593):** -```python -# Before -token = await client.create_meeting_token( - meeting.room_name, - start_cloud_recording=meeting.recording_type == "cloud", # ← Remove - enable_recording_ui=enable_recording_ui, - user_id=user_id, - is_owner=user_id == room.user_id, - max_recording_duration_seconds=remaining_seconds, # ← Remove -) - -# After -token = await client.create_meeting_token( - meeting.room_name, - enable_recording_ui=enable_recording_ui, - user_id=user_id, - is_owner=user_id == room.user_id, -) -``` - -**Verification:** -```bash -cd server && uv run mypy reflector/video_platforms/daily.py reflector/views/rooms.py -``` - -**Expected:** No type errors - ---- - -#### Step 0.3: Update frontend recording start - -**File:** `www/app/[roomName]/components/DailyRoom.tsx` - -**Current code (lines 231-243):** -```typescript -const handleFrameJoinMeeting = useCallback( - (startRecording: (args: { type: "raw-tracks" }) => void) => { - try { - if (meeting.recording_type === "cloud") { - console.log("Starting cloud recording"); - startRecording({ type: "raw-tracks" }); // ← Wrong type, every participant calls - } - } catch (error) { - console.error("Failed to start recording:", error); - } - }, - 
[meeting.recording_type], -); -``` - -**Changes:** -```typescript -const [recordingInstanceId] = useState(() => crypto.randomUUID()); - -const handleFrameJoinMeeting = useCallback( - (startRecording: (args: { type: "raw-tracks" | "cloud", instanceId: string }) => void) => { - try { - if (meeting.recording_type === "cloud") { - console.log("Starting dual recording", { instanceId: recordingInstanceId }); - - // 1. Start cloud recording via daily-js (frontend) - startRecording({ - type: "cloud", - instanceId: recordingInstanceId - }); - - // 2. Start raw-tracks via backend REST API - fetch(`/v1/meetings/${meeting.id}/recordings/start`, { - method: "POST", - headers: { "Content-Type": "application/json" }, - body: JSON.stringify({ - type: "raw-tracks", - instanceId: recordingInstanceId // SAME instanceId - }) - }) - .then(res => { - if (!res.ok) throw new Error(`HTTP ${res.status}`); - console.log("Raw-tracks recording started via backend"); - }) - .catch(err => { - console.error("Failed to start raw-tracks recording:", err); - }); - } - } catch (error) { - console.error("Failed to start recordings:", error); - } - }, - [meeting.recording_type, recordingInstanceId, meeting.id], -); -``` - -**Verification:** -```bash -cd www && pnpm tsc --noEmit -``` - -**Expected:** No type errors - ---- - -### Phase 1: Database Schema - -#### Step 1.1: Add cloud recording fields to Meeting table - -**File:** `server/reflector/db/migrations/versions/YYYYMMDD_HHMM_add_cloud_recording.py` (NEW) - -**Migration:** -```python -"""add cloud recording support - -Revision ID: -Revises: -Create Date: 2026-01-09 12:00:00.000000 - -""" -from alembic import op -import sqlalchemy as sa - - -# revision identifiers, used by Alembic. 
-revision = '' -down_revision = '' -branch_labels = None -depends_on = None - - -def upgrade(): - op.add_column('meeting', sa.Column('cloud_recording_s3_key', sa.String(), nullable=True)) - op.add_column('meeting', sa.Column('cloud_recording_duration', sa.Integer(), nullable=True)) - - -def downgrade(): - op.drop_column('meeting', 'cloud_recording_duration') - op.drop_column('meeting', 'cloud_recording_s3_key') -``` - -**Run migration:** -```bash -cd server && uv run alembic revision --autogenerate -m "add cloud recording support" -cd server && uv run alembic upgrade head -``` - -**Verification:** -```bash -docker compose exec postgres psql -U reflector -d reflector -c "\d meeting" | grep cloud_recording -``` - -**Expected output:** -``` - cloud_recording_s3_key | character varying | | | - cloud_recording_duration | integer | | | -``` - ---- - -#### Step 1.2: Update DB models - -**File:** `server/reflector/db/meetings.py` - -**Changes (around line 94):** -```python -class Meeting(BaseModel): - id: str - room_name: str - room_url: str - host_room_url: str - start_date: datetime - end_date: datetime - room_id: str | None - is_locked: bool = False - room_mode: Literal["normal", "group"] = "normal" - recording_type: Literal["none", "local", "cloud"] = "cloud" - recording_trigger: Literal[ - "none", "prompt", "automatic", "automatic-2nd-participant" - ] = "automatic-2nd-participant" - num_clients: int = 0 - is_active: bool = True - calendar_event_id: str | None = None - calendar_metadata: dict[str, Any] | None = None - platform: Platform = WHEREBY_PLATFORM - # NEW FIELDS: - cloud_recording_s3_key: str | None = None - cloud_recording_duration: int | None = None -``` - -**Add to table definition (around line 14):** -```python -meetings = sa.Table( - "meeting", - metadata, - sa.Column("id", sa.String, primary_key=True), - # ... existing columns ... 
- sa.Column("cloud_recording_s3_key", sa.String, nullable=True), - sa.Column("cloud_recording_duration", sa.Integer, nullable=True), - # ... rest of columns ... -) -``` - -**Verification:** -```bash -cd server && uv run mypy reflector/db/meetings.py -``` - -**Expected:** No type errors - ---- - -### Phase 2: Daily.co API Client Extension - -#### Step 2.1: Add start_recording method - -**File:** `server/reflector/dailyco_api/client.py` - -**Add method after `list_recordings`:** -```python -async def start_recording( - self, - room_name: str, - recording_type: Literal["cloud", "raw-tracks"], - instance_id: str, -) -> dict: - """Start recording via REST API. - - Reference: https://docs.daily.co/reference/rest-api/rooms/recordings/start - - Args: - room_name: Daily.co room name - recording_type: "cloud" (Brady Bunch MP4) or "raw-tracks" (per-participant WebM) - instance_id: UUID for this recording session (same ID can be used for both types) - - Returns: - Recording start confirmation from Daily.co API - """ - client = await self._get_client() - response = await client.post( - f"/rooms/{room_name}/recordings/start", - json={ - "type": recording_type, - "instance_id": instance_id, - }, - ) - return await self._handle_response(response, "start_recording") -``` - -**Verification:** -```bash -cd server && uv run mypy reflector/dailyco_api/client.py -``` - -**Expected:** No type errors - ---- - -#### Step 2.2: Expose method in DailyClient wrapper - -**File:** `server/reflector/video_platforms/daily.py` - -**Add method after `create_meeting_token`:** -```python -async def start_recording( - self, - room_name: str, - recording_type: Literal["cloud", "raw-tracks"], - instance_id: str, -) -> dict: - """Start recording via Daily.co REST API. - - Proxies call to Daily.co REST API endpoint. 
- """ - return await self._api_client.start_recording( - room_name=room_name, - recording_type=recording_type, - instance_id=instance_id, - ) -``` - -**Verification:** -```bash -cd server && uv run mypy reflector/video_platforms/daily.py -``` - -**Expected:** No type errors - ---- - -### Phase 3: Backend Endpoint for Raw-Tracks Start - -#### Step 3.1: Create meetings API endpoint - -**File:** `server/reflector/views/meetings.py` (NEW) - -```python -import logging -from fastapi import APIRouter, HTTPException -from fastapi.responses import RedirectResponse -from pydantic import BaseModel - -from reflector.db.meetings import meetings_controller -from reflector.storage import get_dailyco_storage -from reflector.video_platforms.factory import create_platform_client - -logger = logging.getLogger(__name__) - -router = APIRouter() - - -class StartRecordingRequest(BaseModel): - type: str # "raw-tracks" (cloud started from frontend) - instanceId: str - - -@router.post("/meetings/{meeting_id}/recordings/start") -async def start_recording(meeting_id: str, body: StartRecordingRequest): - """Start raw-tracks recording via Daily.co REST API. - - Called by frontend after starting cloud recording via daily-js. - Uses same instanceId to link both recordings. - - Note: No authentication required - anonymous users supported. 
- """ - meeting = await meetings_controller.get_by_id(meeting_id) - if not meeting: - raise HTTPException(status_code=404, detail="Meeting not found") - - if body.type != "raw-tracks": - raise HTTPException( - status_code=400, - detail="Only raw-tracks can be started via this endpoint (cloud uses daily-js)", - ) - - try: - client = create_platform_client("daily") - result = await client.start_recording( - room_name=meeting.room_name, - recording_type=body.type, - instance_id=body.instanceId, - ) - - logger.info( - "Started raw-tracks recording via REST API", - extra={ - "meeting_id": meeting_id, - "room_name": meeting.room_name, - "instance_id": body.instanceId, - } - ) - - return {"status": "ok", "result": result} - - except Exception as e: - logger.error( - "Failed to start raw-tracks recording", - extra={"meeting_id": meeting_id, "error": str(e)} - ) - raise HTTPException(status_code=500, detail=f"Failed to start recording: {str(e)}") - - -@router.get("/meetings/{meeting_id}/cloud-recording") -async def get_cloud_recording(meeting_id: str): - """Serve cloud recording MP4 file. - - Returns redirect to S3 presigned URL from DAILYCO_STORAGE bucket. - Daily.co writes MP4 there, we read via presigned URL. - - Note: No authentication required - anonymous users supported. 
- """ - meeting = await meetings_controller.get_by_id(meeting_id) - if not meeting: - raise HTTPException(status_code=404, detail="Meeting not found") - - if not meeting.cloud_recording_s3_key: - raise HTTPException(status_code=404, detail="Cloud recording not available") - - # Generate presigned URL for Daily.co S3 bucket (where Daily writes MP4) - storage = get_dailyco_storage() - presigned_url = await storage.get_file_url( - meeting.cloud_recording_s3_key, - expires_in=3600, # 1 hour - ) - - return RedirectResponse(url=presigned_url) -``` - -**File:** `server/reflector/app.py` - -**Register router:** -```python -from reflector.views.meetings import router as meetings_router - -# Route paths already start with "/meetings", so mount at "/v1" to get /v1/meetings/{meeting_id}/... -app.include_router(meetings_router, prefix="/v1", tags=["meetings"]) -``` - -**Verification:** -```bash -cd server && uv run mypy reflector/views/meetings.py reflector/app.py -``` - -**Expected:** No type errors - ---- - -### Phase 4: Webhook Handler Updates - -#### Step 4.1: Update recording.ready-to-download handler - -**File:** `server/reflector/views/daily.py` - -**Replace `_handle_recording_ready` function (around line 174):** -```python -async def _handle_recording_ready(event: RecordingReadyEvent): - room_name = event.payload.room_name - recording_id = event.payload.recording_id - recording_type = event.payload.type # "cloud" or "raw-tracks" - - logger.info( - "Recording ready for download", - extra={ - "room_name": room_name, - "recording_id": recording_id, - "recording_type": recording_type, - "platform": "daily", - } - ) - - bucket_name = settings.DAILYCO_STORAGE_AWS_BUCKET_NAME - if not bucket_name: - logger.error("DAILYCO_STORAGE_AWS_BUCKET_NAME not configured") - return - - if recording_type == "cloud": - # Cloud recording: single MP4 file written by Daily.co to DAILYCO_STORAGE bucket - s3_key = event.payload.s3_key - - # Store cloud recording reference in meeting table - meeting = await meetings_controller.get_by_room_name(room_name) - if not meeting: -
logger.warning( - "Cloud recording: meeting not found", - extra={"room_name": room_name, "recording_id": recording_id} - ) - return - - await meetings_controller.update_meeting( - meeting.id, - cloud_recording_s3_key=s3_key, - cloud_recording_duration=event.payload.duration, - ) - - logger.info( - "Cloud recording stored", - extra={ - "meeting_id": meeting.id, - "s3_key": s3_key, - "duration": event.payload.duration, - } - ) - - elif recording_type == "raw-tracks": - # Existing multi-track processing (unchanged) - tracks = event.payload.tracks - if not tracks: - logger.warning( - "raw-tracks recording: missing tracks array", - extra={"room_name": room_name, "recording_id": recording_id} - ) - return - - track_keys = [t.s3Key for t in tracks if t.type == "audio"] - - logger.info( - "Raw-tracks recording queuing processing", - extra={ - "recording_id": recording_id, - "room_name": room_name, - "num_tracks": len(track_keys), - } - ) - - process_multitrack_recording.delay( - bucket_name=bucket_name, - daily_room_name=room_name, - recording_id=recording_id, - track_keys=track_keys, - ) - - else: - logger.warning( - "Unknown recording type", - extra={"recording_type": recording_type, "recording_id": recording_id} - ) -``` - -**Verification:** -```bash -cd server && uv run mypy reflector/views/daily.py -``` - -**Expected:** No type errors - ---- - -### Phase 5: Meeting API Updates - -#### Step 5.1: Add cloud recording info to Meeting response - -**File:** `server/reflector/views/rooms.py` - -**Update Meeting schema (around line 55):** -```python -class Meeting(BaseModel): - id: str - room_name: str - room_url: str - host_room_url: str - start_date: datetime - end_date: datetime - user_id: str | None = None - room_id: str | None = None - is_locked: bool = False - room_mode: Literal["normal", "group"] = "normal" - recording_type: Literal["none", "local", "cloud"] = "cloud" - recording_trigger: Literal[ - "none", "prompt", "automatic", "automatic-2nd-participant" - ] = 
"automatic-2nd-participant" - num_clients: int = 0 - is_active: bool = True - calendar_event_id: str | None = None - calendar_metadata: dict[str, Any] | None = None - platform: Platform - # NEW FIELDS: - cloud_recording_available: bool = False - cloud_recording_duration: int | None = None -``` - -**Update `rooms_join_meeting` handler to include cloud recording info:** -```python -@router.post("/rooms/{room_name}/meeting/{meeting_id}/join", response_model=Meeting) -async def rooms_join_meeting( - room_name: str, - meeting_id: str, - user: Annotated[Optional[auth.UserInfo], Depends(auth.current_user_optional)], -): - # ... existing logic ... - - # Build response with cloud recording info - meeting_dict = meeting.__dict__.copy() - meeting_dict["cloud_recording_available"] = bool(meeting.cloud_recording_s3_key) - meeting_dict["cloud_recording_duration"] = meeting.cloud_recording_duration - - return Meeting(**meeting_dict) -``` - -**Verification:** -```bash -cd server && uv run mypy reflector/views/rooms.py -``` - -**Expected:** No type errors - ---- - -### Phase 6: Frontend Display - -#### Step 6.1: Update transcript page to show cloud recording - -**File:** `www/app/(app)/transcripts/[transcriptId]/page.tsx` - -**Add after existing audio player section:** -```typescript -{transcript.meeting?.cloud_recording_available && ( - - - - - Original Cloud Recording - - - - Brady Bunch grid layout (MP4 with video and mixed audio from Daily.co) - - - {transcript.meeting.cloud_recording_duration && ( - - Duration: {Math.floor(transcript.meeting.cloud_recording_duration / 60)}m{' '} - {transcript.meeting.cloud_recording_duration % 60}s - - )} - - ⚠️ Large file (~38 MB/minute). May take time to load. 
- - -)} -``` - -**Verification:** -```bash -cd www && pnpm tsc --noEmit -``` - -**Expected:** No type errors - ---- - -### Phase 7: Manual Testing & Validation - -#### Step 7.1: End-to-end validation - -**Prerequisites:** -```bash -# Ensure services running -docker compose up -d postgres redis server worker - -# Verify env vars -grep -E "DAILY_API_KEY|DAILYCO_STORAGE" server/.env -``` - -**Test procedure:** - -**1. Create room:** -```bash -curl -X POST http://localhost:1250/v1/rooms \ - -H "Content-Type: application/json" \ - -d '{ - "name": "dual-recording-test", - "platform": "daily", - "recording_type": "cloud", - "recording_trigger": "automatic-2nd-participant", - "zulip_auto_post": false, - "zulip_stream": "", - "zulip_topic": "", - "is_locked": false, - "room_mode": "normal", - "is_shared": true, - "webhook_url": "", - "webhook_secret": "", - "skip_consent": false - }' -``` - -**2. Create meeting:** -```bash -curl -X POST http://localhost:1250/v1/rooms/dual-recording-test/meeting \ - -H "Content-Type: application/json" \ - -d '{"allow_duplicated": false}' -``` - -**3. Join meeting (single user):** -- Open `http://localhost:3000/rooms/dual-recording-test` -- Click meeting link -- Allow camera/microphone - -**4. Monitor browser console:** -``` -Starting dual recording { instanceId: "abc-def-..." } -Raw-tracks recording started via backend -``` - -**5. Monitor server logs:** -```bash -docker compose logs server --tail 50 --follow | grep recording -``` - -**Expected:** -``` -[info] Started raw-tracks recording via REST API | meeting_id=... | instance_id=abc-def-... -``` - -**6. Speak for 20-30 seconds, then leave meeting** - -**7. Wait 2-5 minutes for Daily.co processing** - -**8. 
Monitor webhooks:** -```bash -docker compose logs server --tail 100 --follow | grep "recording.ready" -``` - -**Expected (2 separate webhooks):** -``` -[info] Recording ready for download | recording_type=cloud -[info] Cloud recording stored | s3_key=monadical/.../....mp4 - -[info] Recording ready for download | recording_type=raw-tracks -[info] Raw-tracks recording queuing processing | num_tracks=1 -``` - -**9. Verify database:** -```bash -docker compose exec postgres psql -U reflector -d reflector -c " - SELECT - m.id, - m.cloud_recording_s3_key, - m.cloud_recording_duration, - t.status, - t.title - FROM meeting m - LEFT JOIN transcript t ON t.meeting_id = m.id - WHERE m.room_name LIKE 'dual-recording-test-%' - ORDER BY m.created_at DESC - LIMIT 1; -" -``` - -**Expected:** -``` - id | cloud_recording_s3_key | cloud_recording_duration | status | title ---------------------------------------+-------------------------------------------+--------------------------+--------+--------------------- - <meeting-id> | monadical/dual-recording-test-.../....mp4 | 23 | ended | Test Recording -``` - -**10. Test cloud recording endpoint:** -```bash -MEETING_ID="<meeting-id-from-step-9>" -curl -I "http://localhost:1250/v1/meetings/$MEETING_ID/cloud-recording" -``` - -**Expected:** -``` -HTTP/1.1 307 Temporary Redirect -location: https://reflector-dailyco-local.s3.amazonaws.com/.../recording.mp4?X-Amz-... -``` - -**11. Test frontend display:** -- Navigate to `http://localhost:3000/transcripts/<transcript-id>` -- Verify "Original Cloud Recording" section appears -- Click play button -- Verify audio plays - -**Expected:** Both audio players functional - ---- - -#### Step 7.2: Test multiple participants (validate Daily.co behavior) - -**Test:** Do multiple participants starting recordings cause issues? - -**Setup:** -1. Create new meeting (same room or new) -2. Open 2 browser windows (or 1 normal + 1 incognito) - -**Procedure:** -1. Join meeting from both windows **nearly simultaneously** (within 1 second) -2. 
Check browser console in both - both should call startRecording -3. Check server logs - should see 2x backend recording start calls -4. Check Daily.co dashboard - how many recording instances created? - -**Expected outcomes:** - -**Best case:** Daily.co handles idempotency -- Only 1 cloud recording created -- Only 1 raw-tracks recording created -- Both webhooks arrive once - -**Acceptable case:** Duplicate recordings but no errors -- 2 cloud recordings created (can delete extra later) -- 2 raw-tracks recordings created (can delete extra later) -- Processing succeeds - -**Bad case:** Errors or corruption -- Daily.co returns error on duplicate start -- Recordings fail -- Transcription broken - -**Action based on results:** -- **Best/Acceptable:** Ship as-is, document behavior -- **Bad:** Implement lock mechanism (see Alternative Solutions below) - ---- - -## Alternative Solutions - -### If Multiple Participants Cause Issues (Implement ONLY If Needed) - -**Symptom:** Daily.co returns error when multiple participants start recordings, OR duplicate recordings cause problems - -**Solution A: Database Lock (Simple)** - -Add first-participant detection with DB-level locking: - -```python -# server/reflector/db/meetings.py - add new field -class Meeting(BaseModel): - # ... existing fields ... - recording_started: bool = False # NEW - -# Add column migration -op.add_column('meeting', sa.Column('recording_started', sa.Boolean(), nullable=False, server_default=sa.false())) - -# In views/rooms.py - rooms_join_meeting -async def rooms_join_meeting(...): - # ... existing logic ... 
- - # Check and set recording_started atomically - async with get_database().transaction(): - meeting = await meetings_controller.get_by_id(meeting_id) - is_first_participant = not meeting.recording_started - - if is_first_participant: - await meetings_controller.update_meeting( - meeting.id, - recording_started=True - ) - - meeting_dict["is_first_participant"] = is_first_participant - return Meeting(**meeting_dict) -``` - -**Frontend change:** -```typescript -// Only start if is_first_participant -if (joinedMeeting.is_first_participant) { - startRecording({ type: "cloud", instanceId }); - fetch(`/v1/meetings/${meeting.id}/recordings/start`, ...); -} -``` - -**Pro:** Simple, uses existing DB infrastructure -**Con:** Extra DB roundtrip on join - ---- - -**Solution B: Redis Lock (Better Performance)** - -Use Redis for distributed locking: - -```python -# In views/rooms.py -from reflector.redis_cache import RedisAsyncLock - -async def rooms_join_meeting(...): - # ... existing logic ... - - is_first_participant = False - lock_key = f"meeting:{meeting_id}:recording-start" - - try: - async with RedisAsyncLock(lock_key, timeout=5, blocking_timeout=0): - # Check if recording already started - if not await redis.get(f"meeting:{meeting_id}:recording-started"): - await redis.set(f"meeting:{meeting_id}:recording-started", "1") - is_first_participant = True - except LockError: - # Another participant is starting recording right now - is_first_participant = False - - meeting_dict["is_first_participant"] = is_first_participant - return Meeting(**meeting_dict) -``` - -**Pro:** Fast, no DB changes needed -**Con:** Depends on Redis being available (already required) - ---- - -### If Same instanceId Causes Conflicts - -**Symptom:** Daily.co returns error when starting raw-tracks with same instanceId as cloud - -**Solution:** Use different instanceIds for cloud vs raw-tracks - -**Frontend changes:** -```typescript -const [cloudInstanceId] = useState(() => crypto.randomUUID()); -const 
[rawInstanceId] = useState(() => crypto.randomUUID()); - -// Use different IDs -startRecording({ type: "cloud", instanceId: cloudInstanceId }); - -fetch(`/v1/meetings/${meeting.id}/recordings/start`, { - body: JSON.stringify({ - type: "raw-tracks", - instanceId: rawInstanceId // DIFFERENT - }) -}); -``` - -**Backend:** No changes needed (accepts any instanceId) - -**Correlation:** Rely on `type` field in webhook payload only (both have same `room_name`) - -**Documentation:** Add note to DAILYCO_TEST.md explaining different instanceId requirement - ---- - -## Success Criteria - -### Functional Requirements - -- [ ] Both cloud and raw-tracks recordings start when user joins meeting -- [ ] Cloud recording webhook stores S3 key in meeting table (DAILYCO_STORAGE bucket) -- [ ] Raw-tracks webhook triggers existing multitrack pipeline (unchanged) -- [ ] Cloud recording accessible via `/v1/meetings/{id}/cloud-recording` endpoint (presigned URL) -- [ ] Transcript page displays cloud recording audio player when available -- [ ] Existing transcription quality unchanged (raw-tracks only) -- [ ] Dead code removed (JWT start_cloud_recording) - -### Validation Requirements - -- [ ] Prototype test confirms same instanceId works for both cloud and raw-tracks -- [ ] Multi-participant test confirms Daily.co handles duplicate starts gracefully (or lock implemented) -- [ ] End-to-end test shows both webhooks arriving and data stored correctly -- [ ] Cloud recording playback works in frontend - -### Non-Functional Requirements - -- [ ] No performance degradation in webhook handling -- [ ] Database migration runs without errors -- [ ] Type checking passes (mypy, tsc) - -### Rollback Plan - -If issues detected in production: - -**Immediate mitigation:** -1. Revert frontend DailyRoom.tsx to remove dual recording start -2. 
This stops cloud recordings (raw-tracks continue normally) - -**Database rollback:** -```bash -docker compose exec server uv run alembic downgrade -1 -``` - -**Code rollback:** -1. Revert frontend changes -2. Revert backend webhook handler to only handle raw-tracks -3. Keep API endpoints (harmless if unused) - ---- - -## Storage Impact Estimation - -**Scenario:** 10 meetings/day, 30 minutes average, 2 participants - -**Before (raw-tracks only):** -- Raw tracks: 2 participants × 30 min × 0.1 MB/min = 6 MB/meeting -- Processed MP3: ~2 MB/meeting -- Total: 8 MB/meeting × 10 = **80 MB/day** = 2.4 GB/month - -**After (with cloud recording enabled):** -- Raw tracks: 6 MB/meeting -- Processed MP3: 2 MB/meeting -- Cloud MP4: 30 min × 38 MB/min = 1,140 MB/meeting -- Total: 1,148 MB/meeting × 10 = **11.5 GB/day** = 345 GB/month - -**S3 Cost (us-east-1 standard):** -- Storage: $0.023/GB/month -- Before: 2.4 GB/month = **$0.06/month** -- After: 345 GB/month = **$7.94/month** - -**Daily.co cost:** Check pricing page for cloud recording charges (separate from raw-tracks) - -**Recommendation:** -- Consider lifecycle policy for cloud MP4s (e.g., delete after 90 days if not accessed) -- Monitor actual usage and adjust retention as needed - ---- - -## Timeline Estimate - -**Phase 0 (Preparation):** 2-4 hours -- Prototype instanceId approach -- Remove dead code -- Update frontend recording start - -**Phase 1 (Database):** 1 hour -- Migration, model updates, verification - -**Phase 2 (API Client):** 1 hour -- Add start_recording methods, type checks - -**Phase 3 (Backend Endpoint):** 2 hours -- Create meetings router, endpoint implementation - -**Phase 4 (Webhook Handler):** 2 hours -- Update _handle_recording_ready, type discrimination - -**Phase 5 (Meeting API):** 1 hour -- Schema updates, response updates - -**Phase 6 (Frontend Display):** 2 hours -- Transcript page updates (cloud recording player) - -**Phase 7 (Testing):** 4 hours -- End-to-end manual test -- Multi-participant 
validation -- Alternative approach if needed - -**Total:** ~15-17 hours (2 days) - -**Buffer for issues:** +4 hours (lock implementation if needed, debugging) - -**Realistic estimate:** 2-3 days - ---- - -## Appendix: Key Daily.co Documentation - -- [Recording calls with the Daily API](https://docs.daily.co/guides/products/live-streaming-recording/recording-calls-with-the-daily-api) -- [startRecording() - Daily.js](https://docs.daily.co/reference/daily-js/instance-methods/start-recording) -- [POST /rooms/:name/recordings/start](https://docs.daily.co/reference/rest-api/rooms/recordings/start) -- [Multi-instance recording](https://docs.daily.co/guides/products/live-streaming-recording/multi-instance-live-streaming-recording) -- [Webhooks - recording.ready-to-download](https://docs.daily.co/reference/rest-api/webhooks/events/recording-ready-to-download) diff --git a/TASKS.md b/TASKS.md deleted file mode 100644 index a2042dba..00000000 --- a/TASKS.md +++ /dev/null @@ -1,538 +0,0 @@ -# TASKS: Dual Recording Support (Cloud + Raw-Tracks) - -## Overview - -Tasks extracted from PRD.md for implementing simultaneous Daily.co cloud recording (MP4) and raw-tracks recording (per-participant WebM). - ---- - -## Phase 0: Preparation & Validation - -### Task 0.1: Prototype instanceId approach ⚠️ BLOCKER - -**Priority:** MUST DO FIRST (blocks all implementation) - -**Goal:** Validate Daily.co behavior with same/different instanceId for cloud + raw-tracks - -**Steps:** -1. Create test script `server/scripts/test_daily_dual_recording.py` -2. Test 1: Same instanceId for both cloud and raw-tracks -3. Test 2: Multiple start calls (simulate multiple participants) -4. Test 3: Different instanceIds (fallback if same fails) -5. Document findings in `server/DAILYCO_TEST.md` -6. 
Update PRD if different approach needed - -**Acceptance Criteria:** -- [ ] Know which instanceId strategy works -- [ ] Know if Daily.co handles duplicate start calls gracefully -- [ ] Documented in DAILYCO_TEST.md with recommendation -- [ ] Decision made: proceed with same instanceId OR different instanceIds - -**Files:** `server/scripts/test_daily_dual_recording.py`, `server/DAILYCO_TEST.md` - -**References:** PRD lines 100-132 - ---- - -### Task 0.2: Remove dead JWT start_cloud_recording code - -**Priority:** High (cleanup before adding new features) - -**Goal:** Remove unused JWT `start_cloud_recording` parameter that's been dead code - -**Changes:** -1. `server/reflector/video_platforms/daily.py`: - - Remove `start_cloud_recording` parameter from `create_meeting_token()` - - Remove `max_recording_duration_seconds` parameter - - Remove `start_cloud_recording_opts` logic - - Update `MeetingTokenProperties` instantiation - -2. `server/reflector/views/rooms.py`: - - Update `create_meeting_token()` call site (around line 589) - - Remove `start_cloud_recording=meeting.recording_type == "cloud"` argument - - Remove `max_recording_duration_seconds` argument - -**Acceptance Criteria:** -- [ ] `start_cloud_recording` parameter removed from method signature -- [ ] `max_recording_duration_seconds` parameter removed -- [ ] `start_cloud_recording_opts` logic removed -- [ ] Call site in `rooms.py` updated -- [ ] Type checking passes: `cd server && uv run mypy reflector/video_platforms/daily.py reflector/views/rooms.py` -- [ ] No errors - -**Files:** `server/reflector/video_platforms/daily.py`, `server/reflector/views/rooms.py` - -**References:** PRD lines 134-218 - ---- - -### Task 0.3: Update frontend recording start logic - -**Priority:** High (foundation for dual recording) - -**Goal:** Change frontend to start cloud recording via daily-js and trigger backend for raw-tracks - -**Changes:** -1. 
`www/app/[roomName]/components/DailyRoom.tsx`: - - Add `useState` for `recordingInstanceId` (crypto.randomUUID()) - - Update `handleFrameJoinMeeting` callback: - - Change `type: "raw-tracks"` to `type: "cloud"` - - Add `instanceId` parameter to startRecording - - Add fetch call to backend `/v1/meetings/${meeting.id}/recordings/start` - - Pass same instanceId to backend - - Update dependencies array - -**Acceptance Criteria:** -- [ ] recordingInstanceId generated once per component mount -- [ ] Cloud recording started via daily-js with instanceId -- [ ] Backend endpoint called with type="raw-tracks" and same instanceId -- [ ] Error handling for backend call -- [ ] Console logging shows both actions -- [ ] Type checking passes: `cd www && pnpm tsc --noEmit` - -**Files:** `www/app/[roomName]/components/DailyRoom.tsx` - -**References:** PRD lines 220-289 - ---- - -## Phase 1: Database Schema - -### Task 1.1: Add cloud recording fields to Meeting table - -**Priority:** High (required for data storage) - -**Goal:** Add database columns to store cloud recording S3 key and duration - -**Steps:** -1. Generate migration: `cd server && uv run alembic revision --autogenerate -m "add cloud recording support"` -2. Verify migration adds: - - `meeting.cloud_recording_s3_key` (String, nullable) - - `meeting.cloud_recording_duration` (Integer, nullable) -3. Run migration: `cd server && uv run alembic upgrade head` -4. 
Verify columns exist in database - -**Acceptance Criteria:** -- [ ] Migration file created in `server/reflector/db/migrations/versions/` -- [ ] Migration adds `cloud_recording_s3_key` column -- [ ] Migration adds `cloud_recording_duration` column -- [ ] Migration runs without errors -- [ ] Verification query shows columns: `docker compose exec postgres psql -U reflector -d reflector -c "\d meeting" | grep cloud_recording` -- [ ] Both columns visible in output - -**Files:** `server/reflector/db/migrations/versions/_add_cloud_recording_support.py` - -**References:** PRD lines 291-343 - ---- - -### Task 1.2: Update DB models with cloud recording fields - -**Priority:** High (required after migration) - -**Goal:** Add cloud recording fields to SQLAlchemy table and Pydantic model - -**Changes:** -1. `server/reflector/db/meetings.py`: - - Add columns to `meetings` table definition (~line 14): - - `sa.Column("cloud_recording_s3_key", sa.String, nullable=True)` - - `sa.Column("cloud_recording_duration", sa.Integer, nullable=True)` - - Add fields to `Meeting` model (~line 94): - - `cloud_recording_s3_key: str | None = None` - - `cloud_recording_duration: int | None = None` - -**Acceptance Criteria:** -- [ ] Table definition includes both new columns -- [ ] Model includes both new fields with proper types -- [ ] Type checking passes: `cd server && uv run mypy reflector/db/meetings.py` - -**Files:** `server/reflector/db/meetings.py` - -**References:** PRD lines 345-395 - ---- - -## Phase 2: Daily.co API Client Extension - -### Task 2.1: Add start_recording method to Daily.co API client - -**Priority:** Medium (foundation for backend endpoint) - -**Goal:** Add REST API method to start recordings via Daily.co API - -**Changes:** -1. 
`server/reflector/dailyco_api/client.py`: - - Add `start_recording()` method after `list_recordings()` - - Parameters: `room_name`, `recording_type` (Literal["cloud", "raw-tracks"]), `instance_id` - - POST to `/rooms/{room_name}/recordings/start` - - Return response via `_handle_response()` - - Add docstring with reference to Daily.co docs - -**Acceptance Criteria:** -- [ ] Method signature: `async def start_recording(self, room_name: str, recording_type: Literal["cloud", "raw-tracks"], instance_id: str) -> dict` -- [ ] POSTs to correct endpoint with JSON body -- [ ] Docstring includes Daily.co docs link -- [ ] Type checking passes: `cd server && uv run mypy reflector/dailyco_api/client.py` - -**Files:** `server/reflector/dailyco_api/client.py` - -**References:** PRD lines 397-443 - ---- - -### Task 2.2: Expose start_recording in DailyClient wrapper - -**Priority:** Medium (required for views layer) - -**Goal:** Add proxy method in DailyClient to call Daily.co API client - -**Changes:** -1. `server/reflector/video_platforms/daily.py`: - - Add `start_recording()` method after `create_meeting_token()` - - Proxy call to `self._api_client.start_recording()` - - Same parameters and return type - -**Acceptance Criteria:** -- [ ] Method added with same signature as Task 2.1 -- [ ] Proxies call to `_api_client.start_recording()` -- [ ] Docstring explains purpose -- [ ] Type checking passes: `cd server && uv run mypy reflector/video_platforms/daily.py` - -**Files:** `server/reflector/video_platforms/daily.py` - -**References:** PRD lines 445-475 - ---- - -## Phase 3: Backend Endpoint for Raw-Tracks Start - -### Task 3.1: Create meetings API endpoint for recording start - -**Priority:** High (required for frontend integration) - -**Goal:** Create backend endpoint for frontend to trigger raw-tracks recording - -**Steps:** -1. Create new file `server/reflector/views/meetings.py` -2. 
Implement two endpoints: - - `POST /meetings/{meeting_id}/recordings/start` - Start raw-tracks via Daily.co REST API - - `GET /meetings/{meeting_id}/cloud-recording` - Serve cloud recording MP4 (presigned URL) -3. Register router in `server/reflector/app.py` - -**Acceptance Criteria:** -- [ ] POST endpoint validates meeting exists (404 if not) -- [ ] POST endpoint validates type="raw-tracks" (400 if cloud) -- [ ] POST endpoint calls Daily.co API with instanceId -- [ ] POST endpoint logs success/failure -- [ ] GET endpoint returns 307 redirect to S3 presigned URL -- [ ] GET endpoint returns 404 if no cloud recording -- [ ] Router registered with prefix `/v1/meetings` -- [ ] Type checking passes: `cd server && uv run mypy reflector/views/meetings.py reflector/app.py` -- [ ] No authentication required (anonymous users supported) - -**Files:** `server/reflector/views/meetings.py` (NEW), `server/reflector/app.py` - -**References:** PRD lines 477-590 - ---- - -## Phase 4: Webhook Handler Updates - -### Task 4.1: Update recording.ready-to-download webhook handler - -**Priority:** High (required for data persistence) - -**Goal:** Handle both cloud and raw-tracks webhooks, store cloud recording metadata - -**Changes:** -1. 
`server/reflector/views/daily.py`: - - Replace `_handle_recording_ready()` function (~line 174) - - Add logic to discriminate by `event.payload.type`: - - `"cloud"`: Store S3 key and duration in meeting table - - `"raw-tracks"`: Queue multitrack processing (existing behavior) - - Add logging for both paths - -**Acceptance Criteria:** -- [ ] Cloud recording: stores `cloud_recording_s3_key` and `cloud_recording_duration` in meeting table -- [ ] Cloud recording: logs meeting_id, s3_key, duration -- [ ] Raw-tracks: queues `process_multitrack_recording` task (unchanged from existing code) -- [ ] Raw-tracks: logs recording_id, room_name, num_tracks (unchanged from existing code) -- [ ] Unknown types: log warning -- [ ] Type checking passes: `cd server && uv run mypy reflector/views/daily.py` - -**Files:** `server/reflector/views/daily.py` - -**References:** PRD lines 592-690 - ---- - -## Phase 5: Meeting API Updates - -### Task 5.1: Add cloud recording info to Meeting API response - -**Priority:** Medium (required for frontend display) - -**Goal:** Expose cloud recording availability and duration in meeting response - -**Changes:** -1. 
`server/reflector/views/rooms.py`: - - Update `Meeting` response schema (~line 55): - - Add `cloud_recording_available: bool = False` - - Add `cloud_recording_duration: int | None = None` - - Update `rooms_join_meeting` handler: - - Set `cloud_recording_available = bool(meeting.cloud_recording_s3_key)` - - Set `cloud_recording_duration = meeting.cloud_recording_duration` - -**Acceptance Criteria:** -- [ ] Schema includes `cloud_recording_available` field -- [ ] Schema includes `cloud_recording_duration` field -- [ ] Handler computes availability from S3 key presence -- [ ] Handler includes duration -- [ ] Type checking passes: `cd server && uv run mypy reflector/views/rooms.py` - -**Files:** `server/reflector/views/rooms.py` - -**References:** PRD lines 692-751 - ---- - -## Phase 6: Frontend Display - -### Task 6.1: Display cloud recording player on transcript page - -**Priority:** Medium (user-facing feature) - -**Goal:** Show cloud recording audio player when available - -**Changes:** -1. 
`www/app/(app)/transcripts/[transcriptId]/page.tsx`: - Add conditional section after existing audio player - Check `transcript.meeting?.cloud_recording_available` - Render audio element with src pointing to `/v1/meetings/${transcript.meeting_id}/cloud-recording` - Show duration if available (formatted as minutes and seconds, e.g., "5m 23s") - Display warning about large file size (~38 MB/minute) - Add icon and labels ("Original Cloud Recording", "Brady Bunch grid layout") - -**Acceptance Criteria:** -- [ ] Section only shows when `cloud_recording_available` is true -- [ ] Audio element src uses backend endpoint -- [ ] Duration displayed in human-readable format (e.g., "5m 23s") -- [ ] Warning text about large file size visible -- [ ] Descriptive text explains "Brady Bunch grid layout" -- [ ] Type checking passes: `cd www && pnpm tsc --noEmit` -- [ ] Styling consistent with existing audio player - -**Files:** `www/app/(app)/transcripts/[transcriptId]/page.tsx` - -**References:** PRD lines 753-799 - ---- - -## Phase 7: Manual Testing & Validation - -### Task 7.1: End-to-end validation (single participant) - -**Priority:** BLOCKER (must pass before deployment) - -**Goal:** Validate full flow from meeting creation to cloud recording playback - -**Test Procedure:** -1. Start services: `docker compose up -d postgres redis server worker` -2. Create test room via API (name: "dual-recording-test") -3. Create meeting -4. Join meeting from frontend -5. Verify browser console shows dual recording start -6. Verify server logs show raw-tracks start call -7. Speak for 20-30 seconds, leave meeting -8. Wait 2-5 minutes for Daily.co processing -9. Verify webhooks received (cloud + raw-tracks) -10. Check database for cloud_recording_s3_key and transcript status -11. Test cloud recording endpoint returns 307 redirect -12. Test frontend displays cloud recording player -13. 
Test audio playback works - -**Acceptance Criteria:** -- [ ] Cloud recording webhook stores S3 key in meeting table -- [ ] Raw-tracks webhook triggers transcription pipeline -- [ ] Database query shows both cloud_recording_s3_key and transcript.title populated -- [ ] Cloud recording endpoint returns 307 with S3 presigned URL -- [ ] Frontend shows "Original Cloud Recording" section -- [ ] Audio player loads and plays cloud recording -- [ ] Processed transcript shows correct transcription (from raw-tracks) - -**Test Artifacts:** -- Screenshot of browser console showing dual recording start -- Server log snippet showing webhook handling -- Database query results -- Screenshot of transcript page with cloud recording player - -**References:** PRD lines 801-926 - ---- - -### Task 7.2: Multi-participant validation (idempotency test) - -**Priority:** BLOCKER (determines if lock mechanism needed) - -**Goal:** Validate Daily.co behavior with multiple participants starting recordings - -**Test Procedure:** -1. Create new meeting -2. Open 2 browser windows (normal + incognito) -3. Join meeting from both windows nearly simultaneously (within 1 second) -4. Check browser consoles in both windows -5. Check server logs for 2x backend recording start calls -6. Check Daily.co dashboard for number of recordings created -7. Wait for webhooks -8. 
Analyze results - -**Expected Outcomes:** -- **Best case:** Daily.co handles idempotency (1 cloud + 1 raw-tracks recording) -- **Acceptable:** Duplicate recordings but no errors -- **Bad:** Errors or corruption - -**Acceptance Criteria:** -- [ ] Documented which outcome occurred -- [ ] If Best/Acceptable: proceed to deployment -- [ ] If Bad: implement Alternative Solution A or B from PRD -- [ ] Decision documented in DAILYCO_TEST.md - -**Action Items Based on Results:** -- Best/Acceptable → Ship as-is -- Bad → Add Task 7.3 (Implement lock mechanism) - -**References:** PRD lines 928-963 - ---- - -### Task 7.3: Implement lock mechanism (CONDITIONAL - only if Task 7.2 fails) - -**Priority:** High (only if multi-participant test shows errors) - -**Goal:** Prevent duplicate recording starts using database or Redis lock - -**Two Options:** - -**Option A: Database Lock** -- Add `recording_started: bool` field to Meeting model -- Add migration for new column -- Use database transaction to atomically check and set flag -- Return `is_first_participant` in join response -- Frontend only starts recordings if first participant - -**Option B: Redis Lock** -- Use `RedisAsyncLock` from existing codebase -- Lock key: `meeting:{meeting_id}:recording-start` -- Check/set `meeting:{meeting_id}:recording-started` flag -- Return `is_first_participant` in join response -- Frontend only starts recordings if first participant - -**Acceptance Criteria:** -- [ ] Multi-participant test (Task 7.2 rerun) shows only 1 recording created -- [ ] No race conditions under simultaneous joins -- [ ] Type checking passes -- [ ] Test with 3+ participants shows consistent behavior - -**Files:** -- Option A: `server/reflector/db/meetings.py`, `server/reflector/views/rooms.py`, `www/app/[roomName]/components/DailyRoom.tsx` -- Option B: `server/reflector/views/rooms.py`, `www/app/[roomName]/components/DailyRoom.tsx` - -**References:** PRD lines 966-1048 - ---- - -## Success Criteria (Overall Project) - -### 
Functional Requirements -- [ ] Both cloud and raw-tracks recordings start when user joins meeting -- [ ] Cloud recording webhook stores S3 key in meeting table (DAILYCO_STORAGE bucket) -- [ ] Raw-tracks webhook triggers existing multitrack pipeline (unchanged) -- [ ] Cloud recording accessible via `/v1/meetings/{id}/cloud-recording` endpoint -- [ ] Transcript page displays cloud recording audio player when available -- [ ] Existing transcription quality unchanged (raw-tracks only) -- [ ] Dead code removed (JWT start_cloud_recording) - -### Validation Requirements -- [ ] Prototype test (Task 0.1) confirms instanceId strategy -- [ ] Multi-participant test (Task 7.2) confirms Daily.co behavior documented -- [ ] End-to-end test (Task 7.1) shows both webhooks and data stored correctly -- [ ] Cloud recording playback works in frontend - -### Non-Functional Requirements -- [ ] No performance degradation in webhook handling -- [ ] Database migration runs without errors -- [ ] Type checking passes (mypy, tsc) - ---- - -## Dependencies - -**Critical Path:** -``` -Task 0.1 (instanceId validation) - ↓ -Task 0.2 + Task 0.3 (cleanup + frontend setup) - ↓ -Task 1.1 + Task 1.2 (database schema) - ↓ -Task 2.1 + Task 2.2 (API client) - ↓ -Task 3.1 (backend endpoint) - ↓ -Task 4.1 (webhook handler) - ↓ -Task 5.1 + Task 6.1 (API response + frontend display) - ↓ -Task 7.1 (E2E test) - ↓ -Task 7.2 (multi-participant test) - ↓ -[Task 7.3 if needed] (lock mechanism) -``` - -**Parallel Work Possible:** -- Tasks 1.1 + 1.2 can be done together (schema + models) -- Tasks 2.1 + 2.2 can be done together (API client layers) -- Tasks 5.1 + 6.1 can be done together (backend response + frontend display) - ---- - -## Rollback Plan - -**Immediate Mitigation (Production Issues):** -1. Revert frontend `DailyRoom.tsx` to remove dual recording start -2. 
This stops cloud recordings, raw-tracks continue normally - -**Database Rollback:** -```bash -docker compose exec server uv run alembic downgrade -1 -``` - -**Code Rollback:** -1. Revert frontend changes (Task 0.3, Task 6.1) -2. Revert webhook handler (Task 4.1) to only handle raw-tracks -3. Keep API endpoints (Task 3.1) - harmless if unused - ---- - -## Storage Impact - -**Current (raw-tracks only):** 80 MB/day = 2.4 GB/month = $0.06/month S3 -**With cloud recording:** 11.5 GB/day = 345 GB/month = $7.94/month S3 - -**Recommendation:** Consider S3 lifecycle policy for cloud MP4s (delete after 90 days) - ---- - -## Estimated Effort - -**Phase 0:** 2-4 hours (prototype + cleanup) -**Phase 1:** 1 hour (database) -**Phase 2:** 1 hour (API client) -**Phase 3:** 2 hours (backend endpoint) -**Phase 4:** 2 hours (webhook handler) -**Phase 5:** 1 hour (meeting API) -**Phase 6:** 2 hours (frontend display) -**Phase 7:** 4 hours (testing) - -**Total:** 15-17 hours base + 4 hours buffer = 19-21 hours (2-3 days) diff --git a/server/reflector/dailyco_api/__init__.py b/server/reflector/dailyco_api/__init__.py index 65be426e..69e94a08 100644 --- a/server/reflector/dailyco_api/__init__.py +++ b/server/reflector/dailyco_api/__init__.py @@ -3,7 +3,7 @@ """ # Client -from .client import DailyApiClient, DailyApiError +from .client import DailyApiClient, DailyApiError, RecordingType # Request models from .requests import ( @@ -64,6 +64,7 @@ # Client "DailyApiClient", "DailyApiError", + "RecordingType", # Requests "CreateRoomRequest", "RoomProperties", diff --git a/server/reflector/dailyco_api/client.py b/server/reflector/dailyco_api/client.py index 43b76d88..bc9c163a 100644 --- a/server/reflector/dailyco_api/client.py +++ b/server/reflector/dailyco_api/client.py @@ -7,7 +7,7 @@ """ from http import HTTPStatus -from typing import Any +from typing import Any, Literal import httpx import structlog @@ -32,6 +32,8 @@ logger = structlog.get_logger(__name__) +RecordingType = Literal["cloud", 
"raw-tracks"] + class DailyApiError(Exception): """Daily.co API error with full request/response context.""" @@ -395,6 +397,38 @@ async def list_recordings( return [RecordingResponse(**r) for r in data["data"]] + async def start_recording( + self, + room_name: NonEmptyString, + recording_type: RecordingType, + instance_id: NonEmptyString, + ) -> dict[str, Any]: + """Start recording via REST API. + + Reference: https://docs.daily.co/reference/rest-api/rooms/recordings/start + + Args: + room_name: Daily.co room name + recording_type: Recording type + instance_id: UUID for this recording session + + Returns: + Recording start confirmation from Daily.co API + + Raises: + DailyApiError: If API request fails + """ + client = await self._get_client() + response = await client.post( + f"{self.base_url}/rooms/{room_name}/recordings/start", + headers=self.headers, + json={ + "type": recording_type, + "instanceId": instance_id, + }, + ) + return await self._handle_response(response, "start_recording") + # ============================================================================ # MEETING TOKENS # ============================================================================ diff --git a/server/reflector/dailyco_api/responses.py b/server/reflector/dailyco_api/responses.py index 6ac95188..21b8fcf0 100644 --- a/server/reflector/dailyco_api/responses.py +++ b/server/reflector/dailyco_api/responses.py @@ -116,6 +116,7 @@ class RecordingS3Info(BaseModel): bucket_name: NonEmptyString bucket_region: NonEmptyString + key: NonEmptyString | None = None endpoint: NonEmptyString | None = None @@ -132,6 +133,9 @@ class RecordingResponse(BaseModel): id: NonEmptyString = Field(description="Recording identifier") room_name: NonEmptyString = Field(description="Room where recording occurred") start_ts: int = Field(description="Recording start timestamp (Unix epoch seconds)") + type: Literal["cloud", "raw-tracks"] | None = Field( + None, description="Recording type (may be missing from API)" + 
) status: RecordingStatus = Field( description="Recording status ('in-progress' or 'finished')" ) @@ -145,6 +149,9 @@ class RecordingResponse(BaseModel): None, description="Token for sharing recording" ) s3: RecordingS3Info | None = Field(None, description="S3 bucket information") + s3key: NonEmptyString | None = Field( + None, description="S3 key for cloud recordings (top-level field)" + ) tracks: list[DailyTrack] = Field( default_factory=list, description="Track list for raw-tracks recordings (always array, never null)", diff --git a/server/reflector/db/meetings.py b/server/reflector/db/meetings.py index 8a80e756..82b6569e 100644 --- a/server/reflector/db/meetings.py +++ b/server/reflector/db/meetings.py @@ -2,7 +2,7 @@ from typing import Any, Literal import sqlalchemy as sa -from pydantic import BaseModel, Field +from pydantic import BaseModel, Field, computed_field from sqlalchemy.dialects.postgresql import JSONB from reflector.db import get_database, metadata @@ -63,6 +63,8 @@ nullable=False, server_default=assert_equal(WHEREBY_PLATFORM, "whereby"), ), + sa.Column("cloud_recording_s3_key", sa.String, nullable=True), + sa.Column("cloud_recording_duration", sa.Integer, nullable=True), sa.Index("idx_meeting_room_id", "room_id"), sa.Index("idx_meeting_calendar_event", "calendar_event_id"), ) @@ -110,6 +112,13 @@ class Meeting(BaseModel): calendar_event_id: str | None = None calendar_metadata: dict[str, Any] | None = None platform: Platform = WHEREBY_PLATFORM + cloud_recording_s3_key: str | None = None + cloud_recording_duration: int | None = None + + @computed_field + @property + def cloud_recording_available(self) -> bool: + return bool(self.cloud_recording_s3_key) class MeetingController: @@ -141,7 +150,9 @@ async def create( calendar_metadata=calendar_metadata, platform=room.platform, ) - query = meetings.insert().values(**meeting.model_dump()) + query = meetings.insert().values( + **meeting.model_dump(exclude={"cloud_recording_available"}) + ) await 
get_database().execute(query) return meeting @@ -171,6 +182,12 @@ async def get_by_room_name( return None return Meeting(**result) + async def get_by_room_name_all(self, room_name: str) -> list[Meeting]: + """Get all meetings for a room name (not just most recent).""" + query = meetings.select().where(meetings.c.room_name == room_name) + results = await get_database().fetch_all(query) + return [Meeting(**dict(r)) for r in results] + async def get_active(self, room: Room, current_time: datetime) -> Meeting | None: """ Get latest active meeting for a room. diff --git a/server/reflector/video_platforms/daily.py b/server/reflector/video_platforms/daily.py index 80962300..c7adbdb3 100644 --- a/server/reflector/video_platforms/daily.py +++ b/server/reflector/video_platforms/daily.py @@ -12,6 +12,7 @@ RoomProperties, verify_webhook_signature, ) +from reflector.dailyco_api import RecordingType as DailyRecordingType from reflector.db.daily_participant_sessions import ( daily_participant_sessions_controller, ) @@ -60,8 +61,8 @@ async def create_meeting( enable_recording = "local" elif ( room.recording_type == self.RECORDING_CLOUD - ): # daily "cloud" is not our "cloud" - enable_recording = "raw-tracks" + ): # For dual recording: don't set enable_recording, start both via REST API + enable_recording = None properties = RoomProperties( enable_recording=enable_recording, @@ -196,6 +197,28 @@ async def create_meeting_token( result = await self._api_client.create_meeting_token(request) return result.token + async def start_recording( + self, + room_name: DailyRoomName, + recording_type: DailyRecordingType, + instance_id: NonEmptyString, + ) -> dict: + """Start recording via Daily.co REST API. 
+ + Args: + room_name: Daily.co room name + recording_type: Recording type + instance_id: UUID for this recording session + + Returns: + Recording start confirmation from Daily.co API + """ + return await self._api_client.start_recording( + room_name=room_name, + recording_type=recording_type, + instance_id=instance_id, + ) + async def close(self): """Clean up API client resources.""" await self._api_client.close() diff --git a/server/reflector/views/daily.py b/server/reflector/views/daily.py index cbdac409..284d1821 100644 --- a/server/reflector/views/daily.py +++ b/server/reflector/views/daily.py @@ -174,46 +174,81 @@ async def _handle_recording_started(event: RecordingStartedEvent): async def _handle_recording_ready(event: RecordingReadyEvent): room_name = event.payload.room_name recording_id = event.payload.recording_id - tracks = event.payload.tracks - - if not tracks: - logger.warning( - "recording.ready-to-download: missing tracks", - room_name=room_name, - recording_id=recording_id, - payload=event.payload, - ) - return + recording_type = event.payload.type # "cloud" or "raw-tracks" logger.info( "Recording ready for download", room_name=room_name, recording_id=recording_id, - num_tracks=len(tracks), + recording_type=recording_type, platform="daily", ) bucket_name = settings.DAILYCO_STORAGE_AWS_BUCKET_NAME if not bucket_name: - logger.error( - "DAILYCO_STORAGE_AWS_BUCKET_NAME not configured; cannot process Daily recording" - ) + logger.error("DAILYCO_STORAGE_AWS_BUCKET_NAME not configured") return - track_keys = [t.s3Key for t in tracks if t.type == "audio"] + if recording_type == "cloud": + # Cloud recording: single MP4 file written by Daily.co to DAILYCO_STORAGE bucket + s3_key = event.payload.s3_key + + # Store cloud recording reference in meeting table + meeting = await meetings_controller.get_by_room_name(room_name) + if not meeting: + logger.warning( + "Cloud recording: meeting not found", + room_name=room_name, + recording_id=recording_id, + ) + 
return + + await meetings_controller.update_meeting( + meeting.id, + cloud_recording_s3_key=s3_key, + cloud_recording_duration=event.payload.duration, + ) - logger.info( - "Recording webhook queuing processing", - recording_id=recording_id, - room_name=room_name, - ) + logger.info( + "Cloud recording stored", + meeting_id=meeting.id, + s3_key=s3_key, + duration=event.payload.duration, + ) - process_multitrack_recording.delay( - bucket_name=bucket_name, - daily_room_name=room_name, - recording_id=recording_id, - track_keys=track_keys, - ) + elif recording_type == "raw-tracks": + # Existing multi-track processing (unchanged) + tracks = event.payload.tracks + if not tracks: + logger.warning( + "raw-tracks recording: missing tracks array", + room_name=room_name, + recording_id=recording_id, + ) + return + + track_keys = [t.s3Key for t in tracks if t.type == "audio"] + + logger.info( + "Raw-tracks recording queuing processing", + recording_id=recording_id, + room_name=room_name, + num_tracks=len(track_keys), + ) + + process_multitrack_recording.delay( + bucket_name=bucket_name, + daily_room_name=room_name, + recording_id=recording_id, + track_keys=track_keys, + ) + + else: + logger.warning( + "Unknown recording type", + recording_type=recording_type, + recording_id=recording_id, + ) async def _handle_recording_error(event: RecordingErrorEvent): diff --git a/server/reflector/views/meetings.py b/server/reflector/views/meetings.py index 25987e47..c9207908 100644 --- a/server/reflector/views/meetings.py +++ b/server/reflector/views/meetings.py @@ -1,16 +1,22 @@ +import logging from datetime import datetime, timezone -from typing import Annotated, Optional +from typing import Annotated, Any, Optional from fastapi import APIRouter, Depends, HTTPException, Request from pydantic import BaseModel import reflector.auth as auth +from reflector.dailyco_api import RecordingType from reflector.db.meetings import ( MeetingConsent, meeting_consent_controller, meetings_controller, ) 
from reflector.db.rooms import rooms_controller +from reflector.utils.string import NonEmptyString +from reflector.video_platforms.factory import create_platform_client + +logger = logging.getLogger(__name__) router = APIRouter() @@ -73,3 +79,53 @@ async def meeting_deactivate( await meetings_controller.update_meeting(meeting_id, is_active=False) return {"status": "success", "meeting_id": meeting_id} + + +class StartRecordingRequest(BaseModel): + type: RecordingType + instanceId: NonEmptyString + + +@router.post("/meetings/{meeting_id}/recordings/start") +async def start_recording( + meeting_id: NonEmptyString, body: StartRecordingRequest +) -> dict[str, Any]: + """Start cloud or raw-tracks recording via Daily.co REST API. + + Both cloud and raw-tracks are started via REST API to bypass enable_recording limitation. + Uses different instanceIds for cloud vs raw-tracks. + + Note: No authentication required - anonymous users supported. + """ + meeting = await meetings_controller.get_by_id(meeting_id) + if not meeting: + raise HTTPException(status_code=404, detail="Meeting not found") + + try: + client = create_platform_client("daily") + result = await client.start_recording( + room_name=meeting.room_name, + recording_type=body.type, + instance_id=body.instanceId, + ) + + logger.info( + f"Started {body.type} recording via REST API", + extra={ + "meeting_id": meeting_id, + "room_name": meeting.room_name, + "recording_type": body.type, + "instance_id": body.instanceId, + }, + ) + + return {"status": "ok", "result": result} + + except Exception as e: + logger.error( + "Failed to start raw-tracks recording", + extra={"meeting_id": meeting_id, "error": str(e)}, + ) + raise HTTPException( + status_code=500, detail=f"Failed to start recording: {str(e)}" + ) diff --git a/server/reflector/worker/app.py b/server/reflector/worker/app.py index b1256c94..cde2fea1 100644 --- a/server/reflector/worker/app.py +++ b/server/reflector/worker/app.py @@ -44,7 +44,7 @@ }, 
"poll_daily_recordings": { "task": "reflector.worker.process.poll_daily_recordings", - "schedule": 180.0, # Every 3 minutes (configurable lookback window) + "schedule": 15.0, # Every 15 seconds for dev (was 180.0 / 3 min for prod) }, "trigger_daily_reconciliation": { "task": "reflector.worker.process.trigger_daily_reconciliation", diff --git a/server/reflector/worker/process.py b/server/reflector/worker/process.py index 466cdef0..1a4eb849 100644 --- a/server/reflector/worker/process.py +++ b/server/reflector/worker/process.py @@ -17,7 +17,7 @@ DailyParticipantSession, daily_participant_sessions_controller, ) -from reflector.db.meetings import meetings_controller +from reflector.db.meetings import Meeting, meetings_controller from reflector.db.recordings import Recording, recordings_controller from reflector.db.rooms import rooms_controller from reflector.db.transcripts import ( @@ -337,9 +337,11 @@ async def poll_daily_recordings(): """Poll Daily.co API for recordings and process missing ones. Fetches latest recordings from Daily.co API (default limit 100), compares with DB, - and queues processing for recordings not already in DB. + and stores/queues missing recordings: + - Cloud recordings: Store S3 key in meeting table + - Raw-tracks recordings: Queue multitrack processing - For each missing recording, uses audio tracks from API response. + Runs periodically (interval set by the `poll_daily_recordings` beat schedule) as fallback for webhook failures. Worker-level locking provides idempotency (see process_multitrack_recording). 
""" @@ -380,51 +382,162 @@ async def poll_daily_recordings(): ) return - recording_ids = [rec.id for rec in finished_recordings] + # Separate cloud and raw-tracks recordings + # Infer type if not provided by API: + # - Cloud recordings: s3key exists, tracks array EMPTY + # - Raw-tracks: s3key exists, tracks array HAS items (audio files) + cloud_recordings = [] + raw_tracks_recordings = [] + for rec in finished_recordings: + if rec.type: + inferred_type = rec.type + elif len(rec.tracks) > 0: + # Has tracks = raw-tracks (even if s3key exists) + inferred_type = "raw-tracks" + elif rec.s3key and len(rec.tracks) == 0: + # Has s3key but no tracks = cloud recording + inferred_type = "cloud" + else: + # Fallback + inferred_type = "raw-tracks" + + if inferred_type == "cloud": + cloud_recordings.append(rec) + else: + raw_tracks_recordings.append(rec) + + logger.debug( + "Poll results", + total=len(finished_recordings), + cloud=len(cloud_recordings), + raw_tracks=len(raw_tracks_recordings), + ) + + # Process cloud recordings + await _poll_cloud_recordings(cloud_recordings) + + # Process raw-tracks recordings (existing logic) + await _poll_raw_tracks_recordings(raw_tracks_recordings, bucket_name) + + +async def _poll_cloud_recordings(cloud_recordings: List[FinishedRecordingResponse]): + """Store cloud recordings missing from meeting table.""" + if not cloud_recordings: + return + + # Get all meetings with matching room names + room_names = list({rec.room_name for rec in cloud_recordings}) + meetings_by_room: dict[str, List[Meeting]] = {} + + for room_name in room_names: + meetings = await meetings_controller.get_by_room_name_all(room_name) + if meetings: + meetings_by_room[room_name] = meetings + + stored_count = 0 + for recording in cloud_recordings: + meetings = meetings_by_room.get(recording.room_name, []) + if not meetings: + logger.warning( + "Cloud recording: no meetings found for room", + recording_id=recording.id, + room_name=recording.room_name, + ) + continue + + # 
Pick the first meeting that has no cloud recording yet (no time-overlap check) + # TODO: improve matching logic (recording.start_ts, meeting.start_date) + target_meeting = None + for meeting in meetings: + if not meeting.cloud_recording_s3_key: + target_meeting = meeting + break + + if not target_meeting: + logger.debug( + "Cloud recording: all meetings already have cloud recordings", + recording_id=recording.id, + room_name=recording.room_name, + ) + continue + + # Extract S3 key from recording (cloud recordings use s3key field) + s3_key = recording.s3key or (recording.s3.key if recording.s3 else None) + if not s3_key: + logger.warning( + "Cloud recording: missing S3 key", + recording_id=recording.id, + room_name=recording.room_name, + ) + continue + + await meetings_controller.update_meeting( + target_meeting.id, + cloud_recording_s3_key=s3_key, + cloud_recording_duration=recording.duration, + ) + + logger.info( + "Cloud recording stored via polling", + meeting_id=target_meeting.id, + recording_id=recording.id, + s3_key=s3_key, + duration=recording.duration, + ) + stored_count += 1 + + logger.info( + "Cloud recording polling complete", + total=len(cloud_recordings), + stored=stored_count, + ) + + +async def _poll_raw_tracks_recordings( + raw_tracks_recordings: List[FinishedRecordingResponse], + bucket_name: str, +): + """Queue raw-tracks recordings missing from DB (existing logic).""" + if not raw_tracks_recordings: + return + + recording_ids = [rec.id for rec in raw_tracks_recordings] existing_recordings = await recordings_controller.get_by_ids(recording_ids) existing_ids = {rec.id for rec in existing_recordings} missing_recordings = [ - rec for rec in finished_recordings if rec.id not in existing_ids + rec for rec in raw_tracks_recordings if rec.id not in existing_ids ] if not missing_recordings: logger.debug( - "All recordings already in DB", - api_count=len(finished_recordings), + "All raw-tracks recordings already in DB", + api_count=len(raw_tracks_recordings), 
existing_count=len(existing_recordings), ) return logger.info( - "Found recordings missing from DB", + "Found raw-tracks recordings missing from DB", missing_count=len(missing_recordings), - total_api_count=len(finished_recordings), + total_api_count=len(raw_tracks_recordings), existing_count=len(existing_recordings), ) for recording in missing_recordings: if not recording.tracks: - if recording.status == "finished": - logger.warning( - "Finished recording has no tracks (no audio captured)", - recording_id=recording.id, - room_name=recording.room_name, - ) - else: - logger.debug( - "No tracks in recording yet", - recording_id=recording.id, - room_name=recording.room_name, - status=recording.status, - ) + logger.warning( + "Finished raw-tracks recording has no tracks (no audio captured)", + recording_id=recording.id, + room_name=recording.room_name, + ) continue track_keys = [t.s3Key for t in recording.tracks if t.type == "audio"] if not track_keys: logger.warning( - "No audio tracks found in recording (only video tracks)", + "No audio tracks found in raw-tracks recording", recording_id=recording.id, room_name=recording.room_name, total_tracks=len(recording.tracks), @@ -432,7 +545,7 @@ async def poll_daily_recordings(): continue logger.info( - "Queueing missing recording for processing", + "Queueing missing raw-tracks recording for processing", recording_id=recording.id, room_name=recording.room_name, track_count=len(track_keys), diff --git a/www/app/(app)/transcripts/[transcriptId]/finalSummary.tsx b/www/app/(app)/transcripts/[transcriptId]/finalSummary.tsx index d7ba37dc..500c4cc5 100644 --- a/www/app/(app)/transcripts/[transcriptId]/finalSummary.tsx +++ b/www/app/(app)/transcripts/[transcriptId]/finalSummary.tsx @@ -3,7 +3,8 @@ import React from "react"; import Markdown from "react-markdown"; import "../../../styles/markdown.css"; import type { components } from "../../../reflector-api"; -type GetTranscript = components["schemas"]["GetTranscript"]; +type 
GetTranscriptWithParticipants = + components["schemas"]["GetTranscriptWithParticipants"]; type GetTranscriptTopic = components["schemas"]["GetTranscriptTopic"]; import { useTranscriptUpdate } from "../../../lib/apiHooks"; import { @@ -18,7 +19,7 @@ import { LuPen } from "react-icons/lu"; import { useError } from "../../../(errors)/errorContext"; type FinalSummaryProps = { - transcript: GetTranscript; + transcript: GetTranscriptWithParticipants; topics: GetTranscriptTopic[]; onUpdate: (newSummary: string) => void; finalSummaryRef: React.Dispatch>; diff --git a/www/app/(app)/transcripts/createTranscript.ts b/www/app/(app)/transcripts/createTranscript.ts index 8a235161..0991130f 100644 --- a/www/app/(app)/transcripts/createTranscript.ts +++ b/www/app/(app)/transcripts/createTranscript.ts @@ -2,10 +2,11 @@ import type { components } from "../../reflector-api"; import { useTranscriptCreate } from "../../lib/apiHooks"; type CreateTranscript = components["schemas"]["CreateTranscript"]; -type GetTranscript = components["schemas"]["GetTranscript"]; +type GetTranscriptWithParticipants = + components["schemas"]["GetTranscriptWithParticipants"]; type UseCreateTranscript = { - transcript: GetTranscript | null; + transcript: GetTranscriptWithParticipants | null; loading: boolean; error: Error | null; create: (transcriptCreationDetails: CreateTranscript) => Promise; diff --git a/www/app/(app)/transcripts/shareAndPrivacy.tsx b/www/app/(app)/transcripts/shareAndPrivacy.tsx index 04cda920..207d900f 100644 --- a/www/app/(app)/transcripts/shareAndPrivacy.tsx +++ b/www/app/(app)/transcripts/shareAndPrivacy.tsx @@ -2,7 +2,8 @@ import { useEffect, useState } from "react"; import { ShareMode, toShareMode } from "../../lib/shareMode"; import type { components } from "../../reflector-api"; -type GetTranscript = components["schemas"]["GetTranscript"]; +type GetTranscriptWithParticipants = + components["schemas"]["GetTranscriptWithParticipants"]; type GetTranscriptTopic = 
components["schemas"]["GetTranscriptTopic"]; type UpdateTranscript = components["schemas"]["UpdateTranscript"]; import { @@ -27,7 +28,7 @@ import { featureEnabled } from "../../lib/features"; type ShareAndPrivacyProps = { finalSummaryElement: HTMLDivElement | null; - transcript: GetTranscript; + transcript: GetTranscriptWithParticipants; topics: GetTranscriptTopic[]; }; diff --git a/www/app/(app)/transcripts/shareZulip.tsx b/www/app/(app)/transcripts/shareZulip.tsx index c3efe3ab..96242de2 100644 --- a/www/app/(app)/transcripts/shareZulip.tsx +++ b/www/app/(app)/transcripts/shareZulip.tsx @@ -1,7 +1,8 @@ import { useState, useEffect, useMemo } from "react"; import type { components } from "../../reflector-api"; -type GetTranscript = components["schemas"]["GetTranscript"]; +type GetTranscriptWithParticipants = + components["schemas"]["GetTranscriptWithParticipants"]; type GetTranscriptTopic = components["schemas"]["GetTranscriptTopic"]; import { BoxProps, @@ -26,7 +27,7 @@ import { import { featureEnabled } from "../../lib/features"; type ShareZulipProps = { - transcript: GetTranscript; + transcript: GetTranscriptWithParticipants; topics: GetTranscriptTopic[]; disabled: boolean; }; diff --git a/www/app/(app)/transcripts/transcriptTitle.tsx b/www/app/(app)/transcripts/transcriptTitle.tsx index 49a22c71..ea738673 100644 --- a/www/app/(app)/transcripts/transcriptTitle.tsx +++ b/www/app/(app)/transcripts/transcriptTitle.tsx @@ -2,7 +2,8 @@ import { useState } from "react"; import type { components } from "../../reflector-api"; type UpdateTranscript = components["schemas"]["UpdateTranscript"]; -type GetTranscript = components["schemas"]["GetTranscript"]; +type GetTranscriptWithParticipants = + components["schemas"]["GetTranscriptWithParticipants"]; type GetTranscriptTopic = components["schemas"]["GetTranscriptTopic"]; import { useTranscriptUpdate, @@ -20,7 +21,7 @@ type TranscriptTitle = { onUpdate: (newTitle: string) => void; // share props - transcript: GetTranscript | 
null; + transcript: GetTranscriptWithParticipants | null; topics: GetTranscriptTopic[] | null; finalSummaryElement: HTMLDivElement | null; }; diff --git a/www/app/[roomName]/components/DailyRoom.tsx b/www/app/[roomName]/components/DailyRoom.tsx index 44fa6315..1ef6e6af 100644 --- a/www/app/[roomName]/components/DailyRoom.tsx +++ b/www/app/[roomName]/components/DailyRoom.tsx @@ -22,14 +22,22 @@ import DailyIframe, { import type { components } from "../../reflector-api"; import { useAuth } from "../../lib/AuthProvider"; import { useConsentDialog } from "../../lib/consent"; -import { useRoomJoinMeeting } from "../../lib/apiHooks"; +import { + useRoomJoinMeeting, + useMeetingStartRecording, +} from "../../lib/apiHooks"; import { omit } from "remeda"; import { assertExists } from "../../lib/utils"; -import { assertMeetingId } from "../../lib/types"; +import { assertMeetingId, DailyRecordingType } from "../../lib/types"; +import { v5 as uuidv5 } from "uuid"; const CONSENT_BUTTON_ID = "recording-consent"; const RECORDING_INDICATOR_ID = "recording-indicator"; +// Namespace UUID for generating deterministic raw-tracks instanceIds +// Generated once for this application: uuidv4() +const RAW_TRACKS_NAMESPACE = "a1b2c3d4-e5f6-7890-abcd-ef1234567890"; + type Meeting = components["schemas"]["Meeting"]; type Room = components["schemas"]["RoomDetails"]; @@ -74,7 +82,10 @@ const useFrame = ( onLeftMeeting: () => void; onCustomButtonClick: (ev: DailyEventObjectCustomButtonClick) => void; onJoinMeeting: ( - startRecording: (args: { type: "raw-tracks" }) => void, + startRecording: (args: { + type: DailyRecordingType; + instanceId: string; + }) => void, ) => void; }, ) => { @@ -173,8 +184,18 @@ export default function DailyRoom({ meeting, room }: DailyRoomProps) { const authLastUserId = auth.lastUserId; const [container, setContainer] = useState(null); const joinMutation = useRoomJoinMeeting(); + const startRecordingMutation = useMeetingStartRecording(); const [joinedMeeting, 
setJoinedMeeting] = useState(null); + // Generate deterministic instanceIds so all participants use SAME IDs + // Cloud and raw-tracks need DIFFERENT instanceIds (Daily.co restriction) + // useMemo ensures stable values across React StrictMode double-renders + const cloudInstanceId = useMemo(() => meeting.id, [meeting.id]); + const rawTracksInstanceId = useMemo( + () => uuidv5(meeting.id, RAW_TRACKS_NAMESPACE), + [meeting.id], + ); + const roomName = params?.roomName as string; const { @@ -229,17 +250,89 @@ export default function DailyRoom({ meeting, room }: DailyRoomProps) { ); const handleFrameJoinMeeting = useCallback( - (startRecording: (args: { type: "raw-tracks" }) => void) => { + ( + startRecording: (args: { + type: DailyRecordingType; + instanceId: string; + }) => void, + ) => { try { if (meeting.recording_type === "cloud") { - console.log("Starting cloud recording"); - startRecording({ type: "raw-tracks" }); + console.log("Starting dual recording via REST API", { + cloudInstanceId, + rawTracksInstanceId, + }); + + // Start both cloud and raw-tracks via backend REST API (with retry on 404) + // Daily.co needs time to register call as "hosting" for REST API + const startRecordingWithRetry = ( + type: DailyRecordingType, + instanceId: string, + attempt: number = 1, + ) => { + setTimeout( + () => { + startRecordingMutation.mutate( + { + params: { + path: { + meeting_id: meeting.id, + }, + }, + body: { + type, + instanceId, + }, + }, + { + onError: (error: any) => { + const errorText = error?.detail || error?.message || ""; + const is404NotHosting = errorText.includes( + "does not seem to be hosting a call", + ); + const isActiveStream = errorText.includes( + "has an active stream", + ); + + if (is404NotHosting && attempt < 5) { + console.log( + `${type}: Call not hosting yet, retry ${attempt + 1}/5 in 2s...`, + ); + startRecordingWithRetry(type, instanceId, attempt + 1); + } else if (isActiveStream) { + // Expected: another participant already started 
recording with same instanceId + console.log( + `${type}: Recording already active (started by another participant)`, + ); + } else { + console.error( + `Failed to start ${type} recording:`, + error, + ); + } + }, + }, + ); + }, + attempt === 1 ? 2000 : 2000, + ); // 2s initial delay, then 2s between retries + }; + + // Start both recordings + startRecordingWithRetry("cloud", cloudInstanceId); + startRecordingWithRetry("raw-tracks", rawTracksInstanceId); } } catch (error) { - console.error("Failed to start recording:", error); + console.error("Failed to start recordings:", error); } }, - [meeting.recording_type], + [ + meeting.recording_type, + meeting.id, + startRecordingMutation, + cloudInstanceId, + rawTracksInstanceId, + ], ); const recordingIconUrl = useMemo( diff --git a/www/app/lib/apiHooks.ts b/www/app/lib/apiHooks.ts index a59c31eb..a00eb552 100644 --- a/www/app/lib/apiHooks.ts +++ b/www/app/lib/apiHooks.ts @@ -567,6 +567,20 @@ export function useTranscriptSpeakerMerge() { ); } +export function useMeetingStartRecording() { + const { setError } = useError(); + + return $api.useMutation( + "post", + "/v1/meetings/{meeting_id}/recordings/start", + { + onError: (error) => { + setError(error as Error, "Failed to start recording"); + }, + }, + ); +} + export function useMeetingAudioConsent() { const { setError } = useError(); diff --git a/www/app/lib/transcript.ts b/www/app/lib/transcript.ts index d1fd8b3d..f23a7c38 100644 --- a/www/app/lib/transcript.ts +++ b/www/app/lib/transcript.ts @@ -1,5 +1,6 @@ import { components } from "../reflector-api"; -type ApiTranscriptStatus = components["schemas"]["GetTranscript"]["status"]; +type ApiTranscriptStatus = + components["schemas"]["GetTranscriptWithParticipants"]["status"]; export type TranscriptStatus = ApiTranscriptStatus; diff --git a/www/app/lib/types.ts b/www/app/lib/types.ts index c5ab8ce7..00267ea0 100644 --- a/www/app/lib/types.ts +++ b/www/app/lib/types.ts @@ -89,3 +89,6 @@ export const assertMeetingId = (s: 
string): MeetingId => { // just cast for now return nes as MeetingId; }; + +// Daily.co recording types for API calls +export type DailyRecordingType = "cloud" | "raw-tracks"; diff --git a/www/app/reflector-api.d.ts b/www/app/reflector-api.d.ts index 3704a9a0..09122b21 100644 --- a/www/app/reflector-api.d.ts +++ b/www/app/reflector-api.d.ts @@ -75,6 +75,31 @@ export interface paths { patch: operations["v1_meeting_deactivate"]; trace?: never; }; + "/v1/meetings/{meeting_id}/recordings/start": { + parameters: { + query?: never; + header?: never; + path?: never; + cookie?: never; + }; + get?: never; + put?: never; + /** + * Start Recording + * @description Start raw-tracks recording via Daily.co REST API. + * + * Called by frontend after starting cloud recording via daily-js. + * Uses same instanceId to link both recordings. + * + * Note: No authentication required - anonymous users supported. + */ + post: operations["v1_start_recording"]; + delete?: never; + options?: never; + head?: never; + patch?: never; + trace?: never; + }; "/v1/rooms": { parameters: { query?: never; @@ -1818,6 +1843,19 @@ export interface components { /** Words */ words: components["schemas"]["Word"][]; }; + /** StartRecordingRequest */ + StartRecordingRequest: { + /** + * Type + * @enum {string} + */ + type: "cloud" | "raw-tracks"; + /** + * Instanceid + * @description A non-empty string + */ + instanceId: string; + }; /** Stream */ Stream: { /** Stream Id */ @@ -2126,6 +2164,43 @@ export interface operations { }; }; }; + v1_start_recording: { + parameters: { + query?: never; + header?: never; + path: { + meeting_id: string; + }; + cookie?: never; + }; + requestBody: { + content: { + "application/json": components["schemas"]["StartRecordingRequest"]; + }; + }; + responses: { + /** @description Successful Response */ + 200: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": { + [key: string]: unknown; + }; + }; + }; + /** @description Validation Error */ + 422: { 
+ headers: { + [name: string]: unknown; + }; + content: { + "application/json": components["schemas"]["HTTPValidationError"]; + }; + }; + }; + }; v1_rooms_list: { parameters: { query?: { diff --git a/www/package.json b/www/package.json index 13895a3a..f00341e4 100644 --- a/www/package.json +++ b/www/package.json @@ -52,6 +52,7 @@ "simple-peer": "^9.11.1", "tailwindcss": "^3.3.2", "typescript": "^5.1.6", + "uuid": "^13.0.0", "wavesurfer.js": "^7.4.2", "zod": "^4.1.5" }, diff --git a/www/pnpm-lock.yaml b/www/pnpm-lock.yaml index 4cc219ea..342fe72b 100644 --- a/www/pnpm-lock.yaml +++ b/www/pnpm-lock.yaml @@ -124,6 +124,9 @@ importers: typescript: specifier: ^5.1.6 version: 5.9.2 + uuid: + specifier: ^13.0.0 + version: 13.0.0 wavesurfer.js: specifier: ^7.4.2 version: 7.10.1 @@ -8771,6 +8774,13 @@ packages: integrity: sha512-Fykw5U4eZESbq739BeLvEBFRuJODfrlmjx5eJux7W817LjRaq4b7/i4t2zxQmhcX+fAj4nMfRdTzO4tmwLKn0w==, } + uuid@13.0.0: + resolution: + { + integrity: sha512-XQegIaBTVUjSHliKqcnFqYypAd4S+WCYt5NIeRs6w/UAry7z8Y9j5ZwRRL4kzq9U3sD6v+85er9FvkEaBpji2w==, + } + hasBin: true + uuid@8.3.2: resolution: { @@ -15401,6 +15411,8 @@ snapshots: uuid-validate@0.0.3: {} + uuid@13.0.0: {} + uuid@8.3.2: {} uuid@9.0.1: {} From dbe9477ca1711a731477aea941808ced9566f7b4 Mon Sep 17 00:00:00 2001 From: Igor Loskutov Date: Sat, 10 Jan 2026 11:13:52 -0500 Subject: [PATCH 04/20] brady bunch prototype (no-mistakes) review --- server/reflector/dailyco_api/requests.py | 7 - server/reflector/db/meetings.py | 2 +- server/reflector/video_platforms/daily.py | 10 +- server/reflector/views/meetings.py | 8 +- server/reflector/worker/app.py | 5 +- server/reflector/worker/process.py | 10 +- www/app/[roomName]/components/DailyRoom.tsx | 168 +++++++++----------- 7 files changed, 99 insertions(+), 111 deletions(-) diff --git a/server/reflector/dailyco_api/requests.py b/server/reflector/dailyco_api/requests.py index 0adf892b..885579e0 100644 --- a/server/reflector/dailyco_api/requests.py +++ 
b/server/reflector/dailyco_api/requests.py @@ -88,13 +88,6 @@ class MeetingTokenProperties(BaseModel): is_owner: bool = Field( default=False, description="Grant owner privileges to token holder" ) - start_cloud_recording: bool = Field( - default=False, description="Automatically start cloud recording on join" - ) - start_cloud_recording_opts: dict | None = Field( - default=None, - description="Options for startRecording when start_cloud_recording is true (e.g., maxDuration)", - ) enable_recording_ui: bool = Field( default=True, description="Show recording controls in UI" ) diff --git a/server/reflector/db/meetings.py b/server/reflector/db/meetings.py index 82b6569e..c0c541b5 100644 --- a/server/reflector/db/meetings.py +++ b/server/reflector/db/meetings.py @@ -186,7 +186,7 @@ async def get_by_room_name_all(self, room_name: str) -> list[Meeting]: """Get all meetings for a room name (not just most recent).""" query = meetings.select().where(meetings.c.room_name == room_name) results = await get_database().fetch_all(query) - return [Meeting(**dict(r)) for r in results] + return [Meeting(**r) for r in results] async def get_active(self, room: Room, current_time: datetime) -> Meeting | None: """ diff --git a/server/reflector/video_platforms/daily.py b/server/reflector/video_platforms/daily.py index c7adbdb3..7a5423d7 100644 --- a/server/reflector/video_platforms/daily.py +++ b/server/reflector/video_platforms/daily.py @@ -16,6 +16,7 @@ from reflector.db.daily_participant_sessions import ( daily_participant_sessions_controller, ) +from reflector.db.meetings import meetings_controller from reflector.db.rooms import Room from reflector.logger import logger from reflector.storage import get_dailyco_storage @@ -59,9 +60,8 @@ async def create_meeting( enable_recording = None if room.recording_type == self.RECORDING_LOCAL: enable_recording = "local" - elif ( - room.recording_type == self.RECORDING_CLOUD - ): # For dual recording: don't set enable_recording, start both via REST 
API + elif room.recording_type == self.RECORDING_CLOUD: + # Don't set enable_recording - recordings started via REST API (not auto-start) enable_recording = None properties = RoomProperties( @@ -107,8 +107,6 @@ async def get_room_sessions(self, room_name: str) -> list[SessionData]: Daily.co doesn't provide historical session API, so we query our database where participant.joined/left webhooks are stored. """ - from reflector.db.meetings import meetings_controller # noqa: PLC0415 - meeting = await meetings_controller.get_by_room_name(room_name) if not meeting: return [] @@ -188,8 +186,6 @@ async def create_meeting_token( properties = MeetingTokenProperties( room_name=room_name, user_id=user_id, - start_cloud_recording=False, - start_cloud_recording_opts=None, enable_recording_ui=enable_recording_ui, is_owner=is_owner, ) diff --git a/server/reflector/views/meetings.py b/server/reflector/views/meetings.py index c9207908..95ccd76d 100644 --- a/server/reflector/views/meetings.py +++ b/server/reflector/views/meetings.py @@ -123,8 +123,12 @@ async def start_recording( except Exception as e: logger.error( - "Failed to start raw-tracks recording", - extra={"meeting_id": meeting_id, "error": str(e)}, + f"Failed to start {body.type} recording", + extra={ + "meeting_id": meeting_id, + "recording_type": body.type, + "error": str(e), + }, ) raise HTTPException( status_code=500, detail=f"Failed to start recording: {str(e)}" diff --git a/server/reflector/worker/app.py b/server/reflector/worker/app.py index cde2fea1..318d5077 100644 --- a/server/reflector/worker/app.py +++ b/server/reflector/worker/app.py @@ -6,6 +6,9 @@ from reflector.settings import settings logger = structlog.get_logger(__name__) + +# Polling intervals (seconds) +POLL_DAILY_RECORDINGS_INTERVAL_SEC = 15.0 # Dev: 15s, Prod: 180s if celery.current_app.main != "default": logger.info(f"Celery already configured ({celery.current_app})") app = celery.current_app @@ -44,7 +47,7 @@ }, "poll_daily_recordings": { "task": 
"reflector.worker.process.poll_daily_recordings", - "schedule": 15.0, # Every 15 seconds for dev (was 180.0 / 3 min for prod) + "schedule": POLL_DAILY_RECORDINGS_INTERVAL_SEC, }, "trigger_daily_reconciliation": { "task": "reflector.worker.process.trigger_daily_reconciliation", diff --git a/server/reflector/worker/process.py b/server/reflector/worker/process.py index 1a4eb849..07682606 100644 --- a/server/reflector/worker/process.py +++ b/server/reflector/worker/process.py @@ -398,8 +398,14 @@ async def poll_daily_recordings(): # Has s3key but no tracks = cloud recording inferred_type = "cloud" else: - # Fallback - inferred_type = "raw-tracks" + logger.warning( + "Cannot determine recording type, skipping", + recording_id=rec.id, + room_name=rec.room_name, + has_s3key=bool(rec.s3key), + tracks_count=len(rec.tracks), + ) + continue if inferred_type == "cloud": cloud_recordings.append(rec) diff --git a/www/app/[roomName]/components/DailyRoom.tsx b/www/app/[roomName]/components/DailyRoom.tsx index 1ef6e6af..ba1c86c6 100644 --- a/www/app/[roomName]/components/DailyRoom.tsx +++ b/www/app/[roomName]/components/DailyRoom.tsx @@ -34,10 +34,13 @@ import { v5 as uuidv5 } from "uuid"; const CONSENT_BUTTON_ID = "recording-consent"; const RECORDING_INDICATOR_ID = "recording-indicator"; -// Namespace UUID for generating deterministic raw-tracks instanceIds -// Generated once for this application: uuidv4() +// Namespace UUID for UUIDv5 generation of raw-tracks instanceIds +// DO NOT CHANGE: Breaks instanceId determinism across deployments const RAW_TRACKS_NAMESPACE = "a1b2c3d4-e5f6-7890-abcd-ef1234567890"; +const RECORDING_START_DELAY_MS = 2000; +const RECORDING_START_MAX_RETRIES = 5; + type Meeting = components["schemas"]["Meeting"]; type Room = components["schemas"]["RoomDetails"]; @@ -81,12 +84,7 @@ const useFrame = ( cbs: { onLeftMeeting: () => void; onCustomButtonClick: (ev: DailyEventObjectCustomButtonClick) => void; - onJoinMeeting: ( - startRecording: (args: { - type: 
DailyRecordingType; - instanceId: string; - }) => void, - ) => void; + onJoinMeeting: () => void; }, ) => { const [{ frame, joined }, setState] = useState(USE_FRAME_INIT_STATE); @@ -137,7 +135,7 @@ const useFrame = ( console.error("frame is null in joined-meeting callback"); return; } - cbs.onJoinMeeting(frame.startRecording.bind(frame)); + cbs.onJoinMeeting(); }; frame.on("joined-meeting", joinCb); return () => { @@ -249,91 +247,79 @@ export default function DailyRoom({ meeting, room }: DailyRoomProps) { ], ); - const handleFrameJoinMeeting = useCallback( - ( - startRecording: (args: { - type: DailyRecordingType; - instanceId: string; - }) => void, - ) => { - try { - if (meeting.recording_type === "cloud") { - console.log("Starting dual recording via REST API", { - cloudInstanceId, - rawTracksInstanceId, - }); - - // Start both cloud and raw-tracks via backend REST API (with retry on 404) - // Daily.co needs time to register call as "hosting" for REST API - const startRecordingWithRetry = ( - type: DailyRecordingType, - instanceId: string, - attempt: number = 1, - ) => { - setTimeout( - () => { - startRecordingMutation.mutate( - { - params: { - path: { - meeting_id: meeting.id, - }, - }, - body: { - type, - instanceId, - }, - }, - { - onError: (error: any) => { - const errorText = error?.detail || error?.message || ""; - const is404NotHosting = errorText.includes( - "does not seem to be hosting a call", - ); - const isActiveStream = errorText.includes( - "has an active stream", - ); - - if (is404NotHosting && attempt < 5) { - console.log( - `${type}: Call not hosting yet, retry ${attempt + 1}/5 in 2s...`, - ); - startRecordingWithRetry(type, instanceId, attempt + 1); - } else if (isActiveStream) { - // Expected: another participant already started recording with same instanceId - console.log( - `${type}: Recording already active (started by another participant)`, - ); - } else { - console.error( - `Failed to start ${type} recording:`, - error, - ); - } - }, + 
const handleFrameJoinMeeting = useCallback(() => { + try { + if (meeting.recording_type === "cloud") { + console.log("Starting dual recording via REST API", { + cloudInstanceId, + rawTracksInstanceId, + }); + + // Start both cloud and raw-tracks via backend REST API (with retry on 404) + // Daily.co needs time to register call as "hosting" for REST API + const startRecordingWithRetry = ( + type: DailyRecordingType, + instanceId: string, + attempt: number = 1, + ) => { + setTimeout(() => { + startRecordingMutation.mutate( + { + params: { + path: { + meeting_id: meeting.id, }, - ); + }, + body: { + type, + instanceId, + }, }, - attempt === 1 ? 2000 : 2000, - ); // 2s initial delay, then 2s between retries - }; - - // Start both recordings - startRecordingWithRetry("cloud", cloudInstanceId); - startRecordingWithRetry("raw-tracks", rawTracksInstanceId); - } - } catch (error) { - console.error("Failed to start recordings:", error); + { + onError: (error: any) => { + const errorText = error?.detail || error?.message || ""; + const is404NotHosting = errorText.includes( + "does not seem to be hosting a call", + ); + const isActiveStream = errorText.includes( + "has an active stream", + ); + + if ( + is404NotHosting && + attempt < RECORDING_START_MAX_RETRIES + ) { + console.log( + `${type}: Call not hosting yet, retry ${attempt + 1}/${RECORDING_START_MAX_RETRIES} in ${RECORDING_START_DELAY_MS}ms...`, + ); + startRecordingWithRetry(type, instanceId, attempt + 1); + } else if (isActiveStream) { + console.log( + `${type}: Recording already active (started by another participant)`, + ); + } else { + console.error(`Failed to start ${type} recording:`, error); + } + }, + }, + ); + }, RECORDING_START_DELAY_MS); + }; + + // Start both recordings + startRecordingWithRetry("cloud", cloudInstanceId); + startRecordingWithRetry("raw-tracks", rawTracksInstanceId); } - }, - [ - meeting.recording_type, - meeting.id, - startRecordingMutation, - cloudInstanceId, - rawTracksInstanceId, - ], - 
); + } catch (error) { + console.error("Failed to start recordings:", error); + } + }, [ + meeting.recording_type, + meeting.id, + startRecordingMutation, + cloudInstanceId, + rawTracksInstanceId, + ]); const recordingIconUrl = useMemo( () => new URL("/recording-icon.svg", window.location.origin), From fedb3111e217c339f8e3855faf9592d3490f6605 Mon Sep 17 00:00:00 2001 From: Igor Loskutov Date: Tue, 13 Jan 2026 15:41:33 -0500 Subject: [PATCH 05/20] self-review --- ...b1e6a6fc465_add_cloud_recording_support.py | 72 +++++++++++++++++++ server/reflector/dailyco_api/client.py | 5 +- server/reflector/db/meetings.py | 16 +++-- server/reflector/video_platforms/daily.py | 3 +- server/reflector/views/daily.py | 9 ++- server/reflector/views/meetings.py | 5 +- server/reflector/worker/app.py | 4 +- server/reflector/worker/process.py | 33 +++++---- 8 files changed, 112 insertions(+), 35 deletions(-) create mode 100644 server/migrations/versions/1b1e6a6fc465_add_cloud_recording_support.py diff --git a/server/migrations/versions/1b1e6a6fc465_add_cloud_recording_support.py b/server/migrations/versions/1b1e6a6fc465_add_cloud_recording_support.py new file mode 100644 index 00000000..51af37a7 --- /dev/null +++ b/server/migrations/versions/1b1e6a6fc465_add_cloud_recording_support.py @@ -0,0 +1,72 @@ +"""add cloud recording support + +Revision ID: 1b1e6a6fc465 +Revises: bd3a729bb379 +Create Date: 2026-01-09 17:17:33.535620 + +""" + +from typing import Sequence, Union + +import sqlalchemy as sa +from alembic import op + +# revision identifiers, used by Alembic. +revision: str = "1b1e6a6fc465" +down_revision: Union[str, None] = "bd3a729bb379" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + # ### commands auto generated by Alembic - please adjust! 
### + with op.batch_alter_table("meeting", schema=None) as batch_op: + batch_op.add_column( + sa.Column("daily_composed_video_s3_key", sa.String(), nullable=True) + ) + batch_op.add_column( + sa.Column("daily_composed_video_duration", sa.Integer(), nullable=True) + ) + batch_op.drop_index( + batch_op.f("idx_meeting_idempotency"), + postgresql_where="(idempotency_key IS NOT NULL)", + ) + batch_op.drop_column("idempotency_key") + + with op.batch_alter_table("user_api_key", schema=None) as batch_op: + batch_op.drop_constraint( + batch_op.f("user_token_token_hash_key"), type_="unique" + ) + batch_op.drop_index(batch_op.f("idx_user_api_key_hash")) + batch_op.create_index("idx_user_api_key_hash", ["key_hash"], unique=True) + + # ### end Alembic commands ### + + +def downgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + with op.batch_alter_table("user_api_key", schema=None) as batch_op: + batch_op.drop_index("idx_user_api_key_hash") + batch_op.create_index( + batch_op.f("idx_user_api_key_hash"), ["key_hash"], unique=False + ) + batch_op.create_unique_constraint( + batch_op.f("user_token_token_hash_key"), ["key_hash"] + ) + + with op.batch_alter_table("meeting", schema=None) as batch_op: + batch_op.add_column( + sa.Column( + "idempotency_key", sa.VARCHAR(), autoincrement=False, nullable=True + ) + ) + batch_op.create_index( + batch_op.f("idx_meeting_idempotency"), + ["room_id", "idempotency_key"], + unique=True, + postgresql_where="(idempotency_key IS NOT NULL)", + ) + batch_op.drop_column("daily_composed_video_duration") + batch_op.drop_column("daily_composed_video_s3_key") + + # ### end Alembic commands ### diff --git a/server/reflector/dailyco_api/client.py b/server/reflector/dailyco_api/client.py index bc9c163a..8634039f 100644 --- a/server/reflector/dailyco_api/client.py +++ b/server/reflector/dailyco_api/client.py @@ -8,6 +8,7 @@ from http import HTTPStatus from typing import Any, Literal +from uuid import UUID import httpx import 
structlog @@ -401,7 +402,7 @@ async def start_recording( self, room_name: NonEmptyString, recording_type: RecordingType, - instance_id: NonEmptyString, + instance_id: UUID, ) -> dict[str, Any]: """Start recording via REST API. @@ -424,7 +425,7 @@ async def start_recording( headers=self.headers, json={ "type": recording_type, - "instanceId": instance_id, + "instanceId": str(instance_id), }, ) return await self._handle_response(response, "start_recording") diff --git a/server/reflector/db/meetings.py b/server/reflector/db/meetings.py index c0c541b5..3c6cc93e 100644 --- a/server/reflector/db/meetings.py +++ b/server/reflector/db/meetings.py @@ -63,8 +63,9 @@ nullable=False, server_default=assert_equal(WHEREBY_PLATFORM, "whereby"), ), - sa.Column("cloud_recording_s3_key", sa.String, nullable=True), - sa.Column("cloud_recording_duration", sa.Integer, nullable=True), + # Daily.co composed video (Brady Bunch grid layout) - Daily.co only, not Whereby + sa.Column("daily_composed_video_s3_key", sa.String, nullable=True), + sa.Column("daily_composed_video_duration", sa.Integer, nullable=True), sa.Index("idx_meeting_room_id", "room_id"), sa.Index("idx_meeting_calendar_event", "calendar_event_id"), ) @@ -112,13 +113,14 @@ class Meeting(BaseModel): calendar_event_id: str | None = None calendar_metadata: dict[str, Any] | None = None platform: Platform = WHEREBY_PLATFORM - cloud_recording_s3_key: str | None = None - cloud_recording_duration: int | None = None + # Daily.co composed video (Brady Bunch grid) - Daily.co only + daily_composed_video_s3_key: str | None = None + daily_composed_video_duration: int | None = None @computed_field @property - def cloud_recording_available(self) -> bool: - return bool(self.cloud_recording_s3_key) + def daily_composed_video_available(self) -> bool: + return bool(self.daily_composed_video_s3_key) class MeetingController: @@ -151,7 +153,7 @@ async def create( platform=room.platform, ) query = meetings.insert().values( - 
**meeting.model_dump(exclude={"cloud_recording_available"}) + **meeting.model_dump(exclude={"daily_composed_video_available"}) ) await get_database().execute(query) return meeting diff --git a/server/reflector/video_platforms/daily.py b/server/reflector/video_platforms/daily.py index 7a5423d7..37112dab 100644 --- a/server/reflector/video_platforms/daily.py +++ b/server/reflector/video_platforms/daily.py @@ -1,4 +1,5 @@ from datetime import datetime +from uuid import UUID from reflector.dailyco_api import ( CreateMeetingTokenRequest, @@ -197,7 +198,7 @@ async def start_recording( self, room_name: DailyRoomName, recording_type: DailyRecordingType, - instance_id: NonEmptyString, + instance_id: UUID, ) -> dict: """Start recording via Daily.co REST API. diff --git a/server/reflector/views/daily.py b/server/reflector/views/daily.py index 284d1821..0a86a818 100644 --- a/server/reflector/views/daily.py +++ b/server/reflector/views/daily.py @@ -174,7 +174,7 @@ async def _handle_recording_started(event: RecordingStartedEvent): async def _handle_recording_ready(event: RecordingReadyEvent): room_name = event.payload.room_name recording_id = event.payload.recording_id - recording_type = event.payload.type # "cloud" or "raw-tracks" + recording_type = event.payload.type logger.info( "Recording ready for download", @@ -190,7 +190,7 @@ async def _handle_recording_ready(event: RecordingReadyEvent): return if recording_type == "cloud": - # Cloud recording: single MP4 file written by Daily.co to DAILYCO_STORAGE bucket + # single MP4 file written by Daily.co to a bucket s3_key = event.payload.s3_key # Store cloud recording reference in meeting table @@ -205,8 +205,8 @@ async def _handle_recording_ready(event: RecordingReadyEvent): await meetings_controller.update_meeting( meeting.id, - cloud_recording_s3_key=s3_key, - cloud_recording_duration=event.payload.duration, + daily_composed_video_s3_key=s3_key, + daily_composed_video_duration=event.payload.duration, ) logger.info( @@ -217,7 
+217,6 @@ async def _handle_recording_ready(event: RecordingReadyEvent): ) elif recording_type == "raw-tracks": - # Existing multi-track processing (unchanged) tracks = event.payload.tracks if not tracks: logger.warning( diff --git a/server/reflector/views/meetings.py b/server/reflector/views/meetings.py index 95ccd76d..d33d5188 100644 --- a/server/reflector/views/meetings.py +++ b/server/reflector/views/meetings.py @@ -1,6 +1,7 @@ import logging from datetime import datetime, timezone from typing import Annotated, Any, Optional +from uuid import UUID from fastapi import APIRouter, Depends, HTTPException, Request from pydantic import BaseModel @@ -83,7 +84,7 @@ async def meeting_deactivate( class StartRecordingRequest(BaseModel): type: RecordingType - instanceId: NonEmptyString + instanceId: UUID @router.post("/meetings/{meeting_id}/recordings/start") @@ -95,7 +96,7 @@ async def start_recording( Both cloud and raw-tracks are started via REST API to bypass enable_recording limitation. Uses different instanceIds for cloud vs raw-tracks. - Note: No authentication required - anonymous users supported. + Note: No authentication required - anonymous users supported. 
TODO this is a DOS vector """ meeting = await meetings_controller.get_by_id(meeting_id) if not meeting: diff --git a/server/reflector/worker/app.py b/server/reflector/worker/app.py index 318d5077..a353cf55 100644 --- a/server/reflector/worker/app.py +++ b/server/reflector/worker/app.py @@ -8,7 +8,9 @@ logger = structlog.get_logger(__name__) # Polling intervals (seconds) -POLL_DAILY_RECORDINGS_INTERVAL_SEC = 15.0 # Dev: 15s, Prod: 180s +# Webhook-aware: 180s when webhook configured (backup mode), 15s when no webhook (primary discovery) +POLL_DAILY_RECORDINGS_INTERVAL_SEC = 180.0 if settings.DAILY_WEBHOOK_SECRET else 15.0 + if celery.current_app.main != "default": logger.info(f"Celery already configured ({celery.current_app})") app = celery.current_app diff --git a/server/reflector/worker/process.py b/server/reflector/worker/process.py index 07682606..4802e237 100644 --- a/server/reflector/worker/process.py +++ b/server/reflector/worker/process.py @@ -341,7 +341,7 @@ async def poll_daily_recordings(): - Cloud recordings: Store S3 key in meeting table - Raw-tracks recordings: Queue multitrack processing - Runs every 3 minutes as fallback for webhook failures. + Acts as fallback when webhooks active, primary discovery when webhooks unavailable. Worker-level locking provides idempotency (see process_multitrack_recording). 
""" @@ -383,31 +383,30 @@ async def poll_daily_recordings(): return # Separate cloud and raw-tracks recordings - # Infer type if not provided by API: - # - Cloud recordings: s3key exists, tracks array EMPTY - # - Raw-tracks: s3key exists, tracks array HAS items (audio files) cloud_recordings = [] raw_tracks_recordings = [] for rec in finished_recordings: if rec.type: - inferred_type = rec.type - elif len(rec.tracks) > 0: - # Has tracks = raw-tracks (even if s3key exists) - inferred_type = "raw-tracks" - elif rec.s3key and len(rec.tracks) == 0: - # Has s3key but no tracks = cloud recording - inferred_type = "cloud" + # Daily.co API provides explicit type - use it + recording_type = rec.type else: - logger.warning( - "Cannot determine recording type, skipping", + # Type field missing - this should not happen - either llm assumption or API docs issue. to watch for. + # Inference logic commented out until we confirm it's needed: + # elif len(rec.tracks) > 0: + # recording_type = "raw-tracks" # Has tracks = raw-tracks + # elif rec.s3key and len(rec.tracks) == 0: + # recording_type = "cloud" # Has s3key but no tracks = cloud + logger.error( + "Recording missing type field from Daily.co API - skipping (needs investigation)", recording_id=rec.id, room_name=rec.room_name, has_s3key=bool(rec.s3key), tracks_count=len(rec.tracks), + mtg_session_id=rec.mtgSessionId, ) continue - if inferred_type == "cloud": + if recording_type == "cloud": cloud_recordings.append(rec) else: raw_tracks_recordings.append(rec) @@ -455,7 +454,7 @@ async def _poll_cloud_recordings(cloud_recordings: List[FinishedRecordingRespons # TODO: improve matching logic (recording.start_ts, meeting.start_date) target_meeting = None for meeting in meetings: - if not meeting.cloud_recording_s3_key: + if not meeting.daily_composed_video_s3_key: target_meeting = meeting break @@ -479,8 +478,8 @@ async def _poll_cloud_recordings(cloud_recordings: List[FinishedRecordingRespons await meetings_controller.update_meeting( 
target_meeting.id, - cloud_recording_s3_key=s3_key, - cloud_recording_duration=recording.duration, + daily_composed_video_s3_key=s3_key, + daily_composed_video_duration=recording.duration, ) logger.info( From 4a93e84b918aa23b2af5fa1c9240e44da2cfb911 Mon Sep 17 00:00:00 2001 From: Igor Loskutov Date: Tue, 13 Jan 2026 17:20:10 -0500 Subject: [PATCH 06/20] daily poll time match (no-mistakes) --- server/reflector/db/meetings.py | 80 ++++++++++++++++++ server/reflector/views/daily.py | 1 + server/reflector/worker/process.py | 129 ++++++++++++++++++++--------- 3 files changed, 171 insertions(+), 39 deletions(-) diff --git a/server/reflector/db/meetings.py b/server/reflector/db/meetings.py index 3c6cc93e..bb87f827 100644 --- a/server/reflector/db/meetings.py +++ b/server/reflector/db/meetings.py @@ -190,6 +190,86 @@ async def get_by_room_name_all(self, room_name: str) -> list[Meeting]: results = await get_database().fetch_all(query) return [Meeting(**r) for r in results] + async def get_by_room_name_and_time( + self, + room_name: str, + recording_start: datetime, + time_window_hours: int = 168, + ) -> Meeting | None: + """ + Get meeting by room name closest to recording timestamp. + + HACK ALERT: Daily.co doesn't return instanceId in recordings API response, + and mtgSessionId is separate from our instanceId. Time-based matching is + the least-bad workaround. + + This handles edge case of duplicate room_name values in DB (race conditions, + double-clicks, etc.) by matching based on temporal proximity. + + Algorithm: + 1. Find meetings within time_window_hours of recording_start + 2. Return meeting with start_date closest to recording_start + 3. 
If tie, return first by meeting.id (deterministic) + + Args: + room_name: Daily.co room name from recording + recording_start: Unix timestamp from recording.start_ts + time_window_hours: Search window (default 168 = 1 week) + + Returns: + Meeting closest to recording timestamp, or None if no matches + + Failure modes: + - Multiple meetings in same room within ~5 minutes: picks closest + - All meetings outside time window: returns None + - Clock skew between Daily.co and DB: 1-week window tolerates this + + Why 1 week window: + - Handles webhook failures (recording discovered days later) + - Tolerates clock skew + - Rejects unrelated meetings from weeks ago + + Production data showing duplicate room_names: + - Room: daily-private-igor-20251126162029 + - Meeting 1: 2025-11-26 16:20:29.000000 + - Meeting 2: 2025-11-26 16:20:29.990355 (0.99s later) + - Both have cloud recordings + - Time-based matching correctly distinguishes them + """ + from datetime import timedelta + + window_start = recording_start - timedelta(hours=time_window_hours) + window_end = recording_start + timedelta(hours=time_window_hours) + + query = ( + meetings.select() + .where( + sa.and_( + meetings.c.room_name == room_name, + meetings.c.start_date >= window_start, + meetings.c.start_date <= window_end, + ) + ) + .order_by(meetings.c.start_date) + ) + + results = await get_database().fetch_all(query) + if not results: + return None + + candidates = [Meeting(**r) for r in results] + + # Find meeting with start_date closest to recording_start + closest = min( + candidates, + key=lambda m: ( + abs((m.start_date - recording_start).total_seconds()), + m.id, # Tie-breaker: deterministic by UUID + ), + ) + + return closest + async def get_active(self, room: Room, current_time: datetime) -> Meeting | None: """ Get latest active meeting for a room. 
diff --git a/server/reflector/views/daily.py b/server/reflector/views/daily.py index 0a86a818..10a14321 100644 --- a/server/reflector/views/daily.py +++ b/server/reflector/views/daily.py @@ -240,6 +240,7 @@ async def _handle_recording_ready(event: RecordingReadyEvent): daily_room_name=room_name, recording_id=recording_id, track_keys=track_keys, + recording_start_ts=event.payload.start_ts, ) else: diff --git a/server/reflector/worker/process.py b/server/reflector/worker/process.py index 4802e237..2c143dff 100644 --- a/server/reflector/worker/process.py +++ b/server/reflector/worker/process.py @@ -17,7 +17,7 @@ DailyParticipantSession, daily_participant_sessions_controller, ) -from reflector.db.meetings import Meeting, meetings_controller +from reflector.db.meetings import meetings_controller from reflector.db.recordings import Recording, recordings_controller from reflector.db.rooms import rooms_controller from reflector.db.transcripts import ( @@ -175,13 +175,25 @@ async def process_multitrack_recording( daily_room_name: DailyRoomName, recording_id: str, track_keys: list[str], + recording_start_ts: int | None = None, ): + """ + Process raw-tracks (multitrack) recording from Daily.co. 
+ + Args: + bucket_name: S3 bucket containing tracks + daily_room_name: Daily.co room name + recording_id: Daily.co recording ID + track_keys: S3 keys for audio tracks + recording_start_ts: Unix timestamp when recording started (for time-based meeting matching) + """ logger.info( "Processing multitrack recording", bucket=bucket_name, room_name=daily_room_name, recording_id=recording_id, provided_keys=len(track_keys), + recording_start_ts=recording_start_ts, ) if not track_keys: @@ -240,7 +252,45 @@ async def _process_multitrack_recording_inner( exc_info=True, ) - meeting = await meetings_controller.get_by_room_name(daily_room_name) + # Find meeting: use time-based matching if recording_start_ts available + if recording_start_ts: + recording_start = datetime.fromtimestamp(recording_start_ts, tz=timezone.utc) + meeting = await meetings_controller.get_by_room_name_and_time( + room_name=daily_room_name, + recording_start=recording_start, + time_window_hours=168, # 1 week + ) + if not meeting: + logger.error( + "Raw-tracks: no meeting found within 1-week window (time-based match)", + recording_id=recording_id, + room_name=daily_room_name, + recording_start_ts=recording_start_ts, + recording_start=recording_start.isoformat(), + ) + raise Exception( + f"Meeting not found for recording {recording_id} within 1-week window" + ) + logger.info( + "Found meeting via time-based matching", + meeting_id=meeting.id, + room_name=daily_room_name, + recording_id=recording_id, + time_delta_seconds=abs( + (meeting.start_date - recording_start).total_seconds() + ), + ) + else: + # Fallback: most recent meeting (legacy behavior, less accurate) + meeting = await meetings_controller.get_by_room_name(daily_room_name) + if not meeting: + raise Exception(f"Meeting not found: {daily_room_name}") + logger.warning( + "Found meeting via fallback (most recent) - no recording_start_ts provided", + meeting_id=meeting.id, + room_name=daily_room_name, + recording_id=recording_id, + ) room_name_base = 
extract_base_room_name(daily_room_name) @@ -248,16 +298,6 @@ async def _process_multitrack_recording_inner( if not room: raise Exception(f"Room not found: {room_name_base}") - if not meeting: - raise Exception(f"Meeting not found: {room_name_base}") - - logger.info( - "Found existing Meeting for recording", - meeting_id=meeting.id, - room_name=daily_room_name, - recording_id=recording_id, - ) - recording = await recordings_controller.get_by_id(recording_id) if not recording: object_key_dir = os.path.dirname(track_keys[0]) if track_keys else "" @@ -421,48 +461,50 @@ async def poll_daily_recordings(): # Process cloud recordings await _poll_cloud_recordings(cloud_recordings) - # Process raw-tracks recordings (existing logic) + # Process raw-tracks recordings await _poll_raw_tracks_recordings(raw_tracks_recordings, bucket_name) async def _poll_cloud_recordings(cloud_recordings: List[FinishedRecordingResponse]): - """Store cloud recordings missing from meeting table.""" + """ + Store cloud recordings missing from meeting table. + + Uses time-based matching to handle duplicate room_name values. + See meetings_controller.get_by_room_name_and_time() for details on the hack. 
+ """ if not cloud_recordings: return - # Get all meetings with matching room names - room_names = list({rec.room_name for rec in cloud_recordings}) - meetings_by_room: dict[str, List[Meeting]] = {} - - for room_name in room_names: - meetings = await meetings_controller.get_by_room_name_all(room_name) - if meetings: - meetings_by_room[room_name] = meetings - stored_count = 0 for recording in cloud_recordings: - meetings = meetings_by_room.get(recording.room_name, []) - if not meetings: + # Convert Unix timestamp to datetime for matching + recording_start = datetime.fromtimestamp(recording.start_ts, tz=timezone.utc) + + # Find meeting by time proximity (1-week window) + meeting = await meetings_controller.get_by_room_name_and_time( + room_name=recording.room_name, + recording_start=recording_start, + time_window_hours=168, # 1 week + ) + + if not meeting: logger.warning( - "Cloud recording: no meetings found for room", + "Cloud recording: no meeting found within 1-week window", recording_id=recording.id, room_name=recording.room_name, + recording_start_ts=recording.start_ts, + recording_start=recording_start.isoformat(), ) continue - # Find meeting without cloud recording that overlaps with recording time - # TODO: improve matching logic (recording.start_ts, meeting.start_date) - target_meeting = None - for meeting in meetings: - if not meeting.daily_composed_video_s3_key: - target_meeting = meeting - break - - if not target_meeting: + # Skip if meeting already has cloud recording + if meeting.daily_composed_video_s3_key: logger.debug( - "Cloud recording: all meetings already have cloud recordings", + "Cloud recording: meeting already has cloud recording", recording_id=recording.id, room_name=recording.room_name, + meeting_id=meeting.id, + existing_s3_key=meeting.daily_composed_video_s3_key, ) continue @@ -477,17 +519,20 @@ async def _poll_cloud_recordings(cloud_recordings: List[FinishedRecordingRespons continue await meetings_controller.update_meeting( - 
target_meeting.id, + meeting.id, daily_composed_video_s3_key=s3_key, daily_composed_video_duration=recording.duration, ) logger.info( - "Cloud recording stored via polling", - meeting_id=target_meeting.id, + "Cloud recording stored via polling (time-based match)", + meeting_id=meeting.id, recording_id=recording.id, s3_key=s3_key, duration=recording.duration, + time_delta_seconds=abs( + (meeting.start_date - recording_start).total_seconds() + ), ) stored_count += 1 @@ -561,6 +606,7 @@ async def _poll_raw_tracks_recordings( daily_room_name=recording.room_name, recording_id=recording.id, track_keys=track_keys, + recording_start_ts=recording.start_ts, ) @@ -1001,11 +1047,16 @@ async def reprocess_failed_daily_recordings(): transcript_status=transcript.status if transcript else None, ) + # For reprocessing, use meeting's start_date as recording_start_ts + # (meeting already known via recording.meeting_id) + recording_start_ts = int(meeting.start_date.timestamp()) + process_multitrack_recording.delay( bucket_name=bucket_name, daily_room_name=meeting.room_name, recording_id=recording.id, track_keys=recording.track_keys, + recording_start_ts=recording_start_ts, ) reprocessed_count += 1 From bacd276594107362c98d5e62c0a7959fa0b0e528 Mon Sep 17 00:00:00 2001 From: Igor Loskutov Date: Tue, 13 Jan 2026 17:50:15 -0500 Subject: [PATCH 07/20] daily poll self-review (no-mistakes) --- server/reflector/worker/process.py | 65 ++++++++++++------------------ 1 file changed, 26 insertions(+), 39 deletions(-) diff --git a/server/reflector/worker/process.py b/server/reflector/worker/process.py index 2c143dff..ad537544 100644 --- a/server/reflector/worker/process.py +++ b/server/reflector/worker/process.py @@ -175,7 +175,7 @@ async def process_multitrack_recording( daily_room_name: DailyRoomName, recording_id: str, track_keys: list[str], - recording_start_ts: int | None = None, + recording_start_ts: int, ): """ Process raw-tracks (multitrack) recording from Daily.co. 
@@ -185,7 +185,7 @@ async def process_multitrack_recording( daily_room_name: Daily.co room name recording_id: Daily.co recording ID track_keys: S3 keys for audio tracks - recording_start_ts: Unix timestamp when recording started (for time-based meeting matching) + recording_start_ts: Unix timestamp when recording started (required for time-based meeting matching) """ logger.info( "Processing multitrack recording", @@ -224,7 +224,7 @@ async def process_multitrack_recording( ) await _process_multitrack_recording_inner( - bucket_name, daily_room_name, recording_id, track_keys + bucket_name, daily_room_name, recording_id, track_keys, recording_start_ts ) @@ -233,6 +233,7 @@ async def _process_multitrack_recording_inner( daily_room_name: DailyRoomName, recording_id: str, track_keys: list[str], + recording_start_ts: int, ): """Inner function containing the actual processing logic.""" @@ -252,45 +253,31 @@ async def _process_multitrack_recording_inner( exc_info=True, ) - # Find meeting: use time-based matching if recording_start_ts available - if recording_start_ts: - recording_start = datetime.fromtimestamp(recording_start_ts, tz=timezone.utc) - meeting = await meetings_controller.get_by_room_name_and_time( - room_name=daily_room_name, - recording_start=recording_start, - time_window_hours=168, # 1 week - ) - if not meeting: - logger.error( - "Raw-tracks: no meeting found within 1-week window (time-based match)", - recording_id=recording_id, - room_name=daily_room_name, - recording_start_ts=recording_start_ts, - recording_start=recording_start.isoformat(), - ) - raise Exception( - f"Meeting not found for recording {recording_id} within 1-week window" - ) - logger.info( - "Found meeting via time-based matching", - meeting_id=meeting.id, - room_name=daily_room_name, + # Find meeting via time-based matching + recording_start = datetime.fromtimestamp(recording_start_ts, tz=timezone.utc) + meeting = await meetings_controller.get_by_room_name_and_time( + 
room_name=daily_room_name, + recording_start=recording_start, + time_window_hours=168, # 1 week + ) + if not meeting: + logger.error( + "Raw-tracks: no meeting found within 1-week window (time-based match)", recording_id=recording_id, - time_delta_seconds=abs( - (meeting.start_date - recording_start).total_seconds() - ), - ) - else: - # Fallback: most recent meeting (legacy behavior, less accurate) - meeting = await meetings_controller.get_by_room_name(daily_room_name) - if not meeting: - raise Exception(f"Meeting not found: {daily_room_name}") - logger.warning( - "Found meeting via fallback (most recent) - no recording_start_ts provided", - meeting_id=meeting.id, room_name=daily_room_name, - recording_id=recording_id, + recording_start_ts=recording_start_ts, + recording_start=recording_start.isoformat(), ) + raise Exception( + f"Meeting not found for recording {recording_id} within 1-week window" + ) + logger.info( + "Found meeting via time-based matching", + meeting_id=meeting.id, + room_name=daily_room_name, + recording_id=recording_id, + time_delta_seconds=abs((meeting.start_date - recording_start).total_seconds()), + ) room_name_base = extract_base_room_name(daily_room_name) From 62ac8796de4a89ac0a6bb93b98d5cfd831e4e456 Mon Sep 17 00:00:00 2001 From: Igor Loskutov Date: Tue, 13 Jan 2026 18:06:48 -0500 Subject: [PATCH 08/20] daily poll self-review (no-mistakes) --- server/reflector/dailyco_api/instance_id.py | 49 ++++++ server/tests/test_dailyco_instance_id.py | 157 ++++++++++++++++++++ 2 files changed, 206 insertions(+) create mode 100644 server/reflector/dailyco_api/instance_id.py create mode 100644 server/tests/test_dailyco_instance_id.py diff --git a/server/reflector/dailyco_api/instance_id.py b/server/reflector/dailyco_api/instance_id.py new file mode 100644 index 00000000..611cd6cc --- /dev/null +++ b/server/reflector/dailyco_api/instance_id.py @@ -0,0 +1,49 @@ +""" +Daily.co recording instanceId generation utilities. 
+ +Deterministic instance ID generation for cloud and raw-tracks recordings. +MUST match frontend logic in www/app/[roomName]/components/DailyRoom.tsx +""" + +from uuid import UUID, uuid5 + +from reflector.utils.string import NonEmptyString + +# Namespace UUID for UUIDv5 generation of raw-tracks instanceIds +# DO NOT CHANGE: Breaks instanceId determinism across deployments and frontend/backend matching +RAW_TRACKS_NAMESPACE = UUID("a1b2c3d4-e5f6-7890-abcd-ef1234567890") + + +def generate_cloud_instance_id(meeting_id: NonEmptyString) -> UUID: + """ + Generate instanceId for cloud recording. + + Cloud recordings use meeting ID directly as instanceId. + This ensures each meeting has unique cloud recording. + + Args: + meeting_id: Meeting UUID string + + Returns: + UUID for cloud recording instanceId + """ + return UUID(meeting_id) + + +def generate_raw_tracks_instance_id(meeting_id: NonEmptyString) -> UUID: + """ + Generate instanceId for raw-tracks recording. + + Raw-tracks recordings use UUIDv5(meeting_id, namespace) to ensure + different instanceId from cloud while remaining deterministic. + + Daily.co requires cloud and raw-tracks to have different instanceIds + for concurrent recording. + + Args: + meeting_id: Meeting UUID string + + Returns: + Deterministic UUID for raw-tracks recording instanceId + """ + return uuid5(RAW_TRACKS_NAMESPACE, meeting_id) diff --git a/server/tests/test_dailyco_instance_id.py b/server/tests/test_dailyco_instance_id.py new file mode 100644 index 00000000..dd00e41d --- /dev/null +++ b/server/tests/test_dailyco_instance_id.py @@ -0,0 +1,157 @@ +""" +Tests for Daily.co instanceId generation. + +Verifies deterministic behavior and frontend/backend consistency. 
+""" + +import pytest + +from reflector.dailyco_api.instance_id import ( + RAW_TRACKS_NAMESPACE, + generate_cloud_instance_id, + generate_raw_tracks_instance_id, +) + + +class TestInstanceIdDeterminism: + """Test deterministic generation of instanceIds.""" + + def test_cloud_instance_id_is_meeting_id(self): + """Cloud instanceId is meeting ID directly.""" + meeting_id = "550e8400-e29b-41d4-a716-446655440000" + result = generate_cloud_instance_id(meeting_id) + assert str(result) == meeting_id + + def test_cloud_instance_id_deterministic(self): + """Cloud instanceId generation is deterministic.""" + meeting_id = "550e8400-e29b-41d4-a716-446655440000" + result1 = generate_cloud_instance_id(meeting_id) + result2 = generate_cloud_instance_id(meeting_id) + assert result1 == result2 + + def test_raw_tracks_instance_id_deterministic(self): + """Raw-tracks instanceId generation is deterministic.""" + meeting_id = "550e8400-e29b-41d4-a716-446655440000" + result1 = generate_raw_tracks_instance_id(meeting_id) + result2 = generate_raw_tracks_instance_id(meeting_id) + assert result1 == result2 + + def test_raw_tracks_different_from_cloud(self): + """Raw-tracks instanceId differs from cloud instanceId.""" + meeting_id = "550e8400-e29b-41d4-a716-446655440000" + cloud_id = generate_cloud_instance_id(meeting_id) + raw_tracks_id = generate_raw_tracks_instance_id(meeting_id) + assert cloud_id != raw_tracks_id + + def test_different_meetings_different_instance_ids(self): + """Different meetings generate different instanceIds.""" + meeting_id1 = "550e8400-e29b-41d4-a716-446655440000" + meeting_id2 = "6ba7b810-9dad-11d1-80b4-00c04fd430c8" + + cloud1 = generate_cloud_instance_id(meeting_id1) + cloud2 = generate_cloud_instance_id(meeting_id2) + assert cloud1 != cloud2 + + raw1 = generate_raw_tracks_instance_id(meeting_id1) + raw2 = generate_raw_tracks_instance_id(meeting_id2) + assert raw1 != raw2 + + +class TestFrontendBackendConsistency: + """Test that backend matches frontend logic.""" 
+ + def test_namespace_matches_frontend(self): + """Namespace UUID matches frontend RAW_TRACKS_NAMESPACE constant.""" + # From www/app/[roomName]/components/DailyRoom.tsx + frontend_namespace = "a1b2c3d4-e5f6-7890-abcd-ef1234567890" + assert str(RAW_TRACKS_NAMESPACE) == frontend_namespace + + def test_raw_tracks_generation_matches_frontend_logic(self): + """Backend UUIDv5 generation matches frontend uuidv5() call.""" + # Example meeting ID + meeting_id = "550e8400-e29b-41d4-a716-446655440000" + + # Backend result + backend_result = generate_raw_tracks_instance_id(meeting_id) + + # Expected result from frontend: uuidv5(meeting.id, RAW_TRACKS_NAMESPACE) + # Python uuid5 uses (namespace, name) argument order + # JavaScript uuid.v5(name, namespace) - same args, different order + # Frontend: uuidv5(meeting.id, "a1b2c3d4-e5f6-7890-abcd-ef1234567890") + # Backend: uuid5(UUID("a1b2c3d4-e5f6-7890-abcd-ef1234567890"), meeting.id) + + # Verify it's a valid UUID (will raise if not) + assert len(str(backend_result)) == 36 + assert backend_result.version == 5 + + +class TestEdgeCases: + """Test edge cases and error conditions.""" + + def test_invalid_uuid_format_raises(self): + """Invalid UUID format raises ValueError.""" + with pytest.raises(ValueError): + generate_cloud_instance_id("not-a-uuid") + + def test_lowercase_uuid_normalized_for_cloud(self): + """Cloud instanceId: lowercase/uppercase UUIDs produce same result.""" + meeting_id_lower = "550e8400-e29b-41d4-a716-446655440000" + meeting_id_upper = "550E8400-E29B-41D4-A716-446655440000" + + cloud_lower = generate_cloud_instance_id(meeting_id_lower) + cloud_upper = generate_cloud_instance_id(meeting_id_upper) + assert cloud_lower == cloud_upper + + def test_raw_tracks_case_sensitive(self): + """ + Raw-tracks instanceId is CASE-SENSITIVE (uuid5 hashes string directly). + + This is fine - meeting IDs from DB are always lowercase. + Frontend also uses lowercase meeting.id from API. 
+ """ + meeting_id_lower = "550e8400-e29b-41d4-a716-446655440000" + meeting_id_upper = "550E8400-E29B-41D4-A716-446655440000" + + raw_lower = generate_raw_tracks_instance_id(meeting_id_lower) + raw_upper = generate_raw_tracks_instance_id(meeting_id_upper) + # Different cases produce different hashes + assert raw_lower != raw_upper + + +class TestMtgSessionIdVsInstanceId: + """ + Test that mtgSessionId (Daily.co-generated) differs from instanceId (we send). + + Based on production data analysis showing mtgSessionId is separate from instanceId. + """ + + def test_mtg_session_id_not_equal_to_cloud_instance_id(self): + """ + mtgSessionId from Daily.co does NOT match cloud instanceId we send. + + Real example from production: + - Meeting ID (cloud instanceId): 4ad503b6-8189-4910-a8f7-68cdd1b7f990 + - Recording mtgSessionId: f25a2e09-740f-4932-9c0d-b1bebaa669c6 + """ + meeting_id = "4ad503b6-8189-4910-a8f7-68cdd1b7f990" + mtg_session_id = "f25a2e09-740f-4932-9c0d-b1bebaa669c6" + + cloud_instance_id = generate_cloud_instance_id(meeting_id) + assert str(cloud_instance_id) != mtg_session_id + + def test_mtg_session_id_not_equal_to_raw_tracks_instance_id(self): + """ + mtgSessionId from Daily.co does NOT match raw-tracks instanceId we send. 
+ + Real example from production: + - Meeting ID: 4ad503b6-8189-4910-a8f7-68cdd1b7f990 + - Raw-tracks instanceId: 784b3af3-c7dd-57f0-ac54-2ee91c6927cb + - Recording mtgSessionId: f25a2e09-740f-4932-9c0d-b1bebaa669c6 + """ + meeting_id = "4ad503b6-8189-4910-a8f7-68cdd1b7f990" + expected_raw_tracks_id = "784b3af3-c7dd-57f0-ac54-2ee91c6927cb" + mtg_session_id = "f25a2e09-740f-4932-9c0d-b1bebaa669c6" + + raw_tracks_instance_id = generate_raw_tracks_instance_id(meeting_id) + assert str(raw_tracks_instance_id) == expected_raw_tracks_id + assert str(raw_tracks_instance_id) != mtg_session_id From 079ba9654a0c8900488067c0a0698f309c2d9b14 Mon Sep 17 00:00:00 2001 From: Igor Loskutov Date: Tue, 13 Jan 2026 19:10:57 -0500 Subject: [PATCH 09/20] daily co doc --- server/reflector/worker/process.py | 43 ++++++++++++++++++++++-------- 1 file changed, 32 insertions(+), 11 deletions(-) diff --git a/server/reflector/worker/process.py b/server/reflector/worker/process.py index ad537544..cc9d92a7 100644 --- a/server/reflector/worker/process.py +++ b/server/reflector/worker/process.py @@ -415,23 +415,44 @@ async def poll_daily_recordings(): for rec in finished_recordings: if rec.type: # Daily.co API provides explicit type - use it + # LOG THIS: As of Jan 2026, Daily.co never returns type field. + # If this logs, Daily.co API changed - we can remove inference logic. recording_type = rec.type - else: - # Type field missing - this should not happen - either llm assumption or API docs issue. to watch for. 
- # Inference logic commented out until we confirm it's needed: - # elif len(rec.tracks) > 0: - # recording_type = "raw-tracks" # Has tracks = raw-tracks - # elif rec.s3key and len(rec.tracks) == 0: - # recording_type = "cloud" # Has s3key but no tracks = cloud - logger.error( - "Recording missing type field from Daily.co API - skipping (needs investigation)", + logger.warning( + "Recording has explicit type field from Daily.co API (unexpected, API may have changed)", recording_id=rec.id, room_name=rec.room_name, + recording_type=recording_type, has_s3key=bool(rec.s3key), tracks_count=len(rec.tracks), - mtg_session_id=rec.mtgSessionId, ) - continue + else: + # DAILY.CO API LIMITATION: + # GET /recordings response does NOT include type field. + # Daily.co docs mention type field exists, but API never returns it. + # Verified: 84 recordings from Nov 2025 - Jan 2026 ALL have type=None. + # + # This is not a recent API change - Daily.co has never returned type. + # Must infer from structural properties. 
+ # + # Inference heuristic (reliable for finished recordings): + # - Has tracks array → raw-tracks + # - Has s3key but no tracks → cloud + # - Neither → failed/incomplete recording + if len(rec.tracks) > 0: + recording_type = "raw-tracks" + elif rec.s3key and len(rec.tracks) == 0: + recording_type = "cloud" + else: + logger.warning( + "Recording has no type, no s3key, and no tracks - likely failed recording", + recording_id=rec.id, + room_name=rec.room_name, + status=rec.status, + duration=rec.duration, + mtg_session_id=rec.mtgSessionId, + ) + continue if recording_type == "cloud": cloud_recordings.append(rec) From ac650577028d493189eb8cda9f35d36dcb1d4803 Mon Sep 17 00:00:00 2001 From: Igor Loskutov Date: Wed, 14 Jan 2026 10:36:59 -0500 Subject: [PATCH 10/20] cleanup --- .../1b1e6a6fc465_add_cloud_recording_support.py | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/server/migrations/versions/1b1e6a6fc465_add_cloud_recording_support.py b/server/migrations/versions/1b1e6a6fc465_add_cloud_recording_support.py index 51af37a7..89578f65 100644 --- a/server/migrations/versions/1b1e6a6fc465_add_cloud_recording_support.py +++ b/server/migrations/versions/1b1e6a6fc465_add_cloud_recording_support.py @@ -27,11 +27,6 @@ def upgrade() -> None: batch_op.add_column( sa.Column("daily_composed_video_duration", sa.Integer(), nullable=True) ) - batch_op.drop_index( - batch_op.f("idx_meeting_idempotency"), - postgresql_where="(idempotency_key IS NOT NULL)", - ) - batch_op.drop_column("idempotency_key") with op.batch_alter_table("user_api_key", schema=None) as batch_op: batch_op.drop_constraint( @@ -55,17 +50,6 @@ def downgrade() -> None: ) with op.batch_alter_table("meeting", schema=None) as batch_op: - batch_op.add_column( - sa.Column( - "idempotency_key", sa.VARCHAR(), autoincrement=False, nullable=True - ) - ) - batch_op.create_index( - batch_op.f("idx_meeting_idempotency"), - ["room_id", "idempotency_key"], - unique=True, - postgresql_where="(idempotency_key IS 
NOT NULL)", - ) batch_op.drop_column("daily_composed_video_duration") batch_op.drop_column("daily_composed_video_s3_key") From 1a1b07f3518075dd215d9ddae1e262e380c7e670 Mon Sep 17 00:00:00 2001 From: Igor Loskutov Date: Wed, 14 Jan 2026 10:40:45 -0500 Subject: [PATCH 11/20] cleanup --- .../1b1e6a6fc465_add_cloud_recording_support.py | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/server/migrations/versions/1b1e6a6fc465_add_cloud_recording_support.py b/server/migrations/versions/1b1e6a6fc465_add_cloud_recording_support.py index 89578f65..6df05b8a 100644 --- a/server/migrations/versions/1b1e6a6fc465_add_cloud_recording_support.py +++ b/server/migrations/versions/1b1e6a6fc465_add_cloud_recording_support.py @@ -28,27 +28,11 @@ def upgrade() -> None: sa.Column("daily_composed_video_duration", sa.Integer(), nullable=True) ) - with op.batch_alter_table("user_api_key", schema=None) as batch_op: - batch_op.drop_constraint( - batch_op.f("user_token_token_hash_key"), type_="unique" - ) - batch_op.drop_index(batch_op.f("idx_user_api_key_hash")) - batch_op.create_index("idx_user_api_key_hash", ["key_hash"], unique=True) - # ### end Alembic commands ### def downgrade() -> None: # ### commands auto generated by Alembic - please adjust! 
### - with op.batch_alter_table("user_api_key", schema=None) as batch_op: - batch_op.drop_index("idx_user_api_key_hash") - batch_op.create_index( - batch_op.f("idx_user_api_key_hash"), ["key_hash"], unique=False - ) - batch_op.create_unique_constraint( - batch_op.f("user_token_token_hash_key"), ["key_hash"] - ) - with op.batch_alter_table("meeting", schema=None) as batch_op: batch_op.drop_column("daily_composed_video_duration") batch_op.drop_column("daily_composed_video_s3_key") From 6f71f26ede6154aa13ee56e917c1fcc0e53be5fa Mon Sep 17 00:00:00 2001 From: Igor Loskutov Date: Wed, 14 Jan 2026 12:49:22 -0500 Subject: [PATCH 12/20] self-review (no-mistakes) --- server/reflector/dailyco_api/instance_id.py | 16 +- server/reflector/db/meetings.py | 53 ++- server/reflector/video_platforms/daily.py | 7 +- server/reflector/views/daily.py | 30 +- server/reflector/views/meetings.py | 4 +- server/reflector/worker/process.py | 133 ++++--- server/tests/test_dailyco_instance_id.py | 62 ++- .../tests/test_time_based_meeting_matching.py | 374 ++++++++++++++++++ 8 files changed, 532 insertions(+), 147 deletions(-) create mode 100644 server/tests/test_time_based_meeting_matching.py diff --git a/server/reflector/dailyco_api/instance_id.py b/server/reflector/dailyco_api/instance_id.py index 611cd6cc..7743229f 100644 --- a/server/reflector/dailyco_api/instance_id.py +++ b/server/reflector/dailyco_api/instance_id.py @@ -2,7 +2,7 @@ Daily.co recording instanceId generation utilities. Deterministic instance ID generation for cloud and raw-tracks recordings. -MUST match frontend logic in www/app/[roomName]/components/DailyRoom.tsx +MUST match frontend logic """ from uuid import UUID, uuid5 @@ -19,13 +19,7 @@ def generate_cloud_instance_id(meeting_id: NonEmptyString) -> UUID: Generate instanceId for cloud recording. Cloud recordings use meeting ID directly as instanceId. - This ensures each meeting has unique cloud recording. 
- - Args: - meeting_id: Meeting UUID string - - Returns: - UUID for cloud recording instanceId + This ensures each meeting has one unique cloud recording. """ return UUID(meeting_id) @@ -39,11 +33,5 @@ def generate_raw_tracks_instance_id(meeting_id: NonEmptyString) -> UUID: Daily.co requires cloud and raw-tracks to have different instanceIds for concurrent recording. - - Args: - meeting_id: Meeting UUID string - - Returns: - Deterministic UUID for raw-tracks recording instanceId """ return uuid5(RAW_TRACKS_NAMESPACE, meeting_id) diff --git a/server/reflector/db/meetings.py b/server/reflector/db/meetings.py index bb87f827..d03d73c1 100644 --- a/server/reflector/db/meetings.py +++ b/server/reflector/db/meetings.py @@ -1,4 +1,4 @@ -from datetime import datetime +from datetime import datetime, timedelta from typing import Any, Literal import sqlalchemy as sa @@ -9,7 +9,7 @@ from reflector.db.rooms import Room from reflector.schemas.platform import WHEREBY_PLATFORM, Platform from reflector.utils import generate_uuid4 -from reflector.utils.string import assert_equal +from reflector.utils.string import NonEmptyString, assert_equal meetings = sa.Table( "meeting", @@ -192,7 +192,7 @@ async def get_by_room_name_all(self, room_name: str) -> list[Meeting]: async def get_by_room_name_and_time( self, - room_name: str, + room_name: NonEmptyString, recording_start: datetime, time_window_hours: int = 168, ) -> Meeting | None: @@ -213,7 +213,7 @@ async def get_by_room_name_and_time( Args: room_name: Daily.co room name from recording - recording_start: Unix timestamp from recording.start_ts + recording_start: Timezone-aware datetime from recording.start_ts time_window_hours: Search window (default 168 = 1 week) Returns: @@ -229,14 +229,12 @@ async def get_by_room_name_and_time( - Tolerates clock skew - Rejects unrelated meetings from weeks ago - Production data showing duplicate room_names: - - Room: daily-private-igor-20251126162029 - - Meeting 1: 2025-11-26 16:20:29.000000 - - 
Meeting 2: 2025-11-26 16:20:29.990355 (0.99s later) - - Both have cloud recordings - - Time-based matching correctly distinguishes them """ - from datetime import timedelta + # Validate timezone-aware datetime + if recording_start.tzinfo is None: + raise ValueError( + f"recording_start must be timezone-aware, got naive datetime: {recording_start}" + ) window_start = recording_start - timedelta(hours=time_window_hours) window_end = recording_start + timedelta(hours=time_window_hours) @@ -359,6 +357,39 @@ async def update_meeting(self, meeting_id: str, **kwargs): query = meetings.update().where(meetings.c.id == meeting_id).values(**kwargs) await get_database().execute(query) + async def set_cloud_recording_if_missing( + self, + meeting_id: NonEmptyString, + s3_key: NonEmptyString, + duration: int, + ) -> bool: + """ + Set cloud recording only if not already set. + + Returns True if updated, False if already set. + Prevents webhook/polling race condition via atomic WHERE clause. + """ + query = ( + meetings.update() + .where( + sa.and_( + meetings.c.id == meeting_id, + meetings.c.daily_composed_video_s3_key.is_(None), + ) + ) + .values( + daily_composed_video_s3_key=s3_key, + daily_composed_video_duration=duration, + ) + ) + await get_database().execute(query) + + # Check if update succeeded by verifying current value + meeting = await self.get_by_id(meeting_id) + if not meeting: + return False + return meeting.daily_composed_video_s3_key == s3_key + async def increment_num_clients(self, meeting_id: str) -> None: """Atomically increment participant count.""" query = ( diff --git a/server/reflector/video_platforms/daily.py b/server/reflector/video_platforms/daily.py index 37112dab..cef78b4c 100644 --- a/server/reflector/video_platforms/daily.py +++ b/server/reflector/video_platforms/daily.py @@ -203,12 +203,7 @@ async def start_recording( """Start recording via Daily.co REST API. 
Args: - room_name: Daily.co room name - recording_type: Recording type - instance_id: UUID for this recording session - - Returns: - Recording start confirmation from Daily.co API + instance_id: UUID for this recording session - one UUID per "room" in Daily (which is "meeting" in Reflector) """ return await self._api_client.start_recording( room_name=room_name, diff --git a/server/reflector/views/daily.py b/server/reflector/views/daily.py index 10a14321..384290da 100644 --- a/server/reflector/views/daily.py +++ b/server/reflector/views/daily.py @@ -19,6 +19,7 @@ from reflector.worker.process import ( poll_daily_room_presence_task, process_multitrack_recording, + store_cloud_recording, ) router = APIRouter() @@ -190,30 +191,13 @@ async def _handle_recording_ready(event: RecordingReadyEvent): return if recording_type == "cloud": - # single MP4 file written by Daily.co to a bucket - s3_key = event.payload.s3_key - - # Store cloud recording reference in meeting table - meeting = await meetings_controller.get_by_room_name(room_name) - if not meeting: - logger.warning( - "Cloud recording: meeting not found", - room_name=room_name, - recording_id=recording_id, - ) - return - - await meetings_controller.update_meeting( - meeting.id, - daily_composed_video_s3_key=s3_key, - daily_composed_video_duration=event.payload.duration, - ) - - logger.info( - "Cloud recording stored", - meeting_id=meeting.id, - s3_key=s3_key, + await store_cloud_recording( + recording_id=recording_id, + room_name=room_name, + s3_key=event.payload.s3_key, duration=event.payload.duration, + start_ts=event.payload.start_ts, + source="webhook", ) elif recording_type == "raw-tracks": diff --git a/server/reflector/views/meetings.py b/server/reflector/views/meetings.py index d33d5188..c886b347 100644 --- a/server/reflector/views/meetings.py +++ b/server/reflector/views/meetings.py @@ -93,8 +93,8 @@ async def start_recording( ) -> dict[str, Any]: """Start cloud or raw-tracks recording via Daily.co REST API. 
- Both cloud and raw-tracks are started via REST API to bypass enable_recording limitation. - Uses different instanceIds for cloud vs raw-tracks. + Both cloud and raw-tracks are started via REST API to bypass enable_recording limitation of allowing only 1 recording at a time. + Uses different instanceIds for cloud vs raw-tracks (same won't work) Note: No authentication required - anonymous users supported. TODO this is a DOS vector """ diff --git a/server/reflector/worker/process.py b/server/reflector/worker/process.py index cc9d92a7..0cd52f9b 100644 --- a/server/reflector/worker/process.py +++ b/server/reflector/worker/process.py @@ -2,7 +2,7 @@ import os import re from datetime import datetime, timezone -from typing import List +from typing import List, Literal from urllib.parse import unquote import av @@ -42,6 +42,7 @@ filter_cam_audio_tracks, recording_lock_key, ) +from reflector.utils.string import NonEmptyString from reflector.video_platforms.factory import create_platform_client from reflector.video_platforms.whereby_utils import ( parse_whereby_recording_filename, @@ -179,13 +180,6 @@ async def process_multitrack_recording( ): """ Process raw-tracks (multitrack) recording from Daily.co. 
- - Args: - bucket_name: S3 bucket containing tracks - daily_room_name: Daily.co room name - recording_id: Daily.co recording ID - track_keys: S3 keys for audio tracks - recording_start_ts: Unix timestamp when recording started (required for time-based meeting matching) """ logger.info( "Processing multitrack recording", @@ -253,7 +247,6 @@ async def _process_multitrack_recording_inner( exc_info=True, ) - # Find meeting via time-based matching recording_start = datetime.fromtimestamp(recording_start_ts, tz=timezone.utc) meeting = await meetings_controller.get_by_room_name_and_time( room_name=daily_room_name, @@ -473,49 +466,86 @@ async def poll_daily_recordings(): await _poll_raw_tracks_recordings(raw_tracks_recordings, bucket_name) -async def _poll_cloud_recordings(cloud_recordings: List[FinishedRecordingResponse]): +async def store_cloud_recording( + recording_id: NonEmptyString, + room_name: NonEmptyString, + s3_key: NonEmptyString, + duration: int, + start_ts: int, + source: Literal["webhook", "polling"], +) -> bool: """ - Store cloud recordings missing from meeting table. + Store cloud recording reference in meeting table. + Common function for both webhook and polling code paths. Uses time-based matching to handle duplicate room_name values. - See meetings_controller.get_by_room_name_and_time() for details on the hack. 
+ + Args: + recording_id: Daily.co recording ID + room_name: Daily.co room name + s3_key: S3 key where recording is stored + duration: Recording duration in seconds + start_ts: Unix timestamp when recording started + source: "webhook" or "polling" (for logging) + + Returns: + True if stored, False if skipped/failed """ - if not cloud_recordings: - return + recording_start = datetime.fromtimestamp(start_ts, tz=timezone.utc) - stored_count = 0 - for recording in cloud_recordings: - # Convert Unix timestamp to datetime for matching - recording_start = datetime.fromtimestamp(recording.start_ts, tz=timezone.utc) + meeting = await meetings_controller.get_by_room_name_and_time( + room_name=room_name, + recording_start=recording_start, + time_window_hours=168, # 1 week + ) - # Find meeting by time proximity (1-week window) - meeting = await meetings_controller.get_by_room_name_and_time( - room_name=recording.room_name, - recording_start=recording_start, - time_window_hours=168, # 1 week + if not meeting: + logger.warning( + f"Cloud recording ({source}): no meeting found within 1-week window", + recording_id=recording_id, + room_name=room_name, + recording_start_ts=start_ts, + recording_start=recording_start.isoformat(), ) + return False - if not meeting: - logger.warning( - "Cloud recording: no meeting found within 1-week window", - recording_id=recording.id, - room_name=recording.room_name, - recording_start_ts=recording.start_ts, - recording_start=recording_start.isoformat(), - ) - continue + success = await meetings_controller.set_cloud_recording_if_missing( + meeting_id=meeting.id, + s3_key=s3_key, + duration=duration, + ) + + if not success: + logger.debug( + f"Cloud recording ({source}): already set (race lost)", + recording_id=recording_id, + room_name=room_name, + meeting_id=meeting.id, + ) + return False + + logger.info( + f"Cloud recording stored via {source} (time-based match)", + meeting_id=meeting.id, + recording_id=recording_id, + s3_key=s3_key, + 
duration=duration, + time_delta_seconds=abs((meeting.start_date - recording_start).total_seconds()), + ) + return True - # Skip if meeting already has cloud recording - if meeting.daily_composed_video_s3_key: - logger.debug( - "Cloud recording: meeting already has cloud recording", - recording_id=recording.id, - room_name=recording.room_name, - meeting_id=meeting.id, - existing_s3_key=meeting.daily_composed_video_s3_key, - ) - continue +async def _poll_cloud_recordings(cloud_recordings: List[FinishedRecordingResponse]): + """ + Store cloud recordings missing from meeting table via polling. + + Uses time-based matching via store_cloud_recording(). + """ + if not cloud_recordings: + return + + stored_count = 0 + for recording in cloud_recordings: # Extract S3 key from recording (cloud recordings use s3key field) s3_key = recording.s3key or (recording.s3.key if recording.s3 else None) if not s3_key: @@ -526,23 +556,16 @@ async def _poll_cloud_recordings(cloud_recordings: List[FinishedRecordingRespons ) continue - await meetings_controller.update_meeting( - meeting.id, - daily_composed_video_s3_key=s3_key, - daily_composed_video_duration=recording.duration, - ) - - logger.info( - "Cloud recording stored via polling (time-based match)", - meeting_id=meeting.id, + stored = await store_cloud_recording( recording_id=recording.id, + room_name=recording.room_name, s3_key=s3_key, duration=recording.duration, - time_delta_seconds=abs( - (meeting.start_date - recording_start).total_seconds() - ), + start_ts=recording.start_ts, + source="polling", ) - stored_count += 1 + if stored: + stored_count += 1 logger.info( "Cloud recording polling complete", diff --git a/server/tests/test_dailyco_instance_id.py b/server/tests/test_dailyco_instance_id.py index dd00e41d..d410205b 100644 --- a/server/tests/test_dailyco_instance_id.py +++ b/server/tests/test_dailyco_instance_id.py @@ -17,16 +17,11 @@ class TestInstanceIdDeterminism: """Test deterministic generation of instanceIds.""" def 
test_cloud_instance_id_is_meeting_id(self): - """Cloud instanceId is meeting ID directly.""" - meeting_id = "550e8400-e29b-41d4-a716-446655440000" - result = generate_cloud_instance_id(meeting_id) - assert str(result) == meeting_id - - def test_cloud_instance_id_deterministic(self): - """Cloud instanceId generation is deterministic.""" + """Cloud instanceId is meeting ID directly (implicitly tests determinism).""" meeting_id = "550e8400-e29b-41d4-a716-446655440000" result1 = generate_cloud_instance_id(meeting_id) result2 = generate_cloud_instance_id(meeting_id) + assert str(result1) == meeting_id assert result1 == result2 def test_raw_tracks_instance_id_deterministic(self): @@ -102,56 +97,51 @@ def test_lowercase_uuid_normalized_for_cloud(self): cloud_upper = generate_cloud_instance_id(meeting_id_upper) assert cloud_lower == cloud_upper - def test_raw_tracks_case_sensitive(self): + def test_uuid5_is_case_sensitive_warning(self): """ - Raw-tracks instanceId is CASE-SENSITIVE (uuid5 hashes string directly). + Documents uuid5 case sensitivity - different case UUIDs produce different hashes. + + Not a problem: meeting.id always lowercase from DB and API. + Frontend generates raw-tracks instanceId from lowercase meeting.id. + Backend receives lowercase meeting_id when matching. - This is fine - meeting IDs from DB are always lowercase. - Frontend also uses lowercase meeting.id from API. + This test documents the behavior, not a requirement. """ meeting_id_lower = "550e8400-e29b-41d4-a716-446655440000" meeting_id_upper = "550E8400-E29B-41D4-A716-446655440000" raw_lower = generate_raw_tracks_instance_id(meeting_id_lower) raw_upper = generate_raw_tracks_instance_id(meeting_id_upper) - # Different cases produce different hashes assert raw_lower != raw_upper class TestMtgSessionIdVsInstanceId: """ - Test that mtgSessionId (Daily.co-generated) differs from instanceId (we send). - - Based on production data analysis showing mtgSessionId is separate from instanceId. 
- """ + Documents that Daily.co's mtgSessionId differs from our instanceId. - def test_mtg_session_id_not_equal_to_cloud_instance_id(self): - """ - mtgSessionId from Daily.co does NOT match cloud instanceId we send. - - Real example from production: - - Meeting ID (cloud instanceId): 4ad503b6-8189-4910-a8f7-68cdd1b7f990 - - Recording mtgSessionId: f25a2e09-740f-4932-9c0d-b1bebaa669c6 - """ - meeting_id = "4ad503b6-8189-4910-a8f7-68cdd1b7f990" - mtg_session_id = "f25a2e09-740f-4932-9c0d-b1bebaa669c6" + Why this matters: We investigated using mtgSessionId for matching but discovered + it's Daily.co-generated and unrelated to instanceId we send. This test documents + that finding so we don't investigate it again. - cloud_instance_id = generate_cloud_instance_id(meeting_id) - assert str(cloud_instance_id) != mtg_session_id + Production data from 2026-01-13: + - Meeting ID: 4ad503b6-8189-4910-a8f7-68cdd1b7f990 + - Cloud instanceId: 4ad503b6-8189-4910-a8f7-68cdd1b7f990 (same as meeting ID) + - Raw-tracks instanceId: 784b3af3-c7dd-57f0-ac54-2ee91c6927cb (UUIDv5 derived) + - Recording mtgSessionId: f25a2e09-740f-4932-9c0d-b1bebaa669c6 (different!) - def test_mtg_session_id_not_equal_to_raw_tracks_instance_id(self): - """ - mtgSessionId from Daily.co does NOT match raw-tracks instanceId we send. + Conclusion: Cannot use mtgSessionId for recording-to-meeting matching. 
+ """ - Real example from production: - - Meeting ID: 4ad503b6-8189-4910-a8f7-68cdd1b7f990 - - Raw-tracks instanceId: 784b3af3-c7dd-57f0-ac54-2ee91c6927cb - - Recording mtgSessionId: f25a2e09-740f-4932-9c0d-b1bebaa669c6 - """ + def test_mtg_session_id_differs_from_our_instance_ids(self): + """mtgSessionId (Daily.co) != instanceId (ours) for both cloud and raw-tracks.""" meeting_id = "4ad503b6-8189-4910-a8f7-68cdd1b7f990" expected_raw_tracks_id = "784b3af3-c7dd-57f0-ac54-2ee91c6927cb" mtg_session_id = "f25a2e09-740f-4932-9c0d-b1bebaa669c6" + cloud_instance_id = generate_cloud_instance_id(meeting_id) raw_tracks_instance_id = generate_raw_tracks_instance_id(meeting_id) + + assert str(cloud_instance_id) == meeting_id assert str(raw_tracks_instance_id) == expected_raw_tracks_id + assert str(cloud_instance_id) != mtg_session_id assert str(raw_tracks_instance_id) != mtg_session_id diff --git a/server/tests/test_time_based_meeting_matching.py b/server/tests/test_time_based_meeting_matching.py new file mode 100644 index 00000000..3506c183 --- /dev/null +++ b/server/tests/test_time_based_meeting_matching.py @@ -0,0 +1,374 @@ +""" +Integration tests for time-based meeting-to-recording matching. + +Tests the critical path for matching Daily.co recordings to meetings when +API doesn't return instanceId. 
+""" + +from datetime import datetime, timedelta, timezone + +import pytest + +from reflector.db.meetings import meetings_controller +from reflector.db.rooms import rooms_controller + + +@pytest.fixture +async def test_room(): + """Create a test room for meetings.""" + room = await rooms_controller.add( + name="test-room-time", + user_id="test-user-id", + zulip_auto_post=False, + zulip_stream="", + zulip_topic="", + is_locked=False, + room_mode="normal", + recording_type="cloud", + recording_trigger="automatic", + is_shared=False, + platform="daily", + ) + return room + + +@pytest.fixture +def base_time(): + """Fixed timestamp for deterministic tests.""" + return datetime(2026, 1, 14, 9, 0, 0, tzinfo=timezone.utc) + + +class TestTimeBasedMatching: + """Test get_by_room_name_and_time() matching logic.""" + + async def test_exact_time_match(self, test_room, base_time): + """Recording timestamp exactly matches meeting start_date.""" + meeting = await meetings_controller.create( + id="meeting-exact", + room_name="daily-test-20260114090000", + room_url="https://example.daily.co/test", + host_room_url="https://example.daily.co/test?t=host", + start_date=base_time, + end_date=base_time + timedelta(hours=1), + room=test_room, + ) + + result = await meetings_controller.get_by_room_name_and_time( + room_name="daily-test-20260114090000", + recording_start=base_time, + time_window_hours=168, + ) + + assert result is not None + assert result.id == meeting.id + + async def test_recording_slightly_after_meeting_start(self, test_room, base_time): + """Recording started 1 minute after meeting (participants joined late).""" + meeting = await meetings_controller.create( + id="meeting-late", + room_name="daily-test-20260114090100", + room_url="https://example.daily.co/test", + host_room_url="https://example.daily.co/test?t=host", + start_date=base_time, + end_date=base_time + timedelta(hours=1), + room=test_room, + ) + + recording_start = base_time + timedelta(minutes=1) + + result = 
await meetings_controller.get_by_room_name_and_time( + room_name="daily-test-20260114090100", + recording_start=recording_start, + time_window_hours=168, + ) + + assert result is not None + assert result.id == meeting.id + + async def test_duplicate_room_names_picks_closest(self, test_room, base_time): + """ + Two meetings with same room_name (duplicate/race condition). + Should pick closest by timestamp. + """ + meeting1 = await meetings_controller.create( + id="meeting-1-first", + room_name="daily-duplicate-room", + room_url="https://example.daily.co/test", + host_room_url="https://example.daily.co/test?t=host", + start_date=base_time, + end_date=base_time + timedelta(hours=1), + room=test_room, + ) + + meeting2 = await meetings_controller.create( + id="meeting-2-second", + room_name="daily-duplicate-room", # Same room_name! + room_url="https://example.daily.co/test", + host_room_url="https://example.daily.co/test?t=host", + start_date=base_time + timedelta(seconds=0.99), # 0.99s later + end_date=base_time + timedelta(hours=1), + room=test_room, + ) + + # Recording started 0.5s after meeting1 + # Distance: meeting1 = 0.5s, meeting2 = 0.49s → meeting2 is closer + recording_start = base_time + timedelta(seconds=0.5) + + result = await meetings_controller.get_by_room_name_and_time( + room_name="daily-duplicate-room", + recording_start=recording_start, + time_window_hours=168, + ) + + assert result is not None + assert result.id == meeting2.id # meeting2 is closer (0.49s vs 0.5s) + + async def test_outside_time_window_returns_none(self, test_room, base_time): + """Recording outside 1-week window returns None.""" + await meetings_controller.create( + id="meeting-old", + room_name="daily-test-old", + room_url="https://example.daily.co/test", + host_room_url="https://example.daily.co/test?t=host", + start_date=base_time, + end_date=base_time + timedelta(hours=1), + room=test_room, + ) + + # Recording 8 days later (outside 7-day window) + recording_start = base_time + 
timedelta(days=8) + + result = await meetings_controller.get_by_room_name_and_time( + room_name="daily-test-old", + recording_start=recording_start, + time_window_hours=168, + ) + + assert result is None + + async def test_tie_breaker_deterministic(self, test_room, base_time): + """When time delta identical, tie-breaker by meeting.id is deterministic.""" + meeting_z = await meetings_controller.create( + id="zzz-last-uuid", + room_name="daily-test-tie", + room_url="https://example.daily.co/test", + host_room_url="https://example.daily.co/test?t=host", + start_date=base_time, + end_date=base_time + timedelta(hours=1), + room=test_room, + ) + + meeting_a = await meetings_controller.create( + id="aaa-first-uuid", + room_name="daily-test-tie", + room_url="https://example.daily.co/test", + host_room_url="https://example.daily.co/test?t=host", + start_date=base_time, # Exact same start_date + end_date=base_time + timedelta(hours=1), + room=test_room, + ) + + result = await meetings_controller.get_by_room_name_and_time( + room_name="daily-test-tie", + recording_start=base_time, + time_window_hours=168, + ) + + assert result is not None + # Tie-breaker: lexicographically first UUID + assert result.id == "aaa-first-uuid" + + async def test_timezone_naive_datetime_raises(self, test_room, base_time): + """Timezone-naive datetime raises ValueError.""" + await meetings_controller.create( + id="meeting-tz", + room_name="daily-test-tz", + room_url="https://example.daily.co/test", + host_room_url="https://example.daily.co/test?t=host", + start_date=base_time, + end_date=base_time + timedelta(hours=1), + room=test_room, + ) + + # Naive datetime (no timezone) + naive_dt = datetime(2026, 1, 14, 9, 0, 0) + + with pytest.raises(ValueError, match="timezone-aware"): + await meetings_controller.get_by_room_name_and_time( + room_name="daily-test-tz", + recording_start=naive_dt, + time_window_hours=168, + ) + + async def test_one_week_boundary_after_included(self, test_room, base_time): + 
"""Meeting 1-week AFTER recording is included (window_end boundary).""" + meeting_time = base_time + timedelta(hours=168) + + await meetings_controller.create( + id="meeting-boundary-after", + room_name="daily-test-boundary-after", + room_url="https://example.daily.co/test", + host_room_url="https://example.daily.co/test?t=host", + start_date=meeting_time, + end_date=meeting_time + timedelta(hours=1), + room=test_room, + ) + + result = await meetings_controller.get_by_room_name_and_time( + room_name="daily-test-boundary-after", + recording_start=base_time, + time_window_hours=168, + ) + + assert result is not None + assert result.id == "meeting-boundary-after" + + async def test_one_week_boundary_before_included(self, test_room, base_time): + """Meeting 1-week BEFORE recording is included (window_start boundary).""" + meeting_time = base_time - timedelta(hours=168) + + await meetings_controller.create( + id="meeting-boundary-before", + room_name="daily-test-boundary-before", + room_url="https://example.daily.co/test", + host_room_url="https://example.daily.co/test?t=host", + start_date=meeting_time, + end_date=meeting_time + timedelta(hours=1), + room=test_room, + ) + + result = await meetings_controller.get_by_room_name_and_time( + room_name="daily-test-boundary-before", + recording_start=base_time, + time_window_hours=168, + ) + + assert result is not None + assert result.id == "meeting-boundary-before" + + async def test_recording_before_meeting_start(self, test_room, base_time): + """Recording started before meeting (clock skew or early join).""" + await meetings_controller.create( + id="meeting-early", + room_name="daily-test-early", + room_url="https://example.daily.co/test", + host_room_url="https://example.daily.co/test?t=host", + start_date=base_time, + end_date=base_time + timedelta(hours=1), + room=test_room, + ) + + recording_start = base_time - timedelta(minutes=2) + + result = await meetings_controller.get_by_room_name_and_time( + 
room_name="daily-test-early", + recording_start=recording_start, + time_window_hours=168, + ) + + assert result is not None + assert result.id == "meeting-early" + + async def test_mixed_inside_outside_window(self, test_room, base_time): + """Multiple meetings, only one inside window - returns the inside one.""" + await meetings_controller.create( + id="meeting-old", + room_name="daily-test-mixed", + room_url="https://example.daily.co/test", + host_room_url="https://example.daily.co/test?t=host", + start_date=base_time - timedelta(days=10), + end_date=base_time - timedelta(days=10, hours=-1), + room=test_room, + ) + + await meetings_controller.create( + id="meeting-inside", + room_name="daily-test-mixed", + room_url="https://example.daily.co/test", + host_room_url="https://example.daily.co/test?t=host", + start_date=base_time - timedelta(days=2), + end_date=base_time - timedelta(days=2, hours=-1), + room=test_room, + ) + + await meetings_controller.create( + id="meeting-future", + room_name="daily-test-mixed", + room_url="https://example.daily.co/test", + host_room_url="https://example.daily.co/test?t=host", + start_date=base_time + timedelta(days=10), + end_date=base_time + timedelta(days=10, hours=1), + room=test_room, + ) + + result = await meetings_controller.get_by_room_name_and_time( + room_name="daily-test-mixed", + recording_start=base_time, + time_window_hours=168, + ) + + assert result is not None + assert result.id == "meeting-inside" + + +class TestAtomicCloudRecordingUpdate: + """Test atomic update prevents race conditions.""" + + async def test_first_update_succeeds(self, test_room, base_time): + """First call to set_cloud_recording_if_missing succeeds.""" + meeting = await meetings_controller.create( + id="meeting-atomic-1", + room_name="daily-test-atomic", + room_url="https://example.daily.co/test", + host_room_url="https://example.daily.co/test?t=host", + start_date=base_time, + end_date=base_time + timedelta(hours=1), + room=test_room, + ) + + 
success = await meetings_controller.set_cloud_recording_if_missing( + meeting_id=meeting.id, + s3_key="first-s3-key", + duration=100, + ) + + assert success is True + + updated = await meetings_controller.get_by_id(meeting.id) + assert updated.daily_composed_video_s3_key == "first-s3-key" + assert updated.daily_composed_video_duration == 100 + + async def test_second_update_fails_atomically(self, test_room, base_time): + """Second call to update same meeting doesn't overwrite (atomic check).""" + meeting = await meetings_controller.create( + id="meeting-atomic-2", + room_name="daily-test-atomic2", + room_url="https://example.daily.co/test", + host_room_url="https://example.daily.co/test?t=host", + start_date=base_time, + end_date=base_time + timedelta(hours=1), + room=test_room, + ) + + success1 = await meetings_controller.set_cloud_recording_if_missing( + meeting_id=meeting.id, + s3_key="first-s3-key", + duration=100, + ) + + assert success1 is True + + after_first = await meetings_controller.get_by_id(meeting.id) + assert after_first.daily_composed_video_s3_key == "first-s3-key" + + success2 = await meetings_controller.set_cloud_recording_if_missing( + meeting_id=meeting.id, + s3_key="bucket/path/should-not-overwrite", + duration=200, + ) + + assert success2 is False + + final = await meetings_controller.get_by_id(meeting.id) + assert final.daily_composed_video_s3_key == "first-s3-key" + assert final.daily_composed_video_duration == 100 From b5ccdb3016ca305917da7ef7c9d3a8b5fc42c49b Mon Sep 17 00:00:00 2001 From: Igor Loskutov Date: Wed, 14 Jan 2026 14:33:39 -0500 Subject: [PATCH 13/20] self-review (no-mistakes) --- server/reflector/db/recordings.py | 14 +++++ server/reflector/views/rooms.py | 3 + server/reflector/worker/process.py | 98 +++++++++++++++++++++--------- www/app/reflector-api.d.ts | 19 ++++-- 4 files changed, 101 insertions(+), 33 deletions(-) diff --git a/server/reflector/db/recordings.py b/server/reflector/db/recordings.py index 82609b38..bf799561 
100644 --- a/server/reflector/db/recordings.py +++ b/server/reflector/db/recordings.py @@ -7,6 +7,7 @@ from reflector.db import get_database, metadata from reflector.utils import generate_uuid4 +from reflector.utils.string import NonEmptyString recordings = sa.Table( "recording", @@ -71,6 +72,19 @@ async def remove_by_id(self, id: str) -> None: query = recordings.delete().where(recordings.c.id == id) await get_database().execute(query) + async def set_meeting_id( + self, + recording_id: NonEmptyString, + meeting_id: NonEmptyString, + ) -> None: + """Link recording to meeting.""" + query = ( + recordings.update() + .where(recordings.c.id == recording_id) + .values(meeting_id=meeting_id) + ) + await get_database().execute(query) + # no check for existence async def get_by_ids(self, recording_ids: list[str]) -> list[Recording]: if not recording_ids: diff --git a/server/reflector/views/rooms.py b/server/reflector/views/rooms.py index 6d538841..ba83b580 100644 --- a/server/reflector/views/rooms.py +++ b/server/reflector/views/rooms.py @@ -73,6 +73,9 @@ class Meeting(BaseModel): calendar_event_id: str | None = None calendar_metadata: dict[str, Any] | None = None platform: Platform + daily_composed_video_s3_key: str | None = None + daily_composed_video_duration: int | None = None + daily_composed_video_available: bool = False class CreateRoom(BaseModel): diff --git a/server/reflector/worker/process.py b/server/reflector/worker/process.py index 0cd52f9b..2c9dfe27 100644 --- a/server/reflector/worker/process.py +++ b/server/reflector/worker/process.py @@ -229,7 +229,16 @@ async def _process_multitrack_recording_inner( track_keys: list[str], recording_start_ts: int, ): - """Inner function containing the actual processing logic.""" + """ + Process multitrack recording (first time or reprocessing). 
+ + For first processing (webhook/polling): + - Uses recording_start_ts for time-based meeting matching (no instanceId available) + + For reprocessing: + - Uses recording.meeting_id directly (already linked during first processing) + - recording_start_ts is ignored + """ tz = timezone.utc recorded_at = datetime.now(tz) @@ -247,30 +256,52 @@ async def _process_multitrack_recording_inner( exc_info=True, ) - recording_start = datetime.fromtimestamp(recording_start_ts, tz=timezone.utc) - meeting = await meetings_controller.get_by_room_name_and_time( - room_name=daily_room_name, - recording_start=recording_start, - time_window_hours=168, # 1 week - ) - if not meeting: - logger.error( - "Raw-tracks: no meeting found within 1-week window (time-based match)", + # Check if recording already exists (reprocessing path) + recording = await recordings_controller.get_by_id(recording_id) + + if recording and recording.meeting_id: + # Reprocessing: recording exists with meeting already linked + meeting = await meetings_controller.get_by_id(recording.meeting_id) + if not meeting: + raise Exception( + f"Meeting {recording.meeting_id} not found for recording {recording_id}" + ) + + logger.info( + "Reprocessing: using existing recording.meeting_id", recording_id=recording_id, + meeting_id=meeting.id, room_name=daily_room_name, - recording_start_ts=recording_start_ts, - recording_start=recording_start.isoformat(), ) - raise Exception( - f"Meeting not found for recording {recording_id} within 1-week window" + else: + # First processing: recording doesn't exist, need time-based matching + # (Daily.co doesn't return instanceId in API, must match by timestamp) + recording_start = datetime.fromtimestamp(recording_start_ts, tz=timezone.utc) + meeting = await meetings_controller.get_by_room_name_and_time( + room_name=daily_room_name, + recording_start=recording_start, + time_window_hours=168, # 1 week + ) + if not meeting: + logger.error( + "Raw-tracks: no meeting found within 1-week window 
(time-based match)", + recording_id=recording_id, + room_name=daily_room_name, + recording_start_ts=recording_start_ts, + recording_start=recording_start.isoformat(), + ) + raise Exception( + f"Meeting not found for recording {recording_id} within 1-week window" + ) + logger.info( + "First processing: found meeting via time-based matching", + meeting_id=meeting.id, + room_name=daily_room_name, + recording_id=recording_id, + time_delta_seconds=abs( + (meeting.start_date - recording_start).total_seconds() + ), ) - logger.info( - "Found meeting via time-based matching", - meeting_id=meeting.id, - room_name=daily_room_name, - recording_id=recording_id, - time_delta_seconds=abs((meeting.start_date - recording_start).total_seconds()), - ) room_name_base = extract_base_room_name(daily_room_name) @@ -278,8 +309,8 @@ async def _process_multitrack_recording_inner( if not room: raise Exception(f"Room not found: {room_name_base}") - recording = await recordings_controller.get_by_id(recording_id) if not recording: + # Create recording (only happens during first processing) object_key_dir = os.path.dirname(track_keys[0]) if track_keys else "" recording = await recordings_controller.create( Recording( @@ -291,7 +322,19 @@ async def _process_multitrack_recording_inner( track_keys=track_keys, ) ) - # else: Recording already exists; metadata set at creation time + elif not recording.meeting_id: + # Recording exists but meeting_id is null (failed first processing) + # Update with meeting from time-based matching + await recordings_controller.set_meeting_id( + recording_id=recording.id, + meeting_id=meeting.id, + ) + recording.meeting_id = meeting.id + logger.info( + "Updated existing recording with meeting_id", + recording_id=recording.id, + meeting_id=meeting.id, + ) transcript = await transcripts_controller.get_by_recording_id(recording.id) if not transcript: @@ -407,8 +450,7 @@ async def poll_daily_recordings(): raw_tracks_recordings = [] for rec in finished_recordings: if 
rec.type: - # Daily.co API provides explicit type - use it - # LOG THIS: As of Jan 2026, Daily.co never returns type field. + # Daily.co API returns null type - make sure this assumption stays # If this logs, Daily.co API changed - we can remove inference logic. recording_type = rec.type logger.warning( @@ -1078,9 +1120,9 @@ async def reprocess_failed_daily_recordings(): transcript_status=transcript.status if transcript else None, ) - # For reprocessing, use meeting's start_date as recording_start_ts - # (meeting already known via recording.meeting_id) - recording_start_ts = int(meeting.start_date.timestamp()) + # For reprocessing, pass actual recording time (though it's ignored - see _process_multitrack_recording_inner) + # Reprocessing uses recording.meeting_id directly instead of time-based matching + recording_start_ts = int(recording.recorded_at.timestamp()) process_multitrack_recording.delay( bucket_name=bucket_name, diff --git a/www/app/reflector-api.d.ts b/www/app/reflector-api.d.ts index 09122b21..cb3381ff 100644 --- a/www/app/reflector-api.d.ts +++ b/www/app/reflector-api.d.ts @@ -86,12 +86,12 @@ export interface paths { put?: never; /** * Start Recording - * @description Start raw-tracks recording via Daily.co REST API. + * @description Start cloud or raw-tracks recording via Daily.co REST API. * - * Called by frontend after starting cloud recording via daily-js. - * Uses same instanceId to link both recordings. + * Both cloud and raw-tracks are started via REST API to bypass enable_recording limitation of allowing only 1 recording at a time. + * Uses different instanceIds for cloud vs raw-tracks (same won't work) * - * Note: No authentication required - anonymous users supported. + * Note: No authentication required - anonymous users supported. 
TODO this is a DOS vector */ post: operations["v1_start_recording"]; delete?: never; @@ -1569,6 +1569,15 @@ export interface components { * @enum {string} */ platform: "whereby" | "daily"; + /** Daily Composed Video S3 Key */ + daily_composed_video_s3_key?: string | null; + /** Daily Composed Video Duration */ + daily_composed_video_duration?: number | null; + /** + * Daily Composed Video Available + * @default false + */ + daily_composed_video_available: boolean; }; /** MeetingConsentRequest */ MeetingConsentRequest: { @@ -1852,7 +1861,7 @@ export interface components { type: "cloud" | "raw-tracks"; /** * Instanceid - * @description A non-empty string + * Format: uuid */ instanceId: string; }; From 234ea42bce75b42d8146e3d0bac269970ea34d89 Mon Sep 17 00:00:00 2001 From: Igor Loskutov Date: Wed, 14 Jan 2026 14:54:18 -0500 Subject: [PATCH 14/20] self-review --- server/reflector/db/meetings.py | 11 +- server/reflector/views/rooms.py | 1 - server/reflector/worker/process.py | 13 +- www/app/[roomName]/components/DailyRoom.tsx | 124 ++++++++++---------- www/app/lib/types.ts | 1 - www/app/reflector-api.d.ts | 5 - www/package.json | 2 +- www/pnpm-lock.yaml | 19 ++- 8 files changed, 85 insertions(+), 91 deletions(-) diff --git a/server/reflector/db/meetings.py b/server/reflector/db/meetings.py index d03d73c1..36180ed7 100644 --- a/server/reflector/db/meetings.py +++ b/server/reflector/db/meetings.py @@ -2,7 +2,7 @@ from typing import Any, Literal import sqlalchemy as sa -from pydantic import BaseModel, Field, computed_field +from pydantic import BaseModel, Field from sqlalchemy.dialects.postgresql import JSONB from reflector.db import get_database, metadata @@ -117,11 +117,6 @@ class Meeting(BaseModel): daily_composed_video_s3_key: str | None = None daily_composed_video_duration: int | None = None - @computed_field - @property - def daily_composed_video_available(self) -> bool: - return bool(self.daily_composed_video_s3_key) - class MeetingController: async def create( @@ 
-152,9 +147,7 @@ async def create( calendar_metadata=calendar_metadata, platform=room.platform, ) - query = meetings.insert().values( - **meeting.model_dump(exclude={"daily_composed_video_available"}) - ) + query = meetings.insert().values(**meeting.model_dump()) await get_database().execute(query) return meeting diff --git a/server/reflector/views/rooms.py b/server/reflector/views/rooms.py index ba83b580..11e668c0 100644 --- a/server/reflector/views/rooms.py +++ b/server/reflector/views/rooms.py @@ -75,7 +75,6 @@ class Meeting(BaseModel): platform: Platform daily_composed_video_s3_key: str | None = None daily_composed_video_duration: int | None = None - daily_composed_video_available: bool = False class CreateRoom(BaseModel): diff --git a/server/reflector/worker/process.py b/server/reflector/worker/process.py index 2c9dfe27..bf3f14a4 100644 --- a/server/reflector/worker/process.py +++ b/server/reflector/worker/process.py @@ -263,9 +263,12 @@ async def _process_multitrack_recording_inner( # Reprocessing: recording exists with meeting already linked meeting = await meetings_controller.get_by_id(recording.meeting_id) if not meeting: - raise Exception( - f"Meeting {recording.meeting_id} not found for recording {recording_id}" + logger.error( + "Reprocessing: meeting not found for recording - skipping", + meeting_id=recording.meeting_id, + recording_id=recording_id, ) + return logger.info( "Reprocessing: using existing recording.meeting_id", @@ -284,15 +287,13 @@ async def _process_multitrack_recording_inner( ) if not meeting: logger.error( - "Raw-tracks: no meeting found within 1-week window (time-based match)", + "Raw-tracks: no meeting found within 1-week window (time-based match) - skipping", recording_id=recording_id, room_name=daily_room_name, recording_start_ts=recording_start_ts, recording_start=recording_start.isoformat(), ) - raise Exception( - f"Meeting not found for recording {recording_id} within 1-week window" - ) + return # Skip processing, will retry on 
next poll logger.info( "First processing: found meeting via time-based matching", meeting_id=meeting.id, diff --git a/www/app/[roomName]/components/DailyRoom.tsx b/www/app/[roomName]/components/DailyRoom.tsx index ba1c86c6..b49ea8be 100644 --- a/www/app/[roomName]/components/DailyRoom.tsx +++ b/www/app/[roomName]/components/DailyRoom.tsx @@ -27,9 +27,13 @@ import { useMeetingStartRecording, } from "../../lib/apiHooks"; import { omit } from "remeda"; -import { assertExists } from "../../lib/utils"; +import { + assertExists, + NonEmptyString, + parseNonEmptyString, +} from "../../lib/utils"; import { assertMeetingId, DailyRecordingType } from "../../lib/types"; -import { v5 as uuidv5 } from "uuid"; +import { useUuidV5 } from "react-uuid-hook"; const CONSENT_BUTTON_ID = "recording-consent"; const RECORDING_INDICATOR_ID = "recording-indicator"; @@ -186,12 +190,9 @@ export default function DailyRoom({ meeting, room }: DailyRoomProps) { const [joinedMeeting, setJoinedMeeting] = useState(null); // Generate deterministic instanceIds so all participants use SAME IDs - // Cloud and raw-tracks need DIFFERENT instanceIds (Daily.co restriction) - // useMemo ensures stable values across React StrictMode double-renders - const cloudInstanceId = useMemo(() => meeting.id, [meeting.id]); - const rawTracksInstanceId = useMemo( - () => uuidv5(meeting.id, RAW_TRACKS_NAMESPACE), - [meeting.id], + const cloudInstanceId = parseNonEmptyString(meeting.id); + const rawTracksInstanceId = parseNonEmptyString( + useUuidV5(meeting.id, RAW_TRACKS_NAMESPACE), ); const roomName = params?.roomName as string; @@ -248,70 +249,63 @@ export default function DailyRoom({ meeting, room }: DailyRoomProps) { ); const handleFrameJoinMeeting = useCallback(() => { - try { - if (meeting.recording_type === "cloud") { - console.log("Starting dual recording via REST API", { - cloudInstanceId, - rawTracksInstanceId, - }); + if (meeting.recording_type === "cloud") { + console.log("Starting dual recording via REST 
API", { + cloudInstanceId, + rawTracksInstanceId, + }); - // Start both cloud and raw-tracks via backend REST API (with retry on 404) - // Daily.co needs time to register call as "hosting" for REST API - const startRecordingWithRetry = ( - type: DailyRecordingType, - instanceId: string, - attempt: number = 1, - ) => { - setTimeout(() => { - startRecordingMutation.mutate( - { - params: { - path: { - meeting_id: meeting.id, - }, - }, - body: { - type, - instanceId, + // Start both cloud and raw-tracks via backend REST API (with retry on 404) + // Daily.co needs time to register call as "hosting" for REST API + const startRecordingWithRetry = ( + type: DailyRecordingType, + instanceId: NonEmptyString, + attempt: number = 1, + ) => { + setTimeout(() => { + startRecordingMutation.mutate( + { + params: { + path: { + meeting_id: meeting.id, }, }, - { - onError: (error: any) => { - const errorText = error?.detail || error?.message || ""; - const is404NotHosting = errorText.includes( - "does not seem to be hosting a call", + body: { + type, + instanceId, + }, + }, + { + onError: (error: any) => { + const errorText = error?.detail || error?.message || ""; + const is404NotHosting = errorText.includes( + "does not seem to be hosting a call", + ); + const isActiveStream = errorText.includes( + "has an active stream", + ); + + if (is404NotHosting && attempt < RECORDING_START_MAX_RETRIES) { + console.log( + `${type}: Call not hosting yet, retry ${attempt + 1}/${RECORDING_START_MAX_RETRIES} in ${RECORDING_START_DELAY_MS}ms...`, ); - const isActiveStream = errorText.includes( - "has an active stream", + startRecordingWithRetry(type, instanceId, attempt + 1); + } else if (isActiveStream) { + console.log( + `${type}: Recording already active (started by another participant)`, ); - - if ( - is404NotHosting && - attempt < RECORDING_START_MAX_RETRIES - ) { - console.log( - `${type}: Call not hosting yet, retry ${attempt + 1}/${RECORDING_START_MAX_RETRIES} in 
${RECORDING_START_DELAY_MS}ms...`, - ); - startRecordingWithRetry(type, instanceId, attempt + 1); - } else if (isActiveStream) { - console.log( - `${type}: Recording already active (started by another participant)`, - ); - } else { - console.error(`Failed to start ${type} recording:`, error); - } - }, + } else { + console.error(`Failed to start ${type} recording:`, error); + } }, - ); - }, RECORDING_START_DELAY_MS); - }; + }, + ); + }, RECORDING_START_DELAY_MS); + }; - // Start both recordings - startRecordingWithRetry("cloud", cloudInstanceId); - startRecordingWithRetry("raw-tracks", rawTracksInstanceId); - } - } catch (error) { - console.error("Failed to start recordings:", error); + // Start both recordings + startRecordingWithRetry("cloud", cloudInstanceId); + startRecordingWithRetry("raw-tracks", rawTracksInstanceId); } }, [ meeting.recording_type, diff --git a/www/app/lib/types.ts b/www/app/lib/types.ts index 00267ea0..54e2bae1 100644 --- a/www/app/lib/types.ts +++ b/www/app/lib/types.ts @@ -90,5 +90,4 @@ export const assertMeetingId = (s: string): MeetingId => { return nes as MeetingId; }; -// Daily.co recording types for API calls export type DailyRecordingType = "cloud" | "raw-tracks"; diff --git a/www/app/reflector-api.d.ts b/www/app/reflector-api.d.ts index cb3381ff..12a7085c 100644 --- a/www/app/reflector-api.d.ts +++ b/www/app/reflector-api.d.ts @@ -1573,11 +1573,6 @@ export interface components { daily_composed_video_s3_key?: string | null; /** Daily Composed Video Duration */ daily_composed_video_duration?: number | null; - /** - * Daily Composed Video Available - * @default false - */ - daily_composed_video_available: boolean; }; /** MeetingConsentRequest */ MeetingConsentRequest: { diff --git a/www/package.json b/www/package.json index f00341e4..ceefbf55 100644 --- a/www/package.json +++ b/www/package.json @@ -46,13 +46,13 @@ "react-markdown": "^9.0.0", "react-qr-code": "^2.0.12", "react-select-search": "^4.1.7", + "react-uuid-hook": "^0.0.6", 
"redlock": "5.0.0-beta.2", "remeda": "^2.31.1", "sass": "^1.63.6", "simple-peer": "^9.11.1", "tailwindcss": "^3.3.2", "typescript": "^5.1.6", - "uuid": "^13.0.0", "wavesurfer.js": "^7.4.2", "zod": "^4.1.5" }, diff --git a/www/pnpm-lock.yaml b/www/pnpm-lock.yaml index 342fe72b..cd65de55 100644 --- a/www/pnpm-lock.yaml +++ b/www/pnpm-lock.yaml @@ -106,6 +106,9 @@ importers: react-select-search: specifier: ^4.1.7 version: 4.1.8(prop-types@15.8.1)(react-dom@18.3.1(react@18.3.1))(react@18.3.1) + react-uuid-hook: + specifier: ^0.0.6 + version: 0.0.6(react@18.3.1) redlock: specifier: 5.0.0-beta.2 version: 5.0.0-beta.2 @@ -124,9 +127,6 @@ importers: typescript: specifier: ^5.1.6 version: 5.9.2 - uuid: - specifier: ^13.0.0 - version: 13.0.0 wavesurfer.js: specifier: ^7.4.2 version: 7.10.1 @@ -7631,6 +7631,14 @@ packages: "@types/react": optional: true + react-uuid-hook@0.0.6: + resolution: + { + integrity: sha512-u9+EvFbqpWfLE/ReYFry0vYu1BAg1fY9ekr0XLSDNnfWyrnVFytpurwz5qYsIB0psevuvrpZHIcvu7AjUwqinA==, + } + peerDependencies: + react: ">=16.8.0" + react@18.3.1: resolution: { @@ -14580,6 +14588,11 @@ snapshots: optionalDependencies: "@types/react": 18.2.20 + react-uuid-hook@0.0.6(react@18.3.1): + dependencies: + react: 18.3.1 + uuid: 13.0.0 + react@18.3.1: dependencies: loose-envify: 1.4.0 From 602848fe2e71037431338ec5661cca6efd2441c9 Mon Sep 17 00:00:00 2001 From: Igor Loskutov Date: Wed, 14 Jan 2026 15:12:48 -0500 Subject: [PATCH 15/20] self-review --- server/reflector/db/meetings.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/server/reflector/db/meetings.py b/server/reflector/db/meetings.py index 36180ed7..02f407b2 100644 --- a/server/reflector/db/meetings.py +++ b/server/reflector/db/meetings.py @@ -362,6 +362,13 @@ async def set_cloud_recording_if_missing( Returns True if updated, False if already set. Prevents webhook/polling race condition via atomic WHERE clause. 
""" + # Check current value before update to detect actual change + meeting_before = await self.get_by_id(meeting_id) + if not meeting_before: + return False + + was_null = meeting_before.daily_composed_video_s3_key is None + query = ( meetings.update() .where( @@ -377,11 +384,9 @@ async def set_cloud_recording_if_missing( ) await get_database().execute(query) - # Check if update succeeded by verifying current value - meeting = await self.get_by_id(meeting_id) - if not meeting: - return False - return meeting.daily_composed_video_s3_key == s3_key + # Return True only if value was NULL before (actual update occurred) + # If was_null=False, the WHERE clause prevented the update + return was_null async def increment_num_clients(self, meeting_id: str) -> None: """Atomically increment participant count.""" From 0c0404afc847ddaf63bd85719e1a30f38508d2c5 Mon Sep 17 00:00:00 2001 From: Igor Loskutov Date: Wed, 14 Jan 2026 17:00:26 -0500 Subject: [PATCH 16/20] ui typefix --- www/app/[roomName]/components/DailyRoom.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/www/app/[roomName]/components/DailyRoom.tsx b/www/app/[roomName]/components/DailyRoom.tsx index b49ea8be..d1c00254 100644 --- a/www/app/[roomName]/components/DailyRoom.tsx +++ b/www/app/[roomName]/components/DailyRoom.tsx @@ -192,7 +192,7 @@ export default function DailyRoom({ meeting, room }: DailyRoomProps) { // Generate deterministic instanceIds so all participants use SAME IDs const cloudInstanceId = parseNonEmptyString(meeting.id); const rawTracksInstanceId = parseNonEmptyString( - useUuidV5(meeting.id, RAW_TRACKS_NAMESPACE), + useUuidV5(meeting.id, RAW_TRACKS_NAMESPACE)[0], ); const roomName = params?.roomName as string; From 64a3fcb469b4ac27a2f0e30cac47dc9b8553ff0d Mon Sep 17 00:00:00 2001 From: Igor Loskutov Date: Wed, 14 Jan 2026 17:50:33 -0500 Subject: [PATCH 17/20] dupe calls error handling proper --- server/reflector/views/meetings.py | 38 ++++++++++++++++++++++++++++++ 1 file 
changed, 38 insertions(+) diff --git a/server/reflector/views/meetings.py b/server/reflector/views/meetings.py index c886b347..2e78d0bb 100644 --- a/server/reflector/views/meetings.py +++ b/server/reflector/views/meetings.py @@ -1,3 +1,4 @@ +import json import logging from datetime import datetime, timezone from typing import Annotated, Any, Optional @@ -8,6 +9,7 @@ import reflector.auth as auth from reflector.dailyco_api import RecordingType +from reflector.dailyco_api.client import DailyApiError from reflector.db.meetings import ( MeetingConsent, meeting_consent_controller, @@ -122,7 +124,43 @@ async def start_recording( return {"status": "ok", "result": result} + except DailyApiError as e: + # Parse Daily.co error response to detect "has an active stream" + try: + error_body = json.loads(e.response_body) + error_info = error_body.get("info", "") + + # "has an active stream" means recording already started by another participant + # This is SUCCESS from business logic perspective - return 200 + if "has an active stream" in error_info: + logger.info( + f"{body.type} recording already active (started by another participant)", + extra={ + "meeting_id": meeting_id, + "room_name": meeting.room_name, + "recording_type": body.type, + "instance_id": body.instanceId, + }, + ) + return {"status": "already_active", "instanceId": str(body.instanceId)} + except (json.JSONDecodeError, KeyError): + pass # Fall through to error handling + + # All other Daily.co API errors + logger.error( + f"Failed to start {body.type} recording", + extra={ + "meeting_id": meeting_id, + "recording_type": body.type, + "error": str(e), + }, + ) + raise HTTPException( + status_code=500, detail=f"Failed to start recording: {str(e)}" + ) + except Exception as e: + # Non-Daily.co errors logger.error( f"Failed to start {body.type} recording", extra={ From b203fcdf69dedffaaa996b3bed6e4adca0c10465 Mon Sep 17 00:00:00 2001 From: Igor Loskutov Date: Thu, 15 Jan 2026 16:37:15 -0500 Subject: [PATCH 18/20] 
daily reflector data model doc --- server/docs/DAILY_REFLECTOR_DATA_MODEL.md | 496 ++++++++++++++++++++++ 1 file changed, 496 insertions(+) create mode 100644 server/docs/DAILY_REFLECTOR_DATA_MODEL.md diff --git a/server/docs/DAILY_REFLECTOR_DATA_MODEL.md b/server/docs/DAILY_REFLECTOR_DATA_MODEL.md new file mode 100644 index 00000000..c25a3fd6 --- /dev/null +++ b/server/docs/DAILY_REFLECTOR_DATA_MODEL.md @@ -0,0 +1,496 @@ +# Daily.co and Reflector Data Model + +This document explains the data model relationships between Daily.co's API concepts and Reflector's database schema, clarifying common sources of confusion. + +--- + +## Table of Contents + +1. [Core Entities Overview](#core-entities-overview) +2. [Daily.co vs Reflector Terminology](#dailyco-vs-reflector-terminology) +3. [Entity Relationships](#entity-relationships) +4. [Recording Multiplicity](#recording-multiplicity) +5. [Session Identifiers Explained](#session-identifiers-explained) +6. [Time-Based Matching](#time-based-matching) +7. [Multitrack Recording Details](#multitrack-recording-details) +8. 
[Verified Example](#verified-example) + +--- + +## Core Entities Overview + +### Reflector's Four Primary Entities + +``` +┌─────────────────────────────────────────────────────────────────┐ +│ Room (Reflector) │ +│ - Persistent meeting template │ +│ - User-created configuration │ +│ - Example: "team-standup" │ +└────────────────────┬────────────────────────────────────────────┘ + │ 1:N + ▼ +┌─────────────────────────────────────────────────────────────────┐ +│ Meeting (Reflector) │ +│ - Single session instance │ +│ - Creates NEW Daily.co room with timestamp │ +│ - Example: "team-standup-20260115120000" │ +└────────────────────┬────────────────────────────────────────────┘ + │ 1:N + ▼ +┌─────────────────────────────────────────────────────────────────┐ +│ Recording (Reflector + Daily.co) │ +│ - One segment of audio/video │ +│ - New recording created on stop/restart │ +│ - track_keys: JSON array of S3 file paths │ +└────────────────────┬────────────────────────────────────────────┘ + │ 1:1 + ▼ +┌─────────────────────────────────────────────────────────────────┐ +│ Transcript (Reflector) │ +│ - Processed audio with transcription │ +│ - Diarization, summaries, topics │ +│ - One transcript per recording │ +└─────────────────────────────────────────────────────────────────┘ +``` + +--- + +## Daily.co vs Reflector Terminology + +### Room + +| Aspect | Daily.co | Reflector | +|--------|----------|-----------| +| **Definition** | Virtual meeting space on Daily.co platform | User-created meeting template/configuration | +| **Lifetime** | Configurable expiration | Persistent until user deletes | +| **Creation** | API call for each meeting | Pre-created by user once | +| **Reuse** | Can host multiple sessions | Generates new Daily.co room per meeting | +| **Name Format** | `room-name` (reusable) | `room-name` (base identifier) | +| **Timestamping** | Not required | Meeting adds timestamp: `{name}-YYYYMMDDHHMMSS` | + +**Example:** +``` +Reflector Room: "daily-private-igor" 
(persistent config) + ↓ starts meeting +Daily.co Room: "daily-private-igor-20260110042117" +``` + +### Meeting + +| Aspect | Daily.co | Reflector | +|--------|----------|-----------| +| **Definition** | Session that starts when first participant joins | Explicit database record of a session | +| **Identifier** | `mtgSessionId` (generated by Daily.co) | `meeting.id` (UUID, generated by Reflector) | +| **Creation** | Implicit (first participant join) | Explicit API call before participants join | +| **Purpose** | Tracks active session state | Links recordings, transcripts, participants | +| **Scope** | Per room instance | Per Reflector room + timestamp | + +**Critical Limitation:** Daily.co's recordings API often does NOT return `mtgSessionId`, requiring time-based matching (see [Time-Based Matching](#time-based-matching)). + +### Recording + +| Aspect | Daily.co | Reflector | +|--------|----------|-----------| +| **Definition** | Audio/video files on S3 | Metadata + processing status | +| **Types** | `cloud` (composed video), `raw-tracks` (multitrack) | Stores references + `track_keys` array | +| **Multiplicity** | One recording object per start/stop cycle | One DB row per Daily.co recording object | +| **Identifier** | Daily.co `recording_id` | Same `recording_id` (stored in DB) | +| **Multitrack** | Array of `.webm` files (one per participant) | `track_keys` JSON array with S3 paths | +| **Linkage** | Via `room_name` + `start_ts` | FK `meeting_id` (set via time-based match) | + +**Critical Behavior:** Recording **stops/restarts** create **separate recording objects** with unique IDs. 
+ +--- + +## Entity Relationships + +### Database Schema Relationships + +```sql +-- Simplified schema showing key relationships + +TABLE room ( + id VARCHAR PRIMARY KEY, + name VARCHAR UNIQUE, + platform VARCHAR -- 'whereby' | 'daily' +) + +TABLE meeting ( + id VARCHAR PRIMARY KEY, + room_id VARCHAR REFERENCES room(id) ON DELETE CASCADE, -- nullable + room_name VARCHAR, -- Daily.co room name (timestamped) + start_date TIMESTAMP, + platform VARCHAR +) + +TABLE recording ( + id VARCHAR PRIMARY KEY, -- Daily.co recording_id + meeting_id VARCHAR, -- FK to meeting (set via time-based match) + bucket_name VARCHAR, + object_key VARCHAR, -- S3 prefix + track_keys JSON, -- Array of S3 keys for multitrack + recorded_at TIMESTAMP +) + +TABLE transcript ( + id VARCHAR PRIMARY KEY, + recording_id VARCHAR, -- nullable FK + meeting_id VARCHAR, -- nullable FK + room_id VARCHAR, -- nullable FK + participants JSON, -- [{id, speaker, name, user_id}, ...] + title VARCHAR, + long_summary VARCHAR, + webvtt TEXT +) +``` + +**Relationship Cardinalities:** +``` +1 Room → N Meetings +1 Meeting → N Recordings (common: 1-21 recordings per meeting) +1 Recording → 1 Transcript +1 Meeting → N Transcripts (via recordings) +``` + +--- + +## Recording Multiplicity + +### Why Multiple Recordings Per Meeting? + +Daily.co creates a **new recording object** (new ID, new files) whenever recording stops and restarts. This happens due to: + +1. **Manual stop/start** - User clicks stop, then start recording again +2. **Network reconnection** - Participant drops, reconnects → triggers restart +3. **Participant rejoin** - Last participant leaves, new one joins → new session + +--- + +## Session Identifiers Explained + +### The Hidden Entity: Daily.co Meeting Session + +Daily.co has an **implicit ephemeral entity** that sits between Room and Recording: + +``` +Daily.co Room: "daily-private-igor-20260110042117" + │ + ├─ Daily.co Meeting Session #1 (mtgSessionId: c04334de...) 
+ │ └─ Recording #3 (f4a50f94) - 4s, 1 track + │ + └─ Daily.co Meeting Session #2 (mtgSessionId: 4cdae3c0...) + ├─ Recording #2 (b0fa94da) - 80s, 2 tracks ← recording stopped + └─ Recording #1 (05edf519) - 62s, 1 track ← then restarted +``` + +**Daily.co Meeting Session:** +- **Lifecycle:** Starts when first participant joins, ends when last participant leaves +- **Identifier:** `mtgSessionId` (generated by Daily.co) +- **Persistence:** Ephemeral - new ID if everyone leaves and someone rejoins +- **Relationship:** 1 Session → N Recordings (if recording stops/restarts during session) + +**Key Insight:** Multiple recordings can share the same `mtgSessionId` if recording was stopped and restarted while participants remained connected. + +### mtgSessionId (Meeting Session Identifier) + +`mtgSessionId` identifies a **Daily.co meeting session** (not individual participants, not a room). + +### session_id (Per-Participant) + +**Different concept:** Per-participant connection identifier from webhooks. + +**Reflector Tracking:** `daily_participant_session` table +```sql +TABLE daily_participant_session ( + id VARCHAR PRIMARY KEY, -- {meeting_id}:{user_id}:{joined_at_ms} + meeting_id VARCHAR, + session_id VARCHAR, -- From webhook (per-participant) + user_id VARCHAR, + user_name VARCHAR, + joined_at TIMESTAMP, + left_at TIMESTAMP +) +``` +--- + +## Time-Based Matching + +### Problem Statement + +Daily.co's recordings API does not reliably return `mtgSessionId`, making it impossible to directly link recordings to meetings via Daily.co's identifiers. + +**Example API response:** +```json +{ + "id": "recording-uuid", + "room_name": "daily-private-igor-20260110042117", + "start_ts": 1768018896, + "mtgSessionId": null ← Missing! 
+} +``` + +### Solution: Time-Based Matching + +**Implementation:** `reflector/db/meetings.py:get_by_room_name_and_time()` + +The worker looks up the meeting by the recording's Daily.co `room_name` and its start time (`start_ts`), searching within a 1-week (168-hour) window, and stores the match in `recording.meeting_id`. On reprocessing, the stored `recording.meeting_id` is used directly instead of matching again. + +--- + +## Multitrack Recording Details + +### track_keys JSON Array + +**Schema:** `recording.track_keys` (JSON, nullable) +```sql +-- Example recording with 2 audio tracks +{ + "id": "b0fa94da-73b5-4f95-9239-5216a682a505", + "track_keys": [ + "igormonadical/daily-private-igor-20260110042117/1768018896877-890c0eae-e186-4534-a7bd-7c794b7d6d7f-cam-audio-1768018914565", + "igormonadical/daily-private-igor-20260110042117/1768018896877-9660e8e9-4297-4f17-951d-0b2bf2401803-cam-audio-1768018899286" + ] +} +``` + +**Semantics:** +- `track_keys = null` → Not multitrack (cloud recording) +- `track_keys = []` → Multitrack recording with no audio captured (silence/muted); note that `is_multitrack` below returns `False` for this case +- `track_keys = [...]` → Multitrack with N audio tracks + +**Property:** `recording.is_multitrack` (Python) +```python +@property +def is_multitrack(self) -> bool: + return self.track_keys is not None and len(self.track_keys) > 0 +``` + +### Track Filename Format + +Daily.co multitrack filenames encode timing and participant information: + +**Format:** `{recording_start_ts}-{participant_id}-cam-audio-{track_start_ts}` + +**Example:** `1768018896877-890c0eae-e186-4534-a7bd-7c794b7d6d7f-cam-audio-1768018914565` + +**Parsed Components:** +```python +# reflector/utils/daily.py:25-60 +class DailyRecordingFilename(NamedTuple): + recording_start_ts: int # 1768018896877 (milliseconds) + participant_id: str # 890c0eae-e186-4534-a7bd-7c794b7d6d7f + track_start_ts: int # 1768018914565 (milliseconds) +``` + +**Note:** Browser downloads from S3 add `.webm` extension due to MIME headers, but S3 object keys have no extension. + +### Video Track Filtering + +Daily.co API returns both audio and video tracks, but Reflector only processes audio. 
+ +**Filtering Logic:** `reflector/worker/process.py:660` +```python +track_keys = [t.s3Key for t in recording.tracks if t.type == "audio"] +``` + +**Example API Response:** +```json +{ + "tracks": [ + {"type": "audio", "s3Key": "...cam-audio-1768018914565"}, + {"type": "audio", "s3Key": "...cam-audio-1768018899286"}, + {"type": "video", "s3Key": "...cam-video-1768018897095"} ← Filtered out + ] +} +``` + +**Result:** Only 2 audio tracks stored in `recording.track_keys`, video track discarded. + +**Rationale:** Reflector is an audio transcription system; video not needed for processing. + +### Track-to-Participant Mapping + +**Flow:** +1. Daily.co webhook/polling provides `track_keys` array +2. Each track filename contains `participant_id` +3. Reflector queries Daily.co API: `GET /meetings/{mtgSessionId}/participants` +4. Maps `participant_id` → `user_name` +5. Stores in `transcript.participants` JSON: +```json +[ + { + "id": "890c0eae-e186-4534-a7bd-7c794b7d6d7f", + "speaker": 0, + "name": "test2", + "user_id": "907f2cc1-eaab-435f-8ee2-09185f416b22" + }, + { + "id": "9660e8e9-4297-4f17-951d-0b2bf2401803", + "speaker": 1, + "name": "test", + "user_id": "907f2cc1-eaab-435f-8ee2-09185f416b22" + } +] +``` + +**Diarization:** Multitrack recordings don't need speaker diarization AI — speaker identity comes from separate audio tracks. + +--- + +## Verified Example + +### Meeting: daily-private-igor-20260110042117 + +**Context:** User conducted test recording with start/stop cycles, producing 3 recordings. 
+ +#### Database State + +```sql +-- Meeting +id: 034804b8-cee2-4fb4-94d7-122f6f068a61 +room_name: daily-private-igor-20260110042117 +start_date: 2026-01-10 04:21:17+00 +``` + +#### Daily.co API Response + +```json +[ + { + "id": "f4a50f94-053c-4f9d-bda6-78ad051fbc36", + "room_name": "daily-private-igor-20260110042117", + "start_ts": 1768018885, + "duration": 4, + "status": "finished", + "mtgSessionId": "c04334de-42a0-4c2a-96be-a49b068dca85", + "tracks": [ + {"type": "audio", "s3Key": "...62e8f3ae...cam-audio-1768018885417"} + ] + }, + { + "id": "b0fa94da-73b5-4f95-9239-5216a682a505", + "room_name": "daily-private-igor-20260110042117", + "start_ts": 1768018896, + "duration": 80, + "status": "finished", + "mtgSessionId": "4cdae3c0-86cb-4578-8a6d-3a228bb48345", + "tracks": [ + {"type": "audio", "s3Key": "...890c0eae...cam-audio-1768018914565"}, + {"type": "audio", "s3Key": "...9660e8e9...cam-audio-1768018899286"}, + {"type": "video", "s3Key": "...9660e8e9...cam-video-1768018897095"} + ] + }, + { + "id": "05edf519-9048-4b49-9a75-73e9826fd950", + "room_name": "daily-private-igor-20260110042117", + "start_ts": 1768018914, + "duration": 62, + "status": "finished", + "mtgSessionId": "4cdae3c0-86cb-4578-8a6d-3a228bb48345", + "tracks": [ + {"type": "audio", "s3Key": "...890c0eae...cam-audio-1768018914948"} + ] + } +] +``` + +**Key Observations:** +- 3 recording objects returned by Daily.co +- 2 different `mtgSessionId` values (2 different meeting instances) +- Recording #2 has 3 tracks (2 audio + 1 video) +- Timestamps: 1768018885 → 1768018896 (+11s) → 1768018914 (+18s) + +#### Reflector Database + +**Recordings:** +``` +┌──────────────────────────────────────┬──────────────┬────────────┬──────────────────────────────────────┐ +│ id │ track_count │ duration │ mtgSessionId │ +├──────────────────────────────────────┼──────────────┼────────────┼──────────────────────────────────────┤ +│ f4a50f94-053c-4f9d-bda6-78ad051fbc36 │ 1 │ 4s │ c04334de-42a0-4c2a-96be-a49b068dca85 │ +│ 
b0fa94da-73b5-4f95-9239-5216a682a505 │ 2 (video=0) │ 80s │ 4cdae3c0-86cb-4578-8a6d-3a228bb48345 │ +│ 05edf519-9048-4b49-9a75-73e9826fd950 │ 1 │ 62s │ 4cdae3c0-86cb-4578-8a6d-3a228bb48345 │ +└──────────────────────────────────────┴──────────────┴────────────┴──────────────────────────────────────┘ +``` +**Note:** Recording #2 has 2 audio tracks (video filtered out), not 3. + +**Transcripts:** +``` +┌──────────────────────────────────────┬──────────────────────────────────────┬──────────────┬──────────────────────────────────────────────┐ +│ id │ recording_id │ participants │ title │ +├──────────────────────────────────────┼──────────────────────────────────────┼──────────────┼──────────────────────────────────────────────┤ +│ 17149b1f-546c-4837-80a0-f8140bd16592 │ f4a50f94-053c-4f9d-bda6-78ad051fbc36 │ 1 (test) │ (empty - no speech) │ +│ 49801332-3222-4c11-bdb2-375479fc87f2 │ b0fa94da-73b5-4f95-9239-5216a682a505 │ 2 (test, │ "Examination and Validation Procedures │ +│ │ │ test2) │ Review" │ +│ e5271e12-20fb-42d2-b5a8-21438abadef9 │ 05edf519-9048-4b49-9a75-73e9826fd950 │ 1 (test2) │ "Technical Sound Check Procedure Review" │ +└──────────────────────────────────────┴──────────────────────────────────────┴──────────────┴──────────────────────────────────────────────┘ +``` + +**Transcript Content:** + +*Transcript #1* (17149b1f): Empty WebVTT (no audio captured) + +*Transcript #2* (49801332): +```webvtt +WEBVTT + +00:00:03.109 --> 00:00:05.589 +Test, test, test. Test, test, test, test, test. + +00:00:19.829 --> 00:00:22.710 +Test test test test test test test test test test test. +``` +**AI-Generated Summary:** +> "The meeting focused on the critical importance of rigorous testing for ensuring reliability and quality, with test and test2 emphasizing the need for a structured testing framework and meticulous documentation..." 
+ +*Transcript #3* (e5271e12): +```webvtt +WEBVTT + +00:00:02.029 --> 00:00:04.910 +Test, test, test, test, test, test, test, test, test, test, test. +``` + +#### Validation: track_keys → participants + +**Recording #2 (b0fa94da) tracks:** +```json +[ + ".../890c0eae-e186-4534-a7bd-7c794b7d6d7f-cam-audio-...", + ".../9660e8e9-4297-4f17-951d-0b2bf2401803-cam-audio-..." +] +``` + +**Transcript #2 (49801332) participants:** +```json +[ + {"id": "890c0eae-e186-4534-a7bd-7c794b7d6d7f", "speaker": 0, "name": "test2"}, + {"id": "9660e8e9-4297-4f17-951d-0b2bf2401803", "speaker": 1, "name": "test"} +] +``` + +### Data Flow + +``` +Daily.co API: 3 recordings + ↓ +Polling: _poll_raw_tracks_recordings() + ↓ +Worker: process_multitrack_recording.delay() × 3 + ↓ +DB: 3 recording rows created + ↓ +Pipeline: Audio processing + transcription × 3 + ↓ +DB: 3 transcript rows created (1:1 with recordings) + ↓ +UI: User sees 3 separate transcripts +``` + +**Result:** ✅ 1:1 Recording → Transcript relationship maintained. 
+ + +--- +**Document Version:** 1.0 +**Last Verified:** 2026-01-15 +**Data Source:** Production database + Daily.co API inspection From 863af9a21b4d96e8a95f3f8d0cb765cf7fa89f36 Mon Sep 17 00:00:00 2001 From: Igor Loskutov Date: Wed, 21 Jan 2026 09:48:18 -0500 Subject: [PATCH 19/20] logging style fix --- server/reflector/views/meetings.py | 49 ++++++++---------------------- 1 file changed, 13 insertions(+), 36 deletions(-) diff --git a/server/reflector/views/meetings.py b/server/reflector/views/meetings.py index 2e78d0bb..44adf500 100644 --- a/server/reflector/views/meetings.py +++ b/server/reflector/views/meetings.py @@ -1,5 +1,4 @@ import json -import logging from datetime import datetime, timezone from typing import Annotated, Any, Optional from uuid import UUID @@ -16,11 +15,10 @@ meetings_controller, ) from reflector.db.rooms import rooms_controller +from reflector.logger import logger from reflector.utils.string import NonEmptyString from reflector.video_platforms.factory import create_platform_client -logger = logging.getLogger(__name__) - router = APIRouter() @@ -104,6 +102,13 @@ async def start_recording( if not meeting: raise HTTPException(status_code=404, detail="Meeting not found") + log = logger.bind( + meeting_id=meeting_id, + room_name=meeting.room_name, + recording_type=body.type, + instance_id=body.instanceId, + ) + try: client = create_platform_client("daily") result = await client.start_recording( @@ -112,15 +117,7 @@ async def start_recording( instance_id=body.instanceId, ) - logger.info( - f"Started {body.type} recording via REST API", - extra={ - "meeting_id": meeting_id, - "room_name": meeting.room_name, - "recording_type": body.type, - "instance_id": body.instanceId, - }, - ) + log.info(f"Started {body.type} recording via REST API") return {"status": "ok", "result": result} @@ -133,42 +130,22 @@ async def start_recording( # "has an active stream" means recording already started by another participant # This is SUCCESS from business logic 
perspective - return 200 if "has an active stream" in error_info: - logger.info( - f"{body.type} recording already active (started by another participant)", - extra={ - "meeting_id": meeting_id, - "room_name": meeting.room_name, - "recording_type": body.type, - "instance_id": body.instanceId, - }, + log.info( + f"{body.type} recording already active (started by another participant)" ) return {"status": "already_active", "instanceId": str(body.instanceId)} except (json.JSONDecodeError, KeyError): pass # Fall through to error handling # All other Daily.co API errors - logger.error( - f"Failed to start {body.type} recording", - extra={ - "meeting_id": meeting_id, - "recording_type": body.type, - "error": str(e), - }, - ) + log.error(f"Failed to start {body.type} recording", error=str(e)) raise HTTPException( status_code=500, detail=f"Failed to start recording: {str(e)}" ) except Exception as e: # Non-Daily.co errors - logger.error( - f"Failed to start {body.type} recording", - extra={ - "meeting_id": meeting_id, - "recording_type": body.type, - "error": str(e), - }, - ) + log.error(f"Failed to start {body.type} recording", error=str(e)) raise HTTPException( status_code=500, detail=f"Failed to start recording: {str(e)}" ) From d10f098ceb69dcc1129ca070f16bc9e816cd7b6c Mon Sep 17 00:00:00 2001 From: Igor Loskutov Date: Thu, 22 Jan 2026 11:24:54 -0500 Subject: [PATCH 20/20] migration merge --- ..._merge_cloud_recording_and_celery_heads.py | 23 +++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 server/migrations/versions/e69f08ead8ea_merge_cloud_recording_and_celery_heads.py diff --git a/server/migrations/versions/e69f08ead8ea_merge_cloud_recording_and_celery_heads.py b/server/migrations/versions/e69f08ead8ea_merge_cloud_recording_and_celery_heads.py new file mode 100644 index 00000000..bdf8691d --- /dev/null +++ b/server/migrations/versions/e69f08ead8ea_merge_cloud_recording_and_celery_heads.py @@ -0,0 +1,23 @@ +"""merge cloud recording and celery 
heads + +Revision ID: e69f08ead8ea +Revises: 1b1e6a6fc465, 80beb1ea3269 +Create Date: 2026-01-21 21:39:10.326841 + +""" + +from typing import Sequence, Union + +# revision identifiers, used by Alembic. +revision: str = "e69f08ead8ea" +down_revision: Union[str, None] = ("1b1e6a6fc465", "80beb1ea3269") +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + pass + + +def downgrade() -> None: + pass