Skip to content

Commit 47105ce

Browse files
authored
fix: useTranscription now handles data channel events (#13) (#22)
Fix `useTranscription` never firing because the backend sends transcript data over `RoomEvent.DataReceived` (data channel) rather than `RoomEvent.TranscriptionReceived`. Closes #13

### What changed

- Extract shared data-channel parsing into `src/utils/parseTranscription.ts` (with 18 tests)
- Wire `useTranscription` to listen on `DataReceived` alongside `TranscriptionReceived`
- Refactor `useTranscript` to import from the shared module (no behavior change)

### Verify

Both `useTranscription` (callback) and `useTranscript` (accumulated state) should receive transcript entries during a live avatar session.
1 parent 5eccdb1 commit 47105ce

5 files changed

Lines changed: 293 additions & 82 deletions

File tree

CHANGELOG.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
1313

1414
### Fixed
1515

16-
- `useTranscript` merges data-channel transcript JSON from `RoomEvent.DataReceived` when `{ mergeDataChannelSegments: true }` (default): LiveKit-style `{ segments: [...] }` and Runway worker `{ type: "transcription", role, turn, text }` streaming deltas (concatenated per role+turn). Set `{ mergeDataChannelSegments: false }` to use only `RoomEvent.TranscriptionReceived`.
16+
- `useTranscription` and `useTranscript` now receive transcription data from all supported transports ([#13](https://github.com/runwayml/avatars-sdk-react/issues/13))
1717

1818
## [0.12.0] - 2026-04-02
1919

src/hooks/useTranscript.ts

Lines changed: 5 additions & 80 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,11 @@ import type { Participant, TranscriptionSegment } from 'livekit-client';
55
import { RoomEvent } from 'livekit-client';
66
import { useEffect, useRef, useState } from 'react';
77
import type { TranscriptionEntry } from '../types';
8+
import {
9+
tryDecodeJSON,
10+
tryParseFlatDelta,
11+
tryParseSegmentArray,
12+
} from '../utils/parseTranscription';
813

914
export interface UseTranscriptOptions {
1015
/** Include interim (non-final) segments. Default: `false` */
@@ -22,86 +27,6 @@ export interface UseTranscriptOptions {
2227

2328
const DEFAULT_BUFFER_SIZE = 100;
2429

25-
type DataChannelJSON = Record<string, unknown>;
26-
27-
function tryDecodeJSON(payload: Uint8Array): DataChannelJSON | null {
28-
try {
29-
const text = new TextDecoder().decode(payload).trim();
30-
if (!text.startsWith('{')) return null;
31-
const parsed: unknown = JSON.parse(text);
32-
return parsed && typeof parsed === 'object'
33-
? (parsed as DataChannelJSON)
34-
: null;
35-
} catch {
36-
return null;
37-
}
38-
}
39-
40-
/**
41-
* Parse LiveKit-style segment arrays: `{ segments: [{ id, text, final? }] }`
42-
* Also handles wrapper shapes like `{ type: "transcription", segments: [...] }`
43-
* and `{ data: { segments: [...] } }`.
44-
*/
45-
function tryParseSegmentArray(
46-
root: DataChannelJSON,
47-
participant?: Participant,
48-
): Array<TranscriptionEntry> | null {
49-
const type = root.type;
50-
const typeAllowed =
51-
type === undefined ||
52-
type === 'transcription' ||
53-
type === 'transcript' ||
54-
type === 'voice_transcript';
55-
if (!typeAllowed) return null;
56-
57-
let segments: unknown = root.segments;
58-
if (
59-
!Array.isArray(segments) &&
60-
root.data &&
61-
typeof root.data === 'object'
62-
) {
63-
segments = (root.data as DataChannelJSON).segments;
64-
}
65-
if (!Array.isArray(segments)) return null;
66-
67-
const identity = participant?.identity ?? 'unknown';
68-
const out: Array<TranscriptionEntry> = [];
69-
70-
for (const item of segments) {
71-
if (!item || typeof item !== 'object') continue;
72-
const seg = item as DataChannelJSON;
73-
if (typeof seg.id !== 'string' || typeof seg.text !== 'string') continue;
74-
out.push({
75-
id: seg.id,
76-
text: seg.text,
77-
final: typeof seg.final === 'boolean' ? seg.final : true,
78-
participantIdentity:
79-
typeof seg.participantIdentity === 'string'
80-
? seg.participantIdentity
81-
: identity,
82-
});
83-
}
84-
85-
return out.length > 0 ? out : null;
86-
}
87-
88-
/**
89-
* Parse Runway flat streaming deltas: `{ type: "transcription", role, turn, text }`
90-
* These arrive one delta at a time and are accumulated per role+turn.
91-
*/
92-
function tryParseFlatDelta(
93-
root: DataChannelJSON,
94-
): { role: string; turn: number; textDelta: string } | null {
95-
if (root.type !== 'transcription') return null;
96-
if (typeof root.text !== 'string') return null;
97-
98-
return {
99-
role: typeof root.role === 'string' ? root.role : 'assistant',
100-
turn: typeof root.turn === 'number' ? root.turn : 0,
101-
textDelta: root.text,
102-
};
103-
}
104-
10530
/**
10631
* Hook that returns an accumulated, deduplicated transcript from the session.
10732
*

src/hooks/useTranscription.ts

Lines changed: 48 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,11 @@ import type { Participant, TranscriptionSegment } from 'livekit-client';
55
import { RoomEvent } from 'livekit-client';
66
import { useEffect, useRef } from 'react';
77
import type { TranscriptionHandler } from '../types';
8+
import {
9+
tryDecodeJSON,
10+
tryParseFlatDelta,
11+
tryParseSegmentArray,
12+
} from '../utils/parseTranscription';
813

914
/**
1015
* Hook to listen for transcription events from the session.
@@ -13,7 +18,11 @@ import type { TranscriptionHandler } from '../types';
1318
* only final segments are delivered. Pass `{ interim: true }` to also
1419
* receive partial/streaming segments.
1520
*
16-
* Must be used within an AvatarSession or AvatarCall component.
21+
* Listens to both native `RoomEvent.TranscriptionReceived` and data-channel
22+
* JSON (`RoomEvent.DataReceived`) so transcripts work regardless of which
23+
* transport the backend uses.
24+
*
25+
* Must be used within an AvatarSession, AvatarProvider, or AvatarCall component.
1726
*
1827
* @example
1928
* ```tsx
@@ -37,7 +46,11 @@ export function useTranscription(
3746
const interimRef = useRef(options?.interim ?? false);
3847
interimRef.current = options?.interim ?? false;
3948

49+
const flatAccRef = useRef(new Map<string, string>());
50+
4051
useEffect(() => {
52+
flatAccRef.current.clear();
53+
4154
function handleTranscription(
4255
segments: Array<TranscriptionSegment>,
4356
participant?: Participant,
@@ -54,9 +67,43 @@ export function useTranscription(
5467
}
5568
}
5669

70+
/**
 * Handles a `RoomEvent.DataReceived` payload that may carry transcript JSON.
 *
 * The payload is decoded with `tryDecodeJSON`; a payload that does not decode
 * is ignored. A segment-array message is forwarded to the handler entry by
 * entry, dropping non-final entries unless `interim` is enabled. Otherwise a
 * flat delta is appended to the text accumulated for its role+turn and the
 * accumulated text so far is forwarded.
 *
 * @param payload - Raw bytes received on the data channel.
 * @param participant - Sender, when known. Passed to `tryParseSegmentArray`
 *   and used as the `participantIdentity` of flat-delta entries.
 */
function handleDataReceived(
71+
payload: Uint8Array,
72+
participant?: Participant,
73+
) {
74+
const json = tryDecodeJSON(payload);
75+
if (!json) return;
76+
77+
const segments = tryParseSegmentArray(json, participant);
78+
if (segments) {
79+
for (const entry of segments) {
80+
// Same gate as the interim option: skip partial entries unless enabled.
if (!interimRef.current && !entry.final) continue;
81+
handlerRef.current(entry);
82+
}
83+
// Handled as a segment message; do not also try the flat-delta shape.
return;
84+
}
85+
86+
const delta = tryParseFlatDelta(json);
87+
if (delta) {
88+
const identity = participant?.identity ?? 'unknown';
89+
// One accumulator slot per role+turn; the key doubles as the entry id.
const accKey = `runway-transcription-${delta.role}-${delta.turn}`;
90+
const prev = flatAccRef.current.get(accKey) ?? '';
91+
const nextText = prev + delta.textDelta;
92+
flatAccRef.current.set(accKey, nextText);
93+
// NOTE(review): flat deltas are always emitted with final: false and are
// not gated by interimRef, unlike the segment branch above. Confirm this
// is intended for callers that leave the interim option off.
handlerRef.current({
94+
id: accKey,
95+
text: nextText,
96+
final: false,
97+
participantIdentity: identity,
98+
});
99+
}
100+
}
101+
57102
room.on(RoomEvent.TranscriptionReceived, handleTranscription);
103+
room.on(RoomEvent.DataReceived, handleDataReceived);
58104
return () => {
59105
room.off(RoomEvent.TranscriptionReceived, handleTranscription);
106+
room.off(RoomEvent.DataReceived, handleDataReceived);
60107
};
61108
}, [room]);
62109
}
Lines changed: 158 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,158 @@
1+
import { describe, expect, it } from 'bun:test';
2+
import {
3+
tryDecodeJSON,
4+
tryParseFlatDelta,
5+
tryParseSegmentArray,
6+
} from './parseTranscription';
7+
8+
const encode = (obj: unknown) =>
9+
new TextEncoder().encode(JSON.stringify(obj));
10+
11+
describe('tryDecodeJSON', () => {
12+
it('decodes a valid JSON object payload', () => {
13+
const result = tryDecodeJSON(encode({ type: 'transcription', text: 'hi' }));
14+
expect(result).toEqual({ type: 'transcription', text: 'hi' });
15+
});
16+
17+
it('returns null for non-JSON payloads', () => {
18+
expect(tryDecodeJSON(new Uint8Array([0xff, 0xfe]))).toBeNull();
19+
});
20+
21+
it('returns null for JSON arrays', () => {
22+
expect(tryDecodeJSON(new TextEncoder().encode('[1,2,3]'))).toBeNull();
23+
});
24+
25+
it('returns null for empty input', () => {
26+
expect(tryDecodeJSON(new Uint8Array([]))).toBeNull();
27+
});
28+
});
29+
30+
describe('tryParseFlatDelta', () => {
31+
it('parses a Runway flat transcription delta', () => {
32+
const result = tryParseFlatDelta({
33+
type: 'transcription',
34+
role: 'assistant',
35+
turn: 0,
36+
text: ' hello',
37+
});
38+
expect(result).toEqual({ role: 'assistant', turn: 0, textDelta: ' hello' });
39+
});
40+
41+
it('parses user role deltas', () => {
42+
const result = tryParseFlatDelta({
43+
type: 'transcription',
44+
role: 'user',
45+
turn: 1,
46+
text: 'How are you?',
47+
});
48+
expect(result).toEqual({
49+
role: 'user',
50+
turn: 1,
51+
textDelta: 'How are you?',
52+
});
53+
});
54+
55+
it('defaults role to assistant and turn to 0 when missing', () => {
56+
const result = tryParseFlatDelta({ type: 'transcription', text: 'hi' });
57+
expect(result).toEqual({ role: 'assistant', turn: 0, textDelta: 'hi' });
58+
});
59+
60+
it('returns null when type is not transcription', () => {
61+
expect(
62+
tryParseFlatDelta({ type: 'client_event', text: 'hi' }),
63+
).toBeNull();
64+
});
65+
66+
it('returns null when text is missing', () => {
67+
expect(
68+
tryParseFlatDelta({ type: 'transcription', role: 'assistant', turn: 0 }),
69+
).toBeNull();
70+
});
71+
});
72+
73+
describe('tryParseSegmentArray', () => {
74+
it('parses a segments array with explicit type', () => {
75+
const result = tryParseSegmentArray({
76+
type: 'transcription',
77+
segments: [{ id: 'seg-1', text: 'Hello world', final: true }],
78+
});
79+
expect(result).toEqual([
80+
{
81+
id: 'seg-1',
82+
text: 'Hello world',
83+
final: true,
84+
participantIdentity: 'unknown',
85+
},
86+
]);
87+
});
88+
89+
it('parses segments without a type field', () => {
90+
const result = tryParseSegmentArray({
91+
segments: [{ id: 'seg-2', text: 'test' }],
92+
});
93+
expect(result).toHaveLength(1);
94+
expect(result?.[0].id).toBe('seg-2');
95+
expect(result?.[0].final).toBe(true);
96+
});
97+
98+
it('uses participant identity when provided', () => {
99+
const participant = { identity: 'agent-123' } as { identity: string };
100+
const result = tryParseSegmentArray(
101+
{ segments: [{ id: 'seg-3', text: 'hi' }] },
102+
participant,
103+
);
104+
expect(result?.[0].participantIdentity).toBe('agent-123');
105+
});
106+
107+
it('respects participantIdentity on individual segments', () => {
108+
const result = tryParseSegmentArray({
109+
segments: [
110+
{ id: 'seg-4', text: 'yo', participantIdentity: 'user-456' },
111+
],
112+
});
113+
expect(result?.[0].participantIdentity).toBe('user-456');
114+
});
115+
116+
it('handles nested data.segments shape', () => {
117+
const result = tryParseSegmentArray({
118+
type: 'transcript',
119+
data: { segments: [{ id: 'seg-5', text: 'nested' }] },
120+
});
121+
expect(result).toHaveLength(1);
122+
expect(result?.[0].text).toBe('nested');
123+
});
124+
125+
it('returns null for non-transcript type', () => {
126+
expect(
127+
tryParseSegmentArray({
128+
type: 'client_event',
129+
segments: [{ id: 's', text: 't' }],
130+
}),
131+
).toBeNull();
132+
});
133+
134+
it('returns null when no segments array exists', () => {
135+
expect(
136+
tryParseSegmentArray({ type: 'transcription', text: 'flat delta' }),
137+
).toBeNull();
138+
});
139+
140+
it('skips malformed segment items', () => {
141+
const result = tryParseSegmentArray({
142+
segments: [
143+
{ id: 'good', text: 'valid' },
144+
{ noId: true, text: 'bad' },
145+
null,
146+
'string',
147+
],
148+
});
149+
expect(result).toHaveLength(1);
150+
expect(result?.[0].id).toBe('good');
151+
});
152+
153+
it('returns null when all segments are malformed', () => {
154+
expect(
155+
tryParseSegmentArray({ segments: [{ noId: true }] }),
156+
).toBeNull();
157+
});
158+
});

0 commit comments

Comments
 (0)