From e4257333689b9fc4c5ec9db3e71c9b1f5a43e494 Mon Sep 17 00:00:00 2001
From: Santiago Martinez-Avial
Date: Mon, 22 Dec 2025 17:46:54 +0100
Subject: [PATCH] feat: add Turn class and implement turns property in
 Transcript model for aggregating utterances

---
 src/helia/models/transcript.py | 62 ++++++++++++++++++++++++++++++++++
 1 file changed, 62 insertions(+)

diff --git a/src/helia/models/transcript.py b/src/helia/models/transcript.py
index cf5e56a..a1a8a03 100644
--- a/src/helia/models/transcript.py
+++ b/src/helia/models/transcript.py
@@ -12,10 +12,72 @@ class Utterance(BaseModel):
     value: str
 
 
+class Turn(BaseModel):
+    """A run of consecutive utterances by one speaker, merged into one unit."""
+
+    speaker: Literal["Interviewer", "Participant"]
+    # Utterance texts of the run joined with single spaces.
+    value: str
+    # start_time of the first utterance in the run.
+    start_time: float
+    # end_time of the last utterance in the run.
+    end_time: float
+    # Number of utterances merged into this turn.
+    utterance_count: int
+
+
 class Transcript(Document):
     transcript_id: str
     utterances: list[Utterance]
 
+    @property
+    def turns(self) -> list[Turn]:
+        """
+        Aggregates consecutive utterances from the same speaker into a single Turn.
+
+        Returns the turns in transcript order; an empty transcript yields an
+        empty list. The list is rebuilt on every access.
+        """
+        if not self.utterances:
+            return []
+
+        turns: list[Turn] = []
+        # Utterances of the turn currently being accumulated (one speaker).
+        current_batch: list[Utterance] = []
+
+        for utterance in self.utterances:
+            if not current_batch:
+                current_batch.append(utterance)
+                continue
+
+            if utterance.speaker == current_batch[-1].speaker:
+                current_batch.append(utterance)
+            else:
+                # Speaker changed: close the finished run and start a new one.
+                turns.append(self._create_turn(current_batch))
+                current_batch = [utterance]
+
+        # Flush the final run, which the loop never closes itself.
+        if current_batch:
+            turns.append(self._create_turn(current_batch))
+
+        return turns
+
+    def _create_turn(self, batch: list[Utterance]) -> Turn:
+        """
+        Builds a Turn from a non-empty batch of same-speaker utterances.
+
+        Texts are joined with single spaces; the time span runs from the
+        first utterance's start_time to the last utterance's end_time.
+        """
+        return Turn(
+            speaker=batch[0].speaker,
+            value=" ".join(u.value for u in batch),
+            start_time=batch[0].start_time,
+            end_time=batch[-1].end_time,
+            utterance_count=len(batch),
+        )
+
     class Settings:
         name = "transcripts"
         indexes: ClassVar = [