feat: add Turn class and implement turns property in Transcript model for aggregating utterances

2025-12-22 17:46:54 +01:00
parent 5c6d87dab7
commit e425733368
1 changed files with 44 additions and 0 deletions
--- a/src/helia/models/transcript.py
+++ b/src/helia/models/transcript.py
@@ -12,10 +12,54 @@ class Utterance(BaseModel):
    value: str
 class Turn(BaseModel):
    """One speaker's contiguous stretch of a transcript.

    Built by ``Transcript._create_turn`` from a batch of consecutive
    utterances that all share the same speaker.
    """

    # Speaker of every utterance merged into this turn.
    speaker: Literal["Interviewer", "Participant"]
    # Text of the merged utterances, joined with single spaces.
    value: str
    # ``start_time`` of the first utterance in the turn.
    start_time: float
    # ``end_time`` of the last utterance in the turn.
    end_time: float
    # Number of utterances that were merged into this turn.
    utterance_count: int
 class Transcript(Document):
    transcript_id: str
    utterances: list[Utterance]
    @property
    def turns(self) -> list[Turn]:
        """Collapse runs of same-speaker utterances into ``Turn`` objects.

        Walks ``self.utterances`` in order. Every maximal run of adjacent
        utterances with the same ``speaker`` becomes one ``Turn`` (built by
        ``_create_turn``). An empty transcript yields an empty list.

        Returns:
            The turns, in the same order as the underlying utterances.
        """
        merged: list[Turn] = []
        run: list[Utterance] = []

        # ``or []`` keeps the "nothing to do" result for any falsy value.
        for item in self.utterances or []:
            # A change of speaker closes the run gathered so far.
            if run and item.speaker != run[-1].speaker:
                merged.append(self._create_turn(run))
                run = []
            run.append(item)

        # Flush the trailing run, which no speaker change has closed yet.
        if run:
            merged.append(self._create_turn(run))

        return merged
    def _create_turn(self, batch: list[Utterance]) -> Turn:
        """Build a single ``Turn`` from a non-empty batch of utterances.

        The speaker and start time come from the first utterance, the end
        time from the last one, and the text is every utterance's ``value``
        joined with a single space.

        Args:
            batch: Utterances to merge; callers pass same-speaker runs.

        Returns:
            The merged ``Turn``.

        Raises:
            IndexError: If ``batch`` is empty.
        """
        opening = batch[0]
        closing = batch[-1]
        text = " ".join([piece.value for piece in batch])
        return Turn(
            speaker=opening.speaker,
            value=text,
            start_time=opening.start_time,
            end_time=closing.end_time,
            utterance_count=len(batch),
        )
    class Settings:
        name = "transcripts"
        indexes: ClassVar = [