feat: add Turn class and implement turns property in Transcript model for aggregating utterances

This commit is contained in:
Santiago Martinez-Avial
2025-12-22 17:46:54 +01:00
parent 5c6d87dab7
commit e425733368

View File

@@ -12,10 +12,54 @@ class Utterance(BaseModel):
value: str
class Turn(BaseModel):
    """One speaker turn: consecutive same-speaker utterances merged together."""

    # Who is talking for the whole turn; restricted to these two labels.
    speaker: Literal["Interviewer", "Participant"]
    # Text of the turn.
    value: str
    # Time bounds of the turn (units are not visible here; presumably the
    # same units as the utterance timestamps -- confirm against Utterance).
    start_time: float
    end_time: float
    # Number of utterances that were merged into this turn.
    utterance_count: int
class Transcript(Document):
transcript_id: str
utterances: list[Utterance]
@property
def turns(self) -> list[Turn]:
    """Return the transcript's utterances merged into speaker turns.

    Walks ``self.utterances`` in order and groups each maximal run of
    consecutive utterances that share the same ``speaker``. Every run is
    converted into a ``Turn`` through ``_create_turn``. An empty (or
    otherwise falsy) ``utterances`` value yields an empty list. The list
    is rebuilt on every access.
    """
    runs: list[list[Utterance]] = []
    # ``or ()`` keeps a falsy ``utterances`` value mapping to "no turns".
    for item in self.utterances or ():
        # All members of a run share one speaker, so comparing against the
        # run's first entry is enough to detect a change of speaker.
        if runs and item.speaker == runs[-1][0].speaker:
            runs[-1].append(item)
        else:
            runs.append([item])
    return [self._create_turn(run) for run in runs]
def _create_turn(self, batch: list[Utterance]) -> Turn:
    """Build a single ``Turn`` from a non-empty run of utterances.

    The speaker and start time come from the first utterance, the end time
    from the last one, and the text is every utterance's ``value`` joined
    with a single space. ``batch`` must contain at least one item; an
    empty list raises ``IndexError``.
    """
    opening = batch[0]
    closing = batch[-1]
    merged_text = " ".join(entry.value for entry in batch)
    return Turn(
        speaker=opening.speaker,
        value=merged_text,
        start_time=opening.start_time,
        end_time=closing.end_time,
        utterance_count=len(batch),
    )
class Settings:
name = "transcripts"
indexes: ClassVar = [