feat: add Turn class and implement turns property in Transcript model for aggregating utterances
This commit is contained in:
@@ -12,10 +12,54 @@ class Utterance(BaseModel):
|
||||
value: str
|
||||
|
||||
|
||||
class Turn(BaseModel):
    """A single speaker turn: one or more consecutive utterances merged together.

    Instances are produced by ``Transcript.turns``, which collapses each run
    of same-speaker utterances into one ``Turn``.
    """

    # Who is speaking for the whole turn.
    speaker: Literal["Interviewer", "Participant"]
    # Text of the merged utterances, joined with single spaces.
    value: str
    # Start time copied from the first utterance in the turn.
    start_time: float
    # End time copied from the last utterance in the turn.
    end_time: float
    # Number of utterances that were merged into this turn.
    utterance_count: int
|
||||
|
||||
|
||||
class Transcript(Document):
|
||||
transcript_id: str
|
||||
utterances: list[Utterance]
|
||||
|
||||
@property
def turns(self) -> list[Turn]:
    """Return the transcript grouped into speaker turns.

    Walks ``self.utterances`` in order and merges every run of consecutive
    utterances that share a speaker into a single ``Turn``.

    Returns:
        One ``Turn`` per run, in transcript order; an empty list when
        there are no utterances.
    """
    if not self.utterances:
        return []

    # Each inner list is one run of back-to-back utterances by one speaker.
    runs: list[list[Utterance]] = []
    for item in self.utterances:
        # Extend the open run while the speaker is unchanged; otherwise
        # this utterance opens a new run.
        if runs and item.speaker == runs[-1][-1].speaker:
            runs[-1].append(item)
        else:
            runs.append([item])

    return [self._create_turn(run) for run in runs]
|
||||
|
||||
def _create_turn(self, batch: list[Utterance]) -> Turn:
    """Collapse a non-empty run of utterances into one ``Turn``.

    Args:
        batch: Utterances to merge, in order. Must contain at least one
            item; indexing an empty list raises ``IndexError``.

    Returns:
        A ``Turn`` whose speaker and start time come from the first
        utterance, whose end time comes from the last, and whose text is
        the utterance values joined with single spaces.
    """
    opening = batch[0]
    closing = batch[-1]
    merged_text = " ".join([entry.value for entry in batch])
    return Turn(
        speaker=opening.speaker,
        value=merged_text,
        start_time=opening.start_time,
        end_time=closing.end_time,
        utterance_count=len(batch),
    )
|
||||
|
||||
class Settings:
|
||||
name = "transcripts"
|
||||
indexes: ClassVar = [
|
||||
|
||||
Reference in New Issue
Block a user