feat: add Turn class and implement turns property in Transcript model for aggregating utterances
This commit is contained in:
@@ -12,10 +12,54 @@ class Utterance(BaseModel):
|
|||||||
value: str
|
value: str
|
||||||
|
|
||||||
|
|
||||||
|
class Turn(BaseModel):
    """One speaker's uninterrupted stretch of a transcript.

    A turn is the merged form of one or more utterances; ``Transcript``
    builds these by collapsing adjacent utterances that share a speaker.
    """

    # The speaker of the turn (copied from the first merged utterance).
    speaker: Literal["Interviewer", "Participant"]
    # Text of the merged utterances, joined with single spaces.
    value: str
    # ``start_time`` of the first merged utterance.
    start_time: float
    # ``end_time`` of the last merged utterance.
    end_time: float
    # Number of utterances that were merged into this turn.
    utterance_count: int
|
||||||
|
|
||||||
|
|
||||||
class Transcript(Document):
|
class Transcript(Document):
|
||||||
transcript_id: str
|
transcript_id: str
|
||||||
utterances: list[Utterance]
|
utterances: list[Utterance]
|
||||||
|
|
||||||
|
@property
def turns(self) -> list[Turn]:
    """Aggregate consecutive utterances from the same speaker into turns.

    Utterances are walked in their stored order. Every maximal run of
    adjacent utterances with the same ``speaker`` is collapsed into one
    ``Turn``; a speaker who talks again after another speaker starts a
    new turn rather than extending an earlier one.

    Returns:
        One ``Turn`` per run, in transcript order. The list is empty when
        the transcript has no utterances.
    """
    # Imported locally so this property stays self-contained.
    from itertools import groupby

    # groupby only merges *adjacent* items with equal keys, which is exactly
    # the "consecutive utterances by one speaker" rule; no sorting is wanted.
    return [
        self._create_turn(list(run))
        for _, run in groupby(self.utterances, key=lambda u: u.speaker)
    ]
|
||||||
|
|
||||||
|
def _create_turn(self, batch: list[Utterance]) -> Turn:
    """Collapse a run of utterances into a single ``Turn``.

    Args:
        batch: Utterances to merge, in order. Must be non-empty, because
            the first and last elements are indexed directly. The speaker
            is taken from the first element only.

    Returns:
        A ``Turn`` whose text is the utterance values joined by single
        spaces, spanning from the first utterance's ``start_time`` to the
        last utterance's ``end_time``.
    """
    first, last = batch[0], batch[-1]
    text = " ".join(item.value for item in batch)
    return Turn(
        speaker=first.speaker,
        value=text,
        start_time=first.start_time,
        end_time=last.end_time,
        utterance_count=len(batch),
    )
|
||||||
|
|
||||||
class Settings:
|
class Settings:
|
||||||
name = "transcripts"
|
name = "transcripts"
|
||||||
indexes: ClassVar = [
|
indexes: ClassVar = [
|
||||||
|
|||||||
Reference in New Issue
Block a user