WIP
16
.env.example
Normal file
@@ -0,0 +1,16 @@
# LLM Configuration
# Defaults to OpenRouter if not specified

# Base URL for the LLM provider (default: https://openrouter.ai/api/v1)
HELIA_LLM_BASE_URL=https://openrouter.ai/api/v1

# API Key. Checked in order: HELIA_LLM_API_KEY, OPENROUTER_API_KEY, OPENAI_API_KEY
HELIA_LLM_API_KEY=sk-or-your-api-key-here

# Model identifier (default: google/gemini-3.0-pro-preview)
HELIA_LLM_MODEL=google/gemini-3.0-pro-preview

# Neo4j Configuration
NEO4J_URI=bolt://localhost:7687
NEO4J_USER=neo4j
NEO4J_PASSWORD=password
12
.gitignore
vendored
Normal file
@@ -0,0 +1,12 @@
# Python-generated files
__pycache__/
*.py[oc]
build/
dist/
wheels/
*.egg-info

# Virtual environments
.venv

daic-woz/
1
.python-version
Normal file
@@ -0,0 +1 @@
3.13
113
README.md
Normal file
@@ -0,0 +1,113 @@
# Helia

Agentic Interview Framework for ingesting, analyzing, and querying transcript data.

## Project Structure

```
src/helia/
├── agent/
│   └── workflow.py     # LangGraph agent workflow
├── analysis/
│   └── extractor.py    # LLM metadata extraction
├── graph/
│   ├── loader.py       # Neo4j data loading
│   └── schema.py       # Pydantic graph models
├── ingestion/
│   └── parser.py       # Transcript parsing logic
└── main.py             # CLI entry point
```

## Data Flow

```mermaid
graph TD
    A[Transcript File<br/>TSV/TXT] -->|TranscriptParser| B(Utterance Objects)
    B -->|MetadataExtractor<br/>+ OpenAI LLM| C(Enriched UtteranceNodes)
    C -->|GraphLoader| D[(Neo4j Database)]
    E[User Question] -->|LangGraph Agent| F{Router}
    F -->|Graph Tool| D
    F -->|Vector Tool| G[(Vector Store)]
    D --> H[Context]
    G --> H
    H -->|Synthesizer| I[Answer]
```

1. **Ingestion**: `TranscriptParser` reads TSV/TXT files into `Utterance` objects.
2. **Analysis**: `MetadataExtractor` enriches utterances with sentiment and tone using LLMs.
3. **Graph**: `GraphLoader` pushes nodes and relationships to a Neo4j database.
4. **Agent**: A ReAct-style workflow queries graph/vector data to answer user questions (see the sketch below).
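
Putting the four steps together, a minimal end-to-end sketch (assuming a running Neo4j instance, an LLM API key in the environment, and a placeholder `transcript.tsv` path):

```python
from pathlib import Path

from helia.analysis.extractor import MetadataExtractor
from helia.graph.loader import GraphLoader
from helia.ingestion.parser import TranscriptParser

# 1. Ingestion: parse the raw transcript into Utterance objects
utterances = TranscriptParser().parse(Path("transcript.tsv"))

# 2. Analysis: enrich each utterance with sentiment/tone/speech-act metadata
nodes = MetadataExtractor().extract(utterances)

# 3. Graph: upsert the enriched nodes and their SPOKE/NEXT relationships
loader = GraphLoader()
loader.connect()
loader.load_utterances(nodes)
loader.close()
```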

## Implemented Features

- Parse DAIC-WOZ transcripts and simple text formats.
- Extract metadata (sentiment, tone, speech acts) via the OpenAI client (pointed at OpenRouter by default).
- Load `Utterance` and `Speaker` nodes into Neo4j.
- Run a basic LangGraph agent with planner and router.

## Roadmap

- Add robust error handling for LLM API failures.
- Implement real `graph_tool` and `vector_tool` logic.
- Enhance agent planning capabilities.
- Add a comprehensive test suite.

## Installation

Install the package from a local checkout using `uv`.

```sh
uv pip install -e .
```

## Quick Start

Start the backing services (`docker compose up -d`), then run the agent directly from the command line.

```sh
export OPENAI_API_KEY=sk-...
export NEO4J_URI=bolt://localhost:7687
export NEO4J_PASSWORD=password

python -m helia.main "How many interruptions occurred?"
```

## Usage

Parse a transcript file programmatically.

```python
from pathlib import Path

from helia.ingestion.parser import TranscriptParser

parser = TranscriptParser()
utterances = parser.parse(Path("transcript.tsv"))
```

Extract metadata from utterances.

```python
from helia.analysis.extractor import MetadataExtractor

extractor = MetadataExtractor()
nodes = extractor.extract(utterances)
```

Load data into Neo4j.

```python
from helia.graph.loader import GraphLoader

loader = GraphLoader()
loader.connect()
loader.load_utterances(nodes)
loader.close()
```
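
The agent can also be invoked programmatically via the same `run_agent` entry point the CLI uses.

```python
from helia.agent.workflow import run_agent

# Returns the final AgentState dict (question, plan, context, answer, critique)
result = run_agent("How many interruptions occurred?")
print(result["answer"])
```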

## Contributing

Fork the project and submit a pull request.

## License

This project is available as open source under the terms of the [MIT License](LICENSE).
27
docker-compose.yml
Normal file
@@ -0,0 +1,27 @@
services:
  neo4j:
    image: neo4j:5
    container_name: helia-neo4j
    ports:
      - "7474:7474"  # Neo4j Browser / HTTP
      - "7687:7687"  # Bolt
    environment:
      # Matches defaults in `src/helia/graph/loader.py`
      - NEO4J_AUTH=neo4j/password
    volumes:
      - neo4j_data:/data
      - neo4j_logs:/logs

  qdrant:
    image: qdrant/qdrant:latest
    container_name: helia-qdrant
    ports:
      - "6333:6333"  # HTTP
      - "6334:6334"  # gRPC
    volumes:
      - qdrant_storage:/qdrant/storage

volumes:
  neo4j_data:
  neo4j_logs:
  qdrant_storage:
62
pyproject.toml
Normal file
@@ -0,0 +1,62 @@
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

[project]
name = "helia"
version = "0.1.0"
description = "Agentic Interview Analysis Framework"
readme = "README.md"
requires-python = ">=3.13"
dependencies = [
    "langchain>=0.1.0",
    "langchain-openai>=0.1.0",
    "langgraph",
    "neo4j",
    "qdrant-client",
    "pydantic",
    "openai",
    "pydantic-settings>=2.12.0",
]

[tool.hatch.build.targets.wheel]
packages = ["src/helia"]

[dependency-groups]
dev = [
    "ruff>=0.14.7",
    "pyrefly>=0.43.1",
]

[tool.ruff]
line-length = 100
target-version = "py313"

[tool.ruff.lint]
extend-select = [
    "F",    # Pyflakes rules
    "W",    # PyCodeStyle warnings
    "E",    # PyCodeStyle errors
    "I",    # Sort imports properly
    "UP",   # Warn when code can be modernized for newer Python versions
    "C4",   # Catch incorrect use of comprehensions, dict, list, etc.
    "FA",   # Enforce from __future__ import annotations
    "ISC",  # Good use of string concatenation
    "ICN",  # Use common import conventions
    "RET",  # Good return practices
    "SIM",  # Common simplification rules
    "TID",  # Some good import practices
    "TC",   # Enforce importing certain types in a TYPE_CHECKING block
    "PTH",  # Use pathlib instead of os.path
    "TD",   # Be diligent with TODO comments
    "NPY",  # NumPy-specific rules
    "COM",  # Enforce trailing comma rules
    "DTZ",  # Require strict timezone manipulation with datetime
    "FBT",  # Detect boolean traps
    "N",    # Enforce naming conventions, e.g. ClassName vs function_name
]
ignore = ["E501", "COM812", "TD003"]

[tool.pyrefly]
search-path = ["src"]
project-includes = ["**/*.py*", "**/*.ipynb"]
0
src/helia/__init__.py
Normal file
108
src/helia/agent/workflow.py
Normal file
@@ -0,0 +1,108 @@
from __future__ import annotations

from typing import Any

from langgraph.graph import END, StateGraph
from typing_extensions import TypedDict

from helia.llm.client import get_openai_client
from helia.llm.settings import settings


class AgentState(TypedDict):
    question: str
    plan: list[str]
    context: list[str]
    answer: str
    critique: str | None


def planner_node(state: AgentState) -> dict[str, Any]:
    # Static placeholder plan; smarter planning is on the roadmap.
    plan: list[str] = ["Understand question", "Retrieve info", "Synthesize answer"]
    return {"plan": plan}


def router_node(state: AgentState) -> str:
    # Keyword routing: count/time questions go to the graph, everything else to vectors.
    question = state["question"].lower()
    if "how many" in question or "when" in question:
        return "graph_tool"
    return "vector_tool"


def graph_tool_node(state: AgentState) -> dict[str, Any]:
    # Stub: returns canned data until the real Neo4j query tool is implemented.
    context = [*state["context"]]
    context.append("Graph data: Interruption count = 5")
    return {"context": context}


def vector_tool_node(state: AgentState) -> dict[str, Any]:
    # Stub: returns canned data until the real vector-store tool is implemented.
    context = [*state["context"]]
    context.append("Vector data: Discussed salary at 10:00")
    return {"context": context}


def synthesizer_node(state: AgentState) -> dict[str, Any]:
    context_text = "\n".join(state["context"])
    question = state["question"]

    prompt = f"""
Answer the user's question based on the provided context.

Context:
{context_text}

Question: {question}

Answer:
"""

    try:
        client = get_openai_client()
        response = client.chat.completions.create(
            model=settings.model,
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": prompt},
            ],
        )
        answer = response.choices[0].message.content or "No answer generated."
    except Exception as e:
        answer = f"Error generating answer: {e}. Falling back to raw context:\n{context_text}"

    return {"answer": answer}


def reflector_node(state: AgentState) -> dict[str, Any]:
    # Stub critique; real self-reflection is on the roadmap.
    return {"critique": "Answer appears sufficient."}


workflow: Any = StateGraph(AgentState)

workflow.add_node("planner", planner_node)
workflow.add_node("graph_tool", graph_tool_node)
workflow.add_node("vector_tool", vector_tool_node)
workflow.add_node("synthesizer", synthesizer_node)
workflow.add_node("reflector", reflector_node)

workflow.set_entry_point("planner")

workflow.add_conditional_edges(
    "planner", router_node, {"graph_tool": "graph_tool", "vector_tool": "vector_tool"}
)

workflow.add_edge("graph_tool", "synthesizer")
workflow.add_edge("vector_tool", "synthesizer")
workflow.add_edge("synthesizer", "reflector")
workflow.add_edge("reflector", END)


def run_agent(question: str) -> dict[str, Any]:
    app = workflow.compile()
    inputs: AgentState = {
        "question": question,
        "plan": [],
        "context": [],
        "answer": "",
        "critique": None,
    }
    return app.invoke(inputs)
92
src/helia/analysis/extractor.py
Normal file
@@ -0,0 +1,92 @@
from __future__ import annotations

import json
from typing import TYPE_CHECKING, Any

from helia.graph.schema import UtteranceNode
from helia.llm.client import get_openai_client
from helia.llm.settings import settings

if TYPE_CHECKING:
    from helia.ingestion.parser import Utterance


class MetadataExtractor:
    def __init__(self) -> None:
        self.llm = get_openai_client()

    def extract(self, utterances: list[Utterance]) -> list[UtteranceNode]:
        nodes: list[UtteranceNode] = []
        window_size = 3

        for i, utt in enumerate(utterances):
            # Heuristic interruption detection: an utterance that starts before
            # the previous one ends overlaps it.
            if i > 0:
                prev_utt = utterances[i - 1]
                if (
                    utt.start_time is not None
                    and prev_utt.end_time is not None
                    and utt.start_time < prev_utt.end_time
                ):
                    utt.metadata["is_interrupted"] = True
                    prev_utt.metadata["was_interrupted_by"] = utt.id

            # Analyze each utterance with a sliding window of recent context.
            start_idx = max(0, i - window_size + 1)
            context_window = utterances[start_idx : i + 1]

            metadata = self._analyze_with_llm(utt, context_window)

            utt.metadata.update(metadata)

            node = UtteranceNode(
                id=utt.id,
                speaker_id=utt.speaker,
                text=utt.text,
                start_time=utt.start_time if utt.start_time is not None else 0.0,
                end_time=utt.end_time if utt.end_time is not None else 0.0,
                sentiment=metadata.get("sentiment"),
                tone=metadata.get("tone"),
                speech_act=metadata.get("speech_act"),
            )
            nodes.append(node)

        return nodes

    def _analyze_with_llm(self, target_utt: Utterance, context: list[Utterance]) -> dict[str, Any]:
        """
        Constructs the prompt and calls the LLM.
        """
        context_text = "\n".join([f"{u.speaker}: {u.text}" for u in context])
        prompt = f"""
Analyze the last utterance in this conversation context:

CONTEXT:
{context_text}

Analyze the LAST utterance (by {target_utt.speaker}) for:
1. Sentiment (Positive, Negative, Neutral)
2. Tone (e.g., Confident, Hesitant, Aggressive, Polite, etc.)
3. Speech Act (e.g., Question, Statement, Agreement, Disagreement, etc.)

Return ONLY valid JSON with keys: "sentiment", "tone", "speech_act".
"""

        try:
            response = self.llm.chat.completions.create(
                model=settings.model,
                messages=[
                    {
                        "role": "system",
                        "content": "You are an expert linguistic analyst. Output JSON only.",
                    },
                    {"role": "user", "content": prompt},
                ],
                response_format={"type": "json_object"},
            )
            content = response.choices[0].message.content
            if content:
                return json.loads(content)
        except Exception as e:
            print(f"LLM analysis failed: {e}")

        # Fallback if the LLM call fails or returns nothing.
        return {"sentiment": "Neutral", "tone": "Confident", "speech_act": "Statement"}
95
src/helia/graph/loader.py
Normal file
@@ -0,0 +1,95 @@
from __future__ import annotations

import os
from typing import TYPE_CHECKING

from neo4j import Driver, GraphDatabase

if TYPE_CHECKING:
    from helia.graph.schema import UtteranceNode


class GraphLoader:
    def __init__(
        self, uri: str | None = None, user: str | None = None, password: str | None = None
    ):
        self.uri = uri or os.environ.get("NEO4J_URI", "bolt://localhost:7687")
        self.user = user or os.environ.get("NEO4J_USER", "neo4j")
        self.password = password or os.environ.get("NEO4J_PASSWORD", "password")
        self.driver: Driver | None = None

    def connect(self):
        driver = GraphDatabase.driver(self.uri, auth=(self.user, self.password))
        driver.verify_connectivity()
        self.driver = driver
        print(f"Connected to Neo4j at {self.uri}")

    def close(self):
        if self.driver:
            self.driver.close()

    def clear_database(self):
        """Clears all nodes and relationships. Use with caution!"""
        if not self.driver:
            return
        with self.driver.session() as session:
            session.run("MATCH (n) DETACH DELETE n")

    def load_utterances(self, nodes: list[UtteranceNode]):
        """
        Loads a list of enriched UtteranceNodes into Neo4j.

        Creates Speaker nodes, Utterance nodes, and the NEXT chain.
        """
        if not self.driver:
            raise RuntimeError("Driver not connected.")

        with self.driver.session() as session:
            for i, node in enumerate(nodes):
                # Upsert the utterance node with its extracted metadata.
                session.run(
                    """
                    MERGE (u:Utterance {id: $id})
                    SET u.text = $text,
                        u.start_time = $start_time,
                        u.end_time = $end_time,
                        u.sentiment = $sentiment,
                        u.tone = $tone,
                        u.speech_act = $speech_act
                    """,
                    node.model_dump(),
                )

                # Chain consecutive utterances with NEXT relationships.
                if i > 0:
                    prev_node = nodes[i - 1]
                    session.run(
                        """
                        MATCH (prev:Utterance {id: $prev_id})
                        MATCH (curr:Utterance {id: $curr_id})
                        MERGE (prev)-[:NEXT]->(curr)
                        """,
                        prev_id=prev_node.id,
                        curr_id=node.id,
                    )

                # Upsert the speaker and link them to the utterance.
                session.run(
                    """
                    MERGE (s:Speaker {id: $speaker_id})
                    WITH s
                    MATCH (u:Utterance {id: $utterance_id})
                    MERGE (s)-[:SPOKE]->(u)
                    """,
                    speaker_id=node.speaker_id,
                    utterance_id=node.id,
                )

    def create_interruption(self, interrupter_id: str, interrupted_id: str):
        if not self.driver:
            return

        with self.driver.session() as session:
            session.run(
                """
                MATCH (a:Utterance {id: $interrupter_id})
                MATCH (b:Utterance {id: $interrupted_id})
                MERGE (a)-[:INTERRUPTED]->(b)
                """,
                interrupter_id=interrupter_id,
                interrupted_id=interrupted_id,
            )
55
src/helia/graph/schema.py
Normal file
@@ -0,0 +1,55 @@
from pydantic import BaseModel, Field


class SpeakerNode(BaseModel):
    id: str = Field(..., description="Unique identifier for the speaker (e.g., 'speaker_01')")
    name: str | None = Field(None, description="Real name if known")
    role: str | None = Field(
        None, description="Role in the conversation (e.g., 'Interviewer', 'Candidate')"
    )


class UtteranceNode(BaseModel):
    id: str = Field(..., description="Unique ID for the utterance")
    speaker_id: str = Field(..., description="ID of the speaker who said this")
    text: str = Field(..., description="The content of the speech")
    start_time: float
    end_time: float
    # Metadata extracted by the agent
    sentiment: str | None = Field(None, description="Sentiment: Positive, Negative, Neutral")
    tone: str | None = Field(None, description="Tone: Aggressive, Hesitant, Confident")
    speech_act: str | None = Field(None, description="Type: Question, Statement, Agreement")


class TopicNode(BaseModel):
    name: str = Field(..., description="Topic name (e.g., 'Salary', 'Project X')")
    description: str | None = None


class SpokeRel(BaseModel):
    """(Speaker)-[:SPOKE]->(Utterance)"""

    speaker_id: str
    utterance_id: str


class NextRel(BaseModel):
    """(Utterance A)-[:NEXT]->(Utterance B)"""

    from_id: str
    to_id: str
    time_gap: float = 0.0


class InterruptedRel(BaseModel):
    """(Utterance A)-[:INTERRUPTED]->(Utterance B)"""

    interrupter_utterance_id: str
    interrupted_utterance_id: str


class MentionsRel(BaseModel):
    """(Utterance)-[:MENTIONS]->(Topic)"""

    utterance_id: str
    topic_name: str
66
src/helia/ingestion/parser.py
Normal file
@@ -0,0 +1,66 @@
from __future__ import annotations

import csv
from typing import TYPE_CHECKING

from pydantic import BaseModel

if TYPE_CHECKING:
    from pathlib import Path


class Utterance(BaseModel):
    """
    Represents a single turn in a conversation.
    """

    id: str
    speaker: str
    text: str
    start_time: float | None = None
    end_time: float | None = None
    metadata: dict = {}

    @property
    def duration(self) -> float:
        if self.start_time is not None and self.end_time is not None:
            return self.end_time - self.start_time
        return 0.0


class TranscriptParser:
    def parse(self, file_path: Path) -> list[Utterance]:
        with file_path.open(encoding="utf-8") as f:
            lines = f.readlines()

        if not lines:
            return []

        # DAIC-WOZ transcripts start with this exact TSV header.
        header = lines[0].strip()
        if header == "start_time\tstop_time\tspeaker\tvalue":
            return self._parse_tsv(lines[1:])

        return self._parse_simple(lines)

    def _parse_tsv(self, lines: list[str]) -> list[Utterance]:
        reader = csv.DictReader(
            lines, fieldnames=["start_time", "stop_time", "speaker", "value"], delimiter="\t"
        )
        return [
            Utterance(
                id=f"u_{i}",
                speaker=row["speaker"],
                text=row["value"].strip(),
                start_time=float(row["start_time"]),
                end_time=float(row["stop_time"]),
            )
            for i, row in enumerate(reader)
        ]

    def _parse_simple(self, lines: list[str]) -> list[Utterance]:
        # Fallback format: "Speaker: text" lines; anything else is skipped.
        utterances = []
        for i, line in enumerate(lines):
            if ":" in line:
                speaker, text = line.split(":", 1)
                utterances.append(
                    Utterance(id=f"u_{i}", speaker=speaker.strip(), text=text.strip())
                )
        return utterances
4
src/helia/llm/__init__.py
Normal file
@@ -0,0 +1,4 @@
from helia.llm.client import get_openai_client
from helia.llm.settings import settings

__all__ = ["get_openai_client", "settings"]
18
src/helia/llm/client.py
Normal file
@@ -0,0 +1,18 @@
from openai import OpenAI

from helia.llm.settings import settings


def get_openai_client() -> OpenAI:
    """
    Returns a configured OpenAI client based on global settings.

    Defaults to the OpenRouter base_url if not specified otherwise.
    """
    api_key = settings.resolve_api_key()

    return OpenAI(
        base_url=settings.base_url,
        api_key=api_key,
        timeout=settings.timeout,
        max_retries=settings.max_retries,
    )
65
src/helia/llm/settings.py
Normal file
@@ -0,0 +1,65 @@
import os
from typing import Final

from pydantic import Field
from pydantic_settings import BaseSettings, SettingsConfigDict


class LLMSettings(BaseSettings):
    """
    Configuration for LLM clients, defaulting to OpenRouter.
    """

    api_key: str | None = Field(
        default=None,
        description="API key for the LLM provider. Checks HELIA_LLM_API_KEY, OPENROUTER_API_KEY, then OPENAI_API_KEY.",
    )
    base_url: str = Field(
        default="https://openrouter.ai/api/v1",
        description="Base URL for the LLM provider. Defaults to OpenRouter.",
    )
    model: str = Field(
        default="google/gemini-3.0-pro-preview",
        description="Model identifier to use.",
    )
    timeout: float = Field(
        default=30.0,
        description="Request timeout in seconds.",
    )
    max_retries: int = Field(
        default=2,
        description="Maximum number of retries for failed requests.",
    )

    model_config = SettingsConfigDict(
        env_prefix="HELIA_LLM_",
        case_sensitive=False,
        extra="ignore",
    )

    def resolve_api_key(self) -> str:
        """
        Resolves the API key with a fallback strategy:

        1. Configured api_key (from HELIA_LLM_API_KEY)
        2. OPENROUTER_API_KEY env var
        3. OPENAI_API_KEY env var
        4. Raise ValueError if none found
        """
        if self.api_key:
            return self.api_key

        # Fallback 1: OpenRouter
        if key := os.environ.get("OPENROUTER_API_KEY"):
            return key

        # Fallback 2: OpenAI
        if key := os.environ.get("OPENAI_API_KEY"):
            return key

        raise ValueError(
            "No API key found. Please set HELIA_LLM_API_KEY, OPENROUTER_API_KEY, or OPENAI_API_KEY."
        )


# Singleton instance for easy import
settings: Final[LLMSettings] = LLMSettings()
21
src/helia/main.py
Normal file
@@ -0,0 +1,21 @@
import sys


def main():
    # Deferred import: pulls in langgraph/openai only when the agent actually runs.
    from helia.agent.workflow import run_agent

    print("Initializing Agentic Interview Framework...")

    if len(sys.argv) > 1:
        question = " ".join(sys.argv[1:])
    else:
        question = "How many times did the interviewer interrupt?"

    print(f"\nRunning ReAct agent with question: '{question}'\n")

    result = run_agent(question)
    print(result["answer"])


if __name__ == "__main__":
    main()