feat: implement PHQ-8 assessment prompt and refactor related components for improved functionality

This commit is contained in:
Santiago Martinez-Avial
2025-12-23 01:20:20 +01:00
parent 69fc70ea65
commit a9346ccb34
11 changed files with 166 additions and 92 deletions

View File

@@ -0,0 +1,3 @@
from . import init_db
__all__ = ["init_db"]

12
migrations/init_db.py Normal file
View File

@@ -0,0 +1,12 @@
import asyncio
from . import init_prompts, init_transcripts
async def migrate() -> None:
    """Run all migration steps sequentially.

    The ``init_prompts`` migration is awaited to completion first, followed
    by the ``init_transcripts`` migration.
    """
    # Order matters only in that it mirrors the original call sequence.
    for step in (init_prompts, init_transcripts):
        await step.migrate()


if __name__ == "__main__":
    # Script entry point: drive the coroutine on a fresh event loop.
    asyncio.run(migrate())

View File

@@ -0,0 +1,69 @@
import asyncio
import logging
from helia.configuration import HeliaConfig
from helia.db import init_db
from helia.models.prompt import Prompt
# Configure root logging at import time so the migration's progress messages
# (INFO and above) are emitted when this module is run as a script.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# Template for the PHQ-8 assessment prompt stored by migrate().
# It contains a single placeholder, {transcript_text}, which is also declared
# as the prompt's only input variable when the Prompt document is created.
# NOTE(review): the placeholder is presumably filled via str.format-style
# substitution by the prompt consumer — confirm against the rendering code.
DEFAULT_PROMPT = """You are an expert clinical psychologist analyzing a patient interview transcript.
Your task is to assess the patient according to the PHQ-8 (Patient Health Questionnaire-8) criteria.
The transcript is provided below. You must analyze the ENTIRE transcript to find evidence for each of the 8 criteria.
CRITERIA TO ASSESS:
1. Little interest or pleasure in doing things (Anhedonia)
2. Feeling down, depressed, or hopeless
3. Trouble falling or staying asleep, or sleeping too much
4. Feeling tired or having little energy
5. Poor appetite or overeating
6. Feeling bad about yourself - or that you are a failure or have let yourself or your family down
7. Trouble concentrating on things, such as reading the newspaper or watching television
8. Moving or speaking so slowly that other people could have noticed? Or the opposite - being so fidgety or restless that you have been moving around a lot more than usual (Psychomotor agitation/retardation)
SCORING SCALE:
0 = Not at all
1 = Several days
2 = More than half the days
3 = Nearly every day
INSTRUCTIONS:
- For EACH of the 8 items, you must provide:
- A score (0-3).
- A list of DIRECT QUOTES from the transcript that support this score.
- A reasoning explanation.
- If there is NO evidence for a symptom, score it as 0.
- Be conservative: do not hallucinate symptoms. Only score if the patient explicitly mentions it or strong context implies it.
TRANSCRIPT:
{transcript_text}
"""
async def migrate() -> None:
    """Insert the default PHQ-8 assessment prompt into the database.

    Steps:
      1. Build a ``HeliaConfig``; on any failure the exception is logged
         (with traceback) and the function returns without further work.
      2. Call ``init_db(config)``.
      3. Create a ``Prompt`` named ``"phq8-assessment"`` from
         ``DEFAULT_PROMPT`` with ``transcript_text`` as its only input
         variable, and insert it.
    """
    try:
        config = HeliaConfig()  # ty:ignore[missing-argument]
    except Exception:
        # logger.exception() appends the active traceback on its own. The
        # former message contained a "%s" placeholder with no argument, which
        # logging emits verbatim, so the placeholder is dropped.
        logger.exception("Failed to load configuration.")
        return

    logger.info("Connecting to database...")
    await init_db(config)

    prompt_name = "phq8-assessment"
    logger.info("Creating initial prompt '%s'...", prompt_name)

    # NOTE(review): the insert is unconditional; re-running this migration
    # presumably creates a second prompt with the same name unless the Prompt
    # model enforces uniqueness — confirm.
    new_prompt = Prompt(
        name=prompt_name,
        template=DEFAULT_PROMPT,
        input_variables=["transcript_text"],
    )
    await new_prompt.insert()
    logger.info("Prompt created successfully.")


if __name__ == "__main__":
    # Script entry point: run the migration on a fresh event loop.
    asyncio.run(migrate())

View File

@@ -65,10 +65,7 @@ async def migrate() -> None:
s3_loader = S3DatasetLoader(config.s3)
keys = await s3_loader.list_transcripts()
logger.info("Found %d transcript files in S3.", len(keys))
await Transcript.delete_all()
logger.info("Cleared existing transcripts.")
logger.info("%d transcript files in S3.", len(keys))
id_pattern = re.compile(r"/(\d+)_TRANSCRIPT\.csv$")