feat: implement PHQ-8 assessment prompt and refactor related components for improved functionality

2025-12-23 01:20:20 +01:00
parent 69fc70ea65
commit a9346ccb34
11 changed files with 166 additions and 92 deletions
--- a/migrations/init.py
+++ b/migrations/init.py
@@ -0,0 +1,3 @@
+from . import init_db
+
+__all__ = ["init_db"]  # public names of this module: only the init_db submodule imported above
--- a/migrations/init_db.py
+++ b/migrations/init_db.py
@@ -0,0 +1,12 @@
+import asyncio
+
+from . import init_prompts, init_transcripts
+
+
+async def migrate() -> None:  # runs both seed steps in order, prompts first
+    await init_prompts.migrate()  # logs and returns (no raise) on config failure
+    await init_transcripts.migrate()
+
+
+if __name__ == "__main__":  # relative import above: run as `python -m migrations.init_db`
+    asyncio.run(migrate())
--- a/migrations/init_prompts.py
+++ b/migrations/init_prompts.py
@@ -0,0 +1,69 @@
+import asyncio
+import logging
+
+from helia.configuration import HeliaConfig
+from helia.db import init_db
+from helia.models.prompt import Prompt
+
+logging.basicConfig(level=logging.INFO)  # module-level call: runs whenever this module is imported
+logger = logging.getLogger(__name__)
+# PHQ-8 assessment prompt template. Its only placeholder is {transcript_text}.
+DEFAULT_PROMPT = """You are an expert clinical psychologist analyzing a patient interview transcript.
+Your task is to assess the patient according to the PHQ-8 (Patient Health Questionnaire-8) criteria.
+
+The transcript is provided below. You must analyze the ENTIRE transcript to find evidence for each of the 8 criteria.
+
+CRITERIA TO ASSESS:
+1. Little interest or pleasure in doing things (Anhedonia)
+2. Feeling down, depressed, or hopeless
+3. Trouble falling or staying asleep, or sleeping too much
+4. Feeling tired or having little energy
+5. Poor appetite or overeating
+6. Feeling bad about yourself - or that you are a failure or have let yourself or your family down
+7. Trouble concentrating on things, such as reading the newspaper or watching television
+8. Moving or speaking so slowly that other people could have noticed? Or the opposite - being so fidgety or restless that you have been moving around a lot more than usual (Psychomotor agitation/retardation)
+
+SCORING SCALE:
+0 = Not at all
+1 = Several days
+2 = More than half the days
+3 = Nearly every day
+
+INSTRUCTIONS:
+- For EACH of the 8 items, you must provide:
+  - A score (0-3).
+  - A list of DIRECT QUOTES from the transcript that support this score.
+  - A reasoning explanation.
+- If there is NO evidence for a symptom, score it as 0.
+- Be conservative: do not hallucinate symptoms. Only score if the patient explicitly mentions it or strong context implies it.
+
+TRANSCRIPT:
+{transcript_text}
+"""
+
+
+async def migrate() -> None:
+    """Insert the default PHQ-8 prompt; log and return if config loading fails."""
+    try:
+        config = HeliaConfig()  # ty:ignore[missing-argument]
+    except Exception:
+        # logger.exception attaches the traceback itself; no %s argument is needed.
+        logger.exception("Failed to load configuration.")
+        return
+
+    logger.info("Connecting to database...")
+    await init_db(config)
+
+    prompt_name = "phq8-assessment"
+    logger.info("Creating initial prompt '%s'...", prompt_name)
+    new_prompt = Prompt(
+        name=prompt_name,
+        template=DEFAULT_PROMPT,
+        input_variables=["transcript_text"],
+    )
+    await new_prompt.insert()
+    logger.info("Prompt created successfully.")
+
+
+if __name__ == "__main__":  # executed directly: seed only the default prompt
+    asyncio.run(migrate())
--- a/migrations/init_transcripts.py
+++ b/migrations/init_transcripts.py
@@ -65,10 +65,7 @@ async def migrate() -> None:

    s3_loader = S3DatasetLoader(config.s3)
    keys = await s3_loader.list_transcripts()
-    logger.info("Found %d transcript files in S3.", len(keys))
-
-    await Transcript.delete_all()
-    logger.info("Cleared existing transcripts.")
+    logger.info("%d transcript files in S3.", len(keys))

    id_pattern = re.compile(r"/(\d+)_TRANSCRIPT\.csv$")