feat(citations): add per-claim citations to AI briefs

The LLM prompts are updated to output {text, citation, quote} objects for every key_point and risk. govinfo_url is now stored on BillBrief (migration 0006) so the frontend can link directly to the source document without an extra query. AIBriefCard renders § citation chips that expand inline to show the verbatim quote and a View source → GovInfo link. Old plain-string briefs continue to render unchanged.

Authored-By: Jack Levy
2026-02-28 22:48:58 -05:00
parent 6a1b387dd2
commit 8d6a55905c
7 changed files with 144 additions and 28 deletions
--- a/backend/alembic/versions/0006_add_brief_govinfo_url.py
+++ b/backend/alembic/versions/0006_add_brief_govinfo_url.py
@@ -0,0 +1,21 @@
+"""add govinfo_url to bill_briefs
+
+Revision ID: 0006
+Revises: 0005
+Create Date: 2026-02-28
+"""
+import sqlalchemy as sa
+from alembic import op
+
+revision = "0006"
+down_revision = "0005"
+branch_labels = None
+depends_on = None
+
+
+def upgrade():
+    op.add_column("bill_briefs", sa.Column("govinfo_url", sa.String(), nullable=True))
+
+
+def downgrade():
+    op.drop_column("bill_briefs", "govinfo_url")
--- a/backend/app/models/brief.py
+++ b/backend/app/models/brief.py
@@ -14,12 +14,13 @@ class BillBrief(Base):
    document_id = Column(Integer, ForeignKey("bill_documents.id", ondelete="SET NULL"), nullable=True)
    brief_type = Column(String(20), nullable=False, server_default="full")  # full | amendment
    summary = Column(Text)
-    key_points = Column(JSONB)      # list[str]
-    risks = Column(JSONB)           # list[str]
+    key_points = Column(JSONB)      # list[{text, citation, quote}]
+    risks = Column(JSONB)           # list[{text, citation, quote}]
    deadlines = Column(JSONB)       # list[{date: str, description: str}]
    topic_tags = Column(JSONB)      # list[str]
    llm_provider = Column(String(50))
    llm_model = Column(String(100))
+    govinfo_url = Column(String, nullable=True)
    created_at = Column(DateTime(timezone=True), server_default=func.now())

    bill = relationship("Bill", back_populates="briefs")
--- a/backend/app/schemas/schemas.py
+++ b/backend/app/schemas/schemas.py
@@ -36,12 +36,13 @@ class BriefSchema(BaseModel):
    id: int
    brief_type: str = "full"
    summary: Optional[str] = None
-    key_points: Optional[list[str]] = None
-    risks: Optional[list[str]] = None
+    key_points: Optional[list[Any]] = None
+    risks: Optional[list[Any]] = None
    deadlines: Optional[list[dict[str, Any]]] = None
    topic_tags: Optional[list[str]] = None
    llm_provider: Optional[str] = None
    llm_model: Optional[str] = None
+    govinfo_url: Optional[str] = None
    created_at: Optional[datetime] = None

    model_config = {"from_attributes": True}
--- a/backend/app/services/llm_service.py
+++ b/backend/app/services/llm_service.py
@@ -21,17 +21,24 @@ without political bias.
 Always respond with valid JSON matching exactly this schema:
 {
  "summary": "2-4 paragraph plain-language summary of what this bill does",
-  "key_points": ["specific concrete fact 1", "specific concrete fact 2"],
-  "risks": ["legitimate concern or challenge 1", "legitimate concern 2"],
+  "key_points": [
+    {"text": "specific concrete fact", "citation": "Section X(y)", "quote": "verbatim excerpt from bill ≤80 words"}
+  ],
+  "risks": [
+    {"text": "legitimate concern or challenge", "citation": "Section X(y)", "quote": "verbatim excerpt from bill ≤80 words"}
+  ],
  "deadlines": [{"date": "YYYY-MM-DD or null", "description": "what happens on this date"}],
  "topic_tags": ["healthcare", "taxation"]
 }

 Rules:
 - summary: Explain WHAT the bill does, not whether it is good or bad. Be factual and complete.
- key_points: 5-10 specific, concrete things the bill changes, authorizes, or appropriates.
+- key_points: 5-10 specific, concrete things the bill changes, authorizes, or appropriates. \
+Each item MUST include "text" (your claim), "citation" (the section number, e.g. "Section 301(a)(2)"), \
+and "quote" (a verbatim excerpt of ≤80 words from that section that supports your claim).
 - risks: Legitimate concerns from any perspective — costs, implementation challenges, \
-constitutional questions, unintended consequences. Include at least 2 even for benign bills.
+constitutional questions, unintended consequences. Include at least 2 even for benign bills. \
+Each item MUST include "text", "citation", and "quote" just like key_points.
 - deadlines: Only include if explicitly stated in the text. Use null for date if a deadline \
 is mentioned without a specific date. Empty list if none.
 - topic_tags: 3-8 lowercase tags. Prefer these standard tags: healthcare, taxation, defense, \
@@ -49,8 +56,8 @@ TOKENS_PER_CHAR = 0.25  # rough approximation: 4 chars ≈ 1 token
@dataclass
 class ReverseBrief:
    summary: str
-    key_points: list[str]
-    risks: list[str]
+    key_points: list[dict]
+    risks: list[dict]
    deadlines: list[dict]
    topic_tags: list[str]
    llm_provider: str
@@ -82,16 +89,23 @@ and you must summarize what changed between the previous and new version.
 Always respond with valid JSON matching exactly this schema:
 {
  "summary": "2-3 paragraph plain-language description of what changed in this version",
-  "key_points": ["specific change 1", "specific change 2"],
-  "risks": ["new concern introduced by this change 1", "concern 2"],
+  "key_points": [
+    {"text": "specific change", "citation": "Section X(y)", "quote": "verbatim excerpt from new version ≤80 words"}
+  ],
+  "risks": [
+    {"text": "new concern introduced by this change", "citation": "Section X(y)", "quote": "verbatim excerpt from new version ≤80 words"}
+  ],
  "deadlines": [{"date": "YYYY-MM-DD or null", "description": "new deadline added"}],
  "topic_tags": ["healthcare", "taxation"]
 }

 Rules:
 - summary: Focus ONLY on what is different from the previous version. Be specific.
- key_points: List concrete additions, removals, or modifications in this version.
- risks: Only include risks that are new or changed relative to the previous version.
+- key_points: List concrete additions, removals, or modifications in this version. \
+Each item MUST include "text" (your claim), "citation" (the section number, e.g. "Section 301(a)(2)"), \
+and "quote" (a verbatim excerpt of ≤80 words from the NEW version that supports your claim).
+- risks: Only include risks that are new or changed relative to the previous version. \
+Each item MUST include "text", "citation", and "quote" just like key_points.
 - deadlines: Only new or changed deadlines. Empty list if none.
 - topic_tags: Same standard tags as before — include any new topics this version adds.

--- a/backend/app/workers/llm_processor.py
+++ b/backend/app/workers/llm_processor.py
@@ -86,6 +86,7 @@ def process_document_with_llm(self, document_id: int):
            topic_tags=brief.topic_tags,
            llm_provider=brief.llm_provider,
            llm_model=brief.llm_model,
+            govinfo_url=doc.govinfo_url,
        )
        db.add(db_brief)
        db.commit()