feat: Member Effectiveness Score + Representation Alignment View (v0.9.9)
Member Effectiveness Score
- New BillCosponsor table (migration 0018) with per-bill co-sponsor
party data required for the bipartisan multiplier
- bill_category column on Bill (substantive | commemorative | administrative)
set by a cheap one-shot LLM call after each brief is generated
- effectiveness_score / percentile / tier columns on Member
- New bill_classifier.py worker with 5 tasks:
classify_bill_category — triggered from llm_processor after brief
fetch_bill_cosponsors — triggered from congress_poller on new bill
calculate_effectiveness_scores — nightly at 5 AM UTC
backfill_bill_categories / backfill_all_bill_cosponsors — one-time
- Scoring: distance-traveled pts × bipartisan (1.5×) × substance (0.1×
for commemorative) × leadership (1.2× for committee chairs)
- Percentile normalised within (seniority tier × party) buckets
- Effectiveness card on member detail page with colour-coded bar
- Admin panel: 3 new backfill/calculate controls in Maintenance section
Representation Alignment View
- New GET /api/alignment endpoint: cross-references user's stanced bill
follows (pocket_veto/pocket_boost) with followed members' vote positions
- Efficient bulk queries — no N+1 loops
- New /alignment page with ranked member list and alignment bars
- Alignment added to sidebar nav (auth-required)
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,58 @@
|
||||
"""Add bill_category, cosponsors, and member effectiveness score columns
|
||||
|
||||
Revision ID: 0018
|
||||
Revises: 0017
|
||||
"""
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
|
||||
revision = "0018"
|
||||
down_revision = "0017"
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
|
||||
def upgrade():
    """Apply revision 0018.

    Adds two columns to ``bills``, creates the ``bill_cosponsors`` table
    together with its three indexes, and adds the three effectiveness
    columns to ``members``.
    """
    # Bill additions
    bill_columns = (
        sa.Column("bill_category", sa.String(20), nullable=True),
        sa.Column("cosponsors_fetched_at", sa.DateTime(timezone=True), nullable=True),
    )
    for bill_column in bill_columns:
        op.add_column("bills", bill_column)

    # Co-sponsors table
    cosponsor_columns = (
        sa.Column("id", sa.Integer, primary_key=True, autoincrement=True),
        sa.Column("bill_id", sa.String, sa.ForeignKey("bills.bill_id", ondelete="CASCADE"), nullable=False),
        sa.Column("bioguide_id", sa.String, sa.ForeignKey("members.bioguide_id", ondelete="SET NULL"), nullable=True),
        sa.Column("name", sa.String(200)),
        sa.Column("party", sa.String(50)),
        sa.Column("state", sa.String(10)),
        sa.Column("sponsored_date", sa.Date, nullable=True),
        sa.Column("created_at", sa.DateTime(timezone=True), server_default=sa.text("now()")),
    )
    op.create_table("bill_cosponsors", *cosponsor_columns)

    # Plain lookup indexes, one per foreign-key column
    for index_name, indexed_column in (
        ("ix_bill_cosponsors_bill_id", "bill_id"),
        ("ix_bill_cosponsors_bioguide_id", "bioguide_id"),
    ):
        op.create_index(index_name, "bill_cosponsors", [indexed_column])

    # Partial unique index — prevents duplicates for known members, allows multiple nulls
    op.create_index(
        "uq_bill_cosponsors_bill_member",
        "bill_cosponsors",
        ["bill_id", "bioguide_id"],
        unique=True,
        postgresql_where=sa.text("bioguide_id IS NOT NULL"),
    )

    # Member effectiveness columns
    member_columns = (
        sa.Column("effectiveness_score", sa.Float, nullable=True),
        sa.Column("effectiveness_percentile", sa.Float, nullable=True),
        sa.Column("effectiveness_tier", sa.String(20), nullable=True),
    )
    for member_column in member_columns:
        op.add_column("members", member_column)
|
||||
|
||||
|
||||
def downgrade():
    """Revert revision 0018 by undoing ``upgrade`` in reverse order."""
    # Member effectiveness columns (last added, first removed)
    for member_column in (
        "effectiveness_tier",
        "effectiveness_percentile",
        "effectiveness_score",
    ):
        op.drop_column("members", member_column)

    # Indexes are dropped explicitly before the table itself
    for index_name in (
        "uq_bill_cosponsors_bill_member",
        "ix_bill_cosponsors_bioguide_id",
        "ix_bill_cosponsors_bill_id",
    ):
        op.drop_index(index_name, "bill_cosponsors")
    op.drop_table("bill_cosponsors")

    # Bill additions
    for bill_column in ("cosponsors_fetched_at", "bill_category"):
        op.drop_column("bills", bill_column)
|
||||
@@ -230,6 +230,30 @@ async def backfill_labels(current_user: User = Depends(get_current_admin)):
|
||||
return {"task_id": task.id, "status": "queued"}
|
||||
|
||||
|
||||
@router.post("/backfill-cosponsors")
async def backfill_cosponsors(current_user: User = Depends(get_current_admin)):
    """Fetch co-sponsor data from Congress.gov for all bills that haven't been fetched yet."""
    # The worker module is imported at call time rather than at module import.
    from app.workers import bill_classifier

    # Enqueue only; the response carries the Celery task id, not a result.
    queued_task = bill_classifier.backfill_all_bill_cosponsors.delay()
    response = {"task_id": queued_task.id, "status": "queued"}
    return response
|
||||
|
||||
|
||||
@router.post("/backfill-categories")
async def backfill_categories(current_user: User = Depends(get_current_admin)):
    """Classify all bills with text but no category as substantive/commemorative/administrative."""
    # The worker module is imported at call time rather than at module import.
    from app.workers import bill_classifier

    # Enqueue only; the response carries the Celery task id, not a result.
    queued_task = bill_classifier.backfill_bill_categories.delay()
    response = {"task_id": queued_task.id, "status": "queued"}
    return response
|
||||
|
||||
|
||||
@router.post("/calculate-effectiveness")
async def calculate_effectiveness(current_user: User = Depends(get_current_admin)):
    """Recalculate member effectiveness scores and percentiles now."""
    # The worker module is imported at call time rather than at module import.
    from app.workers import bill_classifier

    # Enqueue only; the response carries the Celery task id, not a result.
    queued_task = bill_classifier.calculate_effectiveness_scores.delay()
    response = {"task_id": queued_task.id, "status": "queued"}
    return response
|
||||
|
||||
|
||||
@router.post("/resume-analysis")
|
||||
async def resume_analysis(current_user: User = Depends(get_current_admin)):
|
||||
"""Re-queue LLM processing for docs with no brief, and document fetching for bills with no doc."""
|
||||
|
||||
161
backend/app/api/alignment.py
Normal file
161
backend/app/api/alignment.py
Normal file
@@ -0,0 +1,161 @@
|
||||
"""
|
||||
Representation Alignment API.
|
||||
|
||||
Returns how well each followed member's voting record aligns with the
|
||||
current user's bill stances (pocket_veto / pocket_boost).
|
||||
"""
|
||||
from collections import defaultdict
|
||||
|
||||
from fastapi import APIRouter, Depends
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.core.dependencies import get_current_user
|
||||
from app.database import get_db
|
||||
from app.models import Follow, Member
|
||||
from app.models.user import User
|
||||
from app.models.vote import BillVote, MemberVotePosition
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
def _alignment_response(members: list, stanced_bills: int, voted_bills: int) -> dict:
    """Assemble the response envelope shared by every exit path of ``get_alignment``."""
    return {
        "members": members,
        "total_bills_with_stance": stanced_bills,
        "total_bills_with_votes": voted_bills,
    }


@router.get("")
async def get_alignment(
    current_user: User = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
):
    """
    Cross-reference the user's stanced bill follows with how their
    followed members voted on those same bills.

    pocket_boost + Yea → aligned
    pocket_veto  + Nay → aligned
    All other combinations with an actual Yea/Nay vote → opposed
    Not Voting / Present → excluded from tally
    """
    # 1. Bill follows with a stance
    bill_follows_result = await db.execute(
        select(Follow).where(
            Follow.user_id == current_user.id,
            Follow.follow_type == "bill",
            Follow.follow_mode.in_(["pocket_veto", "pocket_boost"]),
        )
    )
    bill_follows = bill_follows_result.scalars().all()
    if not bill_follows:
        return _alignment_response([], 0, 0)

    # follow_value holds the bill id for bill follows; follow_mode is the stance.
    stance_map = {f.follow_value: f.follow_mode for f in bill_follows}
    stanced_bills = len(stance_map)

    # 2. Followed members
    member_follows_result = await db.execute(
        select(Follow).where(
            Follow.user_id == current_user.id,
            Follow.follow_type == "member",
        )
    )
    followed_member_ids = {f.follow_value for f in member_follows_result.scalars().all()}
    if not followed_member_ids:
        return _alignment_response([], stanced_bills, 0)

    # 3. Bulk fetch votes for all stanced bills
    votes_result = await db.execute(
        select(BillVote).where(BillVote.bill_id.in_(list(stance_map)))
    )
    votes = votes_result.scalars().all()
    if not votes:
        return _alignment_response([], stanced_bills, 0)

    bill_id_by_vote = {v.id: v.bill_id for v in votes}
    bills_with_votes = len(set(bill_id_by_vote.values()))

    # 4. Bulk fetch positions for followed members on those votes.
    # A list (not a set) is handed to in_() so the call does not depend on
    # which iterable types the installed SQLAlchemy version accepts.
    positions_result = await db.execute(
        select(MemberVotePosition).where(
            MemberVotePosition.vote_id.in_(list(bill_id_by_vote)),
            MemberVotePosition.bioguide_id.in_(list(followed_member_ids)),
        )
    )
    positions = positions_result.scalars().all()

    # 5. Aggregate per member. Every recorded position counts, so a bill with
    # several roll-call votes contributes once per vote.
    tally: dict[str, dict] = defaultdict(lambda: {"aligned": 0, "opposed": 0})
    for pos in positions:
        if pos.position not in ("Yea", "Nay"):
            # Skip Not Voting / Present — not a real position signal
            continue
        bill_id = bill_id_by_vote.get(pos.vote_id)
        if not bill_id:
            continue
        stance = stance_map.get(bill_id)
        is_aligned = (
            (stance == "pocket_boost" and pos.position == "Yea")
            or (stance == "pocket_veto" and pos.position == "Nay")
        )
        tally[pos.bioguide_id]["aligned" if is_aligned else "opposed"] += 1

    if not tally:
        return _alignment_response([], stanced_bills, bills_with_votes)

    # 6. Load member details
    members_result = await db.execute(
        select(Member).where(Member.bioguide_id.in_(list(tally)))
    )
    member_map = {m.bioguide_id: m for m in members_result.scalars().all()}

    # 7. Build response
    result = []
    for bioguide_id, counts in tally.items():
        # A vote position may reference a member with no Member row; fall back
        # to the bare id and leave the descriptive fields empty.
        m = member_map.get(bioguide_id)
        aligned = counts["aligned"]
        opposed = counts["opposed"]
        total = aligned + opposed
        result.append({
            "bioguide_id": bioguide_id,
            "name": m.name if m else bioguide_id,
            "party": m.party if m else None,
            "state": m.state if m else None,
            "chamber": m.chamber if m else None,
            "photo_url": m.photo_url if m else None,
            "effectiveness_percentile": m.effectiveness_percentile if m else None,
            "aligned": aligned,
            "opposed": opposed,
            "total": total,
            "alignment_pct": round(aligned / total * 100, 1) if total > 0 else None,
        })

    # Highest alignment first. Ties are broken by number of counted votes and
    # then by name so the order does not depend on database row order.
    result.sort(
        key=lambda row: (
            row["alignment_pct"] is None,
            -(row["alignment_pct"] or 0),
            -row["total"],
            row["name"] or "",
        )
    )

    return _alignment_response(result, stanced_bills, bills_with_votes)
|
||||
@@ -1,7 +1,7 @@
|
||||
from fastapi import FastAPI
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
|
||||
from app.api import bills, members, follows, dashboard, search, settings, admin, health, auth, notifications, notes, collections, share
|
||||
from app.api import bills, members, follows, dashboard, search, settings, admin, health, auth, notifications, notes, collections, share, alignment
|
||||
from app.config import settings as config
|
||||
|
||||
app = FastAPI(
|
||||
@@ -31,3 +31,4 @@ app.include_router(notifications.router, prefix="/api/notifications", tags=["not
|
||||
app.include_router(notes.router, prefix="/api/notes", tags=["notes"])
|
||||
app.include_router(collections.router, prefix="/api/collections", tags=["collections"])
|
||||
app.include_router(share.router, prefix="/api/share", tags=["share"])
|
||||
app.include_router(alignment.router, prefix="/api/alignment", tags=["alignment"])
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
from app.models.bill import Bill, BillAction, BillDocument
|
||||
from app.models.bill import Bill, BillAction, BillDocument, BillCosponsor
|
||||
from app.models.brief import BillBrief
|
||||
from app.models.collection import Collection, CollectionBill
|
||||
from app.models.follow import Follow
|
||||
@@ -16,6 +16,7 @@ from app.models.vote import BillVote, MemberVotePosition
|
||||
__all__ = [
|
||||
"Bill",
|
||||
"BillAction",
|
||||
"BillCosponsor",
|
||||
"BillDocument",
|
||||
"BillBrief",
|
||||
"BillNote",
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
from sqlalchemy import (
|
||||
Column, String, Integer, Date, DateTime, Text, ForeignKey, Index
|
||||
Column, String, Integer, Date, DateTime, Text, ForeignKey, Index, UniqueConstraint
|
||||
)
|
||||
from sqlalchemy.orm import relationship
|
||||
from sqlalchemy.sql import func
|
||||
@@ -26,6 +26,9 @@ class Bill(Base):
|
||||
congress_url = Column(String)
|
||||
govtrack_url = Column(String)
|
||||
|
||||
bill_category = Column(String(20), nullable=True) # substantive | commemorative | administrative
|
||||
cosponsors_fetched_at = Column(DateTime(timezone=True))
|
||||
|
||||
# Ingestion tracking
|
||||
last_checked_at = Column(DateTime(timezone=True))
|
||||
actions_fetched_at = Column(DateTime(timezone=True))
|
||||
@@ -40,6 +43,7 @@ class Bill(Base):
|
||||
trend_scores = relationship("TrendScore", back_populates="bill", order_by="desc(TrendScore.score_date)")
|
||||
committee_bills = relationship("CommitteeBill", back_populates="bill")
|
||||
notes = relationship("BillNote", back_populates="bill", cascade="all, delete-orphan")
|
||||
cosponsors = relationship("BillCosponsor", back_populates="bill", cascade="all, delete-orphan")
|
||||
|
||||
__table_args__ = (
|
||||
Index("ix_bills_congress_number", "congress_number"),
|
||||
@@ -87,3 +91,23 @@ class BillDocument(Base):
|
||||
__table_args__ = (
|
||||
Index("ix_bill_documents_bill_id", "bill_id"),
|
||||
)
|
||||
|
||||
|
||||
class BillCosponsor(Base):
    """A single co-sponsor entry for a bill (table ``bill_cosponsors``)."""

    __tablename__ = "bill_cosponsors"

    id = Column(Integer, primary_key=True, autoincrement=True)
    # Rows are deleted together with their bill (ON DELETE CASCADE).
    bill_id = Column(String, ForeignKey("bills.bill_id", ondelete="CASCADE"), nullable=False)
    # Nullable link to a member; reset to NULL if the member row is deleted.
    bioguide_id = Column(String, ForeignKey("members.bioguide_id", ondelete="SET NULL"), nullable=True)
    name = Column(String(200))
    party = Column(String(50))
    state = Column(String(10))
    sponsored_date = Column(Date, nullable=True)
    created_at = Column(DateTime(timezone=True), server_default=func.now())

    bill = relationship("Bill", back_populates="cosponsors")

    __table_args__ = (
        Index("ix_bill_cosponsors_bill_id", "bill_id"),
        Index("ix_bill_cosponsors_bioguide_id", "bioguide_id"),
        # Mirrors the partial unique index created by migration 0018 so the
        # model metadata and the database schema stay in agreement: one row
        # per (bill, member) when the member is linked, any number of rows
        # with a NULL bioguide_id.
        Index(
            "uq_bill_cosponsors_bill_member",
            "bill_id",
            "bioguide_id",
            unique=True,
            postgresql_where=bioguide_id.isnot(None),
        ),
    )
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import sqlalchemy as sa
|
||||
from sqlalchemy import Column, Integer, JSON, String, DateTime
|
||||
from sqlalchemy.orm import relationship
|
||||
from sqlalchemy.sql import func
|
||||
@@ -26,6 +27,9 @@ class Member(Base):
|
||||
leadership_json = Column(JSON)
|
||||
sponsored_count = Column(Integer)
|
||||
cosponsored_count = Column(Integer)
|
||||
effectiveness_score = Column(sa.Float, nullable=True)
|
||||
effectiveness_percentile = Column(sa.Float, nullable=True)
|
||||
effectiveness_tier = Column(String(20), nullable=True) # junior | mid | senior
|
||||
detail_fetched = Column(DateTime(timezone=True))
|
||||
created_at = Column(DateTime(timezone=True), server_default=func.now())
|
||||
updated_at = Column(DateTime(timezone=True), server_default=func.now(), onupdate=func.now())
|
||||
|
||||
@@ -124,6 +124,9 @@ class MemberSchema(BaseModel):
|
||||
leadership_json: Optional[list[Any]] = None
|
||||
sponsored_count: Optional[int] = None
|
||||
cosponsored_count: Optional[int] = None
|
||||
effectiveness_score: Optional[float] = None
|
||||
effectiveness_percentile: Optional[float] = None
|
||||
effectiveness_tier: Optional[str] = None
|
||||
latest_trend: Optional["MemberTrendScoreSchema"] = None
|
||||
|
||||
model_config = {"from_attributes": True}
|
||||
@@ -225,6 +228,7 @@ class BillSchema(BaseModel):
|
||||
latest_brief: Optional[BriefSchema] = None
|
||||
latest_trend: Optional[TrendScoreSchema] = None
|
||||
updated_at: Optional[datetime] = None
|
||||
bill_category: Optional[str] = None
|
||||
has_document: bool = False
|
||||
|
||||
model_config = {"from_attributes": True}
|
||||
|
||||
@@ -86,6 +86,10 @@ def get_bill_actions(congress: int, bill_type: str, bill_number: int, offset: in
|
||||
return _get(f"/bill/{congress}/{bill_type.lower()}/{bill_number}/actions", {"offset": offset, "limit": 250})
|
||||
|
||||
|
||||
def get_bill_cosponsors(congress: int, bill_type: str, bill_number: int, offset: int = 0) -> dict:
    """Request a bill's ``/cosponsors`` resource through ``_get``.

    The bill type is lower-cased in the path; ``offset`` is forwarded as-is
    and the ``limit`` parameter is fixed at 250. Returns whatever ``_get``
    returns.
    """
    endpoint = f"/bill/{congress}/{bill_type.lower()}/{bill_number}/cosponsors"
    paging = {"offset": offset, "limit": 250}
    return _get(endpoint, paging)
|
||||
|
||||
|
||||
def get_bill_text_versions(congress: int, bill_type: str, bill_number: int) -> dict:
    """Request a bill's ``/text`` resource through ``_get`` with no query parameters.

    The bill type is lower-cased in the path. Returns whatever ``_get`` returns.
    """
    endpoint = f"/bill/{congress}/{bill_type.lower()}/{bill_number}/text"
    no_params = {}
    return _get(endpoint, no_params)
|
||||
|
||||
|
||||
353
backend/app/workers/bill_classifier.py
Normal file
353
backend/app/workers/bill_classifier.py
Normal file
@@ -0,0 +1,353 @@
|
||||
"""
|
||||
Bill classifier and Member Effectiveness Score workers.
|
||||
|
||||
Tasks:
|
||||
classify_bill_category — lightweight LLM call; triggered after brief generation
|
||||
fetch_bill_cosponsors — Congress.gov cosponsor fetch; triggered on new bill
|
||||
calculate_effectiveness_scores — nightly beat task
|
||||
backfill_bill_categories — one-time backfill for existing bills
|
||||
backfill_all_bill_cosponsors — one-time backfill for existing bills
|
||||
"""
|
||||
import json
|
||||
import logging
|
||||
import time
|
||||
from datetime import datetime, timezone
|
||||
|
||||
from sqlalchemy import text
|
||||
|
||||
from app.config import settings
|
||||
from app.database import get_sync_db
|
||||
from app.models import Bill, BillCosponsor, BillDocument, Member
|
||||
from app.models.setting import AppSetting
|
||||
from app.services import congress_api
|
||||
from app.services.llm_service import RateLimitError, get_llm_provider
|
||||
from app.workers.celery_app import celery_app
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ── Classification ─────────────────────────────────────────────────────────────
|
||||
|
||||
_CLASSIFICATION_PROMPT = """\
|
||||
Classify this bill into exactly one category.
|
||||
|
||||
Categories:
|
||||
- substantive: Creates, modifies, or repeals policy, programs, regulations, funding, or rights. Real legislative work.
|
||||
- commemorative: Names buildings/post offices, recognizes awareness days/weeks, honors individuals or events with no policy effect.
|
||||
- administrative: Technical corrections, routine reauthorizations, housekeeping changes with no new policy substance.
|
||||
|
||||
Respond with ONLY valid JSON: {{"category": "substantive" | "commemorative" | "administrative"}}
|
||||
|
||||
BILL TITLE: {title}
|
||||
|
||||
BILL TEXT (excerpt):
|
||||
{excerpt}
|
||||
|
||||
Classify now:"""
|
||||
|
||||
_VALID_CATEGORIES = {"substantive", "commemorative", "administrative"}
|
||||
|
||||
|
||||
def _parse_llm_category(raw: str) -> str:
    """Extract the lower-cased ``category`` value from an LLM reply.

    Accepts bare JSON or JSON wrapped in a Markdown code fence (with or
    without a ``json`` language tag). Returns ``""`` when the payload is not
    an object or its ``category`` is missing or not a string, so the caller's
    "invalid category" fallback applies instead of an ``AttributeError``.

    Raises:
        json.JSONDecodeError: if the reply is not valid JSON.
    """
    body = raw.strip()
    if body.startswith("```"):
        # Keep only what sits between the first pair of fences.
        body = body.split("```")[1].strip()
        # Remove the language tag as a prefix; str.lstrip("json") would strip
        # a *set of characters*, not the word.
        if body[:4].lower() == "json":
            body = body[4:].strip()

    payload = json.loads(body)
    category = payload.get("category") if isinstance(payload, dict) else None
    return category.strip().lower() if isinstance(category, str) else ""


@celery_app.task(
    bind=True,
    max_retries=3,
    rate_limit=f"{settings.LLM_RATE_LIMIT_RPM}/m",
    name="app.workers.bill_classifier.classify_bill_category",
)
def classify_bill_category(self, bill_id: str, document_id: int):
    """Set bill_category via a cheap one-shot LLM call. Idempotent.

    Args:
        bill_id: Primary key of the bill to classify.
        document_id: Primary key of the BillDocument whose first 1200
            characters of raw text are sent as the excerpt.

    Returns:
        ``{"status": "skipped"}`` when the bill is missing or already has a
        category, otherwise ``{"status": "ok", "bill_id": ..., "category": ...}``.

    Raises:
        celery.exceptions.Retry: via ``self.retry`` on a rate limit (after the
            provider's ``retry_after``) or on any other failure (after 120 s).
    """
    db = get_sync_db()
    try:
        bill = db.get(Bill, bill_id)
        if not bill or bill.bill_category:
            return {"status": "skipped"}

        doc = db.get(BillDocument, document_id)
        excerpt = (doc.raw_text[:1200] if doc and doc.raw_text else "").strip()

        # Provider/model overrides are read from app settings; None is passed
        # through when a setting row is absent.
        prov_row = db.get(AppSetting, "llm_provider")
        model_row = db.get(AppSetting, "llm_model")
        provider = get_llm_provider(
            prov_row.value if prov_row else None,
            model_row.value if model_row else None,
        )

        prompt = _CLASSIFICATION_PROMPT.format(
            title=bill.title or "Unknown",
            excerpt=excerpt or "(no text available)",
        )

        category = _parse_llm_category(provider.generate_text(prompt))
        if category not in _VALID_CATEGORIES:
            logger.warning(f"classify_bill_category: invalid category '{category}' for {bill_id}, defaulting to substantive")
            category = "substantive"

        bill.bill_category = category
        db.commit()
        logger.info(f"Bill {bill_id} classified as '{category}'")
        return {"status": "ok", "bill_id": bill_id, "category": category}

    except RateLimitError as exc:
        db.rollback()
        raise self.retry(exc=exc, countdown=exc.retry_after)
    except Exception as exc:
        db.rollback()
        logger.error(f"classify_bill_category failed for {bill_id}: {exc}")
        raise self.retry(exc=exc, countdown=120)
    finally:
        db.close()
|
||||
|
||||
|
||||
@celery_app.task(bind=True, max_retries=3, name="app.workers.bill_classifier.backfill_bill_categories")
def backfill_bill_categories(self):
    """Queue classification for all bills with text but no category.

    Exactly one task is queued per bill. When a bill has several documents
    with text, the one with the highest id is used, so a bill is not sent to
    the LLM once per stored document.

    Returns:
        ``{"queued": <number of classification tasks enqueued>}``.
    """
    db = get_sync_db()
    try:
        rows = db.execute(text("""
            SELECT bd.bill_id, MAX(bd.id) AS document_id
            FROM bill_documents bd
            JOIN bills b ON b.bill_id = bd.bill_id
            WHERE b.bill_category IS NULL AND bd.raw_text IS NOT NULL
            GROUP BY bd.bill_id
        """)).fetchall()

        queued = 0
        for row in rows:
            classify_bill_category.delay(row.bill_id, row.document_id)
            queued += 1
            # Brief pause between enqueues so the broker is not flooded.
            time.sleep(0.05)

        logger.info(f"backfill_bill_categories: queued {queued} classification tasks")
        return {"queued": queued}
    finally:
        db.close()
|
||||
|
||||
|
||||
# ── Co-sponsor fetching ────────────────────────────────────────────────────────
|
||||
|
||||
@celery_app.task(bind=True, max_retries=3, name="app.workers.bill_classifier.fetch_bill_cosponsors")
def fetch_bill_cosponsors(self, bill_id: str):
    """Fetch and store cosponsor list from Congress.gov. Idempotent.

    All pages are staged in one transaction and committed together with
    ``cosponsors_fetched_at``, so a failed attempt leaves nothing behind and a
    retry cannot duplicate rows (including rows whose ``bioguide_id`` is NULL,
    which the partial unique index does not protect).

    Args:
        bill_id: Primary key of the bill whose cosponsors are fetched.

    Returns:
        ``{"status": "skipped"}`` when the bill is missing or was already
        fetched, otherwise ``{"bill_id": ..., "inserted": <row count>}``.

    Raises:
        celery.exceptions.Retry: via ``self.retry`` (60 s countdown) on any error.
    """
    # Must equal the ``limit`` that congress_api.get_bill_cosponsors requests.
    page_size = 250

    db = get_sync_db()
    try:
        bill = db.get(Bill, bill_id)
        if not bill or bill.cosponsors_fetched_at:
            return {"status": "skipped"}

        known_bioguides = {row[0] for row in db.execute(text("SELECT bioguide_id FROM members")).fetchall()}

        # cosponsors_fetched_at is still NULL, so any rows already present were
        # written by an attempt that never completed; replace them wholesale.
        db.query(BillCosponsor).filter_by(bill_id=bill_id).delete(synchronize_session=False)

        linked_seen: set[str] = set()
        inserted = 0
        offset = 0

        while True:
            data = congress_api.get_bill_cosponsors(
                bill.congress_number, bill.bill_type, bill.bill_number, offset=offset
            )
            cosponsors = data.get("cosponsors", [])
            if not cosponsors:
                break

            for cs in cosponsors:
                bioguide_id = cs.get("bioguideId")
                # Only link to members we've already ingested
                if bioguide_id and bioguide_id not in known_bioguides:
                    bioguide_id = None

                # Skip repeats of a linked member within the same fetch; the
                # unique index on (bill_id, bioguide_id) would reject them.
                if bioguide_id:
                    if bioguide_id in linked_seen:
                        continue
                    linked_seen.add(bioguide_id)

                date_str = cs.get("sponsorshipDate")
                try:
                    sponsored_date = datetime.strptime(date_str, "%Y-%m-%d").date() if date_str else None
                except (ValueError, TypeError):
                    # Malformed or non-string date: store the row without it.
                    sponsored_date = None

                db.add(BillCosponsor(
                    bill_id=bill_id,
                    bioguide_id=bioguide_id,
                    name=cs.get("fullName") or cs.get("name"),
                    party=cs.get("party"),
                    state=cs.get("state"),
                    sponsored_date=sponsored_date,
                ))
                inserted += 1

            offset += page_size
            if len(cosponsors) < page_size:
                # Short page means this was the last one.
                break
            time.sleep(0.25)

        bill.cosponsors_fetched_at = datetime.now(timezone.utc)
        db.commit()
        return {"bill_id": bill_id, "inserted": inserted}

    except Exception as exc:
        db.rollback()
        logger.error(f"fetch_bill_cosponsors failed for {bill_id}: {exc}")
        raise self.retry(exc=exc, countdown=60)
    finally:
        db.close()
|
||||
|
||||
|
||||
@celery_app.task(bind=True, name="app.workers.bill_classifier.backfill_all_bill_cosponsors")
def backfill_all_bill_cosponsors(self):
    """Enqueue a ``fetch_bill_cosponsors`` task for every bill whose
    ``cosponsors_fetched_at`` is still NULL.

    Returns:
        ``{"queued": <number of tasks enqueued>}``.
    """
    db = get_sync_db()
    try:
        pending = db.execute(text(
            "SELECT bill_id FROM bills WHERE cosponsors_fetched_at IS NULL"
        )).fetchall()

        queued = 0
        for queued, pending_row in enumerate(pending, start=1):
            fetch_bill_cosponsors.delay(pending_row.bill_id)
            # Short pause between enqueues.
            time.sleep(0.05)

        logger.info(f"backfill_all_bill_cosponsors: queued {queued} tasks")
        return {"queued": queued}
    finally:
        db.close()
|
||||
|
||||
|
||||
# ── Effectiveness scoring ──────────────────────────────────────────────────────
|
||||
|
||||
def _distance_points(latest_action_text: str | None) -> int:
|
||||
"""Map latest action text to a distance-traveled score."""
|
||||
text = (latest_action_text or "").lower()
|
||||
if "became public law" in text or "signed by president" in text or "enacted" in text:
|
||||
return 50
|
||||
if "passed house" in text or "passed senate" in text or "agreed to in" in text:
|
||||
return 20
|
||||
if "placed on" in text and "calendar" in text:
|
||||
return 10
|
||||
if "reported by" in text or "ordered to be reported" in text or "discharged" in text:
|
||||
return 5
|
||||
return 1
|
||||
|
||||
|
||||
def _bipartisan_multiplier(db, bill_id: str, sponsor_party: str | None) -> float:
|
||||
"""1.5x if ≥20% of cosponsors are from the opposing party."""
|
||||
if not sponsor_party:
|
||||
return 1.0
|
||||
cosponsors = db.query(BillCosponsor).filter_by(bill_id=bill_id).all()
|
||||
if not cosponsors:
|
||||
return 1.0
|
||||
opposing = [c for c in cosponsors if c.party and c.party != sponsor_party]
|
||||
if len(cosponsors) > 0 and len(opposing) / len(cosponsors) >= 0.20:
|
||||
return 1.5
|
||||
return 1.0
|
||||
|
||||
|
||||
def _substance_multiplier(bill_category: str | None) -> float:
|
||||
return 0.1 if bill_category == "commemorative" else 1.0
|
||||
|
||||
|
||||
def _leadership_multiplier(member: Member, congress_number: int) -> float:
|
||||
"""1.2x if member chaired a committee during this Congress."""
|
||||
if not member.leadership_json:
|
||||
return 1.0
|
||||
for role in member.leadership_json:
|
||||
if (role.get("congress") == congress_number and
|
||||
"chair" in (role.get("type") or "").lower()):
|
||||
return 1.2
|
||||
return 1.0
|
||||
|
||||
|
||||
def _seniority_tier(terms_json: list | None) -> str:
|
||||
"""Return 'junior' | 'mid' | 'senior' based on number of terms served."""
|
||||
if not terms_json:
|
||||
return "junior"
|
||||
count = len(terms_json)
|
||||
if count <= 2:
|
||||
return "junior"
|
||||
if count <= 5:
|
||||
return "mid"
|
||||
return "senior"
|
||||
|
||||
|
||||
def _bucket_percentiles(scores: dict[str, float]) -> dict[str, float]:
    """Map each id to its 0–100 percentile within ``scores``.

    A member's rank is the number of bucket members with a strictly lower
    score, so equal scores always receive the same percentile (a bucket in
    which everyone scored 0.0 is all 0.0 rather than spread over 0–100).
    With no ties the result equals ``rank / (n - 1) * 100``.
    """
    from bisect import bisect_left

    ordered = sorted(scores.values())
    span = max(len(ordered) - 1, 1)  # guards the single-member bucket
    return {
        bid: round(bisect_left(ordered, score) / span * 100, 1)
        for bid, score in scores.items()
    }


@celery_app.task(bind=True, name="app.workers.bill_classifier.calculate_effectiveness_scores")
def calculate_effectiveness_scores(self):
    """Nightly: compute effectiveness score and within-tier percentile for all members.

    For every current-Congress bill whose sponsor is a known member, adds
    ``distance points × bipartisan × substance × leadership`` to the sponsor's
    raw score, then ranks members inside (seniority tier, party) buckets and
    writes score, percentile and tier back to each Member row.

    Returns:
        ``{"status": "no_members"}`` when there are no members, otherwise
        ``{"status": "ok", "updated": <count>, "congress": <number>}``.

    Raises:
        Exception: any failure is logged, rolled back and re-raised.
    """
    db = get_sync_db()
    try:
        members = db.query(Member).all()
        if not members:
            return {"status": "no_members"}

        # Map bioguide_id → Member for quick lookup
        member_map = {m.bioguide_id: m for m in members}

        # Only bills from the current Congress contribute to the score
        current_congress = congress_api.get_current_congress()
        bills = db.query(Bill).filter_by(congress_number=current_congress).all()

        # Compute raw score per member; members with no sponsored bills stay at 0.0
        raw_scores: dict[str, float] = {m.bioguide_id: 0.0 for m in members}

        for bill in bills:
            sponsor = member_map.get(bill.sponsor_id) if bill.sponsor_id else None
            if sponsor is None:
                continue

            pts = _distance_points(bill.latest_action_text)
            bipartisan = _bipartisan_multiplier(db, bill.bill_id, sponsor.party)
            substance = _substance_multiplier(bill.bill_category)
            leadership = _leadership_multiplier(sponsor, current_congress)

            raw_scores[bill.sponsor_id] += pts * bipartisan * substance * leadership

        # Group members by (tier, party) for percentile normalisation
        # We treat party as a proxy for majority/minority — grouped separately so
        # a minority-party junior isn't unfairly compared to a majority-party senior.
        from collections import defaultdict
        buckets: dict[tuple, list[str]] = defaultdict(list)
        for m in members:
            tier = _seniority_tier(m.terms_json)
            party_bucket = m.party or "Unknown"
            buckets[(tier, party_bucket)].append(m.bioguide_id)

        # Compute tie-aware percentile within each bucket
        percentiles: dict[str, float] = {}
        tiers: dict[str, str] = {}
        for (tier, _), ids in buckets.items():
            percentiles.update(_bucket_percentiles({bid: raw_scores[bid] for bid in ids}))
            tiers.update(dict.fromkeys(ids, tier))

        # Write results back; a single commit persists all members
        updated = 0
        for m in members:
            m.effectiveness_score = round(raw_scores[m.bioguide_id], 2)
            m.effectiveness_percentile = percentiles.get(m.bioguide_id)
            m.effectiveness_tier = tiers.get(m.bioguide_id, _seniority_tier(m.terms_json))
            updated += 1

        db.commit()
        logger.info(f"calculate_effectiveness_scores: updated {updated} members for Congress {current_congress}")
        return {"status": "ok", "updated": updated, "congress": current_congress}

    except Exception as exc:
        db.rollback()
        logger.error(f"calculate_effectiveness_scores failed: {exc}")
        raise
    finally:
        db.close()
|
||||
@@ -17,6 +17,7 @@ celery_app = Celery(
|
||||
"app.workers.member_interest",
|
||||
"app.workers.notification_dispatcher",
|
||||
"app.workers.llm_batch_processor",
|
||||
"app.workers.bill_classifier",
|
||||
],
|
||||
)
|
||||
|
||||
@@ -37,6 +38,7 @@ celery_app.conf.update(
|
||||
"app.workers.document_fetcher.*": {"queue": "documents"},
|
||||
"app.workers.llm_processor.*": {"queue": "llm"},
|
||||
"app.workers.llm_batch_processor.*": {"queue": "llm"},
|
||||
"app.workers.bill_classifier.*": {"queue": "llm"},
|
||||
"app.workers.news_fetcher.*": {"queue": "news"},
|
||||
"app.workers.trend_scorer.*": {"queue": "news"},
|
||||
"app.workers.member_interest.*": {"queue": "news"},
|
||||
@@ -96,5 +98,9 @@ celery_app.conf.update(
|
||||
"task": "app.workers.llm_batch_processor.poll_llm_batch_results",
|
||||
"schedule": crontab(minute="*/30"),
|
||||
},
|
||||
"calculate-effectiveness-scores": {
|
||||
"task": "app.workers.bill_classifier.calculate_effectiveness_scores",
|
||||
"schedule": crontab(hour=5, minute=0), # 5 AM UTC, after all other nightly tasks
|
||||
},
|
||||
},
|
||||
)
|
||||
|
||||
@@ -112,13 +112,15 @@ def poll_congress_bills(self):
|
||||
db.add(Bill(**parsed))
|
||||
db.commit()
|
||||
new_count += 1
|
||||
# Enqueue document, action, and sponsor fetches
|
||||
# Enqueue document, action, sponsor, and cosponsor fetches
|
||||
from app.workers.document_fetcher import fetch_bill_documents
|
||||
fetch_bill_documents.delay(bill_id)
|
||||
fetch_bill_actions.delay(bill_id)
|
||||
fetch_sponsor_for_bill.delay(
|
||||
bill_id, current_congress, parsed["bill_type"], parsed["bill_number"]
|
||||
)
|
||||
from app.workers.bill_classifier import fetch_bill_cosponsors
|
||||
fetch_bill_cosponsors.delay(bill_id)
|
||||
else:
|
||||
_update_bill_if_changed(db, existing, parsed)
|
||||
updated_count += 1
|
||||
|
||||
@@ -119,6 +119,10 @@ def process_document_with_llm(self, document_id: int):
|
||||
from app.workers.news_fetcher import fetch_news_for_bill
|
||||
fetch_news_for_bill.delay(doc.bill_id)
|
||||
|
||||
# Classify bill as substantive / commemorative / administrative
|
||||
from app.workers.bill_classifier import classify_bill_category
|
||||
classify_bill_category.delay(doc.bill_id, document_id)
|
||||
|
||||
return {"status": "ok", "brief_id": db_brief.id, "brief_type": brief_type}
|
||||
|
||||
except RateLimitError as exc:
|
||||
|
||||
Reference in New Issue
Block a user