Add bill action pipeline, admin health panel, and LLM provider fixes
- Fetch bill actions from Congress.gov and populate the action timeline
- Add nightly batch task and beat schedule for active bill actions
- Add admin reprocess endpoint for per-bill debugging
- Add BriefPanel with "What Changed" view and version history
- Add External API Health section with per-source latency testing
- Redesign Manual Controls as health panel with status dots and descriptions
- Add Resume Analysis task for stalled LLM jobs
- Add Backfill Dates & Links task for bills with null metadata
- Fix LLM provider/model DB overrides being ignored (env vars used instead)
- Fix Gemini 404: gemini-1.5-pro deprecated → gemini-2.0-flash
- Fix Anthropic models list: use REST API directly (SDK too old for .models)
- Replace test-LLM full analysis with lightweight ping (max_tokens=20)
- Add has_document field to BillDetail; show "No bill text published" state
- Fix "Introduced: —" showing for bills with null introduced_date
- Add bills_missing_sponsor and bills_missing_metadata to admin stats
- Add GovInfo health check using /collections endpoint (fixes 500 from /packages)

Authored-By: Jack Levy
This commit is contained in:
@@ -300,17 +300,95 @@ def fetch_actions_for_active_bills(self):
|
||||
def _update_bill_if_changed(db, existing: Bill, parsed: dict) -> bool:
    """Sync ``existing`` with freshly parsed bill data and persist any difference.

    Two groups of fields are handled differently:

    * Tracked fields are overwritten whenever the parsed value is truthy and
      differs from the stored one. Any such overwrite counts as a meaningful
      change.
    * Static fields are only filled in when the stored value is ``None``.
      Filling one in is persisted but does not count as a change.

    When a meaningful change occurred, ``last_checked_at`` is stamped with the
    current UTC time and, after the commit, the document and action fetch
    tasks are queued for the bill.

    Args:
        db: Session used to commit the modifications.
        existing: The stored bill row to update in place.
        parsed: Mapping of field name to the newly parsed value.

    Returns:
        True if at least one tracked field was overwritten, otherwise False
        (even if static fields were back-filled).
    """
    # Overwriting any of these triggers the document + action fetch.
    tracked = ("title", "short_title", "latest_action_date", "latest_action_text", "status")
    # These are only populated when empty and never trigger a fetch.
    fill_when_null = ("introduced_date", "congress_url", "chamber")

    changed = False
    for name in tracked:
        incoming = parsed.get(name)
        if not incoming:
            continue
        if getattr(existing, name) != incoming:
            setattr(existing, name, incoming)
            changed = True

    # A meaningful change always needs a commit; a back-fill may need one too.
    needs_commit = changed
    for name in fill_when_null:
        incoming = parsed.get(name)
        if incoming and getattr(existing, name) is None:
            setattr(existing, name, incoming)
            needs_commit = True

    if not needs_commit:
        return False

    if changed:
        existing.last_checked_at = datetime.now(timezone.utc)
    db.commit()

    if not changed:
        return False

    # The bill has changed: look for new text versions and sync its actions.
    from app.workers.document_fetcher import fetch_bill_documents

    bill_id = existing.bill_id
    fetch_bill_documents.delay(bill_id)
    fetch_bill_actions.delay(bill_id)
    return True
|
||||
|
||||
|
||||
@celery_app.task(bind=True, name="app.workers.congress_poller.backfill_bill_metadata")
def backfill_bill_metadata(self):
    """
    Find bills with null introduced_date, congress_url, or chamber and
    re-fetch their detail from Congress.gov to fill in the missing values.

    Only fields that are currently null are written (title and short_title
    are also filled if they happen to be null on a selected bill); existing
    values are never overwritten. No document or LLM calls — metadata only.

    Each bill is committed on its own, so a failure on one bill does not
    discard the values already saved for the others.

    Returns:
        A dict with two counters: "updated" (bills that had at least one
        field filled) and "skipped" (bills that were missing, had nothing
        to fill, or raised an error).
    """
    # Pause after every Congress.gov request. 0.75 s caps this task at
    # 4,800 requests/hour, which stays under the 5k/hr limit even if each
    # request returned instantly.
    request_interval = 0.75
    fill_null_fields = ["introduced_date", "congress_url", "chamber", "title", "short_title"]

    db = get_sync_db()
    try:
        from sqlalchemy import text as sa_text

        rows = db.execute(sa_text("""
            SELECT bill_id, congress_number, bill_type, bill_number
            FROM bills
            WHERE introduced_date IS NULL
               OR congress_url IS NULL
               OR chamber IS NULL
        """)).fetchall()

        updated = 0
        skipped = 0
        for row in rows:
            try:
                detail = congress_api.get_bill_detail(
                    row.congress_number, row.bill_type, row.bill_number
                )
                bill_data = detail.get("bill", {})
                parsed = congress_api.parse_bill_from_api(
                    {
                        "type": row.bill_type,
                        "number": row.bill_number,
                        "introducedDate": bill_data.get("introducedDate"),
                        "title": bill_data.get("title"),
                        "shortTitle": bill_data.get("shortTitle"),
                        "latestAction": bill_data.get("latestAction") or {},
                    },
                    row.congress_number,
                )
                bill = db.get(Bill, row.bill_id)
                if not bill:
                    skipped += 1
                    continue

                dirty = False
                for field in fill_null_fields:
                    new_val = parsed.get(field)
                    if new_val and getattr(bill, field) is None:
                        setattr(bill, field, new_val)
                        dirty = True

                if dirty:
                    db.commit()
                    updated += 1
                else:
                    skipped += 1
            except Exception as exc:
                # A failed flush/commit leaves the session unusable until it
                # is rolled back; without this every later bill would fail.
                db.rollback()
                logger.warning(f"backfill_bill_metadata: failed for {row.bill_id}: {exc}")
                skipped += 1
            finally:
                # Throttle on every path (success, missing bill, error) so a
                # failing or rate-limited API is not hit in a tight loop.
                time.sleep(request_interval)

        logger.info(f"backfill_bill_metadata: {updated} updated, {skipped} skipped")
        return {"updated": updated, "skipped": skipped}
    finally:
        db.close()
|
||||
|
||||
Reference in New Issue
Block a user