diff --git a/backend/app/workers/congress_poller.py b/backend/app/workers/congress_poller.py index 92f5cf1..08b9f0f 100644 --- a/backend/app/workers/congress_poller.py +++ b/backend/app/workers/congress_poller.py @@ -6,7 +6,7 @@ Uses fromDateTime to fetch only recently updated bills. All operations are idempotent. """ import logging -from datetime import datetime, timezone +from datetime import datetime, timedelta, timezone from app.database import get_sync_db from app.models import Bill, BillAction, Member, AppSetting @@ -30,12 +30,21 @@ def _set_setting(db, key: str, value: str) -> None: db.commit() +# Only track legislation that can become law. Simple/concurrent resolutions +# (hres, sres, hconres, sconres) are procedural and not worth analyzing. +TRACKED_BILL_TYPES = {"hr", "s", "hjres", "sjres"} + + @celery_app.task(bind=True, max_retries=3, name="app.workers.congress_poller.poll_congress_bills") def poll_congress_bills(self): """Fetch recently updated bills from Congress.gov and enqueue document + LLM processing.""" db = get_sync_db() try: last_polled = _get_setting(db, "congress_last_polled_at") + # On first run, seed from 60 days (about two months) back rather than the full congress history + if not last_polled: + two_months_ago = datetime.now(timezone.utc) - timedelta(days=60) + last_polled = two_months_ago.strftime("%Y-%m-%dT%H:%M:%SZ") current_congress = congress_api.get_current_congress() logger.info(f"Polling Congress {current_congress} (since {last_polled})") @@ -56,6 +65,8 @@ def poll_congress_bills(self): for bill_data in bills_data: parsed = congress_api.parse_bill_from_api(bill_data, current_congress) + if parsed.get("bill_type") not in TRACKED_BILL_TYPES: + continue bill_id = parsed["bill_id"] existing = db.get(Bill, bill_id)