Limit first-run bill seed to last 60 days + filter out procedural resolutions

- Drop hres/sres/hconres/sconres (simple/concurrent resolutions) from poller;
  only track hr, s, hjres, sjres (legislation that can become law)
- On first run, seed from 60 days back instead of full congress history,
  keeping the bill count to ~1,600 instead of 13,000+

Authored-By: Jack Levy
This commit is contained in:
Jack Levy
2026-02-28 22:21:34 -05:00
parent 5b73b60d9e
commit c48241fe2f

View File

@@ -6,7 +6,7 @@ Uses fromDateTime to fetch only recently updated bills.
All operations are idempotent.
"""
import logging
from datetime import datetime, timezone
from datetime import datetime, timedelta, timezone
from app.database import get_sync_db
from app.models import Bill, BillAction, Member, AppSetting
@@ -30,12 +30,21 @@ def _set_setting(db, key: str, value: str) -> None:
db.commit()
# Restrict polling to measures that can be enacted into law. Simple and
# concurrent resolutions (hres, sres, hconres, sconres) are procedural, so
# the poller skips them rather than spending analysis on them.
TRACKED_BILL_TYPES = {
    "hr",
    "s",
    "hjres",
    "sjres",
}
@celery_app.task(bind=True, max_retries=3, name="app.workers.congress_poller.poll_congress_bills")
def poll_congress_bills(self):
"""Fetch recently updated bills from Congress.gov and enqueue document + LLM processing."""
db = get_sync_db()
try:
last_polled = _get_setting(db, "congress_last_polled_at")
# On first run, seed from 2 months back rather than the full congress history
if not last_polled:
two_months_ago = datetime.now(timezone.utc) - timedelta(days=60)
last_polled = two_months_ago.strftime("%Y-%m-%dT%H:%M:%SZ")
current_congress = congress_api.get_current_congress()
logger.info(f"Polling Congress {current_congress} (since {last_polled})")
@@ -56,6 +65,8 @@ def poll_congress_bills(self):
for bill_data in bills_data:
parsed = congress_api.parse_bill_from_api(bill_data, current_congress)
if parsed.get("bill_type") not in TRACKED_BILL_TYPES:
continue
bill_id = parsed["bill_id"]
existing = db.get(Bill, bill_id)