fix: handle within-page cosponsor duplicates from Congress.gov API
Congress.gov occasionally returns the same member twice on a single page with different sponsorship dates (observed: Sen. Warnock on 119-s-1383). The DB uniqueness check didn't catch this because the first insert hadn't been committed yet when processing the duplicate row, causing a UniqueViolation. The fix adds an `inserted_this_run` set to skip bioguide_ids already added in the current fetch loop.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -140,6 +140,9 @@ def fetch_bill_cosponsors(self, bill_id: str):
|
|||||||
return {"status": "skipped"}
|
return {"status": "skipped"}
|
||||||
|
|
||||||
known_bioguides = {row[0] for row in db.execute(text("SELECT bioguide_id FROM members")).fetchall()}
|
known_bioguides = {row[0] for row in db.execute(text("SELECT bioguide_id FROM members")).fetchall()}
|
||||||
|
# Track bioguide_ids already inserted this run to handle within-page dupes
|
||||||
|
# (Congress.gov sometimes lists the same member twice with different dates)
|
||||||
|
inserted_this_run: set[str] = set()
|
||||||
inserted = 0
|
inserted = 0
|
||||||
offset = 0
|
offset = 0
|
||||||
|
|
||||||
@@ -157,12 +160,15 @@ def fetch_bill_cosponsors(self, bill_id: str):
|
|||||||
if bioguide_id and bioguide_id not in known_bioguides:
|
if bioguide_id and bioguide_id not in known_bioguides:
|
||||||
bioguide_id = None
|
bioguide_id = None
|
||||||
|
|
||||||
# Skip if we already have this (bioguide_id, bill_id) pair
|
# Skip dupes — both across runs (DB check) and within this page
|
||||||
if bioguide_id:
|
if bioguide_id:
|
||||||
|
if bioguide_id in inserted_this_run:
|
||||||
|
continue
|
||||||
exists = db.query(BillCosponsor).filter_by(
|
exists = db.query(BillCosponsor).filter_by(
|
||||||
bill_id=bill_id, bioguide_id=bioguide_id
|
bill_id=bill_id, bioguide_id=bioguide_id
|
||||||
).first()
|
).first()
|
||||||
if exists:
|
if exists:
|
||||||
|
inserted_this_run.add(bioguide_id)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
date_str = cs.get("sponsorshipDate")
|
date_str = cs.get("sponsorshipDate")
|
||||||
@@ -179,6 +185,8 @@ def fetch_bill_cosponsors(self, bill_id: str):
|
|||||||
state=cs.get("state"),
|
state=cs.get("state"),
|
||||||
sponsored_date=sponsored_date,
|
sponsored_date=sponsored_date,
|
||||||
))
|
))
|
||||||
|
if bioguide_id:
|
||||||
|
inserted_this_run.add(bioguide_id)
|
||||||
inserted += 1
|
inserted += 1
|
||||||
|
|
||||||
db.commit()
|
db.commit()
|
||||||
|
|||||||
Reference in New Issue
Block a user