PocketVeto/backend/app/api/members.py
Jack Levy 4cada298ab fix: ZIP rep lookup — handle GEOID20 for 119th Congress TIGERweb layers
The 119th Congressional Districts layer uses 2020 Census vintage field
names (GEOID20, STATEFP20, CD119FP) instead of GEOID. The GEOID check
was silently falling through; added GEOID20 fallback, isdigit() guard,
try/except on CD field parsing, and debug logging of unparseable layers.
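Illustrative example (not from a captured response): a 119th-layer feature
carrying {"GEOID20": "0618", "STATEFP20": "06", "CD119FP": "18"} was skipped
by the old GEOID-only check and now resolves to CA-18.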

Authored by: Jack Levy
2026-03-15 10:29:09 -04:00


import logging
import re
from datetime import date, datetime, timedelta, timezone
from typing import Optional

import httpx
from fastapi import APIRouter, Depends, HTTPException, Query
from sqlalchemy import desc, func, or_, select
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.orm import selectinload

from app.database import get_db
from app.models import Bill, Member, MemberTrendScore, MemberNewsArticle
from app.schemas.schemas import (
    BillSchema, MemberSchema, MemberTrendScoreSchema,
    MemberNewsArticleSchema, PaginatedResponse,
)
from app.services import congress_api

# ANSI/FIPS state codes, including DC and the territories (60/66/69/72/78).
_FIPS_TO_STATE = {
    "01": "AL", "02": "AK", "04": "AZ", "05": "AR", "06": "CA",
    "08": "CO", "09": "CT", "10": "DE", "11": "DC", "12": "FL",
    "13": "GA", "15": "HI", "16": "ID", "17": "IL", "18": "IN",
    "19": "IA", "20": "KS", "21": "KY", "22": "LA", "23": "ME",
    "24": "MD", "25": "MA", "26": "MI", "27": "MN", "28": "MS",
    "29": "MO", "30": "MT", "31": "NE", "32": "NV", "33": "NH",
    "34": "NJ", "35": "NM", "36": "NY", "37": "NC", "38": "ND",
    "39": "OH", "40": "OK", "41": "OR", "42": "PA", "44": "RI",
    "45": "SC", "46": "SD", "47": "TN", "48": "TX", "49": "UT",
    "50": "VT", "51": "VA", "53": "WA", "54": "WV", "55": "WI",
    "56": "WY", "60": "AS", "66": "GU", "69": "MP", "72": "PR", "78": "VI",
}

logger = logging.getLogger(__name__)
router = APIRouter()
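
# GEOID worked examples (illustrative values derived from the parsing rules in
# get_members_by_zip below, not from captured TIGERweb responses):
#   "0618" -> state FIPS "06" (CA) + CD "18" -> CA-18 (district_num="18")
#   "5000" -> state FIPS "50" (VT) + CD "00" -> at-large (district_num=None)
#   "1198" -> state FIPS "11" (DC) + CD "98" -> "98" marks a non-voting delegate seat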
@router.get("/by-zip/{zip_code}", response_model=list[MemberSchema])
async def get_members_by_zip(zip_code: str, db: AsyncSession = Depends(get_db)):
"""Return the House rep and senators for a ZIP code.
Step 1: Nominatim (OpenStreetMap) — ZIP → lat/lng.
Step 2: TIGERweb Legislative identify — lat/lng → congressional district.
"""
if not re.fullmatch(r"\d{5}", zip_code):
raise HTTPException(status_code=400, detail="ZIP code must be 5 digits")
state_code: str | None = None
district_num: str | None = None
try:
async with httpx.AsyncClient(timeout=20.0) as client:
# Step 1: ZIP → lat/lng
r1 = await client.get(
"https://nominatim.openstreetmap.org/search",
params={"postalcode": zip_code, "country": "US", "format": "json", "limit": "1"},
headers={"User-Agent": "PocketVeto/1.0"},
)
places = r1.json() if r1.status_code == 200 else []
if not places:
logger.warning("Nominatim: no result for ZIP %s", zip_code)
return []
lat = places[0]["lat"]
lng = places[0]["lon"]
# Step 2: lat/lng → congressional district via TIGERweb identify (all layers)
half = 0.5
r2 = await client.get(
"https://tigerweb.geo.census.gov/arcgis/rest/services/TIGERweb/Legislative/MapServer/identify",
params={
"f": "json",
"geometry": f"{lng},{lat}",
"geometryType": "esriGeometryPoint",
"sr": "4326",
"layers": "all",
"tolerance": "2",
"mapExtent": f"{float(lng)-half},{float(lat)-half},{float(lng)+half},{float(lat)+half}",
"imageDisplay": "100,100,96",
},
)
if r2.status_code != 200:
logger.warning("TIGERweb returned %s for ZIP %s", r2.status_code, zip_code)
return []
identify_results = r2.json().get("results", [])
logger.info(
"TIGERweb ZIP %s layers: %s",
zip_code, [r.get("layerName") for r in identify_results],
)
for item in identify_results:
if "Congressional" not in (item.get("layerName") or ""):
continue
attrs = item.get("attributes", {})
# GEOID / GEOID20 — 2-char state FIPS + 2-char district (e.g. "0618" = CA-18)
# 119th Congress layers use GEOID20 (2020 Census vintage)
geoid = str(attrs.get("GEOID") or attrs.get("GEOID20") or "").strip()
if len(geoid) == 4 and geoid.isdigit():
state_fips = geoid[:2]
district_fips = geoid[2:]
state_code = _FIPS_TO_STATE.get(state_fips)
district_num = str(int(district_fips)) if district_fips.strip("0") else None
if state_code:
break
# Fallback: explicit field names (e.g. CD119FP + STATEFP20)
cd_field = next((k for k in attrs if re.match(r"CD\d+FP$", k)), None)
state_field = next((k for k in attrs if "STATEFP" in k.upper()), None)
if cd_field and state_field:
try:
state_fips = str(attrs[state_field]).zfill(2)
district_fips = str(attrs[cd_field]).zfill(2)
state_code = _FIPS_TO_STATE.get(state_fips)
district_num = str(int(district_fips)) if district_fips.strip("0") else None
if state_code:
break
except (ValueError, TypeError):
pass
logger.debug(
"ZIP %s: could not parse layer %r — attrs: %s",
zip_code, item.get("layerName"), list(attrs.keys()),
)
            if not state_code:
                logger.warning(
                    "ZIP %s: no CD found. Layers: %s",
                    zip_code, [r.get("layerName") for r in identify_results],
                )
    except Exception as exc:
        logger.warning("ZIP lookup error for %s: %s", zip_code, exc)
        return []

    if not state_code:
        return []

    members: list[MemberSchema] = []
    seen: set[str] = set()
    if district_num:
        result = await db.execute(
            select(Member).where(
                Member.state == state_code,
                Member.district == district_num,
                Member.chamber == "House of Representatives",
            )
        )
        member = result.scalar_one_or_none()
        if member:
            seen.add(member.bioguide_id)
            members.append(MemberSchema.model_validate(member))
    else:
        # At-large states (AK, DE, MT, ND, SD, VT, WY)
        result = await db.execute(
            select(Member).where(
                Member.state == state_code,
                Member.chamber == "House of Representatives",
            ).limit(1)
        )
        member = result.scalar_one_or_none()
        if member:
            seen.add(member.bioguide_id)
            members.append(MemberSchema.model_validate(member))

    result = await db.execute(
        select(Member).where(
            Member.state == state_code,
            Member.chamber == "Senate",
        )
    )
    for member in result.scalars().all():
        if member.bioguide_id not in seen:
            seen.add(member.bioguide_id)
            members.append(MemberSchema.model_validate(member))
    return members
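
# Quick manual check (hypothetical; the router prefix is configured where this
# router is mounted, so adjust the path to your deployment):
#   curl "http://localhost:8000/members/by-zip/05401"
# 05401 (Burlington, VT) exercises the at-large branch: the district part of
# the GEOID is "00", district_num stays None, and the single VT House member
# plus both senators should come back.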
@router.get("", response_model=PaginatedResponse[MemberSchema])
async def list_members(
chamber: Optional[str] = Query(None),
party: Optional[str] = Query(None),
state: Optional[str] = Query(None),
q: Optional[str] = Query(None),
page: int = Query(1, ge=1),
per_page: int = Query(50, ge=1, le=250),
db: AsyncSession = Depends(get_db),
):
query = select(Member)
if chamber:
query = query.where(Member.chamber == chamber)
if party:
query = query.where(Member.party == party)
if state:
query = query.where(Member.state == state)
if q:
# name is stored as "Last, First" — also match "First Last" order
first_last = func.concat(
func.split_part(Member.name, ", ", 2), " ",
func.split_part(Member.name, ", ", 1),
)
query = query.where(or_(
Member.name.ilike(f"%{q}%"),
first_last.ilike(f"%{q}%"),
))
total = await db.scalar(select(func.count()).select_from(query.subquery())) or 0
query = query.order_by(Member.last_name, Member.first_name).offset((page - 1) * per_page).limit(per_page)
result = await db.execute(query)
members = result.scalars().all()
return PaginatedResponse(
items=members,
total=total,
page=page,
per_page=per_page,
pages=max(1, (total + per_page - 1) // per_page),
)
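
# Name-matching sketch (hypothetical row): with Member.name stored as
# "Last, First", the first_last expression above lets q="George Wash" match
# "Washington, George":
#   split_part('Washington, George', ', ', 2) = 'George'
#   split_part('Washington, George', ', ', 1) = 'Washington'
#   concat(...) = 'George Washington'  -> ILIKE '%George Wash%'  -> hit
# Note: split_part is PostgreSQL syntax; other backends would need an equivalent.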
@router.get("/{bioguide_id}", response_model=MemberSchema)
async def get_member(bioguide_id: str, db: AsyncSession = Depends(get_db)):
member = await db.get(Member, bioguide_id)
if not member:
raise HTTPException(status_code=404, detail="Member not found")
# Kick off member interest on first view — single combined task avoids duplicate API calls
if member.detail_fetched is None:
try:
from app.workers.member_interest import sync_member_interest
sync_member_interest.delay(bioguide_id)
except Exception:
pass
# Lazy-enrich with detail data from Congress.gov on first view
if member.detail_fetched is None:
try:
detail_raw = congress_api.get_member_detail(bioguide_id)
enriched = congress_api.parse_member_detail_from_api(detail_raw)
for field, value in enriched.items():
if value is not None:
setattr(member, field, value)
member.detail_fetched = datetime.now(timezone.utc)
await db.commit()
await db.refresh(member)
except Exception as e:
logger.warning(f"Could not enrich member detail for {bioguide_id}: {e}")
# Attach latest trend score
result_schema = MemberSchema.model_validate(member)
latest_trend = (
await db.execute(
select(MemberTrendScore)
.where(MemberTrendScore.member_id == bioguide_id)
.order_by(desc(MemberTrendScore.score_date))
.limit(1)
)
)
trend = latest_trend.scalar_one_or_none()
if trend:
result_schema.latest_trend = MemberTrendScoreSchema.model_validate(trend)
return result_schema
@router.get("/{bioguide_id}/trend", response_model=list[MemberTrendScoreSchema])
async def get_member_trend(
bioguide_id: str,
days: int = Query(30, ge=7, le=365),
db: AsyncSession = Depends(get_db),
):
from datetime import date, timedelta
cutoff = date.today() - timedelta(days=days)
result = await db.execute(
select(MemberTrendScore)
.where(MemberTrendScore.member_id == bioguide_id, MemberTrendScore.score_date >= cutoff)
.order_by(MemberTrendScore.score_date)
)
return result.scalars().all()
@router.get("/{bioguide_id}/news", response_model=list[MemberNewsArticleSchema])
async def get_member_news(bioguide_id: str, db: AsyncSession = Depends(get_db)):
result = await db.execute(
select(MemberNewsArticle)
.where(MemberNewsArticle.member_id == bioguide_id)
.order_by(desc(MemberNewsArticle.published_at))
.limit(20)
)
return result.scalars().all()
@router.get("/{bioguide_id}/bills", response_model=PaginatedResponse[BillSchema])
async def get_member_bills(
bioguide_id: str,
page: int = Query(1, ge=1),
per_page: int = Query(20, ge=1, le=100),
db: AsyncSession = Depends(get_db),
):
query = select(Bill).options(selectinload(Bill.briefs), selectinload(Bill.sponsor)).where(Bill.sponsor_id == bioguide_id)
total = await db.scalar(select(func.count()).select_from(query.subquery())) or 0
query = query.order_by(desc(Bill.introduced_date)).offset((page - 1) * per_page).limit(per_page)
result = await db.execute(query)
bills = result.scalars().all()
items = []
for bill in bills:
b = BillSchema.model_validate(bill)
if bill.briefs:
b.latest_brief = bill.briefs[0]
items.append(b)
return PaginatedResponse(
items=items,
total=total,
page=page,
per_page=per_page,
pages=max(1, (total + per_page - 1) // per_page),
)
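
# Pagination math check (hypothetical values): total=101, per_page=20 gives
# pages = max(1, (101 + 20 - 1) // 20) = 6, and an empty result (total=0)
# still reports pages=1 so clients can render "page 1 of 1".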