feat(interest): add public interest tracking for members of Congress

Adds Google Trends, NewsAPI, and Google News RSS scoring for members,
mirroring the existing bill interest pipeline. Member profiles now show
a Public Interest chart (with signal breakdown) and a Related News panel.

Key changes:
- New member_trend_scores + member_news_articles tables (migration 0008)
- fetch_gnews_articles() added to news_service for unlimited RSS article storage
- Bill news fetcher now combines NewsAPI + Google News RSS (more coverage)
- New member_interest Celery worker with scheduled news + trend tasks
- GET /members/{id}/trend and /news API endpoints
- TrendChart redesigned with signal breakdown badges and bar+line combo chart
- NewsPanel accepts generic article shape (bills and members)

Co-Authored-By: Jack Levy
This commit is contained in:
Jack Levy
2026-03-01 00:36:30 -05:00
parent e21eb21acf
commit a66b5b4bcb
17 changed files with 569 additions and 29 deletions

View File

@@ -8,8 +8,11 @@ from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.orm import selectinload
from app.database import get_db
from app.models import Bill, Member
from app.schemas.schemas import BillSchema, MemberSchema, PaginatedResponse
from app.models import Bill, Member, MemberTrendScore, MemberNewsArticle
from app.schemas.schemas import (
BillSchema, MemberSchema, MemberTrendScoreSchema,
MemberNewsArticleSchema, PaginatedResponse,
)
from app.services import congress_api
logger = logging.getLogger(__name__)
@@ -66,6 +69,15 @@ async def get_member(bioguide_id: str, db: AsyncSession = Depends(get_db)):
if not member:
raise HTTPException(status_code=404, detail="Member not found")
# Kick off member interest scoring on first view (non-blocking)
if member.detail_fetched is None:
try:
from app.workers.member_interest import fetch_member_news, calculate_member_trend_score
fetch_member_news.delay(bioguide_id)
calculate_member_trend_score.delay(bioguide_id)
except Exception:
pass
# Lazy-enrich with detail data from Congress.gov on first view
if member.detail_fetched is None:
try:
@@ -80,7 +92,47 @@ async def get_member(bioguide_id: str, db: AsyncSession = Depends(get_db)):
except Exception as e:
logger.warning(f"Could not enrich member detail for {bioguide_id}: {e}")
return member
# Attach latest trend score
result_schema = MemberSchema.model_validate(member)
latest_trend = (
await db.execute(
select(MemberTrendScore)
.where(MemberTrendScore.member_id == bioguide_id)
.order_by(desc(MemberTrendScore.score_date))
.limit(1)
)
)
trend = latest_trend.scalar_one_or_none()
if trend:
result_schema.latest_trend = MemberTrendScoreSchema.model_validate(trend)
return result_schema
@router.get("/{bioguide_id}/trend", response_model=list[MemberTrendScoreSchema])
async def get_member_trend(
bioguide_id: str,
days: int = Query(30, ge=7, le=365),
db: AsyncSession = Depends(get_db),
):
from datetime import date, timedelta
cutoff = date.today() - timedelta(days=days)
result = await db.execute(
select(MemberTrendScore)
.where(MemberTrendScore.member_id == bioguide_id, MemberTrendScore.score_date >= cutoff)
.order_by(MemberTrendScore.score_date)
)
return result.scalars().all()
@router.get("/{bioguide_id}/news", response_model=list[MemberNewsArticleSchema])
async def get_member_news(bioguide_id: str, db: AsyncSession = Depends(get_db)):
result = await db.execute(
select(MemberNewsArticle)
.where(MemberNewsArticle.member_id == bioguide_id)
.order_by(desc(MemberNewsArticle.published_at))
.limit(20)
)
return result.scalars().all()
@router.get("/{bioguide_id}/bills", response_model=PaginatedResponse[BillSchema])

View File

@@ -2,6 +2,7 @@ from app.models.bill import Bill, BillAction, BillDocument
from app.models.brief import BillBrief
from app.models.follow import Follow
from app.models.member import Member
from app.models.member_interest import MemberTrendScore, MemberNewsArticle
from app.models.news import NewsArticle
from app.models.setting import AppSetting
from app.models.trend import TrendScore
@@ -15,6 +16,8 @@ __all__ = [
"BillBrief",
"Follow",
"Member",
"MemberTrendScore",
"MemberNewsArticle",
"NewsArticle",
"AppSetting",
"TrendScore",

View File

@@ -31,3 +31,11 @@ class Member(Base):
updated_at = Column(DateTime(timezone=True), server_default=func.now(), onupdate=func.now())
bills = relationship("Bill", back_populates="sponsor", foreign_keys="Bill.sponsor_id")
trend_scores = relationship(
"MemberTrendScore", back_populates="member",
order_by="desc(MemberTrendScore.score_date)", cascade="all, delete-orphan"
)
news_articles = relationship(
"MemberNewsArticle", back_populates="member",
order_by="desc(MemberNewsArticle.published_at)", cascade="all, delete-orphan"
)

View File

@@ -0,0 +1,47 @@
from sqlalchemy import Column, Integer, String, Date, Float, Text, DateTime, ForeignKey, Index, UniqueConstraint
from sqlalchemy.orm import relationship
from sqlalchemy.sql import func
from app.database import Base
class MemberTrendScore(Base):
__tablename__ = "member_trend_scores"
id = Column(Integer, primary_key=True, autoincrement=True)
member_id = Column(String, ForeignKey("members.bioguide_id", ondelete="CASCADE"), nullable=False)
score_date = Column(Date, nullable=False)
newsapi_count = Column(Integer, default=0)
gnews_count = Column(Integer, default=0)
gtrends_score = Column(Float, default=0.0)
composite_score = Column(Float, default=0.0)
member = relationship("Member", back_populates="trend_scores")
__table_args__ = (
UniqueConstraint("member_id", "score_date", name="uq_member_trend_scores_member_date"),
Index("ix_member_trend_scores_member_id", "member_id"),
Index("ix_member_trend_scores_score_date", "score_date"),
Index("ix_member_trend_scores_composite", "composite_score"),
)
class MemberNewsArticle(Base):
__tablename__ = "member_news_articles"
id = Column(Integer, primary_key=True, autoincrement=True)
member_id = Column(String, ForeignKey("members.bioguide_id", ondelete="CASCADE"), nullable=False)
source = Column(String(200))
headline = Column(Text)
url = Column(String)
published_at = Column(DateTime(timezone=True))
relevance_score = Column(Float, default=0.0)
created_at = Column(DateTime(timezone=True), server_default=func.now())
member = relationship("Member", back_populates="news_articles")
__table_args__ = (
UniqueConstraint("member_id", "url", name="uq_member_news_member_url"),
Index("ix_member_news_articles_member_id", "member_id"),
Index("ix_member_news_articles_published_at", "published_at"),
)

View File

@@ -35,6 +35,7 @@ class MemberSchema(BaseModel):
leadership_json: Optional[list[Any]] = None
sponsored_count: Optional[int] = None
cosponsored_count: Optional[int] = None
latest_trend: Optional["MemberTrendScoreSchema"] = None
model_config = {"from_attributes": True}
@@ -94,6 +95,27 @@ class TrendScoreSchema(BaseModel):
model_config = {"from_attributes": True}
class MemberTrendScoreSchema(BaseModel):
score_date: date
newsapi_count: int
gnews_count: int
gtrends_score: float
composite_score: float
model_config = {"from_attributes": True}
class MemberNewsArticleSchema(BaseModel):
id: int
source: Optional[str] = None
headline: Optional[str] = None
url: Optional[str] = None
published_at: Optional[datetime] = None
relevance_score: Optional[float] = None
model_config = {"from_attributes": True}
# ── Bill ──────────────────────────────────────────────────────────────────────
class BillSchema(BaseModel):

View File

@@ -87,3 +87,52 @@ def fetch_gnews_count(query: str, days: int = 30) -> int:
except Exception as e:
logger.error(f"Google News RSS fetch failed: {e}")
return 0
def fetch_gnews_articles(query: str, days: int = 30) -> list[dict]:
"""Fetch articles from Google News RSS. No rate limit — unlimited source."""
import time as time_mod
try:
encoded = urllib.parse.quote(f"{query} when:{days}d")
url = f"{GOOGLE_NEWS_RSS}?q={encoded}&hl=en-US&gl=US&ceid=US:en"
time.sleep(1) # Polite delay
feed = feedparser.parse(url)
articles = []
for entry in feed.entries[:20]:
pub_at = None
if entry.get("published_parsed"):
try:
pub_at = datetime.fromtimestamp(
time_mod.mktime(entry.published_parsed), tz=timezone.utc
).isoformat()
except Exception:
pass
source = ""
if hasattr(entry, "source") and isinstance(entry.source, dict):
source = entry.source.get("title", "")
elif entry.get("tags"):
source = entry.tags[0].get("term", "") if entry.tags else ""
articles.append({
"source": source or "Google News",
"headline": entry.get("title", ""),
"url": entry.get("link", ""),
"published_at": pub_at,
})
return [a for a in articles if a["url"] and a["headline"]]
except Exception as e:
logger.error(f"Google News RSS article fetch failed: {e}")
return []
def build_member_query(first_name: str, last_name: str, chamber: Optional[str] = None) -> str:
"""Build a news search query for a member of Congress."""
full_name = f"{first_name} {last_name}".strip()
title = ""
if chamber:
if "senate" in chamber.lower():
title = "Senator"
else:
title = "Rep."
if title:
return f'"{full_name}" OR "{title} {last_name}"'
return f'"{full_name}"'

View File

@@ -50,6 +50,14 @@ def get_trends_score(keywords: list[str]) -> float:
return 0.0
def keywords_for_member(first_name: str, last_name: str) -> list[str]:
"""Extract meaningful search keywords for a member of Congress."""
full_name = f"{first_name} {last_name}".strip()
if not full_name:
return []
return [full_name]
def keywords_for_bill(title: str, short_title: str, topic_tags: list[str]) -> list[str]:
"""Extract meaningful search keywords for a bill."""
keywords = []

View File

@@ -14,6 +14,7 @@ celery_app = Celery(
"app.workers.llm_processor",
"app.workers.news_fetcher",
"app.workers.trend_scorer",
"app.workers.member_interest",
],
)
@@ -35,6 +36,7 @@ celery_app.conf.update(
"app.workers.llm_processor.*": {"queue": "llm"},
"app.workers.news_fetcher.*": {"queue": "news"},
"app.workers.trend_scorer.*": {"queue": "news"},
"app.workers.member_interest.*": {"queue": "news"},
},
task_queues=[
Queue("polling"),
@@ -58,5 +60,13 @@ celery_app.conf.update(
"task": "app.workers.trend_scorer.calculate_all_trend_scores",
"schedule": crontab(hour=2, minute=0),
},
"fetch-news-active-members": {
"task": "app.workers.member_interest.fetch_news_for_active_members",
"schedule": crontab(hour="*/12", minute=30),
},
"calculate-member-trend-scores": {
"task": "app.workers.member_interest.calculate_all_member_trend_scores",
"schedule": crontab(hour=3, minute=0),
},
},
)

View File

@@ -0,0 +1,177 @@
"""
Member interest worker — tracks public interest in members of Congress.
Fetches news articles and calculates trend scores for members using the
same composite scoring model as bills (NewsAPI + Google News RSS + pytrends).
Runs on a schedule and can also be triggered per-member.
"""
import logging
from datetime import date, datetime, timedelta, timezone
from app.database import get_sync_db
from app.models import Member, MemberNewsArticle, MemberTrendScore
from app.services import news_service, trends_service
from app.workers.celery_app import celery_app
from app.workers.trend_scorer import calculate_composite_score
logger = logging.getLogger(__name__)
def _parse_pub_at(raw: str | None) -> datetime | None:
if not raw:
return None
try:
return datetime.fromisoformat(raw.replace("Z", "+00:00"))
except Exception:
return None
@celery_app.task(bind=True, max_retries=2, name="app.workers.member_interest.fetch_member_news")
def fetch_member_news(self, bioguide_id: str):
"""Fetch and store recent news articles for a specific member."""
db = get_sync_db()
try:
member = db.get(Member, bioguide_id)
if not member or not member.first_name or not member.last_name:
return {"status": "skipped"}
query = news_service.build_member_query(
first_name=member.first_name,
last_name=member.last_name,
chamber=member.chamber,
)
newsapi_articles = news_service.fetch_newsapi_articles(query, days=30)
gnews_articles = news_service.fetch_gnews_articles(query, days=30)
all_articles = newsapi_articles + gnews_articles
saved = 0
for article in all_articles:
url = article.get("url")
if not url:
continue
existing = (
db.query(MemberNewsArticle)
.filter_by(member_id=bioguide_id, url=url)
.first()
)
if existing:
continue
db.add(MemberNewsArticle(
member_id=bioguide_id,
source=article.get("source", "")[:200],
headline=article.get("headline", ""),
url=url,
published_at=_parse_pub_at(article.get("published_at")),
relevance_score=1.0,
))
saved += 1
db.commit()
logger.info(f"Saved {saved} news articles for member {bioguide_id}")
return {"status": "ok", "saved": saved}
except Exception as exc:
db.rollback()
logger.error(f"Member news fetch failed for {bioguide_id}: {exc}")
raise self.retry(exc=exc, countdown=300)
finally:
db.close()
@celery_app.task(bind=True, name="app.workers.member_interest.calculate_member_trend_score")
def calculate_member_trend_score(self, bioguide_id: str):
"""Calculate and store today's public interest score for a member."""
db = get_sync_db()
try:
member = db.get(Member, bioguide_id)
if not member or not member.first_name or not member.last_name:
return {"status": "skipped"}
today = date.today()
existing = (
db.query(MemberTrendScore)
.filter_by(member_id=bioguide_id, score_date=today)
.first()
)
if existing:
return {"status": "already_scored"}
query = news_service.build_member_query(
first_name=member.first_name,
last_name=member.last_name,
chamber=member.chamber,
)
keywords = trends_service.keywords_for_member(member.first_name, member.last_name)
newsapi_articles = news_service.fetch_newsapi_articles(query, days=30)
newsapi_count = len(newsapi_articles)
gnews_count = news_service.fetch_gnews_count(query, days=30)
gtrends_score = trends_service.get_trends_score(keywords)
composite = calculate_composite_score(newsapi_count, gnews_count, gtrends_score)
db.add(MemberTrendScore(
member_id=bioguide_id,
score_date=today,
newsapi_count=newsapi_count,
gnews_count=gnews_count,
gtrends_score=gtrends_score,
composite_score=composite,
))
db.commit()
logger.info(f"Scored member {bioguide_id}: composite={composite:.1f}")
return {"status": "ok", "composite": composite}
except Exception as exc:
db.rollback()
logger.error(f"Member trend scoring failed for {bioguide_id}: {exc}")
raise
finally:
db.close()
@celery_app.task(bind=True, name="app.workers.member_interest.fetch_news_for_active_members")
def fetch_news_for_active_members(self):
"""
Scheduled task: fetch news for members who have been viewed or followed.
Prioritises members with detail_fetched set (profile has been viewed).
"""
db = get_sync_db()
try:
members = (
db.query(Member)
.filter(Member.detail_fetched.isnot(None))
.filter(Member.first_name.isnot(None))
.all()
)
for member in members:
fetch_member_news.delay(member.bioguide_id)
logger.info(f"Queued news fetch for {len(members)} members")
return {"queued": len(members)}
finally:
db.close()
@celery_app.task(bind=True, name="app.workers.member_interest.calculate_all_member_trend_scores")
def calculate_all_member_trend_scores(self):
"""
Scheduled nightly task: score all members that have been viewed.
Members are scored only after their profile has been loaded at least once.
"""
db = get_sync_db()
try:
members = (
db.query(Member)
.filter(Member.detail_fetched.isnot(None))
.filter(Member.first_name.isnot(None))
.all()
)
for member in members:
calculate_member_trend_score.delay(member.bioguide_id)
logger.info(f"Queued trend scoring for {len(members)} members")
return {"queued": len(members)}
finally:
db.close()

View File

@@ -41,9 +41,12 @@ def fetch_news_for_bill(self, bill_id: str):
bill_number=bill.bill_number,
)
articles = news_service.fetch_newsapi_articles(query)
newsapi_articles = news_service.fetch_newsapi_articles(query)
gnews_articles = news_service.fetch_gnews_articles(query)
all_articles = newsapi_articles + gnews_articles
saved = 0
for article in articles:
for article in all_articles:
url = article.get("url")
if not url:
continue