Files
PocketVeto/backend/app/services/news_service.py
Jack Levy a66b5b4bcb feat(interest): add public interest tracking for members of Congress
Adds Google Trends, NewsAPI, and Google News RSS scoring for members,
mirroring the existing bill interest pipeline. Member profiles now show
a Public Interest chart (with signal breakdown) and a Related News panel.

Key changes:
- New member_trend_scores + member_news_articles tables (migration 0008)
- fetch_gnews_articles() added to news_service for unlimited RSS article storage
- Bill news fetcher now combines NewsAPI + Google News RSS (more coverage)
- New member_interest Celery worker with scheduled news + trend tasks
- GET /members/{id}/trend and /news API endpoints
- TrendChart redesigned with signal breakdown badges and bar+line combo chart
- NewsPanel accepts generic article shape (bills and members)

Co-Authored-By: Jack Levy
2026-03-01 00:36:30 -05:00

139 lines
4.9 KiB
Python

"""
News correlation service.
- NewsAPI.org: structured news articles per bill (100 req/day limit)
- Google News RSS: volume signal for zeitgeist scoring (no limit)
"""
import logging
import time
import urllib.parse
from datetime import datetime, timedelta, timezone
from typing import Optional
import feedparser
import requests
from tenacity import retry, stop_after_attempt, wait_exponential
from app.config import settings
logger = logging.getLogger(__name__)
NEWSAPI_BASE = "https://newsapi.org/v2"
GOOGLE_NEWS_RSS = "https://news.google.com/rss/search"
NEWSAPI_DAILY_LIMIT = 95 # Leave 5 as buffer
@retry(stop=stop_after_attempt(2), wait=wait_exponential(min=1, max=5))
def _newsapi_get(endpoint: str, params: dict) -> dict:
params["apiKey"] = settings.NEWSAPI_KEY
response = requests.get(f"{NEWSAPI_BASE}/{endpoint}", params=params, timeout=30)
response.raise_for_status()
return response.json()
def build_news_query(bill_title: str, short_title: Optional[str], sponsor_name: Optional[str],
bill_type: str, bill_number: int) -> str:
"""Build a NewsAPI search query for a bill."""
terms = []
if short_title:
terms.append(f'"{short_title}"')
elif bill_title:
# Use first 6 words of title as phrase
words = bill_title.split()[:6]
if len(words) >= 3:
terms.append(f'"{" ".join(words)}"')
# Add bill number as fallback
terms.append(f'"{bill_type.upper()} {bill_number}"')
return " OR ".join(terms[:2]) # Keep queries short for relevance
def fetch_newsapi_articles(query: str, days: int = 30) -> list[dict]:
"""Fetch articles from NewsAPI.org. Returns empty list if quota is exhausted or key not set."""
if not settings.NEWSAPI_KEY:
return []
try:
from_date = (datetime.now(timezone.utc) - timedelta(days=days)).strftime("%Y-%m-%d")
data = _newsapi_get("everything", {
"q": query,
"language": "en",
"sortBy": "relevancy",
"pageSize": 10,
"from": from_date,
})
articles = data.get("articles", [])
return [
{
"source": a.get("source", {}).get("name", ""),
"headline": a.get("title", ""),
"url": a.get("url", ""),
"published_at": a.get("publishedAt"),
}
for a in articles
if a.get("url") and a.get("title")
]
except Exception as e:
logger.error(f"NewsAPI fetch failed: {e}")
return []
def fetch_gnews_count(query: str, days: int = 30) -> int:
"""Count articles in Google News RSS for the past N days. Used as volume signal."""
try:
encoded = urllib.parse.quote(f"{query} when:{days}d")
url = f"{GOOGLE_NEWS_RSS}?q={encoded}&hl=en-US&gl=US&ceid=US:en"
time.sleep(1) # Polite delay
feed = feedparser.parse(url)
return len(feed.entries)
except Exception as e:
logger.error(f"Google News RSS fetch failed: {e}")
return 0
def fetch_gnews_articles(query: str, days: int = 30) -> list[dict]:
"""Fetch articles from Google News RSS. No rate limit — unlimited source."""
import time as time_mod
try:
encoded = urllib.parse.quote(f"{query} when:{days}d")
url = f"{GOOGLE_NEWS_RSS}?q={encoded}&hl=en-US&gl=US&ceid=US:en"
time.sleep(1) # Polite delay
feed = feedparser.parse(url)
articles = []
for entry in feed.entries[:20]:
pub_at = None
if entry.get("published_parsed"):
try:
pub_at = datetime.fromtimestamp(
time_mod.mktime(entry.published_parsed), tz=timezone.utc
).isoformat()
except Exception:
pass
source = ""
if hasattr(entry, "source") and isinstance(entry.source, dict):
source = entry.source.get("title", "")
elif entry.get("tags"):
source = entry.tags[0].get("term", "") if entry.tags else ""
articles.append({
"source": source or "Google News",
"headline": entry.get("title", ""),
"url": entry.get("link", ""),
"published_at": pub_at,
})
return [a for a in articles if a["url"] and a["headline"]]
except Exception as e:
logger.error(f"Google News RSS article fetch failed: {e}")
return []
def build_member_query(first_name: str, last_name: str, chamber: Optional[str] = None) -> str:
"""Build a news search query for a member of Congress."""
full_name = f"{first_name} {last_name}".strip()
title = ""
if chamber:
if "senate" in chamber.lower():
title = "Senator"
else:
title = "Rep."
if title:
return f'"{full_name}" OR "{title} {last_name}"'
return f'"{full_name}"'