Nine efficiency improvements across the data pipeline:
1. NewsAPI OR batching (news_service.py + news_fetcher.py)
- Combine up to 4 bills per NewsAPI call using OR query syntax
- NEWSAPI_BATCH_SIZE=4 means ~4× effective daily quota (100→400 bill-fetches)
- fetch_news_for_bill_batch task; fetch_news_for_active_bills queues batches
2. Google News RSS cache (news_service.py)
- 2-hour Redis cache shared between news_fetcher and trend_scorer
- Eliminates duplicate RSS hits when both workers run against same bill
- clear_gnews_cache() admin helper + admin endpoint
3. pytrends keyword batching (trends_service.py + trend_scorer.py)
- Compare up to 5 bills per pytrends call instead of 1
- get_trends_scores_batch() returns scores in original order
- Reduces pytrends calls by ~5× and associated rate-limit risk
4. GovInfo ETags (govinfo_api.py + document_fetcher.py)
- If-None-Match conditional GET; DocumentUnchangedError on HTTP 304
- ETags stored in Redis (30-day TTL) keyed by MD5(url)
- document_fetcher catches DocumentUnchangedError → {"status": "unchanged"}
5. Anthropic prompt caching (llm_service.py)
- cache_control: {type: ephemeral} on system messages in AnthropicProvider
- Caches the ~700-token system prompt server-side; ~50% cost reduction on
repeated calls within the 5-minute cache window
6. Async sponsor fetch (congress_poller.py)
- New fetch_sponsor_for_bill Celery task replaces blocking get_bill_detail()
inline in poll loop
- Bills saved immediately with sponsor_id=None; sponsor linked async
- Removes 0.25s sleep per new bill from poll hot path
7. Skip doc fetch for procedural actions (congress_poller.py)
- _DOC_PRODUCING_CATEGORIES = {vote, committee_report, presidential, ...}
- fetch_bill_documents only enqueued when action is likely to produce
new GovInfo text (saves ~60–70% of unnecessary document fetch attempts)
8. Adaptive poll frequency (congress_poller.py)
- _is_congress_off_hours(): weekends + before 9AM / after 9PM EST
- Skips poll if off-hours AND last poll < 1 hour ago
- Prevents wasteful polling when Congress is not in session
9. Admin panel additions (admin.py + settings/page.tsx + api.ts)
- GET /api/admin/newsapi-quota → remaining calls today
- POST /api/admin/clear-gnews-cache → flush RSS cache
- Settings page shows NewsAPI quota remaining (amber if < 10)
- "Clear Google News Cache" button in Manual Controls
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
372 lines
12 KiB
Python
372 lines
12 KiB
Python
from datetime import date, datetime
|
||
from typing import Any, Generic, Optional, TypeVar
|
||
|
||
from pydantic import BaseModel
|
||
|
||
|
||
# ── Bill Notes ────────────────────────────────────────────────────────────────
|
||
|
||
class BillNoteSchema(BaseModel):
    """Read model for a note stored against a bill (``bill_id``)."""

    # Lets the model be validated from an object's attributes, not only a dict.
    model_config = {"from_attributes": True}

    id: int
    bill_id: str
    content: str
    pinned: bool
    created_at: datetime
    updated_at: datetime
|
||
|
||
|
||
class BillNoteUpsert(BaseModel):
    """Input model carrying a note's text and its pin flag."""

    content: str
    # A note is unpinned unless the caller explicitly sets this.
    pinned: bool = False
|
||
|
||
|
||
# ── Notifications ──────────────────────────────────────────────────────────────
|
||
|
||
class NotificationSettingsResponse(BaseModel):
    """Notification settings in their outbound (response) shape.

    Every field carries a default, so a source object that lacks some of
    them still validates.
    """

    # Lets the model be validated from an object's attributes, not only a dict.
    model_config = {"from_attributes": True}

    # ntfy
    # NOTE(review): ntfy_token / ntfy_password are part of this response
    # model — confirm that returning them to the client is intended.
    ntfy_topic_url: str = ""
    # none | token | basic
    ntfy_auth_method: str = "none"
    ntfy_token: str = ""
    ntfy_username: str = ""
    ntfy_password: str = ""
    ntfy_enabled: bool = False

    # RSS
    rss_enabled: bool = False
    rss_token: Optional[str] = None

    # Digest
    digest_enabled: bool = False
    # daily | weekly
    digest_frequency: str = "daily"

    # Quiet hours — stored as local-time hour integers (0-23); timezone is IANA name
    quiet_hours_start: Optional[int] = None
    quiet_hours_end: Optional[int] = None
    # IANA name, e.g. "America/New_York"
    timezone: Optional[str] = None

    alert_filters: Optional[dict] = None
|
||
|
||
|
||
class NotificationSettingsUpdate(BaseModel):
    """Partial-update payload for notification settings.

    Every field is optional and defaults to ``None``.
    Presumably ``None`` means "leave unchanged" — verify against the handler.
    """

    # ntfy
    ntfy_topic_url: Optional[str] = None
    ntfy_auth_method: Optional[str] = None
    ntfy_token: Optional[str] = None
    ntfy_username: Optional[str] = None
    ntfy_password: Optional[str] = None
    ntfy_enabled: Optional[bool] = None

    # RSS
    rss_enabled: Optional[bool] = None

    # Digest
    digest_enabled: Optional[bool] = None
    digest_frequency: Optional[str] = None

    # Quiet hours
    quiet_hours_start: Optional[int] = None
    quiet_hours_end: Optional[int] = None
    # IANA name sent by the browser on save
    timezone: Optional[str] = None

    alert_filters: Optional[dict] = None
|
||
|
||
|
||
class NotificationEventSchema(BaseModel):
    """Read model for one notification event tied to a bill."""

    # Lets the model be validated from an object's attributes, not only a dict.
    model_config = {"from_attributes": True}

    id: int
    bill_id: str
    event_type: str
    # Free-form: any value (or nothing) is accepted here.
    payload: Optional[Any] = None
    dispatched_at: Optional[datetime] = None
    created_at: datetime
|
||
|
||
|
||
class NtfyTestRequest(BaseModel):
    """Input model for an ntfy test: a topic URL plus optional auth details.

    Only ``ntfy_topic_url`` is required; the auth fields default to
    "no authentication" / empty strings.
    """

    ntfy_topic_url: str
    ntfy_auth_method: str = "none"
    ntfy_token: str = ""
    ntfy_username: str = ""
    ntfy_password: str = ""
|
||
|
||
|
||
class FollowModeTestRequest(BaseModel):
    """Input model naming a follow mode and an event type to test with.

    Both fields are plain strings; the values listed below are documented
    in comments only and are not enforced by this model.
    """

    # pocket_veto | pocket_boost
    mode: str
    # new_document | new_amendment | bill_updated
    event_type: str
|
||
|
||
|
||
class NotificationTestResult(BaseModel):
    """Outcome of a notification test: a status, a detail message, a count."""

    # "ok" | "error"
    status: str
    detail: str
    # RSS only
    event_count: Optional[int] = None
|
||
|
||
# Element type carried by PaginatedResponse.
T = TypeVar("T")


class PaginatedResponse(BaseModel, Generic[T]):
    """Generic page envelope: a list of ``T`` plus paging counters.

    All five fields are required.
    """

    items: list[T]
    total: int
    page: int
    per_page: int
    pages: int
|
||
|
||
|
||
# ── Member ────────────────────────────────────────────────────────────────────
|
||
|
||
class MemberSchema(BaseModel):
    """Read model for a member, identified by ``bioguide_id``.

    Only ``bioguide_id`` and ``name`` are required; every other field is
    optional and defaults to ``None``.
    """

    # Lets the model be validated from an object's attributes, not only a dict.
    model_config = {"from_attributes": True}

    # Identity
    bioguide_id: str
    name: str
    first_name: Optional[str] = None
    last_name: Optional[str] = None

    # Affiliation / seat
    party: Optional[str] = None
    state: Optional[str] = None
    chamber: Optional[str] = None
    district: Optional[str] = None

    # Links
    photo_url: Optional[str] = None
    official_url: Optional[str] = None
    congress_url: Optional[str] = None

    # Profile details (note: birth_year is typed as a string, not an int)
    birth_year: Optional[str] = None
    address: Optional[str] = None
    phone: Optional[str] = None
    terms_json: Optional[list[Any]] = None
    leadership_json: Optional[list[Any]] = None

    # Counts
    sponsored_count: Optional[int] = None
    cosponsored_count: Optional[int] = None

    # String forward reference: MemberTrendScoreSchema is declared further
    # down in this module.
    latest_trend: Optional["MemberTrendScoreSchema"] = None
|
||
|
||
|
||
# ── Bill Brief ────────────────────────────────────────────────────────────────
|
||
|
||
class BriefSchema(BaseModel):
    """Read model for a bill brief.

    Only ``id`` is required; ``brief_type`` defaults to ``"full"`` and the
    remaining fields default to ``None``.
    """

    # Lets the model be validated from an object's attributes, not only a dict.
    model_config = {"from_attributes": True}

    id: int
    brief_type: str = "full"

    # Brief content
    summary: Optional[str] = None
    key_points: Optional[list[Any]] = None
    risks: Optional[list[Any]] = None
    deadlines: Optional[list[dict[str, Any]]] = None
    topic_tags: Optional[list[str]] = None

    # Provenance
    llm_provider: Optional[str] = None
    llm_model: Optional[str] = None
    govinfo_url: Optional[str] = None

    share_token: Optional[str] = None
    created_at: Optional[datetime] = None
|
||
|
||
|
||
# ── Bill Action ───────────────────────────────────────────────────────────────
|
||
|
||
class BillActionSchema(BaseModel):
    """Read model for a single bill action; everything but ``id`` is optional."""

    # Lets the model be validated from an object's attributes, not only a dict.
    model_config = {"from_attributes": True}

    id: int
    action_date: Optional[date] = None
    action_text: Optional[str] = None
    action_type: Optional[str] = None
    chamber: Optional[str] = None
|
||
|
||
|
||
# ── News Article ──────────────────────────────────────────────────────────────
|
||
|
||
class NewsArticleSchema(BaseModel):
    """Read model for a news article; everything but ``id`` is optional."""

    # Lets the model be validated from an object's attributes, not only a dict.
    model_config = {"from_attributes": True}

    id: int
    source: Optional[str] = None
    headline: Optional[str] = None
    url: Optional[str] = None
    published_at: Optional[datetime] = None
    relevance_score: Optional[float] = None
|
||
|
||
|
||
# ── Trend Score ───────────────────────────────────────────────────────────────
|
||
|
||
class TrendScoreSchema(BaseModel):
    """Read model for one dated trend score; all fields are required."""

    # Lets the model be validated from an object's attributes, not only a dict.
    model_config = {"from_attributes": True}

    score_date: date

    # Per-source inputs
    newsapi_count: int
    gnews_count: int
    gtrends_score: float

    composite_score: float
|
||
|
||
|
||
class MemberTrendScoreSchema(BaseModel):
    """Read model for one dated member trend score; all fields are required.

    Declares the same field names and types as ``TrendScoreSchema`` but is
    an independent class (no inheritance between the two).
    """

    # Lets the model be validated from an object's attributes, not only a dict.
    model_config = {"from_attributes": True}

    score_date: date

    # Per-source inputs
    newsapi_count: int
    gnews_count: int
    gtrends_score: float

    composite_score: float
|
||
|
||
|
||
class MemberNewsArticleSchema(BaseModel):
    """Read model for a member news article; everything but ``id`` is optional.

    Declares the same field names and types as ``NewsArticleSchema`` but is
    an independent class (no inheritance between the two).
    """

    # Lets the model be validated from an object's attributes, not only a dict.
    model_config = {"from_attributes": True}

    id: int
    source: Optional[str] = None
    headline: Optional[str] = None
    url: Optional[str] = None
    published_at: Optional[datetime] = None
    relevance_score: Optional[float] = None
|
||
|
||
|
||
# ── Bill ──────────────────────────────────────────────────────────────────────
|
||
|
||
class BillSchema(BaseModel):
    """Read model for a bill, with its sponsor and latest brief/trend nested.

    The four identity fields are required; everything else is optional.
    """

    # Lets the model be validated from an object's attributes, not only a dict.
    model_config = {"from_attributes": True}

    # Identity
    bill_id: str
    congress_number: int
    bill_type: str
    bill_number: int

    # Titles
    title: Optional[str] = None
    short_title: Optional[str] = None

    # Dates and status
    introduced_date: Optional[date] = None
    latest_action_date: Optional[date] = None
    latest_action_text: Optional[str] = None
    status: Optional[str] = None
    chamber: Optional[str] = None
    congress_url: Optional[str] = None

    # Nested models
    sponsor: Optional[MemberSchema] = None
    latest_brief: Optional[BriefSchema] = None
    latest_trend: Optional[TrendScoreSchema] = None

    updated_at: Optional[datetime] = None
    has_document: bool = False
|
||
|
||
|
||
class BillDetailSchema(BillSchema):
    """``BillSchema`` extended with the bill's related collections.

    Each list defaults to empty. Pydantic copies field defaults per
    instance, so the literal ``[]`` defaults are not shared between models.
    """

    actions: list[BillActionSchema] = []
    news_articles: list[NewsArticleSchema] = []
    trend_scores: list[TrendScoreSchema] = []
    briefs: list[BriefSchema] = []
    # Re-declared with the same type and default it already has on BillSchema.
    has_document: bool = False
|
||
|
||
|
||
# ── Follow ────────────────────────────────────────────────────────────────────
|
||
|
||
class FollowCreate(BaseModel):
    """Input model for creating a follow: what kind of thing, and which one.

    ``follow_type`` is a plain string; the values listed below are documented
    in a comment only and are not enforced by this model.
    """

    # bill | member | topic
    follow_type: str
    follow_value: str
|
||
|
||
|
||
class FollowSchema(BaseModel):
    """Read model for a user's follow; ``follow_mode`` defaults to "neutral"."""

    # Lets the model be validated from an object's attributes, not only a dict.
    model_config = {"from_attributes": True}

    id: int
    user_id: int
    follow_type: str
    follow_value: str
    follow_mode: str = "neutral"
    created_at: datetime
|
||
|
||
|
||
class FollowModeUpdate(BaseModel):
    """Input model carrying a single required ``follow_mode`` string."""

    follow_mode: str
|
||
|
||
|
||
# ── Auth ──────────────────────────────────────────────────────────────────────
|
||
|
||
class UserCreate(BaseModel):
    """Input model with an email and a password, both required strings.

    ``email`` is typed as a plain ``str``; no email-format validation is
    applied by this model.
    """

    email: str
    password: str
|
||
|
||
|
||
class UserResponse(BaseModel):
    """Outbound user model; only ``created_at`` is optional."""

    # Lets the model be validated from an object's attributes, not only a dict.
    model_config = {"from_attributes": True}

    id: int
    email: str
    is_admin: bool
    # Untyped dict — its key/value schema is not declared here.
    notification_prefs: dict
    created_at: Optional[datetime] = None
|
||
|
||
|
||
class TokenResponse(BaseModel):
    """Bundles an access token, its type, and a ``UserResponse``."""

    access_token: str
    token_type: str = "bearer"
    # Written as a string annotation, as in the original declaration.
    user: "UserResponse"
|
||
|
||
|
||
# ── Settings ──────────────────────────────────────────────────────────────────
|
||
|
||
class SettingUpdate(BaseModel):
    """Input model for one setting as a key/value pair of strings."""

    key: str
    value: str
|
||
|
||
|
||
class SettingsResponse(BaseModel):
    """Outbound snapshot of application settings; all fields are required."""

    # LLM selection
    llm_provider: str
    llm_model: str

    congress_poll_interval_minutes: int

    # Feature toggles
    newsapi_enabled: bool
    pytrends_enabled: bool

    # Maps a name to a bool. Presumably "is this API key set?" — the values
    # are booleans, so no key material is carried here.
    api_keys_configured: dict[str, bool]
|
||
|
||
|
||
# ── Collections ────────────────────────────────────────────────────────────────
|
||
|
||
class CollectionCreate(BaseModel):
    """Input model for creating a collection.

    ``validate_name`` is an ordinary method: nothing in this class registers
    it as a Pydantic validator, so it only runs when a caller invokes it.
    """

    name: str
    is_public: bool = False

    def validate_name(self) -> str:
        """Return ``name`` with surrounding whitespace stripped.

        The model's ``name`` attribute itself is left untouched.

        Raises:
            ValueError: if the stripped name is empty or exceeds 100 characters.
        """
        trimmed = self.name.strip()
        if 1 <= len(trimmed) <= 100:
            return trimmed
        raise ValueError("name must be 1–100 characters")
|
||
|
||
|
||
class CollectionUpdate(BaseModel):
    """Partial-update payload for a collection.

    Both fields default to ``None``.
    Presumably ``None`` means "leave unchanged" — verify against the handler.
    """

    name: Optional[str] = None
    is_public: Optional[bool] = None
|
||
|
||
|
||
class CollectionSchema(BaseModel):
    """Read model for a collection; all fields are required."""

    # Lets the model be validated from an object's attributes, not only a dict.
    model_config = {"from_attributes": True}

    id: int
    name: str
    slug: str
    is_public: bool
    share_token: str
    bill_count: int
    created_at: datetime
|
||
|
||
|
||
class CollectionDetailSchema(CollectionSchema):
    """``CollectionSchema`` plus the required list of bills it contains."""

    bills: list[BillSchema]
|
||
|
||
|
||
class BriefShareResponse(BaseModel):
    """Pairs a brief with a bill in one response; both are required."""

    brief: BriefSchema
    bill: BillSchema
|
||
|
||
|
||
# ── Votes ──────────────────────────────────────────────────────────────────────
|
||
|
||
class MemberVotePositionSchema(BaseModel):
    """Read model for one member's position on a vote.

    Only ``position`` is required; the member-identifying fields are all
    optional and default to ``None``.
    """

    # Lets the model be validated from an object's attributes, not only a dict.
    model_config = {"from_attributes": True}

    # Member details
    bioguide_id: Optional[str] = None
    member_name: Optional[str] = None
    party: Optional[str] = None
    state: Optional[str] = None

    position: str
|
||
|
||
|
||
class BillVoteSchema(BaseModel):
    """Read model for a roll-call vote with its per-member positions.

    ``positions`` defaults to an empty list. Pydantic copies field defaults
    per instance, so the literal ``[]`` is not shared between models.
    """

    # Lets the model be validated from an object's attributes, not only a dict.
    model_config = {"from_attributes": True}

    # Identity of the roll call
    id: int
    congress: int
    chamber: str
    session: int
    roll_number: int

    # What was voted on, and when
    question: Optional[str] = None
    description: Optional[str] = None
    vote_date: Optional[date] = None

    # Tallies and outcome
    yeas: Optional[int] = None
    nays: Optional[int] = None
    not_voting: Optional[int] = None
    result: Optional[str] = None

    source_url: Optional[str] = None
    positions: list[MemberVotePositionSchema] = []
|