Initial commit

This commit is contained in:
Jack Levy
2026-02-28 21:08:19 -05:00
commit e418dd9ae0
85 changed files with 5261 additions and 0 deletions

19
backend/Dockerfile Normal file
View File

@@ -0,0 +1,19 @@
FROM python:3.12-slim
WORKDIR /app
# System deps for psycopg2, pdfminer, lxml
RUN apt-get update && apt-get install -y --no-install-recommends \
    gcc \
    libpq-dev \
    libxml2-dev \
    libxslt-dev \
    && rm -rf /var/lib/apt/lists/*
# Copy requirements first so the pip layer is cached unless dependencies change
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
# App source copied last — code edits don't invalidate the dependency layers
COPY . .
# Default command (overridden per service in docker-compose.yml)
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]

41
backend/alembic.ini Normal file
View File

@@ -0,0 +1,41 @@
[alembic]
# Location of migration scripts, relative to this file
script_location = alembic
# Put the project root on sys.path so "app.*" imports work inside env.py
prepend_sys_path = .
version_path_separator = os
# Fallback/dev URL only — env.py overrides this from SYNC_DATABASE_URL when set.
# NOTE(review): credentials here are the docker-compose defaults; do not use in prod.
sqlalchemy.url = postgresql://congress:congress@postgres:5432/pocketveto

[post_write_hooks]

# ── Logging configuration consumed by logging.config.fileConfig in env.py ──
[loggers]
keys = root,sqlalchemy,alembic

[handlers]
keys = console

[formatters]
keys = generic

[logger_root]
level = WARN
handlers = console
qualname =

[logger_sqlalchemy]
# WARN keeps SQL echo quiet during migrations; set to INFO to log statements
level = WARN
handlers =
qualname = sqlalchemy.engine

[logger_alembic]
level = INFO
handlers =
qualname = alembic

[handler_console]
class = StreamHandler
args = (sys.stderr,)
level = NOTSET
formatter = generic

[formatter_generic]
format = %(levelname)-5.5s [%(name)s] %(message)s
datefmt = %H:%M:%S

51
backend/alembic/env.py Normal file
View File

@@ -0,0 +1,51 @@
import os
from logging.config import fileConfig
from alembic import context
from sqlalchemy import engine_from_config, pool
# Import all models so Alembic can detect them
from app.database import Base
import app.models  # noqa: F401 — registers all models with Base.metadata

# Alembic Config object, provides access to values in alembic.ini
config = context.config

# Override sqlalchemy.url from environment if set
sync_url = os.environ.get("SYNC_DATABASE_URL")
if sync_url:
    config.set_main_option("sqlalchemy.url", sync_url)

# Wire up Python logging from the [loggers]/[handlers] sections of alembic.ini
if config.config_file_name is not None:
    fileConfig(config.config_file_name)

# Metadata Alembic diffs against when autogenerating migrations
target_metadata = Base.metadata
def run_migrations_offline() -> None:
    """Emit migration SQL as a script, without connecting to a database."""
    context.configure(
        url=config.get_main_option("sqlalchemy.url"),
        target_metadata=target_metadata,
        literal_binds=True,
        dialect_opts={"paramstyle": "named"},
    )
    with context.begin_transaction():
        context.run_migrations()
def run_migrations_online() -> None:
    """Run migrations against a live database connection."""
    engine = engine_from_config(
        config.get_section(config.config_ini_section, {}),
        prefix="sqlalchemy.",
        poolclass=pool.NullPool,
    )
    with engine.connect() as conn:
        context.configure(connection=conn, target_metadata=target_metadata)
        with context.begin_transaction():
            context.run_migrations()
# Entry point: Alembic imports this module and the mode is decided by how the
# CLI was invoked (`alembic upgrade --sql` → offline; normal runs → online).
if context.is_offline_mode():
    run_migrations_offline()
else:
    run_migrations_online()

View File

@@ -0,0 +1,26 @@
## Mako template: "alembic revision" renders this into each new migration file.
"""${message}

Revision ID: ${up_revision}
Revises: ${down_revision | comma,n}
Create Date: ${create_date}

"""
from typing import Sequence, Union

from alembic import op
import sqlalchemy as sa
${imports if imports else ""}

# revision identifiers, used by Alembic.
revision: str = ${repr(up_revision)}
down_revision: Union[str, None] = ${repr(down_revision)}
branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)}
depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)}


def upgrade() -> None:
    ${upgrades if upgrades else "pass"}


def downgrade() -> None:
    ${downgrades if downgrades else "pass"}

View File

@@ -0,0 +1,205 @@
"""initial schema

Revision ID: 0001
Revises:
Create Date: 2025-01-01 00:00:00.000000

"""
from typing import Sequence, Union

import sqlalchemy as sa
from alembic import op
from sqlalchemy.dialects.postgresql import JSONB

# revision identifiers, used by Alembic
revision: str = "0001"
down_revision: Union[str, None] = None
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
    """Create the full initial schema.

    Order matters: parent tables (members, bills) are created before the
    child tables whose foreign keys reference them. Two Postgres-only
    features (the generated tsvector column and the GIN indexes) are created
    via raw SQL because Alembic autogenerate cannot express them.
    """
    # ── members ──────────────────────────────────────────────────────────────
    op.create_table(
        "members",
        sa.Column("bioguide_id", sa.String(), primary_key=True),
        sa.Column("name", sa.String(), nullable=False),
        sa.Column("first_name", sa.String()),
        sa.Column("last_name", sa.String()),
        sa.Column("party", sa.String(10)),
        sa.Column("state", sa.String(5)),
        sa.Column("chamber", sa.String(10)),
        sa.Column("district", sa.String(10)),
        sa.Column("photo_url", sa.String()),
        sa.Column("official_url", sa.String()),
        sa.Column("created_at", sa.DateTime(timezone=True), server_default=sa.func.now()),
        sa.Column("updated_at", sa.DateTime(timezone=True), server_default=sa.func.now()),
    )
    # ── bills ─────────────────────────────────────────────────────────────────
    op.create_table(
        "bills",
        sa.Column("bill_id", sa.String(), primary_key=True),
        sa.Column("congress_number", sa.Integer(), nullable=False),
        sa.Column("bill_type", sa.String(10), nullable=False),
        sa.Column("bill_number", sa.Integer(), nullable=False),
        sa.Column("title", sa.Text()),
        sa.Column("short_title", sa.Text()),
        # Nullable: some bills may be ingested before their sponsor is synced
        sa.Column("sponsor_id", sa.String(), sa.ForeignKey("members.bioguide_id"), nullable=True),
        sa.Column("introduced_date", sa.Date()),
        sa.Column("latest_action_date", sa.Date()),
        sa.Column("latest_action_text", sa.Text()),
        sa.Column("status", sa.String(100)),
        sa.Column("chamber", sa.String(10)),
        sa.Column("congress_url", sa.String()),
        sa.Column("govtrack_url", sa.String()),
        sa.Column("last_checked_at", sa.DateTime(timezone=True)),
        sa.Column("actions_fetched_at", sa.DateTime(timezone=True)),
        sa.Column("created_at", sa.DateTime(timezone=True), server_default=sa.func.now()),
        sa.Column("updated_at", sa.DateTime(timezone=True), server_default=sa.func.now()),
    )
    op.create_index("ix_bills_congress_number", "bills", ["congress_number"])
    op.create_index("ix_bills_latest_action_date", "bills", ["latest_action_date"])
    op.create_index("ix_bills_introduced_date", "bills", ["introduced_date"])
    op.create_index("ix_bills_chamber", "bills", ["chamber"])
    op.create_index("ix_bills_sponsor_id", "bills", ["sponsor_id"])
    # Full-text search vector (tsvector generated column) — manual, not in autogenerate.
    # Titles weighted 'A' (highest), latest action text 'C'.
    op.execute("""
        ALTER TABLE bills ADD COLUMN search_vector tsvector
        GENERATED ALWAYS AS (
            setweight(to_tsvector('english', coalesce(title, '')), 'A') ||
            setweight(to_tsvector('english', coalesce(short_title, '')), 'A') ||
            setweight(to_tsvector('english', coalesce(latest_action_text, '')), 'C')
        ) STORED
    """)
    op.execute("CREATE INDEX ix_bills_search_vector ON bills USING GIN(search_vector)")
    # ── bill_actions ──────────────────────────────────────────────────────────
    op.create_table(
        "bill_actions",
        sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True),
        sa.Column("bill_id", sa.String(), sa.ForeignKey("bills.bill_id", ondelete="CASCADE"), nullable=False),
        sa.Column("action_date", sa.Date()),
        sa.Column("action_text", sa.Text()),
        sa.Column("action_type", sa.String(100)),
        sa.Column("chamber", sa.String(10)),
        sa.Column("created_at", sa.DateTime(timezone=True), server_default=sa.func.now()),
    )
    op.create_index("ix_bill_actions_bill_id", "bill_actions", ["bill_id"])
    op.create_index("ix_bill_actions_action_date", "bill_actions", ["action_date"])
    # ── bill_documents ────────────────────────────────────────────────────────
    op.create_table(
        "bill_documents",
        sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True),
        sa.Column("bill_id", sa.String(), sa.ForeignKey("bills.bill_id", ondelete="CASCADE"), nullable=False),
        sa.Column("doc_type", sa.String(50)),
        sa.Column("doc_version", sa.String(50)),
        sa.Column("govinfo_url", sa.String()),
        sa.Column("raw_text", sa.Text()),
        sa.Column("fetched_at", sa.DateTime(timezone=True)),
        sa.Column("created_at", sa.DateTime(timezone=True), server_default=sa.func.now()),
    )
    op.create_index("ix_bill_documents_bill_id", "bill_documents", ["bill_id"])
    # ── bill_briefs ───────────────────────────────────────────────────────────
    op.create_table(
        "bill_briefs",
        sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True),
        sa.Column("bill_id", sa.String(), sa.ForeignKey("bills.bill_id", ondelete="CASCADE"), nullable=False),
        # SET NULL: a brief survives deletion of the source document
        sa.Column("document_id", sa.Integer(), sa.ForeignKey("bill_documents.id", ondelete="SET NULL"), nullable=True),
        sa.Column("summary", sa.Text()),
        sa.Column("key_points", JSONB()),
        sa.Column("risks", JSONB()),
        sa.Column("deadlines", JSONB()),
        sa.Column("topic_tags", JSONB()),
        sa.Column("llm_provider", sa.String(50)),
        sa.Column("llm_model", sa.String(100)),
        sa.Column("created_at", sa.DateTime(timezone=True), server_default=sa.func.now()),
    )
    op.create_index("ix_bill_briefs_bill_id", "bill_briefs", ["bill_id"])
    # GIN index enables fast JSONB containment queries on topic_tags
    op.execute("CREATE INDEX ix_bill_briefs_topic_tags ON bill_briefs USING GIN(topic_tags)")
    # ── committees ────────────────────────────────────────────────────────────
    op.create_table(
        "committees",
        sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True),
        sa.Column("committee_code", sa.String(20), unique=True, nullable=False),
        sa.Column("name", sa.String(500)),
        sa.Column("chamber", sa.String(10)),
        sa.Column("committee_type", sa.String(50)),
    )
    # ── committee_bills ───────────────────────────────────────────────────────
    op.create_table(
        "committee_bills",
        sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True),
        sa.Column("committee_id", sa.Integer(), sa.ForeignKey("committees.id", ondelete="CASCADE"), nullable=False),
        sa.Column("bill_id", sa.String(), sa.ForeignKey("bills.bill_id", ondelete="CASCADE"), nullable=False),
        sa.Column("referral_date", sa.Date()),
    )
    op.create_index("ix_committee_bills_bill_id", "committee_bills", ["bill_id"])
    op.create_index("ix_committee_bills_committee_id", "committee_bills", ["committee_id"])
    # ── news_articles ─────────────────────────────────────────────────────────
    op.create_table(
        "news_articles",
        sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True),
        sa.Column("bill_id", sa.String(), sa.ForeignKey("bills.bill_id", ondelete="CASCADE"), nullable=False),
        sa.Column("source", sa.String(200)),
        sa.Column("headline", sa.Text()),
        # Unique URL prevents the same article being linked twice
        sa.Column("url", sa.String(), unique=True),
        sa.Column("published_at", sa.DateTime(timezone=True)),
        sa.Column("relevance_score", sa.Float(), default=0.0),
        sa.Column("created_at", sa.DateTime(timezone=True), server_default=sa.func.now()),
    )
    op.create_index("ix_news_articles_bill_id", "news_articles", ["bill_id"])
    op.create_index("ix_news_articles_published_at", "news_articles", ["published_at"])
    # ── trend_scores ──────────────────────────────────────────────────────────
    op.create_table(
        "trend_scores",
        sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True),
        sa.Column("bill_id", sa.String(), sa.ForeignKey("bills.bill_id", ondelete="CASCADE"), nullable=False),
        sa.Column("score_date", sa.Date(), nullable=False),
        sa.Column("newsapi_count", sa.Integer(), default=0),
        sa.Column("gnews_count", sa.Integer(), default=0),
        sa.Column("gtrends_score", sa.Float(), default=0.0),
        sa.Column("composite_score", sa.Float(), default=0.0),
        # One score row per bill per day
        sa.UniqueConstraint("bill_id", "score_date", name="uq_trend_scores_bill_date"),
    )
    op.create_index("ix_trend_scores_bill_id", "trend_scores", ["bill_id"])
    op.create_index("ix_trend_scores_score_date", "trend_scores", ["score_date"])
    op.create_index("ix_trend_scores_composite", "trend_scores", ["composite_score"])
    # ── follows ───────────────────────────────────────────────────────────────
    op.create_table(
        "follows",
        sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True),
        sa.Column("follow_type", sa.String(20), nullable=False),
        sa.Column("follow_value", sa.String(), nullable=False),
        sa.Column("created_at", sa.DateTime(timezone=True), server_default=sa.func.now()),
        sa.UniqueConstraint("follow_type", "follow_value", name="uq_follows_type_value"),
    )
    # ── app_settings ──────────────────────────────────────────────────────────
    op.create_table(
        "app_settings",
        sa.Column("key", sa.String(), primary_key=True),
        sa.Column("value", sa.String()),
        sa.Column("updated_at", sa.DateTime(timezone=True), server_default=sa.func.now()),
    )
def downgrade() -> None:
    """Drop all tables in reverse dependency order (children before parents)."""
    for table_name in (
        "app_settings",
        "follows",
        "trend_scores",
        "news_articles",
        "committee_bills",
        "committees",
        "bill_briefs",
        "bill_documents",
        "bill_actions",
        "bills",
        "members",
    ):
        op.drop_table(table_name)

View File

@@ -0,0 +1,30 @@
"""widen chamber and party columns
Revision ID: 0002
Revises: 0001
Create Date: 2026-02-28 00:00:00.000000
"""
from typing import Sequence, Union
import sqlalchemy as sa
from alembic import op
revision: str = "0002"
down_revision: Union[str, None] = "0001"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
    """Widen chamber/party columns from VARCHAR(10) to VARCHAR(50).

    `existing_type` is supplied so the ALTER is portable to backends that
    require the full current column definition (e.g. MySQL) and works in
    Alembic batch mode, per the Alembic op.alter_column documentation.
    """
    op.alter_column("members", "chamber", type_=sa.String(50), existing_type=sa.String(10))
    op.alter_column("members", "party", type_=sa.String(50), existing_type=sa.String(10))
    op.alter_column("bills", "chamber", type_=sa.String(50), existing_type=sa.String(10))
    op.alter_column("bill_actions", "chamber", type_=sa.String(50), existing_type=sa.String(10))
def downgrade() -> None:
    """Restore the original VARCHAR(10) widths.

    WARNING: fails (or truncates, backend-dependent) if any stored value is
    longer than 10 characters. `existing_type` added for backend portability.
    """
    op.alter_column("bill_actions", "chamber", type_=sa.String(10), existing_type=sa.String(50))
    op.alter_column("bills", "chamber", type_=sa.String(10), existing_type=sa.String(50))
    op.alter_column("members", "party", type_=sa.String(10), existing_type=sa.String(50))
    op.alter_column("members", "chamber", type_=sa.String(10), existing_type=sa.String(50))

View File

@@ -0,0 +1,26 @@
"""widen member state and district columns
Revision ID: 0003
Revises: 0002
Create Date: 2026-03-01 00:00:00.000000
"""
from typing import Sequence, Union
import sqlalchemy as sa
from alembic import op
revision: str = "0003"
down_revision: Union[str, None] = "0002"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
    """Widen members.state (VARCHAR(5)→50) and members.district (VARCHAR(10)→50).

    `existing_type` is supplied so the ALTER is portable to backends that need
    the full current column definition and works in Alembic batch mode.
    """
    op.alter_column("members", "state", type_=sa.String(50), existing_type=sa.String(5))
    op.alter_column("members", "district", type_=sa.String(50), existing_type=sa.String(10))
def downgrade() -> None:
    """Restore the original widths.

    WARNING: fails (or truncates, backend-dependent) if any stored value
    exceeds the old limit. `existing_type` added for backend portability.
    """
    op.alter_column("members", "district", type_=sa.String(10), existing_type=sa.String(50))
    op.alter_column("members", "state", type_=sa.String(5), existing_type=sa.String(50))

View File

@@ -0,0 +1,27 @@
"""add brief_type to bill_briefs
Revision ID: 0004
Revises: 0003
Create Date: 2026-03-01 00:00:00.000000
"""
from typing import Sequence, Union
import sqlalchemy as sa
from alembic import op
revision: str = "0004"
down_revision: Union[str, None] = "0003"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
    # server_default="full" backfills existing rows in the same statement,
    # which lets the column be NOT NULL without a separate UPDATE step.
    op.add_column(
        "bill_briefs",
        sa.Column("brief_type", sa.String(20), nullable=False, server_default="full"),
    )
def downgrade() -> None:
    # Dropping the column also removes its server default and any stored values.
    op.drop_column("bill_briefs", "brief_type")

View File

39
backend/app/api/admin.py Normal file
View File

@@ -0,0 +1,39 @@
from fastapi import APIRouter
router = APIRouter()


@router.post("/trigger-poll")
async def trigger_poll():
    """Manually trigger a Congress.gov poll without waiting for the Beat schedule."""
    # Imported lazily so the web process doesn't load Celery tasks at startup.
    from app.workers.congress_poller import poll_congress_bills

    queued = poll_congress_bills.delay()
    return {"task_id": queued.id, "status": "queued"}
@router.post("/trigger-member-sync")
async def trigger_member_sync():
    """Manually trigger a member sync."""
    # Lazy import keeps Celery out of the web process import graph.
    from app.workers.congress_poller import sync_members

    queued = sync_members.delay()
    return {"task_id": queued.id, "status": "queued"}
@router.post("/trigger-trend-scores")
async def trigger_trend_scores():
    """Manually trigger trend score calculation."""
    # Lazy import keeps Celery out of the web process import graph.
    from app.workers.trend_scorer import calculate_all_trend_scores

    queued = calculate_all_trend_scores.delay()
    return {"task_id": queued.id, "status": "queued"}
@router.get("/task-status/{task_id}")
async def get_task_status(task_id: str):
    """Check the status of an async task."""
    from app.workers.celery_app import celery_app

    res = celery_app.AsyncResult(task_id)
    payload = {"task_id": task_id, "status": res.status, "result": None}
    # Only expose the result payload once the task has finished.
    if res.ready():
        payload["result"] = res.result
    return payload

145
backend/app/api/bills.py Normal file
View File

@@ -0,0 +1,145 @@
from typing import Optional
from fastapi import APIRouter, Depends, Query
from sqlalchemy import desc, func, or_, select
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.orm import selectinload
from app.database import get_db
from app.models import Bill, BillAction, BillBrief, NewsArticle, TrendScore
from app.schemas.schemas import (
BillDetailSchema,
BillSchema,
BillActionSchema,
NewsArticleSchema,
PaginatedResponse,
TrendScoreSchema,
)
router = APIRouter()


@router.get("", response_model=PaginatedResponse[BillSchema])
async def list_bills(
    chamber: Optional[str] = Query(None),
    topic: Optional[str] = Query(None),
    sponsor_id: Optional[str] = Query(None),
    q: Optional[str] = Query(None),
    page: int = Query(1, ge=1),
    per_page: int = Query(20, ge=1, le=100),
    sort: str = Query("latest_action_date"),
    db: AsyncSession = Depends(get_db),
):
    """Paginated bill listing with optional chamber/sponsor/topic filters and
    a simple ILIKE substring match on bill id and titles."""
    query = (
        select(Bill)
        .options(
            selectinload(Bill.sponsor),
            selectinload(Bill.briefs),
            selectinload(Bill.trend_scores),
        )
    )
    if chamber:
        query = query.where(Bill.chamber == chamber)
    if sponsor_id:
        query = query.where(Bill.sponsor_id == sponsor_id)
    if topic:
        # JSONB containment: the brief's topic_tags array must include `topic`
        query = query.join(BillBrief, Bill.bill_id == BillBrief.bill_id).where(
            BillBrief.topic_tags.contains([topic])
        )
    if q:
        query = query.where(
            or_(
                Bill.bill_id.ilike(f"%{q}%"),
                Bill.title.ilike(f"%{q}%"),
                Bill.short_title.ilike(f"%{q}%"),
            )
        )
    # Count total (over the filtered query, before sort/pagination)
    count_query = select(func.count()).select_from(query.subquery())
    total = await db.scalar(count_query) or 0
    # Sort — NOTE(review): getattr accepts any Bill attribute name here; a
    # non-column attribute (e.g. a relationship name) would fail at query
    # time rather than fall back. Consider whitelisting sortable columns.
    sort_col = getattr(Bill, sort, Bill.latest_action_date)
    query = query.order_by(desc(sort_col)).offset((page - 1) * per_page).limit(per_page)
    result = await db.execute(query)
    bills = result.scalars().unique().all()
    # Attach latest brief and trend to each bill
    items = []
    for bill in bills:
        bill_dict = BillSchema.model_validate(bill)
        if bill.briefs:
            # assumes the relationship orders newest-first — TODO confirm in model
            bill_dict.latest_brief = bill.briefs[0]
        if bill.trend_scores:
            bill_dict.latest_trend = bill.trend_scores[0]
        items.append(bill_dict)
    return PaginatedResponse(
        items=items,
        total=total,
        page=page,
        per_page=per_page,
        pages=max(1, (total + per_page - 1) // per_page),
    )
@router.get("/{bill_id}", response_model=BillDetailSchema)
async def get_bill(bill_id: str, db: AsyncSession = Depends(get_db)):
    """Return one bill with sponsor, actions, briefs, news and trend history."""
    stmt = (
        select(Bill)
        .options(
            selectinload(Bill.sponsor),
            selectinload(Bill.actions),
            selectinload(Bill.briefs),
            selectinload(Bill.news_articles),
            selectinload(Bill.trend_scores),
        )
        .where(Bill.bill_id == bill_id)
    )
    bill = (await db.execute(stmt)).scalar_one_or_none()
    if bill is None:
        from fastapi import HTTPException

        raise HTTPException(status_code=404, detail="Bill not found")
    detail = BillDetailSchema.model_validate(bill)
    if bill.briefs:
        detail.latest_brief = bill.briefs[0]
    if bill.trend_scores:
        detail.latest_trend = bill.trend_scores[0]
    return detail
@router.get("/{bill_id}/actions", response_model=list[BillActionSchema])
async def get_bill_actions(bill_id: str, db: AsyncSession = Depends(get_db)):
    """All recorded actions for a bill, most recent first."""
    stmt = (
        select(BillAction)
        .where(BillAction.bill_id == bill_id)
        .order_by(desc(BillAction.action_date))
    )
    return (await db.execute(stmt)).scalars().all()
@router.get("/{bill_id}/news", response_model=list[NewsArticleSchema])
async def get_bill_news(bill_id: str, db: AsyncSession = Depends(get_db)):
    """Up to 20 most recent news articles linked to a bill."""
    stmt = (
        select(NewsArticle)
        .where(NewsArticle.bill_id == bill_id)
        .order_by(desc(NewsArticle.published_at))
        .limit(20)
    )
    return (await db.execute(stmt)).scalars().all()
@router.get("/{bill_id}/trend", response_model=list[TrendScoreSchema])
async def get_bill_trend(bill_id: str, days: int = Query(30, ge=7, le=365), db: AsyncSession = Depends(get_db)):
    """Daily trend scores for a bill over the trailing `days` window, oldest first."""
    from datetime import date, timedelta

    cutoff = date.today() - timedelta(days=days)
    stmt = (
        select(TrendScore)
        .where(TrendScore.bill_id == bill_id, TrendScore.score_date >= cutoff)
        .order_by(TrendScore.score_date)
    )
    return (await db.execute(stmt)).scalars().all()

View File

@@ -0,0 +1,102 @@
from datetime import date, timedelta
from fastapi import Depends
from fastapi import APIRouter
from sqlalchemy import desc, select
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.orm import selectinload
from app.database import get_db
from app.models import Bill, BillBrief, Follow, TrendScore
from app.schemas.schemas import BillSchema
router = APIRouter()


@router.get("")
async def get_dashboard(db: AsyncSession = Depends(get_db)):
    """Assemble the home dashboard: a personalized feed from followed bills,
    members and topics (deduplicated), plus today's trending bills and
    follow counts."""
    # Load all follows
    follows_result = await db.execute(select(Follow))
    follows = follows_result.scalars().all()
    followed_bill_ids = [f.follow_value for f in follows if f.follow_type == "bill"]
    followed_member_ids = [f.follow_value for f in follows if f.follow_type == "member"]
    followed_topics = [f.follow_value for f in follows if f.follow_type == "topic"]
    feed_bills: list[Bill] = []
    # Tracks bill ids already added so the three sources don't duplicate
    seen_ids: set[str] = set()
    # 1. Directly followed bills
    if followed_bill_ids:
        result = await db.execute(
            select(Bill)
            .options(selectinload(Bill.sponsor), selectinload(Bill.briefs), selectinload(Bill.trend_scores))
            .where(Bill.bill_id.in_(followed_bill_ids))
            .order_by(desc(Bill.latest_action_date))
            .limit(20)
        )
        for bill in result.scalars().all():
            if bill.bill_id not in seen_ids:
                feed_bills.append(bill)
                seen_ids.add(bill.bill_id)
    # 2. Bills from followed members
    if followed_member_ids:
        result = await db.execute(
            select(Bill)
            .options(selectinload(Bill.sponsor), selectinload(Bill.briefs), selectinload(Bill.trend_scores))
            .where(Bill.sponsor_id.in_(followed_member_ids))
            .order_by(desc(Bill.latest_action_date))
            .limit(20)
        )
        for bill in result.scalars().all():
            if bill.bill_id not in seen_ids:
                feed_bills.append(bill)
                seen_ids.add(bill.bill_id)
    # 3. Bills matching followed topics (one query per topic, 10 each)
    for topic in followed_topics:
        result = await db.execute(
            select(Bill)
            .options(selectinload(Bill.sponsor), selectinload(Bill.briefs), selectinload(Bill.trend_scores))
            .join(BillBrief, Bill.bill_id == BillBrief.bill_id)
            .where(BillBrief.topic_tags.contains([topic]))
            .order_by(desc(Bill.latest_action_date))
            .limit(10)
        )
        for bill in result.scalars().all():
            if bill.bill_id not in seen_ids:
                feed_bills.append(bill)
                seen_ids.add(bill.bill_id)
    # Sort feed by latest action date (bills with no date sort last)
    feed_bills.sort(key=lambda b: b.latest_action_date or date.min, reverse=True)
    # 4. Trending bills (top 10 by composite score today)
    trending_result = await db.execute(
        select(Bill)
        .options(selectinload(Bill.sponsor), selectinload(Bill.briefs), selectinload(Bill.trend_scores))
        .join(TrendScore, Bill.bill_id == TrendScore.bill_id)
        .where(TrendScore.score_date >= date.today() - timedelta(days=1))
        .order_by(desc(TrendScore.composite_score))
        .limit(10)
    )
    trending_bills = trending_result.scalars().unique().all()

    def serialize_bill(bill: Bill) -> dict:
        # Attach the first brief/trend (assumes newest-first ordering — TODO confirm)
        b = BillSchema.model_validate(bill)
        if bill.briefs:
            b.latest_brief = bill.briefs[0]
        if bill.trend_scores:
            b.latest_trend = bill.trend_scores[0]
        return b.model_dump()

    return {
        "feed": [serialize_bill(b) for b in feed_bills[:50]],
        "trending": [serialize_bill(b) for b in trending_bills],
        "follows": {
            "bills": len(followed_bill_ids),
            "members": len(followed_member_ids),
            "topics": len(followed_topics),
        },
    }

View File

@@ -0,0 +1,49 @@
from fastapi import APIRouter, Depends, HTTPException
from sqlalchemy import select
from sqlalchemy.exc import IntegrityError
from sqlalchemy.ext.asyncio import AsyncSession
from app.database import get_db
from app.models import Follow
from app.schemas.schemas import FollowCreate, FollowSchema
router = APIRouter()

# Follow targets the API accepts
VALID_FOLLOW_TYPES = {"bill", "member", "topic"}


@router.get("", response_model=list[FollowSchema])
async def list_follows(db: AsyncSession = Depends(get_db)):
    """Return every follow, newest first."""
    rows = await db.execute(select(Follow).order_by(Follow.created_at.desc()))
    return rows.scalars().all()
@router.post("", response_model=FollowSchema, status_code=201)
async def add_follow(body: FollowCreate, db: AsyncSession = Depends(get_db)):
    """Create a follow; if it already exists, return the existing row instead."""
    if body.follow_type not in VALID_FOLLOW_TYPES:
        raise HTTPException(status_code=400, detail=f"follow_type must be one of {VALID_FOLLOW_TYPES}")
    new_follow = Follow(follow_type=body.follow_type, follow_value=body.follow_value)
    db.add(new_follow)
    try:
        await db.commit()
        await db.refresh(new_follow)
    except IntegrityError:
        # Unique constraint hit — already following; return existing row
        await db.rollback()
        existing = await db.execute(
            select(Follow).where(
                Follow.follow_type == body.follow_type,
                Follow.follow_value == body.follow_value,
            )
        )
        return existing.scalar_one()
    return new_follow
@router.delete("/{follow_id}", status_code=204)
async def remove_follow(follow_id: int, db: AsyncSession = Depends(get_db)):
    """Delete a follow by primary key; 404 if it doesn't exist."""
    follow = await db.get(Follow, follow_id)
    if follow is None:
        raise HTTPException(status_code=404, detail="Follow not found")
    await db.delete(follow)
    await db.commit()

43
backend/app/api/health.py Normal file
View File

@@ -0,0 +1,43 @@
from datetime import datetime, timezone
import redis as redis_lib
from fastapi import APIRouter, Depends
from sqlalchemy import text
from sqlalchemy.ext.asyncio import AsyncSession
from app.config import settings
from app.database import get_db
router = APIRouter()


@router.get("")
async def health():
    """Lightweight liveness probe: always OK, with the current UTC time."""
    now = datetime.now(timezone.utc)
    return {"status": "ok", "timestamp": now.isoformat()}
@router.get("/detailed")
async def health_detailed(db: AsyncSession = Depends(get_db)):
    """Readiness probe: checks Postgres and Redis reachability.

    Returns "ok" only when both backends respond; "degraded" otherwise.
    Failures are deliberately swallowed — this endpoint reports status, it
    never raises.
    """
    # Check DB
    db_ok = False
    try:
        await db.execute(text("SELECT 1"))
        db_ok = True
    except Exception:
        pass
    # Check Redis — close the client afterwards so each probe doesn't leak
    # a connection pool (redis-py clients hold sockets until closed).
    redis_ok = False
    r = None
    try:
        r = redis_lib.from_url(settings.REDIS_URL)
        redis_ok = bool(r.ping())
    except Exception:
        pass
    finally:
        if r is not None:
            try:
                r.close()
            except Exception:
                pass
    status = "ok" if (db_ok and redis_ok) else "degraded"
    return {
        "status": status,
        "database": "ok" if db_ok else "error",
        "redis": "ok" if redis_ok else "error",
        "timestamp": datetime.now(timezone.utc).isoformat(),
    }

View File

@@ -0,0 +1,85 @@
from typing import Optional
from fastapi import APIRouter, Depends, HTTPException, Query
from sqlalchemy import desc, func, select
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.orm import selectinload
from app.database import get_db
from app.models import Bill, Member
from app.schemas.schemas import BillSchema, MemberSchema, PaginatedResponse
router = APIRouter()


@router.get("", response_model=PaginatedResponse[MemberSchema])
async def list_members(
    chamber: Optional[str] = Query(None),
    party: Optional[str] = Query(None),
    state: Optional[str] = Query(None),
    q: Optional[str] = Query(None),
    page: int = Query(1, ge=1),
    per_page: int = Query(50, ge=1, le=250),
    db: AsyncSession = Depends(get_db),
):
    """List members with optional chamber/party/state filters and a name search."""
    stmt = select(Member)
    # Apply each optional equality filter only when a value was supplied
    for column, value in (
        (Member.chamber, chamber),
        (Member.party, party),
        (Member.state, state),
    ):
        if value:
            stmt = stmt.where(column == value)
    if q:
        stmt = stmt.where(Member.name.ilike(f"%{q}%"))
    total = await db.scalar(select(func.count()).select_from(stmt.subquery())) or 0
    stmt = (
        stmt.order_by(Member.last_name, Member.first_name)
        .offset((page - 1) * per_page)
        .limit(per_page)
    )
    members = (await db.execute(stmt)).scalars().all()
    return PaginatedResponse(
        items=members,
        total=total,
        page=page,
        per_page=per_page,
        pages=max(1, (total + per_page - 1) // per_page),
    )
@router.get("/{bioguide_id}", response_model=MemberSchema)
async def get_member(bioguide_id: str, db: AsyncSession = Depends(get_db)):
    """Fetch a single member by bioguide id; 404 if unknown."""
    member = await db.get(Member, bioguide_id)
    if member is None:
        raise HTTPException(status_code=404, detail="Member not found")
    return member
@router.get("/{bioguide_id}/bills", response_model=PaginatedResponse[BillSchema])
async def get_member_bills(
    bioguide_id: str,
    page: int = Query(1, ge=1),
    per_page: int = Query(20, ge=1, le=100),
    db: AsyncSession = Depends(get_db),
):
    """Paginated bills sponsored by one member, newest introduced first."""
    stmt = select(Bill).options(selectinload(Bill.briefs)).where(Bill.sponsor_id == bioguide_id)
    total = await db.scalar(select(func.count()).select_from(stmt.subquery())) or 0
    stmt = (
        stmt.order_by(desc(Bill.introduced_date))
        .offset((page - 1) * per_page)
        .limit(per_page)
    )
    rows = (await db.execute(stmt)).scalars().all()
    items = []
    for bill in rows:
        schema = BillSchema.model_validate(bill)
        if bill.briefs:
            schema.latest_brief = bill.briefs[0]
        items.append(schema)
    return PaginatedResponse(
        items=items,
        total=total,
        page=page,
        per_page=per_page,
        pages=max(1, (total + per_page - 1) // per_page),
    )

53
backend/app/api/search.py Normal file
View File

@@ -0,0 +1,53 @@
from fastapi import APIRouter, Depends, Query
from sqlalchemy import select, text
from sqlalchemy.ext.asyncio import AsyncSession
from app.database import get_db
from app.models import Bill, Member
from app.schemas.schemas import BillSchema, MemberSchema
router = APIRouter()


@router.get("")
async def search(
    q: str = Query(..., min_length=2),
    db: AsyncSession = Depends(get_db),
):
    """Combined search: bill-id substring match, full-text title search, and
    fuzzy member-name match."""
    # Bill ID direct match
    direct = await db.execute(
        select(Bill).where(Bill.bill_id.ilike(f"%{q}%")).limit(20)
    )
    id_bills = direct.scalars().all()
    # Full-text search on title/content via tsvector
    ranked = await db.execute(
        select(Bill)
        .where(text("search_vector @@ plainto_tsquery('english', :q)"))
        .order_by(text("ts_rank(search_vector, plainto_tsquery('english', :q)) DESC"))
        .limit(20)
        .params(q=q)
    )
    fts_bills = ranked.scalars().all()
    # Merge and dedup via an insertion-ordered dict — ID matches stay first
    unique_bills: dict = {}
    for bill in list(id_bills) + list(fts_bills):
        unique_bills.setdefault(bill.bill_id, bill)
    # Fuzzy member search
    member_rows = await db.execute(
        select(Member)
        .where(Member.name.ilike(f"%{q}%"))
        .order_by(Member.last_name)
        .limit(10)
    )
    members = member_rows.scalars().all()
    return {
        "bills": [BillSchema.model_validate(b) for b in unique_bills.values()],
        "members": [MemberSchema.model_validate(m) for m in members],
    }

View File

@@ -0,0 +1,86 @@
from fastapi import APIRouter, Depends
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from app.config import settings
from app.database import get_db
from app.models import AppSetting
from app.schemas.schemas import SettingUpdate, SettingsResponse
router = APIRouter()


@router.get("", response_model=SettingsResponse)
async def get_settings(db: AsyncSession = Depends(get_db)):
    """Return current effective settings (env + DB overrides)."""
    # DB overrides take precedence over env vars
    overrides: dict[str, str] = {}
    result = await db.execute(select(AppSetting))
    for row in result.scalars().all():
        overrides[row.key] = row.value
    return SettingsResponse(
        llm_provider=overrides.get("llm_provider", settings.LLM_PROVIDER),
        # The model default must follow the *effective* provider, so the
        # override-aware provider value is threaded into _current_model here.
        llm_model=overrides.get("llm_model", _current_model(overrides.get("llm_provider", settings.LLM_PROVIDER))),
        congress_poll_interval_minutes=int(overrides.get("congress_poll_interval_minutes", settings.CONGRESS_POLL_INTERVAL_MINUTES)),
        # Derived feature flags — env-only, not DB-overridable
        newsapi_enabled=bool(settings.NEWSAPI_KEY),
        pytrends_enabled=settings.PYTRENDS_ENABLED,
    )
@router.put("")
async def update_setting(body: SettingUpdate, db: AsyncSession = Depends(get_db)):
    """Update a runtime setting (upsert into app_settings)."""
    ALLOWED_KEYS = {"llm_provider", "llm_model", "congress_poll_interval_minutes"}
    if body.key not in ALLOWED_KEYS:
        from fastapi import HTTPException

        raise HTTPException(status_code=400, detail=f"Allowed setting keys: {ALLOWED_KEYS}")
    row = await db.get(AppSetting, body.key)
    if row is None:
        db.add(AppSetting(key=body.key, value=body.value))
    else:
        row.value = body.value
    await db.commit()
    return {"key": body.key, "value": body.value}
@router.post("/test-llm")
async def test_llm_connection():
    """Test that the configured LLM provider responds correctly."""
    from app.services.llm_service import get_llm_provider

    # The whole round-trip stays inside try: any provider/config error is
    # reported as a structured payload rather than a 500.
    try:
        provider = get_llm_provider()
        brief = provider.generate_brief(
            doc_text="This is a test bill for connection verification purposes.",
            bill_metadata={
                "title": "Test Connection Bill",
                "sponsor_name": "Test Sponsor",
                "party": "Test",
                "state": "DC",
                "chamber": "House",
                "introduced_date": "2025-01-01",
                "latest_action_text": "Test action",
                "latest_action_date": "2025-01-01",
            },
        )
        if len(brief.summary) > 100:
            preview = brief.summary[:100] + "..."
        else:
            preview = brief.summary
        return {
            "status": "ok",
            "provider": brief.llm_provider,
            "model": brief.llm_model,
            "summary_preview": preview,
        }
    except Exception as e:
        return {"status": "error", "detail": str(e)}
def _current_model(provider: str) -> str:
    """Map a provider name to its configured model; 'unknown' if unrecognized."""
    model_by_provider = {
        "openai": settings.OPENAI_MODEL,
        "anthropic": settings.ANTHROPIC_MODEL,
        "gemini": settings.GEMINI_MODEL,
        "ollama": settings.OLLAMA_MODEL,
    }
    return model_by_provider.get(provider, "unknown")

50
backend/app/config.py Normal file
View File

@@ -0,0 +1,50 @@
from functools import lru_cache
from pydantic_settings import BaseSettings, SettingsConfigDict
class Settings(BaseSettings):
    """Application configuration loaded from the environment / .env file.

    Defaults target the docker-compose service names (postgres, redis);
    every field can be overridden via an environment variable of the same name.
    """
    model_config = SettingsConfigDict(env_file=".env", extra="ignore")
    # URLs
    LOCAL_URL: str = "http://localhost"
    PUBLIC_URL: str = ""  # empty disables the public origin (see CORS setup in main.py)
    # Database
    DATABASE_URL: str = "postgresql+asyncpg://congress:congress@postgres:5432/pocketveto"
    SYNC_DATABASE_URL: str = "postgresql://congress:congress@postgres:5432/pocketveto"
    # Redis
    REDIS_URL: str = "redis://redis:6379/0"
    # api.data.gov (shared key for Congress.gov and GovInfo)
    DATA_GOV_API_KEY: str = ""
    CONGRESS_POLL_INTERVAL_MINUTES: int = 30
    # LLM
    LLM_PROVIDER: str = "openai"  # openai | anthropic | gemini | ollama
    OPENAI_API_KEY: str = ""
    OPENAI_MODEL: str = "gpt-4o"
    ANTHROPIC_API_KEY: str = ""
    ANTHROPIC_MODEL: str = "claude-opus-4-6"
    GEMINI_API_KEY: str = ""
    GEMINI_MODEL: str = "gemini-1.5-pro"
    OLLAMA_BASE_URL: str = "http://host.docker.internal:11434"
    OLLAMA_MODEL: str = "llama3.1"
    # News
    NEWSAPI_KEY: str = ""
    # pytrends
    PYTRENDS_ENABLED: bool = True
@lru_cache
def get_settings() -> Settings:
    """Build and cache the single Settings instance for this process."""
    return Settings()
# Module-level convenience handle imported throughout the app.
settings = get_settings()

53
backend/app/database.py Normal file
View File

@@ -0,0 +1,53 @@
from contextlib import asynccontextmanager
from typing import AsyncGenerator
from sqlalchemy import create_engine
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine
from sqlalchemy.orm import DeclarativeBase, Session, sessionmaker
from app.config import settings
class Base(DeclarativeBase):
    """Declarative base shared by all ORM models; Alembic reads Base.metadata."""
    pass
# ─── Async engine (FastAPI) ───────────────────────────────────────────────────
async_engine = create_async_engine(
    settings.DATABASE_URL,
    echo=False,
    pool_size=10,
    max_overflow=20,
    # Match the sync engine: transparently recycle connections that died while
    # pooled, instead of surfacing stale-connection errors to request handlers.
    pool_pre_ping=True,
)
# expire_on_commit=False lets handlers read ORM attributes after commit
# without triggering lazy refresh on a closed session.
AsyncSessionLocal = async_sessionmaker(
    async_engine,
    expire_on_commit=False,
    class_=AsyncSession,
)
async def get_db() -> AsyncGenerator[AsyncSession, None]:
    """FastAPI dependency: yield a request-scoped async session (auto-closed)."""
    async with AsyncSessionLocal() as session:
        yield session
# ─── Sync engine (Celery workers) ────────────────────────────────────────────
sync_engine = create_engine(
    settings.SYNC_DATABASE_URL,
    pool_size=5,
    max_overflow=10,
    # Workers can idle long enough for pooled connections to die; pre-ping
    # recycles them transparently.
    pool_pre_ping=True,
)
SyncSessionLocal = sessionmaker(
    bind=sync_engine,
    autoflush=False,
    autocommit=False,
)
def get_sync_db() -> Session:
    """Return a new sync session; the caller is responsible for closing it."""
    return SyncSessionLocal()

28
backend/app/main.py Normal file
View File

@@ -0,0 +1,28 @@
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from app.api import bills, members, follows, dashboard, search, settings, admin, health
from app.config import settings as config
app = FastAPI(
    title="PocketVeto",
    description="Monitor US Congressional activity with AI-powered bill summaries.",
    version="1.0.0",
)
# NOTE: `config` is app.config.settings, aliased because `settings` in this
# module refers to the API router module imported above.
app.add_middleware(
    CORSMiddleware,
    # Filter out empty strings so an unset PUBLIC_URL doesn't add "" as an origin.
    allow_origins=[o for o in [config.LOCAL_URL, config.PUBLIC_URL] if o],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
# All routes live under /api/<area>.
app.include_router(bills.router, prefix="/api/bills", tags=["bills"])
app.include_router(members.router, prefix="/api/members", tags=["members"])
app.include_router(follows.router, prefix="/api/follows", tags=["follows"])
app.include_router(dashboard.router, prefix="/api/dashboard", tags=["dashboard"])
app.include_router(search.router, prefix="/api/search", tags=["search"])
app.include_router(settings.router, prefix="/api/settings", tags=["settings"])
app.include_router(admin.router, prefix="/api/admin", tags=["admin"])
app.include_router(health.router, prefix="/api/health", tags=["health"])

View File

View File

@@ -0,0 +1,117 @@
"""
Historical data backfill script.
Usage (run inside the api or worker container):
python -m app.management.backfill --congress 118 119
python -m app.management.backfill --congress 119 --skip-llm
This script fetches all bills from the specified Congress numbers,
stores them in the database, and (optionally) enqueues document fetch
and LLM processing tasks for each bill.
Cost note: LLM processing 15,000+ bills can be expensive.
Consider using --skip-llm for initial backfill and processing
manually / in batches.
"""
import argparse
import logging
import sys
import time
# Script-level logging: timestamps help when tailing long backfills.
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
logger = logging.getLogger(__name__)
def backfill_congress(congress_number: int, skip_llm: bool = False, dry_run: bool = False):
    """Fetch and store every bill for one Congress.

    Args:
        congress_number: Congress to backfill, e.g. 118 or 119.
        skip_llm: when True, do not enqueue document-fetch/LLM tasks for new bills.
        dry_run: count bills without writing anything to the database.

    Returns:
        The number of newly inserted bills.
    """
    from app.database import get_sync_db
    from app.models import Bill
    from app.services import congress_api
    from app.workers.congress_poller import _sync_sponsor
    db = get_sync_db()
    offset = 0
    total_processed = 0
    total_new = 0
    logger.info(f"Starting backfill for Congress {congress_number} (skip_llm={skip_llm}, dry_run={dry_run})")
    try:
        while True:
            response = congress_api.get_bills(congress=congress_number, offset=offset, limit=250)
            bills_data = response.get("bills", [])
            if not bills_data:
                break
            for bill_data in bills_data:
                parsed = congress_api.parse_bill_from_api(bill_data, congress_number)
                bill_id = parsed["bill_id"]
                if dry_run:
                    logger.info(f"[DRY RUN] Would process: {bill_id}")
                    total_processed += 1
                    continue
                existing = db.get(Bill, bill_id)
                if existing:
                    total_processed += 1
                    continue
                # Sync sponsor first so bills.sponsor_id has a row to point at.
                sponsor_id = _sync_sponsor(db, bill_data)
                parsed["sponsor_id"] = sponsor_id
                db.add(Bill(**parsed))
                total_new += 1
                total_processed += 1
                # Commit in batches of 50 so a crash loses little work.
                if total_new % 50 == 0:
                    db.commit()
                    logger.info(f"Progress: {total_processed} processed, {total_new} new")
                # Enqueue document + LLM at low priority
                if not skip_llm:
                    from app.workers.document_fetcher import fetch_bill_documents
                    fetch_bill_documents.apply_async(args=[bill_id], priority=3)
                # Stay well under Congress.gov rate limit (5,000/hr = ~1.4/sec)
                time.sleep(0.25)
            db.commit()
            offset += 250
            # Log before the last-page break so the final page is reported too.
            logger.info(f"Fetched page ending at offset {offset}, total processed: {total_processed}")
            if len(bills_data) < 250:
                break  # Last page
            time.sleep(1)  # Polite pause between pages
    except KeyboardInterrupt:
        # Persist whatever was added before the interrupt.
        logger.info("Interrupted by user")
        db.commit()
    finally:
        db.close()
    logger.info(f"Backfill complete: {total_new} new bills added ({total_processed} total processed)")
    return total_new
def main():
    """CLI entry point: parse arguments and backfill each requested Congress."""
    parser = argparse.ArgumentParser(description="Backfill Congressional bill data")
    parser.add_argument("--congress", type=int, nargs="+", default=[119],
                        help="Congress numbers to backfill (default: 119)")
    parser.add_argument("--skip-llm", action="store_true",
                        help="Skip LLM processing (fetch documents only, don't enqueue briefs)")
    parser.add_argument("--dry-run", action="store_true",
                        help="Count bills without actually inserting them")
    args = parser.parse_args()
    total = sum(
        backfill_congress(number, skip_llm=args.skip_llm, dry_run=args.dry_run)
        for number in args.congress
    )
    logger.info(f"All done. Total new bills: {total}")


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,22 @@
from app.models.bill import Bill, BillAction, BillDocument
from app.models.brief import BillBrief
from app.models.follow import Follow
from app.models.member import Member
from app.models.news import NewsArticle
from app.models.setting import AppSetting
from app.models.trend import TrendScore
from app.models.committee import Committee, CommitteeBill
# Public re-export surface so every model registers with Base.metadata
# (Alembic autogenerate depends on this module being imported).
__all__ = [
    "Bill",
    "BillAction",
    "BillDocument",
    "BillBrief",
    "Follow",
    "Member",
    "NewsArticle",
    "AppSetting",
    "TrendScore",
    "Committee",
    "CommitteeBill",
]

View File

@@ -0,0 +1,88 @@
from sqlalchemy import (
Column, String, Integer, Date, DateTime, Text, ForeignKey, Index
)
from sqlalchemy.orm import relationship
from sqlalchemy.sql import func
from app.database import Base
class Bill(Base):
    """A piece of legislation; one row per bill across all tracked Congresses."""
    __tablename__ = "bills"
    # Natural key: "{congress}-{bill_type_lower}-{bill_number}" e.g. "119-hr-1234"
    bill_id = Column(String, primary_key=True)
    congress_number = Column(Integer, nullable=False)
    bill_type = Column(String(10), nullable=False)  # hr, s, hjres, sjres, hconres, sconres, hres, sres
    bill_number = Column(Integer, nullable=False)
    title = Column(Text)
    short_title = Column(Text)
    # Nullable: some bills may be stored before their sponsor is synced.
    sponsor_id = Column(String, ForeignKey("members.bioguide_id"), nullable=True)
    introduced_date = Column(Date)
    latest_action_date = Column(Date)
    latest_action_text = Column(Text)
    status = Column(String(100))
    chamber = Column(String(50))
    congress_url = Column(String)
    govtrack_url = Column(String)
    # Ingestion tracking
    last_checked_at = Column(DateTime(timezone=True))
    actions_fetched_at = Column(DateTime(timezone=True))
    created_at = Column(DateTime(timezone=True), server_default=func.now())
    updated_at = Column(DateTime(timezone=True), server_default=func.now(), onupdate=func.now())
    # Relationships — children ordered newest-first where a natural order exists.
    sponsor = relationship("Member", back_populates="bills", foreign_keys=[sponsor_id])
    actions = relationship("BillAction", back_populates="bill", order_by="desc(BillAction.action_date)")
    documents = relationship("BillDocument", back_populates="bill")
    briefs = relationship("BillBrief", back_populates="bill", order_by="desc(BillBrief.created_at)")
    news_articles = relationship("NewsArticle", back_populates="bill", order_by="desc(NewsArticle.published_at)")
    trend_scores = relationship("TrendScore", back_populates="bill", order_by="desc(TrendScore.score_date)")
    committee_bills = relationship("CommitteeBill", back_populates="bill")
    __table_args__ = (
        Index("ix_bills_congress_number", "congress_number"),
        Index("ix_bills_latest_action_date", "latest_action_date"),
        Index("ix_bills_introduced_date", "introduced_date"),
        Index("ix_bills_chamber", "chamber"),
        Index("ix_bills_sponsor_id", "sponsor_id"),
    )
class BillAction(Base):
    """One legislative action (vote, referral, etc.) in a bill's history."""
    __tablename__ = "bill_actions"
    id = Column(Integer, primary_key=True, autoincrement=True)
    bill_id = Column(String, ForeignKey("bills.bill_id", ondelete="CASCADE"), nullable=False)
    action_date = Column(Date)
    action_text = Column(Text)
    action_type = Column(String(100))
    chamber = Column(String(50))
    created_at = Column(DateTime(timezone=True), server_default=func.now())
    bill = relationship("Bill", back_populates="actions")
    __table_args__ = (
        Index("ix_bill_actions_bill_id", "bill_id"),
        Index("ix_bill_actions_action_date", "action_date"),
    )
class BillDocument(Base):
    """Fetched full text of one version of a bill (or related document)."""
    __tablename__ = "bill_documents"
    id = Column(Integer, primary_key=True, autoincrement=True)
    bill_id = Column(String, ForeignKey("bills.bill_id", ondelete="CASCADE"), nullable=False)
    doc_type = Column(String(50))  # bill_text | committee_report | amendment
    doc_version = Column(String(50))  # Introduced, Enrolled, etc.
    govinfo_url = Column(String)
    # Plain text extracted from the htm/txt/pdf source; input for LLM briefs.
    raw_text = Column(Text)
    fetched_at = Column(DateTime(timezone=True))
    created_at = Column(DateTime(timezone=True), server_default=func.now())
    bill = relationship("Bill", back_populates="documents")
    briefs = relationship("BillBrief", back_populates="document")
    __table_args__ = (
        Index("ix_bill_documents_bill_id", "bill_id"),
    )

View File

@@ -0,0 +1,31 @@
from sqlalchemy import Column, Integer, String, Text, ForeignKey, DateTime, Index
from sqlalchemy.dialects.postgresql import JSONB
from sqlalchemy.orm import relationship
from sqlalchemy.sql import func
from app.database import Base
class BillBrief(Base):
    """LLM-generated structured summary of a bill document."""
    __tablename__ = "bill_briefs"
    id = Column(Integer, primary_key=True, autoincrement=True)
    bill_id = Column(String, ForeignKey("bills.bill_id", ondelete="CASCADE"), nullable=False)
    # SET NULL so briefs survive document re-fetches.
    document_id = Column(Integer, ForeignKey("bill_documents.id", ondelete="SET NULL"), nullable=True)
    brief_type = Column(String(20), nullable=False, server_default="full")  # full | amendment
    summary = Column(Text)
    key_points = Column(JSONB)  # list[str]
    risks = Column(JSONB)  # list[str]
    deadlines = Column(JSONB)  # list[{date: str, description: str}]
    topic_tags = Column(JSONB)  # list[str]
    # Provenance: which backend/model produced this brief.
    llm_provider = Column(String(50))
    llm_model = Column(String(100))
    created_at = Column(DateTime(timezone=True), server_default=func.now())
    bill = relationship("Bill", back_populates="briefs")
    document = relationship("BillDocument", back_populates="briefs")
    __table_args__ = (
        Index("ix_bill_briefs_bill_id", "bill_id"),
        # GIN index enables fast containment queries on the tag array.
        Index("ix_bill_briefs_topic_tags", "topic_tags", postgresql_using="gin"),
    )

View File

@@ -0,0 +1,33 @@
from sqlalchemy import Column, Integer, String, Date, ForeignKey, Index
from sqlalchemy.orm import relationship
from app.database import Base
class Committee(Base):
    """A congressional committee, keyed externally by its committee code."""
    __tablename__ = "committees"
    id = Column(Integer, primary_key=True, autoincrement=True)
    committee_code = Column(String(20), unique=True, nullable=False)
    name = Column(String(500))
    chamber = Column(String(10))
    committee_type = Column(String(50))  # Standing, Select, Joint, etc.
    committee_bills = relationship("CommitteeBill", back_populates="committee")
class CommitteeBill(Base):
    """Association row: a bill's referral to a committee."""
    __tablename__ = "committee_bills"
    id = Column(Integer, primary_key=True, autoincrement=True)
    committee_id = Column(Integer, ForeignKey("committees.id", ondelete="CASCADE"), nullable=False)
    bill_id = Column(String, ForeignKey("bills.bill_id", ondelete="CASCADE"), nullable=False)
    referral_date = Column(Date)
    committee = relationship("Committee", back_populates="committee_bills")
    bill = relationship("Bill", back_populates="committee_bills")
    __table_args__ = (
        Index("ix_committee_bills_bill_id", "bill_id"),
        Index("ix_committee_bills_committee_id", "committee_id"),
    )

View File

@@ -0,0 +1,17 @@
from sqlalchemy import Column, Integer, String, DateTime, UniqueConstraint
from sqlalchemy.sql import func
from app.database import Base
class Follow(Base):
    """A saved subscription to a bill, member, or topic tag."""
    __tablename__ = "follows"
    id = Column(Integer, primary_key=True, autoincrement=True)
    follow_type = Column(String(20), nullable=False)  # bill | member | topic
    follow_value = Column(String, nullable=False)  # bill_id | bioguide_id | tag string
    created_at = Column(DateTime(timezone=True), server_default=func.now())
    __table_args__ = (
        # One follow per (type, value) pair — duplicates rejected at the DB level.
        UniqueConstraint("follow_type", "follow_value", name="uq_follows_type_value"),
    )

View File

@@ -0,0 +1,24 @@
from sqlalchemy import Column, String, DateTime
from sqlalchemy.orm import relationship
from sqlalchemy.sql import func
from app.database import Base
class Member(Base):
    """A member of Congress, keyed by their Bioguide identifier."""
    __tablename__ = "members"
    bioguide_id = Column(String, primary_key=True)
    name = Column(String, nullable=False)
    first_name = Column(String)
    last_name = Column(String)
    party = Column(String(50))
    state = Column(String(50))
    chamber = Column(String(50))
    district = Column(String(50))
    photo_url = Column(String)
    official_url = Column(String)
    created_at = Column(DateTime(timezone=True), server_default=func.now())
    updated_at = Column(DateTime(timezone=True), server_default=func.now(), onupdate=func.now())
    # Bills this member sponsors (mirror of Bill.sponsor).
    bills = relationship("Bill", back_populates="sponsor", foreign_keys="Bill.sponsor_id")

View File

@@ -0,0 +1,25 @@
from sqlalchemy import Column, Integer, String, Text, Float, DateTime, ForeignKey, Index
from sqlalchemy.orm import relationship
from sqlalchemy.sql import func
from app.database import Base
class NewsArticle(Base):
    """A news story matched to a bill; URLs are globally unique."""
    __tablename__ = "news_articles"
    id = Column(Integer, primary_key=True, autoincrement=True)
    bill_id = Column(String, ForeignKey("bills.bill_id", ondelete="CASCADE"), nullable=False)
    source = Column(String(200))
    headline = Column(Text)
    url = Column(String, unique=True)
    published_at = Column(DateTime(timezone=True))
    relevance_score = Column(Float, default=0.0)
    created_at = Column(DateTime(timezone=True), server_default=func.now())
    bill = relationship("Bill", back_populates="news_articles")
    __table_args__ = (
        Index("ix_news_articles_bill_id", "bill_id"),
        Index("ix_news_articles_published_at", "published_at"),
    )

View File

@@ -0,0 +1,12 @@
from sqlalchemy import Column, String, DateTime
from sqlalchemy.sql import func
from app.database import Base
class AppSetting(Base):
    """Runtime key/value override store (values are stored as strings)."""
    __tablename__ = "app_settings"
    key = Column(String, primary_key=True)
    value = Column(String)
    updated_at = Column(DateTime(timezone=True), server_default=func.now(), onupdate=func.now())

View File

@@ -0,0 +1,25 @@
from sqlalchemy import Column, Integer, String, Date, Float, ForeignKey, Index, UniqueConstraint
from sqlalchemy.orm import relationship
from app.database import Base
class TrendScore(Base):
    """Daily media/search attention scores for a bill (one row per bill per day)."""
    __tablename__ = "trend_scores"
    id = Column(Integer, primary_key=True, autoincrement=True)
    bill_id = Column(String, ForeignKey("bills.bill_id", ondelete="CASCADE"), nullable=False)
    score_date = Column(Date, nullable=False)
    newsapi_count = Column(Integer, default=0)
    gnews_count = Column(Integer, default=0)
    gtrends_score = Column(Float, default=0.0)
    # Combined ranking signal derived from the individual counts/scores above.
    composite_score = Column(Float, default=0.0)
    bill = relationship("Bill", back_populates="trend_scores")
    __table_args__ = (
        UniqueConstraint("bill_id", "score_date", name="uq_trend_scores_bill_date"),
        Index("ix_trend_scores_bill_id", "bill_id"),
        Index("ix_trend_scores_score_date", "score_date"),
        Index("ix_trend_scores_composite", "composite_score"),
    )

View File

View File

@@ -0,0 +1,145 @@
from datetime import date, datetime
from typing import Any, Generic, Optional, TypeVar
from pydantic import BaseModel
T = TypeVar("T")
class PaginatedResponse(BaseModel, Generic[T]):
items: list[T]
total: int
page: int
per_page: int
pages: int
# ── Member ────────────────────────────────────────────────────────────────────
class MemberSchema(BaseModel):
    """API view of a member of Congress."""
    bioguide_id: str
    name: str
    first_name: Optional[str] = None
    last_name: Optional[str] = None
    party: Optional[str] = None
    state: Optional[str] = None
    chamber: Optional[str] = None
    district: Optional[str] = None
    photo_url: Optional[str] = None
    model_config = {"from_attributes": True}
# ── Bill Brief ────────────────────────────────────────────────────────────────
class BriefSchema(BaseModel):
    """API view of an LLM-generated bill brief."""
    id: int
    brief_type: str = "full"
    summary: Optional[str] = None
    key_points: Optional[list[str]] = None
    risks: Optional[list[str]] = None
    deadlines: Optional[list[dict[str, Any]]] = None
    topic_tags: Optional[list[str]] = None
    llm_provider: Optional[str] = None
    llm_model: Optional[str] = None
    created_at: Optional[datetime] = None
    model_config = {"from_attributes": True}
# ── Bill Action ───────────────────────────────────────────────────────────────
class BillActionSchema(BaseModel):
    """API view of one legislative action on a bill."""
    id: int
    action_date: Optional[date] = None
    action_text: Optional[str] = None
    action_type: Optional[str] = None
    chamber: Optional[str] = None
    model_config = {"from_attributes": True}
# ── News Article ──────────────────────────────────────────────────────────────
class NewsArticleSchema(BaseModel):
    """API view of a news article linked to a bill."""
    id: int
    source: Optional[str] = None
    headline: Optional[str] = None
    url: Optional[str] = None
    published_at: Optional[datetime] = None
    relevance_score: Optional[float] = None
    model_config = {"from_attributes": True}
# ── Trend Score ───────────────────────────────────────────────────────────────
class TrendScoreSchema(BaseModel):
    """API view of one day's trend metrics for a bill."""
    score_date: date
    newsapi_count: int
    gnews_count: int
    gtrends_score: float
    composite_score: float
    model_config = {"from_attributes": True}
# ── Bill ──────────────────────────────────────────────────────────────────────
class BillSchema(BaseModel):
    """List-level API view of a bill with optional latest brief/trend."""
    bill_id: str
    congress_number: int
    bill_type: str
    bill_number: int
    title: Optional[str] = None
    short_title: Optional[str] = None
    introduced_date: Optional[date] = None
    latest_action_date: Optional[date] = None
    latest_action_text: Optional[str] = None
    status: Optional[str] = None
    chamber: Optional[str] = None
    congress_url: Optional[str] = None
    sponsor: Optional[MemberSchema] = None
    latest_brief: Optional[BriefSchema] = None
    latest_trend: Optional[TrendScoreSchema] = None
    updated_at: Optional[datetime] = None
    model_config = {"from_attributes": True}
class BillDetailSchema(BillSchema):
    """Detail-level view: everything in BillSchema plus full child lists."""
    actions: list[BillActionSchema] = []
    news_articles: list[NewsArticleSchema] = []
    trend_scores: list[TrendScoreSchema] = []
    briefs: list[BriefSchema] = []
# ── Follow ────────────────────────────────────────────────────────────────────
class FollowCreate(BaseModel):
    """Request body for creating a follow."""
    follow_type: str  # bill | member | topic
    follow_value: str
class FollowSchema(BaseModel):
    """API view of a stored follow."""
    id: int
    follow_type: str
    follow_value: str
    created_at: datetime
    model_config = {"from_attributes": True}
# ── Settings ──────────────────────────────────────────────────────────────────
class SettingUpdate(BaseModel):
    """Request body for PUT /api/settings (key must be allowlisted)."""
    key: str
    value: str
class SettingsResponse(BaseModel):
    """Effective runtime settings as reported by GET /api/settings."""
    llm_provider: str
    llm_model: str
    congress_poll_interval_minutes: int
    newsapi_enabled: bool
    pytrends_enabled: bool

View File

View File

@@ -0,0 +1,120 @@
"""
Congress.gov API client.
Rate limit: 5,000 requests/hour (enforced server-side by Congress.gov).
We track usage in Redis to stay well under the limit.
"""
import time
from datetime import datetime
from typing import Optional
import requests
from tenacity import retry, stop_after_attempt, wait_exponential
from app.config import settings
BASE_URL = "https://api.congress.gov/v3"
def _get_current_congress() -> int:
"""Calculate the current Congress number. 119th started Jan 3, 2025."""
year = datetime.utcnow().year
# Congress changes on odd years (Jan 3)
if datetime.utcnow().month == 1 and datetime.utcnow().day < 3:
year -= 1
return 118 + ((year - 2023) // 2 + (1 if year % 2 == 1 else 0))
@retry(stop=stop_after_attempt(3), wait=wait_exponential(min=1, max=10))
def _get(endpoint: str, params: dict) -> dict:
    """GET a Congress.gov v3 endpoint and return the parsed JSON.

    Retries up to 3 times with exponential backoff on any exception.
    """
    # Copy before injecting api_key/format so the caller's dict is never
    # mutated (callers reuse param dicts across paginated requests).
    query = {**params, "api_key": settings.DATA_GOV_API_KEY, "format": "json"}
    response = requests.get(f"{BASE_URL}{endpoint}", params=query, timeout=30)
    response.raise_for_status()
    return response.json()
def get_current_congress() -> int:
    """Public wrapper returning the Congress number currently in session."""
    return _get_current_congress()
def build_bill_id(congress: int, bill_type: str, bill_number: int) -> str:
    """Compose the natural bill key, e.g. ``119-hr-1234``."""
    return "-".join((str(congress), bill_type.lower(), str(bill_number)))
def get_bills(
    congress: int,
    offset: int = 0,
    limit: int = 250,
    from_date_time: Optional[str] = None,
) -> dict:
    """List bills for a Congress, most recently updated first."""
    params: dict = {
        "offset": offset,
        "limit": limit,
        "sort": "updateDate+desc",
    }
    # Only filter by update time when a truthy timestamp is supplied.
    if from_date_time:
        params["fromDateTime"] = from_date_time
    return _get(f"/bill/{congress}", params)
def get_bill_detail(congress: int, bill_type: str, bill_number: int) -> dict:
    """Fetch the detail record for a single bill."""
    path = f"/bill/{congress}/{bill_type.lower()}/{bill_number}"
    return _get(path, {})
def get_bill_actions(congress: int, bill_type: str, bill_number: int, offset: int = 0) -> dict:
    """Fetch one page (up to 250) of a bill's actions."""
    path = f"/bill/{congress}/{bill_type.lower()}/{bill_number}/actions"
    return _get(path, {"offset": offset, "limit": 250})
def get_bill_text_versions(congress: int, bill_type: str, bill_number: int) -> dict:
    """Fetch the published text versions for a bill."""
    path = f"/bill/{congress}/{bill_type.lower()}/{bill_number}/text"
    return _get(path, {})
def get_members(offset: int = 0, limit: int = 250, current_member: bool = True) -> dict:
    """List members; by default restricted to those currently serving."""
    params: dict = {"offset": offset, "limit": limit}
    if current_member:
        params["currentMember"] = "true"
    return _get("/member", params)
def get_member_detail(bioguide_id: str) -> dict:
    """Fetch the detail record for one member."""
    return _get(f"/member/{bioguide_id}", {})
def get_committees(offset: int = 0, limit: int = 250) -> dict:
    """List congressional committees."""
    return _get("/committee", {"offset": offset, "limit": limit})
def parse_bill_from_api(data: dict, congress: int) -> dict:
    """Normalize raw API bill data into our model fields."""
    bill_type = data.get("type", "").lower()
    bill_number = data.get("number", 0)
    latest_action = data.get("latestAction") or {}
    action_text = latest_action.get("text")
    # House bill types all start with "h" (hr, hjres, hconres, hres).
    chamber = "House" if bill_type.startswith("h") else "Senate"
    return {
        "bill_id": build_bill_id(congress, bill_type, bill_number),
        "congress_number": congress,
        "bill_type": bill_type,
        "bill_number": bill_number,
        "title": data.get("title"),
        "short_title": data.get("shortTitle"),
        "introduced_date": data.get("introducedDate"),
        "latest_action_date": latest_action.get("actionDate"),
        "latest_action_text": action_text,
        # bills.status is String(100) — keep only the leading slice.
        "status": action_text[:100] if action_text else None,
        "chamber": chamber,
        "congress_url": data.get("url"),
    }
def parse_member_from_api(data: dict) -> dict:
    """Normalize raw API member data into our model fields."""
    term_items = data.get("terms", {}).get("item", [])
    # The last term entry is treated as the current one (terms assumed
    # oldest-first — TODO confirm against the API).
    current_term = term_items[-1] if term_items else {}
    district = current_term.get("district")
    return {
        "bioguide_id": data.get("bioguideId"),
        "name": data.get("name", ""),
        "first_name": data.get("firstName"),
        "last_name": data.get("lastName"),
        "party": data.get("partyName") or None,
        "state": data.get("state"),
        "chamber": current_term.get("chamber"),
        # NOTE(review): a falsy district (e.g. 0) maps to None here — verify
        # how the API encodes at-large seats.
        "district": str(district) if district else None,
        "photo_url": data.get("depiction", {}).get("imageUrl"),
        "official_url": data.get("officialWebsiteUrl"),
    }

View File

@@ -0,0 +1,95 @@
"""
GovInfo API client for fetching actual bill text.
Priority order for text formats: htm > txt > pdf
"""
import logging
import re
from typing import Optional
import requests
from bs4 import BeautifulSoup
from tenacity import retry, stop_after_attempt, wait_exponential
from app.config import settings
logger = logging.getLogger(__name__)
GOVINFO_BASE = "https://api.govinfo.gov"
# Preference order for document formats when several are published.
FORMAT_PRIORITY = ["htm", "html", "txt", "pdf"]
@retry(stop=stop_after_attempt(3), wait=wait_exponential(min=2, max=15))
def _get(url: str, params: Optional[dict] = None) -> requests.Response:
    """GET a GovInfo URL with the api_key injected; retries on any error.

    `params` was annotated as plain `dict` despite defaulting to None —
    fixed to Optional[dict].
    """
    p = {"api_key": settings.DATA_GOV_API_KEY, **(params or {})}
    response = requests.get(url, params=p, timeout=60)
    response.raise_for_status()
    return response
def get_package_summary(package_id: str) -> dict:
    """Fetch the summary JSON for a GovInfo package."""
    return _get(f"{GOVINFO_BASE}/packages/{package_id}/summary").json()
def get_package_content_detail(package_id: str) -> dict:
    """Fetch the content-detail JSON for a GovInfo package."""
    return _get(f"{GOVINFO_BASE}/packages/{package_id}/content-detail").json()
def find_best_text_url(
    text_versions: list[dict],
    priority: Optional[list[str]] = None,
) -> tuple[Optional[str], Optional[str]]:
    """
    From a list of text version objects (from Congress.gov API), find the best
    available text format. Returns ``(url, fmt)``, or ``(None, None)`` when no
    format matches (the previous docstring/annotation incorrectly said None).
    Matches by URL extension since Congress.gov type strings are "Formatted Text", "PDF", etc.

    Args:
        text_versions: version dicts, each with a "formats" list of {"url": ...}.
        priority: extension preference order; defaults to FORMAT_PRIORITY.
    """
    if priority is None:
        priority = FORMAT_PRIORITY
    for fmt in priority:
        for version in text_versions:
            for fmt_info in version.get("formats", []):
                # Skip malformed entries that aren't format dicts.
                if not isinstance(fmt_info, dict):
                    continue
                url = fmt_info.get("url", "")
                if url.lower().endswith(f".{fmt}"):
                    return url, fmt
    return None, None
def fetch_text_from_url(url: str, fmt: str) -> Optional[str]:
    """Download and extract plain text from a GovInfo document URL."""
    try:
        response = requests.get(url, timeout=120)
        response.raise_for_status()
        if fmt in ("htm", "html"):
            return _extract_from_html(response.text)
        if fmt == "txt":
            return response.text
        if fmt == "pdf":
            return _extract_from_pdf(response.content)
    except Exception as e:
        logger.error(f"Failed to fetch text from {url}: {e}")
    # Unknown format or a failed request both yield None.
    return None
def _extract_from_html(html: str) -> str:
    """Strip HTML tags and clean up whitespace.

    Drops script/style and page-chrome elements first, then collapses runs of
    blank lines and repeated spaces in the extracted text.
    """
    soup = BeautifulSoup(html, "lxml")
    # Remove script/style tags
    for tag in soup(["script", "style", "nav", "header", "footer"]):
        tag.decompose()
    text = soup.get_text(separator="\n")
    # Collapse excessive whitespace
    text = re.sub(r"\n{3,}", "\n\n", text)
    text = re.sub(r" {2,}", " ", text)
    return text.strip()
def _extract_from_pdf(content: bytes) -> Optional[str]:
    """Extract text from PDF bytes using pdfminer.

    Returns None (after logging) if pdfminer is unavailable or parsing fails.
    """
    try:
        # Imported lazily: pdfminer is only needed for the PDF fallback path.
        from io import BytesIO
        from pdfminer.high_level import extract_text as pdf_extract
        return pdf_extract(BytesIO(content))
    except Exception as e:
        logger.error(f"PDF extraction failed: {e}")
        return None

View File

@@ -0,0 +1,327 @@
"""
LLM provider abstraction.
All providers implement generate_brief(doc_text, bill_metadata) -> ReverseBrief.
Select provider via LLM_PROVIDER env var.
"""
import json
import logging
import re
from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from app.config import settings
logger = logging.getLogger(__name__)
# System prompt for full-bill briefs. The JSON schema described here must stay
# in sync with parse_brief_json / ReverseBrief below.
SYSTEM_PROMPT = """You are a nonpartisan legislative analyst specializing in translating complex \
legislation into clear, accurate summaries for informed citizens. You analyze bills objectively \
without political bias.
Always respond with valid JSON matching exactly this schema:
{
"summary": "2-4 paragraph plain-language summary of what this bill does",
"key_points": ["specific concrete fact 1", "specific concrete fact 2"],
"risks": ["legitimate concern or challenge 1", "legitimate concern 2"],
"deadlines": [{"date": "YYYY-MM-DD or null", "description": "what happens on this date"}],
"topic_tags": ["healthcare", "taxation"]
}
Rules:
- summary: Explain WHAT the bill does, not whether it is good or bad. Be factual and complete.
- key_points: 5-10 specific, concrete things the bill changes, authorizes, or appropriates.
- risks: Legitimate concerns from any perspective — costs, implementation challenges, \
constitutional questions, unintended consequences. Include at least 2 even for benign bills.
- deadlines: Only include if explicitly stated in the text. Use null for date if a deadline \
is mentioned without a specific date. Empty list if none.
- topic_tags: 3-8 lowercase tags. Prefer these standard tags: healthcare, taxation, defense, \
education, immigration, environment, housing, infrastructure, technology, agriculture, judiciary, \
foreign-policy, veterans, social-security, trade, budget, energy, banking, transportation, \
public-lands, labor, civil-rights, science.
Respond with ONLY valid JSON. No preamble, no explanation, no markdown code blocks."""
MAX_TOKENS_DEFAULT = 6000
MAX_TOKENS_OLLAMA = 3000
TOKENS_PER_CHAR = 0.25  # rough approximation: 4 chars ≈ 1 token
@dataclass
class ReverseBrief:
    """Structured output of one LLM brief, mirroring the prompt's JSON schema."""
    summary: str
    key_points: list[str]
    risks: list[str]
    deadlines: list[dict]  # each entry: {"date": ..., "description": ...}
    topic_tags: list[str]
    llm_provider: str
    llm_model: str
def smart_truncate(text: str, max_tokens: int) -> str:
    """Truncate bill text intelligently if it exceeds the token budget.

    Keeps the head (~75% of the budget — usually the purpose sections) and the
    tail (~25% — effective dates / enforcement), inserting a marker noting how
    much of the middle was omitted.
    """
    if len(text) * TOKENS_PER_CHAR <= max_tokens:
        return text
    head_len = int(max_tokens * 0.75 / TOKENS_PER_CHAR)
    tail_len = int(max_tokens * 0.25 / TOKENS_PER_CHAR)
    skipped = len(text) - head_len - tail_len
    marker = f"\n\n[... {skipped:,} characters omitted for length ...]\n\n"
    return text[:head_len] + marker + text[-tail_len:]
# System prompt for diff-style briefs when a bill gains a new text version.
# Schema matches SYSTEM_PROMPT so parse_brief_json handles both outputs.
AMENDMENT_SYSTEM_PROMPT = """You are a nonpartisan legislative analyst. A bill has been updated \
and you must summarize what changed between the previous and new version.
Always respond with valid JSON matching exactly this schema:
{
"summary": "2-3 paragraph plain-language description of what changed in this version",
"key_points": ["specific change 1", "specific change 2"],
"risks": ["new concern introduced by this change 1", "concern 2"],
"deadlines": [{"date": "YYYY-MM-DD or null", "description": "new deadline added"}],
"topic_tags": ["healthcare", "taxation"]
}
Rules:
- summary: Focus ONLY on what is different from the previous version. Be specific.
- key_points: List concrete additions, removals, or modifications in this version.
- risks: Only include risks that are new or changed relative to the previous version.
- deadlines: Only new or changed deadlines. Empty list if none.
- topic_tags: Same standard tags as before — include any new topics this version adds.
Respond with ONLY valid JSON. No preamble, no explanation, no markdown code blocks."""
def build_amendment_prompt(new_text: str, previous_text: str, bill_metadata: dict, max_tokens: int) -> str:
    """Build the user prompt for an amendment (diff) brief.

    The token budget is split evenly between the previous and new versions,
    each truncated independently via smart_truncate.
    """
    half = max_tokens // 2
    truncated_new = smart_truncate(new_text, half)
    truncated_prev = smart_truncate(previous_text, half)
    return f"""A bill has been updated. Summarize what changed between the previous and new version.
BILL METADATA:
- Title: {bill_metadata.get('title', 'Unknown')}
- Sponsor: {bill_metadata.get('sponsor_name', 'Unknown')} \
({bill_metadata.get('party', '?')}-{bill_metadata.get('state', '?')})
- Latest Action: {bill_metadata.get('latest_action_text', 'None')} \
({bill_metadata.get('latest_action_date', 'Unknown')})
PREVIOUS VERSION:
{truncated_prev}
NEW VERSION:
{truncated_new}
Produce the JSON amendment summary now:"""
def build_prompt(doc_text: str, bill_metadata: dict, max_tokens: int) -> str:
    """Build the user prompt for a full bill brief, truncating long bill text."""
    truncated = smart_truncate(doc_text, max_tokens)
    return f"""Analyze this legislation and produce a structured brief.
BILL METADATA:
- Title: {bill_metadata.get('title', 'Unknown')}
- Sponsor: {bill_metadata.get('sponsor_name', 'Unknown')} \
({bill_metadata.get('party', '?')}-{bill_metadata.get('state', '?')})
- Introduced: {bill_metadata.get('introduced_date', 'Unknown')}
- Chamber: {bill_metadata.get('chamber', 'Unknown')}
- Latest Action: {bill_metadata.get('latest_action_text', 'None')} \
({bill_metadata.get('latest_action_date', 'Unknown')})
BILL TEXT:
{truncated}
Produce the JSON brief now:"""
def parse_brief_json(raw: str | dict, provider: str, model: str) -> ReverseBrief:
    """Parse and validate LLM JSON response into a ReverseBrief.

    Accepts either an already-decoded dict or a raw string; strings may be
    wrapped in markdown code fences, which are stripped before decoding.
    """
    if isinstance(raw, dict):
        data = raw
    else:
        # Models sometimes wrap the JSON in ```json ... ``` fences — peel them off.
        cleaned = raw.strip()
        cleaned = re.sub(r"^```(?:json)?\s*", "", cleaned)
        cleaned = re.sub(r"\s*```$", "", cleaned)
        data = json.loads(cleaned)
    return ReverseBrief(
        summary=str(data.get("summary", "")),
        key_points=list(data.get("key_points", [])),
        risks=list(data.get("risks", [])),
        deadlines=list(data.get("deadlines", [])),
        topic_tags=list(data.get("topic_tags", [])),
        llm_provider=provider,
        llm_model=model,
    )
class LLMProvider(ABC):
    """Abstract interface for bill-brief LLM backends.

    Concrete providers (OpenAI, Anthropic, Gemini, Ollama) implement both
    methods and return a populated ReverseBrief.
    """

    @abstractmethod
    def generate_brief(self, doc_text: str, bill_metadata: dict) -> ReverseBrief:
        """Produce a full structured brief for one bill document."""

    @abstractmethod
    def generate_amendment_brief(self, new_text: str, previous_text: str, bill_metadata: dict) -> ReverseBrief:
        """Produce a diff-style brief comparing two versions of a bill."""
class OpenAIProvider(LLMProvider):
    """Brief generator backed by the OpenAI chat-completions API (JSON mode)."""

    def __init__(self):
        from openai import OpenAI
        self.client = OpenAI(api_key=settings.OPENAI_API_KEY)
        self.model = settings.OPENAI_MODEL

    def _complete(self, system_prompt: str, user_prompt: str) -> str:
        """Run one JSON-mode chat completion and return the raw message content."""
        response = self.client.chat.completions.create(
            model=self.model,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ],
            response_format={"type": "json_object"},
            temperature=0.1,
        )
        return response.choices[0].message.content

    def generate_brief(self, doc_text: str, bill_metadata: dict) -> ReverseBrief:
        prompt = build_prompt(doc_text, bill_metadata, MAX_TOKENS_DEFAULT)
        return parse_brief_json(self._complete(SYSTEM_PROMPT, prompt), "openai", self.model)

    def generate_amendment_brief(self, new_text: str, previous_text: str, bill_metadata: dict) -> ReverseBrief:
        prompt = build_amendment_prompt(new_text, previous_text, bill_metadata, MAX_TOKENS_DEFAULT)
        return parse_brief_json(self._complete(AMENDMENT_SYSTEM_PROMPT, prompt), "openai", self.model)
class AnthropicProvider(LLMProvider):
    """Brief generator backed by the Anthropic Messages API.

    Anthropic has no JSON response mode, so a strict "JSON only" instruction
    is appended to the system prompt instead.
    """

    def __init__(self):
        import anthropic
        self.client = anthropic.Anthropic(api_key=settings.ANTHROPIC_API_KEY)
        self.model = settings.ANTHROPIC_MODEL

    def _complete(self, system_prompt: str, user_prompt: str) -> str:
        """Run one message-completion call and return the first text block."""
        response = self.client.messages.create(
            model=self.model,
            max_tokens=4096,
            system=system_prompt + "\n\nIMPORTANT: Respond with ONLY valid JSON. No other text.",
            messages=[{"role": "user", "content": user_prompt}],
        )
        return response.content[0].text

    def generate_brief(self, doc_text: str, bill_metadata: dict) -> ReverseBrief:
        prompt = build_prompt(doc_text, bill_metadata, MAX_TOKENS_DEFAULT)
        return parse_brief_json(self._complete(SYSTEM_PROMPT, prompt), "anthropic", self.model)

    def generate_amendment_brief(self, new_text: str, previous_text: str, bill_metadata: dict) -> ReverseBrief:
        prompt = build_amendment_prompt(new_text, previous_text, bill_metadata, MAX_TOKENS_DEFAULT)
        return parse_brief_json(self._complete(AMENDMENT_SYSTEM_PROMPT, prompt), "anthropic", self.model)
class GeminiProvider(LLMProvider):
    """Brief generator backed by Google Gemini, using JSON response MIME type."""

    def __init__(self):
        import google.generativeai as genai
        genai.configure(api_key=settings.GEMINI_API_KEY)
        self._genai = genai
        self.model_name = settings.GEMINI_MODEL

    def _run(self, system_prompt: str, user_prompt: str) -> str:
        """Build a model bound to *system_prompt* and return the generated text."""
        model = self._genai.GenerativeModel(
            model_name=self.model_name,
            generation_config={"response_mime_type": "application/json", "temperature": 0.1},
            system_instruction=system_prompt,
        )
        return model.generate_content(user_prompt).text

    def generate_brief(self, doc_text: str, bill_metadata: dict) -> ReverseBrief:
        prompt = build_prompt(doc_text, bill_metadata, MAX_TOKENS_DEFAULT)
        return parse_brief_json(self._run(SYSTEM_PROMPT, prompt), "gemini", self.model_name)

    def generate_amendment_brief(self, new_text: str, previous_text: str, bill_metadata: dict) -> ReverseBrief:
        prompt = build_amendment_prompt(new_text, previous_text, bill_metadata, MAX_TOKENS_DEFAULT)
        return parse_brief_json(self._run(AMENDMENT_SYSTEM_PROMPT, prompt), "gemini", self.model_name)
class OllamaProvider(LLMProvider):
    """Brief generator backed by a local Ollama server (/api/generate).

    Ollama's generate endpoint has no separate system role, so the system
    prompt is prepended to the user prompt.
    """

    def __init__(self):
        self.base_url = settings.OLLAMA_BASE_URL.rstrip("/")
        self.model = settings.OLLAMA_MODEL

    def _generate(self, system_prompt: str, user_prompt: str) -> str:
        """Call Ollama and return the raw response string.

        If the first response is not parseable JSON, retry once with a
        stricter instruction appended.

        FIX: the original wrapped a bare ``return raw`` in try/except, which
        can never raise, so the strict-retry branch was unreachable dead code.
        We now validate the response with json.loads so the retry actually
        fires on malformed output.
        """
        import requests as req
        full_prompt = f"{system_prompt}\n\n{user_prompt}"
        response = req.post(
            f"{self.base_url}/api/generate",
            json={"model": self.model, "prompt": full_prompt, "stream": False, "format": "json"},
            timeout=300,
        )
        response.raise_for_status()
        raw = response.json().get("response", "")
        try:
            json.loads(raw)  # validate only; parse_brief_json re-parses later
            return raw
        except (json.JSONDecodeError, TypeError):
            strict = f"{full_prompt}\n\nCRITICAL: Your response MUST be valid JSON only."
            r2 = req.post(
                f"{self.base_url}/api/generate",
                json={"model": self.model, "prompt": strict, "stream": False, "format": "json"},
                timeout=300,
            )
            r2.raise_for_status()
            return r2.json().get("response", "")

    def generate_brief(self, doc_text: str, bill_metadata: dict) -> ReverseBrief:
        """Full brief with a reduced token budget suited to local models."""
        prompt = build_prompt(doc_text, bill_metadata, MAX_TOKENS_OLLAMA)
        raw = self._generate(SYSTEM_PROMPT, prompt)
        try:
            return parse_brief_json(raw, "ollama", self.model)
        except (json.JSONDecodeError, KeyError) as e:
            # Local models are less reliable at emitting strict JSON — retry once.
            logger.warning(f"Ollama JSON parse failed, retrying: {e}")
            raw2 = self._generate(
                SYSTEM_PROMPT,
                prompt + "\n\nCRITICAL: Your response MUST be valid JSON only. No text before or after the JSON object."
            )
            return parse_brief_json(raw2, "ollama", self.model)

    def generate_amendment_brief(self, new_text: str, previous_text: str, bill_metadata: dict) -> ReverseBrief:
        """Amendment brief with the same retry-on-bad-JSON behavior as generate_brief."""
        prompt = build_amendment_prompt(new_text, previous_text, bill_metadata, MAX_TOKENS_OLLAMA)
        raw = self._generate(AMENDMENT_SYSTEM_PROMPT, prompt)
        try:
            return parse_brief_json(raw, "ollama", self.model)
        except (json.JSONDecodeError, KeyError) as e:
            logger.warning(f"Ollama amendment JSON parse failed, retrying: {e}")
            raw2 = self._generate(
                AMENDMENT_SYSTEM_PROMPT,
                prompt + "\n\nCRITICAL: Your response MUST be valid JSON only. No text before or after the JSON object."
            )
            return parse_brief_json(raw2, "ollama", self.model)
def get_llm_provider() -> LLMProvider:
    """Factory — returns the configured LLM provider."""
    registry = {
        "openai": OpenAIProvider,
        "anthropic": AnthropicProvider,
        "gemini": GeminiProvider,
        "ollama": OllamaProvider,
    }
    name = settings.LLM_PROVIDER.lower()
    provider_cls = registry.get(name)
    if provider_cls is None:
        raise ValueError(f"Unknown LLM_PROVIDER: '{name}'. Must be one of: openai, anthropic, gemini, ollama")
    return provider_cls()

View File

@@ -0,0 +1,89 @@
"""
News correlation service.
- NewsAPI.org: structured news articles per bill (100 req/day limit)
- Google News RSS: volume signal for zeitgeist scoring (no limit)
"""
import logging
import time
import urllib.parse
from datetime import datetime, timedelta, timezone
from typing import Optional
import feedparser
import requests
from tenacity import retry, stop_after_attempt, wait_exponential
from app.config import settings
logger = logging.getLogger(__name__)
# REST root for NewsAPI.org (articles endpoint lives under /v2/everything).
NEWSAPI_BASE = "https://newsapi.org/v2"
# Public Google News RSS search endpoint — used only for article counting.
GOOGLE_NEWS_RSS = "https://news.google.com/rss/search"
NEWSAPI_DAILY_LIMIT = 95  # Leave 5 as buffer
@retry(stop=stop_after_attempt(2), wait=wait_exponential(min=1, max=5))
def _newsapi_get(endpoint: str, params: dict) -> dict:
    """GET a NewsAPI endpoint, injecting the configured API key.

    Retries once with exponential backoff (tenacity decorator) and raises on
    non-2xx responses.

    FIX: the original wrote the API key into the caller's ``params`` dict
    (a visible side effect); we now build a copy instead.
    """
    query = {**params, "apiKey": settings.NEWSAPI_KEY}
    response = requests.get(f"{NEWSAPI_BASE}/{endpoint}", params=query, timeout=30)
    response.raise_for_status()
    return response.json()
def build_news_query(bill_title: str, short_title: Optional[str], sponsor_name: Optional[str],
                     bill_type: str, bill_number: int) -> str:
    """Build a NewsAPI search query for a bill.

    Prefers the short title as an exact phrase; otherwise uses the first six
    words of the full title (only when at least three words long). The bill
    designator (e.g. "HR 1234") is always included, and the result is capped
    at two OR'd phrases to keep searches relevant.
    """
    phrases: list[str] = []
    if short_title:
        phrases.append(f'"{short_title}"')
    elif bill_title:
        head = bill_title.split()[:6]
        if len(head) >= 3:
            phrases.append('"' + " ".join(head) + '"')
    phrases.append(f'"{bill_type.upper()} {bill_number}"')
    return " OR ".join(phrases[:2])
def fetch_newsapi_articles(query: str, days: int = 30) -> list[dict]:
    """Fetch articles from NewsAPI.org. Returns empty list if quota is exhausted or key not set."""
    if not settings.NEWSAPI_KEY:
        return []
    try:
        window_start = (datetime.now(timezone.utc) - timedelta(days=days)).strftime("%Y-%m-%d")
        payload = _newsapi_get("everything", {
            "q": query,
            "language": "en",
            "sortBy": "relevancy",
            "pageSize": 10,
            "from": window_start,
        })
        results = []
        for item in payload.get("articles", []):
            # Require both a URL and a headline; anything else is unusable.
            if not (item.get("url") and item.get("title")):
                continue
            results.append({
                "source": item.get("source", {}).get("name", ""),
                "headline": item.get("title", ""),
                "url": item.get("url", ""),
                "published_at": item.get("publishedAt"),
            })
        return results
    except Exception as e:
        # Best-effort: news correlation must never take down the pipeline.
        logger.error(f"NewsAPI fetch failed: {e}")
        return []
def fetch_gnews_count(query: str, days: int = 30) -> int:
    """Count articles in Google News RSS for the past N days. Used as volume signal."""
    try:
        search = urllib.parse.quote(f"{query} when:{days}d")
        time.sleep(1)  # polite delay before hitting Google
        feed_url = f"{GOOGLE_NEWS_RSS}?q={search}&hl=en-US&gl=US&ceid=US:en"
        parsed_feed = feedparser.parse(feed_url)
        return len(parsed_feed.entries)
    except Exception as e:
        # Volume signal only — any failure degrades gracefully to zero.
        logger.error(f"Google News RSS fetch failed: {e}")
        return 0

View File

@@ -0,0 +1,64 @@
"""
Google Trends service (via pytrends).
pytrends is unofficial web scraping — Google blocks it sporadically.
All calls are wrapped in try/except and return 0 on any failure.
"""
import logging
import random
import time
from app.config import settings
logger = logging.getLogger(__name__)
def get_trends_score(keywords: list[str]) -> float:
    """
    Return a 0–100 interest score for the given keywords over the past 90 days.
    Returns 0.0 on any failure (rate limit, empty data, exception).
    """
    if not settings.PYTRENDS_ENABLED or not keywords:
        return 0.0
    try:
        from pytrends.request import TrendReq

        # Randomized delay so request timing doesn't look automated.
        time.sleep(random.uniform(2.0, 5.0))
        client = TrendReq(hl="en-US", tz=0, timeout=(10, 25))
        terms = [kw for kw in keywords[:5] if kw]  # pytrends accepts at most 5 terms
        if not terms:
            return 0.0
        client.build_payload(terms, timeframe="today 3-m", geo="US")
        frame = client.interest_over_time()
        if frame is None or frame.empty:
            return 0.0
        primary = terms[0]
        if primary not in frame.columns:
            return 0.0
        # Average the 14 most recent samples for the primary keyword.
        return float(frame[primary].tail(14).mean())
    except Exception as e:
        logger.debug(f"pytrends failed (non-critical): {e}")
        return 0.0
def keywords_for_bill(title: str, short_title: str, topic_tags: list[str]) -> list[str]:
    """Extract meaningful search keywords for a bill.

    Prefers the short title; otherwise the first five words of the full title
    (only when at least two words long). Up to three topic tags are appended
    with hyphens turned into spaces. At most five keywords are returned.
    """
    selected: list[str] = []
    if short_title:
        selected.append(short_title)
    elif title:
        head = title.split()[:5]
        if len(head) >= 2:
            selected.append(" ".join(head))
    for tag in (topic_tags or [])[:3]:
        selected.append(tag.replace("-", " "))
    return selected[:5]

View File

View File

@@ -0,0 +1,62 @@
from celery import Celery
from celery.schedules import crontab
from kombu import Queue
from app.config import settings
# Central Celery application: one broker/backend (Redis), five task modules,
# four named queues, and a RedBeat-backed schedule.
celery_app = Celery(
    "pocketveto",
    broker=settings.REDIS_URL,
    backend=settings.REDIS_URL,
    # Modules containing @celery_app.task definitions to import at worker start.
    include=[
        "app.workers.congress_poller",
        "app.workers.document_fetcher",
        "app.workers.llm_processor",
        "app.workers.news_fetcher",
        "app.workers.trend_scorer",
    ],
)

celery_app.conf.update(
    task_serializer="json",
    result_serializer="json",
    accept_content=["json"],
    timezone="UTC",
    enable_utc=True,
    # Late ack: task is only removed from queue after completion, not on pickup.
    # Combined with idempotent tasks, this ensures no work is lost if a worker crashes.
    task_acks_late=True,
    # Prevent workers from prefetching LLM tasks and blocking other workers.
    worker_prefetch_multiplier=1,
    # Route tasks to named queues
    task_routes={
        "app.workers.congress_poller.*": {"queue": "polling"},
        "app.workers.document_fetcher.*": {"queue": "documents"},
        "app.workers.llm_processor.*": {"queue": "llm"},
        "app.workers.news_fetcher.*": {"queue": "news"},
        "app.workers.trend_scorer.*": {"queue": "news"},
    },
    task_queues=[
        Queue("polling"),
        Queue("documents"),
        Queue("llm"),
        Queue("news"),
    ],
    # RedBeat stores schedule in Redis — restart-safe and dynamically updatable
    redbeat_redis_url=settings.REDIS_URL,
    beat_scheduler="redbeat.RedBeatScheduler",
    beat_schedule={
        # Incremental Congress.gov sync (interval configurable via settings).
        "poll-congress-bills": {
            "task": "app.workers.congress_poller.poll_congress_bills",
            "schedule": crontab(minute=f"*/{settings.CONGRESS_POLL_INTERVAL_MINUTES}"),
        },
        # News correlation for recently-active bills, every 6 hours.
        "fetch-news-active-bills": {
            "task": "app.workers.news_fetcher.fetch_news_for_active_bills",
            "schedule": crontab(hour="*/6", minute=0),
        },
        # Nightly zeitgeist scoring at 02:00 UTC.
        "calculate-trend-scores": {
            "task": "app.workers.trend_scorer.calculate_all_trend_scores",
            "schedule": crontab(hour=2, minute=0),
        },
    },
)

View File

@@ -0,0 +1,172 @@
"""
Congress.gov poller — incremental bill and member sync.
Runs on Celery Beat schedule (every 30 min by default).
Uses fromDateTime to fetch only recently updated bills.
All operations are idempotent.
"""
import logging
from datetime import datetime, timezone
from app.database import get_sync_db
from app.models import Bill, BillAction, Member, AppSetting
from app.services import congress_api
from app.workers.celery_app import celery_app
logger = logging.getLogger(__name__)
def _get_setting(db, key: str, default=None) -> str | None:
    """Read an AppSetting value by primary key, or *default* when absent."""
    setting = db.get(AppSetting, key)
    if setting is None:
        return default
    return setting.value
def _set_setting(db, key: str, value: str) -> None:
    """Upsert an AppSetting row and commit immediately."""
    setting = db.get(AppSetting, key)
    if setting is None:
        db.add(AppSetting(key=key, value=value))
    else:
        setting.value = value
    db.commit()
@celery_app.task(bind=True, max_retries=3, name="app.workers.congress_poller.poll_congress_bills")
def poll_congress_bills(self):
    """Fetch recently updated bills from Congress.gov and enqueue document + LLM processing.

    Incremental sync: the stored ``congress_last_polled_at`` timestamp is
    passed as the API's from_date_time filter so only recently-updated bills
    are pulled (None on first run — full fetch). New bills get a document
    fetch enqueued; existing bills are updated in place, which may itself
    enqueue a re-fetch. On failure the transaction is rolled back and the
    task retries (up to 3 times, 60s apart).
    """
    db = get_sync_db()
    try:
        last_polled = _get_setting(db, "congress_last_polled_at")
        current_congress = congress_api.get_current_congress()
        logger.info(f"Polling Congress {current_congress} (since {last_polled})")
        new_count = 0
        updated_count = 0
        offset = 0
        # Page through API results 250 at a time until a short/empty page.
        while True:
            response = congress_api.get_bills(
                congress=current_congress,
                offset=offset,
                limit=250,
                from_date_time=last_polled,
            )
            bills_data = response.get("bills", [])
            if not bills_data:
                break
            for bill_data in bills_data:
                parsed = congress_api.parse_bill_from_api(bill_data, current_congress)
                bill_id = parsed["bill_id"]
                existing = db.get(Bill, bill_id)
                if existing is None:
                    # Upsert sponsor member if referenced
                    sponsor_id = _sync_sponsor(db, bill_data)
                    parsed["sponsor_id"] = sponsor_id
                    parsed["last_checked_at"] = datetime.now(timezone.utc)
                    db.add(Bill(**parsed))
                    # Commit before enqueueing so the worker can see the row.
                    db.commit()
                    new_count += 1
                    # Enqueue document fetch
                    from app.workers.document_fetcher import fetch_bill_documents
                    fetch_bill_documents.delay(bill_id)
                else:
                    _update_bill_if_changed(db, existing, parsed)
                    updated_count += 1
            db.commit()
            offset += 250
            if len(bills_data) < 250:
                break
        # Update last polled timestamp
        _set_setting(db, "congress_last_polled_at", datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"))
        logger.info(f"Poll complete: {new_count} new, {updated_count} updated")
        return {"new": new_count, "updated": updated_count}
    except Exception as exc:
        db.rollback()
        logger.error(f"Poll failed: {exc}")
        raise self.retry(exc=exc, countdown=60)
    finally:
        db.close()
@celery_app.task(bind=True, max_retries=3, name="app.workers.congress_poller.sync_members")
def sync_members(self):
    """Sync current Congress members.

    Pages through the member list 250 at a time and upserts each row keyed by
    bioguide_id. Idempotent; on failure the transaction is rolled back and
    the task retries (up to 3 times, 120s apart).
    """
    db = get_sync_db()
    try:
        offset = 0
        synced = 0
        while True:
            response = congress_api.get_members(offset=offset, limit=250, current_member=True)
            members_data = response.get("members", [])
            if not members_data:
                break
            for member_data in members_data:
                parsed = congress_api.parse_member_from_api(member_data)
                if not parsed.get("bioguide_id"):
                    # Cannot key the row without a bioguide ID — skip it.
                    continue
                existing = db.get(Member, parsed["bioguide_id"])
                if existing is None:
                    db.add(Member(**parsed))
                else:
                    # Overwrite every parsed field on the existing row.
                    for k, v in parsed.items():
                        setattr(existing, k, v)
                synced += 1
            db.commit()
            offset += 250
            if len(members_data) < 250:
                break
        logger.info(f"Synced {synced} members")
        return {"synced": synced}
    except Exception as exc:
        db.rollback()
        raise self.retry(exc=exc, countdown=120)
    finally:
        db.close()
def _sync_sponsor(db, bill_data: dict) -> str | None:
    """Ensure the bill sponsor exists in the members table. Returns bioguide_id or None."""
    sponsor_list = bill_data.get("sponsors", [])
    if not sponsor_list:
        return None
    primary = sponsor_list[0]
    bioguide_id = primary.get("bioguideId")
    if not bioguide_id:
        return None
    if db.get(Member, bioguide_id) is None:
        # Minimal stub row; the full member sync fills in the rest later.
        party = primary.get("party")
        db.add(Member(
            bioguide_id=bioguide_id,
            name=primary.get("fullName", ""),
            first_name=primary.get("firstName"),
            last_name=primary.get("lastName"),
            party=party[:10] if party else None,
            state=primary.get("state"),
        ))
        db.commit()
    return bioguide_id
def _update_bill_if_changed(db, existing: Bill, parsed: dict) -> bool:
    """Update bill fields if anything has changed. Returns True if updated."""
    tracked = ("title", "short_title", "latest_action_date", "latest_action_text", "status")
    changed = False
    for field_name in tracked:
        incoming = parsed.get(field_name)
        if incoming and incoming != getattr(existing, field_name):
            setattr(existing, field_name, incoming)
            changed = True
    if not changed:
        return False
    existing.last_checked_at = datetime.now(timezone.utc)
    db.commit()
    # A changed bill may carry a new text version — re-check its documents.
    from app.workers.document_fetcher import fetch_bill_documents
    fetch_bill_documents.delay(existing.bill_id)
    return True

View File

@@ -0,0 +1,87 @@
"""
Document fetcher — retrieves bill text from GovInfo and stores it.
Triggered by congress_poller when a new bill is detected.
"""
import logging
from datetime import datetime, timezone
from app.database import get_sync_db
from app.models import Bill, BillDocument
from app.services import congress_api, govinfo_api
from app.workers.celery_app import celery_app
logger = logging.getLogger(__name__)
@celery_app.task(bind=True, max_retries=3, name="app.workers.document_fetcher.fetch_bill_documents")
def fetch_bill_documents(self, bill_id: str):
    """Fetch bill text from GovInfo and store it. Then enqueue LLM processing.

    Flow: look up the bill, list its text versions on Congress.gov, pick the
    best downloadable URL/format via govinfo_api, and store the raw text as a
    BillDocument row. Idempotent: a document with the same GovInfo URL and
    non-null text is never fetched twice. On failure the transaction is
    rolled back and the task retries (up to 3 times, 120s apart).
    """
    db = get_sync_db()
    try:
        bill = db.get(Bill, bill_id)
        if not bill:
            logger.warning(f"Bill {bill_id} not found in DB")
            return {"status": "not_found"}
        # Get text versions from Congress.gov
        try:
            text_response = congress_api.get_bill_text_versions(
                bill.congress_number, bill.bill_type, bill.bill_number
            )
        except Exception as e:
            # Missing text is normal for very new bills — not a task failure.
            logger.warning(f"No text versions for {bill_id}: {e}")
            return {"status": "no_text_versions"}
        text_versions = text_response.get("textVersions", [])
        if not text_versions:
            return {"status": "no_text_versions"}
        url, fmt = govinfo_api.find_best_text_url(text_versions)
        if not url:
            return {"status": "no_suitable_format"}
        # Idempotency: skip if we already have this exact document version
        existing = (
            db.query(BillDocument)
            .filter_by(bill_id=bill_id, govinfo_url=url)
            .filter(BillDocument.raw_text.isnot(None))
            .first()
        )
        if existing:
            return {"status": "already_fetched", "bill_id": bill_id}
        logger.info(f"Fetching {bill_id} document ({fmt}) from {url}")
        raw_text = govinfo_api.fetch_text_from_url(url, fmt)
        if not raw_text:
            # Empty body → raise so the task retries instead of storing junk.
            raise ValueError(f"Empty text returned for {bill_id}")
        # Get version label from first text version
        type_obj = text_versions[0].get("type", {}) if text_versions else {}
        doc_version = type_obj.get("name") if isinstance(type_obj, dict) else type_obj
        doc = BillDocument(
            bill_id=bill_id,
            doc_type="bill_text",
            doc_version=doc_version,
            govinfo_url=url,
            raw_text=raw_text,
            fetched_at=datetime.now(timezone.utc),
        )
        db.add(doc)
        db.commit()
        # Refresh to populate the autogenerated doc.id before enqueueing.
        db.refresh(doc)
        logger.info(f"Stored document {doc.id} for bill {bill_id} ({len(raw_text):,} chars)")
        # Enqueue LLM processing
        from app.workers.llm_processor import process_document_with_llm
        process_document_with_llm.delay(doc.id)
        return {"status": "ok", "document_id": doc.id, "chars": len(raw_text)}
    except Exception as exc:
        db.rollback()
        logger.error(f"Document fetch failed for {bill_id}: {exc}")
        raise self.retry(exc=exc, countdown=120)
    finally:
        db.close()

View File

@@ -0,0 +1,107 @@
"""
LLM processor — generates AI briefs for fetched bill documents.
Triggered by document_fetcher after successful text retrieval.
"""
import logging
from app.database import get_sync_db
from app.models import Bill, BillBrief, BillDocument, Member
from app.services.llm_service import get_llm_provider
from app.workers.celery_app import celery_app
logger = logging.getLogger(__name__)
@celery_app.task(
    bind=True,
    max_retries=2,
    rate_limit="10/m",  # Respect LLM provider rate limits
    name="app.workers.llm_processor.process_document_with_llm",
)
def process_document_with_llm(self, document_id: int):
    """Generate an AI brief for a bill document. Full brief for first version, amendment brief for subsequent versions.

    Idempotent per document: if a BillBrief already references *document_id*,
    the task is a no-op. On success a news fetch is enqueued so article
    search can use the new topic tags. Retries up to 2 times with a 5-minute
    backoff on any failure.
    """
    db = get_sync_db()
    try:
        # Idempotency: skip if brief already exists for this document
        existing = db.query(BillBrief).filter_by(document_id=document_id).first()
        if existing:
            return {"status": "already_processed", "brief_id": existing.id}
        doc = db.get(BillDocument, document_id)
        if not doc or not doc.raw_text:
            logger.warning(f"Document {document_id} not found or has no text")
            return {"status": "no_document"}
        bill = db.get(Bill, doc.bill_id)
        if not bill:
            return {"status": "no_bill"}
        sponsor = db.get(Member, bill.sponsor_id) if bill.sponsor_id else None
        # Metadata passed verbatim into the LLM prompt templates.
        bill_metadata = {
            "title": bill.title or "Unknown Title",
            "sponsor_name": sponsor.name if sponsor else "Unknown",
            "party": sponsor.party if sponsor else "Unknown",
            "state": sponsor.state if sponsor else "Unknown",
            "chamber": bill.chamber or "Unknown",
            "introduced_date": str(bill.introduced_date) if bill.introduced_date else "Unknown",
            "latest_action_text": bill.latest_action_text or "None",
            "latest_action_date": str(bill.latest_action_date) if bill.latest_action_date else "Unknown",
        }
        # Check if a full brief already exists for this bill (from an earlier document version)
        previous_full_brief = (
            db.query(BillBrief)
            .filter_by(bill_id=doc.bill_id, brief_type="full")
            .order_by(BillBrief.created_at.desc())
            .first()
        )
        provider = get_llm_provider()
        if previous_full_brief and previous_full_brief.document_id:
            # New version of a bill we've already analyzed — generate amendment brief
            previous_doc = db.get(BillDocument, previous_full_brief.document_id)
            if previous_doc and previous_doc.raw_text:
                logger.info(f"Generating amendment brief for document {document_id} (bill {doc.bill_id})")
                brief = provider.generate_amendment_brief(doc.raw_text, previous_doc.raw_text, bill_metadata)
                brief_type = "amendment"
            else:
                # Old text is gone — fall back to a fresh full brief.
                logger.info(f"Previous document unavailable, generating full brief for document {document_id}")
                brief = provider.generate_brief(doc.raw_text, bill_metadata)
                brief_type = "full"
        else:
            logger.info(f"Generating full brief for document {document_id} (bill {doc.bill_id})")
            brief = provider.generate_brief(doc.raw_text, bill_metadata)
            brief_type = "full"
        db_brief = BillBrief(
            bill_id=doc.bill_id,
            document_id=document_id,
            brief_type=brief_type,
            summary=brief.summary,
            key_points=brief.key_points,
            risks=brief.risks,
            deadlines=brief.deadlines,
            topic_tags=brief.topic_tags,
            llm_provider=brief.llm_provider,
            llm_model=brief.llm_model,
        )
        db.add(db_brief)
        db.commit()
        # Refresh to populate the autogenerated id for logging and the return value.
        db.refresh(db_brief)
        logger.info(f"{brief_type.capitalize()} brief {db_brief.id} created for bill {doc.bill_id} using {brief.llm_provider}/{brief.llm_model}")
        # Trigger news fetch now that we have topic tags
        from app.workers.news_fetcher import fetch_news_for_bill
        fetch_news_for_bill.delay(doc.bill_id)
        return {"status": "ok", "brief_id": db_brief.id, "brief_type": brief_type}
    except Exception as exc:
        db.rollback()
        logger.error(f"LLM processing failed for document {document_id}: {exc}")
        raise self.retry(exc=exc, countdown=300)  # 5 min backoff for LLM failures
    finally:
        db.close()

View File

@@ -0,0 +1,104 @@
"""
News fetcher — correlates bills with news articles.
Triggered after LLM brief creation and on a 6-hour schedule for active bills.
"""
import logging
from datetime import date, datetime, timedelta, timezone
from sqlalchemy import and_
from app.database import get_sync_db
from app.models import Bill, BillBrief, NewsArticle
from app.services import news_service
from app.workers.celery_app import celery_app
logger = logging.getLogger(__name__)
@celery_app.task(bind=True, max_retries=2, name="app.workers.news_fetcher.fetch_news_for_bill")
def fetch_news_for_bill(self, bill_id: str):
    """Fetch news articles for a specific bill.

    Builds a NewsAPI query from the bill's titles and saves any article whose
    URL is not already stored (URL is the dedup key). Commits once at the
    end; on failure rolls back and retries (up to 2 times, 5 min apart).
    """
    db = get_sync_db()
    try:
        bill = db.get(Bill, bill_id)
        if not bill:
            return {"status": "not_found"}
        # Get topic tags from latest brief
        latest_brief = (
            db.query(BillBrief)
            .filter_by(bill_id=bill_id)
            .order_by(BillBrief.created_at.desc())
            .first()
        )
        # NOTE(review): topic_tags is computed but not used below — presumably
        # intended to enrich the query; confirm before removing.
        topic_tags = latest_brief.topic_tags if latest_brief else []
        query = news_service.build_news_query(
            bill_title=bill.title,
            short_title=bill.short_title,
            sponsor_name=None,
            bill_type=bill.bill_type,
            bill_number=bill.bill_number,
        )
        articles = news_service.fetch_newsapi_articles(query)
        saved = 0
        for article in articles:
            url = article.get("url")
            if not url:
                continue
            # Idempotency: skip duplicate URLs
            existing = db.query(NewsArticle).filter_by(url=url).first()
            if existing:
                continue
            pub_at = None
            if article.get("published_at"):
                try:
                    # NewsAPI timestamps end in "Z"; normalize for fromisoformat.
                    pub_at = datetime.fromisoformat(article["published_at"].replace("Z", "+00:00"))
                except Exception:
                    # Unparseable date → store the article without one.
                    pass
            db.add(NewsArticle(
                bill_id=bill_id,
                source=article.get("source", "")[:200],
                headline=article.get("headline", ""),
                url=url,
                published_at=pub_at,
                relevance_score=1.0,
            ))
            saved += 1
        db.commit()
        logger.info(f"Saved {saved} news articles for bill {bill_id}")
        return {"status": "ok", "saved": saved}
    except Exception as exc:
        db.rollback()
        logger.error(f"News fetch failed for {bill_id}: {exc}")
        raise self.retry(exc=exc, countdown=300)
    finally:
        db.close()
@celery_app.task(bind=True, name="app.workers.news_fetcher.fetch_news_for_active_bills")
def fetch_news_for_active_bills(self):
    """
    Scheduled task: fetch news for bills with recent actions (last 7 days).
    Respects the 100/day NewsAPI limit by processing at most 80 bills per run.
    """
    db = get_sync_db()
    try:
        recent_cutoff = date.today() - timedelta(days=7)
        recent_bills = (
            db.query(Bill)
            .filter(Bill.latest_action_date >= recent_cutoff)
            .order_by(Bill.latest_action_date.desc())
            .limit(80)
            .all()
        )
        # Fan out one per-bill task per active bill.
        for recent_bill in recent_bills:
            fetch_news_for_bill.delay(recent_bill.bill_id)
        logger.info(f"Queued news fetch for {len(recent_bills)} active bills")
        return {"queued": len(recent_bills)}
    finally:
        db.close()

View File

@@ -0,0 +1,111 @@
"""
Trend scorer — calculates the daily zeitgeist score for bills.
Runs nightly via Celery Beat.
"""
import logging
from datetime import date, timedelta
from sqlalchemy import and_
from app.database import get_sync_db
from app.models import Bill, BillBrief, TrendScore
from app.services import news_service, trends_service
from app.workers.celery_app import celery_app
logger = logging.getLogger(__name__)
def calculate_composite_score(newsapi_count: int, gnews_count: int, gtrends_score: float) -> float:
    """
    Weighted composite score (0–100):
      NewsAPI article count → 0–40 pts (saturates at 20 articles)
      Google News RSS count → 0–30 pts (saturates at 50 articles)
      Google Trends score   → 0–30 pts (0–100 input)
    """
    components = (
        40 * min(newsapi_count / 20, 1.0),
        30 * min(gnews_count / 50, 1.0),
        30 * (gtrends_score / 100),
    )
    return round(sum(components), 2)
@celery_app.task(bind=True, name="app.workers.trend_scorer.calculate_all_trend_scores")
def calculate_all_trend_scores(self):
    """Nightly task: calculate trend scores for bills active in the last 90 days.

    Writes one TrendScore row per bill per day; bills already scored today
    are skipped, making reruns idempotent. Commits every 20 rows to bound
    transaction size. No retry configured: failures roll back and re-raise.
    """
    db = get_sync_db()
    try:
        cutoff = date.today() - timedelta(days=90)
        active_bills = (
            db.query(Bill)
            .filter(Bill.latest_action_date >= cutoff)
            .all()
        )
        scored = 0
        today = date.today()
        for bill in active_bills:
            # Skip if already scored today
            existing = (
                db.query(TrendScore)
                .filter_by(bill_id=bill.bill_id, score_date=today)
                .first()
            )
            if existing:
                continue
            # Get latest brief for topic tags
            latest_brief = (
                db.query(BillBrief)
                .filter_by(bill_id=bill.bill_id)
                .order_by(BillBrief.created_at.desc())
                .first()
            )
            topic_tags = latest_brief.topic_tags if latest_brief else []
            # Build search query
            query = news_service.build_news_query(
                bill_title=bill.title,
                short_title=bill.short_title,
                sponsor_name=None,
                bill_type=bill.bill_type,
                bill_number=bill.bill_number,
            )
            # Fetch counts
            newsapi_articles = news_service.fetch_newsapi_articles(query, days=30)
            newsapi_count = len(newsapi_articles)
            gnews_count = news_service.fetch_gnews_count(query, days=30)
            # Google Trends
            keywords = trends_service.keywords_for_bill(
                title=bill.title or "",
                short_title=bill.short_title or "",
                topic_tags=topic_tags,
            )
            gtrends_score = trends_service.get_trends_score(keywords)
            composite = calculate_composite_score(newsapi_count, gnews_count, gtrends_score)
            db.add(TrendScore(
                bill_id=bill.bill_id,
                score_date=today,
                newsapi_count=newsapi_count,
                gnews_count=gnews_count,
                gtrends_score=gtrends_score,
                composite_score=composite,
            ))
            scored += 1
            if scored % 20 == 0:
                # Periodic commit keeps long runs from holding one giant transaction.
                db.commit()
        db.commit()
        logger.info(f"Scored {scored} bills")
        return {"scored": scored}
    except Exception as exc:
        db.rollback()
        logger.error(f"Trend scoring failed: {exc}")
        raise
    finally:
        db.close()

44
backend/requirements.txt Normal file
View File

@@ -0,0 +1,44 @@
# Web framework
fastapi==0.115.5
uvicorn[standard]==0.32.1
python-multipart==0.0.18
# Database
sqlalchemy==2.0.36
asyncpg==0.30.0
psycopg2-binary==2.9.10
alembic==1.14.0
# Config
pydantic-settings==2.6.1
# Task queue
celery==5.4.0
celery-redbeat==2.2.0
kombu==5.4.2
# HTTP clients
httpx==0.28.1
requests==2.32.3
tenacity==9.0.0
# LLM providers
openai==1.57.4
anthropic==0.40.0
google-generativeai==0.8.3
# Document parsing
beautifulsoup4==4.12.3
lxml==5.3.0
feedparser==6.0.11
pdfminer.six==20231228
# Trends
pytrends==4.9.2
# Redis client (for health check)
redis==5.2.1
# Utilities
python-dateutil==2.9.0
tiktoken==0.8.0