commit e418dd9ae090cfc2956617972c518b289e21670d Author: Jack Levy Date: Sat Feb 28 21:08:19 2026 -0500 Initial commit diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..6c75e96 --- /dev/null +++ b/.env.example @@ -0,0 +1,49 @@ +# ─── URLs ───────────────────────────────────────────────────────────────────── +# Local hostname used when accessing the app on your LAN/server directly +LOCAL_URL=http://localhost +# Public-facing URL when accessed via your reverse proxy (leave blank if none) +PUBLIC_URL= + +# ─── PostgreSQL ─────────────────────────────────────────────────────────────── +POSTGRES_USER=congress +POSTGRES_PASSWORD=congress +POSTGRES_DB=pocketveto + +# These are constructed automatically from the above in docker-compose.yml. +# Override here only if connecting to an external DB. +# DATABASE_URL=postgresql+asyncpg://congress:congress@postgres:5432/pocketveto +# SYNC_DATABASE_URL=postgresql://congress:congress@postgres:5432/pocketveto + +# ─── Redis ──────────────────────────────────────────────────────────────────── +REDIS_URL=redis://redis:6379/0 + +# ─── api.data.gov (Congress.gov + GovInfo share the same key) ───────────────── +# Free key: https://api.data.gov/signup/ +DATA_GOV_API_KEY= + +# How often to poll Congress.gov for new/updated bills (minutes) +CONGRESS_POLL_INTERVAL_MINUTES=30 + +# ─── LLM Provider ───────────────────────────────────────────────────────────── +# Choose one: openai | anthropic | gemini | ollama +LLM_PROVIDER=openai + +OPENAI_API_KEY= +OPENAI_MODEL=gpt-4o + +ANTHROPIC_API_KEY= +ANTHROPIC_MODEL=claude-opus-4-6 + +GEMINI_API_KEY= +GEMINI_MODEL=gemini-1.5-pro + +# For Ollama: use host.docker.internal to reach a locally running Ollama server +OLLAMA_BASE_URL=http://host.docker.internal:11434 +OLLAMA_MODEL=llama3.1 + +# ─── News ───────────────────────────────────────────────────────────────────── +# Free key (100 req/day): https://newsapi.org/register +NEWSAPI_KEY= + +# ─── Google Trends 
──────────────────────────────────────────────────────────── +PYTRENDS_ENABLED=true diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..346fc30 --- /dev/null +++ b/.gitignore @@ -0,0 +1,19 @@ +.env +__pycache__/ +*.pyc +*.pyo +.pytest_cache/ +.mypy_cache/ +dist/ +build/ +*.egg-info/ +.venv/ +venv/ + +# Next.js +frontend/.next/ +frontend/node_modules/ +frontend/out/ + +# Docker +*.log diff --git a/backend/Dockerfile b/backend/Dockerfile new file mode 100644 index 0000000..5434248 --- /dev/null +++ b/backend/Dockerfile @@ -0,0 +1,19 @@ +FROM python:3.12-slim + +WORKDIR /app + +# System deps for psycopg2, pdfminer, lxml +RUN apt-get update && apt-get install -y --no-install-recommends \ + gcc \ + libpq-dev \ + libxml2-dev \ + libxslt-dev \ + && rm -rf /var/lib/apt/lists/* + +COPY requirements.txt . +RUN pip install --no-cache-dir -r requirements.txt + +COPY . . + +# Default command (overridden per service in docker-compose.yml) +CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"] diff --git a/backend/alembic.ini b/backend/alembic.ini new file mode 100644 index 0000000..2ac28cd --- /dev/null +++ b/backend/alembic.ini @@ -0,0 +1,41 @@ +[alembic] +script_location = alembic +prepend_sys_path = . 
+version_path_separator = os +sqlalchemy.url = postgresql://congress:congress@postgres:5432/pocketveto + +[post_write_hooks] + +[loggers] +keys = root,sqlalchemy,alembic + +[handlers] +keys = console + +[formatters] +keys = generic + +[logger_root] +level = WARN +handlers = console +qualname = + +[logger_sqlalchemy] +level = WARN +handlers = +qualname = sqlalchemy.engine + +[logger_alembic] +level = INFO +handlers = +qualname = alembic + +[handler_console] +class = StreamHandler +args = (sys.stderr,) +level = NOTSET +formatter = generic + +[formatter_generic] +format = %(levelname)-5.5s [%(name)s] %(message)s +datefmt = %H:%M:%S diff --git a/backend/alembic/env.py b/backend/alembic/env.py new file mode 100644 index 0000000..7d0c4be --- /dev/null +++ b/backend/alembic/env.py @@ -0,0 +1,51 @@ +import os +from logging.config import fileConfig + +from alembic import context +from sqlalchemy import engine_from_config, pool + +# Import all models so Alembic can detect them +from app.database import Base +import app.models # noqa: F401 — registers all models with Base.metadata + +config = context.config + +# Override sqlalchemy.url from environment if set +sync_url = os.environ.get("SYNC_DATABASE_URL") +if sync_url: + config.set_main_option("sqlalchemy.url", sync_url) + +if config.config_file_name is not None: + fileConfig(config.config_file_name) + +target_metadata = Base.metadata + + +def run_migrations_offline() -> None: + url = config.get_main_option("sqlalchemy.url") + context.configure( + url=url, + target_metadata=target_metadata, + literal_binds=True, + dialect_opts={"paramstyle": "named"}, + ) + with context.begin_transaction(): + context.run_migrations() + + +def run_migrations_online() -> None: + connectable = engine_from_config( + config.get_section(config.config_ini_section, {}), + prefix="sqlalchemy.", + poolclass=pool.NullPool, + ) + with connectable.connect() as connection: + context.configure(connection=connection, target_metadata=target_metadata) + with 
context.begin_transaction(): + context.run_migrations() + + +if context.is_offline_mode(): + run_migrations_offline() +else: + run_migrations_online() diff --git a/backend/alembic/script.py.mako b/backend/alembic/script.py.mako new file mode 100644 index 0000000..fbc4b07 --- /dev/null +++ b/backend/alembic/script.py.mako @@ -0,0 +1,26 @@ +"""${message} + +Revision ID: ${up_revision} +Revises: ${down_revision | comma,n} +Create Date: ${create_date} + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa +${imports if imports else ""} + +# revision identifiers, used by Alembic. +revision: str = ${repr(up_revision)} +down_revision: Union[str, None] = ${repr(down_revision)} +branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)} +depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)} + + +def upgrade() -> None: + ${upgrades if upgrades else "pass"} + + +def downgrade() -> None: + ${downgrades if downgrades else "pass"} diff --git a/backend/alembic/versions/0001_initial_schema.py b/backend/alembic/versions/0001_initial_schema.py new file mode 100644 index 0000000..823d954 --- /dev/null +++ b/backend/alembic/versions/0001_initial_schema.py @@ -0,0 +1,205 @@ +"""initial schema + +Revision ID: 0001 +Revises: +Create Date: 2025-01-01 00:00:00.000000 + +""" +from typing import Sequence, Union + +import sqlalchemy as sa +from alembic import op +from sqlalchemy.dialects.postgresql import JSONB + +revision: str = "0001" +down_revision: Union[str, None] = None +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + # ── members ────────────────────────────────────────────────────────────── + op.create_table( + "members", + sa.Column("bioguide_id", sa.String(), primary_key=True), + sa.Column("name", sa.String(), nullable=False), + sa.Column("first_name", sa.String()), + sa.Column("last_name", sa.String()), + sa.Column("party", 
sa.String(10)), + sa.Column("state", sa.String(5)), + sa.Column("chamber", sa.String(10)), + sa.Column("district", sa.String(10)), + sa.Column("photo_url", sa.String()), + sa.Column("official_url", sa.String()), + sa.Column("created_at", sa.DateTime(timezone=True), server_default=sa.func.now()), + sa.Column("updated_at", sa.DateTime(timezone=True), server_default=sa.func.now()), + ) + + # ── bills ───────────────────────────────────────────────────────────────── + op.create_table( + "bills", + sa.Column("bill_id", sa.String(), primary_key=True), + sa.Column("congress_number", sa.Integer(), nullable=False), + sa.Column("bill_type", sa.String(10), nullable=False), + sa.Column("bill_number", sa.Integer(), nullable=False), + sa.Column("title", sa.Text()), + sa.Column("short_title", sa.Text()), + sa.Column("sponsor_id", sa.String(), sa.ForeignKey("members.bioguide_id"), nullable=True), + sa.Column("introduced_date", sa.Date()), + sa.Column("latest_action_date", sa.Date()), + sa.Column("latest_action_text", sa.Text()), + sa.Column("status", sa.String(100)), + sa.Column("chamber", sa.String(10)), + sa.Column("congress_url", sa.String()), + sa.Column("govtrack_url", sa.String()), + sa.Column("last_checked_at", sa.DateTime(timezone=True)), + sa.Column("actions_fetched_at", sa.DateTime(timezone=True)), + sa.Column("created_at", sa.DateTime(timezone=True), server_default=sa.func.now()), + sa.Column("updated_at", sa.DateTime(timezone=True), server_default=sa.func.now()), + ) + op.create_index("ix_bills_congress_number", "bills", ["congress_number"]) + op.create_index("ix_bills_latest_action_date", "bills", ["latest_action_date"]) + op.create_index("ix_bills_introduced_date", "bills", ["introduced_date"]) + op.create_index("ix_bills_chamber", "bills", ["chamber"]) + op.create_index("ix_bills_sponsor_id", "bills", ["sponsor_id"]) + + # Full-text search vector (tsvector generated column) — manual, not in autogenerate + op.execute(""" + ALTER TABLE bills ADD COLUMN search_vector 
tsvector + GENERATED ALWAYS AS ( + setweight(to_tsvector('english', coalesce(title, '')), 'A') || + setweight(to_tsvector('english', coalesce(short_title, '')), 'A') || + setweight(to_tsvector('english', coalesce(latest_action_text, '')), 'C') + ) STORED + """) + op.execute("CREATE INDEX ix_bills_search_vector ON bills USING GIN(search_vector)") + + # ── bill_actions ────────────────────────────────────────────────────────── + op.create_table( + "bill_actions", + sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True), + sa.Column("bill_id", sa.String(), sa.ForeignKey("bills.bill_id", ondelete="CASCADE"), nullable=False), + sa.Column("action_date", sa.Date()), + sa.Column("action_text", sa.Text()), + sa.Column("action_type", sa.String(100)), + sa.Column("chamber", sa.String(10)), + sa.Column("created_at", sa.DateTime(timezone=True), server_default=sa.func.now()), + ) + op.create_index("ix_bill_actions_bill_id", "bill_actions", ["bill_id"]) + op.create_index("ix_bill_actions_action_date", "bill_actions", ["action_date"]) + + # ── bill_documents ──────────────────────────────────────────────────────── + op.create_table( + "bill_documents", + sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True), + sa.Column("bill_id", sa.String(), sa.ForeignKey("bills.bill_id", ondelete="CASCADE"), nullable=False), + sa.Column("doc_type", sa.String(50)), + sa.Column("doc_version", sa.String(50)), + sa.Column("govinfo_url", sa.String()), + sa.Column("raw_text", sa.Text()), + sa.Column("fetched_at", sa.DateTime(timezone=True)), + sa.Column("created_at", sa.DateTime(timezone=True), server_default=sa.func.now()), + ) + op.create_index("ix_bill_documents_bill_id", "bill_documents", ["bill_id"]) + + # ── bill_briefs ─────────────────────────────────────────────────────────── + op.create_table( + "bill_briefs", + sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True), + sa.Column("bill_id", sa.String(), sa.ForeignKey("bills.bill_id", 
ondelete="CASCADE"), nullable=False), + sa.Column("document_id", sa.Integer(), sa.ForeignKey("bill_documents.id", ondelete="SET NULL"), nullable=True), + sa.Column("summary", sa.Text()), + sa.Column("key_points", JSONB()), + sa.Column("risks", JSONB()), + sa.Column("deadlines", JSONB()), + sa.Column("topic_tags", JSONB()), + sa.Column("llm_provider", sa.String(50)), + sa.Column("llm_model", sa.String(100)), + sa.Column("created_at", sa.DateTime(timezone=True), server_default=sa.func.now()), + ) + op.create_index("ix_bill_briefs_bill_id", "bill_briefs", ["bill_id"]) + op.execute("CREATE INDEX ix_bill_briefs_topic_tags ON bill_briefs USING GIN(topic_tags)") + + # ── committees ──────────────────────────────────────────────────────────── + op.create_table( + "committees", + sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True), + sa.Column("committee_code", sa.String(20), unique=True, nullable=False), + sa.Column("name", sa.String(500)), + sa.Column("chamber", sa.String(10)), + sa.Column("committee_type", sa.String(50)), + ) + + # ── committee_bills ─────────────────────────────────────────────────────── + op.create_table( + "committee_bills", + sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True), + sa.Column("committee_id", sa.Integer(), sa.ForeignKey("committees.id", ondelete="CASCADE"), nullable=False), + sa.Column("bill_id", sa.String(), sa.ForeignKey("bills.bill_id", ondelete="CASCADE"), nullable=False), + sa.Column("referral_date", sa.Date()), + ) + op.create_index("ix_committee_bills_bill_id", "committee_bills", ["bill_id"]) + op.create_index("ix_committee_bills_committee_id", "committee_bills", ["committee_id"]) + + # ── news_articles ───────────────────────────────────────────────────────── + op.create_table( + "news_articles", + sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True), + sa.Column("bill_id", sa.String(), sa.ForeignKey("bills.bill_id", ondelete="CASCADE"), nullable=False), + sa.Column("source", 
sa.String(200)), + sa.Column("headline", sa.Text()), + sa.Column("url", sa.String(), unique=True), + sa.Column("published_at", sa.DateTime(timezone=True)), + sa.Column("relevance_score", sa.Float(), default=0.0), + sa.Column("created_at", sa.DateTime(timezone=True), server_default=sa.func.now()), + ) + op.create_index("ix_news_articles_bill_id", "news_articles", ["bill_id"]) + op.create_index("ix_news_articles_published_at", "news_articles", ["published_at"]) + + # ── trend_scores ────────────────────────────────────────────────────────── + op.create_table( + "trend_scores", + sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True), + sa.Column("bill_id", sa.String(), sa.ForeignKey("bills.bill_id", ondelete="CASCADE"), nullable=False), + sa.Column("score_date", sa.Date(), nullable=False), + sa.Column("newsapi_count", sa.Integer(), default=0), + sa.Column("gnews_count", sa.Integer(), default=0), + sa.Column("gtrends_score", sa.Float(), default=0.0), + sa.Column("composite_score", sa.Float(), default=0.0), + sa.UniqueConstraint("bill_id", "score_date", name="uq_trend_scores_bill_date"), + ) + op.create_index("ix_trend_scores_bill_id", "trend_scores", ["bill_id"]) + op.create_index("ix_trend_scores_score_date", "trend_scores", ["score_date"]) + op.create_index("ix_trend_scores_composite", "trend_scores", ["composite_score"]) + + # ── follows ─────────────────────────────────────────────────────────────── + op.create_table( + "follows", + sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True), + sa.Column("follow_type", sa.String(20), nullable=False), + sa.Column("follow_value", sa.String(), nullable=False), + sa.Column("created_at", sa.DateTime(timezone=True), server_default=sa.func.now()), + sa.UniqueConstraint("follow_type", "follow_value", name="uq_follows_type_value"), + ) + + # ── app_settings ────────────────────────────────────────────────────────── + op.create_table( + "app_settings", + sa.Column("key", sa.String(), primary_key=True), 
+ sa.Column("value", sa.String()), + sa.Column("updated_at", sa.DateTime(timezone=True), server_default=sa.func.now()), + ) + + +def downgrade() -> None: + op.drop_table("app_settings") + op.drop_table("follows") + op.drop_table("trend_scores") + op.drop_table("news_articles") + op.drop_table("committee_bills") + op.drop_table("committees") + op.drop_table("bill_briefs") + op.drop_table("bill_documents") + op.drop_table("bill_actions") + op.drop_table("bills") + op.drop_table("members") diff --git a/backend/alembic/versions/0002_widen_chamber_party_columns.py b/backend/alembic/versions/0002_widen_chamber_party_columns.py new file mode 100644 index 0000000..b2130f5 --- /dev/null +++ b/backend/alembic/versions/0002_widen_chamber_party_columns.py @@ -0,0 +1,30 @@ +"""widen chamber and party columns + +Revision ID: 0002 +Revises: 0001 +Create Date: 2026-02-28 00:00:00.000000 + +""" +from typing import Sequence, Union + +import sqlalchemy as sa +from alembic import op + +revision: str = "0002" +down_revision: Union[str, None] = "0001" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + op.alter_column("members", "chamber", type_=sa.String(50)) + op.alter_column("members", "party", type_=sa.String(50)) + op.alter_column("bills", "chamber", type_=sa.String(50)) + op.alter_column("bill_actions", "chamber", type_=sa.String(50)) + + +def downgrade() -> None: + op.alter_column("bill_actions", "chamber", type_=sa.String(10)) + op.alter_column("bills", "chamber", type_=sa.String(10)) + op.alter_column("members", "party", type_=sa.String(10)) + op.alter_column("members", "chamber", type_=sa.String(10)) diff --git a/backend/alembic/versions/0003_widen_member_state_district.py b/backend/alembic/versions/0003_widen_member_state_district.py new file mode 100644 index 0000000..d8af37e --- /dev/null +++ b/backend/alembic/versions/0003_widen_member_state_district.py @@ -0,0 +1,26 @@ +"""widen member 
state and district columns + +Revision ID: 0003 +Revises: 0002 +Create Date: 2026-03-01 00:00:00.000000 + +""" +from typing import Sequence, Union + +import sqlalchemy as sa +from alembic import op + +revision: str = "0003" +down_revision: Union[str, None] = "0002" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + op.alter_column("members", "state", type_=sa.String(50)) + op.alter_column("members", "district", type_=sa.String(50)) + + +def downgrade() -> None: + op.alter_column("members", "district", type_=sa.String(10)) + op.alter_column("members", "state", type_=sa.String(5)) diff --git a/backend/alembic/versions/0004_add_brief_type.py b/backend/alembic/versions/0004_add_brief_type.py new file mode 100644 index 0000000..6f4838c --- /dev/null +++ b/backend/alembic/versions/0004_add_brief_type.py @@ -0,0 +1,27 @@ +"""add brief_type to bill_briefs + +Revision ID: 0004 +Revises: 0003 +Create Date: 2026-03-01 00:00:00.000000 + +""" +from typing import Sequence, Union + +import sqlalchemy as sa +from alembic import op + +revision: str = "0004" +down_revision: Union[str, None] = "0003" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + op.add_column( + "bill_briefs", + sa.Column("brief_type", sa.String(20), nullable=False, server_default="full"), + ) + + +def downgrade() -> None: + op.drop_column("bill_briefs", "brief_type") diff --git a/backend/app/api/__init__.py b/backend/app/api/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/backend/app/api/admin.py b/backend/app/api/admin.py new file mode 100644 index 0000000..dfc5ed1 --- /dev/null +++ b/backend/app/api/admin.py @@ -0,0 +1,39 @@ +from fastapi import APIRouter + +router = APIRouter() + + +@router.post("/trigger-poll") +async def trigger_poll(): + """Manually trigger a Congress.gov poll without waiting for the Beat 
schedule.""" + from app.workers.congress_poller import poll_congress_bills + task = poll_congress_bills.delay() + return {"task_id": task.id, "status": "queued"} + + +@router.post("/trigger-member-sync") +async def trigger_member_sync(): + """Manually trigger a member sync.""" + from app.workers.congress_poller import sync_members + task = sync_members.delay() + return {"task_id": task.id, "status": "queued"} + + +@router.post("/trigger-trend-scores") +async def trigger_trend_scores(): + """Manually trigger trend score calculation.""" + from app.workers.trend_scorer import calculate_all_trend_scores + task = calculate_all_trend_scores.delay() + return {"task_id": task.id, "status": "queued"} + + +@router.get("/task-status/{task_id}") +async def get_task_status(task_id: str): + """Check the status of an async task.""" + from app.workers.celery_app import celery_app + result = celery_app.AsyncResult(task_id) + return { + "task_id": task_id, + "status": result.status, + "result": result.result if result.ready() else None, + } diff --git a/backend/app/api/bills.py b/backend/app/api/bills.py new file mode 100644 index 0000000..6429732 --- /dev/null +++ b/backend/app/api/bills.py @@ -0,0 +1,145 @@ +from typing import Optional + +from fastapi import APIRouter, Depends, Query +from sqlalchemy import desc, func, or_, select +from sqlalchemy.ext.asyncio import AsyncSession +from sqlalchemy.orm import selectinload + +from app.database import get_db +from app.models import Bill, BillAction, BillBrief, NewsArticle, TrendScore +from app.schemas.schemas import ( + BillDetailSchema, + BillSchema, + BillActionSchema, + NewsArticleSchema, + PaginatedResponse, + TrendScoreSchema, +) + +router = APIRouter() + + +@router.get("", response_model=PaginatedResponse[BillSchema]) +async def list_bills( + chamber: Optional[str] = Query(None), + topic: Optional[str] = Query(None), + sponsor_id: Optional[str] = Query(None), + q: Optional[str] = Query(None), + page: int = Query(1, ge=1), + 
per_page: int = Query(20, ge=1, le=100), + sort: str = Query("latest_action_date"), + db: AsyncSession = Depends(get_db), +): + query = ( + select(Bill) + .options( + selectinload(Bill.sponsor), + selectinload(Bill.briefs), + selectinload(Bill.trend_scores), + ) + ) + + if chamber: + query = query.where(Bill.chamber == chamber) + if sponsor_id: + query = query.where(Bill.sponsor_id == sponsor_id) + if topic: + query = query.join(BillBrief, Bill.bill_id == BillBrief.bill_id).where( + BillBrief.topic_tags.contains([topic]) + ) + if q: + query = query.where( + or_( + Bill.bill_id.ilike(f"%{q}%"), + Bill.title.ilike(f"%{q}%"), + Bill.short_title.ilike(f"%{q}%"), + ) + ) + + # Count total + count_query = select(func.count()).select_from(query.subquery()) + total = await db.scalar(count_query) or 0 + + # Sort + sort_col = getattr(Bill, sort, Bill.latest_action_date) + query = query.order_by(desc(sort_col)).offset((page - 1) * per_page).limit(per_page) + + result = await db.execute(query) + bills = result.scalars().unique().all() + + # Attach latest brief and trend to each bill + items = [] + for bill in bills: + bill_dict = BillSchema.model_validate(bill) + if bill.briefs: + bill_dict.latest_brief = bill.briefs[0] + if bill.trend_scores: + bill_dict.latest_trend = bill.trend_scores[0] + items.append(bill_dict) + + return PaginatedResponse( + items=items, + total=total, + page=page, + per_page=per_page, + pages=max(1, (total + per_page - 1) // per_page), + ) + + +@router.get("/{bill_id}", response_model=BillDetailSchema) +async def get_bill(bill_id: str, db: AsyncSession = Depends(get_db)): + result = await db.execute( + select(Bill) + .options( + selectinload(Bill.sponsor), + selectinload(Bill.actions), + selectinload(Bill.briefs), + selectinload(Bill.news_articles), + selectinload(Bill.trend_scores), + ) + .where(Bill.bill_id == bill_id) + ) + bill = result.scalar_one_or_none() + if not bill: + from fastapi import HTTPException + raise HTTPException(status_code=404, 
detail="Bill not found") + + detail = BillDetailSchema.model_validate(bill) + if bill.briefs: + detail.latest_brief = bill.briefs[0] + if bill.trend_scores: + detail.latest_trend = bill.trend_scores[0] + return detail + + +@router.get("/{bill_id}/actions", response_model=list[BillActionSchema]) +async def get_bill_actions(bill_id: str, db: AsyncSession = Depends(get_db)): + result = await db.execute( + select(BillAction) + .where(BillAction.bill_id == bill_id) + .order_by(desc(BillAction.action_date)) + ) + return result.scalars().all() + + +@router.get("/{bill_id}/news", response_model=list[NewsArticleSchema]) +async def get_bill_news(bill_id: str, db: AsyncSession = Depends(get_db)): + result = await db.execute( + select(NewsArticle) + .where(NewsArticle.bill_id == bill_id) + .order_by(desc(NewsArticle.published_at)) + .limit(20) + ) + return result.scalars().all() + + +@router.get("/{bill_id}/trend", response_model=list[TrendScoreSchema]) +async def get_bill_trend(bill_id: str, days: int = Query(30, ge=7, le=365), db: AsyncSession = Depends(get_db)): + from datetime import date, timedelta + cutoff = date.today() - timedelta(days=days) + result = await db.execute( + select(TrendScore) + .where(TrendScore.bill_id == bill_id, TrendScore.score_date >= cutoff) + .order_by(TrendScore.score_date) + ) + return result.scalars().all() diff --git a/backend/app/api/dashboard.py b/backend/app/api/dashboard.py new file mode 100644 index 0000000..084a334 --- /dev/null +++ b/backend/app/api/dashboard.py @@ -0,0 +1,102 @@ +from datetime import date, timedelta + +from fastapi import Depends +from fastapi import APIRouter +from sqlalchemy import desc, select +from sqlalchemy.ext.asyncio import AsyncSession +from sqlalchemy.orm import selectinload + +from app.database import get_db +from app.models import Bill, BillBrief, Follow, TrendScore +from app.schemas.schemas import BillSchema + +router = APIRouter() + + +@router.get("") +async def get_dashboard(db: AsyncSession = 
Depends(get_db)): + # Load all follows + follows_result = await db.execute(select(Follow)) + follows = follows_result.scalars().all() + + followed_bill_ids = [f.follow_value for f in follows if f.follow_type == "bill"] + followed_member_ids = [f.follow_value for f in follows if f.follow_type == "member"] + followed_topics = [f.follow_value for f in follows if f.follow_type == "topic"] + + feed_bills: list[Bill] = [] + seen_ids: set[str] = set() + + # 1. Directly followed bills + if followed_bill_ids: + result = await db.execute( + select(Bill) + .options(selectinload(Bill.sponsor), selectinload(Bill.briefs), selectinload(Bill.trend_scores)) + .where(Bill.bill_id.in_(followed_bill_ids)) + .order_by(desc(Bill.latest_action_date)) + .limit(20) + ) + for bill in result.scalars().all(): + if bill.bill_id not in seen_ids: + feed_bills.append(bill) + seen_ids.add(bill.bill_id) + + # 2. Bills from followed members + if followed_member_ids: + result = await db.execute( + select(Bill) + .options(selectinload(Bill.sponsor), selectinload(Bill.briefs), selectinload(Bill.trend_scores)) + .where(Bill.sponsor_id.in_(followed_member_ids)) + .order_by(desc(Bill.latest_action_date)) + .limit(20) + ) + for bill in result.scalars().all(): + if bill.bill_id not in seen_ids: + feed_bills.append(bill) + seen_ids.add(bill.bill_id) + + # 3. Bills matching followed topics + for topic in followed_topics: + result = await db.execute( + select(Bill) + .options(selectinload(Bill.sponsor), selectinload(Bill.briefs), selectinload(Bill.trend_scores)) + .join(BillBrief, Bill.bill_id == BillBrief.bill_id) + .where(BillBrief.topic_tags.contains([topic])) + .order_by(desc(Bill.latest_action_date)) + .limit(10) + ) + for bill in result.scalars().all(): + if bill.bill_id not in seen_ids: + feed_bills.append(bill) + seen_ids.add(bill.bill_id) + + # Sort feed by latest action date + feed_bills.sort(key=lambda b: b.latest_action_date or date.min, reverse=True) + + # 4. 
Trending bills (top 10 by composite score today) + trending_result = await db.execute( + select(Bill) + .options(selectinload(Bill.sponsor), selectinload(Bill.briefs), selectinload(Bill.trend_scores)) + .join(TrendScore, Bill.bill_id == TrendScore.bill_id) + .where(TrendScore.score_date >= date.today() - timedelta(days=1)) + .order_by(desc(TrendScore.composite_score)) + .limit(10) + ) + trending_bills = trending_result.scalars().unique().all() + + def serialize_bill(bill: Bill) -> dict: + b = BillSchema.model_validate(bill) + if bill.briefs: + b.latest_brief = bill.briefs[0] + if bill.trend_scores: + b.latest_trend = bill.trend_scores[0] + return b.model_dump() + + return { + "feed": [serialize_bill(b) for b in feed_bills[:50]], + "trending": [serialize_bill(b) for b in trending_bills], + "follows": { + "bills": len(followed_bill_ids), + "members": len(followed_member_ids), + "topics": len(followed_topics), + }, + } diff --git a/backend/app/api/follows.py b/backend/app/api/follows.py new file mode 100644 index 0000000..de12887 --- /dev/null +++ b/backend/app/api/follows.py @@ -0,0 +1,49 @@ +from fastapi import APIRouter, Depends, HTTPException +from sqlalchemy import select +from sqlalchemy.exc import IntegrityError +from sqlalchemy.ext.asyncio import AsyncSession + +from app.database import get_db +from app.models import Follow +from app.schemas.schemas import FollowCreate, FollowSchema + +router = APIRouter() + +VALID_FOLLOW_TYPES = {"bill", "member", "topic"} + + +@router.get("", response_model=list[FollowSchema]) +async def list_follows(db: AsyncSession = Depends(get_db)): + result = await db.execute(select(Follow).order_by(Follow.created_at.desc())) + return result.scalars().all() + + +@router.post("", response_model=FollowSchema, status_code=201) +async def add_follow(body: FollowCreate, db: AsyncSession = Depends(get_db)): + if body.follow_type not in VALID_FOLLOW_TYPES: + raise HTTPException(status_code=400, detail=f"follow_type must be one of 
{VALID_FOLLOW_TYPES}") + follow = Follow(follow_type=body.follow_type, follow_value=body.follow_value) + db.add(follow) + try: + await db.commit() + await db.refresh(follow) + except IntegrityError: + await db.rollback() + # Already following — return existing + result = await db.execute( + select(Follow).where( + Follow.follow_type == body.follow_type, + Follow.follow_value == body.follow_value, + ) + ) + return result.scalar_one() + return follow + + +@router.delete("/{follow_id}", status_code=204) +async def remove_follow(follow_id: int, db: AsyncSession = Depends(get_db)): + follow = await db.get(Follow, follow_id) + if not follow: + raise HTTPException(status_code=404, detail="Follow not found") + await db.delete(follow) + await db.commit() diff --git a/backend/app/api/health.py b/backend/app/api/health.py new file mode 100644 index 0000000..8b5adf9 --- /dev/null +++ b/backend/app/api/health.py @@ -0,0 +1,43 @@ +from datetime import datetime, timezone + +import redis as redis_lib +from fastapi import APIRouter, Depends +from sqlalchemy import text +from sqlalchemy.ext.asyncio import AsyncSession + +from app.config import settings +from app.database import get_db + +router = APIRouter() + + +@router.get("") +async def health(): + return {"status": "ok", "timestamp": datetime.now(timezone.utc).isoformat()} + + +@router.get("/detailed") +async def health_detailed(db: AsyncSession = Depends(get_db)): + # Check DB + db_ok = False + try: + await db.execute(text("SELECT 1")) + db_ok = True + except Exception: + pass + + # Check Redis + redis_ok = False + try: + r = redis_lib.from_url(settings.REDIS_URL) + redis_ok = r.ping() + except Exception: + pass + + status = "ok" if (db_ok and redis_ok) else "degraded" + return { + "status": status, + "database": "ok" if db_ok else "error", + "redis": "ok" if redis_ok else "error", + "timestamp": datetime.now(timezone.utc).isoformat(), + } diff --git a/backend/app/api/members.py b/backend/app/api/members.py new file mode 100644 
@router.get("/{bioguide_id}/bills", response_model=PaginatedResponse[BillSchema])
async def get_member_bills(
    bioguide_id: str,
    page: int = Query(1, ge=1),
    per_page: int = Query(20, ge=1, le=100),
    db: AsyncSession = Depends(get_db),
):
    """Return one page of the bills sponsored by a member, newest first.

    Args:
        bioguide_id: Sponsor identifier matched against ``Bill.sponsor_id``.
        page: 1-based page number.
        per_page: Page size (1-100).
        db: Request-scoped async session.

    Returns:
        A ``PaginatedResponse`` of ``BillSchema`` items; ``pages`` is at
        least 1 even when there are no matching bills.
    """
    base_query = select(Bill).where(Bill.sponsor_id == bioguide_id)
    total = await db.scalar(select(func.count()).select_from(base_query.subquery())) or 0

    # BillSchema reads both ``briefs`` (below) and ``sponsor``. Under
    # AsyncSession an unloaded relationship cannot be lazy-loaded on attribute
    # access, so every relationship the schema touches is loaded eagerly here.
    page_query = (
        base_query
        .options(selectinload(Bill.briefs), selectinload(Bill.sponsor))
        .order_by(desc(Bill.introduced_date))
        .offset((page - 1) * per_page)
        .limit(per_page)
    )

    result = await db.execute(page_query)
    bills = result.scalars().all()

    items = []
    for bill in bills:
        b = BillSchema.model_validate(bill)
        if bill.briefs:
            # Bill.briefs is ordered by created_at descending, so [0] is the latest.
            b.latest_brief = bill.briefs[0]
        items.append(b)

    return PaginatedResponse(
        items=items,
        total=total,
        page=page,
        per_page=per_page,
        pages=max(1, (total + per_page - 1) // per_page),
    )
@router.get("", response_model=SettingsResponse)
async def get_settings(db: AsyncSession = Depends(get_db)):
    """Return current effective settings (env + DB overrides).

    DB rows in ``app_settings`` take precedence over environment values.
    Override values are stored as free-form, nullable strings, so an empty,
    NULL, or non-numeric override falls back to the environment value
    instead of breaking the endpoint.
    """
    # DB overrides take precedence over env vars
    result = await db.execute(select(AppSetting))
    overrides = {row.key: row.value for row in result.scalars().all()}

    provider = overrides.get("llm_provider") or settings.LLM_PROVIDER
    model = overrides.get("llm_model") or _current_model(provider)

    try:
        poll_interval = int(overrides["congress_poll_interval_minutes"])
    except (KeyError, TypeError, ValueError):
        # Missing, NULL, or non-numeric override: use the configured default.
        poll_interval = settings.CONGRESS_POLL_INTERVAL_MINUTES

    return SettingsResponse(
        llm_provider=provider,
        llm_model=model,
        congress_poll_interval_minutes=poll_interval,
        newsapi_enabled=bool(settings.NEWSAPI_KEY),
        pytrends_enabled=settings.PYTRENDS_ENABLED,
    )
@router.post("/test-llm")
async def test_llm_connection():
    """Test that the configured LLM provider responds correctly.

    Generates a brief for a tiny synthetic bill and reports the outcome.

    Returns:
        On success: ``status="ok"`` plus the provider, model, and a summary
        preview truncated to 100 characters. On any failure:
        ``status="error"`` with the exception text in ``detail``.
    """
    import asyncio

    from app.services.llm_service import get_llm_provider

    try:
        provider = get_llm_provider()
        # generate_brief is a synchronous call; run it in a worker thread so a
        # slow provider does not block the event loop for other requests.
        brief = await asyncio.to_thread(
            provider.generate_brief,
            doc_text="This is a test bill for connection verification purposes.",
            bill_metadata={
                "title": "Test Connection Bill",
                "sponsor_name": "Test Sponsor",
                "party": "Test",
                "state": "DC",
                "chamber": "House",
                "introduced_date": "2025-01-01",
                "latest_action_text": "Test action",
                "latest_action_date": "2025-01-01",
            },
        )
        summary = brief.summary
        preview = summary[:100] + "..." if len(summary) > 100 else summary
        return {
            "status": "ok",
            "provider": brief.llm_provider,
            "model": brief.llm_model,
            "summary_preview": preview,
        }
    except Exception as e:
        # Diagnostic endpoint: surface the failure to the caller instead of a 500.
        return {"status": "error", "detail": str(e)}
"postgresql://congress:congress@postgres:5432/pocketveto" + + # Redis + REDIS_URL: str = "redis://redis:6379/0" + + # api.data.gov (shared key for Congress.gov and GovInfo) + DATA_GOV_API_KEY: str = "" + CONGRESS_POLL_INTERVAL_MINUTES: int = 30 + + # LLM + LLM_PROVIDER: str = "openai" # openai | anthropic | gemini | ollama + + OPENAI_API_KEY: str = "" + OPENAI_MODEL: str = "gpt-4o" + + ANTHROPIC_API_KEY: str = "" + ANTHROPIC_MODEL: str = "claude-opus-4-6" + + GEMINI_API_KEY: str = "" + GEMINI_MODEL: str = "gemini-1.5-pro" + + OLLAMA_BASE_URL: str = "http://host.docker.internal:11434" + OLLAMA_MODEL: str = "llama3.1" + + # News + NEWSAPI_KEY: str = "" + + # pytrends + PYTRENDS_ENABLED: bool = True + + +@lru_cache +def get_settings() -> Settings: + return Settings() + + +settings = get_settings() diff --git a/backend/app/database.py b/backend/app/database.py new file mode 100644 index 0000000..6610cbc --- /dev/null +++ b/backend/app/database.py @@ -0,0 +1,53 @@ +from contextlib import asynccontextmanager +from typing import AsyncGenerator + +from sqlalchemy import create_engine +from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine +from sqlalchemy.orm import DeclarativeBase, Session, sessionmaker + +from app.config import settings + + +class Base(DeclarativeBase): + pass + + +# ─── Async engine (FastAPI) ─────────────────────────────────────────────────── + +async_engine = create_async_engine( + settings.DATABASE_URL, + echo=False, + pool_size=10, + max_overflow=20, +) + +AsyncSessionLocal = async_sessionmaker( + async_engine, + expire_on_commit=False, + class_=AsyncSession, +) + + +async def get_db() -> AsyncGenerator[AsyncSession, None]: + async with AsyncSessionLocal() as session: + yield session + + +# ─── Sync engine (Celery workers) ──────────────────────────────────────────── + +sync_engine = create_engine( + settings.SYNC_DATABASE_URL, + pool_size=5, + max_overflow=10, + pool_pre_ping=True, +) + +SyncSessionLocal = 
def get_sync_db() -> Session:
    """Create and return a new synchronous session from ``SyncSessionLocal``.

    Nothing here closes the session, so the caller is responsible for
    committing and closing it.
    """
    return SyncSessionLocal()
def backfill_congress(congress_number: int, skip_llm: bool = False, dry_run: bool = False):
    """Backfill every bill of one Congress into the database.

    Pages through ``congress_api.get_bills`` 250 bills at a time, inserts the
    bills that are not stored yet, and (unless ``skip_llm``) enqueues a
    low-priority ``fetch_bill_documents`` task for each new bill.

    Args:
        congress_number: Congress to backfill (e.g. 119).
        skip_llm: When true, do not enqueue the follow-up document task.
        dry_run: When true, only log and count bills; nothing is inserted.

    Returns:
        The number of newly inserted bills (always 0 for a dry run).
    """
    from app.database import get_sync_db
    from app.models import Bill
    from app.services import congress_api
    from app.workers.congress_poller import _sync_sponsor

    db = get_sync_db()
    offset = 0
    total_processed = 0
    total_new = 0
    # IDs added to the session but not yet committed, and so not yet enqueued.
    pending_ids: list[str] = []

    def _commit_and_enqueue() -> None:
        """Commit pending rows, then enqueue follow-up tasks for them.

        Tasks are only sent after the commit so a worker can never pick up a
        bill whose row is not yet visible in the database.
        """
        db.commit()
        if pending_ids and not skip_llm:
            from app.workers.document_fetcher import fetch_bill_documents

            for pending_id in pending_ids:
                # Enqueue document + LLM at low priority
                fetch_bill_documents.apply_async(args=[pending_id], priority=3)
        pending_ids.clear()

    logger.info(f"Starting backfill for Congress {congress_number} (skip_llm={skip_llm}, dry_run={dry_run})")

    try:
        while True:
            response = congress_api.get_bills(congress=congress_number, offset=offset, limit=250)
            bills_data = response.get("bills", [])

            if not bills_data:
                break

            for bill_data in bills_data:
                parsed = congress_api.parse_bill_from_api(bill_data, congress_number)
                bill_id = parsed["bill_id"]

                if dry_run:
                    logger.info(f"[DRY RUN] Would process: {bill_id}")
                    total_processed += 1
                    continue

                existing = db.get(Bill, bill_id)
                if existing:
                    total_processed += 1
                    continue

                # Sync sponsor
                parsed["sponsor_id"] = _sync_sponsor(db, bill_data)

                db.add(Bill(**parsed))
                pending_ids.append(bill_id)
                total_new += 1
                total_processed += 1

                if total_new % 50 == 0:
                    _commit_and_enqueue()
                    logger.info(f"Progress: {total_processed} processed, {total_new} new")

                # Stay well under Congress.gov rate limit (5,000/hr = ~1.4/sec)
                time.sleep(0.25)

            _commit_and_enqueue()
            offset += 250

            if len(bills_data) < 250:
                break  # Last page

            logger.info(f"Fetched page ending at offset {offset}, total processed: {total_processed}")
            time.sleep(1)  # Polite pause between pages

    except KeyboardInterrupt:
        logger.info("Interrupted by user")
        # Keep what was gathered so far, including its follow-up tasks.
        _commit_and_enqueue()
    finally:
        db.close()

    logger.info(f"Backfill complete: {total_new} new bills added ({total_processed} total processed)")
    return total_new
class Bill(Base):
    """ORM mapping for the ``bills`` table: one row per Congressional bill.

    The primary key is a natural string key rather than a surrogate integer.
    The one-to-many relationships that declare ``order_by`` (actions, briefs,
    news_articles, trend_scores) are sorted newest first, so index ``[0]`` on
    any of them is the most recent entry.
    """

    __tablename__ = "bills"

    # Natural key: "{congress}-{bill_type_lower}-{bill_number}" e.g. "119-hr-1234"
    bill_id = Column(String, primary_key=True)
    congress_number = Column(Integer, nullable=False)
    bill_type = Column(String(10), nullable=False)  # hr, s, hjres, sjres, hconres, sconres, hres, sres
    bill_number = Column(Integer, nullable=False)
    title = Column(Text)
    short_title = Column(Text)
    # Nullable: a bill may be stored without a known sponsor.
    sponsor_id = Column(String, ForeignKey("members.bioguide_id"), nullable=True)
    introduced_date = Column(Date)
    latest_action_date = Column(Date)
    latest_action_text = Column(Text)
    status = Column(String(100))
    chamber = Column(String(50))
    congress_url = Column(String)
    govtrack_url = Column(String)

    # Ingestion tracking
    last_checked_at = Column(DateTime(timezone=True))
    actions_fetched_at = Column(DateTime(timezone=True))
    created_at = Column(DateTime(timezone=True), server_default=func.now())
    updated_at = Column(DateTime(timezone=True), server_default=func.now(), onupdate=func.now())

    # NOTE(review): the search endpoint filters on a ``search_vector`` column via
    # raw SQL; it is not mapped here, so it is presumably created by a
    # migration - confirm.

    sponsor = relationship("Member", back_populates="bills", foreign_keys=[sponsor_id])
    actions = relationship("BillAction", back_populates="bill", order_by="desc(BillAction.action_date)")
    documents = relationship("BillDocument", back_populates="bill")
    briefs = relationship("BillBrief", back_populates="bill", order_by="desc(BillBrief.created_at)")
    news_articles = relationship("NewsArticle", back_populates="bill", order_by="desc(NewsArticle.published_at)")
    trend_scores = relationship("TrendScore", back_populates="bill", order_by="desc(TrendScore.score_date)")
    committee_bills = relationship("CommitteeBill", back_populates="bill")

    # Secondary indexes on the columns declared above.
    __table_args__ = (
        Index("ix_bills_congress_number", "congress_number"),
        Index("ix_bills_latest_action_date", "latest_action_date"),
        Index("ix_bills_introduced_date", "introduced_date"),
        Index("ix_bills_chamber", "chamber"),
        Index("ix_bills_sponsor_id", "sponsor_id"),
    )
class BillDocument(Base):
    """ORM mapping for ``bill_documents``: a stored text document of a bill.

    Rows are deleted with their parent bill (``ondelete="CASCADE"``). A
    document can have briefs generated from it via the ``briefs`` relationship.
    """

    __tablename__ = "bill_documents"

    id = Column(Integer, primary_key=True, autoincrement=True)
    bill_id = Column(String, ForeignKey("bills.bill_id", ondelete="CASCADE"), nullable=False)
    doc_type = Column(String(50))  # bill_text | committee_report | amendment
    doc_version = Column(String(50))  # Introduced, Enrolled, etc.
    govinfo_url = Column(String)
    raw_text = Column(Text)
    # Set by the ingestion code; unlike created_at it has no server default.
    fetched_at = Column(DateTime(timezone=True))
    created_at = Column(DateTime(timezone=True), server_default=func.now())

    bill = relationship("Bill", back_populates="documents")
    briefs = relationship("BillBrief", back_populates="document")

    __table_args__ = (
        Index("ix_bill_documents_bill_id", "bill_id"),
    )
class Committee(Base):
    """ORM mapping for ``committees``: one row per Congressional committee.

    Uses a surrogate integer key; ``committee_code`` is the unique,
    required external identifier.
    """

    __tablename__ = "committees"

    id = Column(Integer, primary_key=True, autoincrement=True)
    committee_code = Column(String(20), unique=True, nullable=False)
    name = Column(String(500))
    chamber = Column(String(10))
    committee_type = Column(String(50))  # Standing, Select, Joint, etc.

    # Link rows joining this committee to the bills referred to it.
    committee_bills = relationship("CommitteeBill", back_populates="committee")
class NewsArticle(Base):
    """ORM mapping for ``news_articles``: a news item attached to one bill.

    Rows are deleted with their parent bill (``ondelete="CASCADE"``).
    """

    __tablename__ = "news_articles"

    id = Column(Integer, primary_key=True, autoincrement=True)
    bill_id = Column(String, ForeignKey("bills.bill_id", ondelete="CASCADE"), nullable=False)
    source = Column(String(200))
    headline = Column(Text)
    # Unique across the whole table, so a given URL can be linked to only one bill.
    url = Column(String, unique=True)
    published_at = Column(DateTime(timezone=True))
    relevance_score = Column(Float, default=0.0)
    created_at = Column(DateTime(timezone=True), server_default=func.now())

    bill = relationship("Bill", back_populates="news_articles")

    __table_args__ = (
        Index("ix_news_articles_bill_id", "bill_id"),
        Index("ix_news_articles_published_at", "published_at"),
    )
class TrendScore(Base):
    """ORM mapping for ``trend_scores``: per-bill, per-date attention metrics.

    The unique constraint on (bill_id, score_date) allows at most one score
    row per bill per date. Rows are deleted with their parent bill.
    """

    __tablename__ = "trend_scores"

    id = Column(Integer, primary_key=True, autoincrement=True)
    bill_id = Column(String, ForeignKey("bills.bill_id", ondelete="CASCADE"), nullable=False)
    score_date = Column(Date, nullable=False)
    # Per-source inputs; all default to zero at the ORM level.
    newsapi_count = Column(Integer, default=0)
    gnews_count = Column(Integer, default=0)
    gtrends_score = Column(Float, default=0.0)
    # NOTE(review): presumably derived from the three inputs above; the
    # formula is not defined in this model - confirm in the scoring worker.
    composite_score = Column(Float, default=0.0)

    bill = relationship("Bill", back_populates="trend_scores")

    __table_args__ = (
        UniqueConstraint("bill_id", "score_date", name="uq_trend_scores_bill_date"),
        Index("ix_trend_scores_bill_id", "bill_id"),
        Index("ix_trend_scores_score_date", "score_date"),
        Index("ix_trend_scores_composite", "composite_score"),
    )
class BriefSchema(BaseModel):
    """API representation of a bill brief.

    Field names mirror the ``BillBrief`` ORM columns so instances can be built
    directly from ORM objects (``from_attributes``). Every field except
    ``id`` has a default, so partially populated briefs still validate.
    """

    id: int
    brief_type: str = "full"
    summary: Optional[str] = None
    key_points: Optional[list[str]] = None
    risks: Optional[list[str]] = None
    # Free-form dicts; the ORM column comment describes them as {date, description}.
    deadlines: Optional[list[dict[str, Any]]] = None
    topic_tags: Optional[list[str]] = None
    llm_provider: Optional[str] = None
    llm_model: Optional[str] = None
    created_at: Optional[datetime] = None

    # Allow validation from attribute-bearing objects (ORM rows), not just dicts.
    model_config = {"from_attributes": True}
class BillDetailSchema(BillSchema):
    """Detail view of a bill: every ``BillSchema`` field plus related collections.

    The four list fields share their names with relationships on the ``Bill``
    ORM model, and each defaults to an empty list.
    """

    actions: list[BillActionSchema] = []
    news_articles: list[NewsArticleSchema] = []
    trend_scores: list[TrendScoreSchema] = []
    briefs: list[BriefSchema] = []
+"""
+import time
+from datetime import datetime, timezone
+from typing import Optional
+
+import requests
+from tenacity import retry, stop_after_attempt, wait_exponential
+
+from app.config import settings
+
+BASE_URL = "https://api.congress.gov/v3"
+
+
+def _get_current_congress() -> int:
+    """Return the current Congress number (118th convened Jan 3, 2023; 119th on Jan 3, 2025)."""
+    today = datetime.now(timezone.utc)
+    # A new Congress convenes on Jan 3 of odd years; Jan 1-2 still belong to the outgoing one.
+    year = today.year - 1 if (today.month == 1 and today.day < 3) else today.year
+    # One Congress per odd/even year pair; the old extra "+1 on odd years" term returned 120 for 2025.
+    return 118 + (year - 2023) // 2
+
+
+@retry(stop=stop_after_attempt(3), wait=wait_exponential(min=1, max=10))
+def _get(endpoint: str, params: dict) -> dict:
+    # Build a fresh query dict so the caller's params are never mutated (or handed the API key).
+    query = {**params, "api_key": settings.DATA_GOV_API_KEY, "format": "json"}
+    response = requests.get(f"{BASE_URL}{endpoint}", params=query, timeout=30)
+    response.raise_for_status()
+    return response.json()
+
+
+def get_current_congress() -> int:
+    return _get_current_congress()
+
+
+def build_bill_id(congress: int, bill_type: str, bill_number: int) -> str:
+    return f"{congress}-{bill_type.lower()}-{bill_number}"
+
+
+def get_bills(
+    congress: int,
+    offset: int = 0,
+    limit: int = 250,
+    from_date_time: Optional[str] = None,
+) -> dict:
+    params: dict = {"offset": offset, "limit": limit, "sort": "updateDate+desc"}
+    if from_date_time:
+        params["fromDateTime"] = from_date_time
+    return _get(f"/bill/{congress}", params)
+
+
+def get_bill_detail(congress: int, bill_type: str, bill_number: int) -> dict:
+    return _get(f"/bill/{congress}/{bill_type.lower()}/{bill_number}", {})
+
+
+def get_bill_actions(congress: int, bill_type: str, bill_number: int, offset: int = 0) -> dict:
+    return _get(f"/bill/{congress}/{bill_type.lower()}/{bill_number}/actions", {"offset": offset, "limit": 250})
+
+
+def get_bill_text_versions(congress: int, bill_type: str, bill_number: int) -> dict:
+    return _get(f"/bill/{congress}/{bill_type.lower()}/{bill_number}/text", {})
+
+
+def get_members(offset: int = 0, limit: int = 250, current_member: bool = True) -> dict:
+    params: dict = {"offset": offset, "limit": limit}
+    if current_member:
+        params["currentMember"] = "true"
+    return _get("/member", params)
+
+
+def get_member_detail(bioguide_id: str) -> dict:
+    return _get(f"/member/{bioguide_id}", {})
+
+
+def get_committees(offset: int = 0, limit: int = 250) -> dict:
+    return _get("/committee", {"offset": offset, "limit": limit})
+
+
+def parse_bill_from_api(data: dict, congress: int) -> dict:
+    """Normalize raw API bill data into our model fields."""
+    bill_type = data.get("type", "").lower()
+    bill_number = data.get("number", 0)
+    latest_action = data.get("latestAction") or {}
+    return {
+        "bill_id": build_bill_id(congress, bill_type, bill_number),
+        "congress_number": congress,
+        "bill_type": bill_type,
+        "bill_number": bill_number,
+        "title": data.get("title"),
+        "short_title": data.get("shortTitle"),
+        "introduced_date": data.get("introducedDate"),
+        "latest_action_date": latest_action.get("actionDate"),
+        "latest_action_text": latest_action.get("text"),
+        "status": latest_action.get("text", "")[:100] if latest_action.get("text") else None,
+        "chamber": "House" if bill_type.startswith("h") else "Senate",
+        "congress_url": data.get("url"),
+    }
+
+
+def parse_member_from_api(data: dict) -> dict:
+    """Normalize raw API member data into our model fields (the last listed term is treated as current)."""
+    terms = (data.get("terms") or {}).get("item", [])  # `or {}`: tolerate an explicit null, as for latestAction
+    current_term = terms[-1] if terms else {}
+    return {
+        "bioguide_id": data.get("bioguideId"),
+        "name": data.get("name", ""),
+        "first_name": data.get("firstName"),
+        "last_name": data.get("lastName"),
+        "party": data.get("partyName") or None,
+        "state": data.get("state"),
+        "chamber": current_term.get("chamber"),
+        "district": str(current_term.get("district")) if current_term.get("district") else None,
+        "photo_url": (data.get("depiction") or {}).get("imageUrl"),
+        "official_url": data.get("officialWebsiteUrl"),
+    }
diff --git
a/backend/app/services/govinfo_api.py b/backend/app/services/govinfo_api.py
new file mode 100644
index 0000000..15911b1
--- /dev/null
+++ b/backend/app/services/govinfo_api.py
@@ -0,0 +1,95 @@
+"""
+GovInfo API client for fetching actual bill text.
+
+Priority order for text formats: htm > txt > pdf
+"""
+import logging
+import re
+from typing import Optional
+
+import requests
+from bs4 import BeautifulSoup
+from tenacity import retry, stop_after_attempt, wait_exponential
+
+from app.config import settings
+
+logger = logging.getLogger(__name__)
+
+GOVINFO_BASE = "https://api.govinfo.gov"
+FORMAT_PRIORITY = ["htm", "html", "txt", "pdf"]
+
+
+@retry(stop=stop_after_attempt(3), wait=wait_exponential(min=2, max=15))
+def _get(url: str, params: Optional[dict] = None) -> requests.Response:
+    p = {"api_key": settings.DATA_GOV_API_KEY, **(params or {})}
+    response = requests.get(url, params=p, timeout=60)
+    response.raise_for_status()
+    return response
+
+
+def get_package_summary(package_id: str) -> dict:
+    response = _get(f"{GOVINFO_BASE}/packages/{package_id}/summary")
+    return response.json()
+
+
+def get_package_content_detail(package_id: str) -> dict:
+    response = _get(f"{GOVINFO_BASE}/packages/{package_id}/content-detail")
+    return response.json()
+
+
+def find_best_text_url(text_versions: list[dict]) -> tuple[Optional[str], Optional[str]]:
+    """
+    From a list of text version objects (from Congress.gov API), find the best
+    available text format. Returns (url, format), or (None, None) so callers can always unpack.
+    Matches by URL extension since Congress.gov type strings are "Formatted Text", "PDF", etc.
+    """
+    for fmt in FORMAT_PRIORITY:
+        for version in text_versions:
+            for fmt_info in version.get("formats", []):
+                if not isinstance(fmt_info, dict):
+                    continue
+                url = fmt_info.get("url", "")
+                if url.lower().endswith(f".{fmt}"):
+                    return url, fmt
+    return None, None
+
+
+def fetch_text_from_url(url: str, fmt: str) -> Optional[str]:
+    """Download and extract plain text from a GovInfo document URL (None on failure or unknown fmt)."""
+    try:
+        response = requests.get(url, timeout=120)
+        response.raise_for_status()
+
+        if fmt in ("htm", "html"):
+            return _extract_from_html(response.text)
+        elif fmt == "txt":
+            return response.text
+        elif fmt == "pdf":
+            return _extract_from_pdf(response.content)
+    except Exception as e:
+        logger.error(f"Failed to fetch text from {url}: {e}")
+        return None
+
+
+def _extract_from_html(html: str) -> str:
+    """Strip HTML tags and clean up whitespace."""
+    soup = BeautifulSoup(html, "lxml")
+    # Remove script/style tags
+    for tag in soup(["script", "style", "nav", "header", "footer"]):
+        tag.decompose()
+    text = soup.get_text(separator="\n")
+    # Collapse excessive whitespace
+    text = re.sub(r"\n{3,}", "\n\n", text)
+    text = re.sub(r" {2,}", " ", text)
+    return text.strip()
+
+
+def _extract_from_pdf(content: bytes) -> Optional[str]:
+    """Extract text from PDF bytes using pdfminer."""
+    try:
+        from io import BytesIO
+        from pdfminer.high_level import extract_text as pdf_extract
+        return pdf_extract(BytesIO(content))
+    except Exception as e:
+        logger.error(f"PDF extraction failed: {e}")
+        return None
diff --git a/backend/app/services/llm_service.py b/backend/app/services/llm_service.py
new file mode 100644
index 0000000..91bf065
--- /dev/null
+++ b/backend/app/services/llm_service.py
@@ -0,0 +1,327 @@
+"""
+LLM provider abstraction.
+
+All providers implement generate_brief(doc_text, bill_metadata) -> ReverseBrief.
+Select provider via LLM_PROVIDER env var.
+""" +import json +import logging +import re +from abc import ABC, abstractmethod +from dataclasses import dataclass, field + +from app.config import settings + +logger = logging.getLogger(__name__) + +SYSTEM_PROMPT = """You are a nonpartisan legislative analyst specializing in translating complex \ +legislation into clear, accurate summaries for informed citizens. You analyze bills objectively \ +without political bias. + +Always respond with valid JSON matching exactly this schema: +{ + "summary": "2-4 paragraph plain-language summary of what this bill does", + "key_points": ["specific concrete fact 1", "specific concrete fact 2"], + "risks": ["legitimate concern or challenge 1", "legitimate concern 2"], + "deadlines": [{"date": "YYYY-MM-DD or null", "description": "what happens on this date"}], + "topic_tags": ["healthcare", "taxation"] +} + +Rules: +- summary: Explain WHAT the bill does, not whether it is good or bad. Be factual and complete. +- key_points: 5-10 specific, concrete things the bill changes, authorizes, or appropriates. +- risks: Legitimate concerns from any perspective — costs, implementation challenges, \ +constitutional questions, unintended consequences. Include at least 2 even for benign bills. +- deadlines: Only include if explicitly stated in the text. Use null for date if a deadline \ +is mentioned without a specific date. Empty list if none. +- topic_tags: 3-8 lowercase tags. Prefer these standard tags: healthcare, taxation, defense, \ +education, immigration, environment, housing, infrastructure, technology, agriculture, judiciary, \ +foreign-policy, veterans, social-security, trade, budget, energy, banking, transportation, \ +public-lands, labor, civil-rights, science. + +Respond with ONLY valid JSON. 
No preamble, no explanation, no markdown code blocks."""
+
+MAX_TOKENS_DEFAULT = 6000
+MAX_TOKENS_OLLAMA = 3000
+TOKENS_PER_CHAR = 0.25  # rough approximation: 4 chars ≈ 1 token
+
+
+@dataclass
+class ReverseBrief:
+    summary: str
+    key_points: list[str]
+    risks: list[str]
+    deadlines: list[dict]
+    topic_tags: list[str]
+    llm_provider: str
+    llm_model: str
+
+
+def smart_truncate(text: str, max_tokens: int) -> str:
+    """Return text as-is if its estimated tokens fit max_tokens, else its head and tail around an omission marker."""
+    approx_tokens = len(text) * TOKENS_PER_CHAR
+    if approx_tokens <= max_tokens:
+        return text
+
+    # Keep first 75% of budget for the preamble (purpose section)
+    # and last 25% for effective dates / enforcement sections
+    preamble_chars = int(max_tokens * 0.75 / TOKENS_PER_CHAR)
+    tail_chars = int(max_tokens * 0.25 / TOKENS_PER_CHAR)
+    omitted_chars = len(text) - preamble_chars - tail_chars
+
+    return (
+        text[:preamble_chars]
+        + f"\n\n[... {omitted_chars:,} characters omitted for length ...]\n\n"
+        + text[len(text) - tail_chars:]  # not text[-tail_chars:]: a zero tail budget would return the whole text
+    )
+
+
+AMENDMENT_SYSTEM_PROMPT = """You are a nonpartisan legislative analyst. A bill has been updated \
+and you must summarize what changed between the previous and new version.
+
+Always respond with valid JSON matching exactly this schema:
+{
+  "summary": "2-3 paragraph plain-language description of what changed in this version",
+  "key_points": ["specific change 1", "specific change 2"],
+  "risks": ["new concern introduced by this change 1", "concern 2"],
+  "deadlines": [{"date": "YYYY-MM-DD or null", "description": "new deadline added"}],
+  "topic_tags": ["healthcare", "taxation"]
+}
+
+Rules:
+- summary: Focus ONLY on what is different from the previous version. Be specific.
+- key_points: List concrete additions, removals, or modifications in this version.
+- risks: Only include risks that are new or changed relative to the previous version.
+- deadlines: Only new or changed deadlines. Empty list if none.
+- topic_tags: Same standard tags as before — include any new topics this version adds.
+
+Respond with ONLY valid JSON. No preamble, no explanation, no markdown code blocks."""
+
+
+def build_amendment_prompt(new_text: str, previous_text: str, bill_metadata: dict, max_tokens: int) -> str:
+    half = max_tokens // 2
+    truncated_new = smart_truncate(new_text, half)
+    truncated_prev = smart_truncate(previous_text, half)
+    return f"""A bill has been updated. Summarize what changed between the previous and new version.
+
+BILL METADATA:
+- Title: {bill_metadata.get('title', 'Unknown')}
+- Sponsor: {bill_metadata.get('sponsor_name', 'Unknown')} \
+({bill_metadata.get('party', '?')}-{bill_metadata.get('state', '?')})
+- Latest Action: {bill_metadata.get('latest_action_text', 'None')} \
+({bill_metadata.get('latest_action_date', 'Unknown')})
+
+PREVIOUS VERSION:
+{truncated_prev}
+
+NEW VERSION:
+{truncated_new}
+
+Produce the JSON amendment summary now:"""
+
+
+def build_prompt(doc_text: str, bill_metadata: dict, max_tokens: int) -> str:
+    truncated = smart_truncate(doc_text, max_tokens)
+    return f"""Analyze this legislation and produce a structured brief.
+
+BILL METADATA:
+- Title: {bill_metadata.get('title', 'Unknown')}
+- Sponsor: {bill_metadata.get('sponsor_name', 'Unknown')} \
+({bill_metadata.get('party', '?')}-{bill_metadata.get('state', '?')})
+- Introduced: {bill_metadata.get('introduced_date', 'Unknown')}
+- Chamber: {bill_metadata.get('chamber', 'Unknown')}
+- Latest Action: {bill_metadata.get('latest_action_text', 'None')} \
+({bill_metadata.get('latest_action_date', 'Unknown')})
+
+BILL TEXT:
+{truncated}
+
+Produce the JSON brief now:"""
+
+
+def parse_brief_json(raw: str | dict, provider: str, model: str) -> ReverseBrief:
+    """Parse an LLM JSON reply (str or already-decoded dict) into a ReverseBrief; absent/null fields become empty."""
+    if isinstance(raw, str):
+        # Strip markdown code fences if present
+        raw = re.sub(r"^```(?:json)?\s*", "", raw.strip())
+        raw = re.sub(r"\s*```$", "", raw.strip())
+        data = json.loads(raw)
+    else:
+        data = raw
+
+    return ReverseBrief(
+        summary=str(data.get("summary") or ""),  # `or`: an explicit JSON null must not become the text "None"
+        key_points=list(data.get("key_points") or []),  # list(None) would raise TypeError on a null field
+        risks=list(data.get("risks") or []),
+        deadlines=list(data.get("deadlines") or []),
+        topic_tags=list(data.get("topic_tags") or []),
+        llm_provider=provider,
+        llm_model=model,
+    )
+
+
+class LLMProvider(ABC):
+    @abstractmethod
+    def generate_brief(self, doc_text: str, bill_metadata: dict) -> ReverseBrief:
+        pass
+
+    @abstractmethod
+    def generate_amendment_brief(self, new_text: str, previous_text: str, bill_metadata: dict) -> ReverseBrief:
+        pass
+
+
+class OpenAIProvider(LLMProvider):
+    def __init__(self):
+        from openai import OpenAI
+        self.client = OpenAI(api_key=settings.OPENAI_API_KEY)
+        self.model = settings.OPENAI_MODEL
+
+    def generate_brief(self, doc_text: str, bill_metadata: dict) -> ReverseBrief:
+        prompt = build_prompt(doc_text, bill_metadata, MAX_TOKENS_DEFAULT)
+        response = self.client.chat.completions.create(
+            model=self.model,
+            messages=[
+                {"role": "system", "content": SYSTEM_PROMPT},
+                {"role": "user", "content": prompt},
+            ],
+            response_format={"type": "json_object"},
+            temperature=0.1,
+        )
+        raw = response.choices[0].message.content
+        return parse_brief_json(raw, "openai", self.model)
+
+    def generate_amendment_brief(self, new_text: str, previous_text: str, bill_metadata: dict) -> ReverseBrief:
+        prompt = build_amendment_prompt(new_text, previous_text, bill_metadata, MAX_TOKENS_DEFAULT)
+        response = self.client.chat.completions.create(
+            model=self.model,
+            messages=[
+                {"role": "system", "content": AMENDMENT_SYSTEM_PROMPT},
+                {"role": "user", "content": prompt},
+            ],
+            response_format={"type": "json_object"},
+            temperature=0.1,
+        )
+        raw = response.choices[0].message.content
+        return parse_brief_json(raw, "openai", self.model)
+
+
+class AnthropicProvider(LLMProvider):
+    def __init__(self):
+        import anthropic
+        self.client = anthropic.Anthropic(api_key=settings.ANTHROPIC_API_KEY)
+        self.model = settings.ANTHROPIC_MODEL
+
+    def generate_brief(self, doc_text: str, bill_metadata: dict) -> ReverseBrief:
+        prompt = build_prompt(doc_text, bill_metadata, MAX_TOKENS_DEFAULT)
+        response = self.client.messages.create(
+            model=self.model,
+            max_tokens=4096,
+            system=SYSTEM_PROMPT + "\n\nIMPORTANT: Respond with ONLY valid JSON. No other text.",
+            messages=[{"role": "user", "content": prompt}],
+        )
+        raw = response.content[0].text
+        return parse_brief_json(raw, "anthropic", self.model)
+
+    def generate_amendment_brief(self, new_text: str, previous_text: str, bill_metadata: dict) -> ReverseBrief:
+        prompt = build_amendment_prompt(new_text, previous_text, bill_metadata, MAX_TOKENS_DEFAULT)
+        response = self.client.messages.create(
+            model=self.model,
+            max_tokens=4096,
+            system=AMENDMENT_SYSTEM_PROMPT + "\n\nIMPORTANT: Respond with ONLY valid JSON. No other text.",
+            messages=[{"role": "user", "content": prompt}],
+        )
+        raw = response.content[0].text
+        return parse_brief_json(raw, "anthropic", self.model)
+
+
+class GeminiProvider(LLMProvider):
+    def __init__(self):
+        import google.generativeai as genai
+        genai.configure(api_key=settings.GEMINI_API_KEY)
+        self._genai = genai
+        self.model_name = settings.GEMINI_MODEL
+
+    def _make_model(self, system_prompt: str):
+        return self._genai.GenerativeModel(
+            model_name=self.model_name,
+            generation_config={"response_mime_type": "application/json", "temperature": 0.1},
+            system_instruction=system_prompt,
+        )
+
+    def generate_brief(self, doc_text: str, bill_metadata: dict) -> ReverseBrief:
+        prompt = build_prompt(doc_text, bill_metadata, MAX_TOKENS_DEFAULT)
+        response = self._make_model(SYSTEM_PROMPT).generate_content(prompt)
+        return parse_brief_json(response.text, "gemini", self.model_name)
+
+    def generate_amendment_brief(self, new_text: str, previous_text: str, bill_metadata: dict) -> ReverseBrief:
+        prompt = build_amendment_prompt(new_text, previous_text, bill_metadata, MAX_TOKENS_DEFAULT)
+        response = self._make_model(AMENDMENT_SYSTEM_PROMPT).generate_content(prompt)
+        return parse_brief_json(response.text, "gemini", self.model_name)
+
+
+class OllamaProvider(LLMProvider):
+    def __init__(self):
+        self.base_url = settings.OLLAMA_BASE_URL.rstrip("/")
+        self.model = settings.OLLAMA_MODEL
+
+    def _generate(self, system_prompt: str, user_prompt: str) -> str:
+        """Send one non-streaming request to Ollama's /api/generate and return the raw reply.
+
+        Args:
+            system_prompt: Instructions; placed in front of user_prompt, separated by a
+                blank line, to form the single prompt string that is sent.
+            user_prompt: The bill-specific prompt text.
+
+        Returns:
+            The "response" string from Ollama's JSON payload ("" if the key is absent).
+            It is NOT validated here: callers parse it and retry with a stricter prompt.
+        """
+        import requests as req
+        full_prompt = f"{system_prompt}\n\n{user_prompt}"
+        response = req.post(
+            f"{self.base_url}/api/generate",
+            json={"model": self.model, "prompt": full_prompt, "stream": False, "format": "json"},
+            timeout=300,
+        )
+        response.raise_for_status()
+        return response.json().get("response", "")
+
+    def generate_brief(self, doc_text: str, bill_metadata: dict) -> ReverseBrief:
+        prompt = build_prompt(doc_text, bill_metadata, MAX_TOKENS_OLLAMA)
+        raw = self._generate(SYSTEM_PROMPT, prompt)
+        try:
+            return parse_brief_json(raw, "ollama", self.model)
+        except (json.JSONDecodeError, KeyError) as e:
+            logger.warning(f"Ollama JSON parse failed, retrying: {e}")
+            raw2 = self._generate(
+                SYSTEM_PROMPT,
+                prompt + "\n\nCRITICAL: Your response MUST be valid JSON only. No text before or after the JSON object."
+            )
+            return parse_brief_json(raw2, "ollama", self.model)
+
+    def generate_amendment_brief(self, new_text: str, previous_text: str, bill_metadata: dict) -> ReverseBrief:
+        prompt = build_amendment_prompt(new_text, previous_text, bill_metadata, MAX_TOKENS_OLLAMA)
+        raw = self._generate(AMENDMENT_SYSTEM_PROMPT, prompt)
+        try:
+            return parse_brief_json(raw, "ollama", self.model)
+        except (json.JSONDecodeError, KeyError) as e:
+            logger.warning(f"Ollama amendment JSON parse failed, retrying: {e}")
+            raw2 = self._generate(
+                AMENDMENT_SYSTEM_PROMPT,
+                prompt + "\n\nCRITICAL: Your response MUST be valid JSON only. No text before or after the JSON object."
+            )
+            return parse_brief_json(raw2, "ollama", self.model)
+
+
+def get_llm_provider() -> LLMProvider:
+    """Factory — returns the configured LLM provider."""
+    provider = settings.LLM_PROVIDER.lower()
+    if provider == "openai":
+        return OpenAIProvider()
+    elif provider == "anthropic":
+        return AnthropicProvider()
+    elif provider == "gemini":
+        return GeminiProvider()
+    elif provider == "ollama":
+        return OllamaProvider()
+    raise ValueError(f"Unknown LLM_PROVIDER: '{provider}'. Must be one of: openai, anthropic, gemini, ollama")
diff --git a/backend/app/services/news_service.py b/backend/app/services/news_service.py
new file mode 100644
index 0000000..c69748f
--- /dev/null
+++ b/backend/app/services/news_service.py
@@ -0,0 +1,89 @@
+"""
+News correlation service.
+
+- NewsAPI.org: structured news articles per bill (100 req/day limit)
+- Google News RSS: volume signal for zeitgeist scoring (no limit)
+"""
+import logging
+import time
+import urllib.parse
+from datetime import datetime, timedelta, timezone
+from typing import Optional
+
+import feedparser
+import requests
+from tenacity import retry, stop_after_attempt, wait_exponential
+
+from app.config import settings
+
+logger = logging.getLogger(__name__)
+
+NEWSAPI_BASE = "https://newsapi.org/v2"
+GOOGLE_NEWS_RSS = "https://news.google.com/rss/search"
+NEWSAPI_DAILY_LIMIT = 95  # Leave 5 as buffer
+
+
+@retry(stop=stop_after_attempt(2), wait=wait_exponential(min=1, max=5))
+def _newsapi_get(endpoint: str, params: dict) -> dict:
+    query = {**params, "apiKey": settings.NEWSAPI_KEY}  # copy: never write the key into the caller's dict
+    response = requests.get(f"{NEWSAPI_BASE}/{endpoint}", params=query, timeout=30)
+    response.raise_for_status()
+    return response.json()
+
+
+def build_news_query(bill_title: str, short_title: Optional[str], sponsor_name: Optional[str],
+                     bill_type: str, bill_number: int) -> str:
+    """Build a NewsAPI search query for a bill."""
+    terms = []
+    if short_title:
+        terms.append(f'"{short_title}"')
+    elif bill_title:
+        # Use first 6 words of title as phrase
+        words = bill_title.split()[:6]
+        if len(words) >= 3:
+            terms.append(f'"{" ".join(words)}"')
+    # Add bill number as fallback
+    terms.append(f'"{bill_type.upper()} {bill_number}"')
+    return " OR ".join(terms[:2])  # Keep queries short for relevance
+
+
+def fetch_newsapi_articles(query: str, days: int = 30) -> list[dict]:
+    """Fetch articles from NewsAPI.org. Returns empty list if quota is exhausted or key not set."""
+    if not settings.NEWSAPI_KEY:
+        return []
+    try:
+        from_date = (datetime.now(timezone.utc) - timedelta(days=days)).strftime("%Y-%m-%d")
+        data = _newsapi_get("everything", {
+            "q": query,
+            "language": "en",
+            "sortBy": "relevancy",
+            "pageSize": 10,
+            "from": from_date,
+        })
+        articles = data.get("articles", [])
+        return [
+            {
+                "source": (a.get("source") or {}).get("name", ""),  # a null source must not void the whole batch
+                "headline": a.get("title", ""),
+                "url": a.get("url", ""),
+                "published_at": a.get("publishedAt"),
+            }
+            for a in articles
+            if a.get("url") and a.get("title")
+        ]
+    except Exception as e:
+        logger.error(f"NewsAPI fetch failed: {e}")
+        return []
+
+
+def fetch_gnews_count(query: str, days: int = 30) -> int:
+    """Count articles in Google News RSS for the past N days. Used as volume signal."""
+    try:
+        encoded = urllib.parse.quote(f"{query} when:{days}d")
+        url = f"{GOOGLE_NEWS_RSS}?q={encoded}&hl=en-US&gl=US&ceid=US:en"
+        time.sleep(1)  # Polite delay
+        feed = feedparser.parse(url)
+        return len(feed.entries)
+    except Exception as e:
+        logger.error(f"Google News RSS fetch failed: {e}")
+        return 0
diff --git a/backend/app/services/trends_service.py b/backend/app/services/trends_service.py
new file mode 100644
index 0000000..cbe84d0
--- /dev/null
+++ b/backend/app/services/trends_service.py
@@ -0,0 +1,64 @@
+"""
+Google Trends service (via pytrends).
+
+pytrends is unofficial web scraping — Google blocks it sporadically.
+All calls are wrapped in try/except and return 0 on any failure.
+"""
+import logging
+import random
+import time
+
+from app.config import settings
+
+logger = logging.getLogger(__name__)
+
+
+def get_trends_score(keywords: list[str]) -> float:
+    """
+    Return a 0–100 interest score for the given keywords over the past 90 days.
+    Returns 0.0 on any failure (rate limit, empty data, exception).
+    """
+    if not settings.PYTRENDS_ENABLED or not keywords:
+        return 0.0
+    try:
+        from pytrends.request import TrendReq
+
+        # Jitter to avoid detection as bot
+        time.sleep(random.uniform(2.0, 5.0))
+
+        pytrends = TrendReq(hl="en-US", tz=0, timeout=(10, 25))
+        kw_list = [k for k in keywords[:5] if k]  # max 5 keywords
+        if not kw_list:
+            return 0.0
+
+        pytrends.build_payload(kw_list, timeframe="today 3-m", geo="US")
+        data = pytrends.interest_over_time()
+
+        if data is None or data.empty:
+            return 0.0
+
+        # Average the most recent 14 data points for the primary keyword
+        primary = kw_list[0]
+        if primary not in data.columns:
+            return 0.0
+
+        recent = data[primary].tail(14)
+        return float(recent.mean())
+
+    except Exception as e:
+        logger.debug(f"pytrends failed (non-critical): {e}")
+        return 0.0
+
+
+def keywords_for_bill(title: str, short_title: str, topic_tags: list[str]) -> list[str]:
+    """Extract meaningful search keywords for a bill."""
+    keywords = []
+    if short_title:
+        keywords.append(short_title)
+    elif title:
+        # Use first 5 words of title
+        words = title.split()[:5]
+        if len(words) >= 2:
+            keywords.append(" ".join(words))
+    keywords.extend(tag.replace("-", " ") for tag in (topic_tags or [])[:3])
+    return keywords[:5]
diff --git a/backend/app/workers/__init__.py b/backend/app/workers/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/backend/app/workers/celery_app.py b/backend/app/workers/celery_app.py
new file mode 100644
index 0000000..dd9fc01
--- /dev/null
+++ b/backend/app/workers/celery_app.py
@@ -0,0 +1,62 @@
+from celery import Celery
+from celery.schedules import crontab
+from kombu import Queue
+
+from app.config import settings
+
+celery_app = Celery(
+    "pocketveto",
+    broker=settings.REDIS_URL,
+    backend=settings.REDIS_URL,
+    include=[
+        "app.workers.congress_poller",
+        "app.workers.document_fetcher",
+        "app.workers.llm_processor",
+        "app.workers.news_fetcher",
+        "app.workers.trend_scorer",
+    ],
+)
+
+celery_app.conf.update(
+
task_serializer="json", + result_serializer="json", + accept_content=["json"], + timezone="UTC", + enable_utc=True, + # Late ack: task is only removed from queue after completion, not on pickup. + # Combined with idempotent tasks, this ensures no work is lost if a worker crashes. + task_acks_late=True, + # Prevent workers from prefetching LLM tasks and blocking other workers. + worker_prefetch_multiplier=1, + # Route tasks to named queues + task_routes={ + "app.workers.congress_poller.*": {"queue": "polling"}, + "app.workers.document_fetcher.*": {"queue": "documents"}, + "app.workers.llm_processor.*": {"queue": "llm"}, + "app.workers.news_fetcher.*": {"queue": "news"}, + "app.workers.trend_scorer.*": {"queue": "news"}, + }, + task_queues=[ + Queue("polling"), + Queue("documents"), + Queue("llm"), + Queue("news"), + ], + # RedBeat stores schedule in Redis — restart-safe and dynamically updatable + redbeat_redis_url=settings.REDIS_URL, + beat_scheduler="redbeat.RedBeatScheduler", + beat_schedule={ + "poll-congress-bills": { + "task": "app.workers.congress_poller.poll_congress_bills", + "schedule": crontab(minute=f"*/{settings.CONGRESS_POLL_INTERVAL_MINUTES}"), + }, + "fetch-news-active-bills": { + "task": "app.workers.news_fetcher.fetch_news_for_active_bills", + "schedule": crontab(hour="*/6", minute=0), + }, + "calculate-trend-scores": { + "task": "app.workers.trend_scorer.calculate_all_trend_scores", + "schedule": crontab(hour=2, minute=0), + }, + }, +) diff --git a/backend/app/workers/congress_poller.py b/backend/app/workers/congress_poller.py new file mode 100644 index 0000000..92f5cf1 --- /dev/null +++ b/backend/app/workers/congress_poller.py @@ -0,0 +1,172 @@ +""" +Congress.gov poller — incremental bill and member sync. + +Runs on Celery Beat schedule (every 30 min by default). +Uses fromDateTime to fetch only recently updated bills. +All operations are idempotent. 
+"""
+import logging
+from datetime import datetime, timezone
+
+from app.database import get_sync_db
+from app.models import Bill, BillAction, Member, AppSetting
+from app.services import congress_api
+from app.workers.celery_app import celery_app
+
+logger = logging.getLogger(__name__)
+
+
+def _get_setting(db, key: str, default=None) -> str | None:
+    row = db.get(AppSetting, key)
+    return row.value if row else default
+
+
+def _set_setting(db, key: str, value: str) -> None:
+    row = db.get(AppSetting, key)
+    if row:
+        row.value = value
+    else:
+        db.add(AppSetting(key=key, value=value))
+    db.commit()
+
+
+@celery_app.task(bind=True, max_retries=3, name="app.workers.congress_poller.poll_congress_bills")
+def poll_congress_bills(self):
+    """Fetch recently updated bills from Congress.gov and enqueue document + LLM processing."""
+    db = get_sync_db()
+    try:
+        last_polled = _get_setting(db, "congress_last_polled_at")
+        current_congress = congress_api.get_current_congress()
+        logger.info(f"Polling Congress {current_congress} (since {last_polled})")
+
+        # Stamp the poll *start* before fetching, so bills updated while we page are seen by the next poll.
+        poll_started_at = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
+        new_count = updated_count = offset = 0
+
+        while True:
+            response = congress_api.get_bills(
+                congress=current_congress,
+                offset=offset,
+                limit=250,
+                from_date_time=last_polled,
+            )
+            bills_data = response.get("bills", [])
+            if not bills_data:
+                break
+
+            for bill_data in bills_data:
+                parsed = congress_api.parse_bill_from_api(bill_data, current_congress)
+                bill_id = parsed["bill_id"]
+                existing = db.get(Bill, bill_id)
+
+                if existing is None:
+                    # Upsert sponsor member if referenced
+                    sponsor_id = _sync_sponsor(db, bill_data)
+                    parsed["sponsor_id"] = sponsor_id
+                    parsed["last_checked_at"] = datetime.now(timezone.utc)
+                    db.add(Bill(**parsed))
+                    db.commit()
+                    new_count += 1
+                    # Enqueue document fetch
+                    from app.workers.document_fetcher import fetch_bill_documents
+                    fetch_bill_documents.delay(bill_id)
+                else:
+                    if _update_bill_if_changed(db, existing, parsed):  # count only bills that really changed
+                        updated_count += 1
+
+            db.commit()
+            offset += 250
+            if len(bills_data) < 250:
+                break
+
+        # Persist the start-of-poll timestamp (re-seeing a few bills is harmless: processing is idempotent)
+        _set_setting(db, "congress_last_polled_at", poll_started_at)
+        logger.info(f"Poll complete: {new_count} new, {updated_count} updated")
+        return {"new": new_count, "updated": updated_count}
+
+    except Exception as exc:
+        db.rollback()
+        logger.error(f"Poll failed: {exc}")
+        raise self.retry(exc=exc, countdown=60)
+    finally:
+        db.close()
+
+
+@celery_app.task(bind=True, max_retries=3, name="app.workers.congress_poller.sync_members")
+def sync_members(self):
+    """Sync current Congress members."""
+    db = get_sync_db()
+    try:
+        offset = 0
+        synced = 0
+        while True:
+            response = congress_api.get_members(offset=offset, limit=250, current_member=True)
+            members_data = response.get("members", [])
+            if not members_data:
+                break
+
+            for member_data in members_data:
+                parsed = congress_api.parse_member_from_api(member_data)
+                if not parsed.get("bioguide_id"):
+                    continue
+                existing = db.get(Member, parsed["bioguide_id"])
+                if existing is None:
+                    db.add(Member(**parsed))
+                else:
+                    for k, v in parsed.items():
+                        setattr(existing, k, v)
+                synced += 1
+
+            db.commit()
+            offset += 250
+            if len(members_data) < 250:
+                break
+
+        logger.info(f"Synced {synced} members")
+        return {"synced": synced}
+    except Exception as exc:
+        db.rollback()
+        raise self.retry(exc=exc, countdown=120)
+    finally:
+        db.close()
+
+
+def _sync_sponsor(db, bill_data: dict) -> str | None:
+    """Ensure the bill sponsor exists in the members table. Returns bioguide_id or None."""
+    sponsors = bill_data.get("sponsors", [])
+    if not sponsors:
+        return None
+    sponsor_raw = sponsors[0]
+    bioguide_id = sponsor_raw.get("bioguideId")
+    if not bioguide_id:
+        return None
+    existing = db.get(Member, bioguide_id)
+    if existing is None:
+        db.add(Member(
+            bioguide_id=bioguide_id,
+            name=sponsor_raw.get("fullName", ""),
+            first_name=sponsor_raw.get("firstName"),
+            last_name=sponsor_raw.get("lastName"),
+            party=sponsor_raw.get("party", "")[:10] if sponsor_raw.get("party") else None,
+            state=sponsor_raw.get("state"),
+        ))
+        db.commit()
+    return bioguide_id
+
+
+def _update_bill_if_changed(db, existing: Bill, parsed: dict) -> bool:
+    """Update tracked bill fields that differ; on change, commit and enqueue a document fetch. Returns True if updated."""
+    changed = False
+    track_fields = ["title", "short_title", "latest_action_date", "latest_action_text", "status"]
+    for field in track_fields:
+        new_val = parsed.get(field)
+        # str(): API dates are ISO strings; the stored value may be a date object (model not shown — confirm)
+        if new_val and str(getattr(existing, field)) != str(new_val):
+            setattr(existing, field, new_val)
+            changed = True
+    if changed:
+        existing.last_checked_at = datetime.now(timezone.utc)
+        db.commit()
+        from app.workers.document_fetcher import fetch_bill_documents  # new text versions may now exist
+        fetch_bill_documents.delay(existing.bill_id)
+    return changed
diff --git a/backend/app/workers/document_fetcher.py b/backend/app/workers/document_fetcher.py
new file mode 100644
index 0000000..b745e6d
--- /dev/null
+++ b/backend/app/workers/document_fetcher.py
@@ -0,0 +1,87 @@
+"""
+Document fetcher — retrieves bill text from GovInfo and stores it.
+Triggered by congress_poller when a new bill is detected.
+"""
+import logging
+from datetime import datetime, timezone
+
+from app.database import get_sync_db
+from app.models import Bill, BillDocument
+from app.services import congress_api, govinfo_api
+from app.workers.celery_app import celery_app
+
+logger = logging.getLogger(__name__)
+
+
+@celery_app.task(bind=True, max_retries=3, name="app.workers.document_fetcher.fetch_bill_documents")
+def fetch_bill_documents(self, bill_id: str):
+    """Fetch the best-format bill text, store it as a BillDocument, then enqueue LLM processing. Returns a status dict."""
+    db = get_sync_db()
+    try:
+        bill = db.get(Bill, bill_id)
+        if not bill:
+            logger.warning(f"Bill {bill_id} not found in DB")
+            return {"status": "not_found"}
+
+        # Get text versions from Congress.gov
+        try:
+            text_response = congress_api.get_bill_text_versions(
+                bill.congress_number, bill.bill_type, bill.bill_number
+            )
+        except Exception as e:
+            logger.warning(f"No text versions for {bill_id}: {e}")
+            return {"status": "no_text_versions"}
+
+        text_versions = text_response.get("textVersions", [])
+        if not text_versions:
+            return {"status": "no_text_versions"}
+
+        url, fmt = govinfo_api.find_best_text_url(text_versions)
+        if not url:
+            return {"status": "no_suitable_format"}
+
+        # Idempotency: skip if we already have this exact document version
+        existing = (
+            db.query(BillDocument)
+            .filter_by(bill_id=bill_id, govinfo_url=url)
+            .filter(BillDocument.raw_text.isnot(None))
+            .first()
+        )
+        if existing:
+            return {"status": "already_fetched", "bill_id": bill_id}
+
+        logger.info(f"Fetching {bill_id} document ({fmt}) from {url}")
+        raw_text = govinfo_api.fetch_text_from_url(url, fmt)
+        if not raw_text:
+            raise ValueError(f"Empty text returned for {bill_id}")
+
+        # Label with the version that actually supplied `url` (the best format need not come from the first one)
+        type_obj = next((v for v in text_versions if url in [f.get("url") for f in v.get("formats", []) if isinstance(f, dict)]), text_versions[0]).get("type", {})
+        doc_version = type_obj.get("name") if isinstance(type_obj, dict) else type_obj
+
+        doc = BillDocument(
+            bill_id=bill_id,
+            doc_type="bill_text",
+            doc_version=doc_version,
+            govinfo_url=url,
+            raw_text=raw_text,
+            fetched_at=datetime.now(timezone.utc),
+        )
+        db.add(doc)
+        db.commit()
+        db.refresh(doc)
+
+        logger.info(f"Stored document {doc.id} for bill {bill_id} ({len(raw_text):,} chars)")
+
+        # Enqueue LLM processing
+        from app.workers.llm_processor import process_document_with_llm
+        process_document_with_llm.delay(doc.id)
+
+        return {"status": "ok", "document_id": doc.id, "chars": len(raw_text)}
+
+    except Exception as exc:
+        db.rollback()
+        logger.error(f"Document fetch failed for {bill_id}: {exc}")
+        raise self.retry(exc=exc, countdown=120)
+    finally:
+        db.close()
diff --git a/backend/app/workers/llm_processor.py b/backend/app/workers/llm_processor.py
new file mode 100644
index 0000000..633ad6e
--- /dev/null
+++ b/backend/app/workers/llm_processor.py
@@ -0,0 +1,107 @@
+"""
+LLM processor — generates AI briefs for fetched bill documents.
+Triggered by document_fetcher after successful text retrieval.
+"""
+import logging
+
+from app.database import get_sync_db
+from app.models import Bill, BillBrief, BillDocument, Member
+from app.services.llm_service import get_llm_provider
+from app.workers.celery_app import celery_app
+
+logger = logging.getLogger(__name__)
+
+
+@celery_app.task(
+    bind=True,
+    max_retries=2,
+    rate_limit="10/m",  # Respect LLM provider rate limits
+    name="app.workers.llm_processor.process_document_with_llm",
+)
+def process_document_with_llm(self, document_id: int):
+    """Generate an AI brief for a bill document.
@celery_app.task(
    bind=True,
    max_retries=2,
    rate_limit="10/m",  # Respect LLM provider rate limits
    name="app.workers.llm_processor.process_document_with_llm",
)
def process_document_with_llm(self, document_id: int):
    """Create and store an AI brief for one stored bill document.

    If the bill already has a "full" brief whose source document still has
    text, an "amendment" brief comparing the new text with that earlier text
    is generated; otherwise a "full" brief is generated. After the brief is
    committed, ``fetch_news_for_bill`` is queued for the bill.

    Returns:
        A dict whose ``status`` is ``already_processed``, ``no_document``,
        ``no_bill`` or ``ok`` (the latter with ``brief_id`` and ``brief_type``).

    Any exception is rolled back, logged, and retried after 300 seconds.
    """
    session = get_sync_db()
    try:
        # Idempotency: one brief per document.
        prior_brief = session.query(BillBrief).filter_by(document_id=document_id).first()
        if prior_brief:
            return {"status": "already_processed", "brief_id": prior_brief.id}

        doc = session.get(BillDocument, document_id)
        if not doc or not doc.raw_text:
            logger.warning(f"Document {document_id} not found or has no text")
            return {"status": "no_document"}

        bill = session.get(Bill, doc.bill_id)
        if not bill:
            return {"status": "no_bill"}

        sponsor = session.get(Member, bill.sponsor_id) if bill.sponsor_id else None
        if sponsor:
            sponsor_name, sponsor_party, sponsor_state = sponsor.name, sponsor.party, sponsor.state
        else:
            sponsor_name = sponsor_party = sponsor_state = "Unknown"

        introduced = str(bill.introduced_date) if bill.introduced_date else "Unknown"
        last_action_on = str(bill.latest_action_date) if bill.latest_action_date else "Unknown"
        bill_metadata = {
            "title": bill.title or "Unknown Title",
            "sponsor_name": sponsor_name,
            "party": sponsor_party,
            "state": sponsor_state,
            "chamber": bill.chamber or "Unknown",
            "introduced_date": introduced,
            "latest_action_text": bill.latest_action_text or "None",
            "latest_action_date": last_action_on,
        }

        # Most recent full brief for this bill, if an earlier version was analysed.
        latest_full = (
            session.query(BillBrief)
            .filter_by(bill_id=doc.bill_id, brief_type="full")
            .order_by(BillBrief.created_at.desc())
            .first()
        )

        provider = get_llm_provider()

        has_baseline = bool(latest_full and latest_full.document_id)
        baseline_doc = None
        if has_baseline:
            baseline_doc = session.get(BillDocument, latest_full.document_id)

        if baseline_doc and baseline_doc.raw_text:
            # New version of an already-analysed bill: describe what changed.
            logger.info(f"Generating amendment brief for document {document_id} (bill {doc.bill_id})")
            brief = provider.generate_amendment_brief(doc.raw_text, baseline_doc.raw_text, bill_metadata)
            brief_type = "amendment"
        else:
            if has_baseline:
                logger.info(f"Previous document unavailable, generating full brief for document {document_id}")
            else:
                logger.info(f"Generating full brief for document {document_id} (bill {doc.bill_id})")
            brief = provider.generate_brief(doc.raw_text, bill_metadata)
            brief_type = "full"

        db_brief = BillBrief(
            bill_id=doc.bill_id,
            document_id=document_id,
            brief_type=brief_type,
            summary=brief.summary,
            key_points=brief.key_points,
            risks=brief.risks,
            deadlines=brief.deadlines,
            topic_tags=brief.topic_tags,
            llm_provider=brief.llm_provider,
            llm_model=brief.llm_model,
        )
        session.add(db_brief)
        session.commit()
        session.refresh(db_brief)

        logger.info(f"{brief_type.capitalize()} brief {db_brief.id} created for bill {doc.bill_id} using {brief.llm_provider}/{brief.llm_model}")

        # Trigger news fetch now that the brief (and its topic tags) exists.
        from app.workers.news_fetcher import fetch_news_for_bill

        fetch_news_for_bill.delay(doc.bill_id)

        return {"status": "ok", "brief_id": db_brief.id, "brief_type": brief_type}

    except Exception as exc:
        session.rollback()
        logger.error(f"LLM processing failed for document {document_id}: {exc}")
        raise self.retry(exc=exc, countdown=300)  # 5 min backoff for LLM failures
    finally:
        session.close()
@celery_app.task(bind=True, max_retries=2, name="app.workers.news_fetcher.fetch_news_for_bill")
def fetch_news_for_bill(self, bill_id: str):
    """Fetch news articles for one bill and store those not already saved.

    The search query is built from the bill's title, short title, type and
    number. Articles without a URL, articles whose URL is already stored, and
    URLs repeated within the same result batch are skipped.

    Args:
        bill_id: Primary key of the bill to look up.

    Returns:
        ``{"status": "not_found"}`` when the bill does not exist, otherwise
        ``{"status": "ok", "saved": <number of new articles>}``.

    Any exception is rolled back, logged, and retried after 300 seconds.
    """
    db = get_sync_db()
    try:
        bill = db.get(Bill, bill_id)
        if not bill:
            return {"status": "not_found"}

        query = news_service.build_news_query(
            bill_title=bill.title,
            short_title=bill.short_title,
            sponsor_name=None,
            bill_type=bill.bill_type,
            bill_number=bill.bill_number,
        )

        saved = 0
        seen_urls = set()  # guards against the same URL appearing twice in one batch
        for article in news_service.fetch_newsapi_articles(query):
            url = article.get("url")
            if not url or url in seen_urls:
                continue
            seen_urls.add(url)

            # Idempotency: skip URLs that are already stored.
            if db.query(NewsArticle).filter_by(url=url).first():
                continue

            published_at = None
            raw_published = article.get("published_at")
            if raw_published:
                try:
                    # Map a trailing "Z" to an explicit UTC offset before parsing.
                    published_at = datetime.fromisoformat(raw_published.replace("Z", "+00:00"))
                except (AttributeError, TypeError, ValueError):
                    # Best effort: keep the article, just without a timestamp.
                    logger.debug(f"Unparseable published_at {raw_published!r} for {url}")

            db.add(NewsArticle(
                bill_id=bill_id,
                # `or ""` also covers a source that is present but None.
                source=(article.get("source") or "")[:200],
                headline=article.get("headline", ""),
                url=url,
                published_at=published_at,
                relevance_score=1.0,
            ))
            saved += 1

        db.commit()
        logger.info(f"Saved {saved} news articles for bill {bill_id}")
        return {"status": "ok", "saved": saved}

    except Exception as exc:
        db.rollback()
        logger.error(f"News fetch failed for {bill_id}: {exc}")
        raise self.retry(exc=exc, countdown=300)
    finally:
        db.close()
@celery_app.task(bind=True, name="app.workers.news_fetcher.fetch_news_for_active_bills")
def fetch_news_for_active_bills(self):
    """Scheduled task: queue a news fetch for recently active bills.

    Selects bills whose latest action falls within the last 7 days, most
    recent first, capped at 80 bills per run, and queues
    ``fetch_news_for_bill`` for each. The cap applies to a single run, not to
    a whole day.

    Returns:
        ``{"queued": <number of bills queued>}``.
    """
    session = get_sync_db()
    try:
        since = date.today() - timedelta(days=7)
        recent_query = session.query(Bill).filter(Bill.latest_action_date >= since)
        recent_bills = recent_query.order_by(Bill.latest_action_date.desc()).limit(80).all()

        for recent in recent_bills:
            fetch_news_for_bill.delay(recent.bill_id)

        queued = len(recent_bills)
        logger.info(f"Queued news fetch for {queued} active bills")
        return {"queued": queued}
    finally:
        session.close()
def calculate_composite_score(newsapi_count: int, gnews_count: int, gtrends_score: float) -> float:
    """Return the weighted composite trend score, always within 0–100.

    Weights:
        NewsAPI article count  -> 0–40 pts (saturates at 20 articles)
        Google News RSS count  -> 0–30 pts (saturates at 50 articles)
        Google Trends score    -> 0–30 pts (0–100 input)

    Every component is clamped to its range, so out-of-range inputs (negative
    counts, a trends value above 100) cannot push the result outside 0–100.

    Args:
        newsapi_count: Number of NewsAPI articles found.
        gnews_count: Number of Google News articles found.
        gtrends_score: Google Trends interest value, nominally 0–100.

    Returns:
        The composite score rounded to two decimals.
    """

    def _fraction(value: float, saturation: float) -> float:
        # Share of the saturation point reached, limited to [0, 1].
        return max(0.0, min(value / saturation, 1.0))

    newsapi_pts = _fraction(newsapi_count, 20) * 40
    gnews_pts = _fraction(gnews_count, 50) * 30
    gtrends_pts = _fraction(gtrends_score, 100) * 30
    return round(newsapi_pts + gnews_pts + gtrends_pts, 2)
@celery_app.task(bind=True, name="app.workers.trend_scorer.calculate_all_trend_scores")
def calculate_all_trend_scores(self):
    """Nightly task: store today's trend score for every recently active bill.

    Covers bills whose latest action is within the last 90 days and skips any
    bill that already has a ``TrendScore`` row for today. For each remaining
    bill it gathers a NewsAPI article count and a Google News count (both over
    30 days) plus a Google Trends score, combines them with
    ``calculate_composite_score`` and adds a ``TrendScore`` row. Work is
    committed every 20 scored bills and once more at the end.

    Returns:
        ``{"scored": <number of bills scored in this run>}``.

    Raises:
        Any exception from the database or the news/trends services; the
        session is rolled back and the error logged before re-raising.
    """
    session = get_sync_db()
    try:
        window_start = date.today() - timedelta(days=90)
        recent_bills = session.query(Bill).filter(Bill.latest_action_date >= window_start).all()

        score_date = date.today()
        scored = 0

        for bill in recent_bills:
            # Skip bills that already have a score for today.
            already_scored = (
                session.query(TrendScore)
                .filter_by(bill_id=bill.bill_id, score_date=score_date)
                .first()
            )
            if already_scored:
                continue

            # Topic tags come from the newest brief, when one exists.
            newest_brief = (
                session.query(BillBrief)
                .filter_by(bill_id=bill.bill_id)
                .order_by(BillBrief.created_at.desc())
                .first()
            )
            topic_tags = newest_brief.topic_tags if newest_brief else []

            search_query = news_service.build_news_query(
                bill_title=bill.title,
                short_title=bill.short_title,
                sponsor_name=None,
                bill_type=bill.bill_type,
                bill_number=bill.bill_number,
            )

            # News coverage over the last 30 days.
            newsapi_count = len(news_service.fetch_newsapi_articles(search_query, days=30))
            gnews_count = news_service.fetch_gnews_count(search_query, days=30)

            # Google Trends interest for keywords derived from the bill.
            trend_keywords = trends_service.keywords_for_bill(
                title=bill.title or "",
                short_title=bill.short_title or "",
                topic_tags=topic_tags,
            )
            gtrends_score = trends_service.get_trends_score(trend_keywords)

            session.add(TrendScore(
                bill_id=bill.bill_id,
                score_date=score_date,
                newsapi_count=newsapi_count,
                gnews_count=gnews_count,
                gtrends_score=gtrends_score,
                composite_score=calculate_composite_score(newsapi_count, gnews_count, gtrends_score),
            ))
            scored += 1

            # Commit in batches of 20 so progress is kept.
            if scored % 20 == 0:
                session.commit()

        session.commit()
        logger.info(f"Scored {scored} bills")
        return {"scored": scored}

    except Exception as exc:
        session.rollback()
        logger.error(f"Trend scoring failed: {exc}")
        raise
    finally:
        session.close()
${POSTGRES_PASSWORD:-congress} + POSTGRES_DB: ${POSTGRES_DB:-pocketveto} + volumes: + - postgres_data:/var/lib/postgresql/data + healthcheck: + test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER:-congress} -d ${POSTGRES_DB:-pocketveto}"] + interval: 5s + timeout: 5s + retries: 10 + networks: + - app_network + + redis: + image: redis:7-alpine + volumes: + - redis_data:/data + healthcheck: + test: ["CMD", "redis-cli", "ping"] + interval: 5s + timeout: 3s + retries: 10 + networks: + - app_network + + api: + build: + context: ./backend + dockerfile: Dockerfile + command: > + sh -c "alembic upgrade head && + uvicorn app.main:app --host 0.0.0.0 --port 8000 --reload" + env_file: .env + environment: + - DATABASE_URL=postgresql+asyncpg://${POSTGRES_USER:-congress}:${POSTGRES_PASSWORD:-congress}@postgres:5432/${POSTGRES_DB:-pocketveto} + - SYNC_DATABASE_URL=postgresql://${POSTGRES_USER:-congress}:${POSTGRES_PASSWORD:-congress}@postgres:5432/${POSTGRES_DB:-pocketveto} + - REDIS_URL=redis://redis:6379/0 + depends_on: + postgres: + condition: service_healthy + redis: + condition: service_healthy + networks: + - app_network + + worker: + build: + context: ./backend + dockerfile: Dockerfile + command: celery -A app.workers.celery_app worker --loglevel=info --concurrency=4 -Q polling,documents,llm,news + env_file: .env + environment: + - DATABASE_URL=postgresql+asyncpg://${POSTGRES_USER:-congress}:${POSTGRES_PASSWORD:-congress}@postgres:5432/${POSTGRES_DB:-pocketveto} + - SYNC_DATABASE_URL=postgresql://${POSTGRES_USER:-congress}:${POSTGRES_PASSWORD:-congress}@postgres:5432/${POSTGRES_DB:-pocketveto} + - REDIS_URL=redis://redis:6379/0 + depends_on: + postgres: + condition: service_healthy + redis: + condition: service_healthy + networks: + - app_network + + beat: + build: + context: ./backend + dockerfile: Dockerfile + command: celery -A app.workers.celery_app beat --loglevel=info --scheduler=redbeat.RedBeatScheduler + env_file: .env + environment: + - 
DATABASE_URL=postgresql+asyncpg://${POSTGRES_USER:-congress}:${POSTGRES_PASSWORD:-congress}@postgres:5432/${POSTGRES_DB:-pocketveto} + - SYNC_DATABASE_URL=postgresql://${POSTGRES_USER:-congress}:${POSTGRES_PASSWORD:-congress}@postgres:5432/${POSTGRES_DB:-pocketveto} + - REDIS_URL=redis://redis:6379/0 + depends_on: + redis: + condition: service_healthy + networks: + - app_network + + frontend: + build: + context: ./frontend + dockerfile: Dockerfile + environment: + - NODE_ENV=production + depends_on: + - api + networks: + - app_network + + nginx: + image: nginx:alpine + ports: + - "80:80" + volumes: + - ./nginx/nginx.conf:/etc/nginx/nginx.conf:ro + depends_on: + - api + - frontend + restart: unless-stopped + networks: + - app_network + +volumes: + postgres_data: + redis_data: + +networks: + app_network: + driver: bridge diff --git a/frontend/Dockerfile b/frontend/Dockerfile new file mode 100644 index 0000000..72ac230 --- /dev/null +++ b/frontend/Dockerfile @@ -0,0 +1,31 @@ +FROM node:20-alpine AS base + +WORKDIR /app + +FROM base AS deps +COPY package.json ./ +RUN npm install + +FROM base AS builder +COPY --from=deps /app/node_modules ./node_modules +COPY . . 
+ENV NEXT_TELEMETRY_DISABLED=1 +RUN mkdir -p public && npm run build + +FROM base AS runner +ENV NODE_ENV=production +ENV NEXT_TELEMETRY_DISABLED=1 + +RUN addgroup --system --gid 1001 nodejs && \ + adduser --system --uid 1001 nextjs + +COPY --from=builder /app/public ./public +COPY --from=builder --chown=nextjs:nodejs /app/.next/standalone ./ +COPY --from=builder --chown=nextjs:nodejs /app/.next/static ./.next/static + +USER nextjs +EXPOSE 3000 +ENV PORT=3000 +ENV HOSTNAME="0.0.0.0" + +CMD ["node", "server.js"] diff --git a/frontend/app/bills/[id]/page.tsx b/frontend/app/bills/[id]/page.tsx new file mode 100644 index 0000000..c8423dc --- /dev/null +++ b/frontend/app/bills/[id]/page.tsx @@ -0,0 +1,93 @@ +"use client"; + +import { use } from "react"; +import Link from "next/link"; +import { ArrowLeft, ExternalLink, User } from "lucide-react"; +import { useBill, useBillTrend } from "@/lib/hooks/useBills"; +import { AIBriefCard } from "@/components/bills/AIBriefCard"; +import { ActionTimeline } from "@/components/bills/ActionTimeline"; +import { TrendChart } from "@/components/bills/TrendChart"; +import { NewsPanel } from "@/components/bills/NewsPanel"; +import { FollowButton } from "@/components/shared/FollowButton"; +import { billLabel, formatDate, partyBadgeColor, cn } from "@/lib/utils"; + +export default function BillDetailPage({ params }: { params: Promise<{ id: string }> }) { + const { id } = use(params); + const billId = decodeURIComponent(id); + + const { data: bill, isLoading } = useBill(billId); + const { data: trendData } = useBillTrend(billId, 30); + + if (isLoading) { + return
Loading bill...
; + } + + if (!bill) { + return ( +
+

Bill not found.

+ ← Back to bills +
+ ); + } + + const label = billLabel(bill.bill_type, bill.bill_number); + + return ( +
+ {/* Header */} +
+
+
+ + + + + {label} + + {bill.chamber} + 119th Congress +
+

+ {bill.short_title || bill.title || "Untitled Bill"} +

+ {bill.sponsor && ( +
+ + + {bill.sponsor.name} + + {bill.sponsor.party && ( + + {bill.sponsor.party} + + )} + {bill.sponsor.state && {bill.sponsor.state}} +
+ )} +

+ Introduced: {formatDate(bill.introduced_date)} + {bill.congress_url && ( + + congress.gov + + )} +

+
+ +
+ + {/* Content grid */} +
+
+ + +
+
+ + +
+
+
+ ); +} diff --git a/frontend/app/bills/page.tsx b/frontend/app/bills/page.tsx new file mode 100644 index 0000000..001afe4 --- /dev/null +++ b/frontend/app/bills/page.tsx @@ -0,0 +1,109 @@ +"use client"; + +import { useState } from "react"; +import { Search, Filter } from "lucide-react"; +import { useBills } from "@/lib/hooks/useBills"; +import { BillCard } from "@/components/shared/BillCard"; + +const CHAMBERS = ["", "House", "Senate"]; +const TOPICS = [ + "", "healthcare", "taxation", "defense", "education", "immigration", + "environment", "housing", "infrastructure", "technology", "agriculture", + "judiciary", "foreign-policy", "veterans", "social-security", "trade", + "budget", "energy", "banking", "transportation", "labor", +]; + +export default function BillsPage() { + const [q, setQ] = useState(""); + const [chamber, setChamber] = useState(""); + const [topic, setTopic] = useState(""); + const [page, setPage] = useState(1); + + const params = { + ...(q && { q }), + ...(chamber && { chamber }), + ...(topic && { topic }), + page, + per_page: 20, + sort: "latest_action_date", + }; + + const { data, isLoading } = useBills(params); + + return ( +
+
+

Bills

+

Browse and search US Congressional legislation

+
+ + {/* Filters */} +
+
+ + { setQ(e.target.value.trim()); setPage(1); }} + className="w-full pl-9 pr-3 py-2 text-sm bg-card border border-border rounded-md focus:outline-none focus:ring-1 focus:ring-primary" + /> +
+ + +
+ + {/* Results */} + {isLoading ? ( +
Loading bills...
+ ) : ( + <> +
+ {data?.total ?? 0} bills found + Page {data?.page} of {data?.pages} +
+ +
+ {data?.items?.map((bill) => ( + + ))} +
+ + {/* Pagination */} + {data && data.pages > 1 && ( +
+ + +
+ )} + + )} +
+ ); +} diff --git a/frontend/app/following/page.tsx b/frontend/app/following/page.tsx new file mode 100644 index 0000000..827d02c --- /dev/null +++ b/frontend/app/following/page.tsx @@ -0,0 +1,88 @@ +"use client"; + +import Link from "next/link"; +import { Heart, X } from "lucide-react"; +import { useFollows, useRemoveFollow } from "@/lib/hooks/useFollows"; +import { billLabel } from "@/lib/utils"; + +export default function FollowingPage() { + const { data: follows = [], isLoading } = useFollows(); + const remove = useRemoveFollow(); + + const bills = follows.filter((f) => f.follow_type === "bill"); + const members = follows.filter((f) => f.follow_type === "member"); + const topics = follows.filter((f) => f.follow_type === "topic"); + + const Section = ({ title, items, renderValue }: { + title: string; + items: typeof follows; + renderValue: (v: string) => React.ReactNode; + }) => ( +
+

{title} ({items.length})

+ {!items.length ? ( +

Nothing followed yet.

+ ) : ( +
+ {items.map((f) => ( +
+
{renderValue(f.follow_value)}
+ +
+ ))} +
+ )} +
+ ); + + if (isLoading) return
Loading...
; + + return ( +
+
+

+ Following +

+

Manage what you follow

+
+ +
{ + const [congress, type, num] = v.split("-"); + return ( + + {type && num ? billLabel(type, parseInt(num)) : v} + + ); + }} + /> + +
( + + {v} + + )} + /> + +
( + + {v.replace("-", " ")} + + )} + /> +
+ ); +} diff --git a/frontend/app/globals.css b/frontend/app/globals.css new file mode 100644 index 0000000..6a08696 --- /dev/null +++ b/frontend/app/globals.css @@ -0,0 +1,55 @@ +@tailwind base; +@tailwind components; +@tailwind utilities; + +@layer base { + :root { + --background: 0 0% 100%; + --foreground: 224 71.4% 4.1%; + --card: 0 0% 100%; + --card-foreground: 224 71.4% 4.1%; + --primary: 220.9 39.3% 11%; + --primary-foreground: 210 20% 98%; + --secondary: 220 14.3% 95.9%; + --secondary-foreground: 220.9 39.3% 11%; + --muted: 220 14.3% 95.9%; + --muted-foreground: 220 8.9% 46.1%; + --accent: 220 14.3% 95.9%; + --accent-foreground: 220.9 39.3% 11%; + --destructive: 0 84.2% 60.2%; + --destructive-foreground: 210 20% 98%; + --border: 220 13% 91%; + --input: 220 13% 91%; + --ring: 224 71.4% 4.1%; + --radius: 0.5rem; + } + + .dark { + --background: 224 71.4% 4.1%; + --foreground: 210 20% 98%; + --card: 224 71.4% 6%; + --card-foreground: 210 20% 98%; + --primary: 210 20% 98%; + --primary-foreground: 220.9 39.3% 11%; + --secondary: 215 27.9% 16.9%; + --secondary-foreground: 210 20% 98%; + --muted: 215 27.9% 16.9%; + --muted-foreground: 217.9 10.6% 64.9%; + --accent: 215 27.9% 16.9%; + --accent-foreground: 210 20% 98%; + --destructive: 0 62.8% 30.6%; + --destructive-foreground: 210 20% 98%; + --border: 215 27.9% 16.9%; + --input: 215 27.9% 16.9%; + --ring: 216 12.2% 83.9%; + } +} + +@layer base { + * { + @apply border-border; + } + body { + @apply bg-background text-foreground; + } +} diff --git a/frontend/app/layout.tsx b/frontend/app/layout.tsx new file mode 100644 index 0000000..caa0df4 --- /dev/null +++ b/frontend/app/layout.tsx @@ -0,0 +1,35 @@ +import type { Metadata } from "next"; +import { Inter } from "next/font/google"; +import "./globals.css"; +import { Providers } from "./providers"; +import { Sidebar } from "@/components/shared/Sidebar"; + +const inter = Inter({ subsets: ["latin"] }); + +export const metadata: Metadata = { + title: "PocketVeto", + 
description: "Monitor US Congress with AI-powered bill summaries and trend analysis", +}; + +export default function RootLayout({ + children, +}: { + children: React.ReactNode; +}) { + return ( + + + +
+ +
+
+ {children} +
+
+
+
+ + + ); +} diff --git a/frontend/app/members/[id]/page.tsx b/frontend/app/members/[id]/page.tsx new file mode 100644 index 0000000..32b2cd2 --- /dev/null +++ b/frontend/app/members/[id]/page.tsx @@ -0,0 +1,55 @@ +"use client"; + +import { use } from "react"; +import Link from "next/link"; +import { ArrowLeft } from "lucide-react"; +import { useMember, useMemberBills } from "@/lib/hooks/useMembers"; +import { FollowButton } from "@/components/shared/FollowButton"; +import { BillCard } from "@/components/shared/BillCard"; +import { cn, partyBadgeColor } from "@/lib/utils"; + +export default function MemberDetailPage({ params }: { params: Promise<{ id: string }> }) { + const { id } = use(params); + const { data: member, isLoading } = useMember(id); + const { data: billsData } = useMemberBills(id); + + if (isLoading) return
Loading...
; + if (!member) return
Member not found.
; + + return ( +
+
+
+ + + +
+

{member.name}

+
+ {member.party && ( + + {member.party} + + )} + {member.state && {member.state}} + {member.chamber && {member.chamber}} + {member.district && District {member.district}} +
+
+
+ +
+ +
+

Sponsored Bills ({billsData?.total ?? 0})

+ {!billsData?.items?.length ? ( +

No bills found.

+ ) : ( +
+ {billsData.items.map((bill) => )} +
+ )} +
+
+ ); +} diff --git a/frontend/app/members/page.tsx b/frontend/app/members/page.tsx new file mode 100644 index 0000000..1aebb86 --- /dev/null +++ b/frontend/app/members/page.tsx @@ -0,0 +1,92 @@ +"use client"; + +import { useState } from "react"; +import Link from "next/link"; +import { Search } from "lucide-react"; +import { useMembers } from "@/lib/hooks/useMembers"; +import { FollowButton } from "@/components/shared/FollowButton"; +import { cn, partyBadgeColor } from "@/lib/utils"; + +export default function MembersPage() { + const [q, setQ] = useState(""); + const [chamber, setChamber] = useState(""); + const [party, setParty] = useState(""); + const [page, setPage] = useState(1); + + const { data, isLoading } = useMembers({ + ...(q && { q }), ...(chamber && { chamber }), ...(party && { party }), + page, per_page: 50, + }); + + return ( +
+
+

Members

+

Browse current Congress members

+
+ +
+
+ + { setQ(e.target.value.trim()); setPage(1); }} + className="w-full pl-9 pr-3 py-2 text-sm bg-card border border-border rounded-md focus:outline-none" + /> +
+ + +
+ + {isLoading ? ( +
Loading members...
+ ) : ( + <> +
{data?.total ?? 0} members
+
+ {data?.items?.map((member) => ( +
+
+ + {member.name} + +
+ {member.party && ( + + {member.party} + + )} + {member.state && {member.state}} + {member.chamber && {member.chamber}} +
+
+ +
+ ))} +
+ {data && data.pages > 1 && ( +
+ + +
+ )} + + )} +
+ ); +} diff --git a/frontend/app/page.tsx b/frontend/app/page.tsx new file mode 100644 index 0000000..cf34377 --- /dev/null +++ b/frontend/app/page.tsx @@ -0,0 +1,89 @@ +"use client"; + +import { TrendingUp, BookOpen, RefreshCw } from "lucide-react"; +import { useDashboard } from "@/lib/hooks/useDashboard"; +import { BillCard } from "@/components/shared/BillCard"; +import { adminAPI } from "@/lib/api"; +import { useState } from "react"; + +export default function DashboardPage() { + const { data, isLoading, refetch } = useDashboard(); + const [polling, setPolling] = useState(false); + + const triggerPoll = async () => { + setPolling(true); + try { + await adminAPI.triggerPoll(); + setTimeout(() => { refetch(); setPolling(false); }, 3000); + } catch { setPolling(false); } + }; + + return ( +
+
+
+

Dashboard

+

+ Your personalized Congressional activity feed +

+
+ +
+ + {isLoading ? ( +
Loading dashboard...
+ ) : ( +
+
+

+ + Your Feed + {data?.follows && ( + + ({data.follows.bills} bills · {data.follows.members} members · {data.follows.topics} topics) + + )} +

+ {!data?.feed?.length ? ( +
+

Your feed is empty.

+

Follow bills, members, or topics to see activity here.

+
+ ) : ( +
+ {data.feed.map((bill) => ( + + ))} +
+ )} +
+ +
+

+ + Trending +

+ {!data?.trending?.length ? ( +
+ No trend data yet. Run a poll to populate. +
+ ) : ( +
+ {data.trending.map((bill) => ( + + ))} +
+ )} +
+
+ )} +
+ ); +} diff --git a/frontend/app/providers.tsx b/frontend/app/providers.tsx new file mode 100644 index 0000000..df90edb --- /dev/null +++ b/frontend/app/providers.tsx @@ -0,0 +1,27 @@ +"use client"; + +import { QueryClient, QueryClientProvider } from "@tanstack/react-query"; +import { ThemeProvider } from "next-themes"; +import { useState } from "react"; + +export function Providers({ children }: { children: React.ReactNode }) { + const [queryClient] = useState( + () => + new QueryClient({ + defaultOptions: { + queries: { + retry: 1, + refetchOnWindowFocus: false, + }, + }, + }) + ); + + return ( + + + {children} + + + ); +} diff --git a/frontend/app/settings/page.tsx b/frontend/app/settings/page.tsx new file mode 100644 index 0000000..556d903 --- /dev/null +++ b/frontend/app/settings/page.tsx @@ -0,0 +1,188 @@ +"use client"; + +import { useState } from "react"; +import { useQuery, useMutation, useQueryClient } from "@tanstack/react-query"; +import { Settings, Cpu, RefreshCw, CheckCircle, XCircle, Play } from "lucide-react"; +import { settingsAPI, adminAPI } from "@/lib/api"; + +const LLM_PROVIDERS = [ + { value: "openai", label: "OpenAI (GPT-4o)", hint: "Requires OPENAI_API_KEY in .env" }, + { value: "anthropic", label: "Anthropic (Claude)", hint: "Requires ANTHROPIC_API_KEY in .env" }, + { value: "gemini", label: "Google Gemini", hint: "Requires GEMINI_API_KEY in .env" }, + { value: "ollama", label: "Ollama (Local)", hint: "Requires Ollama running on host" }, +]; + +export default function SettingsPage() { + const qc = useQueryClient(); + const { data: settings, isLoading } = useQuery({ + queryKey: ["settings"], + queryFn: () => settingsAPI.get(), + }); + + const updateSetting = useMutation({ + mutationFn: ({ key, value }: { key: string; value: string }) => settingsAPI.update(key, value), + onSuccess: () => qc.invalidateQueries({ queryKey: ["settings"] }), + }); + + const [testResult, setTestResult] = useState<{ status: string; detail?: string; summary_preview?: 
string; provider?: string } | null>(null); + const [testing, setTesting] = useState(false); + const [taskIds, setTaskIds] = useState>({}); + + const testLLM = async () => { + setTesting(true); + setTestResult(null); + try { + const result = await settingsAPI.testLLM(); + setTestResult(result); + } catch (e: unknown) { + setTestResult({ status: "error", detail: e instanceof Error ? e.message : String(e) }); + } finally { + setTesting(false); + } + }; + + const trigger = async (name: string, fn: () => Promise<{ task_id: string }>) => { + const result = await fn(); + setTaskIds((prev) => ({ ...prev, [name]: result.task_id })); + }; + + if (isLoading) return
Loading settings...
; + + return ( +
+
+

+ Settings +

+

Configure LLM provider and system settings

+
+ + {/* LLM Provider */} +
+

+ LLM Provider +

+

+ Current: {settings?.llm_provider} / {settings?.llm_model} +

+
+ {LLM_PROVIDERS.map(({ value, label, hint }) => ( + + ))} +
+ +
+ + {testResult && ( +
+ {testResult.status === "ok" ? ( + <> + + + {testResult.provider}/{testResult.summary_preview?.slice(0, 50)}... + + + ) : ( + <> + + {testResult.detail} + + )} +
+ )} +
+
+ + {/* Polling Settings */} +
+

+ Data Sources +

+
+
+
+
Congress.gov Poll Interval
+
How often to check for new bills
+
+ +
+
+
+
NewsAPI.org
+
100 requests/day free tier
+
+ + {settings?.newsapi_enabled ? "Configured" : "Not configured"} + +
+
+
+
Google Trends
+
Zeitgeist scoring via pytrends
+
+ + {settings?.pytrends_enabled ? "Enabled" : "Disabled"} + +
+
+
+ + {/* Manual Controls */} +
+

Manual Controls

+
+ + + +
+ {Object.entries(taskIds).map(([name, id]) => ( +

{name}: task {id} queued

+ ))} +
+
+ ); +} diff --git a/frontend/app/topics/page.tsx b/frontend/app/topics/page.tsx new file mode 100644 index 0000000..84c0457 --- /dev/null +++ b/frontend/app/topics/page.tsx @@ -0,0 +1,61 @@ +"use client"; + +import Link from "next/link"; +import { Tags } from "lucide-react"; +import { FollowButton } from "@/components/shared/FollowButton"; + +const TOPICS = [ + { tag: "healthcare", label: "Healthcare", desc: "Health policy, insurance, Medicare, Medicaid" }, + { tag: "taxation", label: "Taxation", desc: "Tax law, IRS, fiscal policy" }, + { tag: "defense", label: "Defense", desc: "Military, NDAA, national security" }, + { tag: "education", label: "Education", desc: "Schools, student loans, higher education" }, + { tag: "immigration", label: "Immigration", desc: "Border, visas, asylum, citizenship" }, + { tag: "environment", label: "Environment", desc: "Climate, EPA, conservation, energy" }, + { tag: "housing", label: "Housing", desc: "Affordable housing, mortgages, HUD" }, + { tag: "infrastructure", label: "Infrastructure", desc: "Roads, bridges, broadband, transit" }, + { tag: "technology", label: "Technology", desc: "AI, cybersecurity, telecom, internet" }, + { tag: "agriculture", label: "Agriculture", desc: "Farm bill, USDA, food policy" }, + { tag: "judiciary", label: "Judiciary", desc: "Courts, criminal justice, civil rights" }, + { tag: "foreign-policy", label: "Foreign Policy", desc: "Diplomacy, foreign aid, sanctions" }, + { tag: "veterans", label: "Veterans", desc: "VA, veteran benefits, military families" }, + { tag: "social-security", label: "Social Security", desc: "SS, Medicare, retirement benefits" }, + { tag: "trade", label: "Trade", desc: "Tariffs, trade agreements, WTO" }, + { tag: "budget", label: "Budget", desc: "Appropriations, debt ceiling, spending" }, + { tag: "energy", label: "Energy", desc: "Oil, gas, renewables, nuclear" }, + { tag: "banking", label: "Banking", desc: "Financial regulation, Fed, CFPB" }, + { tag: "transportation", label: 
"Transportation", desc: "FAA, DOT, aviation, rail" }, + { tag: "labor", label: "Labor", desc: "Minimum wage, unions, OSHA, employment" }, +]; + +export default function TopicsPage() { + return ( +
+
+

Topics

+

+ Follow topics to see related bills in your feed +

+
+ +
+ {TOPICS.map(({ tag, label, desc }) => ( +
+
+
+ + + {label} + +
+

{desc}

+
+ +
+ ))} +
+
+ ); +} diff --git a/frontend/components/bills/AIBriefCard.tsx b/frontend/components/bills/AIBriefCard.tsx new file mode 100644 index 0000000..11f57a8 --- /dev/null +++ b/frontend/components/bills/AIBriefCard.tsx @@ -0,0 +1,105 @@ +"use client"; + +import { AlertTriangle, CheckCircle, Clock, Cpu, Tag } from "lucide-react"; +import { BriefSchema } from "@/lib/types"; +import { formatDate } from "@/lib/utils"; + +interface AIBriefCardProps { + brief?: BriefSchema | null; +} + +export function AIBriefCard({ brief }: AIBriefCardProps) { + if (!brief) { + return ( +
+
+ +

AI Analysis

+
+

+ Analysis not yet generated. It will appear once the bill text has been processed. +

+
+ ); + } + + return ( +
+
+
+ +

AI Analysis

+
+ + {brief.llm_provider}/{brief.llm_model} · {formatDate(brief.created_at)} + +
+ + {brief.summary && ( +
+

Summary

+

{brief.summary}

+
+ )} + + {brief.key_points && brief.key_points.length > 0 && ( +
+

Key Points

+
    + {brief.key_points.map((point, i) => ( +
  • + + {point} +
  • + ))} +
+
+ )} + + {brief.risks && brief.risks.length > 0 && ( +
+

Risks & Concerns

+
    + {brief.risks.map((risk, i) => ( +
  • + + {risk} +
  • + ))} +
+
+ )} + + {brief.deadlines && brief.deadlines.length > 0 && ( +
+

Deadlines

+
    + {brief.deadlines.map((d, i) => ( +
  • + + + {d.date ? {formatDate(d.date)}: : ""} + {d.description} + +
  • + ))} +
+
+ )} + + {brief.topic_tags && brief.topic_tags.length > 0 && ( +
+ + {brief.topic_tags.map((tag) => ( + + {tag} + + ))} +
+ )} +
+ ); +} diff --git a/frontend/components/bills/ActionTimeline.tsx b/frontend/components/bills/ActionTimeline.tsx new file mode 100644 index 0000000..01dcae1 --- /dev/null +++ b/frontend/components/bills/ActionTimeline.tsx @@ -0,0 +1,47 @@ +import { Clock } from "lucide-react"; +import { BillAction } from "@/lib/types"; +import { formatDate } from "@/lib/utils"; + +interface ActionTimelineProps { + actions: BillAction[]; +} + +export function ActionTimeline({ actions }: ActionTimelineProps) { + if (!actions || actions.length === 0) { + return ( +
+

+ + Action History +

+

No actions recorded yet.

+
+ ); + } + + return ( +
+

+ + Action History + ({actions.length}) +

+ +
+
+
    + {actions.map((action, i) => ( +
  • +
    +
    + {formatDate(action.action_date)} + {action.chamber && ` · ${action.chamber}`} +
    +

    {action.action_text}

    +
  • + ))} +
+
+
+ ); +} diff --git a/frontend/components/bills/NewsPanel.tsx b/frontend/components/bills/NewsPanel.tsx new file mode 100644 index 0000000..a70e295 --- /dev/null +++ b/frontend/components/bills/NewsPanel.tsx @@ -0,0 +1,46 @@ +import { ExternalLink, Newspaper } from "lucide-react"; +import { NewsArticle } from "@/lib/types"; +import { formatDate } from "@/lib/utils"; + +interface NewsPanelProps { + articles?: NewsArticle[]; +} + +export function NewsPanel({ articles }: NewsPanelProps) { + return ( +
+

+ + Related News + {articles && articles.length > 0 && ( + ({articles.length}) + )} +

+ + {!articles || articles.length === 0 ? ( +

No news articles found yet.

+ ) : ( + + )} +
+ ); +} diff --git a/frontend/components/bills/TrendChart.tsx b/frontend/components/bills/TrendChart.tsx new file mode 100644 index 0000000..efad523 --- /dev/null +++ b/frontend/components/bills/TrendChart.tsx @@ -0,0 +1,86 @@ +"use client"; + +import { TrendingUp } from "lucide-react"; +import { + LineChart, + Line, + XAxis, + YAxis, + Tooltip, + ResponsiveContainer, + CartesianGrid, +} from "recharts"; +import { TrendScore } from "@/lib/types"; +import { formatDate } from "@/lib/utils"; + +interface TrendChartProps { + data?: TrendScore[]; +} + +export function TrendChart({ data }: TrendChartProps) { + const chartData = data?.map((d) => ({ + date: new Date(d.score_date).toLocaleDateString("en-US", { month: "short", day: "numeric" }), + score: Math.round(d.composite_score), + news: d.newsapi_count, + gnews: d.gnews_count, + })) ?? []; + + const latest = data?.[data.length - 1]?.composite_score; + + return ( +
+
+

+ + Public Interest +

+ {latest !== undefined && ( + {Math.round(latest)} + )} +
+ + {chartData.length === 0 ? ( +

+ Trend data not yet available. +

+ ) : ( + + + + + + + + + + )} + +
+ Score: 0–100 composite + NewsAPI + Google News + Trends +
+
+ ); +} diff --git a/frontend/components/shared/BillCard.tsx b/frontend/components/shared/BillCard.tsx new file mode 100644 index 0000000..4119f55 --- /dev/null +++ b/frontend/components/shared/BillCard.tsx @@ -0,0 +1,81 @@ +import Link from "next/link"; +import { TrendingUp, Calendar, User } from "lucide-react"; +import { Bill } from "@/lib/types"; +import { billLabel, cn, formatDate, partyBadgeColor, trendColor } from "@/lib/utils"; +import { FollowButton } from "./FollowButton"; + +interface BillCardProps { + bill: Bill; + compact?: boolean; +} + +export function BillCard({ bill, compact = false }: BillCardProps) { + const label = billLabel(bill.bill_type, bill.bill_number); + const score = bill.latest_trend?.composite_score; + const tags = bill.latest_brief?.topic_tags?.slice(0, 3) || []; + + return ( +
+
+
+
+ + {label} + + + {bill.chamber} + + {tags.map((tag) => ( + + {tag} + + ))} +
+ + +

+ {bill.short_title || bill.title || "Untitled Bill"} +

+ + + {!compact && bill.sponsor && ( +
+ + + {bill.sponsor.name} + + {bill.sponsor.party && ( + + {bill.sponsor.party} + + )} + {bill.sponsor.state && ( + {bill.sponsor.state} + )} +
+ )} +
+ +
+ + {score !== undefined && score > 0 && ( +
+ + {Math.round(score)} +
+ )} +
+
+ + {!compact && bill.latest_action_text && ( +

+ + {formatDate(bill.latest_action_date)} — {bill.latest_action_text} +

+ )} +
+ ); +} diff --git a/frontend/components/shared/FollowButton.tsx b/frontend/components/shared/FollowButton.tsx new file mode 100644 index 0000000..f753d0b --- /dev/null +++ b/frontend/components/shared/FollowButton.tsx @@ -0,0 +1,44 @@ +"use client"; + +import { Heart } from "lucide-react"; +import { useAddFollow, useIsFollowing, useRemoveFollow } from "@/lib/hooks/useFollows"; +import { cn } from "@/lib/utils"; + +interface FollowButtonProps { + type: "bill" | "member" | "topic"; + value: string; + label?: string; +} + +export function FollowButton({ type, value, label }: FollowButtonProps) { + const existing = useIsFollowing(type, value); + const add = useAddFollow(); + const remove = useRemoveFollow(); + + const isFollowing = !!existing; + const isPending = add.isPending || remove.isPending; + + const handleClick = () => { + if (isFollowing && existing) { + remove.mutate(existing.id); + } else { + add.mutate({ type, value }); + } + }; + + return ( + + ); +} diff --git a/frontend/components/shared/Sidebar.tsx b/frontend/components/shared/Sidebar.tsx new file mode 100644 index 0000000..1ea8c7e --- /dev/null +++ b/frontend/components/shared/Sidebar.tsx @@ -0,0 +1,63 @@ +"use client"; + +import Link from "next/link"; +import { usePathname } from "next/navigation"; +import { + LayoutDashboard, + FileText, + Users, + Tags, + Heart, + Settings, + Landmark, +} from "lucide-react"; +import { cn } from "@/lib/utils"; +import { ThemeToggle } from "./ThemeToggle"; + +const NAV = [ + { href: "/", label: "Dashboard", icon: LayoutDashboard }, + { href: "/bills", label: "Bills", icon: FileText }, + { href: "/members", label: "Members", icon: Users }, + { href: "/topics", label: "Topics", icon: Tags }, + { href: "/following", label: "Following", icon: Heart }, + { href: "/settings", label: "Settings", icon: Settings }, +]; + +export function Sidebar() { + const pathname = usePathname(); + + return ( + + ); +} diff --git a/frontend/components/shared/ThemeToggle.tsx 
b/frontend/components/shared/ThemeToggle.tsx new file mode 100644 index 0000000..8c71b63 --- /dev/null +++ b/frontend/components/shared/ThemeToggle.tsx @@ -0,0 +1,19 @@ +"use client"; + +import { Moon, Sun } from "lucide-react"; +import { useTheme } from "next-themes"; + +export function ThemeToggle() { + const { theme, setTheme } = useTheme(); + + return ( + + ); +} diff --git a/frontend/lib/api.ts b/frontend/lib/api.ts new file mode 100644 index 0000000..9c9fcfa --- /dev/null +++ b/frontend/lib/api.ts @@ -0,0 +1,86 @@ +import axios from "axios"; +import type { + Bill, + BillAction, + BillDetail, + DashboardData, + Follow, + Member, + NewsArticle, + PaginatedResponse, + SettingsData, + TrendScore, +} from "./types"; + +const apiClient = axios.create({ + baseURL: process.env.NEXT_PUBLIC_API_URL || "", + timeout: 30000, +}); + +// Bills +export const billsAPI = { + list: (params?: Record) => + apiClient.get>("/api/bills", { params }).then((r) => r.data), + get: (id: string) => + apiClient.get(`/api/bills/${id}`).then((r) => r.data), + getActions: (id: string) => + apiClient.get(`/api/bills/${id}/actions`).then((r) => r.data), + getNews: (id: string) => + apiClient.get(`/api/bills/${id}/news`).then((r) => r.data), + getTrend: (id: string, days?: number) => + apiClient.get(`/api/bills/${id}/trend`, { params: { days } }).then((r) => r.data), +}; + +// Members +export const membersAPI = { + list: (params?: Record) => + apiClient.get>("/api/members", { params }).then((r) => r.data), + get: (id: string) => + apiClient.get(`/api/members/${id}`).then((r) => r.data), + getBills: (id: string, params?: Record) => + apiClient.get>(`/api/members/${id}/bills`, { params }).then((r) => r.data), +}; + +// Follows +export const followsAPI = { + list: () => + apiClient.get("/api/follows").then((r) => r.data), + add: (follow_type: string, follow_value: string) => + apiClient.post("/api/follows", { follow_type, follow_value }).then((r) => r.data), + remove: (id: number) => + 
apiClient.delete(`/api/follows/${id}`), +}; + +// Dashboard +export const dashboardAPI = { + get: () => + apiClient.get("/api/dashboard").then((r) => r.data), +}; + +// Search +export const searchAPI = { + search: (q: string) => + apiClient.get<{ bills: Bill[]; members: Member[] }>("/api/search", { params: { q } }).then((r) => r.data), +}; + +// Settings +export const settingsAPI = { + get: () => + apiClient.get("/api/settings").then((r) => r.data), + update: (key: string, value: string) => + apiClient.put("/api/settings", { key, value }).then((r) => r.data), + testLLM: () => + apiClient.post("/api/settings/test-llm").then((r) => r.data), +}; + +// Admin +export const adminAPI = { + triggerPoll: () => + apiClient.post("/api/admin/trigger-poll").then((r) => r.data), + triggerMemberSync: () => + apiClient.post("/api/admin/trigger-member-sync").then((r) => r.data), + triggerTrendScores: () => + apiClient.post("/api/admin/trigger-trend-scores").then((r) => r.data), + getTaskStatus: (taskId: string) => + apiClient.get(`/api/admin/task-status/${taskId}`).then((r) => r.data), +}; diff --git a/frontend/lib/hooks/useBills.ts b/frontend/lib/hooks/useBills.ts new file mode 100644 index 0000000..8af57cd --- /dev/null +++ b/frontend/lib/hooks/useBills.ts @@ -0,0 +1,46 @@ +import { useQuery } from "@tanstack/react-query"; +import { billsAPI } from "../api"; + +export function useBills(params?: Record) { + return useQuery({ + queryKey: ["bills", params], + queryFn: () => billsAPI.list(params), + staleTime: 5 * 60 * 1000, + }); +} + +export function useBill(id: string) { + return useQuery({ + queryKey: ["bill", id], + queryFn: () => billsAPI.get(id), + staleTime: 2 * 60 * 1000, + enabled: !!id, + }); +} + +export function useBillActions(id: string) { + return useQuery({ + queryKey: ["bill-actions", id], + queryFn: () => billsAPI.getActions(id), + staleTime: 5 * 60 * 1000, + enabled: !!id, + }); +} + +export function useBillNews(id: string) { + return useQuery({ + queryKey: 
["bill-news", id], + queryFn: () => billsAPI.getNews(id), + staleTime: 10 * 60 * 1000, + enabled: !!id, + }); +} + +export function useBillTrend(id: string, days = 30) { + return useQuery({ + queryKey: ["bill-trend", id, days], + queryFn: () => billsAPI.getTrend(id, days), + staleTime: 60 * 60 * 1000, + enabled: !!id, + }); +} diff --git a/frontend/lib/hooks/useDashboard.ts b/frontend/lib/hooks/useDashboard.ts new file mode 100644 index 0000000..a7174ce --- /dev/null +++ b/frontend/lib/hooks/useDashboard.ts @@ -0,0 +1,11 @@ +import { useQuery } from "@tanstack/react-query"; +import { dashboardAPI } from "../api"; + +export function useDashboard() { + return useQuery({ + queryKey: ["dashboard"], + queryFn: () => dashboardAPI.get(), + staleTime: 2 * 60 * 1000, + refetchInterval: 5 * 60 * 1000, + }); +} diff --git a/frontend/lib/hooks/useFollows.ts b/frontend/lib/hooks/useFollows.ts new file mode 100644 index 0000000..e99a144 --- /dev/null +++ b/frontend/lib/hooks/useFollows.ts @@ -0,0 +1,32 @@ +import { useMutation, useQuery, useQueryClient } from "@tanstack/react-query"; +import { followsAPI } from "../api"; + +export function useFollows() { + return useQuery({ + queryKey: ["follows"], + queryFn: () => followsAPI.list(), + staleTime: 30 * 1000, + }); +} + +export function useAddFollow() { + const qc = useQueryClient(); + return useMutation({ + mutationFn: ({ type, value }: { type: string; value: string }) => + followsAPI.add(type, value), + onSuccess: () => qc.invalidateQueries({ queryKey: ["follows"] }), + }); +} + +export function useRemoveFollow() { + const qc = useQueryClient(); + return useMutation({ + mutationFn: (id: number) => followsAPI.remove(id), + onSuccess: () => qc.invalidateQueries({ queryKey: ["follows"] }), + }); +} + +export function useIsFollowing(type: string, value: string) { + const { data: follows = [] } = useFollows(); + return follows.find((f) => f.follow_type === type && f.follow_value === value); +} diff --git 
a/frontend/lib/hooks/useMembers.ts b/frontend/lib/hooks/useMembers.ts new file mode 100644 index 0000000..d2eeb24 --- /dev/null +++ b/frontend/lib/hooks/useMembers.ts @@ -0,0 +1,28 @@ +import { useQuery } from "@tanstack/react-query"; +import { membersAPI } from "../api"; + +export function useMembers(params?: Record) { + return useQuery({ + queryKey: ["members", params], + queryFn: () => membersAPI.list(params), + staleTime: 10 * 60 * 1000, + }); +} + +export function useMember(id: string) { + return useQuery({ + queryKey: ["member", id], + queryFn: () => membersAPI.get(id), + staleTime: 10 * 60 * 1000, + enabled: !!id, + }); +} + +export function useMemberBills(id: string) { + return useQuery({ + queryKey: ["member-bills", id], + queryFn: () => membersAPI.getBills(id), + staleTime: 5 * 60 * 1000, + enabled: !!id, + }); +} diff --git a/frontend/lib/types.ts b/frontend/lib/types.ts new file mode 100644 index 0000000..d3d172f --- /dev/null +++ b/frontend/lib/types.ts @@ -0,0 +1,103 @@ +export interface Member { + bioguide_id: string; + name: string; + first_name?: string; + last_name?: string; + party?: string; + state?: string; + chamber?: string; + district?: string; + photo_url?: string; +} + +export interface BriefSchema { + id: number; + summary?: string; + key_points?: string[]; + risks?: string[]; + deadlines?: { date: string | null; description: string }[]; + topic_tags?: string[]; + llm_provider?: string; + llm_model?: string; + created_at?: string; +} + +export interface TrendScore { + score_date: string; + newsapi_count: number; + gnews_count: number; + gtrends_score: number; + composite_score: number; +} + +export interface BillAction { + id: number; + action_date?: string; + action_text?: string; + action_type?: string; + chamber?: string; +} + +export interface NewsArticle { + id: number; + source?: string; + headline?: string; + url?: string; + published_at?: string; + relevance_score?: number; +} + +export interface Bill { + bill_id: string; + 
congress_number: number;
+  bill_type: string;
+  bill_number: number;
+  title?: string;
+  short_title?: string;
+  introduced_date?: string;
+  latest_action_date?: string;
+  latest_action_text?: string;
+  status?: string;
+  chamber?: string;
+  congress_url?: string;
+  sponsor?: Member;
+  latest_brief?: BriefSchema;
+  latest_trend?: TrendScore;
+  updated_at?: string;
+}
+
+export interface BillDetail extends Bill {
+  actions: BillAction[];
+  news_articles: NewsArticle[];
+  trend_scores: TrendScore[];
+  briefs: BriefSchema[];
+}
+
+export interface PaginatedResponse<T> {
+  items: T[];
+  total: number;
+  page: number;
+  per_page: number;
+  pages: number;
+}
+
+export interface Follow {
+  id: number;
+  follow_type: "bill" | "member" | "topic";
+  follow_value: string;
+  created_at: string;
+}
+
+export interface DashboardData {
+  feed: Bill[];
+  trending: Bill[];
+  follows: { bills: number; members: number; topics: number };
+}
+
+export interface SettingsData {
+  llm_provider: string;
+  llm_model: string;
+  congress_poll_interval_minutes: number;
+  newsapi_enabled: boolean;
+  pytrends_enabled: boolean;
+}
diff --git a/frontend/lib/utils.ts b/frontend/lib/utils.ts
new file mode 100644
index 0000000..85eedb1
--- /dev/null
+++ b/frontend/lib/utils.ts
@@ -0,0 +1,71 @@
+import { clsx, type ClassValue } from "clsx";
+import { twMerge } from "tailwind-merge";
+
+// Date-only ISO strings such as "2025-01-15" carry no time zone.
+const DATE_ONLY = /^\d{4}-\d{2}-\d{2}$/;
+
+/** Merge class names by passing the clsx result through twMerge. */
+export function cn(...inputs: ClassValue[]) {
+  return twMerge(clsx(inputs));
+}
+
+/**
+ * Format an ISO date or datetime string as e.g. "Jan 15, 2025".
+ *
+ * Returns "—" when the value is missing or cannot be parsed.
+ */
+export function formatDate(date?: string | null): string {
+  if (!date) return "—";
+  const parsed = new Date(date);
+  if (Number.isNaN(parsed.getTime())) return "—";
+  return parsed.toLocaleDateString("en-US", {
+    year: "numeric",
+    month: "short",
+    day: "numeric",
+    // A date-only string parses as UTC midnight, so formatting it in a zone
+    // west of UTC would show the previous day; render those in UTC instead.
+    timeZone: DATE_ONLY.test(date) ? "UTC" : undefined,
+  });
+}
+
+/** Build the display label for a bill, e.g. ("hr", 1234) -> "H.R. 1234". */
+export function billLabel(billType: string, billNumber: number): string {
+  const labels: Record<string, string> = {
+    hr: "H.R.",
+    s: "S.",
+    hjres: "H.J.Res.",
+    sjres: "S.J.Res.",
+    hconres: "H.Con.Res.",
+    sconres: "S.Con.Res.",
+    hres: "H.Res.",
+    sres: "S.Res.",
+  };
+  // Unknown bill types fall back to their upper-cased code.
+  return `${labels[billType?.toLowerCase()] ?? billType?.toUpperCase()} ${billNumber}`;
+}
+
+/** Tailwind text color class for a party name or one-letter code. */
+export function partyColor(party?: string): string {
+  if (!party) return "text-muted-foreground";
+  const p = party.toLowerCase();
+  if (p.includes("democrat") || p === "d") return "text-blue-500";
+  if (p.includes("republican") || p === "r") return "text-red-500";
+  return "text-yellow-500";
+}
+
+/** Tailwind badge (background + text) classes for a party name or code. */
+export function partyBadgeColor(party?: string): string {
+  if (!party) return "bg-muted text-muted-foreground";
+  const p = party.toLowerCase();
+  if (p.includes("democrat") || p === "d") return "bg-blue-100 text-blue-800 dark:bg-blue-900 dark:text-blue-200";
+  if (p.includes("republican") || p === "r") return "bg-red-100 text-red-800 dark:bg-red-900 dark:text-red-200";
+  return "bg-yellow-100 text-yellow-800 dark:bg-yellow-900 dark:text-yellow-200";
+}
+
+/** Tailwind text color class for a trend score; a missing or zero score is muted. */
+export function trendColor(score?: number): string {
+  if (!score) return "text-muted-foreground";
+  if (score >= 70) return "text-red-500";
+  if (score >= 40) return "text-yellow-500";
+  return "text-green-500";
+}
diff --git a/frontend/next.config.ts b/frontend/next.config.ts
new file mode 100644
index 0000000..05d9b64
--- /dev/null
+++ b/frontend/next.config.ts
@@ -0,0 +1,13 @@
+import type { NextConfig } from "next";
+
+const nextConfig: NextConfig = {
+  output: "standalone",
+  images: {
+    remotePatterns: [
+      { protocol: "https", hostname: "bioguide.congress.gov" },
+      { protocol: "https", hostname: "www.congress.gov" },
+    ],
+  },
+};
+
+export default nextConfig;
diff --git a/frontend/package.json b/frontend/package.json
new file mode 100644
index 0000000..3fe3e41
--- /dev/null
+++ b/frontend/package.json
@@ -0,0 +1,48 @@
+{
+  "name": "civicstack-frontend",
+  "version": "0.1.0",
+  "private": true,
+  "scripts": {
+    "dev": "next dev",
+    "build": "next build",
+    "start": "next start",
+    "lint": "next lint"
+  },
+  "dependencies": {
+    "next": "^15.2.0",
+    "react": "^19.0.0",
+    "react-dom": "^19.0.0",
+    "@tanstack/react-query": "^5.62.7",
+
"@tanstack/react-query-devtools": "^5.62.7", + "axios": "^1.7.9", + "recharts": "^2.13.3", + "next-themes": "^0.4.3", + "lucide-react": "^0.468.0", + "date-fns": "^4.1.0", + "zustand": "^5.0.2", + "clsx": "^2.1.1", + "tailwind-merge": "^2.5.5", + "class-variance-authority": "^0.7.1", + "@radix-ui/react-dialog": "^1.1.2", + "@radix-ui/react-dropdown-menu": "^2.1.2", + "@radix-ui/react-select": "^2.1.2", + "@radix-ui/react-switch": "^1.1.1", + "@radix-ui/react-tabs": "^1.1.1", + "@radix-ui/react-tooltip": "^1.1.4", +"@radix-ui/react-separator": "^1.1.0", + "@radix-ui/react-avatar": "^1.1.1", + "@radix-ui/react-label": "^2.1.0", + "@radix-ui/react-scroll-area": "^1.2.1" + }, + "devDependencies": { + "@types/node": "^22", + "@types/react": "^19", + "@types/react-dom": "^19", + "eslint": "^9", + "eslint-config-next": "^15.2.0", + "tailwindcss": "^3.4.16", + "autoprefixer": "^10.4.20", + "postcss": "^8.4.49", + "typescript": "^5" + } +} diff --git a/frontend/postcss.config.mjs b/frontend/postcss.config.mjs new file mode 100644 index 0000000..a982c64 --- /dev/null +++ b/frontend/postcss.config.mjs @@ -0,0 +1,8 @@ +const config = { + plugins: { + tailwindcss: {}, + autoprefixer: {}, + }, +}; + +export default config; diff --git a/frontend/tailwind.config.ts b/frontend/tailwind.config.ts new file mode 100644 index 0000000..d23e647 --- /dev/null +++ b/frontend/tailwind.config.ts @@ -0,0 +1,53 @@ +import type { Config } from "tailwindcss"; + +const config: Config = { + darkMode: ["class"], + content: [ + "./pages/**/*.{js,ts,jsx,tsx,mdx}", + "./components/**/*.{js,ts,jsx,tsx,mdx}", + "./app/**/*.{js,ts,jsx,tsx,mdx}", + ], + theme: { + extend: { + colors: { + border: "hsl(var(--border))", + input: "hsl(var(--input))", + ring: "hsl(var(--ring))", + background: "hsl(var(--background))", + foreground: "hsl(var(--foreground))", + primary: { + DEFAULT: "hsl(var(--primary))", + foreground: "hsl(var(--primary-foreground))", + }, + secondary: { + DEFAULT: "hsl(var(--secondary))", + 
foreground: "hsl(var(--secondary-foreground))", + }, + destructive: { + DEFAULT: "hsl(var(--destructive))", + foreground: "hsl(var(--destructive-foreground))", + }, + muted: { + DEFAULT: "hsl(var(--muted))", + foreground: "hsl(var(--muted-foreground))", + }, + accent: { + DEFAULT: "hsl(var(--accent))", + foreground: "hsl(var(--accent-foreground))", + }, + card: { + DEFAULT: "hsl(var(--card))", + foreground: "hsl(var(--card-foreground))", + }, + }, + borderRadius: { + lg: "var(--radius)", + md: "calc(var(--radius) - 2px)", + sm: "calc(var(--radius) - 4px)", + }, + }, + }, + plugins: [], +}; + +export default config; diff --git a/frontend/tsconfig.json b/frontend/tsconfig.json new file mode 100644 index 0000000..522b6a0 --- /dev/null +++ b/frontend/tsconfig.json @@ -0,0 +1,22 @@ +{ + "compilerOptions": { + "lib": ["dom", "dom.iterable", "esnext"], + "allowJs": true, + "skipLibCheck": true, + "strict": true, + "noEmit": true, + "esModuleInterop": true, + "module": "esnext", + "moduleResolution": "bundler", + "resolveJsonModule": true, + "isolatedModules": true, + "jsx": "preserve", + "incremental": true, + "plugins": [{ "name": "next" }], + "paths": { + "@/*": ["./*"] + } + }, + "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"], + "exclude": ["node_modules"] +} diff --git a/nginx/nginx.conf b/nginx/nginx.conf new file mode 100644 index 0000000..386c3cf --- /dev/null +++ b/nginx/nginx.conf @@ -0,0 +1,55 @@ +events { + worker_connections 1024; +} + +http { + include /etc/nginx/mime.types; + default_type application/octet-stream; + sendfile on; + keepalive_timeout 65; + + upstream api { + server api:8000; + } + + upstream frontend { + server frontend:3000; + } + + server { + listen 80; + server_name _; + + client_max_body_size 10M; + + # API + location /api/ { + proxy_pass http://api; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header 
X-Forwarded-Proto $scheme;
+            proxy_read_timeout 120s;
+            proxy_connect_timeout 10s;
+        }
+
+        # Next.js static assets (long cache)
+        location /_next/static/ {
+            proxy_pass http://frontend;
+            proxy_cache_valid 200 1d;
+            add_header Cache-Control "public, max-age=86400, immutable";
+        }
+
+        # Everything else → frontend
+        location / {
+            proxy_pass http://frontend;
+            proxy_set_header Host $host;
+            proxy_set_header X-Real-IP $remote_addr;
+            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+            # WebSocket support (Next.js HMR in dev)
+            proxy_http_version 1.1;
+            proxy_set_header Upgrade $http_upgrade;
+            proxy_set_header Connection "upgrade";
+        }
+    }
+}
diff --git a/queue_docs.py b/queue_docs.py
new file mode 100644
index 0000000..da0a0d5
--- /dev/null
+++ b/queue_docs.py
@@ -0,0 +1,19 @@
+from app.database import get_sync_db
+from app.models import Bill, BillDocument
+from app.workers.document_fetcher import fetch_bill_documents
+
+# Queue a document fetch for every bill without stored document text.
+db = get_sync_db()
+try:
+    fetched_ids = {r.bill_id for r in db.query(BillDocument.bill_id).filter(BillDocument.raw_text.isnot(None)).all()}
+    bills = db.query(Bill.bill_id).all()
+    queued = 0
+    for (bill_id,) in bills:
+        if bill_id not in fetched_ids:
+            fetch_bill_documents.apply_async(args=[bill_id], queue='documents')
+            print(f' queued: {bill_id}')
+            queued += 1
+    print(f'Queued {queued} bills for document fetching')
+finally:
+    # Release the session even if a query or an enqueue call fails.
+    db.close()
diff --git a/status.py b/status.py
new file mode 100644
index 0000000..9a8bcab
--- /dev/null
+++ b/status.py
@@ -0,0 +1,23 @@
+from app.database import get_sync_db
+from app.models import BillDocument, BillBrief, Bill
+
+# Print pipeline progress: bills known, documents fetched, briefs generated.
+db = get_sync_db()
+try:
+    total_bills = db.query(Bill).count()
+    docs = db.query(BillDocument).filter(BillDocument.raw_text.isnot(None)).count()
+    briefs = db.query(BillBrief).count()
+    full_briefs = db.query(BillBrief).filter_by(brief_type="full").count()
+    amendment_briefs = db.query(BillBrief).filter_by(brief_type="amendment").count()
+finally:
+    # Release the session even if a query fails.
+    db.close()
+
+# `briefs` counts every brief row, amendment briefs included, so subtracting it
+# can undercount the bills still waiting (or go negative). Compare against full
+# briefs instead. NOTE(review): assumes at most one full brief per bill - confirm.
+remaining = max(total_bills - full_briefs, 0)
+print(f"Total bills: {total_bills}")
+print(f"Documents fetched: {docs}")
+print(f"Briefs generated: {briefs} ({full_briefs} full, {amendment_briefs} amendments)")
+print(f"Remaining: {remaining}")