"""initial schema

Revision ID: 0001
Revises:
Create Date: 2025-01-01 00:00:00.000000

"""
|
|
from typing import Sequence, Union
|
|
|
|
import sqlalchemy as sa
|
|
from alembic import op
|
|
from sqlalchemy.dialects.postgresql import JSONB
|
|
|
|
# revision identifiers, used by Alembic.
revision: str = "0001"
# No parent revision: this is the first migration in the chain.
down_revision: Union[str, None] = None
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
|
|
|
|
|
|
def upgrade() -> None:
    """Create the initial schema.

    Tables are created in foreign-key dependency order: ``members`` first,
    then ``bills`` (references ``members``), then the tables that reference
    ``bills`` / ``bill_documents`` / ``committees``, and finally the
    standalone ``follows`` and ``app_settings`` tables.

    The schema is PostgreSQL-specific: it uses ``JSONB`` columns, a generated
    ``tsvector`` column and GIN indexes.
    """
    # ── members ──────────────────────────────────────────────────────────────
    op.create_table(
        "members",
        sa.Column("bioguide_id", sa.String(), primary_key=True),
        sa.Column("name", sa.String(), nullable=False),
        sa.Column("first_name", sa.String()),
        sa.Column("last_name", sa.String()),
        sa.Column("party", sa.String(10)),
        sa.Column("state", sa.String(5)),
        sa.Column("chamber", sa.String(10)),
        sa.Column("district", sa.String(10)),
        sa.Column("photo_url", sa.String()),
        sa.Column("official_url", sa.String()),
        sa.Column("created_at", sa.DateTime(timezone=True), server_default=sa.func.now()),
        sa.Column("updated_at", sa.DateTime(timezone=True), server_default=sa.func.now()),
    )

    # ── bills ─────────────────────────────────────────────────────────────────
    op.create_table(
        "bills",
        sa.Column("bill_id", sa.String(), primary_key=True),
        sa.Column("congress_number", sa.Integer(), nullable=False),
        sa.Column("bill_type", sa.String(10), nullable=False),
        sa.Column("bill_number", sa.Integer(), nullable=False),
        sa.Column("title", sa.Text()),
        sa.Column("short_title", sa.Text()),
        sa.Column(
            "sponsor_id",
            sa.String(),
            sa.ForeignKey("members.bioguide_id"),
            nullable=True,
        ),
        sa.Column("introduced_date", sa.Date()),
        sa.Column("latest_action_date", sa.Date()),
        sa.Column("latest_action_text", sa.Text()),
        sa.Column("status", sa.String(100)),
        sa.Column("chamber", sa.String(10)),
        sa.Column("congress_url", sa.String()),
        sa.Column("govtrack_url", sa.String()),
        sa.Column("last_checked_at", sa.DateTime(timezone=True)),
        sa.Column("actions_fetched_at", sa.DateTime(timezone=True)),
        sa.Column("created_at", sa.DateTime(timezone=True), server_default=sa.func.now()),
        sa.Column("updated_at", sa.DateTime(timezone=True), server_default=sa.func.now()),
    )
    op.create_index("ix_bills_congress_number", "bills", ["congress_number"])
    op.create_index("ix_bills_latest_action_date", "bills", ["latest_action_date"])
    op.create_index("ix_bills_introduced_date", "bills", ["introduced_date"])
    op.create_index("ix_bills_chamber", "bills", ["chamber"])
    op.create_index("ix_bills_sponsor_id", "bills", ["sponsor_id"])

    # Full-text search vector (tsvector generated column) — manual, not in autogenerate.
    # Titles are weighted 'A' and the latest action text 'C'; coalesce() keeps a
    # NULL column from turning the whole concatenated vector into NULL.
    op.execute(
        """
        ALTER TABLE bills ADD COLUMN search_vector tsvector
        GENERATED ALWAYS AS (
            setweight(to_tsvector('english', coalesce(title, '')), 'A') ||
            setweight(to_tsvector('english', coalesce(short_title, '')), 'A') ||
            setweight(to_tsvector('english', coalesce(latest_action_text, '')), 'C')
        ) STORED
        """
    )
    op.execute("CREATE INDEX ix_bills_search_vector ON bills USING GIN(search_vector)")

    # ── bill_actions ──────────────────────────────────────────────────────────
    op.create_table(
        "bill_actions",
        sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True),
        sa.Column(
            "bill_id",
            sa.String(),
            sa.ForeignKey("bills.bill_id", ondelete="CASCADE"),
            nullable=False,
        ),
        sa.Column("action_date", sa.Date()),
        sa.Column("action_text", sa.Text()),
        sa.Column("action_type", sa.String(100)),
        sa.Column("chamber", sa.String(10)),
        sa.Column("created_at", sa.DateTime(timezone=True), server_default=sa.func.now()),
    )
    op.create_index("ix_bill_actions_bill_id", "bill_actions", ["bill_id"])
    op.create_index("ix_bill_actions_action_date", "bill_actions", ["action_date"])

    # ── bill_documents ────────────────────────────────────────────────────────
    op.create_table(
        "bill_documents",
        sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True),
        sa.Column(
            "bill_id",
            sa.String(),
            sa.ForeignKey("bills.bill_id", ondelete="CASCADE"),
            nullable=False,
        ),
        sa.Column("doc_type", sa.String(50)),
        sa.Column("doc_version", sa.String(50)),
        sa.Column("govinfo_url", sa.String()),
        sa.Column("raw_text", sa.Text()),
        sa.Column("fetched_at", sa.DateTime(timezone=True)),
        sa.Column("created_at", sa.DateTime(timezone=True), server_default=sa.func.now()),
    )
    op.create_index("ix_bill_documents_bill_id", "bill_documents", ["bill_id"])

    # ── bill_briefs ───────────────────────────────────────────────────────────
    op.create_table(
        "bill_briefs",
        sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True),
        sa.Column(
            "bill_id",
            sa.String(),
            sa.ForeignKey("bills.bill_id", ondelete="CASCADE"),
            nullable=False,
        ),
        # A brief outlives its source document: deleting the document only
        # clears this reference (SET NULL) instead of deleting the brief.
        sa.Column(
            "document_id",
            sa.Integer(),
            sa.ForeignKey("bill_documents.id", ondelete="SET NULL"),
            nullable=True,
        ),
        sa.Column("summary", sa.Text()),
        sa.Column("key_points", JSONB()),
        sa.Column("risks", JSONB()),
        sa.Column("deadlines", JSONB()),
        sa.Column("topic_tags", JSONB()),
        sa.Column("llm_provider", sa.String(50)),
        sa.Column("llm_model", sa.String(100)),
        sa.Column("created_at", sa.DateTime(timezone=True), server_default=sa.func.now()),
    )
    op.create_index("ix_bill_briefs_bill_id", "bill_briefs", ["bill_id"])
    op.execute("CREATE INDEX ix_bill_briefs_topic_tags ON bill_briefs USING GIN(topic_tags)")

    # ── committees ────────────────────────────────────────────────────────────
    op.create_table(
        "committees",
        sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True),
        sa.Column("committee_code", sa.String(20), unique=True, nullable=False),
        sa.Column("name", sa.String(500)),
        sa.Column("chamber", sa.String(10)),
        sa.Column("committee_type", sa.String(50)),
    )

    # ── committee_bills ───────────────────────────────────────────────────────
    op.create_table(
        "committee_bills",
        sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True),
        sa.Column(
            "committee_id",
            sa.Integer(),
            sa.ForeignKey("committees.id", ondelete="CASCADE"),
            nullable=False,
        ),
        sa.Column(
            "bill_id",
            sa.String(),
            sa.ForeignKey("bills.bill_id", ondelete="CASCADE"),
            nullable=False,
        ),
        sa.Column("referral_date", sa.Date()),
    )
    op.create_index("ix_committee_bills_bill_id", "committee_bills", ["bill_id"])
    op.create_index("ix_committee_bills_committee_id", "committee_bills", ["committee_id"])

    # ── news_articles ─────────────────────────────────────────────────────────
    op.create_table(
        "news_articles",
        sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True),
        sa.Column(
            "bill_id",
            sa.String(),
            sa.ForeignKey("bills.bill_id", ondelete="CASCADE"),
            nullable=False,
        ),
        sa.Column("source", sa.String(200)),
        sa.Column("headline", sa.Text()),
        sa.Column("url", sa.String(), unique=True),
        sa.Column("published_at", sa.DateTime(timezone=True)),
        # server_default (not the Python-side ``default=``) so the DEFAULT is
        # actually emitted into the CREATE TABLE statement.
        sa.Column("relevance_score", sa.Float(), server_default=sa.text("0")),
        sa.Column("created_at", sa.DateTime(timezone=True), server_default=sa.func.now()),
    )
    op.create_index("ix_news_articles_bill_id", "news_articles", ["bill_id"])
    op.create_index("ix_news_articles_published_at", "news_articles", ["published_at"])

    # ── trend_scores ──────────────────────────────────────────────────────────
    op.create_table(
        "trend_scores",
        sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True),
        sa.Column(
            "bill_id",
            sa.String(),
            sa.ForeignKey("bills.bill_id", ondelete="CASCADE"),
            nullable=False,
        ),
        sa.Column("score_date", sa.Date(), nullable=False),
        # Counters and scores default to zero at the database level; a
        # Python-side ``default=`` would be ignored by the generated DDL.
        sa.Column("newsapi_count", sa.Integer(), server_default=sa.text("0")),
        sa.Column("gnews_count", sa.Integer(), server_default=sa.text("0")),
        sa.Column("gtrends_score", sa.Float(), server_default=sa.text("0")),
        sa.Column("composite_score", sa.Float(), server_default=sa.text("0")),
        # At most one score row per bill per day.
        sa.UniqueConstraint("bill_id", "score_date", name="uq_trend_scores_bill_date"),
    )
    op.create_index("ix_trend_scores_bill_id", "trend_scores", ["bill_id"])
    op.create_index("ix_trend_scores_score_date", "trend_scores", ["score_date"])
    op.create_index("ix_trend_scores_composite", "trend_scores", ["composite_score"])

    # ── follows ───────────────────────────────────────────────────────────────
    op.create_table(
        "follows",
        sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True),
        sa.Column("follow_type", sa.String(20), nullable=False),
        sa.Column("follow_value", sa.String(), nullable=False),
        sa.Column("created_at", sa.DateTime(timezone=True), server_default=sa.func.now()),
        sa.UniqueConstraint("follow_type", "follow_value", name="uq_follows_type_value"),
    )

    # ── app_settings ──────────────────────────────────────────────────────────
    op.create_table(
        "app_settings",
        sa.Column("key", sa.String(), primary_key=True),
        sa.Column("value", sa.String()),
        sa.Column("updated_at", sa.DateTime(timezone=True), server_default=sa.func.now()),
    )
|
|
|
|
|
|
def downgrade() -> None:
    """Drop every table created by ``upgrade()``.

    Tables are dropped in the reverse of their creation order so that each
    table is removed before the table its foreign keys point at. No explicit
    index drops are issued; only the tables themselves are dropped.
    """
    # Standalone tables (no foreign keys).
    op.drop_table("app_settings")
    op.drop_table("follows")

    # Tables referencing ``bills`` (and ``committees`` / ``bill_documents``).
    op.drop_table("trend_scores")
    op.drop_table("news_articles")
    op.drop_table("committee_bills")
    op.drop_table("committees")
    # ``bill_briefs`` references ``bill_documents``, so it must go first.
    op.drop_table("bill_briefs")
    op.drop_table("bill_documents")
    op.drop_table("bill_actions")

    # ``bills`` references ``members`` via ``sponsor_id``.
    op.drop_table("bills")
    op.drop_table("members")