# Source code for compass.utilities.jurisdictions

"""Ordinance jurisdiction info"""

import logging
from warnings import warn
import importlib.resources
from functools import cached_property

import numpy as np
import pandas as pd

from compass.exceptions import COMPASSValueError
from compass.warn import COMPASSWarning


logger = logging.getLogger(__name__)
# Paths of the CSV files that hold the known jurisdiction info; every
# file in this set is read and concatenated by
# ``load_all_jurisdiction_info``
KNOWN_JURISDICTIONS_REGISTRY = {
    importlib.resources.files("compass") / "data" / "conus_jurisdictions.csv",
}
# Columns pulled from each DataFrame row by ``jurisdictions_from_df``.
# The order matters: the row values are unpacked positionally into
# (type, state, county, subdivision, FIPS code, website)
_JUR_COLS = [
    "Jurisdiction Type",
    "State",
    "County",
    "Subdivision",
    "FIPS",
    "Website",
]
# Jurisdiction types that ``Jurisdiction`` renders in front of the
# subdivision name ("<Type> of <Name>") and prefixes with "the".
# Entries are lower case because they are compared against the
# casefolded jurisdiction type
_JURISDICTION_TYPES_AS_PREFIXES = {
    "town",
    "township",
    "city",
    "borough",
    "village",
    "unorganized territory",
}



[docs]
class Jurisdiction:
    """Model a geographic jurisdiction used throughout COMPASS

    The class normalizes casing for the jurisdiction type and state and
    provides convenience properties for rendering jurisdiction names
    with correct prefixes. It is designed to align with ordinance
    validation logic that expects consistent casing and phrasing across
    states, counties, and municipal subdivisions.

    Notes
    -----
    Instances compare case-insensitively for type and state, while the
    county and subdivision name comparisons preserve their stored
    casing. Comparison against a ``str`` matches the casefolded
    :attr:`full_name`. Hashing uses the casefolded :attr:`full_name`,
    and ``str``/``repr`` conversions return :attr:`full_name` itself.

    The name properties are cached on first access, so mutating the
    instance attributes afterwards does not refresh them.
    """

    def __init__(
        self,
        subdivision_type,
        state,
        county=None,
        subdivision_name=None,
        code=None,
        website_url=None,
    ):
        """

        Parameters
        ----------
        subdivision_type : str
            Type of subdivision that this jurisdiction represents.
            Typical values are "state", "county", "town", "city",
            "borough", "parish", "township", etc.
        state : str
            Name of the state containing the jurisdiction.
        county : str, optional
            Name of the county containing the jurisdiction, if
            applicable. If the jurisdiction represents a state, leave
            this input unspecified. If the jurisdiction represents a
            county or a subdivision within a county, provide the county
            name here.

            .. IMPORTANT:: Make sure this input is capitalized properly!

            By default, ``None``.
        subdivision_name : str, optional
            Name of the subdivision that the jurisdiction represents, if
            applicable. If the jurisdiction represents a state or
            county, leave this input unspecified. Otherwise, provide the
            jurisdiction name here.

            .. IMPORTANT:: Make sure this input is capitalized properly!

            By default, ``None``.
        code : int or str, optional
            Optional jurisdiction code (typically FIPS or similar).
            By default, ``None``.
        website_url : str, optional
            Optional URL for the jurisdiction's main website.
            By default, ``None``.
        """
        # Only type and state are title-cased; county and subdivision
        # names are stored exactly as given (hence the docstring
        # reminders about capitalization)
        self.type = subdivision_type.title()
        self.state = state.title()
        self.county = county
        self.subdivision_name = subdivision_name
        self.code = code
        self.website_url = website_url

    @cached_property
    def full_name(self):
        """str: Comma-separated jurisdiction display name"""
        name_parts = [
            self.full_subdivision_phrase,
            self.full_county_phrase,
            self.state,
        ]

        # Empty phrases are dropped so no stray commas are rendered
        return ", ".join(filter(None, name_parts))

    @cached_property
    def full_name_the_prefixed(self):
        """str: Full location name prefixed with ``the`` as needed"""
        if self.type.casefold() == "state":
            return f"the state of {self.state}"

        if self.type.casefold() in _JURISDICTION_TYPES_AS_PREFIXES:
            return f"the {self.full_name}"

        return self.full_name

    @cached_property
    def full_subdivision_phrase(self):
        """str: Subdivision phrase for the jurisdiction or empty str"""
        if not self.subdivision_name:
            return ""

        # Prefix types read "City of X"; all others read "X Parish"
        if self.type.casefold() in _JURISDICTION_TYPES_AS_PREFIXES:
            return f"{self.type} of {self.subdivision_name}"

        return f"{self.subdivision_name} {self.type}"

    @cached_property
    def full_subdivision_phrase_the_prefixed(self):
        """str: Subdivision phrase prefixed with ``the`` as needed

        An empty string is returned when the jurisdiction has no
        subdivision name.
        """
        phrase = self.full_subdivision_phrase
        if not phrase:
            # Without a subdivision there is nothing to prefix; avoid
            # returning a dangling "the "
            return ""

        if self.type.casefold() in _JURISDICTION_TYPES_AS_PREFIXES:
            return f"the {phrase}"

        return phrase

    @cached_property
    def full_county_phrase(self):
        """str: County phrase for the jurisdiction or empty str"""
        if not self.county:
            return ""

        # With no subdivision, the jurisdiction itself is the
        # county-level entity, so its own type (e.g. "County",
        # "Parish") is used as the suffix
        if not self.subdivision_name:
            return f"{self.county} {self.type}"

        return f"{self.county} County"

    def __repr__(self):
        return str(self)

    def __str__(self):
        return self.full_name

    def __eq__(self, other):
        if isinstance(other, self.__class__):
            return (
                self.type.casefold() == other.type.casefold()
                and self.state.casefold() == other.state.casefold()
                and self.county == other.county
                and self.subdivision_name == other.subdivision_name
            )
        if isinstance(other, str):
            return self.full_name.casefold() == other.casefold()
        # Let Python try the reflected comparison; ``==`` still
        # evaluates to ``False`` if the other operand declines as well
        return NotImplemented

    def __hash__(self):
        return hash(self.full_name.casefold())




[docs]
def load_all_jurisdiction_info():
    """Load canonical jurisdiction metadata for the continental US

    Every CSV file listed in ``KNOWN_JURISDICTIONS_REGISTRY`` is read
    and the resulting tables are stacked into a single DataFrame.

    Returns
    -------
    pandas.DataFrame
        Table containing jurisdiction names, FIPS codes, official
        websites, and related attributes. The rows are labelled with a
        fresh ``0..n-1`` index.

    Notes
    -----
    Missing values are normalized to ``None`` to simplify downstream
    serialization.
    """
    frames = [
        pd.read_csv(fp).replace({np.nan: None})
        for fp in KNOWN_JURISDICTIONS_REGISTRY
    ]
    # Each file carries its own 0-based index. Renumber the rows so the
    # combined table never has duplicate index labels, which would break
    # index-aligned column assignment when several files are registered
    return pd.concat(frames, ignore_index=True)




[docs]
def jurisdiction_websites(jurisdiction_info=None):
    """Build a mapping of jurisdiction identifiers to website URLs

    Parameters
    ----------
    jurisdiction_info : pandas.DataFrame, optional
        DataFrame containing ``"FIPS"`` and ``"Website"`` columns. If
        ``None``, this info is loaded using
        :func:`load_all_jurisdiction_info`.
        By default, ``None``.

    Returns
    -------
    dict
        Mapping from jurisdiction FIPS codes to their primary website
        URLs. If a FIPS code appears more than once, the last row wins.

    Notes
    -----
    The helper uses FIPS codes rather than string names to avoid
    collisions between same-named jurisdictions in different states.
    """
    if jurisdiction_info is None:
        jurisdiction_info = load_all_jurisdiction_info()

    # Pair the two columns directly instead of going through
    # ``iterrows``, which builds a Series per row (slow) and coerces
    # every value in the row to a common dtype
    return dict(zip(jurisdiction_info["FIPS"], jurisdiction_info["Website"]))




[docs]
def load_jurisdictions_from_fp(jurisdiction_fp):
    """Load jurisdiction metadata for entries listed in a CSV file

    The requested rows are validated and whitespace-trimmed,
    deduplicated, and then matched against the canonical jurisdiction
    table. Matching is done on a casefolded key built from the
    identifying columns. Requests with no match are reported in a
    warning and dropped from the result.

    Parameters
    ----------
    jurisdiction_fp : path-like
        Path to csv file defining the jurisdictions for which info
        should be loaded. A "State" column is required and a "County"
        column is optional. If a "Subdivision" column is present, a
        "Jurisdiction Type" column must be given as well; otherwise
        only canonical entries without a subdivision are considered.

    Returns
    -------
    pandas.DataFrame
        Jurisdiction information, including FIPS codes and websites,
        for every matching entry in the lookup table.

    Raises
    ------
    COMPASSValueError
        If the input file is missing required columns (``State`` or
        ``Jurisdiction Type`` when subdivisions are provided).

    Notes
    -----
    Missing jurisdictions trigger warnings with a tabular summary.
    """
    requested = _validate_jurisdiction_input(
        pd.read_csv(jurisdiction_fp).replace({np.nan: None})
    )
    known = load_all_jurisdiction_info()

    merge_cols = ["County", "State"]
    if "Subdivision" in requested:
        merge_cols.extend(["Subdivision", "Jurisdiction Type"])
    else:
        # No subdivisions requested: match only against canonical
        # entries that have no subdivision
        no_subdivision = known["Subdivision"].isna()
        known = known[no_subdivision].reset_index(drop=True)

    # Collapse duplicate requests down to one row per merge key
    deduped = requested.groupby(merge_cols, dropna=False).first()
    deduped = deduped.reset_index()
    deduped = deduped.drop(columns="Unnamed: 0", errors="ignore")
    requested = deduped.replace({np.nan: None})

    requested["jur_merge"] = requested.apply(
        _build_merge_col, axis=1, merge_cols=merge_cols
    )
    known["jur_merge"] = known.apply(
        _build_merge_col, axis=1, merge_cols=merge_cols
    )

    # Left merge keeps every request; overlapping user columns get the
    # "_user" suffix while the canonical columns keep their names
    merged = requested.merge(
        known,
        on="jur_merge",
        how="left",
        suffixes=["_user", ""],
    )

    found = _filter_not_found_jurisdictions(merged, merge_cols)
    return _format_jurisdiction_df_for_output(found)




[docs]
def jurisdictions_from_df(jurisdiction_info=None):
    """Generate one Jurisdiction instance per row of a DataFrame

    Parameters
    ----------
    jurisdiction_info : pandas.DataFrame, optional
        DataFrame containing jurisdiction info with columns:
        ``["Jurisdiction Type", "State", "County", "Subdivision",
        "FIPS", "Website"]``. If ``None``, this info is loaded using
        :func:`load_all_jurisdiction_info`. By default, ``None``.

    Yields
    ------
    Jurisdiction
        Jurisdiction instance built from each row of the input
        DataFrame, in row order.
    """
    info = (
        load_all_jurisdiction_info()
        if jurisdiction_info is None
        else jurisdiction_info
    )

    for _, record in info.iterrows():
        # Values come out in the column order fixed by ``_JUR_COLS``
        kind, state_name, county_name, subdivision, fips_code, url = record[
            _JUR_COLS
        ]
        yield Jurisdiction(
            subdivision_type=kind,
            state=state_name,
            county=county_name,
            subdivision_name=subdivision,
            code=fips_code,
            website_url=url,
        )



def _validate_jurisdiction_input(jurisdictions):
    """Throw error if user is missing required columns"""
    if "State" not in jurisdictions:
        msg = "The jurisdiction input must have at least a 'State' column!"
        raise COMPASSValueError(msg)

    jurisdictions["State"] = jurisdictions["State"].str.strip()
    if "County" not in jurisdictions:
        jurisdictions["County"] = None
    else:
        jurisdictions["County"] = jurisdictions["County"].str.strip()

    if "Subdivision" in jurisdictions:
        if "Jurisdiction Type" not in jurisdictions:
            msg = (
                "The jurisdiction input must have a 'Jurisdiction Type' "
                "column if a 'Subdivision' column is provided (this helps "
                "avoid name clashes for certain subdivisions)!"
            )
            raise COMPASSValueError(msg)

        jurisdictions["Subdivision"] = jurisdictions["Subdivision"].str.strip()
        jurisdictions["Jurisdiction Type"] = (
            jurisdictions["Jurisdiction Type"].str.casefold().str.strip()
        )

    return jurisdictions


def _filter_not_found_jurisdictions(df, merge_cols):
    """Warn about rows with a null FIPS code, then drop them

    Returns a copy holding only the rows whose "FIPS" value is set.
    """
    _warn_about_missing_jurisdictions(df, merge_cols)
    has_fips = df["FIPS"].notna()
    return df[has_fips].copy()


def _warn_about_missing_jurisdictions(df, merge_cols):
    """Throw warning about jurisdictions that were not in the list"""
    not_found_jurisdictions = df[df["FIPS"].isna()]
    if len(not_found_jurisdictions):
        out_cols = {f"{col}_user": col for col in merge_cols}
        not_found_jurisdictions = not_found_jurisdictions[
            list(out_cols)
        ].rename(columns=out_cols)
        not_found_jurisdictions_str = not_found_jurisdictions[
            merge_cols
            # cspell: disable-next-line
        ].to_markdown(index=False, tablefmt="psql")
        msg = (
            "The following jurisdictions were not found! Please make sure to "
            "use proper spelling and capitalization.\n"
            f"{not_found_jurisdictions_str}"
        )
        warn(msg, COMPASSWarning)


def _format_jurisdiction_df_for_output(df):
    """Format jurisdiction DataFrame for output"""
    out_cols = [
        "County",
        "State",
        "Subdivision",
        "Jurisdiction Type",
        "FIPS",
        "Website",
    ]
    df["FIPS"] = df["FIPS"].astype(int)
    return df[out_cols].replace({np.nan: None}).reset_index(drop=True)


def _build_merge_col(row, merge_cols):
    """Build column to merge jurisdiction DataFrames on"""
    return " ".join(str(row[c]).casefold() for c in merge_cols)