Source code for routee.transit.depot_deadhead

import geopandas as gpd
import pandas as pd
from geopy.distance import geodesic
from gtfsblocks import Feed
from shapely.geometry import Point

from routee.transit.ntd import NTDAgencyMatch, load_ntd_facilities, match_agency_to_ntd

# Public API of this module.
# The first three names are imported from ``routee.transit.ntd`` above and are
# re-exported here for backward compatibility; the remaining three are the
# depot-deadhead functions defined in this module.
__all__ = [
    "load_ntd_facilities",
    "match_agency_to_ntd",
    "NTDAgencyMatch",
    "create_depot_deadhead_trips",
    "infer_depot_trip_endpoints",
    "create_depot_deadhead_stops",
]


def _depot_trip_record(anchor_trip: pd.Series, block_id: object, pull_out: bool) -> dict:
    """Build one trips.txt-style record for a depot deadhead trip.

    ``anchor_trip`` is the revenue trip the deadhead connects to: the first trip
    of the block for a pull-out, the last trip of the block for a pull-in.
    """
    anchor_id = anchor_trip["trip_id"]
    if pull_out:
        trip_id = f"depot_to_{anchor_id}"
        route = f"from_depot_{block_id}"
        trip_type = "pull-out"
        description = f"Deadhead from depot to {anchor_id}"
    else:
        trip_id = f"{anchor_id}_to_depot"
        route = f"to_depot_{block_id}"
        trip_type = "pull-in"
        description = f"Deadhead from {anchor_id} to depot"

    return {
        "trip_id": trip_id,
        "trip_type": trip_type,
        "route_id": route,
        "service_id": anchor_trip["service_id"],
        "block_id": block_id,
        "shape_id": route,
        "route_short_name": route,
        "route_type": 3,  # 3 means bus
        "route_desc": description,
        "agency_id": anchor_trip.get("agency_id", None),
    }


def create_depot_deadhead_trips(
    trips_df: pd.DataFrame, stop_times_df: pd.DataFrame
) -> pd.DataFrame:
    """Create deadhead trips from and to depots for each block.

    This function only creates rows for the trips.txt DataFrame; it does not
    generate shape traces for them.

    For every non-null ``block_id`` two records are produced: a ``"pull-out"``
    trip ending at the block's chronologically first trip and a ``"pull-in"``
    trip starting from its chronologically last trip. Trips are ordered by
    their earliest ``arrival_time`` in ``stop_times_df``.

    Parameters
    ----------
    trips_df : pd.DataFrame
        Trips of the selected date/route. Must contain ``trip_id``,
        ``block_id`` and ``service_id``. If a ``from_trip`` column is present,
        rows where it is set are treated as between-trip deadheads and ignored.
        An optional ``agency_id`` column is carried over.
    stop_times_df : pd.DataFrame
        Stop times with at least ``trip_id`` and ``arrival_time``.

    Returns
    -------
    pd.DataFrame
        One row per deadhead trip with columns ``trip_id``, ``trip_type``,
        ``route_id``, ``service_id``, ``block_id``, ``shape_id``,
        ``route_short_name``, ``route_type``, ``route_desc`` and ``agency_id``.
        The columns are present even when no trips are created. Blocks that
        contain no revenue trips (only ``from_trip`` rows) are skipped.
    """
    output_columns = [
        "trip_id",
        "trip_type",
        "route_id",
        "service_id",
        "block_id",
        "shape_id",
        "route_short_name",
        "route_type",
        "route_desc",
        "agency_id",
    ]

    # Earliest arrival per trip, attached to the trips so each block can be
    # ordered chronologically.
    trip_start_times = (
        stop_times_df.groupby("trip_id")["arrival_time"].min().reset_index()
    )
    trips_with_times = trips_df.merge(trip_start_times, on="trip_id", how="left")
    has_between_trip_deadheads = "from_trip" in trips_with_times.columns

    depot_trips = []
    # sort=False keeps blocks in order of first appearance; groupby drops rows
    # with a null block_id.
    for block_id, block_trips in trips_with_times.groupby("block_id", sort=False):
        # Exclude any between-trip deadhead trips that may have been added.
        if has_between_trip_deadheads:
            block_trips = block_trips.loc[block_trips["from_trip"].isna()]
        if block_trips.empty:
            # Nothing to attach a pull-out/pull-in to.
            continue

        block_trips = block_trips.sort_values(by="arrival_time")
        depot_trips.append(
            _depot_trip_record(block_trips.iloc[0], block_id, pull_out=True)
        )
        depot_trips.append(
            _depot_trip_record(block_trips.iloc[-1], block_id, pull_out=False)
        )

    return pd.DataFrame(depot_trips, columns=output_columns)
def infer_depot_trip_endpoints(
    trips_df: pd.DataFrame,
    feed: Feed,
    depots_gdf: gpd.GeoDataFrame,
) -> tuple[gpd.GeoDataFrame, gpd.GeoDataFrame, gpd.GeoDataFrame]:
    """Match every block to a depot and attach origin/destination geometry.

    For each block, the first and last stop events (by ``arrival_time``) are
    located and the depot minimising *pull-out + pull-in* distance is chosen.

    Parameters
    ----------
    trips_df : pd.DataFrame
        Trips of the selected date/route; ``trip_id`` and ``block_id`` are used.
    feed : Feed
        GTFS feed object; its ``stop_times`` and ``stops`` tables are used.
    depots_gdf : gpd.GeoDataFrame
        Point GeoDataFrame of candidate depot locations. If it has no CRS it is
        assumed to be EPSG:4326, otherwise it is reprojected to EPSG:4326.
        If a ``depot_priority`` column is present, only depots holding the
        lowest non-null priority value are candidates; distance is minimised
        within that tier. If every priority is null, all depots are candidates.

    Returns
    -------
    tuple[GeoDataFrame, GeoDataFrame, GeoDataFrame]
        ``(first_stops_gdf, last_stops_gdf, depots_gdf)``.

        * ``first_stops_gdf`` has one row per block with the first stop
          (``stop_id``, ``arrival_time``, ``stop_lat``, ``stop_lon``),
          ``nearest_depot_idx``, ``geometry_origin`` (depot) and
          ``geometry_destination`` (first stop, active geometry).
        * ``last_stops_gdf`` has one row per block with the last stop,
          ``departure_time`` (the stop's arrival time), ``nearest_depot_idx``,
          ``geometry_origin`` (last stop, active geometry) and
          ``geometry_destination`` (depot).
        * ``depots_gdf`` is the full depot GeoDataFrame in EPSG:4326, so callers
          can look up metadata by ``nearest_depot_idx``.

        When the depot table has ``NTD ID``, ``Agency Name``, ``Facility Name``
        or ``Facility Type`` columns, they are copied to both stop frames as
        ``depot_ntd_id``, ``depot_agency_name``, ``depot_facility_name`` and
        ``depot_facility_type``.

    Raises
    ------
    ValueError
        If ``depots_gdf`` is empty.

    Notes
    -----
    Stop events lacking a ``block_id``, ``stop_id``, ``arrival_time`` or
    coordinates are ignored when locating block endpoints; a block with no
    complete stop event is absent from the outputs.
    """
    if depots_gdf.empty:
        raise ValueError("depots_gdf must contain at least one candidate depot.")

    endpoint_cols = ["block_id", "stop_id", "arrival_time", "stop_lat", "stop_lon"]

    # One row per (block, stop event), ordered chronologically within a block.
    # Incomplete rows are dropped first so that the first/last ROW of each block
    # is internally consistent. GroupBy.first()/last() would instead pick the
    # first/last non-null value of each column independently, which can pair a
    # stop_id with the time or coordinates of a different stop.
    block_stop_events = (
        feed.stop_times.merge(
            trips_df[["trip_id", "block_id"]], on="trip_id", how="right"
        )
        .merge(feed.stops, on="stop_id", how="left")
        .dropna(subset=endpoint_cols)
        .sort_values(by=["block_id", "arrival_time"])
    )
    first_stops = block_stop_events.drop_duplicates(subset="block_id", keep="first")[
        endpoint_cols
    ].reset_index(drop=True)
    last_stops = block_stop_events.drop_duplicates(subset="block_id", keep="last")[
        endpoint_cols
    ].reset_index(drop=True)

    first_stops_gdf = gpd.GeoDataFrame(
        first_stops,
        geometry=gpd.points_from_xy(first_stops["stop_lon"], first_stops["stop_lat"]),
        crs="EPSG:4326",
    )
    last_stops_gdf = gpd.GeoDataFrame(
        last_stops,
        geometry=gpd.points_from_xy(last_stops["stop_lon"], last_stops["stop_lat"]),
        crs="EPSG:4326",
    )

    # Ensure depot geometries are in WGS84.
    if depots_gdf.crs is None:
        depots_gdf = depots_gdf.set_crs(epsg=4326)
    else:
        depots_gdf = depots_gdf.to_crs(epsg=4326)

    # Project to Web Mercator (EPSG:3857) for planar distance computations.
    # These distances are only used to rank candidate depots; they are never
    # written to the outputs.
    proj_crs = "EPSG:3857"
    depots_proj = depots_gdf.to_crs(proj_crs)

    # The candidate tier does not depend on the block, so select it once:
    # the lowest non-null depot_priority if any, otherwise every depot.
    candidates = depots_proj
    if "depot_priority" in depots_proj.columns:
        priority = depots_proj["depot_priority"]
        if priority.notna().any():
            candidates = depots_proj[priority == priority.min()]
    candidate_geoms = candidates.geometry

    first_geoms = first_stops_gdf.to_crs(proj_crs).set_index("block_id").geometry
    last_geoms = last_stops_gdf.to_crs(proj_crs).set_index("block_id").geometry

    best_depot_idx: dict[object, int] = {}
    for block_id, first_geom in first_geoms.items():
        # Pull-out + pull-in distance for every candidate depot.
        total = candidate_geoms.distance(first_geom) + candidate_geoms.distance(
            last_geoms.loc[block_id]
        )
        best_depot_idx[block_id] = int(total.idxmin())

    # Lookup from depot row index to WGS84 geometry.
    depot_geom_by_idx = depots_gdf.geometry.to_dict()

    first_stops_gdf["nearest_depot_idx"] = first_stops_gdf["block_id"].map(
        best_depot_idx
    )
    last_stops_gdf["nearest_depot_idx"] = last_stops_gdf["block_id"].map(
        best_depot_idx
    )

    # Pull-out: depot -> first stop.
    first_stops_gdf["geometry_origin"] = first_stops_gdf["nearest_depot_idx"].map(
        depot_geom_by_idx
    )
    first_stops_gdf["geometry_destination"] = first_stops_gdf.geometry
    # Pull-in: last stop -> depot.
    last_stops_gdf["geometry_destination"] = last_stops_gdf["nearest_depot_idx"].map(
        depot_geom_by_idx
    )
    last_stops_gdf["geometry_origin"] = last_stops_gdf.geometry

    # Attach NTD metadata to both stop frames so callers can identify the
    # matched depot without rejoining on nearest_depot_idx themselves.
    ntd_meta_cols: dict[str, str] = {
        "NTD ID": "depot_ntd_id",
        "Agency Name": "depot_agency_name",
        "Facility Name": "depot_facility_name",
        "Facility Type": "depot_facility_type",
    }
    for src_col, dst_col in ntd_meta_cols.items():
        if src_col not in depots_gdf.columns:
            continue
        value_by_idx = depots_gdf[src_col].to_dict()
        first_stops_gdf[dst_col] = first_stops_gdf["nearest_depot_idx"].map(
            value_by_idx
        )
        last_stops_gdf[dst_col] = last_stops_gdf["nearest_depot_idx"].map(value_by_idx)

    # The pull-in deadhead departs the last stop at that stop's arrival time.
    last_stops_gdf["departure_time"] = last_stops_gdf["arrival_time"]
    last_stops_gdf = last_stops_gdf.drop(columns=["arrival_time"])

    # Drop the temporary stop-point column and make the stop-side geometry the
    # active geometry of each frame.
    first_stops_gdf = gpd.GeoDataFrame(
        first_stops_gdf.drop(columns=["geometry"]),
        geometry="geometry_destination",
        crs="EPSG:4326",
    )
    last_stops_gdf = gpd.GeoDataFrame(
        last_stops_gdf.drop(columns=["geometry"]),
        geometry="geometry_origin",
        crs="EPSG:4326",
    )

    return first_stops_gdf, last_stops_gdf, depots_gdf
def _geodesic_leg_lengths_m(legs: pd.DataFrame) -> pd.Series:
    """Return the geodesic ``geometry_origin`` -> ``geometry_destination`` length in meters per row."""
    lengths = [
        geodesic((origin.y, origin.x), (destination.y, destination.x)).meters
        for origin, destination in zip(
            legs["geometry_origin"], legs["geometry_destination"]
        )
    ]
    return pd.Series(lengths, index=legs.index, dtype="float64")


def _interleave(first_values, second_values) -> list:
    """Return ``[a0, b0, a1, b1, ...]`` from two equally long iterables."""
    return [value for pair in zip(first_values, second_values) for value in pair]


def _depot_stop_ids(legs: pd.DataFrame) -> pd.Series:
    """Return the ``depot_{nearest_depot_idx}`` stop id for every row."""
    return "depot_" + legs["nearest_depot_idx"].astype(str)


def _depot_stop_rows(legs: pd.DataFrame, depot_geom_col: str) -> pd.DataFrame:
    """Build stops.txt-style rows for the depot end of each deadhead leg."""
    if "depot_facility_name" in legs.columns:
        stop_names = legs["depot_facility_name"].to_numpy()
    else:
        stop_names = [""] * len(legs)
    depot_points = legs[depot_geom_col]
    return pd.DataFrame(
        {
            "stop_id": _depot_stop_ids(legs),
            "stop_name": stop_names,
            "stop_lat": [point.y for point in depot_points],
            "stop_lon": [point.x for point in depot_points],
        }
    )


def create_depot_deadhead_stops(
    first_stops_gdf: gpd.GeoDataFrame,
    last_stops_gdf: gpd.GeoDataFrame,
    deadhead_trips: pd.DataFrame,
    deadhead_speed_kmh: float = 30.0,
) -> tuple[pd.DataFrame, pd.DataFrame]:
    """Create stop_times and stops for deadhead trips from and to depots.

    Each deadhead trip gets two stop_times rows (``stop_sequence`` 1 and 2).
    Travel time is the geodesic depot/stop distance divided by
    ``deadhead_speed_kmh``; pull-out trips are timed backwards from the first
    stop's arrival, pull-in trips forwards from the last stop's departure.

    Parameters
    ----------
    first_stops_gdf : gpd.GeoDataFrame
        First stop of each block, with ``block_id``, ``stop_id``,
        ``arrival_time``, ``nearest_depot_idx``, ``geometry_origin`` (depot)
        and ``geometry_destination`` (first stop). Result from
        :func:`infer_depot_trip_endpoints`.
    last_stops_gdf : gpd.GeoDataFrame
        Last stop of each block, with ``block_id``, ``stop_id``,
        ``departure_time``, ``nearest_depot_idx``, ``geometry_origin`` (last
        stop) and ``geometry_destination`` (depot). Result from
        :func:`infer_depot_trip_endpoints`.
    deadhead_trips : pd.DataFrame
        Deadhead trip records from :func:`create_depot_deadhead_trips`
        (``trip_id``, ``trip_type`` and ``block_id`` are used).
    deadhead_speed_kmh : float, default 30.0
        Assumed average deadhead speed in km/h. The default keeps the previous
        hard-coded value, which the original comment says was chosen to be
        consistent with gtfs_feature_processing.py.

    Returns
    -------
    tuple[pd.DataFrame, pd.DataFrame]
        A ``(stop_times_df, stops_df)`` tuple for the depot deadhead trips.
        ``stop_times_df`` lists all pull-out trips first, then all pull-in
        trips; ``shape_dist_traveled`` is set to 0.0 on every row.
        ``stops_df`` has one row per distinct depot stop id.

    Raises
    ------
    ValueError
        If ``deadhead_speed_kmh`` is not positive.
    """
    if not deadhead_speed_kmh > 0:
        raise ValueError("deadhead_speed_kmh must be positive.")
    speed_m_per_hour = deadhead_speed_kmh * 1000

    from_depot = first_stops_gdf.copy()
    to_depot = last_stops_gdf.copy()

    # Leg length and estimated travel time for both deadhead directions.
    for legs in (from_depot, to_depot):
        legs["distance_m"] = _geodesic_leg_lengths_m(legs)
        legs["travel_time_sec"] = (legs["distance_m"] / speed_m_per_hour) * 3600

    # Pull-out: leave the depot early enough to reach the first stop on time.
    from_depot["departure_time"] = from_depot["arrival_time"] - pd.to_timedelta(
        from_depot["travel_time_sec"], unit="s"
    )
    # Pull-in: reach the depot one travel time after leaving the last stop.
    to_depot["arrival_time"] = to_depot["departure_time"] + pd.to_timedelta(
        to_depot["travel_time_sec"], unit="s"
    )

    # Attach each block's endpoint stop, depot index and times to its trips.
    leg_cols = [
        "block_id",
        "stop_id",
        "nearest_depot_idx",
        "departure_time",
        "arrival_time",
    ]
    pull_out_trips = deadhead_trips[deadhead_trips["trip_type"] == "pull-out"].merge(
        from_depot[leg_cols], on="block_id"
    )
    pull_in_trips = deadhead_trips[deadhead_trips["trip_type"] == "pull-in"].merge(
        to_depot[leg_cols], on="block_id"
    )
    all_trips = pd.concat([pull_out_trips, pull_in_trips], ignore_index=True)

    # For pull-out trips: stop_sequence 1 = depot stop (new), stop_sequence 2 =
    # first revenue stop (existing GTFS stop). For pull-in trips the order is
    # reversed. Depot stops are keyed as "depot_{nearest_depot_idx}", so all
    # blocks that share the same depot row get the same stop_id.
    pull_out_stop_ids = _interleave(
        _depot_stop_ids(pull_out_trips), pull_out_trips["stop_id"]
    )
    pull_in_stop_ids = _interleave(
        pull_in_trips["stop_id"], _depot_stop_ids(pull_in_trips)
    )

    stop_times_df = pd.DataFrame(
        {
            "trip_id": all_trips["trip_id"].repeat(2).to_numpy(),
            "stop_sequence": [1, 2] * len(all_trips),
            # Sequence 1 carries the trip's departure, sequence 2 its arrival.
            "arrival_time": _interleave(
                all_trips["departure_time"], all_trips["arrival_time"]
            ),
            "stop_id": pull_out_stop_ids + pull_in_stop_ids,
        }
    )
    stop_times_df["departure_time"] = stop_times_df["arrival_time"]
    stop_times_df["shape_dist_traveled"] = 0.0

    # One stops row per distinct depot stop id. Revenue stop endpoints are
    # already in the GTFS feed and must not be duplicated. depot_facility_name
    # is used as stop_name when available so the stop carries a human-readable
    # depot identifier.
    stops_df = (
        pd.concat(
            [
                _depot_stop_rows(from_depot, "geometry_origin"),
                _depot_stop_rows(to_depot, "geometry_destination"),
            ]
        )
        .drop_duplicates(subset="stop_id")
        .reset_index(drop=True)
    )

    return stop_times_df, stops_df