Source code for routee.transit.mid_block_deadhead

from typing import Any

import geopandas as gpd
import numpy as np
import pandas as pd
from geopy.distance import geodesic


[docs] def create_mid_block_deadhead_trips( trips_df: pd.DataFrame, stop_times_df: pd.DataFrame ) -> pd.DataFrame: """Create deadhead trips between consecutive trips for each block. Parameters ---------- trips_df : pd.DataFrame GTFS trips_df (e.g. result from read_in_gtfs). stop_times_df: pd.DataFrame stop_times df in feed resulted from read_in_gtfs. Returns ------- pd.DataFrame: DataFrame with created deadhead trips. """ # For each block id, create one deadhead trip between consecutive trips. deadhead_trips = pd.DataFrame( { "trip_id": [], "route_id": [], "service_id": [], "block_id": [], "shape_id": [], "route_short_name": [], "route_type": [], "route_desc": [], "agency_id": [], } ) trip_start = ( stop_times_df.groupby("trip_id")["arrival_time"].min().reset_index() ) # trip start time of each trip trips_df = trips_df.merge( trip_start, on="trip_id", how="left" ) # only look at trips on selected date and route trips_df = trips_df.sort_values(by=["block_id", "arrival_time"]) block_gb = trips_df.groupby("block_id") dh_dfs = list() for _, block_df in block_gb: block_df = block_df.copy() block_df["to_trip"] = block_df["trip_id"].shift(-1) block_df = block_df.dropna(subset=["to_trip"]) block_df["to_trip"] = block_df["to_trip"].astype(str) block_df["deadhead_trip"] = ( block_df["trip_id"].astype(str) + "_to_" + block_df["to_trip"] ) block_df = block_df[ ["deadhead_trip", "route_id", "service_id", "block_id", "shape_id"] ] block_df = block_df.rename(columns=({"deadhead_trip": "trip_id"})) dh_dfs.append(block_df) deadhead_trips = pd.concat(dh_dfs).reset_index(drop=True) deadhead_trips["route_short_name"] = None deadhead_trips["route_type"] = 3 deadhead_trips["route_desc"] = "Deadhead_from_" + deadhead_trips["trip_id"] deadhead_trips["agency_id"] = None deadhead_trips["shape_id"] = deadhead_trips["trip_id"] deadhead_trips["trip_type"] = "mid_block_deadhead" return deadhead_trips
[docs] def create_mid_block_deadhead_stops( feed: Any, deadhead_trips: pd.DataFrame ) -> tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]: """Create stop_times and stops for mid-block deadhead trips. Parameters ---------- feed: Any GTFS feed object (e.g. result from read_in_gtfs). deadhead_trips: pd.DataFrame Deadhead trip records from :func:`create_mid_block_deadhead_trips`. Returns ------- tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame] A ``(stop_times_df, stops_df, deadhead_ods_df)`` tuple where ``stop_times_df`` and ``stops_df`` can be merged into the GTFS feed, and ``deadhead_ods_df`` holds origin/destination geometry for routing. """ # Calculate distance from end stop of first trip to start stop of second trip deadhead_trips["from_trip"] = deadhead_trips["trip_id"].apply( lambda x: x.split("_to_")[0] ) deadhead_trips["to_trip"] = deadhead_trips["trip_id"].apply( lambda x: x.split("_to_")[1] ) # First stops of deadhead trips first_stops = feed.stop_times[ feed.stop_times["trip_id"].isin(deadhead_trips["from_trip"]) ].copy() first_stops = first_stops.sort_values(by=["trip_id", "stop_sequence"]) first_stops = first_stops.groupby("trip_id").last().reset_index() first_stops = first_stops.rename( columns={"stop_id": "from_stop_id", "trip_id": "from_trip"} ) # Last stops of deadhead trips last_stops = feed.stop_times[ feed.stop_times["trip_id"].isin(deadhead_trips["to_trip"]) ].copy() last_stops = last_stops.sort_values(by=["trip_id", "stop_sequence"]) last_stops = last_stops.groupby("trip_id").first().reset_index() last_stops = last_stops.rename( columns={"stop_id": "to_stop_id", "trip_id": "to_trip"} ) # Merge to get stop ids and stop lat/lon deadhead_trips = deadhead_trips.merge( first_stops[["from_trip", "from_stop_id", "departure_time"]], on="from_trip", how="left", ) deadhead_trips = deadhead_trips.merge( last_stops[["to_trip", "to_stop_id", "arrival_time"]], on="to_trip", how="left" ) deadhead_trips = deadhead_trips.merge( feed.stops[["stop_id", "stop_lat", "stop_lon"]], left_on="from_stop_id", right_on="stop_id", how="left", ) deadhead_trips = deadhead_trips.rename( columns={"stop_lat": "from_stop_lat", "stop_lon": "from_stop_lon"} ) deadhead_trips = deadhead_trips.merge( feed.stops[["stop_id", "stop_lat", "stop_lon"]], left_on="to_stop_id", right_on="stop_id", how="left", ) deadhead_trips = deadhead_trips.rename( columns={"stop_lat": "to_stop_lat", "stop_lon": "to_stop_lon"} ) deadhead_trips = deadhead_trips.drop(columns=["stop_id_x", "stop_id_y"]) # Create geometry columns for geospatial calculations deadhead_trips["geometry_origin"] = gpd.points_from_xy( deadhead_trips["from_stop_lon"], deadhead_trips["from_stop_lat"] ) deadhead_trips["geometry_destination"] = gpd.points_from_xy( deadhead_trips["to_stop_lon"], deadhead_trips["to_stop_lat"] ) # Calculate distance from origin to destination for deadhead trips deadhead_trips["distance_m"] = deadhead_trips.apply( lambda row: ( geodesic( (row.geometry_origin.y, row.geometry_origin.x), (row.geometry_destination.y, row.geometry_destination.x), ).meters ), axis=1, ) # Assume average speed of 30 km/h (to be consistant with the number adopted in gtfs_feature_processing.py) # to estimate travel time deadhead_trips["travel_time_sec"] = (deadhead_trips["distance_m"] / 30000) * 3600 # Calculate arrival time at to_stop for deadhead trip deadhead_trips["arrival_time_cal"] = deadhead_trips[ "departure_time" ] + pd.to_timedelta(deadhead_trips["travel_time_sec"], unit="s") # Use the minimum of scheduled arrival time and calculated arrival time deadhead_trips["arrival_time"] = deadhead_trips[ ["arrival_time", "arrival_time_cal"] ].min(axis=1) # Create stop_times df for deadhead trips stop_times_df = pd.DataFrame( columns=[ "trip_id", "stop_sequence", "arrival_time", "stop_id", "departure_time", "shape_dist_traveled", ] ) stop_times_df["trip_id"] = deadhead_trips["trip_id"].repeat(2).values stop_times_df["stop_sequence"] = [1, 2] * len(deadhead_trips) stop_times_df["arrival_time"] = [ x for pair in zip( deadhead_trips["departure_time"].to_list(), deadhead_trips["arrival_time"].to_list(), ) for x in pair ] stop_times_df["stop_id"] = range(1, len(stop_times_df) + 1) stop_times_df["stop_id"] = stop_times_df["stop_id"].apply( lambda x: f"mid_block_deadhead_{x}" ) stop_times_df["departure_time"] = stop_times_df["arrival_time"] stop_times_df["shape_dist_traveled"] = 0.0 # Create stops df for deadhead trips stops_df = pd.DataFrame(columns=["stop_id", "stop_lat", "stop_lon"]) stops_df["stop_id"] = stop_times_df["stop_id"] x_start = deadhead_trips.geometry_origin.apply(lambda p: p.x).to_numpy() x_end = deadhead_trips.geometry_destination.apply(lambda p: p.x).to_numpy() stop_lon = np.ravel(np.column_stack((x_start, x_end))) y_start = deadhead_trips.geometry_origin.apply(lambda p: p.y).to_numpy() y_end = deadhead_trips.geometry_destination.apply(lambda p: p.y).to_numpy() stop_lat = np.ravel(np.column_stack((y_start, y_end))) stops_df["stop_lat"] = stop_lat stops_df["stop_lon"] = stop_lon deadhead_trips["block_id"] = deadhead_trips[ "trip_id" ] # Use trip_id as block_id for deadhead trips for trace generation purpose in generate_deadhead_traces.py return ( stop_times_df, stops_df, deadhead_trips[["geometry_origin", "geometry_destination", "block_id"]], )