# Source code for mappymatch.constructs.trace

from __future__ import annotations

import re
from functools import cached_property
from pathlib import Path
from typing import List, Optional, Union

import numpy as np
import pandas as pd
from geopandas import GeoDataFrame, points_from_xy, read_file, read_parquet
from pyproj import CRS

from mappymatch.constructs.coordinate import Coordinate
from mappymatch.utils.crs import LATLON_CRS, XY_CRS


class Trace:
    """
    A collection of coordinates representing a GPS trajectory or path to be map-matched.

    A Trace wraps a GeoDataFrame of point geometries and provides methods for
    creating, manipulating, and transforming GPS trajectories. Traces are the
    primary input for map matching algorithms.

    The underlying GeoDataFrame must have unique indices - duplicate indices
    will raise an IndexError during initialization.

    Attributes:
        coords: A list of Coordinate objects representing each point in the trajectory
        crs: The coordinate reference system (CRS) of the trace
        index: The pandas Index from the underlying GeoDataFrame

    Examples:
        >>> import pandas as pd
        >>> from mappymatch.constructs.trace import Trace
        >>>
        >>> # Create from a DataFrame with lat/lon columns
        >>> df = pd.DataFrame({
        ...     'latitude': [40.7128, 40.7589, 40.7614],
        ...     'longitude': [-74.0060, -73.9851, -73.9776]
        ... })
        >>> trace = Trace.from_dataframe(df)
        >>>
        >>> # Create from a GPX file
        >>> trace = Trace.from_gpx('path/to/track.gpx')
        >>>
        >>> # Access coordinates
        >>> print(len(trace))  # Number of points
        >>> first_coord = trace.coords[0]
    """

    _frame: GeoDataFrame

    # Attribute patterns used when parsing GPX text. The leading word boundary
    # stops the ``minlat``/``maxlat``/``minlon``/``maxlon`` attributes of a GPX
    # ``<bounds>`` element from being mistaken for point coordinates.
    _GPX_LAT_PATTERN = re.compile(r'\blat="([^"]+)')
    _GPX_LON_PATTERN = re.compile(r'\blon="([^"]+)')

    def __init__(self, frame: GeoDataFrame):
        """
        Wrap a GeoDataFrame of points as a Trace.

        Args:
            frame: The frame holding the trace geometry; its index must be unique.

        Raises:
            IndexError: If the frame index contains duplicate values
        """
        if frame.index.has_duplicates:
            duplicates = frame.index[frame.index.duplicated()].values
            raise IndexError(
                f"Trace cannot have duplicates in the index but found {duplicates}"
            )
        self._frame = frame

    def __getitem__(self, i) -> Trace:
        """
        Select points by integer position and return them as a new Trace.

        Args:
            i: An integer position, a slice, or anything else accepted by
                ``DataFrame.iloc``.

        Returns:
            A new Trace holding the selected rows
        """
        # A scalar position would make ``iloc`` return a single row (a Series)
        # instead of a frame, so wrap it in a list. NumPy integers are included
        # because positions frequently come out of NumPy arrays.
        if isinstance(i, (int, np.integer)):
            i = [i]
        new_frame = self._frame.iloc[i]
        return Trace(new_frame)

    def __add__(self, other: Trace) -> Trace:
        """
        Concatenate two traces that share the same CRS.

        Raises:
            TypeError: If the two traces have different CRS values
            IndexError: If the combined index contains duplicates
        """
        if self.crs != other.crs:
            raise TypeError("cannot add two traces together with different crs")
        new_frame = pd.concat([self._frame, other._frame])
        return Trace(new_frame)

    def __len__(self):
        """Number of coordinate pairs."""
        return len(self._frame)

    def __str__(self):
        """Return a multi-line description listing the coords and the frame."""
        output_lines = [
            "Mappymatch Trace object",
            f"coords: {self.coords if hasattr(self, 'coords') else None}",
            f"frame: {self._frame}",
        ]
        return "\n".join(output_lines)

    def __repr__(self):
        """Return the same text as ``__str__``."""
        return self.__str__()

    @property
    def index(self) -> pd.Index:
        """Get index to underlying GeoDataFrame."""
        return self._frame.index

    @cached_property
    def coords(self) -> List[Coordinate]:
        """
        Get all coordinates in the trace as Coordinate objects.

        This property constructs Coordinate objects from the underlying
        GeoDataFrame, preserving the index values as coordinate IDs.
        The result is cached for performance.

        Returns:
            A list of Coordinate objects, one for each point in the trace,
            ordered by the trace index
        """
        coords_list = [
            Coordinate(i, g, self.crs)
            for i, g in zip(self._frame.index, self._frame.geometry)
        ]
        return coords_list

    @property
    def crs(self) -> CRS:
        """Get Coordinate Reference System(CRS) to underlying GeoDataFrame."""
        return self._frame.crs

    @classmethod
    def from_geo_dataframe(
        cls,
        frame: GeoDataFrame,
        xy: bool = True,
    ) -> Trace:
        """
        Create a trace from a GeoPandas GeoDataFrame.

        The GeoDataFrame must contain a geometry column with Point geometries
        representing the GPS trajectory. Additional columns are discarded -
        only the geometry and index are retained.

        Args:
            frame: A GeoDataFrame with Point geometries representing the trajectory.
                Must have a valid CRS and unique index values.
            xy: If True, reproject the trace to Web Mercator (EPSG:3857) for
                distance calculations. If False, keep the original CRS.
                Default is True.

        Returns:
            A new Trace instance

        Examples:
            >>> import geopandas as gpd
            >>> from shapely.geometry import Point
            >>>
            >>> # Create a GeoDataFrame with point geometries
            >>> gdf = gpd.GeoDataFrame(
            ...     geometry=[Point(-74.0060, 40.7128), Point(-73.9851, 40.7589)],
            ...     crs='EPSG:4326'
            ... )
            >>> trace = Trace.from_geo_dataframe(gdf)
        """
        # get rid of any extra info besides geometry and index
        frame = GeoDataFrame(geometry=frame.geometry, index=frame.index)
        if xy:
            frame = frame.to_crs(XY_CRS)
        return Trace(frame)

    @classmethod
    def from_dataframe(
        cls,
        dataframe: pd.DataFrame,
        xy: bool = True,
        lat_column: str = "latitude",
        lon_column: str = "longitude",
    ) -> Trace:
        """
        Create a trace from a pandas DataFrame with latitude/longitude columns.

        This is one of the most common ways to create a Trace from GPS data.
        The DataFrame must contain columns with latitude and longitude values
        in WGS84 (EPSG:4326) format.

        Args:
            dataframe: A pandas DataFrame containing GPS coordinates in EPSG:4326 format
            xy: If True, reproject to Web Mercator (EPSG:3857) for accurate
                distance calculations. If False, maintain lat/lon coordinates.
                Default is True.
            lat_column: The name of the column containing latitude values.
                Default is "latitude".
            lon_column: The name of the column containing longitude values.
                Default is "longitude".

        Returns:
            A new Trace instance

        Examples:
            >>> import pandas as pd
            >>>
            >>> # Create from a DataFrame with default column names
            >>> df = pd.DataFrame({
            ...     'latitude': [40.7128, 40.7589, 40.7614],
            ...     'longitude': [-74.0060, -73.9851, -73.9776]
            ... })
            >>> trace = Trace.from_dataframe(df)
            >>>
            >>> # Use custom column names
            >>> df_custom = pd.DataFrame({
            ...     'lat': [40.7128, 40.7589],
            ...     'lon': [-74.0060, -73.9851]
            ... })
            >>> trace = Trace.from_dataframe(df_custom, lat_column='lat', lon_column='lon')
        """
        # points_from_xy takes x (longitude) first, then y (latitude).
        frame = GeoDataFrame(
            geometry=points_from_xy(dataframe[lon_column], dataframe[lat_column]),
            index=dataframe.index,
            crs=LATLON_CRS,
        )
        return Trace.from_geo_dataframe(frame, xy)

    @staticmethod
    def _extract_gpx_lat_lon(data: str):
        """Return ``(lat, lon)`` float arrays for every lat/lon attribute in GPX text."""
        lat = np.array(Trace._GPX_LAT_PATTERN.findall(data), dtype=float)
        lon = np.array(Trace._GPX_LON_PATTERN.findall(data), dtype=float)
        return lat, lon

    @classmethod
    def from_gpx(
        cls,
        file: Union[str, Path],
        xy: bool = True,
    ) -> Trace:
        """
        Create a trace from a GPX (GPS Exchange Format) file.

        The file is scanned as text for ``lat="..."`` and ``lon="..."``
        attributes, which are paired up in order of appearance. This method
        expects a simple GPX structure with a sequence of lat/lon coordinate
        pairs; the bounding-box attributes (``minlat``, ``maxlat``, ``minlon``,
        ``maxlon``) are ignored.

        Args:
            file: Path to the GPX file (as string or Path object)
            xy: If True, reproject to Web Mercator (EPSG:3857) for accurate
                distance calculations. If False, maintain lat/lon coordinates.
                Default is True.

        Returns:
            A new Trace instance with coordinates extracted from the GPX file

        Raises:
            FileNotFoundError: If the specified file does not exist
            TypeError: If the file does not have a .gpx extension

        Examples:
            >>> # Load a GPX track from a file
            >>> trace = Trace.from_gpx('morning_run.gpx')
            >>>
            >>> # Keep in lat/lon instead of projecting
            >>> trace_latlon = Trace.from_gpx('bike_ride.gpx', xy=False)
        """
        filepath = Path(file)
        if not filepath.is_file():
            raise FileNotFoundError(file)
        elif not filepath.suffix == ".gpx":
            raise TypeError(
                f"file of type {filepath.suffix} does not appear to be a gpx file"
            )

        # read_text opens and closes the file, so no handle is leaked.
        data = filepath.read_text()

        lat_column, lon_column = "lat", "lon"
        lat, lon = Trace._extract_gpx_lat_lon(data)
        df = pd.DataFrame(zip(lat, lon), columns=[lat_column, lon_column])
        return Trace.from_dataframe(df, xy, lat_column, lon_column)

    @classmethod
    def from_csv(
        cls,
        file: Union[str, Path],
        xy: bool = True,
        lat_column: str = "latitude",
        lon_column: str = "longitude",
    ) -> Trace:
        """
        Create a trace from a CSV file containing latitude/longitude coordinates.

        The CSV file must contain columns with latitude and longitude values in
        WGS84 (EPSG:4326) format. The DataFrame index will be used as
        coordinate IDs.

        Args:
            file: Path to the CSV file (as string or Path object)
            xy: If True, reproject to Web Mercator (EPSG:3857) for accurate
                distance calculations. If False, maintain lat/lon coordinates.
                Default is True.
            lat_column: The name of the column containing latitude values.
                Default is "latitude".
            lon_column: The name of the column containing longitude values.
                Default is "longitude".

        Returns:
            A new Trace instance with coordinates from the CSV file

        Raises:
            FileNotFoundError: If the specified file does not exist
            TypeError: If the file does not have a .csv extension
            ValueError: If the specified lat/lon columns are not found in the CSV

        Examples:
            >>> # Load from CSV with default column names
            >>> trace = Trace.from_csv('gps_data.csv')
            >>>
            >>> # Load with custom column names
            >>> trace = Trace.from_csv('track.csv', lat_column='lat', lon_column='lng')
        """
        filepath = Path(file)
        if not filepath.is_file():
            raise FileNotFoundError(file)
        elif not filepath.suffix == ".csv":
            raise TypeError(
                f"file of type {filepath.suffix} does not appear to be a csv file"
            )

        # Read only the header first so a file without the expected columns is
        # rejected before the whole file is parsed.
        columns = pd.read_csv(filepath, nrows=0).columns.to_list()
        if lat_column in columns and lon_column in columns:
            df = pd.read_csv(filepath)
            return Trace.from_dataframe(df, xy, lat_column, lon_column)
        else:
            raise ValueError(
                "Could not find any geometry information in the file; "
                "Make sure there are latitude and longitude columns "
                "[and provide the lat/lon column names to this function]"
            )

    @classmethod
    def from_parquet(cls, file: Union[str, Path], xy: bool = True) -> Trace:
        """
        Create a trace from a GeoParquet file.

        GeoParquet is a columnar storage format for geospatial data. The file
        must contain a geometry column with Point geometries and a valid CRS.

        Args:
            file: Path to the GeoParquet file (as string or Path object)
            xy: If True, reproject to Web Mercator (EPSG:3857) for accurate
                distance calculations. If False, maintain the original CRS.
                Default is True.

        Returns:
            A new Trace instance with coordinates from the GeoParquet file

        Examples:
            >>> # Load from a GeoParquet file
            >>> trace = Trace.from_parquet('trajectory.parquet')
        """
        filepath = Path(file)
        frame = read_parquet(filepath)
        return Trace.from_geo_dataframe(frame, xy)

    @classmethod
    def from_geojson(
        cls,
        file: Union[str, Path],
        index_property: Optional[str] = None,
        xy: bool = True,
    ) -> Trace:
        """
        Create a trace from a GeoJSON file containing Point features.

        The GeoJSON file should contain Point geometries representing the GPS
        trajectory. If index_property names a column that exists in the file,
        that property is used as the DataFrame index; otherwise all
        non-geometry properties are combined to create the index. When the
        features carry no properties at all, the default index is kept.

        Args:
            file: Path to the GeoJSON file (as string or Path object)
            index_property: The name of a GeoJSON property to use as the
                DataFrame index. If None (or not present in the file), all
                properties excluding geometry will be used as index columns.
                Default is None.
            xy: If True, reproject to Web Mercator (EPSG:3857) for accurate
                distance calculations. If False, maintain the original CRS.
                Default is True.

        Returns:
            A new Trace instance with coordinates from the GeoJSON file

        Examples:
            >>> # Load from GeoJSON, using all properties as index
            >>> trace = Trace.from_geojson('path.geojson')
            >>>
            >>> # Use a specific property as index
            >>> trace = Trace.from_geojson('points.geojson', index_property='timestamp')
        """
        filepath = Path(file)
        frame = read_file(filepath)
        if index_property and index_property in frame.columns:
            frame = frame.set_index(index_property)
        else:
            gname = frame.geometry.name
            index_cols = [c for c in frame.columns if c != gname]
            # With no property columns there is nothing to build an index
            # from, so leave the frame's default index in place.
            if index_cols:
                frame = frame.set_index(index_cols)

        return Trace.from_geo_dataframe(frame, xy)

    def downsample(self, npoints: int) -> Trace:
        """
        Downsample the trace to a specified number of evenly-spaced points.

        This method uses linear interpolation across the trace positions to
        select a subset of points that are approximately evenly distributed
        along the trajectory. If npoints is larger than the trace, every point
        is kept once rather than repeated.

        Args:
            npoints: The target number of points in the downsampled trace

        Returns:
            A new Trace with at most npoints evenly-distributed points

        Examples:
            >>> # Reduce a 1000-point trace to 100 points
            >>> long_trace = Trace.from_csv('detailed_track.csv')
            >>> print(len(long_trace))  # 1000
            >>> short_trace = long_trace.downsample(100)
            >>> print(len(short_trace))  # 100
        """
        positions = np.linspace(0, len(self._frame) - 1, npoints).astype(int)
        # Truncation to int repeats positions once npoints exceeds the trace
        # length; repeated rows would carry duplicate index values, which the
        # Trace constructor rejects. The positions are already ascending, so
        # de-duplicating keeps their order.
        positions = np.unique(positions)
        new_frame = self._frame.iloc[positions]
        return Trace(new_frame)

    def drop(self, index: List) -> Trace:
        """
        Remove points from the trace by their index values.

        This method creates a new trace with specified points removed. The
        index parameter should contain the DataFrame index values (not
        positional integers) of the points to remove.

        Args:
            index: A list of index values identifying the points to remove.
                These should be values from the trace's DataFrame index,
                not integer positions.

        Returns:
            A new Trace with the specified points removed

        Examples:
            >>> # Remove points with specific index values
            >>> trace = Trace.from_dataframe(df)  # df has index [0, 1, 2, 3, 4]
            >>> cleaned_trace = trace.drop([1, 3])  # Removes points at index 1 and 3
            >>> print(len(cleaned_trace))  # 3 (originally 5, removed 2)
        """
        new_frame = self._frame.drop(index)
        return Trace(new_frame)

    def to_crs(self, new_crs: CRS) -> Trace:
        """
        Transform the trace to a different coordinate reference system (CRS).

        This method reprojects all coordinates in the trace to the specified CRS.

        Args:
            new_crs: The target CRS. Can be a pyproj.CRS object, EPSG code
                string (e.g., 'EPSG:4326'), or any format accepted by
                pyproj.CRS()

        Returns:
            A new Trace with all coordinates transformed to the target CRS

        Examples:
            >>> # Convert from Web Mercator to WGS84 lat/lon
            >>> trace_xy = Trace.from_csv('data.csv', xy=True)  # In EPSG:3857
            >>> trace_latlon = trace_xy.to_crs('EPSG:4326')
            >>>
            >>> # Convert to a UTM zone
            >>> from pyproj import CRS
            >>> utm_crs = CRS('EPSG:32618')  # UTM Zone 18N
            >>> trace_utm = trace_latlon.to_crs(utm_crs)
        """
        new_frame = self._frame.to_crs(new_crs)
        return Trace(new_frame)

    def to_geojson(self, file: Union[str, Path]):
        """
        Write the trace to a GeoJSON file.

        The underlying GeoDataFrame is written with ``GeoDataFrame.to_file``
        using the GeoJSON driver, so each point becomes a Feature with Point
        geometry. Whether index values are written follows the geopandas
        ``to_file`` defaults.

        Args:
            file: Path where the GeoJSON file should be written (as string or
                Path object)

        Examples:
            >>> trace = Trace.from_csv('input.csv')
            >>> trace.to_geojson('output.geojson')
        """
        self._frame.to_file(file, driver="GeoJSON")