Source code for compass.utilities.base
"""Base COMPASS utility functions"""
from pathlib import Path
[docs]
def title_preserving_caps(string):
    """Title-case text without lowering any existing capitals

    The input is split on single space characters and each piece is
    passed through ``_cap``; the pieces are then joined back together
    with single spaces.

    Parameters
    ----------
    string : str
        Text to convert. Words may already contain uppercase
        characters (e.g. acronyms or proper nouns).

    Returns
    -------
    str
        Text in which every space-separated word starts with an
        uppercase character and all remaining characters are left
        exactly as they were given.

    Examples
    --------
    >>> title_preserving_caps("NLR solar ordinance")
    'NLR Solar Ordinance'
    """
    return " ".join(_cap(word) for word in string.split(" "))
[docs]
class Directories:
    """Container for the resolved output locations of a COMPASS run

    Collects the output, log, cleaned-text, ordinance-file, and
    jurisdiction-database directories in one place so that callers can
    work with :class:`pathlib.Path` instances instead of rebuilding the
    paths themselves.

    Notes
    -----
    Every explicitly supplied path is passed through ``_full_path``
    at construction time. Directories that are not supplied are placed
    underneath the resolved ``out`` directory.
    """

    def __init__(
        self,
        out,
        logs=None,
        clean_files=None,
        ordinance_files=None,
        jurisdiction_dbs=None,
        collect_only=False,
    ):
        """

        Parameters
        ----------
        out : path-like
            Output directory for the COMPASS run.
        logs : path-like, optional
            Directory for log files. When not given (or falsy),
            ``out/logs`` is used. By default, ``None``.
        clean_files : path-like, optional
            Directory for cleaned ordinance text. When not given (or
            falsy), ``out/cleaned_text`` is used, or
            ``out/parsed_docs`` if `collect_only` is true.
            By default, ``None``.
        ordinance_files : path-like, optional
            Directory for ordinance files. When not given (or falsy),
            ``out/ordinance_files`` is used, or ``out/source_docs`` if
            `collect_only` is true. By default, ``None``.
        jurisdiction_dbs : path-like, optional
            Directory for jurisdiction databases. When not given (or
            falsy), ``out/jurisdiction_dbs`` is used, or
            ``out/manifest_shards`` if `collect_only` is true.
            By default, ``None``.
        collect_only : bool, optional
            Selects which set of default sub-directory names is used
            for `clean_files`, `ordinance_files`, and
            `jurisdiction_dbs` (see above). Has no effect on
            directories that are given explicitly.
            By default, ``False``.
        """
        # Pick the default sub-directory names once, up front
        if collect_only:
            clean_name = "parsed_docs"
            ordinance_name = "source_docs"
            db_name = "manifest_shards"
        else:
            clean_name = "cleaned_text"
            ordinance_name = "ordinance_files"
            db_name = "jurisdiction_dbs"

        self.out = _full_path(out)

        if logs:
            self.logs = _full_path(logs)
        else:
            self.logs = self.out / "logs"

        if clean_files:
            self.clean_files = _full_path(clean_files)
        else:
            self.clean_files = self.out / clean_name

        if ordinance_files:
            self.ordinance_files = _full_path(ordinance_files)
        else:
            self.ordinance_files = self.out / ordinance_name

        if jurisdiction_dbs:
            self.jurisdiction_dbs = _full_path(jurisdiction_dbs)
        else:
            self.jurisdiction_dbs = self.out / db_name

    def __iter__(self):
        """Iterate over the managed directories

        Yields
        ------
        pathlib.Path
            The managed directories, always in this order: out, logs,
            clean_files, ordinance_files, jurisdiction_dbs.
        """
        attr_names = (
            "out",
            "logs",
            "clean_files",
            "ordinance_files",
            "jurisdiction_dbs",
        )
        # Look each attribute up only when it is about to be yielded
        for attr_name in attr_names:
            yield getattr(self, attr_name)

    def make_dirs(self):
        """Create every managed directory that is missing

        Missing parent directories are created as well, and directories
        that already exist are left alone.
        """
        for directory in self:
            directory.mkdir(parents=True, exist_ok=True)
def _cap(word):
"""Capitalize the first character of ``word``; preserve the rest"""
return "".join([word[0].upper(), word[1:]])
def _full_path(in_path):
"""Resolve an input path to an absolute :class:`pathlib.Path`"""
return Path(in_path).expanduser().resolve()