Source code for compass.utilities.io

"""COMPASS I/O utilities"""

import pprint
import logging

from elm.web.file_loader import AsyncLocalFileLoader


logger = logging.getLogger(__name__)


[docs] async def load_local_docs(fps, **kwargs): """Load local documents into `elm` document instances Parameters ---------- fps : Iterable Iterable of paths referencing local files to load. **kwargs Additional keyword arguments forwarded to :class:`elm.web.file_loader.AsyncLocalFileLoader` for configuration such as ``loader``, caching, or parsing options. Returns ------- list of elm.web.document.BaseDocument Non-empty loaded documents corresponding to the supplied filepaths. Empty results (e.g., unreadable files) are filtered out of the returned list. Raises ------ elm.exceptions.ELMError Propagated when the underlying loader fails to read one of the provided files and is configured to raise on errors. Notes ----- Detailed debug information about loaded page counts is emitted via the ``compass.utilities.io`` logger at ``TRACE`` level to assist with troubleshooting ingestion runs. """ logger.trace("Loading docs for the following paths:\n%r", fps) logger.trace( "kwargs for AsyncLocalFileLoader:\n%s", pprint.PrettyPrinter().pformat(kwargs), ) file_loader = AsyncLocalFileLoader(**kwargs) docs = await file_loader.fetch_all(*fps) page_lens = { doc.attrs.get("source_fp", "Unknown"): len(doc.pages) for doc in docs } logger.debug( "Loaded the following number of pages for docs:\n%s", pprint.PrettyPrinter().pformat(page_lens), ) return [doc for doc in docs if not doc.empty]