# Source code for geopfa.datasets

"""Functions to fetch sample datasets for geoPFA."""

import pandas as pd
import pooch
from pooch.processors import Unzip
from pathlib import Path

# Pooch instance used by the fetch helpers in this module to retrieve the
# sample data archives. Files are stored under the OS cache directory for
# "geoPFA" and are requested from the GitHub release assets of the pinned
# version.
dogbert = pooch.create(
    path=pooch.os_cache("geoPFA"),
    # The "{version}" placeholder is filled in from ``version`` below.
    base_url="https://github.com/NREL/geoPFA/releases/download/{version}/",
    version="v0.0.5",
    # Archive file name -> expected SHA-256 checksum of that archive.
    registry={
        "heat.zip": "sha256:fc7abec6d035f7be6e31b6071ee016afa629fcb1764dc506156cc535909d9055",
        "insulation.zip": "sha256:2b16eacf32be347cf0767ee3a450eed76a92b2a654e5d249ff2fef09fab381c0",
        "producibility.zip": "sha256:5d72eb75815f86ddbb4e19bea5664c9deb27d308debb40c92068a45bbb3a94ca",
    },
)


def _get_dataset(filename: str, dataset: str) -> pd.DataFrame:
    """Fetch a single dataset from one of the sample zip archives.

    The archive is fetched through the module-level ``dogbert`` pooch
    instance and unpacked with the ``Unzip`` processor. The extracted files
    are indexed by their stem (file name without the final suffix) and the
    entry matching ``dataset`` is read as a Parquet file.

    Parameters
    ----------
    filename : str
        Name of the zip file to fetch.
    dataset : str
        Name of the dataset to fetch, i.e. the stem of a file inside the
        archive.

    Returns
    -------
    pd.DataFrame
        DataFrame containing the requested dataset.

    Raises
    ------
    KeyError
        If none of the extracted files has a stem equal to ``dataset``.
    """
    fnames = dogbert.fetch(filename, processor=Unzip())
    inventory = {Path(f).stem: f for f in fnames}

    # Keep the ``try`` limited to the lookup: a ``KeyError`` raised while
    # reading the Parquet file must not be reported as a missing dataset.
    try:
        path = inventory[dataset]
    except KeyError:
        # The bare lookup error carries no extra information, so suppress it
        # and surface only the descriptive message.
        raise KeyError(
            f"Dataset {dataset} not included in {dogbert.get_url(filename)}."
        ) from None

    return pd.read_parquet(path)


def fetch_heat(dataset: str) -> pd.DataFrame:
    """Fetch heat sample dataset.

    Currently available:

    - density_joint_inv_processed
    - density_joint_inv
    - earthquakes_processed
    - earthquakes
    - mt_resistivity_joint_inv_processed
    - mt_resistivity_joint_inv
    - temperature_model_500m_processed
    - temperature_model_500m
    - velocity_model_vp_processed
    - velocity_model_vp
    - velocity_model_vpvs_processed
    - velocity_model_vpvs
    - velocity_model_vs_processed
    - velocity_model_vs

    Parameters
    ----------
    dataset : str
        Name of the dataset to fetch.

    Returns
    -------
    pd.DataFrame
        DataFrame containing the requested heat dataset.

    Raises
    ------
    KeyError
        If ``dataset`` is not found among the files of ``heat.zip``.

    Examples
    --------
    >>> from geopfa.datasets import fetch_heat
    >>> temperature = fetch_heat("temperature_model_500m")
    """
    return _get_dataset("heat.zip", dataset)


def fetch_insulation(dataset: str) -> pd.DataFrame:
    """Fetch insulation sample dataset.

    Currently available:

    - density_joint_inv_processed
    - density_joint_inv
    - earthquakes_processed
    - earthquakes
    - mt_resistivity_joint_inv_processed
    - mt_resistivity_joint_inv
    - velocity_model_vp_processed
    - velocity_model_vp

    Parameters
    ----------
    dataset : str
        Name of the dataset to fetch.

    Returns
    -------
    pd.DataFrame
        DataFrame containing the requested insulation dataset.

    Raises
    ------
    KeyError
        If ``dataset`` is not found among the files of ``insulation.zip``.

    Examples
    --------
    >>> from geopfa.datasets import fetch_insulation
    >>> velocity = fetch_insulation("velocity_model_vp")
    """
    return _get_dataset("insulation.zip", dataset)


def fetch_producibility(dataset: str) -> pd.DataFrame:
    """Fetch producibility sample dataset.

    Currently available:

    - density_joint_inv_processed
    - density_joint_inv
    - earthquakes_processed
    - earthquakes
    - faults_3d_processed
    - faults_3d
    - geology
    - mt_resistivity_joint_inv_processed
    - mt_resistivity_joint_inv
    - velocity_model_vp_processed
    - velocity_model_vp
    - velocity_model_vpvs_processed
    - velocity_model_vpvs
    - velocity_model_vs_processed
    - velocity_model_vs

    Parameters
    ----------
    dataset : str
        Name of the dataset to fetch.

    Returns
    -------
    pd.DataFrame
        DataFrame containing the requested producibility dataset.

    Raises
    ------
    KeyError
        If ``dataset`` is not found among the files of
        ``producibility.zip``.

    Examples
    --------
    >>> from geopfa.datasets import fetch_producibility
    >>> geology = fetch_producibility("geology")
    """
    return _get_dataset("producibility.zip", dataset)