Source code for demos.datasources

import orca
import pandas as pd
from loguru import logger
from pydantic import BaseModel, Field
from typing import Annotated, Literal, Optional, Dict


class CSVTableSource(BaseModel):
    """Description of a CSV file that can be registered as an orca table.

    The file is read with pandas, indexed by ``index_col`` and added to orca
    under ``table_name`` when :meth:`load_into_orca` is called.
    """

    file_type: Literal["csv"]

    #: Path to source file
    filepath: str

    #: Column in the file to be used as index (e.g. `person_id`)
    index_col: str

    #: Identifier of the table in orca
    table_name: str

    #: Delimiter character (passed down to pandas)
    delimiter: Optional[str] = None

    #: Apply custom casting to incoming values when reading CSV
    custom_dtype_casting: Optional[Dict[str, str]] = None

    def load_into_orca(self):
        """Read the CSV file and register the resulting frame with orca."""
        logger.info(f"Loading CSV '{self.table_name}' table from {self.filepath}")

        # Parse first, then promote the configured column to the index.
        raw_frame = pd.read_csv(
            self.filepath,
            delimiter=self.delimiter,
            dtype=self.custom_dtype_casting,
        )
        indexed_frame = raw_frame.set_index(self.index_col)

        orca.add_table(self.table_name, indexed_frame)
class H5TableSource(BaseModel):
    """Description of an HDF5 dataset that can be registered as an orca table.

    The object stored under ``h5_key`` is read with pandas and added to orca
    under ``table_name`` when :meth:`load_into_orca` is called.
    """

    file_type: Literal["h5"]

    #: Path to source file
    filepath: str

    #: Key in the source HDF5 to be loaded
    h5_key: str

    #: Identifier of the table in orca
    table_name: str

    def load_into_orca(self):
        """Read the keyed object from the HDF5 file and register it with orca."""
        logger.info(
            f"Loading HDF5 '{self.table_name}' table from {self.filepath}/{self.h5_key}"
        )

        # The stored object is registered as-is; no index is set here.
        stored_frame = pd.read_hdf(self.filepath, key=self.h5_key)
        orca.add_table(self.table_name, stored_frame)
# Tagged union of the supported table sources: the ``file_type`` field of the
# incoming data ("h5" or "csv") selects which model is used for validation.
DataSourceModel = Annotated[
    H5TableSource | CSVTableSource,
    Field(discriminator="file_type"),
]