Source code for demos.datasources
import orca
import pandas as pd
from loguru import logger
from pydantic import BaseModel, Field
from typing import Annotated, Literal, Optional, Dict
[docs]
class CSVTableSource(BaseModel):
    """Describe a CSV file to be read with pandas and registered as an orca table.

    The ``file_type`` field is the tag that identifies this source kind; it
    only accepts the literal value ``"csv"``.
    """

    #: Source-kind tag; must be ``"csv"``
    file_type: Literal["csv"]
    #: Path to source file
    filepath: str
    #: Column in the file to be used as index (e.g. `person_id`)
    index_col: str
    #: Identifier of the table in orca
    table_name: str
    #: Delimiter character (passed down to pandas)
    delimiter: Optional[str] = None
    #: Custom dtype casting applied to incoming values when reading the CSV
    #: (passed down to pandas as ``dtype``)
    custom_dtype_casting: Optional[Dict[str, str]] = None

    def load_into_orca(self) -> None:
        """Read the CSV, index it by ``index_col`` and add it to orca.

        The resulting DataFrame is registered under ``table_name``.
        """
        logger.info(f"Loading CSV '{self.table_name}' table from {self.filepath}")
        raw_frame = pd.read_csv(
            self.filepath,
            delimiter=self.delimiter,
            dtype=self.custom_dtype_casting,
        )
        # The index is set after reading, so the dtype casting above is applied
        # to the index column like any other column.
        indexed_frame = raw_frame.set_index(self.index_col)
        orca.add_table(self.table_name, indexed_frame)
[docs]
class H5TableSource(BaseModel):
    """Describe one key of an HDF5 file to be read with pandas and registered in orca.

    The ``file_type`` field is the tag that identifies this source kind; it
    only accepts the literal value ``"h5"``.
    """

    #: Source-kind tag; must be ``"h5"``
    file_type: Literal["h5"]
    #: Path to source file
    filepath: str
    #: Key in the source HDF5 to be loaded
    h5_key: str
    #: Identifier of the table in orca
    table_name: str

    def load_into_orca(self) -> None:
        """Read ``h5_key`` from the HDF5 file and add the result to orca.

        The object returned by pandas is registered under ``table_name`` as-is;
        no index is set here.
        """
        message = (
            f"Loading HDF5 '{self.table_name}' table from {self.filepath}/{self.h5_key}"
        )
        logger.info(message)
        frame = pd.read_hdf(self.filepath, key=self.h5_key)
        orca.add_table(self.table_name, frame)
#: Any supported table source. The ``file_type`` field is the discriminator:
#: ``"h5"`` matches :class:`H5TableSource`, ``"csv"`` matches
#: :class:`CSVTableSource`.
DataSourceModel = Annotated[
    H5TableSource | CSVTableSource,
    Field(discriminator="file_type"),
]