# Source code for r2x_core.utils.validation
"""Utility function for validation."""
import inspect
from collections.abc import Callable
from pathlib import Path
from typing import TYPE_CHECKING, Any
from ..file_types import EXTENSION_MAPPING
if TYPE_CHECKING:
from pydantic import ValidationInfo
def filter_kwargs_for(func: Callable[..., Any], *, kwargs: dict[str, Any]) -> dict[str, Any]:
    """Filter kwargs to only include parameters ``func`` can receive by keyword.

    Only explicitly named parameters that may be bound by keyword
    (positional-or-keyword and keyword-only) are kept. The names of
    positional-only parameters and of the ``*args`` / ``**kwargs`` catch-alls
    are rejected: forwarding them as ``func(**filtered)`` would either raise
    ``TypeError`` or silently nest the value inside the catch-all.

    A ``**kwargs`` catch-all on ``func`` does not widen the filter; keys that
    do not match an explicitly named parameter are still dropped.

    Parameters
    ----------
    func : Callable
        The function to filter kwargs for
    kwargs : dict[str, Any]
        The keyword arguments to filter

    Returns
    -------
    dict[str, Any]
        Filtered kwargs containing only valid parameters, in the order they
        appear in ``kwargs``
    """
    # Parameter kinds that can legally be supplied as ``name=value``.
    keyword_bindable = (
        inspect.Parameter.POSITIONAL_OR_KEYWORD,
        inspect.Parameter.KEYWORD_ONLY,
    )
    valid_params = {
        name
        for name, param in inspect.signature(func).parameters.items()
        if param.kind in keyword_bindable
    }
    return {k: v for k, v in kwargs.items() if k in valid_params}
# Backward compatibility alias
[docs]
def filter_valid_kwargs(func: Callable[..., Any], *, kwargs: dict[str, Any]) -> dict[str, Any]:
    """Backward-compatible alias that forwards to ``filter_kwargs_for``.

    Parameters
    ----------
    func : Callable
        The function whose signature decides which keys are kept
    kwargs : dict[str, Any]
        The keyword arguments to filter

    Returns
    -------
    dict[str, Any]
        Whatever ``filter_kwargs_for`` returns for the same arguments
    """
    filtered = filter_kwargs_for(func, kwargs=kwargs)
    return filtered
[docs]
def filter_kwargs_by_signatures(
    kwargs: dict[str, Any], *, callables: list[Callable[..., Any]]
) -> dict[str, Any]:
    """Filter kwargs to those accepted by keyword by any of the provided callables.

    A key is kept when at least one callable declares an explicitly named
    parameter with that name that can be bound by keyword
    (positional-or-keyword or keyword-only). Names of positional-only
    parameters and of ``*args`` / ``**kwargs`` catch-alls do not count,
    because they cannot be supplied as ``name=value``.

    Parameters
    ----------
    kwargs : dict[str, Any]
        The keyword arguments to filter
    callables : list[Callable[..., Any]]
        Callables whose signatures define the accepted names; an empty list
        accepts nothing

    Returns
    -------
    dict[str, Any]
        The subset of ``kwargs`` accepted by at least one callable, in the
        order the keys appear in ``kwargs``
    """
    # Parameter kinds that can legally be supplied as ``name=value``.
    keyword_bindable = (
        inspect.Parameter.POSITIONAL_OR_KEYWORD,
        inspect.Parameter.KEYWORD_ONLY,
    )
    valid_params: set[str] = set()
    for callable_obj in callables:
        valid_params.update(
            name
            for name, param in inspect.signature(callable_obj).parameters.items()
            if param.kind in keyword_bindable
        )
    return {k: v for k, v in kwargs.items() if k in valid_params}
[docs]
def validate_glob_pattern(pattern: str | None) -> str | None:
"""Validate that a string is a valid glob pattern.
Parameters
----------
pattern : str | None
The glob pattern to validate
Returns
-------
str | None
The validated pattern
Raises
------
ValueError
If the pattern is invalid (empty, only whitespace, or contains invalid characters)
"""
if pattern is None:
return None
if not pattern or not pattern.strip():
msg = "Glob pattern cannot be empty"
raise ValueError(msg)
invalid_chars = set("\x00")
if any(char in pattern for char in invalid_chars):
msg = f"Glob pattern contains invalid characters: {pattern}"
raise ValueError(msg)
if not any(wildcard in pattern for wildcard in ["*", "?", "[", "]"]):
msg = f"Pattern '{pattern}' does not contain glob wildcards (*, ?, [, ]). Use 'fpath' for exact filenames."
raise ValueError(msg)
return pattern
[docs]
def validate_file_extension(path: Path, *, info: "ValidationInfo") -> Path:
    """Validate that the file path has a supported extension.

    The lower-cased suffix of ``path`` must be a key of the module-level
    `EXTENSION_MAPPING`.

    Parameters
    ----------
    path : Path
        The file path to validate.
    info : pydantic.ValidationInfo
        Pydantic's validation context. It is only checked for presence; its
        contents are not read.

    Returns
    -------
    Path
        The original ``path``, unchanged, if its extension is supported.

    Raises
    ------
    ValueError
        If ``info`` is None.
    TypeError
        If ``path`` is not a `Path` instance.
    KeyError
        If the file's extension is not found in `EXTENSION_MAPPING`. A path
        without any extension has the suffix ``""`` and takes this branch
        too, unless ``""`` is itself a key of the mapping.

    Notes
    -----
    This function is intended for use as a Pydantic validator (e.g., with
    `@field_validator` or `AfterValidator`).
    """
    if info is None:
        raise ValueError("Pydantic validation context is missing.")
    if not isinstance(path, Path):
        raise TypeError(f"Expected Path, got {type(path).__name__}")

    # Compare case-insensitively so ".CSV" and ".csv" are treated alike.
    ext = path.suffix.lower()
    if ext not in EXTENSION_MAPPING:
        # Sentences are space-separated and the keys are shown as a plain
        # list rather than the ``dict_keys(...)`` repr.
        msg = (
            f"{ext=} not found on `EXTENSION_MAPPING`. "
            "Check spelling of file type or verify it is a supported `FileFormat`. "
            f"List of supported `FileFormat`: {list(EXTENSION_MAPPING)}"
        )
        raise KeyError(msg)
    return path