Plugin System Usage Examples¶
The simplified plugin system allows you to easily register and use custom data loaders, especially in Jupyter notebooks and interactive environments.
Basic Usage¶
1. Creating a Custom Data Loader¶
import polars as pl
from tnp_statistic_library.api import register_named_loader, reset_plugin_manager
# Reset plugin manager for clean slate (useful in notebooks)
reset_plugin_manager()
def excel_loader(location: str) -> pl.LazyFrame:
    """Load an Excel workbook as a lazy frame.

    Args:
        location: Path to the workbook. It must end in ``.xlsx`` or ``.xls``;
            the extension is matched case-insensitively.

    Returns:
        The data read by ``pl.read_excel``, wrapped as a ``pl.LazyFrame``.

    Raises:
        ValueError: If ``location`` does not have an Excel extension.
    """
    # Guard first so unsupported paths fail before any file access.
    # Lower-casing lets upper-case extensions such as ``.XLSX`` through.
    if not location.lower().endswith((".xlsx", ".xls")):
        raise ValueError(f"Excel loader can only handle .xlsx/.xls files, got: {location}")
    # read_excel is eager (and requires additional dependencies);
    # .lazy() adapts the result to the LazyFrame return type.
    return pl.read_excel(location).lazy()
# Register the loader
register_named_loader("excel", excel_loader)
2. Using the Registered Loader¶
from tnp_statistic_library._internal.datasets.datasets import Dataset
# Create dataset using the registered loader
dataset = Dataset(location="data/sales.xlsx", loader="excel")
# The data will be loaded when accessed
df = dataset.lazyframe.collect()
print(df)
3. Automatic Extension-based Loader (register_data_loader)¶
For loaders that should automatically handle files based on extensions:
from tnp_statistic_library.api import register_data_loader
def csv_special_loader(location: str) -> pl.LazyFrame | None:
    """Lazily scan semicolon-separated, header-less CSV files.

    Args:
        location: Path of the file to load.

    Returns:
        A ``pl.LazyFrame`` over the file when ``location`` ends in ``.csv``;
        ``None`` for any other location (presumably so another loader can
        handle it -- confirm against the plugin manager).
    """
    if not location.endswith(".csv"):
        return None
    # scan_csv already returns a LazyFrame, so no extra .lazy() call is needed.
    return pl.scan_csv(location, separator=";", has_header=False)
# Register for automatic extension handling
register_data_loader("csv_special", csv_special_loader)
# This will automatically use our loader for .csv files
dataset = Dataset(location="data/special.csv") # No loader= needed
data = dataset.lazyframe.collect()
4. API Data Loader Example¶
import requests
import polars as pl
from tnp_statistic_library.api import register_named_loader
def api_loader(location: str) -> pl.LazyFrame:
    """Load data from REST APIs.

    Args:
        location: Address of the form ``api://host/path``. The leading
            ``api://`` scheme is swapped for ``https://`` before the request.

    Returns:
        The decoded JSON payload converted with ``pl.from_records`` and
        wrapped as a ``pl.LazyFrame``.

    Raises:
        ValueError: If ``location`` does not start with ``api://``.
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within 30 seconds.
    """
    if not location.startswith("api://"):
        raise ValueError(f"Expected api:// URL, got {location}")
    # Rewrite only the leading scheme; str.replace would also rewrite any
    # later "api://" embedded in the path or query string.
    url = "https://" + location.removeprefix("api://")
    # requests waits indefinitely by default, so bound the wait explicitly.
    response = requests.get(url, timeout=30)
    # Fail loudly on HTTP errors instead of parsing an error body as data.
    response.raise_for_status()
    return pl.from_records(response.json()).lazy()
# Register the named loader
register_named_loader("api", api_loader)
5. Using the API Loader¶
# Use the named loader with explicit loader specification
dataset = Dataset(location="api://jsonplaceholder.typicode.com/posts", loader="api")
data = dataset.lazyframe.collect()
print(data)
Advanced Features¶
Overwriting Loaders¶
# Register initial loader
def loader1(location: str) -> pl.LazyFrame | None:
    """Return the fixed "old" frame for the location "test", else None."""
    if location != "test":
        return None
    frame = pl.DataFrame({"old": [1, 2, 3]})
    return frame.lazy()
def loader2(location: str) -> pl.LazyFrame | None:
    """Return the fixed "new" frame for the location "test", else None."""
    if location != "test":
        return None
    frame = pl.DataFrame({"new": [4, 5, 6]})
    return frame.lazy()
register_data_loader("custom", loader1)
# This will fail with ValueError: a loader named "custom" is already registered
try:
register_data_loader("custom", loader2)
except ValueError as e:
print(f"Error: {e}")
# This will succeed
register_data_loader("custom", loader2, overwrite=True)
Managing Loaders¶
from tnp_statistic_library.api import (
list_data_loaders,
unregister_data_loader,
clear_data_loaders
)
# List all registered loaders
print("Registered loaders:", list_data_loaders())
# Remove a loader
if unregister_data_loader("excel"):
print("Excel loader removed")
# Clear all loaders
clear_data_loaders()
print("All loaders cleared")
Database Loader Example¶
import polars as pl
from sqlalchemy import create_engine
from tnp_statistic_library.api import register_named_loader
def database_loader(location: str) -> pl.LazyFrame:
    """Load data from SQL databases.

    Args:
        location: Address of the form ``sql://<table>`` or
            ``sql://<schema>.<table>``.

    Returns:
        Every row of the table as a ``pl.LazyFrame``. The query runs when
        this function is called; the result is then wrapped lazily.

    Raises:
        ValueError: If ``location`` lacks the ``sql://`` prefix or the table
            name is not a plain (optionally dotted) identifier.
    """
    if not location.startswith("sql://"):
        raise ValueError("DatabaseLoader requires sql:// prefix")
    # Strip only the leading scheme, not every occurrence of "sql://".
    table_name = location.removeprefix("sql://")
    # The table name is interpolated into the SQL text (identifiers cannot be
    # bound as query parameters), so accept plain identifiers only. This
    # rejects quotes, spaces, semicolons and the empty string.
    if not all(part.isidentifier() for part in table_name.split(".")):
        raise ValueError(f"Invalid table name: {table_name!r}")
    # Example connection string -- replace with your own credentials.
    engine = create_engine("postgresql://user:pass@localhost/db")
    query = f"SELECT * FROM {table_name}"
    # pl.defer needs an explicit ``schema`` argument, which a generic
    # SELECT * loader cannot know up front; read eagerly and wrap instead.
    return pl.read_database(query, engine).lazy()
# Register database loader
register_named_loader("database", database_loader)
# Use in dataset
dataset = Dataset(location="sql://customers", loader="database")
Integration with Metrics¶
The plugin system integrates seamlessly with the existing metrics system:
from tnp_statistic_library.metrics.summary import Mean
# Register your custom loader (my_custom_loader stands for a loader you have defined)
register_data_loader("custom", my_custom_loader)
# Use in metric computation
metric = Mean.build(
dataset=Dataset(location="custom://data", loader="custom"),
name="average_value",
variable="value_column"
)
result = metric.run_metric().collect()
Jupyter Notebook Workflow¶
The plugin system is particularly useful in Jupyter notebooks:
# Cell 1: Define and register loader
class MyLoader:
    """Notebook demo loader that returns the same small frame for any location."""

    def load(self, location: str) -> pl.LazyFrame:
        """Return a fixed three-row frame; ``location`` is not used."""
        # Replace this with your custom loading logic.
        frame = pl.DataFrame({"data": [1, 2, 3]})
        return frame.lazy()
register_data_loader("notebook_loader", MyLoader())
# Cell 2: Use in different cells
dataset1 = Dataset(location="source1", loader="notebook_loader")
dataset2 = Dataset(location="source2", loader="notebook_loader")
# Cell 3: Experiment with different loaders (ImprovedLoader stands for your revised loader class)
register_data_loader("notebook_loader", ImprovedLoader(), overwrite=True)
# Now all previous datasets will use the new loader!
Error Handling¶
The plugin system provides clear error messages:
# Unknown loader
try:
dataset = Dataset(location="data.txt", loader="unknown")
df = dataset.lazyframe
except ValueError as e:
print(f"Loader error: {e}")
# Output: "Unknown data loader 'unknown'"
# Loader registration error
try:
register_data_loader("test", "not_a_loader")
except TypeError as e:
print(f"Registration error: {e}")
# Output: "Loader must implement DataLoaderProtocol"
Migration from Hook-based Plugins¶
If you have existing hook-based plugins, they will continue to work. The new system provides an additional, simpler way to register loaders:
# Old way (still works)
from tnp_statistic_library.api import hookimpl
class OldStyleLoader:
    """Hook-based loader that handles files ending in ``.custom``."""

    @hookimpl
    def data_loader(self, location: str) -> pl.LazyFrame | None:
        """Scan ``location`` as CSV when it ends in ``.custom``; otherwise return None."""
        if not location.endswith(".custom"):
            return None
        return pl.scan_csv(location)
# New way (recommended for interactive use)
class NewStyleLoader:
    """Loader object for explicit registration under a name."""

    def load(self, location: str) -> pl.LazyFrame:
        """Lazily scan ``location`` as a CSV file."""
        lazy_frame = pl.scan_csv(location)
        return lazy_frame
register_data_loader("new_style", NewStyleLoader())
The new system takes priority for named loaders, while hook-based plugins are still used for automatic format detection.