Skip to content

Plugin System Usage Examples

The simplified plugin system allows you to easily register and use custom data loaders, especially in Jupyter notebooks and interactive environments.

Basic Usage

1. Creating a Custom Data Loader

import polars as pl
from tnp_statistic_library.api import register_named_loader, reset_plugin_manager

# Reset plugin manager for clean slate (useful in notebooks)
reset_plugin_manager()

def excel_loader(location: str) -> pl.LazyFrame:
    """Read an Excel workbook and hand it back as a lazy frame.

    Args:
        location: Path to the workbook; must end in ``.xlsx`` or ``.xls``.

    Returns:
        The workbook contents as a ``pl.LazyFrame``.

    Raises:
        ValueError: If ``location`` does not end in ``.xlsx`` or ``.xls``.
    """
    # Guard clause: reject anything that is not an Excel path up front.
    if not location.endswith(('.xlsx', '.xls')):
        raise ValueError(f"Excel loader can only handle .xlsx/.xls files, got: {location}")

    # pl.read_excel is eager (and requires additional dependencies), so the
    # resulting DataFrame is converted to a LazyFrame afterwards.
    workbook = pl.read_excel(location)
    return workbook.lazy()

# Register the loader
register_named_loader("excel", excel_loader)

2. Using the Registered Loader

from tnp_statistic_library._internal.datasets.datasets import Dataset

# Create dataset using the registered loader
dataset = Dataset(location="data/sales.xlsx", loader="excel")

# The data will be loaded when accessed
df = dataset.lazyframe.collect()
print(df)

3. Automatic Extension-based Loader (register_data_loader)

For loaders that should automatically handle files based on extensions:

from tnp_statistic_library.api import register_data_loader

def csv_special_loader(location: str) -> pl.LazyFrame | None:
    """Handle CSV files with special processing.

    Args:
        location: Path of the file a dataset asks to load.

    Returns:
        A lazy scan of the file (semicolon-separated, no header row) when
        ``location`` ends in ``.csv``; ``None`` otherwise, signalling that
        this loader does not handle the location.
    """
    if not location.endswith('.csv'):
        return None
    # Custom CSV processing logic. pl.scan_csv already returns a LazyFrame,
    # so no extra .lazy() conversion is needed.
    return pl.scan_csv(location, separator=';', has_header=False)

# Register for automatic extension handling
register_data_loader("csv_special", csv_special_loader)

# This will automatically use our loader for .csv files
dataset = Dataset(location="data/special.csv")  # No loader= needed
data = dataset.lazyframe.collect()

4. API Data Loader Example

import requests
import polars as pl
from tnp_statistic_library.api import register_named_loader

def api_loader(location: str) -> pl.LazyFrame:
    """Load JSON records from a REST endpoint addressed as ``api://host/path``.

    Args:
        location: Endpoint written with an ``api://`` scheme; the scheme is
            swapped for ``https://`` before the request is made.

    Returns:
        The decoded JSON payload converted with ``pl.from_records`` and
        wrapped as a ``pl.LazyFrame``.

    Raises:
        ValueError: If ``location`` does not start with ``api://``.
        requests.HTTPError: If the server responds with a 4xx/5xx status.
    """
    if not location.startswith("api://"):
        raise ValueError(f"Expected api:// URL, got {location}")

    # Swap only the leading scheme; str.replace would also rewrite any later
    # "api://" occurrence (for example inside a query string).
    url = "https://" + location.removeprefix("api://")
    # Without a timeout, requests can block indefinitely on a dead host.
    response = requests.get(url, timeout=30)
    # Fail loudly on HTTP errors instead of parsing an error page as data.
    response.raise_for_status()
    data = response.json()
    return pl.from_records(data).lazy()

# Register the named loader
register_named_loader("api", api_loader)

5. Using the API Loader

# Use the named loader with explicit loader specification
dataset = Dataset(location="api://jsonplaceholder.typicode.com/posts", loader="api")
data = dataset.lazyframe.collect()
print(data)

Advanced Features

Overwriting Loaders

# Register initial loader
def loader1(location: str) -> pl.LazyFrame | None:
    """Return a fixed lazy frame with an ``old`` column for ``"test"``, else ``None``."""
    if location != "test":
        return None
    return pl.DataFrame({"old": [1, 2, 3]}).lazy()

def loader2(location: str) -> pl.LazyFrame | None:
    """Return a fixed lazy frame with a ``new`` column for ``"test"``, else ``None``."""
    if location != "test":
        return None
    return pl.DataFrame({"new": [4, 5, 6]}).lazy()

register_data_loader("custom", loader1)

# Registering a second loader under the same name without overwrite=True
# is expected to fail; the ValueError is caught and printed here
try:
    register_data_loader("custom", loader2)
except ValueError as e:
    print(f"Error: {e}")

# Passing overwrite=True replaces the loader already registered as "custom"
register_data_loader("custom", loader2, overwrite=True)

Managing Loaders

from tnp_statistic_library.api import (
    list_data_loaders,
    unregister_data_loader,
    clear_data_loaders
)

# List all registered loaders
print("Registered loaders:", list_data_loaders())

# Remove a loader
if unregister_data_loader("excel"):
    print("Excel loader removed")

# Clear all loaders
clear_data_loaders()
print("All loaders cleared")

Database Loader Example

import polars as pl
from sqlalchemy import create_engine
from tnp_statistic_library.api import register_named_loader

def database_loader(location: str) -> pl.LazyFrame:
    """Load every row of a SQL table addressed as ``sql://<table>``.

    Args:
        location: ``sql://`` followed by a table name, optionally
            schema-qualified (``sql://public.customers``).

    Returns:
        A ``pl.LazyFrame`` built with ``pl.defer`` around the query function.

    Raises:
        ValueError: If ``location`` lacks the ``sql://`` prefix, or the table
            name is not a plain (optionally schema-qualified) identifier.
    """
    import re  # local import: only this loader needs it

    if not location.startswith("sql://"):
        raise ValueError("DatabaseLoader requires sql:// prefix")

    table_name = location.removeprefix("sql://")
    # The table name is interpolated into the SQL text below (identifiers
    # cannot be bound as query parameters), so accept only plain identifiers
    # to rule out SQL injection through the location string.
    if not re.fullmatch(r"[A-Za-z_][A-Za-z0-9_]*(\.[A-Za-z_][A-Za-z0-9_]*)?", table_name):
        raise ValueError(f"Invalid table name in location: {location!r}")

    def query_database():
        # Example connection string only; real credentials should come from
        # configuration or the environment, not from source code.
        engine = create_engine("postgresql://user:pass@localhost/db")
        query = f"SELECT * FROM {table_name}"
        return pl.read_database(query, engine)

    # NOTE(review): depending on the installed polars version, pl.defer may
    # require a `schema=` keyword argument; confirm against the polars docs.
    return pl.defer(query_database)

# Register database loader
register_named_loader("database", database_loader)

# Use in dataset
dataset = Dataset(location="sql://customers", loader="database")

Integration with Metrics

The plugin system integrates seamlessly with the existing metrics system:

from tnp_statistic_library.metrics.summary import Mean

# Register your custom loader
# NOTE: my_custom_loader is a placeholder that is not defined in this
# snippet; define your own loader before running it
register_data_loader("custom", my_custom_loader)

# Use in metric computation: the dataset names the loader registered above
metric = Mean.build(
    dataset=Dataset(location="custom://data", loader="custom"),
    name="average_value",
    variable="value_column"
)

result = metric.run_metric().collect()

Jupyter Notebook Workflow

The plugin system is particularly useful in Jupyter notebooks:

# Cell 1: Define and register loader
class MyLoader:
    """Minimal loader object that exposes a ``load`` method."""

    def load(self, location: str) -> pl.LazyFrame:
        """Return a small fixed lazy frame; ``location`` is not consulted."""
        # Custom loading logic
        frame = pl.DataFrame({"data": [1, 2, 3]})
        return frame.lazy()

register_data_loader("notebook_loader", MyLoader())

# Cell 2: Use in different cells
dataset1 = Dataset(location="source1", loader="notebook_loader")
dataset2 = Dataset(location="source2", loader="notebook_loader")

# Cell 3: Experiment with different loaders
# NOTE: ImprovedLoader is a placeholder for your revised loader class; it is
# not defined in this snippet
register_data_loader("notebook_loader", ImprovedLoader(), overwrite=True)
# Now all previous datasets will use the new loader!

Error Handling

The plugin system provides clear error messages:

# Unknown loader
try:
    dataset = Dataset(location="data.txt", loader="unknown")
    # Accessing .lazyframe is what triggers loading
    df = dataset.lazyframe
except ValueError as e:
    print(f"Loader error: {e}")
    # Expected output: Loader error: Unknown data loader 'unknown'

# Loader registration error
try:
    # A plain string is passed where a loader is expected
    register_data_loader("test", "not_a_loader")
except TypeError as e:
    print(f"Registration error: {e}")
    # Expected output: Registration error: Loader must implement DataLoaderProtocol

Migration from Hook-based Plugins

If you have existing hook-based plugins, they will continue to work. The new system provides an additional, simpler way to register loaders:

# Old way (still works)
from tnp_statistic_library.api import hookimpl

class OldStyleLoader:
    """Hook-based loader: exposes ``data_loader`` through ``@hookimpl``."""

    @hookimpl
    def data_loader(self, location: str) -> pl.LazyFrame | None:
        """Lazily scan ``.custom`` files as CSV; return ``None`` for anything else."""
        if not location.endswith('.custom'):
            return None
        return pl.scan_csv(location)

# New way (recommended for interactive use)
class NewStyleLoader:
    """Loader object for direct registration; exposes a single ``load`` method."""

    def load(self, location: str) -> pl.LazyFrame:
        """Return a lazy CSV scan of ``location``."""
        lazy_csv = pl.scan_csv(location)
        return lazy_csv

register_data_loader("new_style", NewStyleLoader())

The new system takes priority for named loaders, while hook-based plugins are still used for automatic format detection.