# Complete Workflow Examples
This document provides complete YAML workflow configurations, demonstrating how to combine multiple metric types and use fan-out expansion effectively.
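Throughout these examples, fan-out expansion means that a single metric entry whose `name` and `segment` fields are parallel lists expands into one concrete metric per list position, with all non-fan-out fields shared. Here is a minimal sketch of the idea; the expanded form below is conceptual (the tool performs the expansion internally), and the entry names are placeholders:

```yaml
# One entry with list-valued fan-out fields...
metrics:
  segment_means:
    metric_type: "mean"
    config:
      name: ["retail_mean", "corporate_mean"]
      dataset: "portfolio_data"
      variable: "exposure_amount"
      segment: [["retail"], ["corporate"]]
---
# ...behaves as if two single-segment metrics had been written out
# (illustrative only; entry names are placeholders):
metrics:
  retail_mean_entry:
    metric_type: "mean"
    config:
      name: ["retail_mean"]
      dataset: "portfolio_data"
      variable: "exposure_amount"
      segment: [["retail"]]
  corporate_mean_entry:
    metric_type: "mean"
    config:
      name: ["corporate_mean"]
      dataset: "portfolio_data"
      variable: "exposure_amount"
      segment: [["corporate"]]
```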
## Basic Multi-Metric Workflow
```yaml
# Simple workflow with multiple metric types
metrics:
  model_performance:
    metric_type: "default_accuracy"
    config:
      name: ["model_accuracy"]
      dataset: "validation_set"
      prob_def: "predicted_default"
      default: "actual_default"
  loan_statistics:
    metric_type: "mean"
    config:
      name: ["avg_loan_amount"]
      dataset: "loan_portfolio"
      variable: "loan_amount"
  credit_summary:
    metric_type: "median"
    config:
      name: ["median_credit_score"]
      dataset: "customer_data"
      variable: "credit_score"
  discrimination_test:
    metric_type: "auc"
    config:
      name: ["model_auc"]
      dataset: "test_set"
      prob_def: "default_probability"
      default: "is_default"
  gini_coefficient:
    metric_type: "gini"
    config:
      name: ["model_gini"]
      dataset: "test_set"
      prob_def: "default_probability"
      default: "is_default"
  prediction_accuracy:
    metric_type: "mape"
    config:
      name: ["ead_mape"]
      dataset: "prediction_data"
      observed: "actual_ead"
      predicted: "predicted_ead"

datasets:
  validation_set:
    location: "data/validation.csv"
  loan_portfolio:
    location: "data/loans.csv"
  customer_data:
    location: "data/customers.csv"
  test_set:
    location: "data/test_data.csv"
  prediction_data:
    location: "data/predictions.csv"
```
## Advanced Fan-out Examples
### Multi-Segment Performance Analysis
```yaml
metrics:
  multi_model_accuracy:
    metric_type: "default_accuracy"
    config:
      name: ["model_v1_accuracy", "model_v2_accuracy", "model_v3_accuracy"]
      dataset: "validation_data"
      segment: [["prime"], ["near_prime"], ["subprime"]]
      prob_def: "predicted_default"
      default: "actual_default"
      # This creates 3 metrics: name and segment lists must have same length
  ead_confidence_analysis:
    metric_type: "ead_accuracy"
    config:
      name: ["ead_corporate", "ead_retail", "ead_sme"]
      dataset: "exposure_data"
      segment: [["corporate"], ["retail"], ["sme"]]
      predicted_ead: "predicted_ead"
      actual_ead: "actual_ead"
      default: "default_flag"
  prediction_error_analysis:
    metric_type: "mape"
    config:
      name: ["mape_corporate", "mape_retail", "mape_sme"]
      dataset: "exposure_data"
      segment: [["corporate"], ["retail"], ["sme"]]
      observed: "actual_ead"
      predicted: "predicted_ead"

datasets:
  validation_data:
    location: "data/model_validation.csv"
  exposure_data:
    location: "data/exposures.csv"
```
### Comprehensive Model Validation
```yaml
metrics:
  discrimination_metrics:
    metric_type: "auc"
    config:
      name: ["high_risk_auc", "medium_risk_auc", "low_risk_auc"]
      dataset: "performance_data"
      prob_def: "default_probability"
      default: "default_flag"
      segment: [["high_risk"], ["medium_risk"], ["low_risk"]]
  gini_coefficients:
    metric_type: "gini"
    config:
      name: ["high_risk_gini", "medium_risk_gini", "low_risk_gini"]
      dataset: "performance_data"
      prob_def: "default_probability"
      default: "default_flag"
      segment: [["high_risk"], ["medium_risk"], ["low_risk"]]
  calibration_testing:
    metric_type: "hosmer_lemeshow"
    config:
      name: ["calibration_test"]
      dataset: "calibration_data"
      prob_def: "default_probability"
      default: "default_outcome"
      bands: 10
  distribution_stability:
    metric_type: "jeffreys_test"
    config:
      name: ["score_stability_new", "score_stability_existing"]
      dataset: "current_month"
      variable: "credit_score"
      segment: [["new_customers"], ["existing_customers"]]

datasets:
  performance_data:
    location: "data/monthly_quarterly_performance.csv"
  calibration_data:
    location: "data/calibration_sample.csv"
  current_month:
    location: "data/current_scores.csv"
  baseline_month:
    location: "data/baseline_scores.csv"
```
## Time Series Monitoring
```yaml
metrics:
  monthly_accuracy:
    metric_type: "default_accuracy"
    config:
      name: ["jan_accuracy", "feb_accuracy", "mar_accuracy", "apr_accuracy"]
      dataset: "monthly_data"
      segment: [["jan"], ["feb"], ["mar"], ["apr"]]
      prob_def: "model_prediction"
      default: "observed_default"
  score_stability:
    metric_type: "jeffreys_test"
    config:
      name: ["jan_stability", "feb_stability", "mar_stability"]
      dataset: "monthly_scores"
      segment: [["jan"], ["feb"], ["mar"]]
      variable: "risk_score"
  exposure_summaries:
    metric_type: "mean"
    config:
      name:
        [
          "jan_avg_exposure",
          "feb_avg_exposure",
          "mar_avg_exposure",
          "apr_avg_exposure",
        ]
      dataset: "portfolio_data"
      segment: [["jan"], ["feb"], ["mar"], ["apr"]]
      variable: "exposure_amount"
  exposure_medians:
    metric_type: "median"
    config:
      name:
        [
          "jan_med_exposure",
          "feb_med_exposure",
          "mar_med_exposure",
          "apr_med_exposure",
        ]
      dataset: "portfolio_data"
      segment: [["jan"], ["feb"], ["mar"], ["apr"]]
      variable: "exposure_amount"

datasets:
  monthly_data:
    location: "data/monthly_performance.csv"
  monthly_scores:
    location: "data/monthly_risk_scores.csv"
  baseline_scores:
    location: "data/baseline_risk_scores.csv"
  portfolio_data:
    location: "data/portfolio_exposures.csv"
```
## A/B Testing Configuration
```yaml
metrics:
  ab_accuracy_comparison:
    metric_type: "default_accuracy"
    config:
      name: ["control_accuracy", "treatment_accuracy"]
      dataset: "ab_test_results"
      segment: [["control_group"], ["treatment_group"]]
      prob_def: "model_prediction"
      default: "actual_outcome"
  ab_auc_comparison:
    metric_type: "auc"
    config:
      name: ["control_auc", "treatment_auc"]
      dataset: "ab_test_results"
      segment: [["control_group"], ["treatment_group"]]
      prob_def: "risk_probability"
      default: "default_flag"
  distribution_analysis:
    metric_type: "jeffreys_test"
    config:
      name: ["treatment_vs_control"]
      dataset: "treatment_data"
      variable: "risk_score"

datasets:
  ab_test_results:
    location: "data/ab_test_complete.csv"
  treatment_data:
    location: "data/treatment_scores.csv"
  control_data:
    location: "data/control_scores.csv"
```
## Comprehensive Discrimination Analysis
```yaml
# Complete discrimination testing workflow with AUC and Gini coefficients
metrics:
  overall_discrimination:
    metric_type: "auc"
    config:
      name: ["overall_model_auc"]
      dataset: "validation_set"
      data_format: "record_level"
      prob_def: "model_probability"
      default: "default_indicator"
  overall_gini:
    metric_type: "gini"
    config:
      name: ["overall_model_gini"]
      dataset: "validation_set"
      data_format: "record_level"
      prob_def: "model_probability"
      default: "default_indicator"
  segmented_auc:
    metric_type: "auc"
    config:
      name: ["product_auc", "region_auc", "vintage_auc"]
      dataset: "validation_set"
      data_format: "record_level"
      prob_def: "model_probability"
      default: "default_indicator"
      segment: [["product_type"], ["region"], ["origination_year"]]
  segmented_gini:
    metric_type: "gini"
    config:
      name: ["product_gini", "region_gini", "vintage_gini"]
      dataset: "validation_set"
      data_format: "record_level"
      prob_def: "model_probability"
      default: "default_indicator"
      segment: [["product_type"], ["region"], ["origination_year"]]
  ks_statistics:
    metric_type: "kolmogorov_smirnov"
    config:
      name: ["overall_ks", "product_ks"]
      dataset: "validation_set"
      data_format: "record_level"
      prob_def: "model_probability"
      default: "default_indicator"
      # A null segment entry pairs its name with the overall (unsegmented) population
      segment: [null, ["product_type"]]
  # Summary-level discrimination for risk grades
  grade_level_auc:
    metric_type: "auc"
    config:
      name: ["risk_grade_auc"]
      dataset: "grade_summary"
      data_format: "summary_level"
      mean_pd: "avg_probability"
      defaults: "default_count"
      volume: "total_count"
  grade_level_gini:
    metric_type: "gini"
    config:
      name: ["risk_grade_gini"]
      dataset: "grade_summary"
      data_format: "summary_level"
      mean_pd: "avg_probability"
      defaults: "default_count"
      volume: "total_count"

datasets:
  validation_set:
    location: "data/model_validation.csv"
  grade_summary:
    location: "data/risk_grade_performance.csv"
```
## Portfolio Risk Analysis
```yaml
metrics:
  product_accuracy:
    metric_type: "default_accuracy"
    config:
      name: ["mortgage_accuracy", "auto_accuracy", "personal_accuracy"]
      dataset: "portfolio_data"
      segment: [["mortgage_2023"], ["auto_2023"], ["personal_2023"]]
      prob_def: "pd_estimate"
      default: "default_12m"
  ead_scenarios:
    metric_type: "ead_accuracy"
    config:
      name: ["ead_optimistic", "ead_baseline", "ead_conservative"]
      dataset: "exposure_data"
      segment: [["scenario_opt"], ["scenario_base"], ["scenario_cons"]]
      predicted_ead: "ead_prediction"
      actual_ead: "actual_ead"
      default: "default_flag"
  regional_exposure_means:
    metric_type: "mean"
    config:
      name:
        [
          "northeast_mean",
          "southeast_mean",
          "midwest_mean",
          "west_mean",
          "southwest_mean",
        ]
      dataset: "regional_data"
      segment:
        [["northeast"], ["southeast"], ["midwest"], ["west"], ["southwest"]]
      variable: "exposure_amount"
  regional_exposure_medians:
    metric_type: "median"
    config:
      name:
        [
          "northeast_median",
          "southeast_median",
          "midwest_median",
          "west_median",
          "southwest_median",
        ]
      dataset: "regional_data"
      segment:
        [["northeast"], ["southeast"], ["midwest"], ["west"], ["southwest"]]
      variable: "exposure_amount"

datasets:
  portfolio_data:
    location: "data/portfolio_performance.csv"
  exposure_data:
    location: "data/exposure_predictions.csv"
  regional_data:
    location: "data/regional_exposures.csv"
```
## Complex Multi-dimensional Analysis
```yaml
metrics:
  multi_dimensional_accuracy:
    metric_type: "default_accuracy"
    config:
      name:
        ["prime_q1_acc", "prime_q2_acc", "subprime_q1_acc", "subprime_q2_acc"]
      dataset: "performance_data"
      # Inner lists with two values combine segments (e.g. prime customers in Q1)
      segment:
        [
          ["prime_customers", "q1"],
          ["prime_customers", "q2"],
          ["subprime_customers", "q1"],
          ["subprime_customers", "q2"],
        ]
      prob_def: "default_prediction"
      default: "default_outcome"
  comprehensive_calibration:
    metric_type: "hosmer_lemeshow"
    config:
      name: ["hl_overall", "hl_prime", "hl_subprime"]
      dataset: "calibration_sample"
      segment: [null, ["prime_customers"], ["subprime_customers"]]
      prob_def: "calibrated_probability"
      default: "default_indicator"
      bands: 10
  multi_variable_summaries:
    metric_type: "mean"
    config:
      name:
        [
          "avg_income_prime",
          "avg_income_sub",
          "avg_score_prime",
          "avg_score_sub",
        ]
      dataset: "customer_analytics"
      # Only one variable per metric: variable is not a fan-out field,
      # so all four expanded metrics aggregate annual_income
      variable: "annual_income"
      segment:
        [
          ["prime_segment"],
          ["subprime_segment"],
          ["prime_segment"],
          ["subprime_segment"],
        ]

datasets:
  performance_data:
    location: "data/quarterly_performance.csv"
  calibration_sample:
    location: "data/calibration_test.csv"
  customer_analytics:
    location: "data/customer_analysis.csv"
```
## Production Monitoring Workflow
```yaml
metrics:
  daily_performance:
    metric_type: "default_accuracy"
    config:
      name: ["daily_model_accuracy"]
      dataset: "daily_predictions"
      prob_def: "model_output"
      default: "observed_outcome"
  weekly_drift:
    metric_type: "jeffreys_test"
    config:
      name: ["weekly_score_drift"]
      dataset: "current_week_scores"
      variable: "model_score"
  monthly_calibration:
    metric_type: "hosmer_lemeshow"
    config:
      name: ["monthly_calibration"]
      dataset: "monthly_sample"
      prob_def: "probability_estimate"
      default: "default_outcome"
      bands: 10
  exposure_monitoring_mean:
    metric_type: "mean"
    config:
      name: ["current_avg_exposure"]
      dataset: "current_portfolio"
      variable: "exposure_at_default"
  exposure_monitoring_median:
    metric_type: "median"
    config:
      name: ["current_med_exposure"]
      dataset: "current_portfolio"
      variable: "exposure_at_default"
  channel_performance:
    metric_type: "auc"
    config:
      name: ["online_auc", "branch_auc", "broker_auc", "partner_auc"]
      dataset: "origination_data"
      segment: [["online"], ["branch"], ["broker"], ["partner"]]
      prob_def: "approval_score"
      default: "early_default"

datasets:
  daily_predictions:
    location: "data/daily_model_output.csv"
  current_week_scores:
    location: "data/current_week.csv"
  baseline_week_scores:
    location: "data/baseline_week.csv"
  monthly_sample:
    location: "data/monthly_calibration.csv"
  current_portfolio:
    location: "data/current_exposures.csv"
  origination_data:
    location: "data/origination_channels.csv"
```
## Fan-out Validation Examples
### Correct Fan-out Structure
```yaml
metrics:
  # Valid: All fan-out lists have matching length (3)
  valid_fanout:
    metric_type: "default_accuracy"
    config:
      name: ["metric_a", "metric_b", "metric_c"]
      dataset: "validation_data"
      segment: [["seg_a"], ["seg_b"], ["seg_c"]]
      prob_def: "predicted_default" # Single value applied to all
      default: "actual_outcome" # Single value applied to all
  # Valid: Single values with fan-out list
  segment_analysis:
    metric_type: "mean"
    config:
      name: ["retail_mean", "corporate_mean", "sme_mean"]
      dataset: "portfolio_data" # Single value for all
      variable: "exposure_amount" # Single value for all
      segment: [["retail"], ["corporate"], ["sme"]] # Creates 3 metrics

datasets:
  validation_data:
    location: "data/multi_model_validation.csv"
  portfolio_data:
    location: "data/portfolio.csv"
```
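For contrast, a fan-out whose lists have mismatched lengths should be rejected by the workflow validator (see the Notes at the end of this page). The sketch below is illustrative; the entry name is a placeholder and the exact error reported depends on the validator:

```yaml
metrics:
  # Invalid: name has 3 entries but segment has only 2
  broken_fanout:
    metric_type: "mean"
    config:
      name: ["retail_mean", "corporate_mean", "sme_mean"]
      dataset: "portfolio_data"
      variable: "exposure_amount"
      segment: [["retail"], ["corporate"]] # Length mismatch: fails validation
```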
### Advanced Fan-out Patterns
```yaml
metrics:
  # Creates 4 metrics: each name pairs with the corresponding segment entry
  ead_matrix:
    metric_type: "ead_accuracy"
    config:
      name: ["conservative_95", "aggressive_99", "baseline_90", "stress_95"]
      dataset: "ead_validation"
      segment: [["model1"], ["model1"], ["model2"], ["model2"]]
      predicted_ead: "ead_prediction"
      actual_ead: "actual_ead"
      default: "default_flag"
  # Creates 6 metrics: 3 models × 2 segments
  model_segment_matrix:
    metric_type: "auc"
    config:
      name:
        [
          "v1_prime",
          "v1_subprime",
          "v2_prime",
          "v2_subprime",
          "v3_prime",
          "v3_subprime",
        ]
      dataset: "validation_data"
      segment:
        [
          ["prime"],
          ["subprime"],
          ["prime"],
          ["subprime"],
          ["prime"],
          ["subprime"],
        ]
      prob_def: "model_score" # Single column for all
      default: "default_flag"

datasets:
  ead_validation:
    location: "data/ead_test_set.csv"
  validation_data:
    location: "data/model_validation.csv"
```
## Notes
- **YAML Structure**: Metrics are defined as dictionary keys under `metrics`, not as list items (see the sketch after this list)
- **Dataset References**: The `dataset` field in `config` references dataset keys defined in the `datasets` section
- **Fan-out Fields**: Only `name` and `segment` support fan-out expansion (must be lists with matching lengths)
- **Non-Fan-out Fields**: Fields like `dataset`, `prob_def`, `default`, `predicted_ead`, `actual_ead`, `variable`, and `bands` are not fan-out fields and should be single values
- **Segment Format**: Segments must be lists of lists, even for single segments: `[["segment_name"]]`
- **Length Matching**: When using fan-out, the `name` and `segment` lists must have exactly the same length
- **Naming**: Each expanded metric gets a unique name from the fan-out expansion
- **Validation**: The workflow validator checks for proper list length matching and required fields
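To illustrate the first note, here is a sketch of the correct dictionary form next to the list form that the loader does not expect (`my_metric` and its field values are placeholders):

```yaml
# Correct: each metric is a dictionary key under metrics
metrics:
  my_metric:
    metric_type: "mean"
    config:
      name: ["my_metric"]
      dataset: "portfolio_data"
      variable: "exposure_amount"

# Incorrect: metrics written as YAML list items
# metrics:
#   - my_metric:
#       metric_type: "mean"
```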