🎯 The Problem
In dbt’s semantic layer, you have complex metrics with multiple filter conditions:
-- Complex metric with numerous filters
SELECT
SUM(revenue) as total_revenue
FROM sales
WHERE
region IN ('EU', 'NA')
AND product_category = 'Premium'
AND customer_segment = 'Enterprise'
AND order_status = 'Completed'
AND order_date >= '2024-01-01'
Challenges:
- ❌ Slow queries on large volumes
- ❌ Repeated filters across multiple metrics
- ❌ Scattered and hard-to-maintain filtering logic
- ❌ No reuse of frequent calculations
✅ Solution: Filter Dimensions with Selective Materialization
Key Concept
Separate filtering logic from metric definitions by creating pre-calculated filter dimensions for frequent combinations.
🛠️ Complete Implementation
Step 1: Identify Filtering Patterns
# Frequent filter analysis
filters_analysis:
high_frequency: # >100 queries/day
- region + product_category
- customer_segment + order_status
- region + customer_segment + product_category
medium_frequency: # 10-100 queries/day
- product_category + order_status
- region + order_date_range
ad_hoc: # <10 queries/day
- custom_combinations
Step 2: Create Materialized Filter Dimensions
-- models/dimensions/dim_sales_filter_combinations.sql
-- ✅ Apply optimal strategy based on usage frequency
{% set strategy = get_optimal_filter_strategy('dim_sales_filter_combinations', 'high') %}
{{ config(
materialized=strategy.materialization,
indexes=strategy.indexing and [
{'columns': ['region', 'product_category', 'customer_segment'], 'unique': false},
{'columns': ['is_premium_enterprise'], 'unique': false},
{'columns': ['filter_combination_key'], 'unique': true}
] or [],
cluster_by=strategy.clustering or ['region', 'product_category']
) }}
WITH sales_base AS (
SELECT DISTINCT
region,
product_category,
customer_segment,
order_status,
DATE_TRUNC('month', order_date) as order_month
FROM {{ ref('fact_sales') }}
WHERE order_date >= '2020-01-01'
),
filter_combinations AS (
SELECT
region,
product_category,
customer_segment,
order_status,
order_month,
-- Pre-calculate frequent combinations
CASE
WHEN region IN ('EU', 'NA')
AND product_category = 'Premium'
AND customer_segment = 'Enterprise'
THEN TRUE
ELSE FALSE
END as is_premium_enterprise,
CASE
WHEN region IN ('EU', 'NA')
AND product_category IN ('Premium', 'Standard')
THEN TRUE
ELSE FALSE
END as is_target_market,
CASE
WHEN customer_segment = 'Enterprise'
AND order_status = 'Completed'
THEN TRUE
ELSE FALSE
END as is_completed_enterprise,
-- Hash combinations for fast joins
{{ dbt_utils.generate_surrogate_key([
'region', 'product_category', 'customer_segment', 'order_status'
]) }} as filter_combination_key
FROM sales_base
)
SELECT * FROM filter_combinations
Step 3: Define Metrics with Filter Dimensions
# models/metrics/schema.yml
version: 2
metrics:
- name: revenue_premium_enterprise
label: "Revenue Premium Enterprise"
model: ref('fact_sales')
description: "Revenue from Premium products sold to Enterprise customers in EU/NA"
calculation_method: sum
expression: revenue
# ✅ Use pre-calculated filter dimension
filters:
- field: filter_combination_key
operator: 'in'
value: "{{ get_filter_keys('is_premium_enterprise', true) }}"
dimensions:
- region
- product_category
- customer_segment
- name: revenue_target_market
label: "Revenue Target Market"
model: ref('fact_sales')
description: "Revenue from target market segments"
calculation_method: sum
expression: revenue
# ✅ Different pre-calculated combination
filters:
- field: filter_combination_key
operator: 'in'
value: "{{ get_filter_keys('is_target_market', true) }}"
- name: revenue_flexible
label: "Revenue with Dynamic Filters"
model: ref('fact_sales')
description: "Revenue with ad-hoc filtering capability"
calculation_method: sum
expression: revenue
# ✅ Keep flexibility for rare filters
# No pre-filtering, allows all dynamic filters
Step 4: Utility Macros
-- macros/filter_helpers.sql
{% macro get_filter_keys(filter_name, filter_value) %}
{% set query %}
SELECT filter_combination_key
FROM {{ ref('dim_sales_filter_combinations') }}
WHERE {{ filter_name }} = {{ filter_value }}
{% endset %}
{% set results = run_query(query) %}
{% set keys = [] %}
{% if results %}
{% for row in results %}
{% set _ = keys.append("'" + row[0] + "'") %}
{% endfor %}
{% endif %}
{{ return('(' + keys|join(',') + ')') }}
{% endmacro %}
{% macro create_dynamic_filter(base_filters={}, dynamic_filters={}) %}
{% set all_filters = base_filters %}
{% for key, value in dynamic_filters.items() %}
{% if value is not none %}
{% set _ = all_filters.update({key: value}) %}
{% endif %}
{% endfor %}
{% set filter_conditions = [] %}
{% for field, condition in all_filters.items() %}
{% set _ = filter_conditions.append(field + ' ' + condition) %}
{% endfor %}
{{ return(' AND '.join(filter_conditions)) }}
{% endmacro %}
Step 5: Optimized Fact Model
-- models/facts/fact_sales_with_filters.sql
-- ✅ Apply strategy based on expected usage
{% set strategy = get_optimal_filter_strategy('fact_sales_with_filters', 'high') %}
{{ config(
materialized='incremental',
unique_key='sale_id',
cluster_by=strategy.clustering or ['order_date', 'region'],
indexes=strategy.indexing and [
{'columns': ['filter_combination_key'], 'unique': false},
{'columns': ['is_premium_enterprise', 'is_target_market'], 'unique': false}
] or []
) }}
SELECT
s.sale_id,
s.customer_id,
s.product_id,
s.order_date,
s.revenue,
s.quantity,
-- ✅ Join with filter dimensions
f.filter_combination_key,
f.is_premium_enterprise,
f.is_target_market,
f.is_completed_enterprise,
-- Dimensions for drill-down
s.region,
s.product_category,
s.customer_segment,
s.order_status
FROM {{ ref('stg_sales') }} s
LEFT JOIN {{ ref('dim_sales_filter_combinations') }} f
ON s.region = f.region
AND s.product_category = f.product_category
AND s.customer_segment = f.customer_segment
AND s.order_status = f.order_status
AND DATE_TRUNC('month', s.order_date) = f.order_month
{% if is_incremental() %}
WHERE s.updated_at > (SELECT MAX(updated_at) FROM {{ this }})
{% endif %}
📊 Usage Examples
Optimized Query (Frequent Filter)
-- ✅ Uses pre-calculated filter dimension
SELECT
region,
SUM(revenue) as total_revenue
FROM {{ ref('fact_sales_with_filters') }}
WHERE is_premium_enterprise = TRUE -- ← Indexed + pre-calculated
GROUP BY region
-- Performance: ~50ms instead of 2s
Flexible Query (Ad-hoc Filter)
-- ✅ Keeps flexibility for rare cases
SELECT
product_category,
SUM(revenue) as total_revenue
FROM {{ ref('fact_sales_with_filters') }}
WHERE region = 'APAC' -- ← Non-pre-calculated filter
AND customer_segment IN ('SMB', 'Mid-Market')
AND order_date >= '2024-06-01'
GROUP BY product_category
-- Performance: acceptable because rare usage
🎯 Advanced Configuration
Intelligent Materialization by Frequency
# dbt_project.yml
models:
metrics_project:
dimensions:
filter_combinations:
# ✅ Use get_optimal_filter_strategy results
high_frequency:
+materialized: "{{ get_optimal_filter_strategy('high_freq_filters', 'high').materialization }}"
+indexes: "{{ get_optimal_filter_strategy('high_freq_filters', 'high').indexing }}"
medium_frequency:
+materialized: "{{ get_optimal_filter_strategy('med_freq_filters', 'medium').materialization }}"
low_frequency:
+materialized: "{{ get_optimal_filter_strategy('low_freq_filters', 'low').materialization }}"
# ✅ Vars used by get_optimal_filter_strategy
vars:
filter_strategy_config:
performance_thresholds:
high_frequency_queries_per_day: 100
medium_frequency_queries_per_day: 10
low_frequency_queries_per_day: 1
optimization_levels:
high:
target_response_time_ms: 100
max_staleness_hours: 1
medium:
target_response_time_ms: 500
max_staleness_hours: 4
low:
target_response_time_ms: 2000
max_staleness_hours: 24
Dynamic Filter Configuration
-- macros/adaptive_filtering.sql
{% macro get_optimal_filter_strategy(metric_name, expected_frequency='medium') %}
{% set strategies = {
'high': {
'use_filter_dimension': true,
'materialization': 'table',
'indexing': true
},
'medium': {
'use_filter_dimension': true,
'materialization': 'materialized_view',
'indexing': false
},
'low': {
'use_filter_dimension': false,
'materialization': 'view',
'indexing': false
}
} %}
{{ return(strategies[expected_frequency]) }}
{% endmacro %}
📈 Monitoring and Optimization
Filter Performance Dashboard
-- models/monitoring/filter_performance_metrics.sql
WITH query_performance AS (
SELECT
metric_name,
filter_type,
avg_execution_time_ms,
query_frequency_per_day,
uses_filter_dimension,
CASE
WHEN uses_filter_dimension = true THEN avg_execution_time_ms
ELSE avg_execution_time_ms * 0.1 -- Estimated improvement
END as optimized_time_ms
FROM {{ ref('query_performance_log') }}
WHERE query_date >= CURRENT_DATE - 30
),
optimization_opportunities AS (
SELECT
metric_name,
filter_type,
query_frequency_per_day,
avg_execution_time_ms,
optimized_time_ms,
(avg_execution_time_ms - optimized_time_ms) * query_frequency_per_day as daily_time_saved_ms,
CASE
WHEN query_frequency_per_day > 100 AND NOT uses_filter_dimension
THEN 'HIGH_PRIORITY'
WHEN query_frequency_per_day > 50 AND NOT uses_filter_dimension
THEN 'MEDIUM_PRIORITY'
ELSE 'LOW_PRIORITY'
END as optimization_priority
FROM query_performance
)
SELECT
optimization_priority,
COUNT(*) as metrics_count,
SUM(daily_time_saved_ms) / 1000 as daily_seconds_saved,
AVG(query_frequency_per_day) as avg_daily_queries
FROM optimization_opportunities
GROUP BY optimization_priority
ORDER BY
CASE optimization_priority
WHEN 'HIGH_PRIORITY' THEN 1
WHEN 'MEDIUM_PRIORITY' THEN 2
ELSE 3
END
Performance Validation with Strategy Monitoring
-- models/monitoring/strategy_performance_validation.sql
-- ✅ Monitor if applied strategies are meeting performance expectations
WITH strategy_metrics AS (
SELECT
model_name,
strategy_applied,
expected_frequency,
target_response_time_ms,
actual_avg_response_time_ms,
actual_queries_per_day,
-- Performance validation
CASE
WHEN actual_avg_response_time_ms <= target_response_time_ms THEN 'MEETING_SLA'
WHEN actual_avg_response_time_ms <= target_response_time_ms * 1.5 THEN 'CLOSE_TO_SLA'
ELSE 'MISSING_SLA'
END as performance_status,
-- Usage validation
CASE
WHEN expected_frequency = 'high' AND actual_queries_per_day >= 100 THEN 'USAGE_MATCHES'
WHEN expected_frequency = 'medium' AND actual_queries_per_day BETWEEN 10 AND 100 THEN 'USAGE_MATCHES'
WHEN expected_frequency = 'low' AND actual_queries_per_day < 10 THEN 'USAGE_MATCHES'
ELSE 'USAGE_MISMATCH'
END as usage_validation
FROM {{ ref('model_performance_tracking') }}
WHERE measurement_date >= CURRENT_DATE - 30
),
strategy_recommendations AS (
SELECT
model_name,
strategy_applied as current_strategy,
performance_status,
usage_validation,
-- Recommend strategy adjustments
CASE
WHEN usage_validation = 'USAGE_MISMATCH' AND actual_queries_per_day > 100
AND current_strategy != 'high' THEN 'UPGRADE_TO_HIGH'
WHEN usage_validation = 'USAGE_MISMATCH' AND actual_queries_per_day < 10
AND current_strategy != 'low' THEN 'DOWNGRADE_TO_LOW'
WHEN performance_status = 'MISSING_SLA'
AND current_strategy != 'high' THEN 'UPGRADE_FOR_PERFORMANCE'
WHEN performance_status = 'MEETING_SLA'
AND current_strategy = 'high'
AND actual_queries_per_day < 50 THEN 'CONSIDER_DOWNGRADE'
ELSE 'OPTIMAL'
END as recommended_action,
-- Next strategy to apply
CASE
WHEN recommended_action LIKE '%HIGH%' THEN 'high'
WHEN recommended_action LIKE '%LOW%' THEN 'low'
ELSE 'medium'
END as recommended_strategy
FROM strategy_metrics
)
SELECT
model_name,
current_strategy,
performance_status,
usage_validation,
recommended_action,
recommended_strategy,
-- Action priority
CASE recommended_action
WHEN 'UPGRADE_FOR_PERFORMANCE' THEN '🔴 HIGH - Performance Issue'
WHEN 'UPGRADE_TO_HIGH' THEN '🟡 MEDIUM - Usage Increased'
WHEN 'CONSIDER_DOWNGRADE' THEN '🟢 LOW - Over-optimized'
ELSE '✅ No Action Needed'
END as priority_level
FROM strategy_recommendations
ORDER BY
CASE recommended_action
WHEN 'UPGRADE_FOR_PERFORMANCE' THEN 1
WHEN 'UPGRADE_TO_HIGH' THEN 2
WHEN 'DOWNGRADE_TO_LOW' THEN 3
WHEN 'CONSIDER_DOWNGRADE' THEN 4
ELSE 5
END
🚀 Advanced Use Cases
-- tests/test_filter_performance.sql
{% set performance_thresholds = {
'revenue_premium_enterprise': 100, -- ms
'revenue_target_market': 150,
'revenue_flexible': 500
} %}
{% for metric, threshold in performance_thresholds.items() %}
WITH performance_check AS (
SELECT
'{{ metric }}' as metric_name,
{{ threshold }} as threshold_ms,
avg_execution_time_ms,
CASE
WHEN avg_execution_time_ms > {{ threshold }} THEN 'PERFORMANCE_DEGRADED'
ELSE 'OK'
END as status
FROM {{ ref('query_performance_log') }}
WHERE metric_name = '{{ metric }}'
AND query_date >= CURRENT_DATE - 7
)
SELECT * FROM performance_check WHERE status = 'PERFORMANCE_DEGRADED'
{% if not loop.last %}UNION ALL{% endif %}
{% endfor %}
🚀 Complete Implementation Example
Real-World Usage with Strategy Application
-- models/metrics/revenue_premium_enterprise.sql
-- ✅ Step 1: Determine optimal strategy based on expected usage
{% set strategy = get_optimal_filter_strategy('revenue_premium_enterprise', 'high') %}
{{ config(
materialized=strategy.materialization,
unique_key='sale_id',
cluster_by=strategy.clustering or ['order_date', 'region'],
indexes=strategy.indexing and [
{'columns': ['is_premium_enterprise'], 'unique': false},
{'columns': ['region', 'order_date'], 'unique': false}
] or [],
# ✅ Refresh frequency based on strategy
on_schema_change=strategy.get('schema_change_policy', 'append_new_columns'),
# ✅ Metadata for monitoring
meta={
'strategy_applied': strategy,
'expected_queries_per_day': 150,
'target_response_time_ms': strategy.get('target_response_time_ms', 100)
}
) }}
-- ✅ Step 2: Apply conditional logic based on strategy
{% if strategy.use_filter_dimension %}
-- High-performance version with pre-calculated filters
SELECT
region,
DATE_TRUNC('month', order_date) as order_month,
SUM(revenue) as total_revenue,
COUNT(*) as order_count,
AVG(revenue) as avg_order_value,
CURRENT_TIMESTAMP as last_updated
FROM {{ ref('fact_sales_with_filters') }}
WHERE is_premium_enterprise = TRUE -- ✅ Pre-calculated boolean filter
GROUP BY region, DATE_TRUNC('month', order_date)
{% else %}
-- Simplified version for low-frequency metrics
SELECT
region,
DATE_TRUNC('month', order_date) as order_month,
SUM(revenue) as total_revenue,
COUNT(*) as order_count,
AVG(revenue) as avg_order_value,
CURRENT_TIMESTAMP as last_updated
FROM {{ ref('fact_sales') }}
WHERE region IN ('EU', 'NA')
AND customer_segment = 'Enterprise'
AND product_tier = 'Premium'
AND order_status = 'Completed' -- ✅ Direct filters for rare usage
GROUP BY region, DATE_TRUNC('month', order_date)
{% endif %}
-- ✅ Step 3: Add strategy-based post-hooks
{{ config(
post_hook=strategy.get('post_hooks', [
"ANALYZE {{ this }}", -- Update table statistics
"{{ log_performance_metrics(this.identifier, strategy) }}"
])
) }}
Multi-Metric Implementation with Different Strategies
-- models/metrics/customer_analytics_suite.sql
-- ✅ Different strategies for different metrics based on usage patterns
{% set daily_revenue_strategy = get_optimal_filter_strategy('daily_revenue', 'high') %}
{% set customer_segmentation_strategy = get_optimal_filter_strategy('customer_segmentation', 'medium') %}
{% set product_performance_strategy = get_optimal_filter_strategy('product_performance', 'low') %}
WITH
-- High-frequency metric: Optimized with filter dimensions
daily_revenue AS (
{% if daily_revenue_strategy.use_filter_dimension %}
SELECT
DATE(order_date) as revenue_date,
SUM(revenue) as daily_revenue,
COUNT(DISTINCT customer_id) as unique_customers
FROM {{ ref('fact_sales_with_filters') }}
WHERE is_premium_enterprise = TRUE
GROUP BY DATE(order_date)
{% else %}
-- Fallback for unexpected low usage
SELECT
DATE(order_date) as revenue_date,
SUM(revenue) as daily_revenue,
COUNT(DISTINCT customer_id) as unique_customers
FROM {{ ref('fact_sales') }}
WHERE region IN ('EU', 'NA') AND customer_segment = 'Enterprise'
GROUP BY DATE(order_date)
{% endif %}
),
-- Medium-frequency metric: Balanced approach
customer_segments AS (
{% if customer_segmentation_strategy.use_filter_dimension %}
SELECT
customer_segment,
is_completed_enterprise,
COUNT(*) as customer_count,
AVG(revenue) as avg_customer_revenue
FROM {{ ref('fact_sales_with_filters') }}
GROUP BY customer_segment, is_completed_enterprise
{% else %}
SELECT
customer_segment,
order_status = 'Completed' as is_completed_enterprise,
COUNT(*) as customer_count,
AVG(revenue) as avg_customer_revenue
FROM {{ ref('fact_sales') }}
WHERE customer_segment = 'Enterprise'
GROUP BY customer_segment, (order_status = 'Completed')
{% endif %}
),
-- Low-frequency metric: Simple approach
product_performance AS (
-- Always use simple approach for low-frequency metrics
SELECT
product_category,
COUNT(*) as total_sales,
SUM(revenue) as total_revenue
FROM {{ ref('fact_sales') }}
WHERE order_date >= CURRENT_DATE - INTERVAL 90 DAY
GROUP BY product_category
)
-- Combine all metrics
SELECT
dr.revenue_date,
dr.daily_revenue,
dr.unique_customers,
cs.customer_count,
cs.avg_customer_revenue,
pp.total_sales as product_sales_90d
FROM daily_revenue dr
CROSS JOIN customer_segments cs
CROSS JOIN product_performance pp
Strategy-Based Model Selection Macro
-- macros/strategic_model_builder.sql
{% macro build_metric_with_strategy(metric_name, base_query, filters, expected_frequency) %}
{% set strategy = get_optimal_filter_strategy(metric_name, expected_frequency) %}
{{ config(
materialized=strategy.materialization,
indexes=strategy.indexing and [
{'columns': ['filter_combination_key'], 'unique': false}
] or [],
cluster_by=strategy.clustering,
meta={'optimization_strategy': strategy}
) }}
{% if strategy.use_filter_dimension %}
-- Use optimized filter dimensions
{{ base_query }}
FROM {{ ref('fact_sales_with_filters') }}
WHERE {{ filters.optimized }}
{% else %}
-- Use direct filtering
{{ base_query }}
FROM {{ ref('fact_sales') }}
WHERE {{ filters.direct }}
{% endif %}
{% endmacro %}
-- Usage example:
{{ build_metric_with_strategy(
'revenue_analysis',
'SELECT region, SUM(revenue) as total_revenue',
{
'optimized': 'is_premium_enterprise = TRUE',
'direct': 'region IN (\'EU\', \'NA\') AND customer_segment = \'Enterprise\''
},
'high'
) }}
E-commerce: Customer Segmentation Filters
-- Filter dimension for e-commerce
WITH customer_segments AS (
SELECT DISTINCT
region,
customer_tier,
product_category,
purchase_channel,
-- Pre-calculated business segments
CASE
WHEN customer_tier = 'VIP'
AND region IN ('North America', 'Europe')
AND product_category = 'Premium'
THEN TRUE ELSE FALSE
END as is_vip_premium,
CASE
WHEN purchase_channel = 'Online'
AND customer_tier IN ('Gold', 'Platinum')
THEN TRUE ELSE FALSE
END as is_digital_premium,
-- Hash for optimized joins
{{ dbt_utils.generate_surrogate_key([
'region', 'customer_tier', 'product_category', 'purchase_channel'
]) }} as segment_key
FROM {{ ref('dim_customers') }}
CROSS JOIN {{ ref('dim_products') }}
WHERE is_active = true
)
SELECT * FROM customer_segments
SaaS: Product Usage Filters
-- Filter dimension for SaaS metrics
WITH usage_segments AS (
SELECT DISTINCT
subscription_tier,
user_role,
feature_set,
geographic_region,
-- Pre-calculated adoption segments
CASE
WHEN subscription_tier = 'Enterprise'
AND feature_set = 'Advanced'
AND user_role = 'Admin'
THEN TRUE ELSE FALSE
END as is_power_user,
CASE
WHEN subscription_tier IN ('Pro', 'Enterprise')
AND geographic_region IN ('US', 'EU')
THEN TRUE ELSE FALSE
END as is_target_market,
{{ dbt_utils.generate_surrogate_key([
'subscription_tier', 'user_role', 'feature_set', 'geographic_region'
]) }} as usage_segment_key
FROM {{ ref('dim_subscriptions') }}
CROSS JOIN {{ ref('dim_users') }}
WHERE subscription_status = 'active'
)
SELECT * FROM usage_segments
💡 Best Practices
✅ Do’s
- Analyze patterns before implementing
-- Identify most frequent filters SELECT filter_conditions, COUNT(*) as usage_count FROM query_log GROUP BY filter_conditions ORDER BY usage_count DESC
- Materialize by frequency
- High frequency (>100/day) →
table
with indexes - Medium frequency (10-100/day) →
materialized_view
- Low frequency (<10/day) →
view
or dynamic filters
- High frequency (>100/day) →
- Separate business logic
# Keep business definitions in filter dimensions # Keep pure metrics in metric definitions
- Monitor performance
-- Automatic tracking dashboard SELECT metric_name, avg_response_time, optimization_opportunity FROM filter_performance_dashboard
❌ Don’ts
- Over-materialize rare filters
- Duplicate filtering logic
- Forget indexes on filter dimensions
- Ignore performance monitoring
🎯 Summary
Filter Dimensions with Selective Materialization Strategy:
- 🚀 Performance: Pre-calculates frequent filters
- 🔄 Flexibility: Keeps dynamic filters for ad-hoc queries
- 🛠️ Maintenance: Separates filtering logic from metrics
- 📊 Scalability: Adapts to usage patterns
Result: Fast AND flexible metrics! 🎯