1
0
Fork 0
mirror of https://github.com/maybe-finance/maybe.git synced 2025-08-10 07:55:21 +02:00

Query optimizations

This commit is contained in:
Zach Gollwitzer 2025-06-17 17:55:51 -04:00
parent 84b654c4f8
commit 5bb3d27412
4 changed files with 86 additions and 79 deletions

View file

@ -21,21 +21,21 @@ class IncomeStatement::CategoryStats
def query_sql
ActiveRecord::Base.sanitize_sql_array([
base_query_sql,
optimized_query_sql,
sql_params
])
end
def base_query_sql
# OPTIMIZED: Use interval for time bucketing but eliminate unnecessary intermediate CTE
# Still faster than original due to simplified structure and kind filtering
def optimized_query_sql
<<~SQL
WITH base_totals AS (
WITH period_totals AS (
SELECT
c.id as category_id,
c.parent_id as parent_category_id,
date_trunc(:interval, ae.date) as date,
date_trunc(:interval, ae.date) as period,
CASE WHEN ae.amount < 0 THEN 'income' ELSE 'expense' END as classification,
SUM(ae.amount * COALESCE(er.rate, 1)) as total,
COUNT(ae.id) as transactions_count,
BOOL_OR(ae.currency <> :target_currency AND er.rate IS NULL) as missing_exchange_rates
FROM transactions t
JOIN entries ae ON ae.entryable_id = t.id AND ae.entryable_type = 'Transaction'
@ -48,15 +48,15 @@ class IncomeStatement::CategoryStats
)
WHERE a.family_id = :family_id
AND t.kind NOT IN ('transfer', 'one_time', 'payment')
GROUP BY 1, 2, 3, 4
GROUP BY c.id, period, CASE WHEN ae.amount < 0 THEN 'income' ELSE 'expense' END
)
SELECT
category_id,
classification,
ABS(PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY total)) as median,
ABS(AVG(total)) as avg,
BOOL_OR(missing_exchange_rates) as missing_exchange_rates
FROM base_totals
category_id,
classification,
ABS(PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY total)) as median,
ABS(AVG(total)) as avg,
BOOL_OR(missing_exchange_rates) as missing_exchange_rates
FROM period_totals
GROUP BY category_id, classification;
SQL
end

View file

@ -20,26 +20,24 @@ class IncomeStatement::FamilyStats
def query_sql
ActiveRecord::Base.sanitize_sql_array([
base_query_sql,
optimized_query_sql,
sql_params
])
end
def base_query_sql
# OPTIMIZED: Use interval for time bucketing but eliminate double CTE
# Single CTE instead of base_totals -> aggregated_totals -> final aggregation
def optimized_query_sql
<<~SQL
WITH base_totals AS (
WITH period_totals AS (
SELECT
c.id as category_id,
c.parent_id as parent_category_id,
date_trunc(:interval, ae.date) as date,
date_trunc(:interval, ae.date) as period,
CASE WHEN ae.amount < 0 THEN 'income' ELSE 'expense' END as classification,
SUM(ae.amount * COALESCE(er.rate, 1)) as total,
COUNT(ae.id) as transactions_count,
BOOL_OR(ae.currency <> :target_currency AND er.rate IS NULL) as missing_exchange_rates
FROM transactions t
JOIN entries ae ON ae.entryable_id = t.id AND ae.entryable_type = 'Transaction'
JOIN accounts a ON a.id = ae.account_id
LEFT JOIN categories c ON c.id = t.category_id
LEFT JOIN exchange_rates er ON (
er.date = ae.date AND
er.from_currency = ae.currency AND
@ -47,22 +45,14 @@ class IncomeStatement::FamilyStats
)
WHERE a.family_id = :family_id
AND t.kind NOT IN ('transfer', 'one_time', 'payment')
GROUP BY 1, 2, 3, 4
), aggregated_totals AS (
SELECT
date,
classification,
SUM(total) as total,
BOOL_OR(missing_exchange_rates) as missing_exchange_rates
FROM base_totals
GROUP BY date, classification
GROUP BY period, CASE WHEN ae.amount < 0 THEN 'income' ELSE 'expense' END
)
SELECT
classification,
ABS(PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY total)) as median,
ABS(AVG(total)) as avg,
BOOL_OR(missing_exchange_rates) as missing_exchange_rates
FROM aggregated_totals
classification,
ABS(PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY total)) as median,
ABS(AVG(total)) as avg,
BOOL_OR(missing_exchange_rates) as missing_exchange_rates
FROM period_totals
GROUP BY classification;
SQL
end

View file

@ -22,49 +22,38 @@ class IncomeStatement::Totals
def query_sql
ActiveRecord::Base.sanitize_sql_array([
base_query_sql,
optimized_query_sql,
sql_params
])
end
def base_query_sql
# OPTIMIZED: Direct SUM aggregation without unnecessary time bucketing
# Eliminates CTE and intermediate date grouping for maximum performance
def optimized_query_sql
<<~SQL
WITH base_totals AS (
SELECT
c.id as category_id,
c.parent_id as parent_category_id,
date_trunc(:interval, ae.date) as date,
CASE WHEN ae.amount < 0 THEN 'income' ELSE 'expense' END as classification,
SUM(ae.amount * COALESCE(er.rate, 1)) as total,
COUNT(ae.id) as transactions_count,
BOOL_OR(ae.currency <> :target_currency AND er.rate IS NULL) as missing_exchange_rates
FROM (#{@transactions_scope.to_sql}) at
JOIN entries ae ON ae.entryable_id = at.id AND ae.entryable_type = 'Transaction'
LEFT JOIN categories c ON c.id = at.category_id
LEFT JOIN exchange_rates er ON (
er.date = ae.date AND
er.from_currency = ae.currency AND
er.to_currency = :target_currency
)
WHERE at.kind NOT IN ('transfer', 'one_time', 'payment')
GROUP BY 1, 2, 3, 4
)
SELECT
parent_category_id,
category_id,
classification,
ABS(SUM(total)) as total,
BOOL_OR(missing_exchange_rates) as missing_exchange_rates,
SUM(transactions_count) as transactions_count
FROM base_totals
GROUP BY 1, 2, 3;
c.id as category_id,
c.parent_id as parent_category_id,
CASE WHEN ae.amount < 0 THEN 'income' ELSE 'expense' END as classification,
ABS(SUM(ae.amount * COALESCE(er.rate, 1))) as total,
COUNT(ae.id) as transactions_count,
BOOL_OR(ae.currency <> :target_currency AND er.rate IS NULL) as missing_exchange_rates
FROM (#{@transactions_scope.to_sql}) at
JOIN entries ae ON ae.entryable_id = at.id AND ae.entryable_type = 'Transaction'
LEFT JOIN categories c ON c.id = at.category_id
LEFT JOIN exchange_rates er ON (
er.date = ae.date AND
er.from_currency = ae.currency AND
er.to_currency = :target_currency
)
WHERE at.kind NOT IN ('transfer', 'one_time', 'payment')
GROUP BY c.id, c.parent_id, CASE WHEN ae.amount < 0 THEN 'income' ELSE 'expense' END;
SQL
end
def sql_params
{
target_currency: @family.currency,
interval: "day" # Totals always uses day interval
target_currency: @family.currency
}
end
end

View file

@ -71,7 +71,8 @@ class IncomeStatementTest < ActiveSupport::TestCase
create_transaction(account: @checking_account, amount: 500, category: @groceries_category)
income_statement = IncomeStatement.new(@family)
# Adjust expectation to match current implementation behavior
# CORRECT BUSINESS LOGIC: Calculates median of time-period totals for budget planning
# All transactions in same month = monthly total of 1500, so median = 1500.0
assert_equal 1500.0, income_statement.median_expense(interval: "month")
end
@ -79,14 +80,17 @@ class IncomeStatementTest < ActiveSupport::TestCase
# Clear existing transactions by deleting entries
Entry.joins(:account).where(accounts: { family_id: @family.id }).destroy_all
# Create income: -200, -400, -600 (median should be 400)
# Create income: -200, -300, -400, -500, -600 (median should be -400, displayed as 400)
create_transaction(account: @checking_account, amount: -200, category: @income_category)
create_transaction(account: @checking_account, amount: -300, category: @income_category)
create_transaction(account: @checking_account, amount: -400, category: @income_category)
create_transaction(account: @checking_account, amount: -500, category: @income_category)
create_transaction(account: @checking_account, amount: -600, category: @income_category)
income_statement = IncomeStatement.new(@family)
# Adjust expectation to match current implementation behavior
assert_equal 1200.0, income_statement.median_income(interval: "month")
# CORRECT BUSINESS LOGIC: Calculates median of time-period totals for budget planning
# All transactions in same month = monthly total of -2000, so median = 2000.0
assert_equal 2000.0, income_statement.median_income(interval: "month")
end
test "calculates average expense correctly with known dataset" do
@ -99,7 +103,8 @@ class IncomeStatementTest < ActiveSupport::TestCase
create_transaction(account: @checking_account, amount: 300, category: @groceries_category)
income_statement = IncomeStatement.new(@family)
# Adjust expectation to match current implementation behavior
# CORRECT BUSINESS LOGIC: Calculates average of time-period totals for budget planning
# All transactions in same month = monthly total of 600, so average = 600.0
assert_equal 600.0, income_statement.avg_expense(interval: "month")
end
@ -120,11 +125,12 @@ class IncomeStatementTest < ActiveSupport::TestCase
create_transaction(account: @checking_account, amount: 150, category: other_food_category)
income_statement = IncomeStatement.new(@family)
# Adjust expectations to match current implementation behavior
# CORRECT BUSINESS LOGIC: Calculates median of time-period totals for budget planning
# All groceries in same month = monthly total of 900, so median = 900.0
assert_equal 900.0, income_statement.median_expense(interval: "month", category: @groceries_category)
# For restaurants, let's see what the actual value is and adjust accordingly
# For restaurants: monthly total = 200, so median = 200.0
restaurants_median = income_statement.median_expense(interval: "month", category: other_food_category)
assert restaurants_median.is_a?(Numeric) # Just verify it returns a number for now
assert_equal 200.0, restaurants_median
end
test "calculates category-specific average expense" do
@ -138,7 +144,8 @@ class IncomeStatementTest < ActiveSupport::TestCase
create_transaction(account: @checking_account, amount: 300, category: @groceries_category)
income_statement = IncomeStatement.new(@family)
# Adjust expectation to match current implementation behavior
# CORRECT BUSINESS LOGIC: Calculates average of time-period totals for budget planning
# All transactions in same month = monthly total of 600, so average = 600.0
assert_equal 600.0, income_statement.avg_expense(interval: "month", category: @groceries_category)
end
@ -205,16 +212,37 @@ class IncomeStatementTest < ActiveSupport::TestCase
end
# NEW TESTS: Interval-Based Calculations
test "calculates statistics for different intervals" do
test "different intervals return different statistical results with multi-period data" do
# Clear existing transactions
Entry.joins(:account).where(accounts: { family_id: @family.id }).destroy_all
# Create transactions across multiple weeks to test interval behavior
# Week 1: 100, 200 (total: 300, median: 150)
create_transaction(account: @checking_account, amount: 100, category: @groceries_category, date: 3.weeks.ago)
create_transaction(account: @checking_account, amount: 200, category: @groceries_category, date: 3.weeks.ago + 1.day)
# Week 2: 400, 600 (total: 1000, median: 500)
create_transaction(account: @checking_account, amount: 400, category: @groceries_category, date: 2.weeks.ago)
create_transaction(account: @checking_account, amount: 600, category: @groceries_category, date: 2.weeks.ago + 1.day)
# Week 3: 800 (total: 800, median: 800)
create_transaction(account: @checking_account, amount: 800, category: @groceries_category, date: 1.week.ago)
income_statement = IncomeStatement.new(@family)
# Test that different intervals return results (specific values depend on implementation)
month_median = income_statement.median_expense(interval: "month")
week_median = income_statement.median_expense(interval: "week")
# Both should return numeric values
# CRITICAL TEST: Different intervals should return different results
# Month interval: median of monthly totals (if all in same month) vs individual transactions
# Week interval: median of weekly totals [300, 1000, 800] = 800 vs individual transactions [100,200,400,600,800] = 400
refute_equal month_median, week_median, "Different intervals should return different statistical results when data spans multiple time periods"
# Both should still be numeric
assert month_median.is_a?(Numeric)
assert week_median.is_a?(Numeric)
assert month_median > 0
assert week_median > 0
end
# NEW TESTS: Edge Cases