1
0
Fork 0
mirror of https://github.com/maybe-finance/maybe.git synced 2025-07-19 13:19:39 +02:00

Benchmarking setup (#2366)
Some checks are pending
Publish Docker image / ci (push) Waiting to run
Publish Docker image / Build docker image (push) Blocked by required conditions

* Benchmarking setup

* Get demo data working in benchmark scenario

* Finalize default demo scenario

* Finalize benchmarking setup
This commit is contained in:
Zach Gollwitzer 2025-06-14 11:53:53 -04:00 committed by GitHub
parent cdad31812a
commit 84b2426e54
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
23 changed files with 1477 additions and 2166 deletions

154
lib/tasks/benchmarking.rake Normal file
View file

@ -0,0 +1,154 @@
# Benchmarking needs a production-like data sample, so some up-front setup is required.
#
# 1. Load a scrubbed, production-like slice of data into a local maybe_benchmarking DB
# 2. Set up .env.production so that the Rails app can boot with RAILS_ENV=production and connect to the local maybe_benchmarking DB
# 3. Run `rake benchmark_dump:06_setup_bench_user`
# 4. Run the app locally and find the endpoint you want to measure
# 5. Run an endpoint, example: `ENDPOINT=/budgets/jun-2025/budget_categories/245637cb-129f-4612-b0a8-1de57559372b RAILS_ENV=production BENCHMARKING_ENABLED=true RAILS_LOG_LEVEL=debug rake benchmarking:ips`
namespace :benchmarking do
# When to use: Track overall endpoint speed improvements over time (recommended, most practical test)
#
# Shells out to `bundle exec derailed exec perf:ips` twice for the ENDPOINT path
# (default "/"): one cold run followed by one warm run. Both raw outputs are
# printed, then a two-row summary table is appended to the per-endpoint history
# file under tmp/benchmarks.
#
# Raises unless Rails.env is production.
desc "Run cold & warm performance benchmarks and append to history"
task ips: :environment do
  path = ENV.fetch("ENDPOINT", "/")

  # 🚫 Fail fast unless the benchmark is run in production mode
  unless Rails.env.production?
    raise "benchmarking:ips must be run with RAILS_ENV=production (current: #{Rails.env})"
  end

  # ---------------------------------------------------------------------------
  # Tunable parameters - override with environment variables if needed
  # ---------------------------------------------------------------------------
  cold_warmup = Integer(ENV.fetch("COLD_WARMUP", 0)) # seconds to warm up before *cold* timing (0 == true cold)
  cold_iterations = Integer(ENV.fetch("COLD_ITERATIONS", 1)) # requests to measure for the cold run
  warm_warmup = Integer(ENV.fetch("WARM_WARMUP", 5)) # seconds benchmark-ips uses to stabilise JIT/caches
  warm_time = Integer(ENV.fetch("WARM_TIME", 30)) # seconds benchmark-ips samples for warm statistics
  # ---------------------------------------------------------------------------

  setup_benchmark_env(path)
  FileUtils.mkdir_p("tmp/benchmarks")

  timestamp = Time.current.strftime("%Y-%m-%d %H:%M:%S")

  # The command runs through a shell with stderr discarded, so a missing git
  # binary or a non-repository directory yields an empty string rather than an
  # exception. Treat blank output as "unknown" as well.
  commit_sha =
    begin
      sha = `git rev-parse --short HEAD 2>/dev/null`.strip
      sha.empty? ? "unknown" : sha
    rescue StandardError
      "unknown"
    end

  puts "🕒 Starting benchmark run at #{timestamp} (#{commit_sha})"

  # 🚿 Flush application caches so the first request is a *true* cold hit
  Rails.cache&.clear if defined?(Rails)

  # ---------------------------
  # 1. Cold measurement
  # ---------------------------
  puts "❄️ Running cold benchmark for #{path} (#{cold_iterations} iteration)..."
  cold_cmd = "IPS_WARMUP=#{cold_warmup} IPS_TIME=0 IPS_ITERATIONS=#{cold_iterations} " \
             "bundle exec derailed exec perf:ips"
  cold_output = `#{cold_cmd} 2>&1`
  puts "Cold output:"
  puts cold_output
  cold_result = extract_clean_results(cold_output)

  # ---------------------------
  # 2. Warm measurement
  # ---------------------------
  puts "🔥 Running warm benchmark for #{path} (#{warm_time}s sample)..."
  warm_cmd = "IPS_WARMUP=#{warm_warmup} IPS_TIME=#{warm_time} " \
             "bundle exec derailed exec perf:ips"
  warm_output = `#{warm_cmd} 2>&1`
  puts "Warm output:"
  puts warm_output
  warm_result = extract_clean_results(warm_output)

  # ---------------------------------------------------------------------------
  # Persist results
  # ---------------------------------------------------------------------------
  separator = "\n" + "=" * 70 + "\n"
  timestamp_header = "#{separator}📊 BENCHMARK RUN - #{timestamp} (#{commit_sha})#{separator}"

  # Table header
  table_header = "| Type | IPS | Deviation | Time/Iteration | Iterations | Total Time |\n"
  table_separator = "|------|-----|-----------|----------------|------------|------------|\n"
  cold_row = format_table_row("COLD", cold_result)
  warm_row = format_table_row("WARM", warm_result)
  combined_result = table_header + table_separator + cold_row + warm_row + "\n"

  # Append so the file accumulates a history of runs for this endpoint
  results_path = benchmark_file(path)
  File.open(results_path, "a") { |f| f.write(timestamp_header + combined_result) }
  puts "✅ Results saved to #{results_path}"
end
private
# Exports the environment variables read by the benchmark subprocesses.
#
# path - request path stored in PATH_TO_HIT
#
# USE_AUTH, USE_SERVER, PATH_TO_HIT and HTTP_METHOD are always overwritten;
# RAILS_LOG_LEVEL is only defaulted when the caller has not set it.
def setup_benchmark_env(path)
  forced_settings = {
    "USE_AUTH" => "true",
    "USE_SERVER" => "puma",
    "PATH_TO_HIT" => path,
    "HTTP_METHOD" => "GET"
  }
  forced_settings.each { |name, value| ENV[name] = value }

  # keep output clean unless a log level was requested explicitly
  ENV["RAILS_LOG_LEVEL"] ||= "error"
end
# Maps a request path to the history file that stores its benchmark results.
#
# path - request path such as "/" or "/budgets/jun-2025"
#
# The root path is stored as "dashboard"; any other path has its slashes
# turned into underscores and leading underscores stripped.
#
# Returns a relative path under tmp/benchmarks ending in ".txt".
def benchmark_file(path)
  basename =
    if path == "/"
      "dashboard"
    else
      path.tr("/", "_").gsub(/^_+/, "")
    end

  "tmp/benchmarks/#{basename}.txt"
end
# Pulls the headline statistics out of benchmark-ips console output.
#
# output - String holding the full captured output of a benchmark run
#
# The first line containing "<ips> (± <deviation>%) i/s" is treated as the
# result line, for example:
#   " SomeLabel 14.416k (± 3.8%) i/s - 72.000k in 5.004618s"
# An optional parenthesised group between "i/s" and "-" is captured as the
# time per iteration.
#
# Returns a Hash with :ips, :deviation, :time_per_iteration, :iterations and
# :total_time. Falls back to no_data_hash("No results") when no result line is
# present, and to no_data_hash when a result line is found but its remaining
# fields cannot be parsed.
def extract_clean_results(output)
  # Loose pattern: just enough to recognise the result line.
  result_line_pattern = /\d[\d\.kM]*\s+\(±\s*[0-9\.]+%\)\s+i\/s/
  # Strict pattern: captures ips, deviation, optional time/iteration, iterations, total seconds.
  result_fields_pattern =
    /(\d[\d\.kM]*)\s+\(±\s*([0-9\.]+)%\)\s+i\/s\s+(?:\(([^)]+)\)\s+)?-\s+(\d[\d\.kM]*)\s+in\s+(\d+\.\d+)s/

  result_line = output.split("\n").find { |line| line.match?(result_line_pattern) }
  return no_data_hash("No results") unless result_line

  match = result_line.match(result_fields_pattern)
  return no_data_hash unless match

  {
    ips: match[1],
    deviation: "± %.2f%%" % match[2].to_f,
    time_per_iteration: match[3] || "-",
    iterations: match[4],
    total_time: "#{match[5]}s"
  }
end
# Renders one row of the results table.
#
# type - row label placed in the first column (e.g. "COLD" or "WARM")
# data - object indexed with :ips, :deviation, :time_per_iteration,
#        :iterations and :total_time
#
# Columns are left-aligned to a minimum width; longer values are not truncated.
# The deviation column is wider to fit strings like "± 0.12%".
#
# Returns the row as a String ending in a newline.
def format_table_row(type, data)
  column_keys = %i[ips deviation time_per_iteration iterations total_time]
  cells = column_keys.map { |key| data[key] }

  format("| %-4s | %-5s | %-11s | %-14s | %-10s | %-10s |\n", type, *cells)
end
# Builds a placeholder result for a benchmark run with no usable statistics.
#
# ips_msg - text to put in the :ips slot (default "No data")
#
# Returns a Hash with :ips set to ips_msg and :deviation, :time_per_iteration,
# :iterations and :total_time each set to "-".
def no_data_hash(ips_msg = "No data")
  blank_columns = %i[deviation time_per_iteration iterations total_time]

  { ips: ips_msg }.merge(blank_columns.to_h { |column| [column, "-"] })
end
end

View file

@ -1,39 +1,63 @@
namespace :demo_data do
desc "Creates a new user with no data. Use for testing empty data states."
desc "Load empty demo dataset (no financial data)"
task empty: :environment do
families = [ "Demo Family 1" ]
Demo::Generator.new.reset_and_clear_data!(families)
start = Time.now
puts "🚀 Loading EMPTY demo data…"
Demo::Generator.new.generate_empty_data!
puts "✅ Done in #{(Time.now - start).round(2)}s"
end
desc "Creates a new user who has to go through onboarding still. Use for testing onboarding flows."
desc "Load new-user demo dataset (family created but not onboarded)"
task new_user: :environment do
families = [ "Demo Family 1" ]
Demo::Generator.new.reset_and_clear_data!(families, require_onboarding: true)
start = Time.now
puts "🚀 Loading NEW-USER demo data…"
Demo::Generator.new.generate_new_user_data!
puts "✅ Done in #{(Time.now - start).round(2)}s"
end
desc "General data reset that loads semi-realistic data"
task :reset, [ :count ] => :environment do |t, args|
count = (args[:count] || 1).to_i
families = count.times.map { |i| "Demo Family #{i + 1}" }
Demo::Generator.new.reset_data!(families)
# Generates the full demo dataset and then validates it.
#
# SEED may be supplied through the environment; otherwise Random.new_seed
# provides one. The seed in use is printed before generation starts.
# NOTE(review): an environment-supplied SEED reaches Demo::Generator as a
# String while the fallback is an Integer - confirm the generator accepts both.
desc "Load full realistic demo dataset"
task default: :environment do
  started_at = Time.now
  seed = ENV.fetch("SEED", Random.new_seed)
  puts "🚀 Loading FULL demo data (seed=#{seed})…"

  Demo::Generator.new(seed: seed).generate_default_data!
  validate_demo_data!

  puts "🎉 Demo data ready in #{(Time.now - started_at).round(2)}s"
end
desc "Use this when you need to test multi-currency features of the app with a minimal setup"
task multi_currency: :environment do
families = [ "Demo Family 1", "Demo Family 2" ]
Demo::Generator.new.generate_multi_currency_data!(families)
end
# ---------------------------------------------------------------------------
# Validation helpers
# ---------------------------------------------------------------------------
def validate_demo_data!
total_entries = Entry.count
trade_entries = Entry.where(entryable_type: "Trade").count
categorized_txn = Transaction.joins(:category).count
txn_total = Transaction.count
desc "Use this when you want realistic budget data"
task basic_budget: :environment do
families = [ "Demo Family 1" ]
Demo::Generator.new.generate_basic_budget_data!(families)
end
coverage = ((categorized_txn.to_f / txn_total) * 100).round(1)
# DO NOT RUN THIS unless you're testing performance locally. It will take a long time to load/clear. Easiest to clear with a db:reset
desc "Generates realistic data for 500 families for performance testing. Creates 1 family with Ruby, then efficiently duplicates it 499 times using SQL bulk operations."
task performance_testing: :environment do
families = [ "Performance Family 1" ]
Demo::Generator.new.generate_performance_testing_data!(families)
puts "\n📊 Validation Summary".ljust(40, "-")
puts "Entries total: #{total_entries}"
puts "Trade entries: #{trade_entries} (#{trade_entries.between?(500, 1000) ? '✅' : '❌'})"
puts "Txn categorization: #{coverage}% (>=75% ✅)"
unless total_entries.between?(8_000, 12_000)
raise "Total entries #{total_entries} outside 8k12k range"
end
unless trade_entries.between?(500, 1000)
raise "Trade entries #{trade_entries} outside 5001 000 range"
end
unless coverage >= 75
raise "Categorization coverage below 75%"
end
end
end