Benchmarking setup (#2366)

* Benchmarking setup * Get demo data working in benchmark scenario * Finalize default demo scenario * Finalize benchmarking setup
2025-07-19 13:19:39 +02:00 · 2025-06-14 11:53:53 -04:00 · 2025-06-14 11:53:53 -04:00 · 84b2426e54
commit 84b2426e54
parent cdad31812a
23 changed files with 1477 additions and 2166 deletions
--- a/lib/tasks/benchmarking.rake
+++ b/lib/tasks/benchmarking.rake
@ -0,0 +1,154 @@
+# Benchmarking requires a production-like data sample, so requires some up-front setup.
+#
+# 1. Load a scrubbed production-like slice of data into maybe_benchmarking DB locally
+# 2. Setup .env.production so that the Rails app can boot with RAILS_ENV=production and connect to local maybe_benchmarking DB
+# 3. Run `rake benchmark_dump:06_setup_bench_user`
+# 4. Run the app locally and find the endpoint you want to benchmark
+# 5. Run an endpoint, example: `ENDPOINT=/budgets/jun-2025/budget_categories/245637cb-129f-4612-b0a8-1de57559372b RAILS_ENV=production BENCHMARKING_ENABLED=true RAILS_LOG_LEVEL=debug rake benchmarking:ips`
+namespace :benchmarking do
+  # When to use: Track overall endpoint speed improvements over time (recommended, most practical test)
+  desc "Run cold & warm performance benchmarks and append to history"
+  task ips: :environment do
+    path = ENV.fetch("ENDPOINT", "/")
+
+    # 🚫 Fail fast unless the benchmark is run in production mode
+    unless Rails.env.production?
+      raise "benchmark:ips must be run with RAILS_ENV=production (current: #{Rails.env})"
+    end
+
+    # ---------------------------------------------------------------------------
+    # Tunable parameters – override with environment variables if needed
+    # ---------------------------------------------------------------------------
+    cold_warmup     = Integer(ENV.fetch("COLD_WARMUP", 0))  # seconds to warm up before *cold* timing (0 == true cold)
+    cold_iterations = Integer(ENV.fetch("COLD_ITERATIONS", 1)) # requests to measure for the cold run
+
+    warm_warmup     = Integer(ENV.fetch("WARM_WARMUP", 5))  # seconds benchmark-ips uses to stabilise JIT/caches
+    warm_time       = Integer(ENV.fetch("WARM_TIME", 30))   # seconds benchmark-ips samples for warm statistics
+    # ---------------------------------------------------------------------------
+
+    setup_benchmark_env(path)
+    FileUtils.mkdir_p("tmp/benchmarks")
+
+    timestamp  = Time.current.strftime("%Y-%m-%d %H:%M:%S")
+    commit_sha = `git rev-parse --short HEAD 2>/dev/null`.strip rescue "unknown"
+    puts "🕒 Starting benchmark run at #{timestamp} (#{commit_sha})"
+
+    # 🚿  Flush application caches so the first request is a *true* cold hit
+    Rails.cache&.clear if defined?(Rails)
+
+    # ---------------------------
+    # 1️⃣  Cold measurement
+    # ---------------------------
+    puts "❄️  Running cold benchmark for #{path} (#{cold_iterations} iteration)..."
+    cold_cmd = "IPS_WARMUP=#{cold_warmup} IPS_TIME=0 IPS_ITERATIONS=#{cold_iterations} " \
+               "bundle exec derailed exec perf:ips"
+    cold_output = `#{cold_cmd} 2>&1`
+
+    puts "Cold output:"
+    puts cold_output
+
+    cold_result = extract_clean_results(cold_output)
+
+    # ---------------------------
+    # 2️⃣  Warm measurement
+    # ---------------------------
+    puts "🔥 Running warm benchmark for #{path} (#{warm_time}s sample)..."
+    warm_cmd = "IPS_WARMUP=#{warm_warmup} IPS_TIME=#{warm_time} " \
+               "bundle exec derailed exec perf:ips"
+    warm_output = `#{warm_cmd} 2>&1`
+
+    puts "Warm output:"
+    puts warm_output
+
+    warm_result = extract_clean_results(warm_output)
+
+    # ---------------------------------------------------------------------------
+    # Persist results
+    # ---------------------------------------------------------------------------
+    separator        = "\n" + "=" * 70 + "\n"
+    timestamp_header = "#{separator}📊 BENCHMARK RUN - #{timestamp} (#{commit_sha})#{separator}"
+
+    # Table header
+    table_header    = "| Type | IPS | Deviation | Time/Iteration | Iterations | Total Time |\n"
+    table_separator = "|------|-----|-----------|----------------|------------|------------|\n"
+
+    cold_row        = format_table_row("COLD", cold_result)
+    warm_row        = format_table_row("WARM", warm_result)
+
+    combined_result = table_header + table_separator + cold_row + warm_row + "\n"
+
+    File.open(benchmark_file(path), "a") { |f| f.write(timestamp_header + combined_result) }
+
+    puts "✅ Results saved to #{benchmark_file(path)}"
+  end
+
+  private
+    def setup_benchmark_env(path)
+      ENV["USE_AUTH"]      = "true"
+      ENV["USE_SERVER"]    = "puma"
+      ENV["PATH_TO_HIT"]   = path
+      ENV["HTTP_METHOD"]   = "GET"
+      ENV["RAILS_LOG_LEVEL"] ||= "error" # keep output clean
+    end
+
+    def benchmark_file(path)
+      filename = case path
+      when "/" then "dashboard"
+      else
+        path.gsub("/", "_").gsub(/^_+/, "")
+      end
+      "tmp/benchmarks/#{filename}.txt"
+    end
+
+    def extract_clean_results(output)
+      lines = output.split("\n")
+
+      # Example benchmark-ips output line:
+      # "         SomeLabel    14.416k (± 3.8%) i/s -     72.000k in   5.004618s"
+      result_line = lines.find { |line| line.match(/\d[\d\.kM]*\s+\(±\s*[0-9\.]+%\)\s+i\/s/) }
+
+      if result_line
+        if (match = result_line.match(/(\d[\d\.kM]*)\s+\(±\s*([0-9\.]+)%\)\s+i\/s\s+(?:\(([^)]+)\)\s+)?-\s+(\d[\d\.kM]*)\s+in\s+(\d+\.\d+)s/))
+          ips_value          = match[1]
+          deviation_percent  = match[2].to_f
+          time_per_iteration = match[3] || "-"
+          iterations         = match[4]
+          total_time         = "#{match[5]}s"
+
+          {
+            ips:                ips_value,
+            deviation:          "± %.2f%%" % deviation_percent,
+            time_per_iteration: time_per_iteration,
+            iterations:         iterations,
+            total_time:         total_time
+          }
+        else
+          no_data_hash
+        end
+      else
+        no_data_hash("No results")
+      end
+    end
+
+    def format_table_row(type, data)
+      # Wider deviation column accommodates strings like "± 0.12%"
+      "| %-4s | %-5s | %-11s | %-14s | %-10s | %-10s |\n" % [
+        type,
+        data[:ips],
+        data[:deviation],
+        data[:time_per_iteration],
+        data[:iterations],
+        data[:total_time]
+      ]
+    end
+
+    def no_data_hash(ips_msg = "No data")
+      {
+        ips:                ips_msg,
+        deviation:          "-",
+        time_per_iteration: "-",
+        iterations:         "-",
+        total_time:         "-"
+      }
+    end
+end
--- a/lib/tasks/demo_data.rake
+++ b/lib/tasks/demo_data.rake
@ -1,39 +1,63 @@
 namespace :demo_data do
-  desc "Creates a new user with no data. Use for testing empty data states."
+  desc "Load empty demo dataset (no financial data)"
  task empty: :environment do
-    families = [ "Demo Family 1" ]
-    Demo::Generator.new.reset_and_clear_data!(families)
+    start = Time.now
+    puts "🚀 Loading EMPTY demo data…"
+
+    Demo::Generator.new.generate_empty_data!
+
+    puts "✅ Done in #{(Time.now - start).round(2)}s"
  end

-  desc "Creates a new user who has to go through onboarding still. Use for testing onboarding flows."
+  desc "Load new-user demo dataset (family created but not onboarded)"
  task new_user: :environment do
-    families = [ "Demo Family 1" ]
-    Demo::Generator.new.reset_and_clear_data!(families, require_onboarding: true)
+    start = Time.now
+    puts "🚀 Loading NEW-USER demo data…"
+
+    Demo::Generator.new.generate_new_user_data!
+
+    puts "✅ Done in #{(Time.now - start).round(2)}s"
  end

-  desc "General data reset that loads semi-realistic data"
-  task :reset, [ :count ] => :environment do |t, args|
-    count = (args[:count] || 1).to_i
-    families = count.times.map { |i| "Demo Family #{i + 1}" }
-    Demo::Generator.new.reset_data!(families)
+  desc "Load full realistic demo dataset"
+  task default: :environment do
+    start    = Time.now
+    seed     = ENV.fetch("SEED", Random.new_seed)
+    puts "🚀 Loading FULL demo data (seed=#{seed})…"
+
+    generator = Demo::Generator.new(seed: seed)
+    generator.generate_default_data!
+
+    validate_demo_data!
+
+    elapsed = Time.now - start
+    puts "🎉 Demo data ready in #{elapsed.round(2)}s"
  end

-  desc "Use this when you need to test multi-currency features of the app with a minimal setup"
-  task multi_currency: :environment do
-    families = [ "Demo Family 1", "Demo Family 2" ]
-    Demo::Generator.new.generate_multi_currency_data!(families)
-  end
+  # ---------------------------------------------------------------------------
+  # Validation helpers
+  # ---------------------------------------------------------------------------
+  def validate_demo_data!
+    total_entries   = Entry.count
+    trade_entries   = Entry.where(entryable_type: "Trade").count
+    categorized_txn = Transaction.joins(:category).count
+    txn_total       = Transaction.count

-  desc "Use this when you want realistic budget data"
-  task basic_budget: :environment do
-    families = [ "Demo Family 1" ]
-    Demo::Generator.new.generate_basic_budget_data!(families)
-  end
+    coverage = ((categorized_txn.to_f / txn_total) * 100).round(1)

-  # DO NOT RUN THIS unless you're testing performance locally. It will take a long time to load/clear. Easiest to clear with a db:reset
-  desc "Generates realistic data for 500 families for performance testing. Creates 1 family with Ruby, then efficiently duplicates it 499 times using SQL bulk operations."
-  task performance_testing: :environment do
-    families = [ "Performance Family 1" ]
-    Demo::Generator.new.generate_performance_testing_data!(families)
+    puts "\n📊 Validation Summary".ljust(40, "-")
+    puts "Entries total:              #{total_entries}"
+    puts "Trade entries:             #{trade_entries} (#{trade_entries.between?(500, 1000) ? '✅' : '❌'})"
+    puts "Txn categorization:        #{coverage}% (>=75% ✅)"
+
+    unless total_entries.between?(8_000, 12_000)
+      raise "Total entries #{total_entries} outside 8k–12k range"
+    end
+    unless trade_entries.between?(500, 1000)
+      raise "Trade entries #{trade_entries} outside 500–1 000 range"
+    end
+    unless coverage >= 75
+      raise "Categorization coverage below 75%"
+    end
  end
 end