mirror of
https://github.com/maybe-finance/maybe.git
synced 2025-07-19 21:29:38 +02:00
Refactor transaction enrichment to support batch processing (#1803)
* Refactor transaction enrichment to support batch processing
  - Add method to enrich transactions in batches
  - Implement job scheduling for unenriched transactions
  - Improve logging and error handling for transaction enrichment
* Re-enable enrichment
* Fix transaction enrichment query to use correct table references
  - Update queries to explicitly join and reference account_entries and account_transactions tables
  - Remove unnecessary name presence check before enrichment
  - Improve query precision for unenriched transaction selection
* Optimize transaction enrichment query joins
  - Refactor database joins to use explicit table references
  - Improve query performance for unenriched transaction selection
  - Ensure correct table aliasing in enrichment methods
* Remove deprecated data enrichment job and method
  - Delete EnrichDataJob as it's no longer used
  - Remove `enrich_data_later` method from Account model
  - Update Account::Syncer to directly call `enrich_data` instead of scheduling a job
This commit is contained in:
parent
abd932c894
commit
b84a33c09d
5 changed files with 62 additions and 54 deletions
|
@ -1,7 +0,0 @@
|
||||||
# Job on the :latency_high queue whose `perform` calls `enrich_data` on the
# account it is given. This whole file is deleted by this commit
# (hunk header `-1,7 +0,0`).
class EnrichDataJob < ApplicationJob
|
|
||||||
queue_as :latency_high
|
|
||||||
|
|
||||||
def perform(account)
|
|
||||||
account.enrich_data
|
|
||||||
end
|
|
||||||
end
|
|
8
app/jobs/enrich_transaction_batch_job.rb
Normal file
8
app/jobs/enrich_transaction_batch_job.rb
Normal file
|
@ -0,0 +1,8 @@
|
||||||
|
# Job on the :latency_high queue that enriches one batch of an account's
# transactions by delegating to Account::DataEnricher#enrich_transaction_batch.
class EnrichTransactionBatchJob < ApplicationJob
|
||||||
|
queue_as :latency_high
|
||||||
|
|
||||||
|
# account    - passed to Account::DataEnricher.new.
# batch_size - forwarded unchanged to enrich_transaction_batch (default 100).
# offset     - forwarded unchanged to enrich_transaction_batch (default 0).
#
# NOTE(review): the default batch_size here (100) differs from the 50 used by
# DataEnricher#run and by enrich_transaction_batch's own default in this diff.
# The only call site shown passes batch_size explicitly, so the default is not
# exercised there; confirm whether the mismatch is intentional.
def perform(account, batch_size = 100, offset = 0)
|
||||||
|
enricher = Account::DataEnricher.new(account)
|
||||||
|
enricher.enrich_transaction_batch(batch_size, offset)
|
||||||
|
end
|
||||||
|
end
|
|
@ -130,10 +130,6 @@ class Account < ApplicationRecord
|
||||||
DataEnricher.new(self).run
|
DataEnricher.new(self).run
|
||||||
end
|
end
|
||||||
|
|
||||||
# Enqueues an EnrichDataJob for this account via `perform_later`.
# This method is removed by this commit (hunk header `-130,10 +130,6`).
def enrich_data_later
|
|
||||||
EnrichDataJob.perform_later(self)
|
|
||||||
end
|
|
||||||
|
|
||||||
def update_with_sync!(attributes)
|
def update_with_sync!(attributes)
|
||||||
should_update_balance = attributes[:balance] && attributes[:balance].to_d != balance
|
should_update_balance = attributes[:balance] && attributes[:balance].to_d != balance
|
||||||
|
|
||||||
|
|
|
@ -8,22 +8,35 @@ class Account::DataEnricher
|
||||||
end
|
end
|
||||||
|
|
||||||
# Account::DataEnricher#run, shown with old and new diff sides interleaved.
# Old body: a single call to `enrich_transactions`.
# New body: counts the account's transaction entries that still need
# enrichment and enqueues one EnrichTransactionBatchJob per batch of 50.
def run
|
def run
|
||||||
enrich_transactions
|
# An entry counts as unenriched when its `enriched_at` is NULL, or when the
# joined account_transactions row (aliased `at`) has a NULL merchant_id or
# category_id.
total_unenriched = account.entries.account_transactions
|
||||||
|
.joins("JOIN account_transactions at ON at.id = account_entries.entryable_id AND account_entries.entryable_type = 'Account::Transaction'")
|
||||||
|
.where("account_entries.enriched_at IS NULL OR at.merchant_id IS NULL OR at.category_id IS NULL")
|
||||||
|
.count
|
||||||
|
|
||||||
|
if total_unenriched > 0
|
||||||
|
batch_size = 50
|
||||||
|
# Number of batches, rounded up so a final partial batch is included.
batches = (total_unenriched.to_f / batch_size).ceil
|
||||||
|
|
||||||
|
# Each job receives the offset of its batch: 0, 50, 100, ...
# NOTE(review): enrich_transaction_batch applies these offsets to the same
# filter with `.offset/.limit`, and no ORDER BY is visible in this diff.
# If the association has no default order, or if earlier jobs change which
# rows match the filter before later jobs run, batches could overlap or
# skip rows. Confirm against the full enrich_transaction_batch body.
batches.times do |batch|
|
||||||
|
EnrichTransactionBatchJob.perform_later(account, batch_size, batch * batch_size)
|
||||||
|
end
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
private
|
def enrich_transaction_batch(batch_size = 50, offset = 0)
|
||||||
def enrich_transactions
|
candidates = account.entries.account_transactions
|
||||||
candidates = account.entries.account_transactions.includes(entryable: [ :merchant, :category ])
|
.includes(entryable: [ :merchant, :category ])
|
||||||
|
.joins("JOIN account_transactions at ON at.id = account_entries.entryable_id AND account_entries.entryable_type = 'Account::Transaction'")
|
||||||
|
.where("account_entries.enriched_at IS NULL OR at.merchant_id IS NULL OR at.category_id IS NULL")
|
||||||
|
.offset(offset)
|
||||||
|
.limit(batch_size)
|
||||||
|
|
||||||
Rails.logger.info("Enriching #{candidates.count} transactions for account #{account.id}")
|
Rails.logger.info("Enriching batch of #{candidates.count} transactions for account #{account.id} (offset: #{offset})")
|
||||||
|
|
||||||
merchants = {}
|
merchants = {}
|
||||||
|
|
||||||
candidates.each do |entry|
|
candidates.each do |entry|
|
||||||
if entry.enriched_at.nil? || entry.entryable.merchant_id.nil? || entry.entryable.category_id.nil?
|
|
||||||
begin
|
begin
|
||||||
next unless entry.name.present?
|
|
||||||
|
|
||||||
info = self.class.synth_provider.enrich_transaction(entry.name).info
|
info = self.class.synth_provider.enrich_transaction(entry.name).info
|
||||||
|
|
||||||
next unless info.present?
|
next unless info.present?
|
||||||
|
@ -53,4 +66,3 @@ class Account::DataEnricher
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
|
||||||
|
|
|
@ -15,8 +15,7 @@ class Account::Syncer
|
||||||
|
|
||||||
# Enrich if user opted in or if we're syncing transactions from a Plaid account on the hosted app
|
# Enrich if user opted in or if we're syncing transactions from a Plaid account on the hosted app
|
||||||
if account.family.data_enrichment_enabled? || (account.plaid_account_id.present? && Rails.application.config.app_mode.hosted?)
|
if account.family.data_enrichment_enabled? || (account.plaid_account_id.present? && Rails.application.config.app_mode.hosted?)
|
||||||
# Temporarily disable until optimizations complete
|
account.enrich_data
|
||||||
# account.enrich_data_later
|
|
||||||
else
|
else
|
||||||
Rails.logger.info("Data enrichment is disabled, skipping enrichment for account #{account.id}")
|
Rails.logger.info("Data enrichment is disabled, skipping enrichment for account #{account.id}")
|
||||||
end
|
end
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue