1
0
Fork 0
mirror of https://github.com/maybe-finance/maybe.git synced 2025-07-19 21:29:38 +02:00

Refactor transaction enrichment to support batch processing (#1803)

* Refactor transaction enrichment to support batch processing

- Add method to enrich transactions in batches
- Implement job scheduling for unenriched transactions
- Improve logging and error handling for transaction enrichment

* Re-enable enrichment

* Fix transaction enrichment query to use correct table references

- Update queries to explicitly join and reference account_entries and account_transactions tables
- Remove unnecessary name presence check before enrichment
- Improve query precision for unenriched transaction selection

* Optimize transaction enrichment query joins

- Refactor database joins to use explicit table references
- Improve query performance for unenriched transaction selection
- Ensure correct table aliasing in enrichment methods

* Remove deprecated data enrichment job and method

- Delete EnrichDataJob as it's no longer used
- Remove `enrich_data_later` method from Account model
- Update Account::Syncer to directly call `enrich_data` instead of scheduling a job
This commit is contained in:
Josh Pigford 2025-02-05 10:34:28 -06:00 committed by GitHub
parent abd932c894
commit b84a33c09d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 62 additions and 54 deletions

View file

@ -1,7 +0,0 @@
# Background job that runs data enrichment for a single account.
class EnrichDataJob < ApplicationJob
# Jobs of this class are placed on the :latency_high queue.
queue_as :latency_high
# Delegates straight to the account's own enrich_data method.
#
# @param account the record to enrich; it must respond to enrich_data
def perform(account)
account.enrich_data
end
end

View file

@ -0,0 +1,8 @@
# Background job that enriches one slice of an account's transactions.
#
# The job itself holds no logic: it builds an Account::DataEnricher for the
# account and forwards the slice bounds to enrich_transaction_batch.
#
# NOTE(review): the default batch_size here is 100, while
# Account::DataEnricher#enrich_transaction_batch defaults to 50 -- confirm the
# difference is intentional.
# NOTE(review): the enricher appears to apply offset/limit to a filtered query
# with no explicit ORDER BY; if enrichment changes which rows match the filter,
# later offsets may skip rows -- verify against Account::DataEnricher.
class EnrichTransactionBatchJob < ApplicationJob
  queue_as :latency_high

  # @param account the account whose transactions should be enriched
  # @param batch_size [Integer] number of entries to process (default 100)
  # @param offset [Integer] number of entries to skip first (default 0)
  def perform(account, batch_size = 100, offset = 0)
    Account::DataEnricher
      .new(account)
      .enrich_transaction_batch(batch_size, offset)
  end
end

View file

@ -130,10 +130,6 @@ class Account < ApplicationRecord
DataEnricher.new(self).run DataEnricher.new(self).run
end end
# Enqueues an EnrichDataJob for this record via perform_later, so the
# enrichment runs in a background job instead of inline.
def enrich_data_later
EnrichDataJob.perform_later(self)
end
def update_with_sync!(attributes) def update_with_sync!(attributes)
should_update_balance = attributes[:balance] && attributes[:balance].to_d != balance should_update_balance = attributes[:balance] && attributes[:balance].to_d != balance

View file

@ -8,22 +8,35 @@ class Account::DataEnricher
end end
def run def run
enrich_transactions total_unenriched = account.entries.account_transactions
.joins("JOIN account_transactions at ON at.id = account_entries.entryable_id AND account_entries.entryable_type = 'Account::Transaction'")
.where("account_entries.enriched_at IS NULL OR at.merchant_id IS NULL OR at.category_id IS NULL")
.count
if total_unenriched > 0
batch_size = 50
batches = (total_unenriched.to_f / batch_size).ceil
batches.times do |batch|
EnrichTransactionBatchJob.perform_later(account, batch_size, batch * batch_size)
end
end
end end
private def enrich_transaction_batch(batch_size = 50, offset = 0)
def enrich_transactions candidates = account.entries.account_transactions
candidates = account.entries.account_transactions.includes(entryable: [ :merchant, :category ]) .includes(entryable: [ :merchant, :category ])
.joins("JOIN account_transactions at ON at.id = account_entries.entryable_id AND account_entries.entryable_type = 'Account::Transaction'")
.where("account_entries.enriched_at IS NULL OR at.merchant_id IS NULL OR at.category_id IS NULL")
.offset(offset)
.limit(batch_size)
Rails.logger.info("Enriching #{candidates.count} transactions for account #{account.id}") Rails.logger.info("Enriching batch of #{candidates.count} transactions for account #{account.id} (offset: #{offset})")
merchants = {} merchants = {}
candidates.each do |entry| candidates.each do |entry|
if entry.enriched_at.nil? || entry.entryable.merchant_id.nil? || entry.entryable.category_id.nil?
begin begin
next unless entry.name.present?
info = self.class.synth_provider.enrich_transaction(entry.name).info info = self.class.synth_provider.enrich_transaction(entry.name).info
next unless info.present? next unless info.present?
@ -53,4 +66,3 @@ class Account::DataEnricher
end end
end end
end end
end

View file

@ -15,8 +15,7 @@ class Account::Syncer
# Enrich if user opted in or if we're syncing transactions from a Plaid account on the hosted app # Enrich if user opted in or if we're syncing transactions from a Plaid account on the hosted app
if account.family.data_enrichment_enabled? || (account.plaid_account_id.present? && Rails.application.config.app_mode.hosted?) if account.family.data_enrichment_enabled? || (account.plaid_account_id.present? && Rails.application.config.app_mode.hosted?)
# Temporarily disable until optimizations complete account.enrich_data
# account.enrich_data_later
else else
Rails.logger.info("Data enrichment is disabled, skipping enrichment for account #{account.id}") Rails.logger.info("Data enrichment is disabled, skipping enrichment for account #{account.id}")
end end