mirror of
https://github.com/maybe-finance/maybe.git
synced 2025-08-09 15:35:22 +02:00
Auto merchant detection, ai enrichment in batches
This commit is contained in:
parent
a1cb17c0da
commit
9260c5ef03
33 changed files with 811 additions and 232 deletions
7
app/jobs/auto_categorize_job.rb
Normal file
7
app/jobs/auto_categorize_job.rb
Normal file
|
@ -0,0 +1,7 @@
|
|||
# Background job that auto-categorizes one batch of a family's transactions.
class AutoCategorizeJob < ApplicationJob
  queue_as :medium_priority

  # @param family [#auto_categorize_transactions] owner of the transactions
  # @param transaction_ids [Array] IDs of the transactions to categorize
  def perform(family, transaction_ids: [])
    family.auto_categorize_transactions(transaction_ids)
  end
end
|
7
app/jobs/auto_detect_merchants_job.rb
Normal file
7
app/jobs/auto_detect_merchants_job.rb
Normal file
|
@ -0,0 +1,7 @@
|
|||
# Background job that auto-detects merchants for one batch of a family's transactions.
class AutoDetectMerchantsJob < ApplicationJob
  queue_as :medium_priority

  # @param family [#auto_detect_transaction_merchants] owner of the transactions
  # @param transaction_ids [Array] IDs of the transactions to inspect
  def perform(family, transaction_ids: [])
    family.auto_detect_transaction_merchants(transaction_ids)
  end
end
|
|
@ -22,6 +22,18 @@ module Enrichable
|
|||
}
|
||||
end
|
||||
|
||||
# Records which source set an attribute on this record, and to what value.
#
# The DataEnrichment row is looked up by enrichable + attribute_name + source
# and created when missing. It is initialized (not created) first so the value
# and metadata are written in a single save instead of an insert followed by
# an update.
#
# @param attribute_name [String] name of the enriched attribute
# @param attribute_value [Object] value stored in the enrichment's `value`
# @param source [String] enrichment source (e.g. "rule", "ai")
# @param metadata [Hash] extra data stored alongside the value
# @return [true]
# @raise [ActiveRecord::RecordInvalid] when the enrichment cannot be saved
def log_enrichment!(attribute_name:, attribute_value:, source:, metadata: {})
  enrichment = DataEnrichment.find_or_initialize_by(
    enrichable: self,
    attribute_name: attribute_name,
    source: source
  )

  enrichment.update!(value: attribute_value, metadata: metadata)
end
|
||||
|
||||
# Whether `locked_attributes` holds a non-blank entry for the given attribute.
#
# @param attr [String, Symbol] attribute name (converted with `to_s` for lookup)
# @return [Boolean]
def locked?(attr)
  locked_attributes[attr.to_s].present?
end
|
||||
|
@ -46,6 +58,6 @@ module Enrichable
|
|||
|
||||
private
|
||||
# Attribute names this concern treats as ignored for enrichment purposes.
# (The consumers of this list are outside the visible hunk.)
#
# @return [Array<String>]
def ignored_enrichable_attributes
  %w[id updated_at created_at]
end
|
||||
end
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
# Stores the value a given source wrote to one attribute of an enrichable record.
class DataEnrichment < ApplicationRecord
  belongs_to :enrichable, polymorphic: true

  # Declared once: the same enum cannot be defined twice on a model.
  enum :source, { rule: "rule", plaid: "plaid", synth: "synth", ai: "ai" }
end
|
||||
|
|
|
@ -28,7 +28,7 @@ class Family < ApplicationRecord
|
|||
|
||||
has_many :tags, dependent: :destroy
|
||||
has_many :categories, dependent: :destroy
|
||||
has_many :merchants, dependent: :destroy
|
||||
has_many :merchants, dependent: :destroy, class_name: "FamilyMerchant"
|
||||
|
||||
has_many :budgets, dependent: :destroy
|
||||
has_many :budget_categories, through: :budgets
|
||||
|
@ -41,6 +41,22 @@ class Family < ApplicationRecord
|
|||
Merchant.where(id: merchant_ids)
|
||||
end
|
||||
|
||||
# Enqueues AutoCategorizeJob for the given transactions.
#
# @param transactions [#pluck] collection whose `id`s are passed to the job
def auto_categorize_transactions_later(transactions)
  AutoCategorizeJob.perform_later(self, transaction_ids: transactions.pluck(:id))
end
|
||||
|
||||
# Runs auto-categorization for the given transaction IDs in the current process.
#
# @param transaction_ids [Array] IDs handed to AutoCategorizer
def auto_categorize_transactions(transaction_ids)
  AutoCategorizer.new(self, transaction_ids: transaction_ids).auto_categorize
end
|
||||
|
||||
# Enqueues AutoDetectMerchantsJob for the given transactions.
#
# @param transactions [#pluck] collection whose `id`s are passed to the job
def auto_detect_transaction_merchants_later(transactions)
  AutoDetectMerchantsJob.perform_later(self, transaction_ids: transactions.pluck(:id))
end
|
||||
|
||||
# Runs merchant auto-detection for the given transaction IDs in the current process.
#
# @param transaction_ids [Array] IDs handed to AutoMerchantDetector
def auto_detect_transaction_merchants(transaction_ids)
  AutoMerchantDetector.new(self, transaction_ids: transaction_ids).auto_detect
end
|
||||
|
||||
# Memoized BalanceSheet built for this record.
def balance_sheet
  @balance_sheet ||= BalanceSheet.new(self)
end
|
||||
|
|
87
app/models/family/auto_categorizer.rb
Normal file
87
app/models/family/auto_categorizer.rb
Normal file
|
@ -0,0 +1,87 @@
|
|||
# Assigns categories to a family's uncategorized, enrichable transactions
# using the category names suggested by an LLM provider.
class Family::AutoCategorizer
  Error = Class.new(StandardError)

  # @param family [Family] family whose transactions and categories are used
  # @param transaction_ids [Array] IDs of the candidate transactions
  def initialize(family, transaction_ids: [])
    @family = family
    @transaction_ids = transaction_ids
  end

  # Sends the candidate transactions to the provider and applies each returned
  # category name that matches one of the family's categories.
  #
  # @raise [Error] when no LLM provider is available
  def auto_categorize
    raise Error, "No LLM provider for auto-categorization" unless llm_provider

    # Load once: the same records drive the emptiness check, the log line and
    # the provider input, instead of one query for each.
    pending = scope.to_a

    if pending.empty?
      Rails.logger.info("No transactions to auto-categorize for family #{family.id}")
      return
    end

    Rails.logger.info("Auto-categorizing #{pending.size} transactions for family #{family.id}")

    result = llm_provider.auto_categorize(
      transactions: transactions_input(pending),
      user_categories: user_categories_input
    )

    unless result.success?
      Rails.logger.error("Failed to auto-categorize transactions for family #{family.id}: #{result.error.message}")
      return
    end

    # The scope is evaluated again here, so transactions that stopped matching
    # while the provider call was in flight are left untouched.
    scope.each do |transaction|
      # Every processed transaction is locked, even when no category matched.
      transaction.lock!(:category_id)

      category_id = category_id_for(result.data.find { |c| c.transaction_id == transaction.id })
      next if category_id.blank?

      Family.transaction do
        transaction.log_enrichment!(
          attribute_name: "category_id",
          attribute_value: category_id,
          source: "ai"
        )

        transaction.update!(category_id: category_id)
      end
    end
  end

  private
    attr_reader :family, :transaction_ids

    # For now, OpenAI only, but this should work with any LLM concept provider
    def llm_provider
      Provider::Registry.get_provider(:openai)
    end

    # ID of the family category whose name equals the provider's suggestion,
    # or nil when the provider returned nothing for the transaction.
    def category_id_for(auto_categorization)
      return if auto_categorization.nil?

      user_categories_input.find { |c| c[:name] == auto_categorization.category_name }&.dig(:id)
    end

    # Built once per run; used both as provider input and for name lookup.
    def user_categories_input
      @user_categories_input ||= family.categories.map do |category|
        {
          id: category.id,
          name: category.name,
          is_subcategory: category.subcategory?,
          parent_id: category.parent_id,
          classification: category.classification
        }
      end
    end

    def transactions_input(transactions)
      transactions.map do |transaction|
        {
          id: transaction.id,
          amount: transaction.entry.amount.abs,
          classification: transaction.entry.classification,
          description: transaction.entry.name,
          merchant: transaction.merchant&.name
        }
      end
    end

    # Candidate transactions: requested IDs without a category that are still
    # enrichable for category_id.
    def scope
      family.transactions.where(id: transaction_ids, category_id: nil)
                         .enrichable(:category_id)
                         .includes(:category, :merchant, :entry)
    end
end
|
100
app/models/family/auto_merchant_detector.rb
Normal file
100
app/models/family/auto_merchant_detector.rb
Normal file
|
@ -0,0 +1,100 @@
|
|||
# Assigns merchants to a family's merchant-less, enrichable transactions using
# the business names and URLs suggested by an LLM provider.
class Family::AutoMerchantDetector
  Error = Class.new(StandardError)

  # @param family [Family] family whose transactions and merchants are used
  # @param transaction_ids [Array] IDs of the candidate transactions
  def initialize(family, transaction_ids: [])
    @family = family
    @transaction_ids = transaction_ids
  end

  # Sends the candidate transactions to the provider and assigns either a
  # matching family merchant or an "ai"-sourced ProviderMerchant.
  #
  # @raise [Error] when no LLM provider is available
  def auto_detect
    raise Error, "No LLM provider for auto-detecting merchants" unless llm_provider

    # Load once: the same records drive the emptiness check, the log line and
    # the provider input, instead of one query for each.
    pending = scope.to_a

    if pending.empty?
      Rails.logger.info("No transactions to auto-detect merchants for family #{family.id}")
      return
    end

    Rails.logger.info("Auto-detecting merchants for #{pending.size} transactions for family #{family.id}")

    result = llm_provider.auto_detect_merchants(
      transactions: transactions_input(pending),
      user_merchants: user_merchants_input
    )

    unless result.success?
      Rails.logger.error("Failed to auto-detect merchants for family #{family.id}: #{result.error.message}")
      return
    end

    # The scope is evaluated again here, so transactions that stopped matching
    # while the provider call was in flight are left untouched.
    scope.each do |transaction|
      # Every processed transaction is locked, even when no merchant was found.
      transaction.lock!(:merchant_id)

      merchant_id = merchant_id_for(result.data.find { |c| c.transaction_id == transaction.id })
      next if merchant_id.blank?

      Family.transaction do
        transaction.log_enrichment!(
          attribute_name: "merchant_id",
          attribute_value: merchant_id,
          source: "ai"
        )

        transaction.update!(merchant_id: merchant_id)
      end
    end
  end

  private
    attr_reader :family, :transaction_ids

    # For now, OpenAI only, but this should work with any LLM concept provider
    def llm_provider
      Provider::Registry.get_provider(:openai)
    end

    def default_logo_provider_url
      "https://logo.synthfinance.com"
    end

    # Resolves a detection to a merchant ID: a family merchant with the same
    # name wins; otherwise an "ai" ProviderMerchant is found or created, but
    # only when both a name and a URL were detected.
    def merchant_id_for(auto_detection)
      return if auto_detection.nil?

      family_merchant_id = user_merchants_input.find { |m| m[:name] == auto_detection.business_name }&.dig(:id)
      return family_merchant_id unless family_merchant_id.nil?

      return unless auto_detection.business_url.present? && auto_detection.business_name.present?

      ProviderMerchant.find_or_create_by!(
        source: "ai",
        name: auto_detection.business_name,
        website_url: auto_detection.business_url
      ) do |pm|
        pm.logo_url = "#{default_logo_provider_url}/#{auto_detection.business_url}"
      end.id
    end

    # Built once per run; used both as provider input and for name lookup.
    def user_merchants_input
      @user_merchants_input ||= family.merchants.map do |merchant|
        {
          id: merchant.id,
          name: merchant.name
        }
      end
    end

    def transactions_input(transactions)
      transactions.map do |transaction|
        {
          id: transaction.id,
          amount: transaction.entry.amount.abs,
          classification: transaction.entry.classification,
          description: transaction.entry.name,
          merchant: transaction.merchant&.name
        }
      end
    end

    # Candidate transactions: requested IDs without a merchant that are still
    # enrichable for merchant_id.
    def scope
      family.transactions.where(id: transaction_ids, merchant_id: nil)
                         .enrichable(:merchant_id)
                         .includes(:merchant, :entry)
    end
end
|
|
@ -7,6 +7,12 @@ module Provider::LlmConcept
|
|||
raise NotImplementedError, "Subclasses must implement #auto_categorize"
|
||||
end
|
||||
|
||||
AutoDetectedMerchant = Data.define(:transaction_id, :business_name, :business_url)
|
||||
|
||||
# Abstract hook: concrete providers must override this method.
#
# @raise [NotImplementedError] always
def auto_detect_merchants(transactions)
  raise NotImplementedError, "Subclasses must implement #auto_detect_merchants"
end
|
||||
|
||||
ChatMessage = Data.define(:id, :output_text)
|
||||
ChatStreamChunk = Data.define(:type, :data)
|
||||
ChatResponse = Data.define(:id, :model, :messages, :function_requests)
|
||||
|
|
|
@ -4,7 +4,7 @@ class Provider::Openai < Provider
|
|||
# Subclass so errors caught in this provider are raised as Provider::Openai::Error
|
||||
Error = Class.new(Provider::Error)
|
||||
|
||||
MODELS = %w[gpt-4o gpt-4o-mini]
|
||||
MODELS = %w[gpt-4o]
|
||||
|
||||
def initialize(access_token)
|
||||
@client = ::OpenAI::Client.new(access_token: access_token)
|
||||
|
@ -16,7 +16,7 @@ class Provider::Openai < Provider
|
|||
|
||||
def auto_categorize(transactions: [], user_categories: [])
|
||||
with_provider_response do
|
||||
raise Error, "Too many transactions to auto-categorize. Max is 100 per request" if transactions.size > 100
|
||||
raise Error, "Too many transactions to auto-categorize. Max is 25 per request." if transactions.size > 25
|
||||
|
||||
AutoCategorizer.new(
|
||||
client,
|
||||
|
@ -26,6 +26,18 @@ class Provider::Openai < Provider
|
|||
end
|
||||
end
|
||||
|
||||
# Detects merchants for a batch of transactions via AutoMerchantDetector,
# wrapped in `with_provider_response`.
#
# @param transactions [Array<Hash>] transaction inputs (at most 25)
# @param user_merchants [Array<Hash>] the user's existing merchants
# @raise [Error] inside the wrapper when more than 25 transactions are given
def auto_detect_merchants(transactions: [], user_merchants: [])
  with_provider_response do
    raise Error, "Too many transactions to auto-detect merchants. Max is 25 per request." if transactions.size > 25

    AutoMerchantDetector.new(
      client,
      transactions: transactions,
      user_merchants: user_merchants
    ).auto_detect_merchants
  end
end
|
||||
|
||||
def chat_response(prompt, model:, instructions: nil, functions: [], function_results: [], streamer: nil, previous_response_id: nil)
|
||||
with_provider_response do
|
||||
chat_config = ChatConfig.new(
|
||||
|
|
|
@ -7,7 +7,7 @@ class Provider::Openai::AutoCategorizer
|
|||
|
||||
def auto_categorize
|
||||
response = client.responses.create(parameters: {
|
||||
model: "gpt-4o-mini",
|
||||
model: "gpt-4.1-mini",
|
||||
input: [ { role: "developer", content: developer_message } ],
|
||||
text: {
|
||||
format: {
|
||||
|
@ -20,6 +20,8 @@ class Provider::Openai::AutoCategorizer
|
|||
instructions: instructions
|
||||
})
|
||||
|
||||
Rails.logger.info("Tokens used to auto-categorize transactions: #{response.dig("usage").dig("total_tokens")}")
|
||||
|
||||
build_response(extract_categorizations(response))
|
||||
end
|
||||
|
||||
|
|
146
app/models/provider/openai/auto_merchant_detector.rb
Normal file
146
app/models/provider/openai/auto_merchant_detector.rb
Normal file
|
@ -0,0 +1,146 @@
|
|||
# Asks an OpenAI model for the business name and website behind each
# transaction and maps the JSON answer to AutoDetectedMerchant values.
class Provider::Openai::AutoMerchantDetector
  AutoDetectedMerchant = Provider::LlmConcept::AutoDetectedMerchant

  # @param client [Object] client exposing `responses.create(parameters:)`
  # @param transactions [Array<Hash>] transaction inputs, each with an `:id`
  # @param user_merchants [Array<Hash>] the user's existing merchants
  def initialize(client, transactions:, user_merchants:)
    @client = client
    @transactions = transactions
    @user_merchants = user_merchants
  end

  # @return [Array<AutoDetectedMerchant>] one entry per merchant object returned
  def auto_detect_merchants
    response = client.responses.create(parameters: {
      model: "gpt-4.1-mini",
      input: [ { role: "developer", content: developer_message } ],
      text: {
        format: {
          type: "json_schema",
          name: "auto_detect_personal_finance_merchants",
          strict: true,
          schema: json_schema
        }
      },
      instructions: instructions
    })

    # Single dig so a response without a "usage" key cannot break the log line.
    Rails.logger.info("Tokens used to auto-detect merchants: #{response.dig("usage", "total_tokens")}")

    build_response(extract_merchants(response))
  end

  private
    attr_reader :client, :transactions, :user_merchants

    def build_response(merchants)
      merchants.map do |merchant|
        AutoDetectedMerchant.new(
          transaction_id: merchant.dig("transaction_id"),
          business_name: normalize_ai_value(merchant.dig("business_name")),
          business_url: normalize_ai_value(merchant.dig("business_url"))
        )
      end
    end

    # The prompts ask for "null" when uncertain, so the literal string "null"
    # is treated the same as a real JSON null.
    def normalize_ai_value(ai_value)
      return nil if ai_value == "null"

      ai_value
    end

    # Parses the JSON text of the first output item and returns its "merchants" array.
    def extract_merchants(response)
      response_json = JSON.parse(response.dig("output", 0, "content", 0, "text"))
      response_json.dig("merchants")
    end

    def json_schema
      {
        type: "object",
        properties: {
          merchants: {
            type: "array",
            description: "An array of auto-detected merchant businesses for each transaction",
            items: {
              type: "object",
              properties: {
                transaction_id: {
                  type: "string",
                  description: "The internal ID of the original transaction",
                  enum: transactions.map { |t| t[:id] }
                },
                business_name: {
                  type: [ "string", "null" ],
                  description: "The detected business name of the transaction, or `null` if uncertain"
                },
                business_url: {
                  type: [ "string", "null" ],
                  description: "The URL of the detected business, or `null` if uncertain"
                }
              },
              required: [ "transaction_id", "business_name", "business_url" ],
              additionalProperties: false
            }
          }
        },
        required: [ "merchants" ],
        additionalProperties: false
      }
    end

    def developer_message
      <<~MESSAGE
        Here are the user's available merchants in JSON format:

        ```json
        #{user_merchants.to_json}
        ```

        Use BOTH your knowledge AND the user-generated merchants to auto-detect the following transactions:

        ```json
        #{transactions.to_json}
        ```

        Return "null" if you are not 80%+ confident in your answer.
      MESSAGE
    end

    def instructions
      <<~INSTRUCTIONS
        You are an assistant to a consumer personal finance app.

        Closely follow ALL the rules below while auto-detecting business names and website URLs:

        - Return 1 result per transaction
        - Correlate each transaction by ID (transaction_id)
        - Do not include the subdomain in the business_url (i.e. "amazon.com" not "www.amazon.com")
        - User merchants are considered "manual" user-generated merchants and should only be used in 100% clear cases
        - Be slightly pessimistic. We favor returning "null" over returning a false positive.
        - NEVER return a name or URL for generic transaction names (e.g. "Paycheck", "Laundromat", "Grocery store", "Local diner")

        Determining a value:

        - First attempt to determine the name + URL from your knowledge of global businesses
        - If no certain match, attempt to match one of the user-provided merchants
        - If no match, return "null"

        Example 1 (known business):

        ```
        Transaction name: "Some Amazon purchases"

        Result:
        - business_name: "Amazon"
        - business_url: "amazon.com"
        ```

        Example 2 (generic business):

        ```
        Transaction name: "local diner"

        Result:
        - business_name: null
        - business_url: null
        ```
      INSTRUCTIONS
    end
end
|
|
@ -1,5 +1,5 @@
|
|||
# Merchant that originates from a provider (Plaid, Synth or AI detection)
# rather than from a family.
class ProviderMerchant < Merchant
  # Declared once: the same enum cannot be defined twice on a model.
  enum :source, { plaid: "plaid", synth: "synth", ai: "ai" }

  # A name may repeat across sources or websites, but not within the same pair.
  validates :name, uniqueness: { scope: [ :source, :website_url ] }
end
|
||||
|
|
|
@ -1,6 +1,4 @@
|
|||
class Rule < ApplicationRecord
|
||||
include Provided
|
||||
|
||||
UnsupportedResourceTypeError = Class.new(StandardError)
|
||||
|
||||
belongs_to :family
|
||||
|
|
|
@ -1,104 +0,0 @@
|
|||
# Rule action that auto-categorizes uncategorized transactions through the
# rule's LLM provider, in batches of 100.
class Rule::ActionExecutor::AiAutoCategorize < Rule::ActionExecutor
  ProviderMissingError = Class.new(StandardError)

  # @param transaction_scope [ActiveRecord::Relation] transactions matched by the rule
  # @raise [ProviderMissingError] when no LLM provider is configured
  def execute(transaction_scope, value: nil, ignore_attribute_locks: false)
    raise ProviderMissingError, "LLM provider is not configured" unless llm_provider.present?

    enrichable_transactions = transaction_scope.enrichable(:category_id).where(category_id: nil).includes(:category, :merchant, :entry)

    # Counted once; reused for the empty check, the log line and the batch total.
    total_transactions = enrichable_transactions.count

    if total_transactions.zero?
      Rails.logger.info("No transactions to auto-categorize for rule #{rule.id}")
      return
    end

    Rails.logger.info("Auto-categorizing #{total_transactions} transactions for rule #{rule.id}")

    batch_size = 100
    total_batches = (total_transactions.to_f / batch_size).ceil
    consecutive_failures = 0

    enrichable_transactions.in_batches(of: batch_size, load: true).each_with_index do |batch, index|
      batch_number = index + 1
      percent_complete = ((batch_number.to_f / total_batches) * 100).round
      Rails.logger.info("Processing batch #{batch_number} of #{total_batches} (#{percent_complete}% complete) for rule #{rule.id}")

      if process_batch(batch)
        consecutive_failures = 0
      else
        consecutive_failures += 1
        # Stop after three failed provider calls in a row.
        break if consecutive_failures >= 3
      end
    end
  end

  private
    def llm_provider
      rule.llm_provider
    end

    # Categorizes one batch.
    #
    # @return [Boolean] false when the provider call failed, true otherwise
    def process_batch(batch)
      result = llm_provider.auto_categorize(
        transactions: prepare_transaction_input(batch),
        user_categories: user_categories
      )

      unless result.success?
        Rails.logger.error("Failed to auto-categorize transactions for rule #{rule.id}: #{result.error.message}")
        return false
      end

      batch.each do |txn|
        # Every processed transaction is locked, even when no category matched.
        txn.lock!(:category_id)

        auto_categorization = result.data.find { |c| c.transaction_id == txn.id }
        next unless auto_categorization.present?

        category_id = user_categories.find { |c| c[:name] == auto_categorization.category_name }&.dig(:id)
        next unless category_id.present?

        DataEnrichment.transaction do
          # `value` is deliberately not part of the lookup: when the category
          # changes, the existing row is updated rather than a second row
          # being created for the same enrichable/attribute/source.
          enrichment = DataEnrichment.find_or_initialize_by(
            enrichable: txn,
            attribute_name: "category_id",
            source: "rule"
          )
          enrichment.update!(value: category_id)

          txn.update!(category_id: category_id)
        end
      end

      true
    end

    def prepare_transaction_input(transactions)
      transactions.map do |transaction|
        {
          id: transaction.id,
          amount: transaction.entry.amount.abs,
          classification: transaction.entry.classification,
          description: transaction.entry.name,
          merchant: transaction.merchant&.name
        }
      end
    end

    # Built once per executor; used both as provider input and for name lookup.
    def user_categories
      @user_categories ||= rule.family.categories.map do |category|
        {
          id: category.id,
          name: category.name,
          is_subcategory: category.subcategory?,
          parent_id: category.parent_id,
          classification: category.classification
        }
      end
    end
end
|
|
@ -1,5 +0,0 @@
|
|||
# Placeholder rule action for AI-based transaction name enhancement.
class Rule::ActionExecutor::AiEnhanceTransactionName < Rule::ActionExecutor
  # Accepts the standard executor arguments; currently performs no work.
  def execute(transaction_scope, value: nil, ignore_attribute_locks: false)
    # TODO
  end
end
|
19
app/models/rule/action_executor/auto_categorize.rb
Normal file
19
app/models/rule/action_executor/auto_categorize.rb
Normal file
|
@ -0,0 +1,19 @@
|
|||
# Rule action that schedules background auto-categorization for the
# rule's enrichable transactions, 20 transactions per job.
class Rule::ActionExecutor::AutoCategorize < Rule::ActionExecutor
  def label
    "Auto-categorize transactions"
  end

  # @param transaction_scope [ActiveRecord::Relation] transactions matched by the rule
  def execute(transaction_scope, value: nil, ignore_attribute_locks: false)
    enrichable_transactions = transaction_scope.enrichable(:category_id)

    # Counted once, instead of issuing a COUNT for every batch's log line.
    total_transactions = enrichable_transactions.count

    if total_transactions.zero?
      Rails.logger.info("No transactions to auto-categorize for #{rule.title} #{rule.id}")
      return
    end

    batch_size = 20
    total_batches = (total_transactions.to_f / batch_size).ceil

    enrichable_transactions.in_batches(of: batch_size).each_with_index do |transactions, idx|
      # Reports progress against the number of batches, not the number of transactions.
      Rails.logger.info("Scheduling auto-categorization for batch #{idx + 1} of #{total_batches}")
      rule.family.auto_categorize_transactions_later(transactions)
    end
  end
end
|
19
app/models/rule/action_executor/auto_detect_merchants.rb
Normal file
19
app/models/rule/action_executor/auto_detect_merchants.rb
Normal file
|
@ -0,0 +1,19 @@
|
|||
# Rule action that schedules background merchant detection for the
# rule's enrichable transactions, 20 transactions per job.
class Rule::ActionExecutor::AutoDetectMerchants < Rule::ActionExecutor
  def label
    "Auto-detect merchants"
  end

  # @param transaction_scope [ActiveRecord::Relation] transactions matched by the rule
  def execute(transaction_scope, value: nil, ignore_attribute_locks: false)
    enrichable_transactions = transaction_scope.enrichable(:merchant_id)

    # Counted once, instead of issuing a COUNT for every batch's log line.
    total_transactions = enrichable_transactions.count

    if total_transactions.zero?
      Rails.logger.info("No transactions to auto-detect merchants for #{rule.title} #{rule.id}")
      return
    end

    batch_size = 20
    total_batches = (total_transactions.to_f / batch_size).ceil

    enrichable_transactions.in_batches(of: batch_size).each_with_index do |transactions, idx|
      # Reports progress against the number of batches, not the number of transactions.
      Rails.logger.info("Scheduling auto-merchant-enrichment for batch #{idx + 1} of #{total_batches}")
      rule.family.auto_detect_transaction_merchants_later(transactions)
    end
  end
end
|
|
@ -17,18 +17,14 @@ class Rule::ActionExecutor::SetTransactionCategory < Rule::ActionExecutor
|
|||
end
|
||||
|
||||
scope.each do |txn|
|
||||
DataEnrichment.transaction do
|
||||
txn.update!(category: category)
|
||||
|
||||
de = DataEnrichment.find_or_create_by!(
|
||||
enrichable: txn,
|
||||
Rule.transaction do
|
||||
txn.log_enrichment!(
|
||||
attribute_name: "category_id",
|
||||
value: category.id,
|
||||
attribute_value: category.id,
|
||||
source: "rule"
|
||||
)
|
||||
|
||||
de.value = category.id
|
||||
de.save!
|
||||
txn.update!(category: category)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -17,18 +17,14 @@ class Rule::ActionExecutor::SetTransactionTags < Rule::ActionExecutor
|
|||
end
|
||||
|
||||
rows = scope.each do |txn|
|
||||
DataEnrichment.transaction do
|
||||
txn.update!(tag_ids: [ tag.id ])
|
||||
|
||||
de = DataEnrichment.find_or_initialize_by(
|
||||
enrichable_id: txn.id,
|
||||
enrichable_type: "Transaction",
|
||||
Rule.transaction do
|
||||
txn.log_enrichment!(
|
||||
attribute_name: "tag_ids",
|
||||
attribute_value: [ tag.id ],
|
||||
source: "rule"
|
||||
)
|
||||
|
||||
de.value = [ tag.id ]
|
||||
de.save!
|
||||
txn.update!(tag_ids: [ tag.id ])
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -1,21 +0,0 @@
|
|||
# Gives Rule class-level and instance-level access to its data providers.
module Rule::Provided
  extend ActiveSupport::Concern

  class_methods do
    # Provider registered under :openai.
    def llm_provider
      Provider::Registry.get_provider(:openai)
    end

    # Provider registered under :synth.
    def synth
      Provider::Registry.get_provider(:synth)
    end
  end

  # Instance-level shortcut to the class-level provider.
  def llm_provider
    self.class.llm_provider
  end

  # Instance-level shortcut to the class-level provider.
  def synth
    self.class.synth
  end
end
|
|
@ -15,8 +15,8 @@ class Rule::Registry::TransactionResource < Rule::Registry
|
|||
[
|
||||
Rule::ActionExecutor::SetTransactionCategory.new(rule),
|
||||
Rule::ActionExecutor::SetTransactionTags.new(rule),
|
||||
Rule::ActionExecutor::AiEnhanceTransactionName.new(rule),
|
||||
Rule::ActionExecutor::AiAutoCategorize.new(rule)
|
||||
Rule::ActionExecutor::AutoDetectMerchants.new(rule),
|
||||
Rule::ActionExecutor::AutoCategorize.new(rule)
|
||||
]
|
||||
end
|
||||
end
|
||||
|
|
|
@ -1,35 +0,0 @@
|
|||
# Lists the condition filters and action executors available to rules that
# target transactions, plus the transaction scope those rules run against.
class Rule::TransactionResourceRegistry
  # @param rule [Rule] the rule the filters and executors are built for
  def initialize(rule)
    @rule = rule
  end

  # Active transactions of the rule's family.
  def scope
    family.transactions.active
  end

  # @return [Object, nil] the filter with the given key, or nil when none matches
  def get_filter!(key)
    condition_filters.find { |filter| filter.key == key }
  end

  # @return [Object, nil] the executor with the given key, or nil when none matches
  def get_executor!(key)
    action_executors.find { |executor| executor.key == key }
  end

  private
    # `rule` and `family` are used throughout this class, so both need to be defined here.
    attr_reader :rule

    def family
      rule.family
    end

    def condition_filters
      [
        Rule::ConditionFilter::TransactionName.new(rule),
        Rule::ConditionFilter::TransactionAmount.new(rule),
        Rule::ConditionFilter::TransactionMerchant.new(rule)
      ]
    end

    def action_executors
      [
        Rule::ActionExecutor::SetTransactionCategory.new(rule),
        Rule::ActionExecutor::SetTransactionTags.new(rule),
        Rule::ActionExecutor::AiEnhanceTransactionName.new(rule),
        Rule::ActionExecutor::AiAutoCategorize.new(rule)
      ]
    end
end
|
|
@ -18,7 +18,7 @@
|
|||
|
||||
<div class="border border-alpha-black-25 rounded-md bg-container shadow-border-xs">
|
||||
<div class="overflow-hidden rounded-md">
|
||||
<%= render partial: @merchants, spacer_template: "family_merchants/ruler" %>
|
||||
<%= render partial: "family_merchants/family_merchant", collection: @merchants, spacer_template: "family_merchants/ruler" %>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
|
|
@ -21,7 +21,7 @@
|
|||
|
||||
<div class="max-w-full">
|
||||
<%= content_tag :div, class: ["flex items-center gap-2"] do %>
|
||||
<% if transaction.merchant&.logo_url %>
|
||||
<% if transaction.merchant&.logo_url.present? %>
|
||||
<%= image_tag transaction.merchant.logo_url,
|
||||
class: "w-6 h-6 rounded-full",
|
||||
loading: "lazy" %>
|
||||
|
|
|
@ -66,7 +66,7 @@
|
|||
<%= f.fields_for :entryable do |ef| %>
|
||||
|
||||
<%= ef.collection_select :merchant_id,
|
||||
Current.family.merchants.alphabetically,
|
||||
Current.family.assigned_merchants.alphabetically,
|
||||
:id, :name,
|
||||
{ include_blank: t(".none"),
|
||||
label: t(".merchant_label"),
|
||||
|
|
|
@ -24,6 +24,6 @@ class ProviderMerchants < ActiveRecord::Migration[7.2]
|
|||
add_column :merchants, :provider_merchant_id, :string
|
||||
|
||||
add_index :merchants, [ :family_id, :name ], unique: true, where: "type = 'FamilyMerchant'"
|
||||
add_index :merchants, [ :source, :name ], unique: true, where: "type = 'ProviderMerchant'"
|
||||
add_index :merchants, [ :source, :name, :website_url ], unique: true, where: "type = 'ProviderMerchant'"
|
||||
end
|
||||
end
|
||||
|
|
|
@ -5,6 +5,7 @@ class DataEnrichmentsAndLocks < ActiveRecord::Migration[7.2]
|
|||
t.string :source
|
||||
t.string :attribute_name
|
||||
t.jsonb :value
|
||||
t.jsonb :metadata
|
||||
|
||||
t.timestamps
|
||||
end
|
||||
|
|
1
db/schema.rb
generated
1
db/schema.rb
generated
|
@ -172,6 +172,7 @@ ActiveRecord::Schema[7.2].define(version: 2025_04_15_125256) do
|
|||
t.string "source"
|
||||
t.string "attribute_name"
|
||||
t.jsonb "value"
|
||||
t.jsonb "metadata"
|
||||
t.datetime "created_at", null: false
|
||||
t.datetime "updated_at", null: false
|
||||
t.index ["enrichable_id", "enrichable_type", "source", "attribute_name"], name: "idx_on_enrichable_id_enrichable_type_source_attribu_5be5f63e08", unique: true
|
||||
|
|
|
@ -1,15 +1,13 @@
|
|||
require "test_helper"
|
||||
|
||||
class Rule::ActionExecutor::AiAutoCategorizeTest < ActiveSupport::TestCase
|
||||
class Family::AutoCategorizerTest < ActiveSupport::TestCase
|
||||
include EntriesTestHelper, ProviderTestHelper
|
||||
|
||||
setup do
|
||||
@family = families(:dylan_family)
|
||||
@account = @family.accounts.create!(name: "Rule test", balance: 100, currency: "USD", accountable: Depository.new)
|
||||
@llm_provider = mock
|
||||
@rule = rules(:one)
|
||||
|
||||
Rule.any_instance.stubs(:llm_provider).returns(@llm_provider)
|
||||
Provider::Registry.stubs(:get_provider).with(:openai).returns(@llm_provider)
|
||||
end
|
||||
|
||||
test "auto-categorizes transactions" do
|
||||
|
@ -27,10 +25,9 @@ class Rule::ActionExecutor::AiAutoCategorizeTest < ActiveSupport::TestCase
|
|||
|
||||
@llm_provider.expects(:auto_categorize).returns(provider_response).once
|
||||
|
||||
# All 3 of newly created transactions are enrichable by category_id
|
||||
assert_equal 3, @account.transactions.reload.enrichable(:category_id).count
|
||||
|
||||
Rule::ActionExecutor::AiAutoCategorize.new(@rule).execute(@account.transactions)
|
||||
assert_difference "DataEnrichment.count", 2 do
|
||||
Family::AutoCategorizer.new(@family, transaction_ids: [ txn1.id, txn2.id, txn3.id ]).auto_categorize
|
||||
end
|
||||
|
||||
assert_equal test_category, txn1.reload.category
|
||||
assert_equal test_category, txn2.reload.category
|
42
test/models/family/auto_merchant_detector_test.rb
Normal file
42
test/models/family/auto_merchant_detector_test.rb
Normal file
|
@ -0,0 +1,42 @@
|
|||
require "test_helper"
|
||||
|
||||
# Exercises Family::AutoMerchantDetector against a mocked LLM provider.
class Family::AutoMerchantDetectorTest < ActiveSupport::TestCase
  include EntriesTestHelper, ProviderTestHelper

  # Declared at the top of the class: `private` does not apply to constants.
  AutoDetectedMerchant = Provider::LlmConcept::AutoDetectedMerchant

  setup do
    @family = families(:dylan_family)
    @account = @family.accounts.create!(name: "Rule test", balance: 100, currency: "USD", accountable: Depository.new)
    @llm_provider = mock
    Provider::Registry.stubs(:get_provider).with(:openai).returns(@llm_provider)
  end

  test "auto detects transaction merchants" do
    txn1 = create_transaction(account: @account, name: "McDonalds").transaction
    txn2 = create_transaction(account: @account, name: "Chipotle").transaction
    txn3 = create_transaction(account: @account, name: "generic").transaction

    provider_response = provider_success_response([
      AutoDetectedMerchant.new(transaction_id: txn1.id, business_name: "McDonalds", business_url: "mcdonalds.com"),
      AutoDetectedMerchant.new(transaction_id: txn2.id, business_name: "Chipotle", business_url: "chipotle.com"),
      AutoDetectedMerchant.new(transaction_id: txn3.id, business_name: nil, business_url: nil)
    ])

    @llm_provider.expects(:auto_detect_merchants).returns(provider_response).once

    # Only the two transactions with a detected business get an enrichment record.
    assert_difference "DataEnrichment.count", 2 do
      Family::AutoMerchantDetector.new(@family, transaction_ids: [ txn1.id, txn2.id, txn3.id ]).auto_detect
    end

    assert_equal "McDonalds", txn1.reload.merchant.name
    assert_equal "Chipotle", txn2.reload.merchant.name
    assert_equal "https://logo.synthfinance.com/mcdonalds.com", txn1.reload.merchant.logo_url
    assert_equal "https://logo.synthfinance.com/chipotle.com", txn2.reload.merchant.logo_url
    assert_nil txn3.reload.merchant

    # After auto-detection, all transactions are locked and no longer enrichable
    assert_equal 0, @account.transactions.reload.enrichable(:merchant_id).count
  end
end
|
|
@ -22,14 +22,20 @@ class Provider::OpenaiTest < ActiveSupport::TestCase
|
|||
input_transactions = [
|
||||
{ id: "1", name: "McDonalds", amount: 20, classification: "expense", merchant: "McDonalds", hint: "Fast Food" },
|
||||
{ id: "2", name: "Amazon purchase", amount: 100, classification: "expense", merchant: "Amazon" },
|
||||
{ id: "3", name: "Netflix subscription", amount: 10, classification: "expense", merchant: "Netflix", hint: "Subscriptions" }
|
||||
{ id: "3", name: "Netflix subscription", amount: 10, classification: "expense", merchant: "Netflix", hint: "Subscriptions" },
|
||||
{ id: "4", name: "paycheck", amount: 3000, classification: "income" },
|
||||
{ id: "5", name: "Italian dinner with friends", amount: 100, classification: "expense" },
|
||||
{ id: "6", name: "1212XXXBCaaa charge", amount: 2.99, classification: "expense" }
|
||||
]
|
||||
|
||||
response = @subject.auto_categorize(
|
||||
transactions: input_transactions,
|
||||
user_categories: [
|
||||
{ id: "shopping_id", name: "Shopping", is_subcategory: false, parent_id: nil, classification: "expense" },
|
||||
{ id: "restaurants_id", name: "Restaurants", is_subcategory: false, parent_id: nil, classification: "expense" }
|
||||
{ id: "subscriptions_id", name: "Subscriptions", is_subcategory: true, parent_id: nil, classification: "expense" },
|
||||
{ id: "restaurants_id", name: "Restaurants", is_subcategory: false, parent_id: nil, classification: "expense" },
|
||||
{ id: "fast_food_id", name: "Fast Food", is_subcategory: true, parent_id: "restaurants_id", classification: "expense" },
|
||||
{ id: "income_id", name: "Income", is_subcategory: false, parent_id: nil, classification: "income" }
|
||||
]
|
||||
)
|
||||
|
||||
|
@ -39,10 +45,71 @@ class Provider::OpenaiTest < ActiveSupport::TestCase
|
|||
txn1 = response.data.find { |c| c.transaction_id == "1" }
|
||||
txn2 = response.data.find { |c| c.transaction_id == "2" }
|
||||
txn3 = response.data.find { |c| c.transaction_id == "3" }
|
||||
txn4 = response.data.find { |c| c.transaction_id == "4" }
|
||||
txn5 = response.data.find { |c| c.transaction_id == "5" }
|
||||
txn6 = response.data.find { |c| c.transaction_id == "6" }
|
||||
|
||||
assert_equal "Restaurants", txn1.category_name
|
||||
assert_equal "Fast Food", txn1.category_name
|
||||
assert_equal "Shopping", txn2.category_name
|
||||
assert_nil txn3.category_name
|
||||
assert_equal "Subscriptions", txn3.category_name
|
||||
assert_equal "Income", txn4.category_name
|
||||
assert_equal "Restaurants", txn5.category_name
|
||||
assert_nil txn6.category_name
|
||||
end
|
||||
end
|
||||
|
||||
test "auto detects merchants" do
|
||||
VCR.use_cassette("openai/auto_detect_merchants") do
|
||||
input_transactions = [
|
||||
{ id: "1", name: "McDonalds", amount: 20, classification: "expense" },
|
||||
{ id: "2", name: "local pub", amount: 20, classification: "expense" },
|
||||
{ id: "3", name: "WMT purchases", amount: 20, classification: "expense" },
|
||||
{ id: "4", name: "amzn 123 abc", amount: 20, classification: "expense" },
|
||||
{ id: "5", name: "chaseX1231", amount: 2000, classification: "income" },
|
||||
{ id: "6", name: "check deposit 022", amount: 200, classification: "income" },
|
||||
{ id: "7", name: "shooters bar and grill", amount: 200, classification: "expense" },
|
||||
{ id: "8", name: "Microsoft Office subscription", amount: 200, classification: "expense" }
|
||||
]
|
||||
|
||||
response = @subject.auto_detect_merchants(
|
||||
transactions: input_transactions,
|
||||
user_merchants: [ { name: "Shooters" } ]
|
||||
)
|
||||
|
||||
assert response.success?
|
||||
assert_equal input_transactions.size, response.data.size
|
||||
|
||||
txn1 = response.data.find { |c| c.transaction_id == "1" }
|
||||
txn2 = response.data.find { |c| c.transaction_id == "2" }
|
||||
txn3 = response.data.find { |c| c.transaction_id == "3" }
|
||||
txn4 = response.data.find { |c| c.transaction_id == "4" }
|
||||
txn5 = response.data.find { |c| c.transaction_id == "5" }
|
||||
txn6 = response.data.find { |c| c.transaction_id == "6" }
|
||||
txn7 = response.data.find { |c| c.transaction_id == "7" }
|
||||
txn8 = response.data.find { |c| c.transaction_id == "8" }
|
||||
assert_equal "McDonald's", txn1.business_name
|
||||
assert_equal "mcdonalds.com", txn1.business_url
|
||||
|
||||
assert_nil txn2.business_name
|
||||
assert_nil txn2.business_url
|
||||
|
||||
assert_equal "Walmart", txn3.business_name
|
||||
assert_equal "walmart.com", txn3.business_url
|
||||
|
||||
assert_equal "Amazon", txn4.business_name
|
||||
assert_equal "amazon.com", txn4.business_url
|
||||
|
||||
assert_nil txn5.business_name
|
||||
assert_nil txn5.business_url
|
||||
|
||||
assert_nil txn6.business_name
|
||||
assert_nil txn6.business_url
|
||||
|
||||
assert_equal "Shooters", txn7.business_name
|
||||
assert_nil txn7.business_url
|
||||
|
||||
assert_equal "Microsoft", txn8.business_name
|
||||
assert_equal "microsoft.com", txn8.business_url
|
||||
end
|
||||
end
|
||||
|
||||
|
|
|
@ -5,14 +5,18 @@ http_interactions:
|
|||
uri: https://api.openai.com/v1/responses
|
||||
body:
|
||||
encoding: UTF-8
|
||||
string: '{"model":"gpt-4o-mini","input":[{"role":"developer","content":"Here
|
||||
are the user''s available categories in JSON format:\n\n```json\n[{\"id\":\"shopping_id\",\"name\":\"Shopping\",\"is_subcategory\":false,\"parent_id\":null,\"classification\":\"expense\"},{\"id\":\"restaurants_id\",\"name\":\"Restaurants\",\"is_subcategory\":false,\"parent_id\":null,\"classification\":\"expense\"}]\n```\n\nUse
|
||||
string: '{"model":"gpt-4.1-mini","input":[{"role":"developer","content":"Here
|
||||
are the user''s available categories in JSON format:\n\n```json\n[{\"id\":\"shopping_id\",\"name\":\"Shopping\",\"is_subcategory\":false,\"parent_id\":null,\"classification\":\"expense\"},{\"id\":\"subscriptions_id\",\"name\":\"Subscriptions\",\"is_subcategory\":true,\"parent_id\":null,\"classification\":\"expense\"},{\"id\":\"restaurants_id\",\"name\":\"Restaurants\",\"is_subcategory\":false,\"parent_id\":null,\"classification\":\"expense\"},{\"id\":\"fast_food_id\",\"name\":\"Fast
|
||||
Food\",\"is_subcategory\":true,\"parent_id\":\"restaurants_id\",\"classification\":\"expense\"},{\"id\":\"income_id\",\"name\":\"Income\",\"is_subcategory\":false,\"parent_id\":null,\"classification\":\"income\"}]\n```\n\nUse
|
||||
the available categories to auto-categorize the following transactions:\n\n```json\n[{\"id\":\"1\",\"name\":\"McDonalds\",\"amount\":20,\"classification\":\"expense\",\"merchant\":\"McDonalds\",\"hint\":\"Fast
|
||||
Food\"},{\"id\":\"2\",\"name\":\"Amazon purchase\",\"amount\":100,\"classification\":\"expense\",\"merchant\":\"Amazon\"},{\"id\":\"3\",\"name\":\"Netflix
|
||||
subscription\",\"amount\":10,\"classification\":\"expense\",\"merchant\":\"Netflix\",\"hint\":\"Subscriptions\"}]\n```\n"}],"text":{"format":{"type":"json_schema","name":"auto_categorize_personal_finance_transactions","strict":true,"schema":{"type":"object","properties":{"categorizations":{"type":"array","description":"An
|
||||
subscription\",\"amount\":10,\"classification\":\"expense\",\"merchant\":\"Netflix\",\"hint\":\"Subscriptions\"},{\"id\":\"4\",\"name\":\"paycheck\",\"amount\":3000,\"classification\":\"income\"},{\"id\":\"5\",\"name\":\"Italian
|
||||
dinner with friends\",\"amount\":100,\"classification\":\"expense\"},{\"id\":\"6\",\"name\":\"1212XXXBCaaa
|
||||
charge\",\"amount\":2.99,\"classification\":\"expense\"}]\n```\n"}],"text":{"format":{"type":"json_schema","name":"auto_categorize_personal_finance_transactions","strict":true,"schema":{"type":"object","properties":{"categorizations":{"type":"array","description":"An
|
||||
array of auto-categorizations for each transaction","items":{"type":"object","properties":{"transaction_id":{"type":"string","description":"The
|
||||
internal ID of the original transaction","enum":["1","2","3"]},"category_name":{"type":"string","description":"The
|
||||
matched category name of the transaction, or null if no match","enum":["Shopping","Restaurants","null"]}},"required":["transaction_id","category_name"],"additionalProperties":false}}},"required":["categorizations"],"additionalProperties":false}}},"instructions":"You
|
||||
internal ID of the original transaction","enum":["1","2","3","4","5","6"]},"category_name":{"type":"string","description":"The
|
||||
matched category name of the transaction, or null if no match","enum":["Shopping","Subscriptions","Restaurants","Fast
|
||||
Food","Income","null"]}},"required":["transaction_id","category_name"],"additionalProperties":false}}},"required":["categorizations"],"additionalProperties":false}}},"instructions":"You
|
||||
are an assistant to a consumer personal finance app. You will be provided
|
||||
a list\nof the user''s transactions and a list of the user''s categories. Your
|
||||
job is to auto-categorize\neach transaction.\n\nClosely follow ALL the rules
|
||||
|
@ -44,7 +48,7 @@ http_interactions:
|
|||
message: OK
|
||||
headers:
|
||||
Date:
|
||||
- Tue, 15 Apr 2025 23:37:03 GMT
|
||||
- Wed, 16 Apr 2025 14:07:39 GMT
|
||||
Content-Type:
|
||||
- application/json
|
||||
Transfer-Encoding:
|
||||
|
@ -56,50 +60,50 @@ http_interactions:
|
|||
Openai-Organization:
|
||||
- user-r6cwd3mn6iv6gn748b2xoajx
|
||||
X-Request-Id:
|
||||
- req_66a1351ce255af7a2d6d97f291443f0f
|
||||
- req_01b869bd9eb7b994a80e79f6de92e5a2
|
||||
Openai-Processing-Ms:
|
||||
- '4685'
|
||||
- '2173'
|
||||
Strict-Transport-Security:
|
||||
- max-age=31536000; includeSubDomains; preload
|
||||
Cf-Cache-Status:
|
||||
- DYNAMIC
|
||||
Set-Cookie:
|
||||
- __cf_bm=Cz9arSNtnMJ5YW26HFXEFAAEuy5cyoZ1zOWjBLXvZiQ-1744760223-1.0.1.1-S15VvCx.x3tvOi74lMrJ_5XVpwtCEOtsLJ_3fbLW.qfsY4Q9.8mPYKA_PSa97.9t5iL4VxtfJZj0DrI9kDSq3aMk2Y3ajdPUHNAQnZUp3vI;
|
||||
path=/; expires=Wed, 16-Apr-25 00:07:03 GMT; domain=.api.openai.com; HttpOnly;
|
||||
- __cf_bm=xGkX7L6XeEFLp6ZPB2Y.LLHD_YSpzTH28MUro6fQG7Y-1744812459-1.0.1.1-uy8WQsFzGblq3h.u6WFs2vld_HM.5fveVAFBsQ6y.Za22DSEa22k3NS7.GAUbgAvoVjGvSQlkm8LkSZyU3wZfN70cUpZrg27orQt0Nfq91U;
|
||||
path=/; expires=Wed, 16-Apr-25 14:37:39 GMT; domain=.api.openai.com; HttpOnly;
|
||||
Secure; SameSite=None
|
||||
- _cfuvid=O9qfsS9kbZw6J4uiNLYwZt2lDb1iY.XZjbiUDTVWRq4-1744760223916-0.0.1.1-604800000;
|
||||
- _cfuvid=LicWzTMZxt1n1GLU6XQx3NnU0PbKnI0m97CH.p0895U-1744812459077-0.0.1.1-604800000;
|
||||
path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
|
||||
X-Content-Type-Options:
|
||||
- nosniff
|
||||
Server:
|
||||
- cloudflare
|
||||
Cf-Ray:
|
||||
- 930f44a99dbbd287-CMH
|
||||
- 93143ffeffe8cf6b-CMH
|
||||
Alt-Svc:
|
||||
- h3=":443"; ma=86400
|
||||
body:
|
||||
encoding: ASCII-8BIT
|
||||
string: |-
|
||||
{
|
||||
"id": "resp_67feed9b386081929a83999f7c7167fa0e152c5eb776dde0",
|
||||
"id": "resp_67ffb9a8e530819290c5d3ec8aaf326d0e0f06e2ac13ae37",
|
||||
"object": "response",
|
||||
"created_at": 1744760219,
|
||||
"created_at": 1744812456,
|
||||
"status": "completed",
|
||||
"error": null,
|
||||
"incomplete_details": null,
|
||||
"instructions": "You are an assistant to a consumer personal finance app. You will be provided a list\nof the user's transactions and a list of the user's categories. Your job is to auto-categorize\neach transaction.\n\nClosely follow ALL the rules below while auto-categorizing:\n\n- Return 1 result per transaction\n- Correlate each transaction by ID (transaction_id)\n- Attempt to match the most specific category possible (i.e. subcategory over parent category)\n- Category and transaction classifications should match (i.e. if transaction is an \"expense\", the category must have classification of \"expense\")\n- If you don't know the category, return \"null\"\n - You should always favor \"null\" over false positives\n - Be slightly pessimistic. Only match a category if you're 60%+ confident it is the correct one.\n- Each transaction has varying metadata that can be used to determine the category\n - Note: \"hint\" comes from 3rd party aggregators and typically represents a category name that\n may or may not match any of the user-supplied categories\n",
|
||||
"max_output_tokens": null,
|
||||
"model": "gpt-4o-mini-2024-07-18",
|
||||
"model": "gpt-4.1-mini-2025-04-14",
|
||||
"output": [
|
||||
{
|
||||
"id": "msg_67feed9f6d948192824b8b23db6562720e152c5eb776dde0",
|
||||
"id": "msg_67ffb9a96b3c81928d9da130e889a9aa0e0f06e2ac13ae37",
|
||||
"type": "message",
|
||||
"status": "completed",
|
||||
"content": [
|
||||
{
|
||||
"type": "output_text",
|
||||
"annotations": [],
|
||||
"text": "{\"categorizations\":[{\"transaction_id\":\"1\",\"category_name\":\"Restaurants\"},{\"transaction_id\":\"2\",\"category_name\":\"Shopping\"},{\"transaction_id\":\"3\",\"category_name\":\"null\"}]}"
|
||||
"text": "{\"categorizations\":[{\"transaction_id\":\"1\",\"category_name\":\"Fast Food\"},{\"transaction_id\":\"2\",\"category_name\":\"Shopping\"},{\"transaction_id\":\"3\",\"category_name\":\"Subscriptions\"},{\"transaction_id\":\"4\",\"category_name\":\"Income\"},{\"transaction_id\":\"5\",\"category_name\":\"Restaurants\"},{\"transaction_id\":\"6\",\"category_name\":\"null\"}]}"
|
||||
}
|
||||
],
|
||||
"role": "assistant"
|
||||
|
@ -109,7 +113,7 @@ http_interactions:
|
|||
"previous_response_id": null,
|
||||
"reasoning": {
|
||||
"effort": null,
|
||||
"generate_summary": null
|
||||
"summary": null
|
||||
},
|
||||
"store": true,
|
||||
"temperature": 1.0,
|
||||
|
@ -133,7 +137,10 @@ http_interactions:
|
|||
"enum": [
|
||||
"1",
|
||||
"2",
|
||||
"3"
|
||||
"3",
|
||||
"4",
|
||||
"5",
|
||||
"6"
|
||||
]
|
||||
},
|
||||
"category_name": {
|
||||
|
@ -141,7 +148,10 @@ http_interactions:
|
|||
"description": "The matched category name of the transaction, or null if no match",
|
||||
"enum": [
|
||||
"Shopping",
|
||||
"Subscriptions",
|
||||
"Restaurants",
|
||||
"Fast Food",
|
||||
"Income",
|
||||
"null"
|
||||
]
|
||||
}
|
||||
|
@ -167,18 +177,18 @@ http_interactions:
|
|||
"top_p": 1.0,
|
||||
"truncation": "disabled",
|
||||
"usage": {
|
||||
"input_tokens": 511,
|
||||
"input_tokens": 659,
|
||||
"input_tokens_details": {
|
||||
"cached_tokens": 0
|
||||
},
|
||||
"output_tokens": 39,
|
||||
"output_tokens": 70,
|
||||
"output_tokens_details": {
|
||||
"reasoning_tokens": 0
|
||||
},
|
||||
"total_tokens": 550
|
||||
"total_tokens": 729
|
||||
},
|
||||
"user": null,
|
||||
"metadata": {}
|
||||
}
|
||||
recorded_at: Tue, 15 Apr 2025 23:37:03 GMT
|
||||
recorded_at: Wed, 16 Apr 2025 14:07:39 GMT
|
||||
recorded_with: VCR 6.3.1
|
||||
|
|
203
test/vcr_cassettes/openai/auto_detect_merchants.yml
Normal file
203
test/vcr_cassettes/openai/auto_detect_merchants.yml
Normal file
|
@ -0,0 +1,203 @@
|
|||
---
|
||||
http_interactions:
|
||||
- request:
|
||||
method: post
|
||||
uri: https://api.openai.com/v1/responses
|
||||
body:
|
||||
encoding: UTF-8
|
||||
string: '{"model":"gpt-4.1-mini","input":[{"role":"developer","content":"Here
|
||||
are the user''s available merchants in JSON format:\n\n```json\n[{\"name\":\"Shooters\"}]\n```\n\nUse
|
||||
BOTH your knowledge AND the user-generated merchants to auto-detect the following
|
||||
transactions:\n\n```json\n[{\"id\":\"1\",\"name\":\"McDonalds\",\"amount\":20,\"classification\":\"expense\"},{\"id\":\"2\",\"name\":\"local
|
||||
pub\",\"amount\":20,\"classification\":\"expense\"},{\"id\":\"3\",\"name\":\"WMT
|
||||
purchases\",\"amount\":20,\"classification\":\"expense\"},{\"id\":\"4\",\"name\":\"amzn
|
||||
123 abc\",\"amount\":20,\"classification\":\"expense\"},{\"id\":\"5\",\"name\":\"chaseX1231\",\"amount\":2000,\"classification\":\"income\"},{\"id\":\"6\",\"name\":\"check
|
||||
deposit 022\",\"amount\":200,\"classification\":\"income\"},{\"id\":\"7\",\"name\":\"shooters
|
||||
bar and grill\",\"amount\":200,\"classification\":\"expense\"},{\"id\":\"8\",\"name\":\"Microsoft
|
||||
Office subscription\",\"amount\":200,\"classification\":\"expense\"}]\n```\n\nReturn
|
||||
\"null\" if you are not 80%+ confident in your answer.\n"}],"text":{"format":{"type":"json_schema","name":"auto_detect_personal_finance_merchants","strict":true,"schema":{"type":"object","properties":{"merchants":{"type":"array","description":"An
|
||||
array of auto-detected merchant businesses for each transaction","items":{"type":"object","properties":{"transaction_id":{"type":"string","description":"The
|
||||
internal ID of the original transaction","enum":["1","2","3","4","5","6","7","8"]},"business_name":{"type":["string","null"],"description":"The
|
||||
detected business name of the transaction, or `null` if uncertain"},"business_url":{"type":["string","null"],"description":"The
|
||||
URL of the detected business, or `null` if uncertain"}},"required":["transaction_id","business_name","business_url"],"additionalProperties":false}}},"required":["merchants"],"additionalProperties":false}}},"instructions":"You
|
||||
are an assistant to a consumer personal finance app.\n\nClosely follow ALL
|
||||
the rules below while auto-detecting business names and website URLs:\n\n-
|
||||
Return 1 result per transaction\n- Correlate each transaction by ID (transaction_id)\n-
|
||||
Do not include the subdomain in the business_url (i.e. \"amazon.com\" not
|
||||
\"www.amazon.com\")\n- User merchants are considered \"manual\" user-generated
|
||||
merchants and should only be used in 100% clear cases\n- Be slightly pessimistic. We
|
||||
favor returning \"null\" over returning a false positive.\n- NEVER return
|
||||
a name or URL for generic transaction names (e.g. \"Paycheck\", \"Laundromat\",
|
||||
\"Grocery store\", \"Local diner\")\n\nDetermining a value:\n\n- First attempt
|
||||
to determine the name + URL from your knowledge of global businesses\n- If
|
||||
no certain match, attempt to match one of the user-provided merchants\n- If
|
||||
no match, return \"null\"\n\nExample 1 (known business):\n\n```\nTransaction
|
||||
name: \"Some Amazon purchases\"\n\nResult:\n- business_name: \"Amazon\"\n-
|
||||
business_url: \"amazon.com\"\n```\n\nExample 2 (generic business):\n\n```\nTransaction
|
||||
name: \"local diner\"\n\nResult:\n- business_name: null\n- business_url: null\n```\n"}'
|
||||
headers:
|
||||
Content-Type:
|
||||
- application/json
|
||||
Authorization:
|
||||
- Bearer <OPENAI_ACCESS_TOKEN>
|
||||
Accept-Encoding:
|
||||
- gzip;q=1.0,deflate;q=0.6,identity;q=0.3
|
||||
Accept:
|
||||
- "*/*"
|
||||
User-Agent:
|
||||
- Ruby
|
||||
response:
|
||||
status:
|
||||
code: 200
|
||||
message: OK
|
||||
headers:
|
||||
Date:
|
||||
- Wed, 16 Apr 2025 15:41:50 GMT
|
||||
Content-Type:
|
||||
- application/json
|
||||
Transfer-Encoding:
|
||||
- chunked
|
||||
Connection:
|
||||
- keep-alive
|
||||
Openai-Version:
|
||||
- '2020-10-01'
|
||||
Openai-Organization:
|
||||
- user-r6cwd3mn6iv6gn748b2xoajx
|
||||
X-Request-Id:
|
||||
- req_77a41d32ae2c3dbd9081b34bc5e4ce61
|
||||
Openai-Processing-Ms:
|
||||
- '2152'
|
||||
Strict-Transport-Security:
|
||||
- max-age=31536000; includeSubDomains; preload
|
||||
Cf-Cache-Status:
|
||||
- DYNAMIC
|
||||
Set-Cookie:
|
||||
- __cf_bm=hCFJRspk322ZVvRasJGcux5mYDyfa5aO7EQOCAbnhjM-1744818110-1.0.1.1-.fRz_SYTG_PqZ3VCSDju7YeDaZwCyf5OGVvDvaN.h3aegNTlYtdPwbnZ5NNFxLRJhWFRY4vwHYkHm1DGTarK5NQ6UjA1sOrRpmS5eZ.zabw;
|
||||
path=/; expires=Wed, 16-Apr-25 16:11:50 GMT; domain=.api.openai.com; HttpOnly;
|
||||
Secure; SameSite=None
|
||||
- _cfuvid=At3dVxwug2seJ3Oa02PSnIoKhVSEvt6IPCLfhkULvac-1744818110064-0.0.1.1-604800000;
|
||||
path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
|
||||
X-Content-Type-Options:
|
||||
- nosniff
|
||||
Server:
|
||||
- cloudflare
|
||||
Cf-Ray:
|
||||
- 9314c9f5cef5efe9-CMH
|
||||
Alt-Svc:
|
||||
- h3=":443"; ma=86400
|
||||
body:
|
||||
encoding: ASCII-8BIT
|
||||
string: |-
|
||||
{
|
||||
"id": "resp_67ffcfbbddb48192a251a3c0f341941a04d20b39fa51ef90",
|
||||
"object": "response",
|
||||
"created_at": 1744818107,
|
||||
"status": "completed",
|
||||
"error": null,
|
||||
"incomplete_details": null,
|
||||
"instructions": "You are an assistant to a consumer personal finance app.\n\nClosely follow ALL the rules below while auto-detecting business names and website URLs:\n\n- Return 1 result per transaction\n- Correlate each transaction by ID (transaction_id)\n- Do not include the subdomain in the business_url (i.e. \"amazon.com\" not \"www.amazon.com\")\n- User merchants are considered \"manual\" user-generated merchants and should only be used in 100% clear cases\n- Be slightly pessimistic. We favor returning \"null\" over returning a false positive.\n- NEVER return a name or URL for generic transaction names (e.g. \"Paycheck\", \"Laundromat\", \"Grocery store\", \"Local diner\")\n\nDetermining a value:\n\n- First attempt to determine the name + URL from your knowledge of global businesses\n- If no certain match, attempt to match one of the user-provided merchants\n- If no match, return \"null\"\n\nExample 1 (known business):\n\n```\nTransaction name: \"Some Amazon purchases\"\n\nResult:\n- business_name: \"Amazon\"\n- business_url: \"amazon.com\"\n```\n\nExample 2 (generic business):\n\n```\nTransaction name: \"local diner\"\n\nResult:\n- business_name: null\n- business_url: null\n```\n",
|
||||
"max_output_tokens": null,
|
||||
"model": "gpt-4.1-mini-2025-04-14",
|
||||
"output": [
|
||||
{
|
||||
"id": "msg_67ffcfbc58bc8192bbcf4dc54759837c04d20b39fa51ef90",
|
||||
"type": "message",
|
||||
"status": "completed",
|
||||
"content": [
|
||||
{
|
||||
"type": "output_text",
|
||||
"annotations": [],
|
||||
"text": "{\"merchants\":[{\"transaction_id\":\"1\",\"business_name\":\"McDonald's\",\"business_url\":\"mcdonalds.com\"},{\"transaction_id\":\"2\",\"business_name\":null,\"business_url\":null},{\"transaction_id\":\"3\",\"business_name\":\"Walmart\",\"business_url\":\"walmart.com\"},{\"transaction_id\":\"4\",\"business_name\":\"Amazon\",\"business_url\":\"amazon.com\"},{\"transaction_id\":\"5\",\"business_name\":null,\"business_url\":null},{\"transaction_id\":\"6\",\"business_name\":null,\"business_url\":null},{\"transaction_id\":\"7\",\"business_name\":\"Shooters\",\"business_url\":null},{\"transaction_id\":\"8\",\"business_name\":\"Microsoft\",\"business_url\":\"microsoft.com\"}]}"
|
||||
}
|
||||
],
|
||||
"role": "assistant"
|
||||
}
|
||||
],
|
||||
"parallel_tool_calls": true,
|
||||
"previous_response_id": null,
|
||||
"reasoning": {
|
||||
"effort": null,
|
||||
"summary": null
|
||||
},
|
||||
"store": true,
|
||||
"temperature": 1.0,
|
||||
"text": {
|
||||
"format": {
|
||||
"type": "json_schema",
|
||||
"description": null,
|
||||
"name": "auto_detect_personal_finance_merchants",
|
||||
"schema": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"merchants": {
|
||||
"type": "array",
|
||||
"description": "An array of auto-detected merchant businesses for each transaction",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"transaction_id": {
|
||||
"type": "string",
|
||||
"description": "The internal ID of the original transaction",
|
||||
"enum": [
|
||||
"1",
|
||||
"2",
|
||||
"3",
|
||||
"4",
|
||||
"5",
|
||||
"6",
|
||||
"7",
|
||||
"8"
|
||||
]
|
||||
},
|
||||
"business_name": {
|
||||
"type": [
|
||||
"string",
|
||||
"null"
|
||||
],
|
||||
"description": "The detected business name of the transaction, or `null` if uncertain"
|
||||
},
|
||||
"business_url": {
|
||||
"type": [
|
||||
"string",
|
||||
"null"
|
||||
],
|
||||
"description": "The URL of the detected business, or `null` if uncertain"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"transaction_id",
|
||||
"business_name",
|
||||
"business_url"
|
||||
],
|
||||
"additionalProperties": false
|
||||
}
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"merchants"
|
||||
],
|
||||
"additionalProperties": false
|
||||
},
|
||||
"strict": true
|
||||
}
|
||||
},
|
||||
"tool_choice": "auto",
|
||||
"tools": [],
|
||||
"top_p": 1.0,
|
||||
"truncation": "disabled",
|
||||
"usage": {
|
||||
"input_tokens": 635,
|
||||
"input_tokens_details": {
|
||||
"cached_tokens": 0
|
||||
},
|
||||
"output_tokens": 140,
|
||||
"output_tokens_details": {
|
||||
"reasoning_tokens": 0
|
||||
},
|
||||
"total_tokens": 775
|
||||
},
|
||||
"user": null,
|
||||
"metadata": {}
|
||||
}
|
||||
recorded_at: Wed, 16 Apr 2025 15:41:50 GMT
|
||||
recorded_with: VCR 6.3.1
|
Loading…
Add table
Add a link
Reference in a new issue