From fe24117c509bf880b210d425aaa54f1eef416f94 Mon Sep 17 00:00:00 2001 From: Zach Gollwitzer Date: Thu, 22 May 2025 15:15:07 -0400 Subject: [PATCH 1/4] Stronger security unique index and data migration Note to self hosters: If you started self hosting prior to this commit, you may have duplicate securities in your database. This is usually not a problem, but if you'd like to clean things up, you can run the data migration by opening a terminal on the machine you're hosting with and running: ```sh rake data_migration:migrate_duplicate_securities ``` --- ...ger_uniqueness_constraint_on_securities.rb | 13 ++++ db/schema.rb | 4 +- lib/tasks/data_migration.rake | 59 +++++++++++++++++++ 3 files changed, 74 insertions(+), 2 deletions(-) create mode 100644 db/migrate/20250522174753_stronger_uniqueness_constraint_on_securities.rb diff --git a/db/migrate/20250522174753_stronger_uniqueness_constraint_on_securities.rb b/db/migrate/20250522174753_stronger_uniqueness_constraint_on_securities.rb new file mode 100644 index 00000000..ac49330f --- /dev/null +++ b/db/migrate/20250522174753_stronger_uniqueness_constraint_on_securities.rb @@ -0,0 +1,13 @@ +class StrongerUniquenessConstraintOnSecurities < ActiveRecord::Migration[7.2] + def change + remove_index :securities, [ :ticker, :exchange_operating_mic ], unique: true + + # Matches our ActiveRecord validation: + # - uppercase ticker + # - either exchange_operating_mic or empty string (unique index doesn't work with NULL values) + add_index :securities, + "UPPER(ticker), COALESCE(UPPER(exchange_operating_mic), '')", + unique: true, + name: "index_securities_on_ticker_and_exchange_operating_mic_unique" + end +end diff --git a/db/schema.rb b/db/schema.rb index 9f079956..1d03733d 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -10,7 +10,7 @@ # # It's strongly recommended that you check this file into your version control system. -ActiveRecord::Schema[7.2].define(version: 2025_05_21_112347) do +ActiveRecord::Schema[7.2].define(version: 2025_05_22_174753) do # These are extensions that must be enabled in order to support this database enable_extension "pgcrypto" enable_extension "plpgsql" @@ -517,9 +517,9 @@ ActiveRecord::Schema[7.2].define(version: 2025_05_21_112347) do t.datetime "failed_fetch_at" t.integer "failed_fetch_count", default: 0, null: false t.datetime "last_health_check_at" + t.index "upper((ticker)::text), COALESCE(upper((exchange_operating_mic)::text), ''::text)", name: "index_securities_on_ticker_and_exchange_operating_mic_unique", unique: true t.index ["country_code"], name: "index_securities_on_country_code" t.index ["exchange_operating_mic"], name: "index_securities_on_exchange_operating_mic" - t.index ["ticker", "exchange_operating_mic"], name: "index_securities_on_ticker_and_exchange_operating_mic", unique: true end create_table "security_prices", id: :uuid, default: -> { "gen_random_uuid()" }, force: :cascade do |t| diff --git a/lib/tasks/data_migration.rake b/lib/tasks/data_migration.rake index c84dbab0..3b6882fd 100644 --- a/lib/tasks/data_migration.rake +++ b/lib/tasks/data_migration.rake @@ -20,4 +20,63 @@ namespace :data_migration do puts "Error updating webhook for Plaid item #{item.plaid_id}: #{error.message}" end end + + desc "Migrate duplicate securities" + # 2025-05-22: older data allowed multiple rows with the same + # ticker / exchange_operating_mic (case-insensitive, NULLs collapsed). + # This task: + # 1. Finds each duplicate group + # 2. Chooses the earliest-created row as the keeper + # 3. Re-points holdings and trades to the keeper + # 4. Destroys the duplicate (which also removes its prices) + task migrate_duplicate_securities: :environment do + puts "==> Scanning for duplicate securities…" + + duplicate_sets = Security + .select("UPPER(ticker) AS up_ticker, + COALESCE(UPPER(exchange_operating_mic), '') AS up_mic, + COUNT(*) AS dup_count") + .group("up_ticker, up_mic") + .having("COUNT(*) > 1") + .to_a + + puts "Found #{duplicate_sets.size} duplicate groups." + + duplicate_sets.each_with_index do |set, idx| + # Fetch duplicates ordered by creation; the first row becomes keeper + duplicates_scope = Security + .where("UPPER(ticker) = ? AND COALESCE(UPPER(exchange_operating_mic), '') = ?", + set.up_ticker, set.up_mic) + .order(:created_at) + + keeper = duplicates_scope.first + next unless keeper + + duplicates = duplicates_scope.offset(1) + + dup_ids = duplicates.ids + + # Skip if nothing to merge (defensive; shouldn't occur) + next if dup_ids.empty? + + begin + ActiveRecord::Base.transaction do + updated_holdings = Holding.where(security_id: dup_ids).update_all(security_id: keeper.id) + updated_trades = Trade.where(security_id: dup_ids).update_all(security_id: keeper.id) + + # Ensure no rows remain pointing at duplicates before deletion + raise "Leftover holdings detected" if Holding.where(security_id: dup_ids).exists? + raise "Leftover trades detected" if Trade.where(security_id: dup_ids).exists? + + duplicates.each(&:destroy!) # destroys its security_prices via dependent: :destroy + end + + puts "[#{idx + 1}/#{duplicate_sets.size}] Merged #{dup_ids.join(', ')} → #{keeper.id} (#{updated_holdings} holdings, #{updated_trades} trades)" + rescue => e + puts "ERROR migrating #{dup_ids.join(', ')}: #{e.message}" + end + end + + puts "✅ Duplicate security migration complete." + end end From 19804d2b05667e13370b35a86770516dbce45f3a Mon Sep 17 00:00:00 2001 From: Zach Gollwitzer Date: Thu, 22 May 2025 15:39:45 -0400 Subject: [PATCH 2/4] temp: remove schema migration --- ..._stronger_uniqueness_constraint_on_securities.rb | 13 ------------- db/schema.rb | 4 ++-- 2 files changed, 2 insertions(+), 15 deletions(-) delete mode 100644 db/migrate/20250522174753_stronger_uniqueness_constraint_on_securities.rb diff --git a/db/migrate/20250522174753_stronger_uniqueness_constraint_on_securities.rb b/db/migrate/20250522174753_stronger_uniqueness_constraint_on_securities.rb deleted file mode 100644 index ac49330f..00000000 --- a/db/migrate/20250522174753_stronger_uniqueness_constraint_on_securities.rb +++ /dev/null @@ -1,13 +0,0 @@ -class StrongerUniquenessConstraintOnSecurities < ActiveRecord::Migration[7.2] - def change - remove_index :securities, [ :ticker, :exchange_operating_mic ], unique: true - - # Matches our ActiveRecord validation: - # - uppercase ticker - # - either exchange_operating_mic or empty string (unique index doesn't work with NULL values) - add_index :securities, - "UPPER(ticker), COALESCE(UPPER(exchange_operating_mic), '')", - unique: true, - name: "index_securities_on_ticker_and_exchange_operating_mic_unique" - end -end diff --git a/db/schema.rb b/db/schema.rb index 1d03733d..9f079956 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -10,7 +10,7 @@ # # It's strongly recommended that you check this file into your version control system. -ActiveRecord::Schema[7.2].define(version: 2025_05_22_174753) do +ActiveRecord::Schema[7.2].define(version: 2025_05_21_112347) do # These are extensions that must be enabled in order to support this database enable_extension "pgcrypto" enable_extension "plpgsql" @@ -517,9 +517,9 @@ ActiveRecord::Schema[7.2].define(version: 2025_05_22_174753) do t.datetime "failed_fetch_at" t.integer "failed_fetch_count", default: 0, null: false t.datetime "last_health_check_at" - t.index "upper((ticker)::text), COALESCE(upper((exchange_operating_mic)::text), ''::text)", name: "index_securities_on_ticker_and_exchange_operating_mic_unique", unique: true t.index ["country_code"], name: "index_securities_on_country_code" t.index ["exchange_operating_mic"], name: "index_securities_on_exchange_operating_mic" + t.index ["ticker", "exchange_operating_mic"], name: "index_securities_on_ticker_and_exchange_operating_mic", unique: true end create_table "security_prices", id: :uuid, default: -> { "gen_random_uuid()" }, force: :cascade do |t| From fcdc42760d9ed2c5e38dc1dcb09e37c02c7a8419 Mon Sep 17 00:00:00 2001 From: Zach Gollwitzer Date: Thu, 22 May 2025 16:02:34 -0400 Subject: [PATCH 3/4] Tweak dup securities data migration --- lib/tasks/data_migration.rake | 40 +++++++++++++++++++++++++++++++---- 1 file changed, 36 insertions(+), 4 deletions(-) diff --git a/lib/tasks/data_migration.rake b/lib/tasks/data_migration.rake index 3b6882fd..509e033c 100644 --- a/lib/tasks/data_migration.rake +++ b/lib/tasks/data_migration.rake @@ -61,17 +61,49 @@ namespace :data_migration do begin ActiveRecord::Base.transaction do - updated_holdings = Holding.where(security_id: dup_ids).update_all(security_id: keeper.id) - updated_trades = Trade.where(security_id: dup_ids).update_all(security_id: keeper.id) + # -------------------------------------------------------------- + # HOLDINGS + # -------------------------------------------------------------- + holdings_moved = 0 + holdings_deleted = 0 + + dup_ids.each do |dup_id| + Holding.where(security_id: dup_id).find_each(batch_size: 1_000) do |holding| + # Will this holding collide with an existing keeper row? + conflict_exists = Holding.where( + account_id: holding.account_id, + security_id: keeper.id, + date: holding.date, + currency: holding.currency + ).exists? + + if conflict_exists + holding.destroy! + holdings_deleted += 1 + else + holding.update!(security_id: keeper.id) + holdings_moved += 1 + end + end + end + + # -------------------------------------------------------------- + # TRADES — no uniqueness constraints -> bulk update is fine + # -------------------------------------------------------------- + trades_moved = Trade.where(security_id: dup_ids).update_all(security_id: keeper.id) # Ensure no rows remain pointing at duplicates before deletion raise "Leftover holdings detected" if Holding.where(security_id: dup_ids).exists? raise "Leftover trades detected" if Trade.where(security_id: dup_ids).exists? duplicates.each(&:destroy!) # destroys its security_prices via dependent: :destroy - end - puts "[#{idx + 1}/#{duplicate_sets.size}] Merged #{dup_ids.join(', ')} → #{keeper.id} (#{updated_holdings} holdings, #{updated_trades} trades)" + # Log inside the transaction so counters are in-scope + total_holdings = holdings_moved + holdings_deleted + puts "[#{idx + 1}/#{duplicate_sets.size}] Merged #{dup_ids.join(', ')} → #{keeper.id} " \ + "(#{total_holdings} holdings → #{holdings_moved} moved, #{holdings_deleted} removed, " \ + "#{trades_moved} trades)" + end rescue => e puts "ERROR migrating #{dup_ids.join(', ')}: #{e.message}" end From c7d9c944895921a8530cdfef0ef4965405939854 Mon Sep 17 00:00:00 2001 From: Zach Gollwitzer Date: Thu, 22 May 2025 16:11:22 -0400 Subject: [PATCH 4/4] Add back schema migration for unique securities --- ...50522201031_stronger_unique_index_on_security.rb | 13 +++++++++++++ db/schema.rb | 4 ++-- 2 files changed, 15 insertions(+), 2 deletions(-) create mode 100644 db/migrate/20250522201031_stronger_unique_index_on_security.rb diff --git a/db/migrate/20250522201031_stronger_unique_index_on_security.rb b/db/migrate/20250522201031_stronger_unique_index_on_security.rb new file mode 100644 index 00000000..43672f81 --- /dev/null +++ b/db/migrate/20250522201031_stronger_unique_index_on_security.rb @@ -0,0 +1,13 @@ +class StrongerUniqueIndexOnSecurity < ActiveRecord::Migration[7.2] + def change + remove_index :securities, [ :ticker, :exchange_operating_mic ], unique: true + + # Matches our ActiveRecord validation: + # - uppercase ticker + # - either exchange_operating_mic or empty string (unique index doesn't work with NULL values) + add_index :securities, + "UPPER(ticker), COALESCE(UPPER(exchange_operating_mic), '')", + unique: true, + name: "index_securities_on_ticker_and_exchange_operating_mic_unique" + end +end diff --git a/db/schema.rb b/db/schema.rb index 9f079956..11612da4 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -10,7 +10,7 @@ # # It's strongly recommended that you check this file into your version control system. -ActiveRecord::Schema[7.2].define(version: 2025_05_21_112347) do +ActiveRecord::Schema[7.2].define(version: 2025_05_22_201031) do # These are extensions that must be enabled in order to support this database enable_extension "pgcrypto" enable_extension "plpgsql" @@ -517,9 +517,9 @@ ActiveRecord::Schema[7.2].define(version: 2025_05_21_112347) do t.datetime "failed_fetch_at" t.integer "failed_fetch_count", default: 0, null: false t.datetime "last_health_check_at" + t.index "upper((ticker)::text), COALESCE(upper((exchange_operating_mic)::text), ''::text)", name: "index_securities_on_ticker_and_exchange_operating_mic_unique", unique: true t.index ["country_code"], name: "index_securities_on_country_code" t.index ["exchange_operating_mic"], name: "index_securities_on_exchange_operating_mic" - t.index ["ticker", "exchange_operating_mic"], name: "index_securities_on_ticker_and_exchange_operating_mic", unique: true end create_table "security_prices", id: :uuid, default: -> { "gen_random_uuid()" }, force: :cascade do |t|