diff --git a/db/migrate/20250522201031_stronger_unique_index_on_security.rb b/db/migrate/20250522201031_stronger_unique_index_on_security.rb
new file mode 100644
index 00000000..43672f81
--- /dev/null
+++ b/db/migrate/20250522201031_stronger_unique_index_on_security.rb
@@ -0,0 +1,13 @@
+class StrongerUniqueIndexOnSecurity < ActiveRecord::Migration[7.2]
+  def change
+    remove_index :securities, [ :ticker, :exchange_operating_mic ], unique: true
+
+    # Matches our ActiveRecord validation:
+    # - uppercase ticker
+    # - exchange_operating_mic, or '' when NULL (unique indexes treat NULLs as distinct)
+    add_index :securities,
+              "UPPER(ticker), COALESCE(UPPER(exchange_operating_mic), '')",
+              unique: true,
+              name: "index_securities_on_ticker_and_exchange_operating_mic_unique"
+  end
+end
diff --git a/db/schema.rb b/db/schema.rb
index dc54c285..c4d0be5f 100644
--- a/db/schema.rb
+++ b/db/schema.rb
@@ -10,7 +10,7 @@
 #
 # It's strongly recommended that you check this file into your version control system.
 
-ActiveRecord::Schema[7.2].define(version: 2025_05_21_112347) do
+ActiveRecord::Schema[7.2].define(version: 2025_05_22_201031) do
   # These are extensions that must be enabled in order to support this database
   enable_extension "pgcrypto"
   enable_extension "plpgsql"
@@ -523,9 +523,9 @@ ActiveRecord::Schema[7.2].define(version: 2025_05_21_112347) do
     t.datetime "failed_fetch_at"
     t.integer "failed_fetch_count", default: 0, null: false
     t.datetime "last_health_check_at"
+    t.index "upper((ticker)::text), COALESCE(upper((exchange_operating_mic)::text), ''::text)", name: "index_securities_on_ticker_and_exchange_operating_mic_unique", unique: true
     t.index ["country_code"], name: "index_securities_on_country_code"
     t.index ["exchange_operating_mic"], name: "index_securities_on_exchange_operating_mic"
-    t.index ["ticker", "exchange_operating_mic"], name: "index_securities_on_ticker_and_exchange_operating_mic", unique: true
   end
 
   create_table "security_prices", id: :uuid, default: -> { "gen_random_uuid()" }, force: :cascade do |t|
diff --git a/lib/tasks/data_migration.rake b/lib/tasks/data_migration.rake
index c84dbab0..509e033c 100644
--- a/lib/tasks/data_migration.rake
+++ b/lib/tasks/data_migration.rake
@@ -20,4 +20,95 @@ namespace :data_migration do
       puts "Error updating webhook for Plaid item #{item.plaid_id}: #{error.message}"
     end
   end
+
+  desc "Migrate duplicate securities"
+  # 2025-05-22: older data allowed multiple rows with the same
+  # ticker / exchange_operating_mic pair (case-insensitive, NULLs collapsed to '').
+  # This task:
+  #   1. Finds each duplicate group
+  #   2. Chooses the earliest-created row as the keeper
+  #   3. Re-points holdings and trades to the keeper
+  #   4. Destroys the duplicates (which also removes their prices)
+  task migrate_duplicate_securities: :environment do
+    puts "==> Scanning for duplicate securities…"
+
+    duplicate_sets = Security
+      .select("UPPER(ticker) AS up_ticker,
+               COALESCE(UPPER(exchange_operating_mic), '') AS up_mic,
+               COUNT(*) AS dup_count")
+      .group("up_ticker, up_mic")
+      .having("COUNT(*) > 1")
+      .to_a
+
+    puts "Found #{duplicate_sets.size} duplicate groups."
+
+    duplicate_sets.each_with_index do |set, idx|
+      # Fetch duplicates ordered by creation; the first row becomes the keeper
+      duplicates_scope = Security
+        .where("UPPER(ticker) = ? AND COALESCE(UPPER(exchange_operating_mic), '') = ?",
+               set.up_ticker, set.up_mic)
+        .order(:created_at)
+
+      keeper = duplicates_scope.first
+      next unless keeper
+
+      duplicates = duplicates_scope.offset(1)
+
+      dup_ids = duplicates.ids
+
+      # Skip if nothing to merge (defensive; shouldn't occur)
+      next if dup_ids.empty?
+
+      begin
+        ActiveRecord::Base.transaction do
+          # --------------------------------------------------------------
+          # HOLDINGS
+          # --------------------------------------------------------------
+          holdings_moved = 0
+          holdings_deleted = 0
+
+          dup_ids.each do |dup_id|
+            Holding.where(security_id: dup_id).find_each(batch_size: 1_000) do |holding|
+              # Will this holding collide with an existing keeper row?
+              conflict_exists = Holding.where(
+                account_id: holding.account_id,
+                security_id: keeper.id,
+                date: holding.date,
+                currency: holding.currency
+              ).exists?
+
+              if conflict_exists
+                holding.destroy!
+                holdings_deleted += 1
+              else
+                holding.update!(security_id: keeper.id)
+                holdings_moved += 1
+              end
+            end
+          end
+
+          # --------------------------------------------------------------
+          # TRADES — no uniqueness constraints, so a bulk update is fine
+          # --------------------------------------------------------------
+          trades_moved = Trade.where(security_id: dup_ids).update_all(security_id: keeper.id)
+
+          # Ensure no rows remain pointing at duplicates before deletion
+          raise "Leftover holdings detected" if Holding.where(security_id: dup_ids).exists?
+          raise "Leftover trades detected" if Trade.where(security_id: dup_ids).exists?
+
+          duplicates.each(&:destroy!) # destroys their security_prices via dependent: :destroy
+
+          # Log inside the transaction so the counters are still in scope
+          total_holdings = holdings_moved + holdings_deleted
+          puts "[#{idx + 1}/#{duplicate_sets.size}] Merged #{dup_ids.join(', ')} → #{keeper.id} " \
+               "(#{total_holdings} holdings → #{holdings_moved} moved, #{holdings_deleted} removed, " \
+               "#{trades_moved} trades)"
+        end
+      rescue => e
+        puts "ERROR migrating #{dup_ids.join(', ')}: #{e.message}"
+      end
+    end
+
+    puts "✅ Duplicate security migration complete."
+  end
 end
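
Reviewer note: the migration comment references "our ActiveRecord validation", which is not part of this diff. Below is a minimal sketch of what the model side is assumed to look like; the callback name and exact validation options in `app/models/security.rb` are illustrative assumptions, not code from this PR.

```ruby
# app/models/security.rb (hypothetical sketch, not taken from this diff)
class Security < ApplicationRecord
  # Upcase before validation so the Ruby layer agrees with the
  # UPPER(...) expressions in the functional index.
  before_validation :upcase_symbols

  validates :ticker, presence: true
  # Case-insensitive uniqueness per exchange MIC, mirroring the index
  # UPPER(ticker), COALESCE(UPPER(exchange_operating_mic), '')
  validates :ticker, uniqueness: { scope: :exchange_operating_mic, case_sensitive: false }

  private
    def upcase_symbols
      self.ticker = ticker&.upcase
      self.exchange_operating_mic = exchange_operating_mic&.upcase
    end
end
```

The COALESCE to '' is still needed at the database level because PostgreSQL unique indexes treat NULLs as distinct, so two rows with the same ticker and a NULL exchange_operating_mic would otherwise both be accepted. The rake task is presumably meant to run first (`bin/rails data_migration:migrate_duplicate_securities`), so the stricter index can be created without tripping over pre-existing duplicates.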