From e136112ab76a37cbde5c1bcfeb562c8e0a5b5116 Mon Sep 17 00:00:00 2001 From: Eugen Rochko Date: Mon, 29 Jul 2019 20:40:21 +0200 Subject: [PATCH] Fix tag normalization and migration not removing duplicate tags (#11441) Fix #11428 --- app/models/tag.rb | 8 ++--- ...726175042_add_case_insensitive_index_to_tags.rb | 13 +++++++++ spec/models/tag_spec.rb | 34 ++++++++++++++++++++++ 3 files changed, 51 insertions(+), 4 deletions(-) diff --git a/app/models/tag.rb b/app/models/tag.rb index 9722420..46e3a3e 100644 --- a/app/models/tag.rb +++ b/app/models/tag.rb @@ -65,7 +65,7 @@ class Tag < ApplicationRecord class << self def find_or_create_by_names(name_or_names) - Array(name_or_names).map(&method(:normalize)).uniq.map do |normalized_name| + Array(name_or_names).map(&method(:normalize)).uniq { |str| str.mb_chars.downcase.to_s }.map do |normalized_name| tag = matching_name(normalized_name).first || create(name: normalized_name) yield tag if block_given? @@ -77,7 +77,7 @@ class Tag < ApplicationRecord def search_for(term, limit = 5, offset = 0) pattern = sanitize_sql_like(normalize(term.strip)) + '%' - Tag.where(arel_table[:name].lower.matches(pattern.downcase)) + Tag.where(arel_table[:name].lower.matches(pattern.mb_chars.downcase.to_s)) .order(:name) .limit(limit) .offset(offset) @@ -92,7 +92,7 @@ class Tag < ApplicationRecord end def matching_name(name_or_names) - names = Array(name_or_names).map { |name| normalize(name).downcase } + names = Array(name_or_names).map { |name| normalize(name).mb_chars.downcase.to_s } if names.size == 1 where(arel_table[:name].lower.eq(names.first)) @@ -104,7 +104,7 @@ class Tag < ApplicationRecord private def normalize(str) - str.gsub(/\A#/, '').mb_chars.to_s + str.gsub(/\A#/, '') end end diff --git a/db/migrate/20190726175042_add_case_insensitive_index_to_tags.rb b/db/migrate/20190726175042_add_case_insensitive_index_to_tags.rb index 6fa8c0e..057fc86 100644 --- a/db/migrate/20190726175042_add_case_insensitive_index_to_tags.rb +++ b/db/migrate/20190726175042_add_case_insensitive_index_to_tags.rb @@ -2,6 +2,19 @@ class AddCaseInsensitiveIndexToTags < ActiveRecord::Migration[5.2] disable_ddl_transaction! def up + Tag.connection.select_all('SELECT string_agg(id::text, \',\') AS ids FROM tags GROUP BY lower(name) HAVING count(*) > 1').to_hash.each do |row| + canonical_tag_id = row['ids'].split(',').first + redundant_tag_ids = row['ids'].split(',')[1..-1] + + safety_assured do + execute "UPDATE accounts_tags AS t0 SET tag_id = #{canonical_tag_id} WHERE tag_id IN (#{redundant_tag_ids.join(', ')}) AND NOT EXISTS (SELECT t1.tag_id FROM accounts_tags AS t1 WHERE t1.tag_id = #{canonical_tag_id} AND t1.account_id = t0.account_id)" + execute "UPDATE statuses_tags AS t0 SET tag_id = #{canonical_tag_id} WHERE tag_id IN (#{redundant_tag_ids.join(', ')}) AND NOT EXISTS (SELECT t1.tag_id FROM statuses_tags AS t1 WHERE t1.tag_id = #{canonical_tag_id} AND t1.status_id = t0.status_id)" + execute "UPDATE featured_tags AS t0 SET tag_id = #{canonical_tag_id} WHERE tag_id IN (#{redundant_tag_ids.join(', ')}) AND NOT EXISTS (SELECT t1.tag_id FROM featured_tags AS t1 WHERE t1.tag_id = #{canonical_tag_id} AND t1.account_id = t0.account_id)" + end + + Tag.where(id: redundant_tag_ids).in_batches.delete_all + end + safety_assured { execute 'CREATE UNIQUE INDEX CONCURRENTLY index_tags_on_name_lower ON tags (lower(name))' } remove_index :tags, name: 'index_tags_on_name' remove_index :tags, name: 'hashtag_search_index' diff --git a/spec/models/tag_spec.rb b/spec/models/tag_spec.rb index 9a30cea..5f07fd6 100644 --- a/spec/models/tag_spec.rb +++ b/spec/models/tag_spec.rb @@ -82,6 +82,40 @@ RSpec.describe Tag, type: :model do end end + describe '.find_normalized' do + it 'returns tag for a multibyte case-insensitive name' do + upcase_string = 'abcABCabcABCやゆよ' + downcase_string = 'abcabcabcabcやゆよ'; + + tag = Fabricate(:tag, name: downcase_string) + expect(Tag.find_normalized(upcase_string)).to eq tag + end + end + + describe '.matching_name' do + it 'returns tags for multibyte case-insensitive names' do + upcase_string = 'abcABCabcABCやゆよ' + downcase_string = 'abcabcabcabcやゆよ'; + + tag = Fabricate(:tag, name: downcase_string) + expect(Tag.matching_name(upcase_string)).to eq [tag] + end + end + + describe '.find_or_create_by_names' do + it 'runs a passed block once per tag regardless of duplicates' do + upcase_string = 'abcABCabcABCやゆよ' + downcase_string = 'abcabcabcabcやゆよ'; + count = 0 + + Tag.find_or_create_by_names([upcase_string, downcase_string]) do |tag| + count += 1 + end + + expect(count).to eq 1 + end + end + describe '.search_for' do it 'finds tag records with matching names' do tag = Fabricate(:tag, name: "match")