From 68b4e36c82344fba7c5a01e9f8dc9ddbaaf4e3ff Mon Sep 17 00:00:00 2001 From: Eugen Rochko Date: Mon, 4 Sep 2023 10:18:45 +0200 Subject: [PATCH] Fix `#hashtag` matching non-hashtagged posts in search (#26781) --- app/chewy/public_statuses_index.rb | 13 ++++++++++++- app/chewy/statuses_index.rb | 13 ++++++++++++- app/chewy/tags_index.rb | 24 +++++++++++++++--------- app/lib/search_query_transformer.rb | 6 +++++- 4 files changed, 44 insertions(+), 12 deletions(-) diff --git a/app/chewy/public_statuses_index.rb b/app/chewy/public_statuses_index.rb index 5c68a1365..4be204d4a 100644 --- a/app/chewy/public_statuses_index.rb +++ b/app/chewy/public_statuses_index.rb @@ -37,18 +37,29 @@ class PublicStatusesIndex < Chewy::Index english_stemmer ), }, + + hashtag: { + tokenizer: 'keyword', + filter: %w( + word_delimiter_graph + lowercase + asciifolding + cjk_width + ), + }, }, } index_scope ::Status.unscoped .kept .indexable - .includes(:media_attachments, :preloadable_poll, :preview_cards) + .includes(:media_attachments, :preloadable_poll, :preview_cards, :tags) root date_detection: false do field(:id, type: 'long') field(:account_id, type: 'long') field(:text, type: 'text', analyzer: 'verbatim', value: ->(status) { status.searchable_text }) { field(:stemmed, type: 'text', analyzer: 'content') } + field(:tags, type: 'text', analyzer: 'hashtag', value: ->(status) { status.tags.map(&:display_name) }) field(:language, type: 'keyword') field(:properties, type: 'keyword', value: ->(status) { status.searchable_properties }) field(:created_at, type: 'date') diff --git a/app/chewy/statuses_index.rb b/app/chewy/statuses_index.rb index 2be7e4525..6b25dc9df 100644 --- a/app/chewy/statuses_index.rb +++ b/app/chewy/statuses_index.rb @@ -37,15 +37,26 @@ class StatusesIndex < Chewy::Index english_stemmer ), }, + + hashtag: { + tokenizer: 'keyword', + filter: %w( + word_delimiter_graph + lowercase + asciifolding + cjk_width + ), + }, }, } - index_scope ::Status.unscoped.kept.without_reblogs.includes(:media_attachments, :preview_cards, :local_mentioned, :local_favorited, :local_reblogged, :local_bookmarked, preloadable_poll: :local_voters), delete_if: ->(status) { status.searchable_by.empty? } + index_scope ::Status.unscoped.kept.without_reblogs.includes(:media_attachments, :preview_cards, :local_mentioned, :local_favorited, :local_reblogged, :local_bookmarked, :tags, preloadable_poll: :local_voters), delete_if: ->(status) { status.searchable_by.empty? } root date_detection: false do field(:id, type: 'long') field(:account_id, type: 'long') field(:text, type: 'text', analyzer: 'verbatim', value: ->(status) { status.searchable_text }) { field(:stemmed, type: 'text', analyzer: 'content') } + field(:tags, type: 'text', analyzer: 'hashtag', value: ->(status) { status.tags.map(&:display_name) }) field(:searchable_by, type: 'long', value: ->(status) { status.searchable_by }) field(:language, type: 'keyword') field(:properties, type: 'keyword', value: ->(status) { status.searchable_properties }) diff --git a/app/chewy/tags_index.rb b/app/chewy/tags_index.rb index b2d50a000..5b6349a96 100644 --- a/app/chewy/tags_index.rb +++ b/app/chewy/tags_index.rb @@ -5,12 +5,21 @@ class TagsIndex < Chewy::Index analyzer: { content: { tokenizer: 'keyword', - filter: %w(lowercase asciifolding cjk_width), + filter: %w( + word_delimiter_graph + lowercase + asciifolding + cjk_width + ), }, edge_ngram: { tokenizer: 'edge_ngram', - filter: %w(lowercase asciifolding cjk_width), + filter: %w( + lowercase + asciifolding + cjk_width + ), }, }, @@ -30,12 +39,9 @@ class TagsIndex < Chewy::Index end root date_detection: false do - field :name, type: 'text', analyzer: 'content' do - field :edge_ngram, type: 'text', analyzer: 'edge_ngram', search_analyzer: 'content' - end - - field :reviewed, type: 'boolean', value: ->(tag) { tag.reviewed? } - field :usage, type: 'long', value: ->(tag, crutches) { tag.history.aggregate(crutches.time_period).accounts } - field :last_status_at, type: 'date', value: ->(tag) { tag.last_status_at || tag.created_at } + field(:name, type: 'text', analyzer: 'content', value: :display_name) { field(:edge_ngram, type: 'text', analyzer: 'edge_ngram', search_analyzer: 'content') } + field(:reviewed, type: 'boolean', value: ->(tag) { tag.reviewed? }) + field(:usage, type: 'long', value: ->(tag, crutches) { tag.history.aggregate(crutches.time_period).accounts }) + field(:last_status_at, type: 'date', value: ->(tag) { tag.last_status_at || tag.created_at }) end end diff --git a/app/lib/search_query_transformer.rb b/app/lib/search_query_transformer.rb index 2c8e95afe..af3964fd3 100644 --- a/app/lib/search_query_transformer.rb +++ b/app/lib/search_query_transformer.rb @@ -53,7 +53,11 @@ class SearchQueryTransformer < Parslet::Transform end def to_query - { multi_match: { type: 'most_fields', query: @term, fields: ['text', 'text.stemmed'], operator: 'and' } } + if @term.start_with?('#') + { match: { tags: { query: @term } } } + else + { multi_match: { type: 'most_fields', query: @term, fields: ['text', 'text.stemmed'], operator: 'and' } } + end end end