* Add full-text search for authorized statuses - Search API will return statuses that match the query - Only for logged-in users - Only if you are the author of the status, - Or you were mentioned in it - Or you favourited or reblogged it - Configuration via `ES_ENABLED`, `ES_HOST`, `ES_PORT`, `ES_PREFIX` - Run `rails chewy:deploy` to create & populate index Fix #5880 Fix #4293 Fix #1152 * Add commented-out docker-compose configuration for ES container * Optimize index import, filter search results * Add basic normalization to the index * Add better stemming and normalization to the index * Skip webfinger request if search query includes both @ and a space * Fix code style * Visually separate search result sections * Fix code style issues (master)
@@ -9,6 +9,10 @@ DB_USER=postgres | |||||
DB_NAME=postgres | DB_NAME=postgres | ||||
DB_PASS= | DB_PASS= | ||||
DB_PORT=5432 | DB_PORT=5432 | ||||
# Optional ElasticSearch configuration | |||||
# ES_ENABLED=true | |||||
# ES_HOST=localhost | |||||
# ES_PORT=9200 | |||||
# Federation | # Federation | ||||
# Note: Changing LOCAL_DOMAIN at a later time will cause unwanted side effects, including breaking all existing federation. | # Note: Changing LOCAL_DOMAIN at a later time will cause unwanted side effects, including breaking all existing federation. | ||||
@@ -27,6 +27,7 @@ gem 'bootsnap' | |||||
gem 'browser' | gem 'browser' | ||||
gem 'charlock_holmes', '~> 0.7.5' | gem 'charlock_holmes', '~> 0.7.5' | ||||
gem 'iso-639' | gem 'iso-639' | ||||
gem 'chewy', '~> 0.10', git: 'https://github.com/toptal/chewy.git' | |||||
gem 'cld3', '~> 3.2.0' | gem 'cld3', '~> 3.2.0' | ||||
gem 'devise', '~> 4.4' | gem 'devise', '~> 4.4' | ||||
gem 'devise-two-factor', '~> 3.0' | gem 'devise-two-factor', '~> 3.0' | ||||
@@ -1,3 +1,12 @@ | |||||
GIT | |||||
remote: https://github.com/toptal/chewy.git | |||||
revision: a7d21eb4b0bd7415533ef134bb6d31b2df309701 | |||||
specs: | |||||
chewy (0.10.1) | |||||
activesupport (>= 4.0) | |||||
elasticsearch (>= 2.0.0) | |||||
elasticsearch-dsl | |||||
GEM | GEM | ||||
remote: https://rubygems.org/ | remote: https://rubygems.org/ | ||||
specs: | specs: | ||||
@@ -154,6 +163,15 @@ GEM | |||||
json | json | ||||
thread | thread | ||||
thread_safe | thread_safe | ||||
elasticsearch (6.0.1) | |||||
elasticsearch-api (= 6.0.1) | |||||
elasticsearch-transport (= 6.0.1) | |||||
elasticsearch-api (6.0.1) | |||||
multi_json | |||||
elasticsearch-dsl (0.1.5) | |||||
elasticsearch-transport (6.0.1) | |||||
faraday | |||||
multi_json | |||||
encryptor (3.0.0) | encryptor (3.0.0) | ||||
erubi (1.7.0) | erubi (1.7.0) | ||||
et-orbi (1.0.8) | et-orbi (1.0.8) | ||||
@@ -163,6 +181,8 @@ GEM | |||||
fabrication (2.18.0) | fabrication (2.18.0) | ||||
faker (1.8.4) | faker (1.8.4) | ||||
i18n (~> 0.5) | i18n (~> 0.5) | ||||
faraday (0.14.0) | |||||
multipart-post (>= 1.2, < 3) | |||||
fast_blank (1.0.0) | fast_blank (1.0.0) | ||||
ffi (1.9.18) | ffi (1.9.18) | ||||
fog-core (1.45.0) | fog-core (1.45.0) | ||||
@@ -291,6 +311,7 @@ GEM | |||||
minitest (5.11.3) | minitest (5.11.3) | ||||
msgpack (1.1.0) | msgpack (1.1.0) | ||||
multi_json (1.12.2) | multi_json (1.12.2) | ||||
multipart-post (2.0.0) | |||||
net-scp (1.2.1) | net-scp (1.2.1) | ||||
net-ssh (>= 2.6.5) | net-ssh (>= 2.6.5) | ||||
net-ssh (4.2.0) | net-ssh (4.2.0) | ||||
@@ -583,6 +604,7 @@ DEPENDENCIES | |||||
capistrano-yarn (~> 2.0) | capistrano-yarn (~> 2.0) | ||||
capybara (~> 2.15) | capybara (~> 2.15) | ||||
charlock_holmes (~> 0.7.5) | charlock_holmes (~> 0.7.5) | ||||
chewy (~> 0.10)! | |||||
cld3 (~> 3.2.0) | cld3 (~> 3.2.0) | ||||
climate_control (~> 0.2) | climate_control (~> 0.2) | ||||
devise (~> 4.4) | devise (~> 4.4) | ||||
@@ -0,0 +1,61 @@ | |||||
# frozen_string_literal: true | |||||
# Chewy index definition for statuses (reblogs excluded).
#
# Each document stores the author's account id, the status text (spoiler text
# plus plain-text body) with a stemmed sub-field, the ids of the accounts that
# may find the status through search, and the creation time.
class StatusesIndex < Chewy::Index
  # Custom token filters referenced by the `content` analyzer below.
  token_filters = {
    english_stop: { type: 'stop', stopwords: '_english_' },
    english_stemmer: { type: 'stemmer', language: 'english' },
    english_possessive_stemmer: { type: 'stemmer', language: 'possessive_english' },
  }

  # Analyzer applied to the `text.stemmed` sub-field.
  content_analyzer = {
    tokenizer: 'uax_url_email',
    filter: %w(
      english_possessive_stemmer
      lowercase
      asciifolding
      cjk_width
      english_stop
      english_stemmer
    ),
  }

  settings index: { refresh_interval: '15m' },
           analysis: { filter: token_filters, analyzer: { content: content_analyzer } }

  define_type ::Status.without_reblogs do
    # Every crutch receives the collection being imported and returns a hash of
    # status id => array of account ids, consumed by Status#searchable_by.

    crutch :mentions do |statuses|
      pairs = ::Mention.where(status_id: statuses.map(&:id)).pluck(:status_id, :account_id)
      pairs.each_with_object({}) { |(status_id, account_id), lookup| (lookup[status_id] ||= []) << account_id }
    end

    crutch :favourites do |statuses|
      pairs = ::Favourite.where(status_id: statuses.map(&:id)).pluck(:status_id, :account_id)
      pairs.each_with_object({}) { |(status_id, account_id), lookup| (lookup[status_id] ||= []) << account_id }
    end

    crutch :reblogs do |statuses|
      pairs = ::Status.where(reblog_of_id: statuses.map(&:id)).pluck(:reblog_of_id, :account_id)
      pairs.each_with_object({}) { |(status_id, account_id), lookup| (lookup[status_id] ||= []) << account_id }
    end

    root date_detection: false do
      field :account_id, type: 'long'

      # Spoiler text and plain-text body, separated by a blank line.
      indexed_text = lambda do |status|
        [status.spoiler_text, Formatter.instance.plaintext(status)].join("\n\n")
      end

      field :text, type: 'text', value: indexed_text do
        field :stemmed, type: 'text', analyzer: 'content'
      end

      field :searchable_by, type: 'long', value: ->(status, crutches) { status.searchable_by(crutches) }
      field :created_at, type: 'date'
    end
  end
end
@@ -22,6 +22,8 @@ export default class SearchResults extends ImmutablePureComponent { | |||||
count += results.get('accounts').size; | count += results.get('accounts').size; | ||||
accounts = ( | accounts = ( | ||||
<div className='search-results__section'> | <div className='search-results__section'> | ||||
<h5><FormattedMessage id='search_results.accounts' defaultMessage='People' /></h5> | |||||
{results.get('accounts').map(accountId => <AccountContainer key={accountId} id={accountId} />)} | {results.get('accounts').map(accountId => <AccountContainer key={accountId} id={accountId} />)} | ||||
</div> | </div> | ||||
); | ); | ||||
@@ -31,6 +33,8 @@ export default class SearchResults extends ImmutablePureComponent { | |||||
count += results.get('statuses').size; | count += results.get('statuses').size; | ||||
statuses = ( | statuses = ( | ||||
<div className='search-results__section'> | <div className='search-results__section'> | ||||
<h5><FormattedMessage id='search_results.statuses' defaultMessage='Toots' /></h5> | |||||
{results.get('statuses').map(statusId => <StatusContainer key={statusId} id={statusId} />)} | {results.get('statuses').map(statusId => <StatusContainer key={statusId} id={statusId} />)} | ||||
</div> | </div> | ||||
); | ); | ||||
@@ -40,6 +44,8 @@ export default class SearchResults extends ImmutablePureComponent { | |||||
count += results.get('hashtags').size; | count += results.get('hashtags').size; | ||||
hashtags = ( | hashtags = ( | ||||
<div className='search-results__section'> | <div className='search-results__section'> | ||||
<h5><FormattedMessage id='search_results.hashtags' defaultMessage='Hashtags' /></h5> | |||||
{results.get('hashtags').map(hashtag => ( | {results.get('hashtags').map(hashtag => ( | ||||
<Link key={hashtag} className='search-results__hashtag' to={`/timelines/tag/${hashtag}`}> | <Link key={hashtag} className='search-results__hashtag' to={`/timelines/tag/${hashtag}`}> | ||||
#{hashtag} | #{hashtag} | ||||
@@ -1786,7 +1786,7 @@ | |||||
flex: 1; | flex: 1; | ||||
min-height: 47px; | min-height: 47px; | ||||
> img { | |||||
> img { | |||||
display: block; | display: block; | ||||
object-fit: contain; | object-fit: contain; | ||||
object-position: bottom left; | object-position: bottom left; | ||||
@@ -3229,6 +3229,43 @@ | |||||
font-weight: 500; | font-weight: 500; | ||||
} | } | ||||
// One category of search results (accounts, statuses or hashtags),
// introduced by a heading with a horizontal rule drawn behind its label.
.search-results__section {
  margin-bottom: 20px;

  h5 {
    position: relative;

    // The rule: a 1px line spanning the heading at half its height.
    &::before {
      content: "";
      display: block;
      position: absolute;
      top: 50%;
      left: 0;
      right: 0;
      width: 100%;
      height: 0;
      border-top: 1px solid lighten($ui-base-color, 8%);
    }

    // The label sits above the rule on an opaque background.
    span {
      display: inline-block;
      position: relative;
      z-index: 1;
      padding: 10px;
      background: $ui-base-color;
      color: $ui-primary-color;
      font-size: 14px;
      font-weight: 500;
      cursor: default;
    }
  }

  // No bottom border on the last entry of a section.
  .account:last-child,
  & > div:last-child .status {
    border-bottom: 0;
  }
}
.search-results__hashtag { | .search-results__hashtag { | ||||
display: block; | display: block; | ||||
padding: 10px; | padding: 10px; | ||||
@@ -9,6 +9,7 @@ class StatusFilter | |||||
end | end | ||||
# Whether the status should be hidden from the viewing account.
# A status authored by the viewing account itself is never filtered.
def filtered?
  authored_by_viewer = !account.nil? && account.id == status.account_id
  return false if authored_by_viewer

  blocked_by_policy? || (account_present? && filtered_status?) || silenced_account?
end
@@ -13,6 +13,8 @@ | |||||
class Favourite < ApplicationRecord | class Favourite < ApplicationRecord | ||||
include Paginable | include Paginable | ||||
update_index('statuses#status', :status) if Chewy.enabled? | |||||
belongs_to :account, inverse_of: :favourites | belongs_to :account, inverse_of: :favourites | ||||
belongs_to :status, inverse_of: :favourites, counter_cache: true | belongs_to :status, inverse_of: :favourites, counter_cache: true | ||||
@@ -31,6 +31,8 @@ class Status < ApplicationRecord | |||||
include Cacheable | include Cacheable | ||||
include StatusThreadingConcern | include StatusThreadingConcern | ||||
update_index('statuses#status', :proper) if Chewy.enabled? | |||||
enum visibility: [:public, :unlisted, :private, :direct], _suffix: :visibility | enum visibility: [:public, :unlisted, :private, :direct], _suffix: :visibility | ||||
belongs_to :application, class_name: 'Doorkeeper::Application', optional: true | belongs_to :application, class_name: 'Doorkeeper::Application', optional: true | ||||
@@ -78,6 +80,22 @@ class Status < ApplicationRecord | |||||
delegate :domain, to: :account, prefix: true | delegate :domain, to: :account, prefix: true | ||||
# Ids of the accounts that may find this status through search: the author,
# followed by accounts that were mentioned in it, favourited it or reblogged it.
#
# @param preloaded [#mentions, #favourites, #reblogs, nil] optional lookups of
#   status id => array of account ids; when nil the associations are queried.
# @return [Array] account ids without duplicates, author first
def searchable_by(preloaded = nil)
  related =
    if preloaded.nil?
      [mentions, favourites, reblogs].flat_map { |relation| relation.pluck(:account_id) }
    else
      [preloaded.mentions, preloaded.favourites, preloaded.reblogs].flat_map { |lookup| lookup[id] || [] }
    end

  ([account_id] + related).uniq
end
# Truthy when the status has a parent status id, otherwise falls back to the
# value stored under the 'reply' attribute.
def reply?
  return true unless in_reply_to_id.nil?

  attributes['reply']
end
@@ -1,21 +1,43 @@ | |||||
# frozen_string_literal: true | # frozen_string_literal: true | ||||
class SearchService < BaseService | class SearchService < BaseService | ||||
attr_accessor :query | |||||
attr_accessor :query, :account, :limit, :resolve | |||||
# Runs a search and returns a hash with :accounts, :hashtags and :statuses.
#
# A URL query is resolved to a single resource and merged into the results.
# Any other non-blank query runs the account, status and hashtag searches,
# each only when its corresponding `*_searchable?` predicate allows it.
#
# @param query [String] the raw search string
# @param limit [Integer] maximum number of results per category
# @param resolve [Boolean] forwarded to the account search
# @param account [Object, nil] the account performing the search, if any
# @return [Hash] results keyed by :accounts, :hashtags and :statuses
def call(query, limit, resolve = false, account = nil)
  @query   = query
  @account = account
  @limit   = limit
  @resolve = resolve

  default_results.tap do |results|
    if url_query?
      results.merge!(url_resource_results) unless url_resource.nil?
    elsif query.present?
      # Each category is searched exactly once, and only behind its guard.
      results[:accounts] = perform_accounts_search! if account_searchable?
      results[:statuses] = perform_statuses_search! if full_text_searchable?
      results[:hashtags] = perform_hashtags_search! if hashtag_searchable?
    end
  end
end
private | |||||
# Delegates the account lookup to AccountSearchService with the stored
# query, limit, searching account and resolve flag.
def perform_accounts_search!
  service = AccountSearchService.new
  service.call(query, limit, account, resolve: resolve)
end
# Full-text search over StatusesIndex, restricted to documents whose
# searchable_by contains the searching account's id. Results are loaded as
# objects and those rejected by StatusFilter are dropped afterwards.
def perform_statuses_search!
  visibility = { term: { searchable_by: account.id } }
  text_match = {
    multi_match: { type: 'most_fields', query: query, operator: 'and', fields: %w(text text.stemmed) },
  }

  candidates = StatusesIndex.filter(visibility).query(text_match).limit(limit).objects
  candidates.reject { |status| StatusFilter.new(status, account).filtered? }
end
# Searches tags for the query with a single leading '#' removed.
def perform_hashtags_search!
  term = query.sub(/\A#/, '')
  Tag.search_for(term, limit)
end
# Fresh result skeleton: one empty array per result category.
def default_results
  %i(accounts hashtags statuses).each_with_object({}) do |category, results|
    results[category] = []
  end
end
@@ -35,4 +57,17 @@ class SearchService < BaseService | |||||
# Result key for the resolved URL resource: its class name, lower-cased,
# pluralized and converted to a symbol.
def url_resource_symbol
  class_name = url_resource.class.name
  class_name.downcase.pluralize.to_sym
end
# Whether a full-text status search should run: Chewy must be enabled, a
# searching account must be present, and the query must not be a single
# token (no space) that starts with '#' or contains '@'.
def full_text_searchable?
  return false unless Chewy.enabled?
  return false if account.nil?

  single_token  = !query.include?(' ')
  handle_or_tag = query.start_with?('#') || query.include?('@')

  !(handle_or_tag && single_token)
end
# Account search is skipped only when the query contains both an '@' and a space.
def account_searchable?
  !query.include?('@') || !query.include?(' ')
end
# Hashtag search runs only for queries that contain no '@'.
def hashtag_searchable?
  query.index('@').nil?
end
end | end |
@@ -0,0 +1,22 @@ | |||||
# Chewy (Elasticsearch) configuration, driven by environment variables:
#   ES_ENABLED - search is enabled only when this is exactly 'true'
#   ES_HOST    - defaults to 'localhost'
#   ES_PORT    - defaults to 9200
#   ES_PREFIX  - index prefix; falls back to REDIS_NAMESPACE, then nil
es_enabled = ENV['ES_ENABLED'] == 'true'
es_host    = ENV.fetch('ES_HOST', 'localhost')
es_port    = ENV.fetch('ES_PORT', 9200)
es_prefix  = ENV.fetch('ES_PREFIX') { ENV.fetch('REDIS_NAMESPACE', nil) }

Chewy.settings = {
  host: [es_host, es_port].join(':'),
  prefix: es_prefix,
  enabled: es_enabled,
  journal: false,
}

# Index updates go through the :sidekiq strategy when enabled, :bypass otherwise.
Chewy.root_strategy = es_enabled ? :sidekiq : :bypass

module Chewy
  # Reads back the :enabled flag stored in the settings above.
  def self.enabled?
    settings[:enabled]
  end
end
@@ -19,6 +19,17 @@ services: | |||||
# volumes: | # volumes: | ||||
# - ./redis:/data | # - ./redis:/data | ||||
# es: | |||||
# restart: always | |||||
# image: docker.elastic.co/elasticsearch/elasticsearch-oss:6.1.3 | |||||
# environment: | |||||
# - "ES_JAVA_OPTS=-Xms512m -Xmx512m" | |||||
# networks: | |||||
# - internal_network | |||||
#### Uncomment to enable ES persistence | |||||
## volumes: | |||||
## - ./elasticsearch:/usr/share/elasticsearch/data | |||||
web: | web: | ||||
build: . | build: . | ||||
image: gargron/mastodon | image: gargron/mastodon | ||||
@@ -33,6 +44,7 @@ services: | |||||
depends_on: | depends_on: | ||||
- db | - db | ||||
- redis | - redis | ||||
# - es | |||||
volumes: | volumes: | ||||
- ./public/assets:/mastodon/public/assets | - ./public/assets:/mastodon/public/assets | ||||
- ./public/packs:/mastodon/public/packs | - ./public/packs:/mastodon/public/packs | ||||
@@ -25,6 +25,10 @@ RSpec.configure do |config| | |||||
end | end | ||||
end | end | ||||
config.before :suite do | |||||
Chewy.strategy(:bypass) | |||||
end | |||||
config.after :suite do | config.after :suite do | ||||
gc_counter = 0 | gc_counter = 0 | ||||
FileUtils.rm_rf(Dir["#{Rails.root}/spec/test_files/"]) | FileUtils.rm_rf(Dir["#{Rails.root}/spec/test_files/"]) | ||||