Full-text search for authorized statuses (#6423)
* Add full-text search for authorized statuses
  - Search API will return statuses that match the query
  - Only for logged-in users
  - Only if you are the author of the status,
  - Or you were mentioned in it
  - Or you favourited or reblogged it
  - Configuration via `ES_ENABLED`, `ES_HOST`, `ES_PORT`, `ES_PREFIX`
  - Run `rails chewy:deploy` to create & populate index

  Fix #5880
  Fix #4293
  Fix #1152
* Add commented out docker-compose configuration for ES container
* Optimize index import, filter search results
* Add basic normalization to the index
* Add better stemming and normalization to the index
* Skip webfinger request if search query includes both @ and a space
* Fix code style
* Visually separate search result sections
* Fix code style issues
This commit is contained in:
		
							parent
							
								
									235c14c79d
								
							
						
					
					
						commit
						3ebc0ad4d3
					
				|  | @ -9,6 +9,10 @@ DB_USER=postgres | |||
| DB_NAME=postgres | ||||
| DB_PASS= | ||||
| DB_PORT=5432 | ||||
| # Optional ElasticSearch configuration | ||||
| # ES_ENABLED=true | ||||
| # ES_HOST=localhost | ||||
| # ES_PORT=9200 | ||||
| 
 | ||||
| # Federation | ||||
| # Note: Changing LOCAL_DOMAIN at a later time will cause unwanted side effects, including breaking all existing federation. | ||||
|  |  | |||
							
								
								
									
										1
									
								
								Gemfile
								
								
								
								
							
							
						
						
									
										1
									
								
								Gemfile
								
								
								
								
							|  | @ -27,6 +27,7 @@ gem 'bootsnap' | |||
| gem 'browser' | ||||
| gem 'charlock_holmes', '~> 0.7.5' | ||||
| gem 'iso-639' | ||||
| gem 'chewy', '~> 0.10', git: 'https://github.com/toptal/chewy.git' | ||||
| gem 'cld3', '~> 3.2.0' | ||||
| gem 'devise', '~> 4.4' | ||||
| gem 'devise-two-factor', '~> 3.0' | ||||
|  |  | |||
							
								
								
									
										22
									
								
								Gemfile.lock
								
								
								
								
							
							
						
						
									
										22
									
								
								Gemfile.lock
								
								
								
								
							|  | @ -1,3 +1,12 @@ | |||
| GIT | ||||
|   remote: https://github.com/toptal/chewy.git | ||||
|   revision: a7d21eb4b0bd7415533ef134bb6d31b2df309701 | ||||
|   specs: | ||||
|     chewy (0.10.1) | ||||
|       activesupport (>= 4.0) | ||||
|       elasticsearch (>= 2.0.0) | ||||
|       elasticsearch-dsl | ||||
| 
 | ||||
| GEM | ||||
|   remote: https://rubygems.org/ | ||||
|   specs: | ||||
|  | @ -154,6 +163,15 @@ GEM | |||
|       json | ||||
|       thread | ||||
|       thread_safe | ||||
|     elasticsearch (6.0.1) | ||||
|       elasticsearch-api (= 6.0.1) | ||||
|       elasticsearch-transport (= 6.0.1) | ||||
|     elasticsearch-api (6.0.1) | ||||
|       multi_json | ||||
|     elasticsearch-dsl (0.1.5) | ||||
|     elasticsearch-transport (6.0.1) | ||||
|       faraday | ||||
|       multi_json | ||||
|     encryptor (3.0.0) | ||||
|     erubi (1.7.0) | ||||
|     et-orbi (1.0.8) | ||||
|  | @ -163,6 +181,8 @@ GEM | |||
|     fabrication (2.18.0) | ||||
|     faker (1.8.4) | ||||
|       i18n (~> 0.5) | ||||
|     faraday (0.14.0) | ||||
|       multipart-post (>= 1.2, < 3) | ||||
|     fast_blank (1.0.0) | ||||
|     ffi (1.9.18) | ||||
|     fog-core (1.45.0) | ||||
|  | @ -291,6 +311,7 @@ GEM | |||
|     minitest (5.11.3) | ||||
|     msgpack (1.1.0) | ||||
|     multi_json (1.12.2) | ||||
|     multipart-post (2.0.0) | ||||
|     net-scp (1.2.1) | ||||
|       net-ssh (>= 2.6.5) | ||||
|     net-ssh (4.2.0) | ||||
|  | @ -583,6 +604,7 @@ DEPENDENCIES | |||
|   capistrano-yarn (~> 2.0) | ||||
|   capybara (~> 2.15) | ||||
|   charlock_holmes (~> 0.7.5) | ||||
|   chewy (~> 0.10)! | ||||
|   cld3 (~> 3.2.0) | ||||
|   climate_control (~> 0.2) | ||||
|   devise (~> 4.4) | ||||
|  |  | |||
|  | @ -0,0 +1,61 @@ | |||
| # frozen_string_literal: true | ||||
| 
 | ||||
| class StatusesIndex < Chewy::Index | ||||
|   settings index: { refresh_interval: '15m' }, analysis: { | ||||
|     filter: { | ||||
|       english_stop: { | ||||
|         type: 'stop', | ||||
|         stopwords: '_english_', | ||||
|       }, | ||||
|       english_stemmer: { | ||||
|         type: 'stemmer', | ||||
|         language: 'english', | ||||
|       }, | ||||
|       english_possessive_stemmer: { | ||||
|         type: 'stemmer', | ||||
|         language: 'possessive_english', | ||||
|       }, | ||||
|     }, | ||||
|     analyzer: { | ||||
|       content: { | ||||
|         tokenizer: 'uax_url_email', | ||||
|         filter: %w( | ||||
|           english_possessive_stemmer | ||||
|           lowercase | ||||
|           asciifolding | ||||
|           cjk_width | ||||
|           english_stop | ||||
|           english_stemmer | ||||
|         ), | ||||
|       }, | ||||
|     }, | ||||
|   } | ||||
| 
 | ||||
|   define_type ::Status.without_reblogs do | ||||
|     crutch :mentions do |collection| | ||||
|       data = ::Mention.where(status_id: collection.map(&:id)).pluck(:status_id, :account_id) | ||||
|       data.each.with_object({}) { |(id, name), result| (result[id] ||= []).push(name) } | ||||
|     end | ||||
| 
 | ||||
|     crutch :favourites do |collection| | ||||
|       data = ::Favourite.where(status_id: collection.map(&:id)).pluck(:status_id, :account_id) | ||||
|       data.each.with_object({}) { |(id, name), result| (result[id] ||= []).push(name) } | ||||
|     end | ||||
| 
 | ||||
|     crutch :reblogs do |collection| | ||||
|       data = ::Status.where(reblog_of_id: collection.map(&:id)).pluck(:reblog_of_id, :account_id) | ||||
|       data.each.with_object({}) { |(id, name), result| (result[id] ||= []).push(name) } | ||||
|     end | ||||
| 
 | ||||
|     root date_detection: false do | ||||
|       field :account_id, type: 'long' | ||||
| 
 | ||||
|       field :text, type: 'text', value: ->(status) { [status.spoiler_text, Formatter.instance.plaintext(status)].join("\n\n") } do | ||||
|         field :stemmed, type: 'text', analyzer: 'content' | ||||
|       end | ||||
| 
 | ||||
|       field :searchable_by, type: 'long', value: ->(status, crutches) { status.searchable_by(crutches) } | ||||
|       field :created_at, type: 'date' | ||||
|     end | ||||
|   end | ||||
| end | ||||
|  | @ -22,6 +22,8 @@ export default class SearchResults extends ImmutablePureComponent { | |||
|       count   += results.get('accounts').size; | ||||
|       accounts = ( | ||||
|         <div className='search-results__section'> | ||||
|           <h5><FormattedMessage id='search_results.accounts' defaultMessage='People' /></h5> | ||||
| 
 | ||||
|           {results.get('accounts').map(accountId => <AccountContainer key={accountId} id={accountId} />)} | ||||
|         </div> | ||||
|       ); | ||||
|  | @ -31,6 +33,8 @@ export default class SearchResults extends ImmutablePureComponent { | |||
|       count   += results.get('statuses').size; | ||||
|       statuses = ( | ||||
|         <div className='search-results__section'> | ||||
|           <h5><FormattedMessage id='search_results.statuses' defaultMessage='Toots' /></h5> | ||||
| 
 | ||||
|           {results.get('statuses').map(statusId => <StatusContainer key={statusId} id={statusId} />)} | ||||
|         </div> | ||||
|       ); | ||||
|  | @ -40,6 +44,8 @@ export default class SearchResults extends ImmutablePureComponent { | |||
|       count += results.get('hashtags').size; | ||||
|       hashtags = ( | ||||
|         <div className='search-results__section'> | ||||
|           <h5><FormattedMessage id='search_results.hashtags' defaultMessage='Hashtags' /></h5> | ||||
| 
 | ||||
|           {results.get('hashtags').map(hashtag => ( | ||||
|             <Link key={hashtag} className='search-results__hashtag' to={`/timelines/tag/${hashtag}`}> | ||||
|               #{hashtag} | ||||
|  |  | |||
|  | @ -1786,7 +1786,7 @@ | |||
|   flex: 1; | ||||
|   min-height: 47px; | ||||
| 
 | ||||
|   > img {  | ||||
|   > img { | ||||
|     display: block; | ||||
|     object-fit: contain; | ||||
|     object-position: bottom left; | ||||
|  | @ -3229,6 +3229,43 @@ | |||
|   font-weight: 500; | ||||
| } | ||||
| 
 | ||||
| .search-results__section { | ||||
|   margin-bottom: 20px; | ||||
| 
 | ||||
|   h5 { | ||||
|     position: relative; | ||||
| 
 | ||||
|     &::before { | ||||
|       content: ""; | ||||
|       display: block; | ||||
|       position: absolute; | ||||
|       left: 0; | ||||
|       right: 0; | ||||
|       top: 50%; | ||||
|       width: 100%; | ||||
|       height: 0; | ||||
|       border-top: 1px solid lighten($ui-base-color, 8%); | ||||
|     } | ||||
| 
 | ||||
|     span { | ||||
|       display: inline-block; | ||||
|       background: $ui-base-color; | ||||
|       color: $ui-primary-color; | ||||
|       font-size: 14px; | ||||
|       font-weight: 500; | ||||
|       padding: 10px; | ||||
|       position: relative; | ||||
|       z-index: 1; | ||||
|       cursor: default; | ||||
|     } | ||||
|   } | ||||
| 
 | ||||
|   .account:last-child, | ||||
|   & > div:last-child .status { | ||||
|     border-bottom: 0; | ||||
|   } | ||||
| } | ||||
| 
 | ||||
| .search-results__hashtag { | ||||
|   display: block; | ||||
|   padding: 10px; | ||||
|  |  | |||
|  | @ -9,6 +9,7 @@ class StatusFilter | |||
|   end | ||||
| 
 | ||||
|   def filtered? | ||||
|     return false if !account.nil? && account.id == status.account_id | ||||
|     blocked_by_policy? || (account_present? && filtered_status?) || silenced_account? | ||||
|   end | ||||
| 
 | ||||
|  |  | |||
|  | @ -13,6 +13,8 @@ | |||
| class Favourite < ApplicationRecord | ||||
|   include Paginable | ||||
| 
 | ||||
|   update_index('statuses#status', :status) if Chewy.enabled? | ||||
| 
 | ||||
|   belongs_to :account, inverse_of: :favourites | ||||
|   belongs_to :status,  inverse_of: :favourites, counter_cache: true | ||||
| 
 | ||||
|  |  | |||
|  | @ -31,6 +31,8 @@ class Status < ApplicationRecord | |||
|   include Cacheable | ||||
|   include StatusThreadingConcern | ||||
| 
 | ||||
|   update_index('statuses#status', :proper) if Chewy.enabled? | ||||
| 
 | ||||
|   enum visibility: [:public, :unlisted, :private, :direct], _suffix: :visibility | ||||
| 
 | ||||
|   belongs_to :application, class_name: 'Doorkeeper::Application', optional: true | ||||
|  | @ -78,6 +80,22 @@ class Status < ApplicationRecord | |||
| 
 | ||||
|   delegate :domain, to: :account, prefix: true | ||||
| 
 | ||||
|   def searchable_by(preloaded = nil) | ||||
|     ids = [account_id] | ||||
| 
 | ||||
|     if preloaded.nil? | ||||
|       ids += mentions.pluck(:account_id) | ||||
|       ids += favourites.pluck(:account_id) | ||||
|       ids += reblogs.pluck(:account_id) | ||||
|     else | ||||
|       ids += preloaded.mentions[id] || [] | ||||
|       ids += preloaded.favourites[id] || [] | ||||
|       ids += preloaded.reblogs[id] || [] | ||||
|     end | ||||
| 
 | ||||
|     ids.uniq | ||||
|   end | ||||
| 
 | ||||
|   def reply? | ||||
|     !in_reply_to_id.nil? || attributes['reply'] | ||||
|   end | ||||
|  |  | |||
|  | @ -1,21 +1,43 @@ | |||
| # frozen_string_literal: true | ||||
| 
 | ||||
| class SearchService < BaseService | ||||
|   attr_accessor :query | ||||
|   attr_accessor :query, :account, :limit, :resolve | ||||
| 
 | ||||
|   def call(query, limit, resolve = false, account = nil) | ||||
|     @query = query | ||||
|     @query   = query | ||||
|     @account = account | ||||
|     @limit   = limit | ||||
|     @resolve = resolve | ||||
| 
 | ||||
|     default_results.tap do |results| | ||||
|       if url_query? | ||||
|         results.merge!(url_resource_results) unless url_resource.nil? | ||||
|       elsif query.present? | ||||
|         results[:accounts] = AccountSearchService.new.call(query, limit, account, resolve: resolve) | ||||
|         results[:hashtags] = Tag.search_for(query.gsub(/\A#/, ''), limit) unless query.start_with?('@') | ||||
|         results[:accounts] = perform_accounts_search! if account_searchable? | ||||
|         results[:statuses] = perform_statuses_search! if full_text_searchable? | ||||
|         results[:hashtags] = perform_hashtags_search! if hashtag_searchable? | ||||
|       end | ||||
|     end | ||||
|   end | ||||
| 
 | ||||
|   private | ||||
| 
 | ||||
|   def perform_accounts_search! | ||||
|     AccountSearchService.new.call(query, limit, account, resolve: resolve) | ||||
|   end | ||||
| 
 | ||||
|   def perform_statuses_search! | ||||
|     statuses = StatusesIndex.filter(term: { searchable_by: account.id }) | ||||
|                             .query(multi_match: { type: 'most_fields', query: query, operator: 'and', fields: %w(text text.stemmed) }) | ||||
|                             .limit(limit).objects | ||||
| 
 | ||||
|     statuses.reject { |status| StatusFilter.new(status, account).filtered? } | ||||
|   end | ||||
| 
 | ||||
|   def perform_hashtags_search! | ||||
|     Tag.search_for(query.gsub(/\A#/, ''), limit) | ||||
|   end | ||||
| 
 | ||||
|   def default_results | ||||
|     { accounts: [], hashtags: [], statuses: [] } | ||||
|   end | ||||
|  | @ -35,4 +57,17 @@ class SearchService < BaseService | |||
|   def url_resource_symbol | ||||
|     url_resource.class.name.downcase.pluralize.to_sym | ||||
|   end | ||||
| 
 | ||||
|   def full_text_searchable? | ||||
|     return false unless Chewy.enabled? | ||||
|     !account.nil? && !((query.start_with?('#') || query.include?('@')) && !query.include?(' ')) | ||||
|   end | ||||
| 
 | ||||
|   def account_searchable? | ||||
|     !(query.include?('@') && query.include?(' ')) | ||||
|   end | ||||
| 
 | ||||
|   def hashtag_searchable? | ||||
|     !query.include?('@') | ||||
|   end | ||||
| end | ||||
|  |  | |||
|  | @ -0,0 +1,22 @@ | |||
| enabled         = ENV['ES_ENABLED'] == 'true' | ||||
| host            = ENV.fetch('ES_HOST') { 'localhost' } | ||||
| port            = ENV.fetch('ES_PORT') { 9200 } | ||||
| fallback_prefix = ENV.fetch('REDIS_NAMESPACE') { nil } | ||||
| prefix          = ENV.fetch('ES_PREFIX') { fallback_prefix } | ||||
| 
 | ||||
| Chewy.settings = { | ||||
|   host: "#{host}:#{port}", | ||||
|   prefix: prefix, | ||||
|   enabled: enabled, | ||||
|   journal: false, | ||||
| } | ||||
| 
 | ||||
| Chewy.root_strategy = enabled ? :sidekiq : :bypass | ||||
| 
 | ||||
| module Chewy | ||||
|   class << self | ||||
|     def enabled? | ||||
|       settings[:enabled] | ||||
|     end | ||||
|   end | ||||
| end | ||||
|  | @ -19,6 +19,17 @@ services: | |||
| #    volumes: | ||||
| #      - ./redis:/data | ||||
| 
 | ||||
| #  es: | ||||
| #    restart: always | ||||
| #    image: docker.elastic.co/elasticsearch/elasticsearch-oss:6.1.3 | ||||
| #    environment: | ||||
| #      - "ES_JAVA_OPTS=-Xms512m -Xmx512m" | ||||
| #    networks: | ||||
| #      - internal_network | ||||
| #### Uncomment to enable ES persistence | ||||
| ##    volumes: | ||||
| ##      - ./elasticsearch:/usr/share/elasticsearch/data | ||||
| 
 | ||||
|   web: | ||||
|     build: . | ||||
|     image: gargron/mastodon | ||||
|  | @ -33,6 +44,7 @@ services: | |||
|     depends_on: | ||||
|       - db | ||||
|       - redis | ||||
| #      - es | ||||
|     volumes: | ||||
|       - ./public/assets:/mastodon/public/assets | ||||
|       - ./public/packs:/mastodon/public/packs | ||||
|  |  | |||
|  | @ -25,6 +25,10 @@ RSpec.configure do |config| | |||
|     end | ||||
|   end | ||||
| 
 | ||||
|   config.before :suite do | ||||
|     Chewy.strategy(:bypass) | ||||
|   end | ||||
| 
 | ||||
|   config.after :suite do | ||||
|     gc_counter = 0 | ||||
|     FileUtils.rm_rf(Dir["#{Rails.root}/spec/test_files/"]) | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue