Skip to content

Commit 6d49b9a

Browse files
committed
Elasticsearch indexing with elasticsearch-(model|rails)
1 parent aad6aed commit 6d49b9a

File tree

9 files changed

+167
-2
lines changed

9 files changed

+167
-2
lines changed

Gemfile

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@ gem "rails", github: "rails/rails", branch: "main"
77

88
gem "bootsnap", require: false
99
gem "dotenv-rails"
10+
gem "elasticsearch-model"
11+
gem "elasticsearch-rails"
1012
gem "importmap-rails"
1113
gem "opengraph_parser"
1214
gem "pg"

Gemfile.lock

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,9 +111,46 @@ GEM
111111
dotenv-rails (2.8.1)
112112
dotenv (= 2.8.1)
113113
railties (>= 3.2)
114+
elasticsearch (7.17.7)
115+
elasticsearch-api (= 7.17.7)
116+
elasticsearch-transport (= 7.17.7)
117+
elasticsearch-api (7.17.7)
118+
multi_json
119+
elasticsearch-model (7.2.1)
120+
activesupport (> 3)
121+
elasticsearch (~> 7)
122+
hashie
123+
elasticsearch-rails (7.2.1)
124+
elasticsearch-transport (7.17.7)
125+
faraday (~> 1)
126+
multi_json
114127
erubi (1.12.0)
128+
faraday (1.10.3)
129+
faraday-em_http (~> 1.0)
130+
faraday-em_synchrony (~> 1.0)
131+
faraday-excon (~> 1.1)
132+
faraday-httpclient (~> 1.0)
133+
faraday-multipart (~> 1.0)
134+
faraday-net_http (~> 1.0)
135+
faraday-net_http_persistent (~> 1.0)
136+
faraday-patron (~> 1.0)
137+
faraday-rack (~> 1.0)
138+
faraday-retry (~> 1.0)
139+
ruby2_keywords (>= 0.0.4)
140+
faraday-em_http (1.0.0)
141+
faraday-em_synchrony (1.0.0)
142+
faraday-excon (1.1.0)
143+
faraday-httpclient (1.0.1)
144+
faraday-multipart (1.0.4)
145+
multipart-post (~> 2)
146+
faraday-net_http (1.0.1)
147+
faraday-net_http_persistent (1.2.0)
148+
faraday-patron (1.0.0)
149+
faraday-rack (1.0.0)
150+
faraday-retry (1.0.3)
115151
globalid (1.1.0)
116152
activesupport (>= 5.0)
153+
hashie (5.0.0)
117154
i18n (1.12.0)
118155
concurrent-ruby (~> 1.0)
119156
importmap-rails (1.1.5)
@@ -134,6 +171,8 @@ GEM
134171
mini_mime (1.1.2)
135172
minitest (5.18.0)
136173
msgpack (1.7.0)
174+
multi_json (1.15.0)
175+
multipart-post (2.3.0)
137176
net-imap (0.3.4)
138177
date
139178
net-protocol
@@ -183,6 +222,7 @@ GEM
183222
connection_pool
184223
reline (0.3.3)
185224
io-console (~> 0.5)
225+
ruby2_keywords (0.0.5)
186226
sidekiq (7.0.7)
187227
concurrent-ruby (< 2)
188228
connection_pool (>= 2.3.0)
@@ -217,6 +257,8 @@ PLATFORMS
217257
DEPENDENCIES
218258
bootsnap
219259
dotenv-rails
260+
elasticsearch-model
261+
elasticsearch-rails
220262
importmap-rails
221263
opengraph_parser
222264
pg

app/jobs/indexer_job.rb

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
# Background job that applies a single index operation for one record to
# Elasticsearch.
class IndexerJob < ApplicationJob
  # @param operation [String, Symbol] "index" or "create" writes the full
  #   document; anything matching /update/ updates it; anything matching
  #   /delete/ removes it from the index.
  # @param klass [String] name of a model class that includes Elasticsearch::Model
  # @param id [Integer, String] primary key of the record / id of the document
  # @raise [ArgumentError] when the operation is not recognised
  def perform(operation, klass, id)
    klass = klass.constantize

    case operation.to_s
    when "index", "create"
      # "create" is accepted as an alias so that create callbacks enqueueing
      # that name index the new record instead of raising ArgumentError.
      model = klass.find_by_id(id)
      return unless model # no such row (any more); nothing to index

      model.__elasticsearch__.index_document
    when /update/
      model = klass.find_by_id(id)
      return unless model # no such row (any more); nothing to update

      model.__elasticsearch__.update_document
    when /delete/
      begin
        klass.__elasticsearch__.client.delete(index: klass.index_name, id: id)
      rescue Elasticsearch::Transport::Transport::Errors::NotFound
        # The document is already absent from the index; deleting is best-effort.
      end
    else
      raise ArgumentError, "Unknown operation '#{operation}'"
    end
  end
end

app/models/concerns/searchable.rb

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
# Description:
2+
# Include this module in models that should be searchable.
3+
#
4+
# Models including the concern are expected to implement 'title' and 'searchable_content'
5+
# for indexing. Title will be given higher priority compared to content.
6+
#
7+
# After adding and indexing your models, a search across all models can be performed via
8+
# the Searchable.search method.
9+
10+
module Searchable
  extend ActiveSupport::Concern

  # Registry of every class that has included this concern. It is handed to
  # Elasticsearch::Model.search so a single query spans all of them.
  mattr_accessor :models
  self.models = []

  # Runs a multi_match query against the "title" and "content" fields of all
  # registered models.
  #
  # @param query [String, nil] search terms
  # @return [Array] an empty array when +query+ is blank, otherwise the
  #   +records+ of the Elasticsearch::Model search response
  def self.search(query)
    return [] if query.blank?

    search_definition = {
      query: {
        multi_match: {
          query: query,
          fields: ["title^2", "content"], # title matches are boosted by 2
        },
      },
    }

    Elasticsearch::Model.search(search_definition, models).records
  end

  # Document body sent to Elasticsearch for this record.
  #
  # @return [Hash] +title+ and +content+ (taken from #searchable_content)
  def as_indexed_json(_options = {})
    {
      title: title,
      content: searchable_content,
    }
  end

  # Guard for the create/update commit callbacks below. Including models may
  # override it to keep some records out of the index.
  #
  # @return [Boolean] true by default
  def should_index?
    true
  end

  # Text indexed as the "content" field. Including models must override this.
  #
  # @raise [NotImplementedError] unless overridden
  def searchable_content
    raise NotImplementedError
  end

  included do
    include Elasticsearch::Model

    Searchable.models << self

    # "index" is the operation name IndexerJob uses for writing a full
    # document; the previous "create" was not one of the job's operations.
    after_commit on: :create, if: :should_index? do
      IndexerJob.perform_later("index", self.class.name, id)
    end

    after_commit on: :update, if: :should_index? do
      IndexerJob.perform_later("update", self.class.name, id)
    end

    # Deletion is enqueued unconditionally (no should_index? guard).
    after_commit on: :destroy do
      IndexerJob.perform_later("delete", self.class.name, id)
    end

    settings index: { number_of_shards: 1 } do
      mappings dynamic: "false" do
        # NOTE(review): mapping-level `boost` is deprecated in recent
        # Elasticsearch versions and the query above already applies
        # "title^2" — confirm whether this option is still wanted.
        indexes :title, analyzer: "english", boost: 2
        indexes :content, analyzer: "english"
      end
    end
  end
end

app/models/link.rb

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
class Link < ApplicationRecord
2+
include Searchable
3+
24
validates_presence_of :url
35
validate :validate_format_of_url
46
validates_inclusion_of :state, in: %w[pending success error]
@@ -8,6 +10,14 @@ class Link < ApplicationRecord
810

911
private
1012

13+
# Index a link only when its state is "success". The model validates the
# `state` attribute (pending/success/error), so that is the attribute read
# here; the previous `status` does not match the validated attribute name.
def should_index?
  state == "success"
end
16+
17+
def searchable_content
18+
description
19+
end
20+
1121
def enqueue_crawl_job
1222
CrawlLinkJob.perform_later(id)
1323
end

app/models/post.rb

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,12 @@
11
# Post record. Including Searchable registers the class for search and
# enqueues indexing jobs on commit.
class Post < ApplicationRecord
  include Searchable

  # Both attributes are required.
  validates :title, :body, presence: true

  private

  # Supplies the text Searchable indexes as the "content" field.
  def searchable_content
    body
  end
end

config/environments/development.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
config.enable_reloading = true
1010

1111
# Do not eager load code on boot.
12-
config.eager_load = false
12+
config.eager_load = true # necessary for inclusion tracking of Searchable concern
1313

1414
# Show full error reports.
1515
config.consider_all_requests_local = true

config/environments/test.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
# this is usually not necessary, and can slow down your test suite. However, it's
1616
# recommended that you enable it in continuous integration systems to ensure eager
1717
# loading is working properly before deploying your code.
18-
config.eager_load = ENV["CI"].present?
18+
config.eager_load = true # necessary for inclusion tracking of Searchable concern
1919

2020
# Configure public file server for tests with Cache-Control for performance.
2121
config.public_file_server.enabled = true
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
# https://github.com/elastic/elasticsearch-ruby/issues/1429#issuecomment-958162468
2+
module Elasticsearch
  # Reopens the client class to replace its verification step.
  # See the issue linked above this patch for the background.
  class Client
    # Ignores every argument and simply marks the client as verified.
    #
    # @return [true]
    def verify_with_version_or_header(*)
      @verified = true
    end
  end
end

0 commit comments

Comments
 (0)