From daeda80ada1d914d52dcc53a2cda087e5f62330c Mon Sep 17 00:00:00 2001
From: Guo Xiang Tan
Date: Mon, 1 Apr 2019 10:06:27 +0800
Subject: [PATCH] FIX: Don't index posts with empty `Post#raw` for search. (#7263)

* DEV: Remove unnecessary join in `Jobs::ReindexSearch`.

* FIX: Don't index posts with empty `Post#raw` for search.
---
 app/jobs/scheduled/reindex_search.rb | 10 +++++++++-
 app/services/search_indexer.rb       |  2 +-
 spec/jobs/reindex_search_spec.rb     | 11 +++++++++++
 spec/services/search_indexer_spec.rb |  7 +++++++
 4 files changed, 28 insertions(+), 2 deletions(-)

diff --git a/app/jobs/scheduled/reindex_search.rb b/app/jobs/scheduled/reindex_search.rb
index 0e9768d292..6e919eba57 100644
--- a/app/jobs/scheduled/reindex_search.rb
+++ b/app/jobs/scheduled/reindex_search.rb
@@ -9,6 +9,7 @@ module Jobs
       rebuild_problem_categories
       rebuild_problem_users
       rebuild_problem_tags
+      clean_post_search_data
     end
 
     def rebuild_problem_categories(limit = 500)
@@ -60,8 +61,15 @@ module Jobs
 
     private
 
+    def clean_post_search_data
+      PostSearchData
+        .joins("LEFT JOIN posts p ON p.id = post_search_data.post_id")
+        .where("p.raw = ''")
+        .delete_all
+    end
+
     def load_problem_post_ids(limit)
-      Post.joins(:topic)
+      Post
         .where('posts.id IN (
                 SELECT p2.id FROM posts p2
                 LEFT JOIN post_search_data pd ON pd.locale = ? AND pd.version = ? AND p2.id = pd.post_id
diff --git a/app/services/search_indexer.rb b/app/services/search_indexer.rb
index 2f496e1206..c7b07c504d 100644
--- a/app/services/search_indexer.rb
+++ b/app/services/search_indexer.rb
@@ -134,7 +134,7 @@ class SearchIndexer
     category_name = topic.category&.name if topic
     tag_names = topic.tags.pluck(:name).join(' ') if topic
 
-    if Post === obj &&
+    if Post === obj && obj.raw.present? &&
       (
         obj.saved_change_to_cooked? ||
         obj.saved_change_to_topic_id? ||
diff --git a/spec/jobs/reindex_search_spec.rb b/spec/jobs/reindex_search_spec.rb
index 8b8c870d31..01879bd7af 100644
--- a/spec/jobs/reindex_search_spec.rb
+++ b/spec/jobs/reindex_search_spec.rb
@@ -28,4 +28,15 @@ describe Jobs::ReindexSearch do
       expect(model.send("#{m}_search_data").version).to eq Search::INDEX_VERSION
     end
   end
+
+  it "should clean up post_search_data of posts with empty raw" do
+    post = Fabricate(:post)
+    post2 = Fabricate(:post, post_type: Post.types[:small_action])
+    post2.raw = ""
+    post2.save!(validate: false)
+
+    expect { subject.execute({}) }.to change { PostSearchData.count }.by(-1)
+    expect(Post.all).to contain_exactly(post, post2)
+    expect(PostSearchData.all).to contain_exactly(post.post_search_data)
+  end
 end
diff --git a/spec/services/search_indexer_spec.rb b/spec/services/search_indexer_spec.rb
index 1922237a40..d748ec638a 100644
--- a/spec/services/search_indexer_spec.rb
+++ b/spec/services/search_indexer_spec.rb
@@ -103,5 +103,12 @@ describe SearchIndexer do
       expect { post.update!(topic_id: Fabricate(:topic).id) }
         .to change { post.reload.post_search_data.raw_data }
     end
+
+    it 'should not index posts with empty raw' do
+      expect do
+        post = Fabricate.build(:post, raw: "", post_type: Post.types[:small_action])
+        post.save!(validate: false)
+      end.to_not change { PostSearchData.count }
+    end
   end
 end