From 6385fbbfbf420994ca25c6e2cab32baf910fb9e8 Mon Sep 17 00:00:00 2001 From: Guo Xiang Tan Date: Wed, 15 Jul 2020 13:25:15 +0800 Subject: [PATCH] FIX: Ignore document length in search when ranking by relevance. Considering document length in search introduced too much variance in our search results such that it made certain searches better but at the same time made certain searches worse. Instead, we want to have a more deterministic way of ranking search so that it is easier to reason about why a post is ranked higher in search than another. The long term plan to tackle repeated terms is to restrict the number of positions for a given lexeme in our search index. --- config/site_settings.yml | 2 +- spec/components/search_spec.rb | 29 +++-------------------------- 2 files changed, 4 insertions(+), 27 deletions(-) diff --git a/config/site_settings.yml b/config/site_settings.yml index c7f82ac247..aeab2e006e 100644 --- a/config/site_settings.yml +++ b/config/site_settings.yml @@ -1748,7 +1748,7 @@ backups: search: search_ranking_normalization: - default: '1' + default: '0' hidden: true min_search_term_length: client: true diff --git a/spec/components/search_spec.rb b/spec/components/search_spec.rb index 4f3fd88a67..90579f8995 100644 --- a/spec/components/search_spec.rb +++ b/spec/components/search_spec.rb @@ -402,29 +402,6 @@ describe Search do expect(result.blurb(reply)).to eq(expected_blurb) end - it 'does not allow a post with repeated words to dominate the ranking' do - category = Fabricate(:category_with_definition, name: "winter is coming") - - post = Fabricate(:post, - raw: "I think winter will end soon", - topic: Fabricate(:topic, - title: "dragon john snow winter", - category: category - ) - ) - - post2 = Fabricate(:post, - raw: "I think #{'winter' * 20} will end soon", - topic: Fabricate(:topic, title: "dragon john snow summer", category: category) - ) - - result = Search.execute('winter') - - expect(result.posts.pluck(:id)).to eq([ - post.id, 
category.topic.first_post.id, post2.id - ]) - end - it 'applies a small penalty to closed topic when ranking' do post = Fabricate(:post, raw: "My weekly update", @@ -698,12 +675,12 @@ describe Search do expect(search.posts.map(&:id)).to eq([ child_of_ignored_category.topic.first_post, category.topic.first_post, - post, - post2 + post2, + post ].map(&:id)) search = Search.execute("snow") - expect(search.posts).to eq([post, post2]) + expect(search.posts.map(&:id)).to eq([post2.id, post.id]) category.set_permissions({}) category.save