From f8eddd40ade5a62cd5f9349b71a2d1b60e3c7d2e Mon Sep 17 00:00:00 2001 From: Sam Saffron Date: Mon, 6 May 2019 15:58:49 +1000 Subject: [PATCH] PERF: remove avg_time calculations and regular jobs from posts and topics After careful analysis of large data-sets it became apparent that avg_time had no impact whatsoever on "best of" topic scoring. Calculating avg_time was a very costly operation especially on large databases. We have some longer term plans of introducing other weighting that is read time based into our scoring for "best of" and "top" topics, but in the interim to stop a large amount of work that is not achieving any value we are removing the jobs. Column removal will follow once we decide on a new replacement metric. --- app/jobs/scheduled/calculate_avg_time.rb | 14 ----- app/jobs/scheduled/weekly.rb | 2 - app/models/post.rb | 31 --------- app/models/topic.rb | 25 -------- app/serializers/post_serializer.rb | 1 - lib/score_calculator.rb | 1 - spec/models/post_spec.rb | 8 --- spec/models/post_timing_spec.rb | 63 ------------------- spec/models/topic_spec.rb | 7 --- test/javascripts/fixtures/post.js.es6 | 3 - .../fixtures/search-fixtures.js.es6 | 5 -- test/javascripts/fixtures/topic.js.es6 | 38 ----------- 12 files changed, 198 deletions(-) delete mode 100644 app/jobs/scheduled/calculate_avg_time.rb diff --git a/app/jobs/scheduled/calculate_avg_time.rb b/app/jobs/scheduled/calculate_avg_time.rb deleted file mode 100644 index c0bc9387e4..0000000000 --- a/app/jobs/scheduled/calculate_avg_time.rb +++ /dev/null @@ -1,14 +0,0 @@ -module Jobs - class CalculateAvgTime < Jobs::Scheduled - every 1.day - - # PERF: these calculations can become exceedingly expnsive - # they run a huge gemoetric mean and are hard to optimise - # defer to only run once a day - def execute(args) - # Update the average times - Post.calculate_avg_time(2.days.ago) - Topic.calculate_avg_time(2.days.ago) - end - end -end diff --git a/app/jobs/scheduled/weekly.rb b/app/jobs/scheduled/weekly.rb index 72fdb0127c..d76b9f32f9 100644 --- a/app/jobs/scheduled/weekly.rb +++ b/app/jobs/scheduled/weekly.rb @@ -8,8 +8,6 @@ module Jobs every 1.week def execute(args) - Post.calculate_avg_time - Topic.calculate_avg_time ScoreCalculator.new.calculate MiniScheduler::Stat.purge_old Draft.cleanup! diff --git a/app/models/post.rb b/app/models/post.rb index 903400c004..f51146ca8c 100644 --- a/app/models/post.rb +++ b/app/models/post.rb @@ -674,37 +674,6 @@ class Post < ActiveRecord::Base end - # This calculates the geometric mean of the post timings and stores it along with - # each post. - def self.calculate_avg_time(min_topic_age = nil) - retry_lock_error do - builder = DB.build("UPDATE posts - SET avg_time = (x.gmean / 1000) - FROM (SELECT post_timings.topic_id, - post_timings.post_number, - round(exp(avg(CASE WHEN msecs > 0 THEN ln(msecs) ELSE 0 END))) AS gmean - FROM post_timings - INNER JOIN posts AS p2 - ON p2.post_number = post_timings.post_number - AND p2.topic_id = post_timings.topic_id - AND p2.user_id <> post_timings.user_id - /*where2*/ - GROUP BY post_timings.topic_id, post_timings.post_number) AS x - /*where*/") - - builder.where("x.topic_id = posts.topic_id - AND x.post_number = posts.post_number - AND (posts.avg_time <> (x.gmean / 1000)::int OR posts.avg_time IS NULL)") - - if min_topic_age - builder.where2("p2.topic_id IN (SELECT id FROM topics where bumped_at > :bumped_at)", - bumped_at: min_topic_age) - end - - builder.exec - end - end - before_save do self.last_editor_id ||= user_id diff --git a/app/models/topic.rb b/app/models/topic.rb index bb5a0835ad..b8a0b18797 100644 --- a/app/models/topic.rb +++ b/app/models/topic.rb @@ -671,31 +671,6 @@ class Topic < ActiveRecord::Base SQL end - # This calculates the geometric mean of the posts and stores it with the topic - def self.calculate_avg_time(min_topic_age = nil) - builder = DB.build <<~SQL - UPDATE topics - SET avg_time = x.gmean - FROM (SELECT topic_id, - round(exp(avg(ln(avg_time)))) AS gmean - FROM posts - WHERE avg_time > 0 AND avg_time IS NOT NULL - GROUP BY topic_id) AS x - /*where*/ - SQL - - builder.where <<~SQL - x.topic_id = topics.id AND - (topics.avg_time <> x.gmean OR topics.avg_time IS NULL) - SQL - - if min_topic_age - builder.where("topics.bumped_at > :bumped_at", bumped_at: min_topic_age) - end - - builder.exec - end - def changed_to_category(new_category) return true if new_category.blank? || Category.exists?(topic_id: id) return false if new_category.id == SiteSetting.uncategorized_category_id && !SiteSetting.allow_uncategorized_topics diff --git a/app/serializers/post_serializer.rb b/app/serializers/post_serializer.rb index 8614d0fa5f..2e059ac3b1 100644 --- a/app/serializers/post_serializer.rb +++ b/app/serializers/post_serializer.rb @@ -23,7 +23,6 @@ class PostSerializer < BasicPostSerializer :reply_count, :reply_to_post_number, :quote_count, - :avg_time, :incoming_link_count, :reads, :score, diff --git a/lib/score_calculator.rb b/lib/score_calculator.rb index 69c8173d9b..cdb70afd35 100644 --- a/lib/score_calculator.rb +++ b/lib/score_calculator.rb @@ -6,7 +6,6 @@ class ScoreCalculator like_score: 15, incoming_link_count: 5, bookmark_count: 2, - avg_time: 0.05, reads: 0.2 } end diff --git a/spec/models/post_spec.rb b/spec/models/post_spec.rb index 84629708bc..be6cfeff40 100644 --- a/spec/models/post_spec.rb +++ b/spec/models/post_spec.rb @@ -1023,14 +1023,6 @@ describe Post do end end - describe "calculate_avg_time" do - - it "should not crash" do - Post.calculate_avg_time - Post.calculate_avg_time(1.day.ago) - end - end - describe "has_host_spam" do let(:raw) { "hello from my site http://www.example.net http://#{GlobalSetting.hostname} http://#{RailsMultisite::ConnectionManagement.current_hostname}" } diff --git a/spec/models/post_timing_spec.rb b/spec/models/post_timing_spec.rb index b09a84881e..dc52290f78 100644 --- a/spec/models/post_timing_spec.rb +++ b/spec/models/post_timing_spec.rb @@ -162,69 +162,6 @@ describe PostTiming do end - describe 'avg times' do - - describe 'posts' do - it 'has no avg_time by default' do - expect(@post.avg_time).to be_blank - end - - it "doesn't change when we calculate the avg time for the post because there's no timings" do - Post.calculate_avg_time - @post.reload - expect(@post.avg_time).to be_blank - end - end - - describe 'topics' do - it 'has no avg_time by default' do - expect(@topic.avg_time).to be_blank - end - - it "doesn't change when we calculate the avg time for the post because there's no timings" do - Topic.calculate_avg_time - @topic.reload - expect(@topic.avg_time).to be_blank - end - end - - describe "it doesn't create an avg time for the same user" do - it 'something' do - PostTiming.record_timing(@timing_attrs.merge(user_id: @post.user_id)) - Post.calculate_avg_time - @post.reload - expect(@post.avg_time).to be_blank - end - - end - - describe 'with a timing for another user' do - before do - PostTiming.record_timing(@timing_attrs) - Post.calculate_avg_time - @post.reload - end - - it 'has a post avg_time from the timing' do - expect(@post.avg_time).to be_present - end - - describe 'forum topics' do - before do - Topic.calculate_avg_time - @topic.reload - end - - it 'has an avg_time from the timing' do - expect(@topic.avg_time).to be_present - end - - end - - end - - end - end end diff --git a/spec/models/topic_spec.rb b/spec/models/topic_spec.rb index 0180d94c48..34ca73a16c 100644 --- a/spec/models/topic_spec.rb +++ b/spec/models/topic_spec.rb @@ -2030,13 +2030,6 @@ describe Topic do end - describe "calculate_avg_time" do - it "does not explode" do - Topic.calculate_avg_time - Topic.calculate_avg_time(1.day.ago) - end - end - describe "expandable_first_post?" do let(:topic) { Fabricate.build(:topic) } diff --git a/test/javascripts/fixtures/post.js.es6 b/test/javascripts/fixtures/post.js.es6 index 28288e4739..736f4376d2 100644 --- a/test/javascripts/fixtures/post.js.es6 +++ b/test/javascripts/fixtures/post.js.es6 @@ -15,7 +15,6 @@ export default { reply_count: 1, reply_to_post_number: null, quote_count: 0, - avg_time: 25, incoming_link_count: 314, reads: 475, score: 1702.25, @@ -68,7 +67,6 @@ export default { reply_count: 0, reply_to_post_number: null, quote_count: 0, - avg_time: null, incoming_link_count: 0, reads: 1, score: 0, @@ -118,7 +116,6 @@ export default { reply_count: 0, reply_to_post_number: null, quote_count: 0, - avg_time: null, incoming_link_count: 0, reads: 1, score: 0, diff --git a/test/javascripts/fixtures/search-fixtures.js.es6 b/test/javascripts/fixtures/search-fixtures.js.es6 index 96a14ae24b..60d179fa74 100644 --- a/test/javascripts/fixtures/search-fixtures.js.es6 +++ b/test/javascripts/fixtures/search-fixtures.js.es6 @@ -838,7 +838,6 @@ export default { reply_count: 1, reply_to_post_number: null, quote_count: 0, - avg_time: 24, incoming_link_count: 4422, reads: 327, score: 21978.4, @@ -928,7 +927,6 @@ export default { reply_count: 2, reply_to_post_number: null, quote_count: 0, - avg_time: 36, incoming_link_count: 4680, reads: 491, score: 23815.8, @@ -1019,7 +1017,6 @@ export default { reply_count: 1, reply_to_post_number: null, quote_count: 0, - avg_time: 36, incoming_link_count: 1483, reads: 274, score: 7985.4, @@ -1110,7 +1107,6 @@ export default { reply_count: 0, reply_to_post_number: null, quote_count: 0, - avg_time: 39, incoming_link_count: 1241, reads: 149, score: 6161.35, @@ -1200,7 +1196,6 @@ export default { reply_count: 1, reply_to_post_number: null, quote_count: 0, - avg_time: 22, incoming_link_count: 386, reads: 163, score: 1953.7, diff --git a/test/javascripts/fixtures/topic.js.es6 b/test/javascripts/fixtures/topic.js.es6 index 2d657aae38..0ced6085b4 100644 --- a/test/javascripts/fixtures/topic.js.es6 +++ b/test/javascripts/fixtures/topic.js.es6 @@ -20,7 +20,6 @@ export default { reply_count: 1, reply_to_post_number: null, quote_count: 0, - avg_time: 25, incoming_link_count: 314, reads: 475, score: 1702.25, @@ -164,7 +163,6 @@ export default { reply_count: 0, reply_to_post_number: null, quote_count: 0, - avg_time: 27, incoming_link_count: 16, reads: 460, score: 308.35, @@ -268,7 +266,6 @@ export default { reply_count: 3, reply_to_post_number: null, quote_count: 0, - avg_time: 33, incoming_link_count: 5, reads: 449, score: 191.45, @@ -382,7 +379,6 @@ export default { reply_count: 0, reply_to_post_number: null, quote_count: 0, - avg_time: 20, incoming_link_count: 15, reads: 401, score: 291.2, @@ -486,7 +482,6 @@ export default { reply_count: 2, reply_to_post_number: 3, quote_count: 1, - avg_time: 22, incoming_link_count: 10, reads: 386, score: 213.3, @@ -592,7 +587,6 @@ export default { reply_count: 1, reply_to_post_number: 5, quote_count: 0, - avg_time: 17, incoming_link_count: 4, reads: 329, score: 106.65, @@ -708,7 +702,6 @@ export default { reply_count: 1, reply_to_post_number: 6, quote_count: 1, - avg_time: 16, incoming_link_count: 0, reads: 326, score: 86.0, @@ -812,7 +805,6 @@ export default { reply_count: 1, reply_to_post_number: 7, quote_count: 0, - avg_time: 11, incoming_link_count: 2, reads: 296, score: 89.75, @@ -922,7 +914,6 @@ export default { reply_count: 1, reply_to_post_number: 8, quote_count: 1, - avg_time: 10, incoming_link_count: 0, reads: 293, score: 79.1, @@ -1017,7 +1008,6 @@ export default { reply_count: 1, reply_to_post_number: 9, quote_count: 0, - avg_time: 7, incoming_link_count: 0, reads: 275, score: 75.35, @@ -1117,7 +1107,6 @@ export default { reply_count: 1, reply_to_post_number: 10, quote_count: 0, - avg_time: 7, incoming_link_count: 1, reads: 273, score: 79.95, @@ -1217,7 +1206,6 @@ export default { reply_count: 1, reply_to_post_number: 11, quote_count: 1, - avg_time: 7, incoming_link_count: 2, reads: 273, score: 84.95, @@ -1312,7 +1300,6 @@ export default { reply_count: 0, reply_to_post_number: null, quote_count: 0, - avg_time: 7, incoming_link_count: 11, reads: 290, score: 158.35, @@ -1407,7 +1394,6 @@ export default { reply_count: 1, reply_to_post_number: 12, quote_count: 1, - avg_time: 9, incoming_link_count: 0, reads: 283, score: 137.05, @@ -1511,7 +1497,6 @@ export default { reply_count: 1, reply_to_post_number: 14, quote_count: 1, - avg_time: 7, incoming_link_count: 0, reads: 260, score: 72.35, @@ -1606,7 +1591,6 @@ export default { reply_count: 1, reply_to_post_number: 15, quote_count: 1, - avg_time: 7, incoming_link_count: 0, reads: 254, score: 71.15, @@ -1701,7 +1685,6 @@ export default { reply_count: 2, reply_to_post_number: 16, quote_count: 1, - avg_time: 7, incoming_link_count: 0, reads: 255, score: 76.35, @@ -1796,7 +1779,6 @@ export default { reply_count: 2, reply_to_post_number: 17, quote_count: 0, - avg_time: 8, incoming_link_count: 0, reads: 264, score: 168.2, @@ -1896,7 +1878,6 @@ export default { reply_count: 0, reply_to_post_number: 18, quote_count: 0, - avg_time: 8, incoming_link_count: 1, reads: 241, score: 68.6, @@ -1996,7 +1977,6 @@ export default { reply_count: 0, reply_to_post_number: null, quote_count: 0, - avg_time: 9, incoming_link_count: 2, reads: 244, score: 74.25, @@ -3161,7 +3141,6 @@ export default { reply_count: 0, reply_to_post_number: null, quote_count: 0, - avg_time: null, incoming_link_count: 14, reads: 24, score: 224.6, @@ -3224,7 +3203,6 @@ export default { reply_count: 0, reply_to_post_number: null, quote_count: 0, - avg_time: null, incoming_link_count: 6, reads: 22, score: 34.2, @@ -3277,7 +3255,6 @@ export default { reply_count: 1, reply_to_post_number: null, quote_count: 0, - avg_time: null, incoming_link_count: 0, reads: 14, score: 7.2, @@ -3330,7 +3307,6 @@ export default { reply_count: 0, reply_to_post_number: null, quote_count: 0, - avg_time: null, incoming_link_count: 6, reads: 12, score: 31.6, @@ -3383,7 +3359,6 @@ export default { reply_count: 0, reply_to_post_number: 3, quote_count: 1, - avg_time: null, incoming_link_count: 0, reads: 11, score: 16.0, @@ -3735,7 +3710,6 @@ export default { reply_count: 0, reply_to_post_number: null, quote_count: 0, - avg_time: null, incoming_link_count: 0, reads: 1, score: 0, @@ -3784,7 +3758,6 @@ export default { reply_count: 0, reply_to_post_number: null, quote_count: 0, - avg_time: null, incoming_link_count: 0, reads: 1, score: 0, @@ -3833,7 +3806,6 @@ export default { reply_count: 0, reply_to_post_number: null, quote_count: 0, - avg_time: null, incoming_link_count: 0, reads: 1, score: 0, @@ -4028,7 +4000,6 @@ export default { reply_count: 0, reply_to_post_number: null, quote_count: 0, - avg_time: null, incoming_link_count: 0, reads: 1, score: 0, @@ -4081,7 +4052,6 @@ export default { reply_count: 0, reply_to_post_number: null, quote_count: 0, - avg_time: null, incoming_link_count: 0, reads: 1, score: 0, @@ -4299,7 +4269,6 @@ export default { reply_count: 0, reply_to_post_number: null, quote_count: 0, - avg_time: null, incoming_link_count: 0, reads: 1, score: 0, @@ -4348,7 +4317,6 @@ export default { reply_count: 0, reply_to_post_number: null, quote_count: 0, - avg_time: null, incoming_link_count: 0, reads: 1, score: 0, @@ -4397,7 +4365,6 @@ export default { reply_count: 0, reply_to_post_number: null, quote_count: 0, - avg_time: null, incoming_link_count: 0, reads: 1, score: 0, @@ -4594,7 +4561,6 @@ export default { reply_count: 0, reply_to_post_number: null, quote_count: 0, - avg_time: null, incoming_link_count: 0, reads: 1, score: 0, @@ -4643,7 +4609,6 @@ export default { reply_count: 0, reply_to_post_number: null, quote_count: 0, - avg_time: null, incoming_link_count: 0, reads: 1, score: 0, @@ -4692,7 +4657,6 @@ export default { reply_count: 0, reply_to_post_number: null, quote_count: 0, - avg_time: null, incoming_link_count: 0, reads: 1, score: 0, @@ -4888,7 +4852,6 @@ export default { reply_count: 0, reply_to_post_number: null, quote_count: 0, - avg_time: 15, incoming_link_count: 0, reads: 2, score: 1.15, @@ -4951,7 +4914,6 @@ export default { reply_count: 0, reply_to_post_number: null, quote_count: 0, - avg_time: 16, incoming_link_count: 0, reads: 2, score: 1.2,