From bd07658a375e66f43604ff5215a4afaa4226b02c Mon Sep 17 00:00:00 2001 From: Guo Xiang Tan Date: Fri, 1 Jul 2016 15:22:30 +0800 Subject: [PATCH] PERF: Split queries when cleaning uploads. This reduces the number of scans that the db has to do in the query to fetch orphan uploads. Futheremore, we were not batching our records which bloats memory. --- app/jobs/scheduled/clean_up_uploads.rb | 36 +++++---- spec/components/cooked_post_processor_spec.rb | 18 ++--- .../components/file_store/local_store_spec.rb | 4 +- spec/components/file_store/s3_store_spec.rb | 4 +- spec/fabricators/upload_fabricator.rb | 4 +- spec/fabricators/user_avatar_fabricator.rb | 3 + spec/jobs/clean_up_uploads_spec.rb | 73 ++++++++++++++++++- 7 files changed, 111 insertions(+), 31 deletions(-) create mode 100644 spec/fabricators/user_avatar_fabricator.rb diff --git a/app/jobs/scheduled/clean_up_uploads.rb b/app/jobs/scheduled/clean_up_uploads.rb index 523a017c46..0ec3c96b54 100644 --- a/app/jobs/scheduled/clean_up_uploads.rb +++ b/app/jobs/scheduled/clean_up_uploads.rb @@ -1,25 +1,35 @@ module Jobs - class CleanUpUploads < Jobs::Scheduled every 1.hour def execute(args) return unless SiteSetting.clean_up_uploads? + ignore_urls = [] + ignore_urls |= UserProfile.uniq.select(:profile_background).where("profile_background IS NOT NULL AND profile_background != ''").pluck(:profile_background) + ignore_urls |= UserProfile.uniq.select(:card_background).where("card_background IS NOT NULL AND card_background != ''").pluck(:card_background) + ignore_urls |= Category.uniq.select(:logo_url).where("logo_url IS NOT NULL AND logo_url != ''").pluck(:logo_url) + ignore_urls |= Category.uniq.select(:background_url).where("background_url IS NOT NULL AND background_url != ''").pluck(:background_url) + + ids = [] + ids |= PostUpload.uniq.select(:upload_id).pluck(:upload_id) + ids |= User.uniq.select(:uploaded_avatar_id).where("uploaded_avatar_id IS NOT NULL").pluck(:uploaded_avatar_id) + ids |= UserAvatar.uniq.select(:gravatar_upload_id).where("gravatar_upload_id IS NOT NULL").pluck(:gravatar_upload_id) + grace_period = [SiteSetting.clean_orphan_uploads_grace_period_hours, 1].max - Upload.where("created_at < ?", grace_period.hour.ago) - .where("retain_hours IS NULL OR created_at < current_timestamp - interval '1 hour' * retain_hours") - .where("id NOT IN (SELECT upload_id FROM post_uploads WHERE upload_id IS NOT NULL)") - .where("id NOT IN (SELECT uploaded_avatar_id FROM users WHERE uploaded_avatar_id IS NOT NULL)") - .where("id NOT IN (SELECT gravatar_upload_id FROM user_avatars WHERE gravatar_upload_id IS NOT NULL)") - .where("url NOT IN (SELECT profile_background FROM user_profiles WHERE LENGTH(COALESCE(profile_background, '')) > 0)") - .where("url NOT IN (SELECT card_background FROM user_profiles WHERE LENGTH(COALESCE(card_background, '')) > 0)") - .where("url NOT IN (SELECT logo_url FROM categories WHERE LENGTH(COALESCE(logo_url, '')) > 0)") - .where("url NOT IN (SELECT background_url FROM categories WHERE LENGTH(COALESCE(background_url, '')) > 0)") - .destroy_all + result = Upload.where("created_at < ?", grace_period.hour.ago) + .where("retain_hours IS NULL OR created_at < current_timestamp - interval '1 hour' * retain_hours") + + if !ids.empty? + result = result.where("id NOT IN (?)", ids) + end + + if !ignore_urls.empty? + result = result.where("url NOT IN (?)", ignore_urls) + end + + result.find_each { |upload| upload.destroy } end - end - end diff --git a/spec/components/cooked_post_processor_spec.rb b/spec/components/cooked_post_processor_spec.rb index 02e36bc8a5..fef5750de7 100644 --- a/spec/components/cooked_post_processor_spec.rb +++ b/spec/components/cooked_post_processor_spec.rb @@ -120,9 +120,9 @@ describe CookedPostProcessor do it "generates overlay information" do cpp.post_process_images - expect(cpp.html).to match_html '

' + expect(cpp.html).to match_html "

+logo.png1750x2000 1.21 KB +

" expect(cpp).to be_dirty end @@ -153,9 +153,9 @@ describe CookedPostProcessor do it "generates overlay information" do cpp.post_process_images - expect(cpp.html).to match_html '

' + expect(cpp.html).to match_html "

+logo.png1750x2000 1.21 KB +

" expect(cpp).to be_dirty end @@ -181,9 +181,9 @@ describe CookedPostProcessor do it "generates overlay information" do cpp.post_process_images - expect(cpp.html).to match_html '

' + expect(cpp.html).to match_html "

+ WAT1750x2000 1.21 KB +

" expect(cpp).to be_dirty end diff --git a/spec/components/file_store/local_store_spec.rb b/spec/components/file_store/local_store_spec.rb index 68724c5777..142c83d854 100644 --- a/spec/components/file_store/local_store_spec.rb +++ b/spec/components/file_store/local_store_spec.rb @@ -14,7 +14,7 @@ describe FileStore::LocalStore do it "returns a relative url" do store.expects(:copy_file) - expect(store.store_upload(uploaded_file, upload)).to match(/\/uploads\/default\/original\/.+e9d71f5ee7c92d6dc9e92ffdad17b8bd49418f98\.png/) + expect(store.store_upload(uploaded_file, upload)).to match(/\/uploads\/default\/original\/.+#{upload.sha1}\.png/) end end @@ -23,7 +23,7 @@ describe FileStore::LocalStore do it "returns a relative url" do store.expects(:copy_file) - expect(store.store_optimized_image({}, optimized_image)).to match(/\/uploads\/default\/optimized\/.+e9d71f5ee7c92d6dc9e92ffdad17b8bd49418f98_#{OptimizedImage::VERSION}_100x200\.png/) + expect(store.store_optimized_image({}, optimized_image)).to match(/\/uploads\/default\/optimized\/.+#{optimized_image.upload.sha1}_#{OptimizedImage::VERSION}_100x200\.png/) end end diff --git a/spec/components/file_store/s3_store_spec.rb b/spec/components/file_store/s3_store_spec.rb index 8b9a6309f5..8b504a72b5 100644 --- a/spec/components/file_store/s3_store_spec.rb +++ b/spec/components/file_store/s3_store_spec.rb @@ -23,7 +23,7 @@ describe FileStore::S3Store do it "returns an absolute schemaless url" do s3_helper.expects(:upload) - expect(store.store_upload(uploaded_file, upload)).to match(/\/\/s3_upload_bucket\.s3\.amazonaws\.com\/original\/.+e9d71f5ee7c92d6dc9e92ffdad17b8bd49418f98\.png/) + expect(store.store_upload(uploaded_file, upload)).to match(/\/\/s3_upload_bucket\.s3\.amazonaws\.com\/original\/.+#{upload.sha1}\.png/) end end @@ -32,7 +32,7 @@ describe FileStore::S3Store do it "returns an absolute schemaless url" do s3_helper.expects(:upload) - expect(store.store_optimized_image(optimized_image_file, optimized_image)).to match(/\/\/s3_upload_bucket\.s3\.amazonaws\.com\/optimized\/.+e9d71f5ee7c92d6dc9e92ffdad17b8bd49418f98_#{OptimizedImage::VERSION}_100x200\.png/) + expect(store.store_optimized_image(optimized_image_file, optimized_image)).to match(/\/\/s3_upload_bucket\.s3\.amazonaws\.com\/optimized\/.+#{optimized_image.upload.sha1}_#{OptimizedImage::VERSION}_100x200\.png/) end end diff --git a/spec/fabricators/upload_fabricator.rb b/spec/fabricators/upload_fabricator.rb index a64d7def4d..96abf71405 100644 --- a/spec/fabricators/upload_fabricator.rb +++ b/spec/fabricators/upload_fabricator.rb @@ -1,11 +1,11 @@ Fabricator(:upload) do user - sha1 "e9d71f5ee7c92d6dc9e92ffdad17b8bd49418f98" + sha1 { sequence(:sha1) { |n| Digest::SHA1.hexdigest(n.to_s) } } original_filename "logo.png" filesize 1234 width 100 height 200 - url "/uploads/default/1/1234567890123456.png" + url { sequence(:url) { |n| "/uploads/default/#{n}/1234567890123456.png" } } end Fabricator(:attachment, from: :upload) do diff --git a/spec/fabricators/user_avatar_fabricator.rb b/spec/fabricators/user_avatar_fabricator.rb new file mode 100644 index 0000000000..3cbd17cddc --- /dev/null +++ b/spec/fabricators/user_avatar_fabricator.rb @@ -0,0 +1,3 @@ +Fabricator(:user_avatar) do + user +end diff --git a/spec/jobs/clean_up_uploads_spec.rb b/spec/jobs/clean_up_uploads_spec.rb index 6035f17fc2..ca0fd7390d 100644 --- a/spec/jobs/clean_up_uploads_spec.rb +++ b/spec/jobs/clean_up_uploads_spec.rb @@ -3,16 +3,14 @@ require 'rails_helper' require_dependency 'jobs/scheduled/clean_up_uploads' describe Jobs::CleanUpUploads do - before do Upload.destroy_all SiteSetting.clean_up_uploads = true SiteSetting.clean_orphan_uploads_grace_period_hours = 1 + @upload = Fabricate(:upload, created_at: 2.hours.ago) end it "deletes orphan uploads" do - Fabricate(:upload, created_at: 2.hours.ago) - expect(Upload.count).to be(1) Jobs::CleanUpUploads.new.execute(nil) @@ -20,4 +18,73 @@ describe Jobs::CleanUpUploads do expect(Upload.count).to be(0) end + it "does not delete profile background uploads" do + profile_background_upload = Fabricate(:upload, created_at: 2.hours.ago) + UserProfile.last.update_attributes!(profile_background: profile_background_upload.url) + + Jobs::CleanUpUploads.new.execute(nil) + + expect(Upload.find_by(id: @upload.id)).to eq(nil) + expect(Upload.find_by(id: profile_background_upload.id)).to eq(profile_background_upload) + end + + it "does not delete card background uploads" do + card_background_upload = Fabricate(:upload, created_at: 2.hours.ago) + UserProfile.last.update_attributes!(card_background: card_background_upload.url) + + Jobs::CleanUpUploads.new.execute(nil) + + expect(Upload.find_by(id: @upload.id)).to eq(nil) + expect(Upload.find_by(id: card_background_upload.id)).to eq(card_background_upload) + end + + it "does not delete category logo uploads" do + category_logo_upload = Fabricate(:upload, created_at: 2.hours.ago) + category = Fabricate(:category, logo_url: category_logo_upload.url) + + Jobs::CleanUpUploads.new.execute(nil) + + expect(Upload.find_by(id: @upload.id)).to eq(nil) + expect(Upload.find_by(id: category_logo_upload.id)).to eq(category_logo_upload) + end + + it "does not delete category background url uploads" do + category_background_url = Fabricate(:upload, created_at: 2.hours.ago) + category = Fabricate(:category, background_url: category_background_url.url) + + Jobs::CleanUpUploads.new.execute(nil) + + expect(Upload.find_by(id: @upload.id)).to eq(nil) + expect(Upload.find_by(id: category_background_url.id)).to eq(category_background_url) + end + + it "does not delete post uploads" do + upload = Fabricate(:upload, created_at: 2.hours.ago) + post = Fabricate(:post, uploads: [upload]) + + Jobs::CleanUpUploads.new.execute(nil) + + expect(Upload.find_by(id: @upload.id)).to eq(nil) + expect(Upload.find_by(id: upload.id)).to eq(upload) + end + + it "does not delete user uploaded avatar" do + upload = Fabricate(:upload, created_at: 2.hours.ago) + user = Fabricate(:user, uploaded_avatar: upload) + + Jobs::CleanUpUploads.new.execute(nil) + + expect(Upload.find_by(id: @upload.id)).to eq(nil) + expect(Upload.find_by(id: upload.id)).to eq(upload) + end + + it "does not delete user gravatar" do + upload = Fabricate(:upload, created_at: 2.hours.ago) + user = Fabricate(:user, user_avatar: Fabricate(:user_avatar, gravatar_upload: upload)) + + Jobs::CleanUpUploads.new.execute(nil) + + expect(Upload.find_by(id: @upload.id)).to eq(nil) + expect(Upload.find_by(id: upload.id)).to eq(upload) + end end