From 96ace7b9fab3d8a53386d634c0bb6d2af6e388e4 Mon Sep 17 00:00:00 2001 From: Martin Brennan Date: Tue, 2 Nov 2021 09:38:54 +1000 Subject: [PATCH] DEV: Add caching to S3 ensure_cors! rules fetching When fetching CORS rules from S3, it takes ~1s to get the rules, which will add up a lot over lots of upload requests. This commit introduces a 1 day cache of the bucket CORS rules. This cache is broken whenever there are new rules not already in the existing rule set from the cache, and is initially set when we fetch the rules from S3 for the bucket. Getting the cached rules from redis takes ~0.000265s, so it's a huge improvement over going to S3 every time. --- lib/s3_helper.rb | 51 ++++++++++++++++++++++++++----- spec/components/s3_helper_spec.rb | 25 +++++++++++++++ 2 files changed, 69 insertions(+), 7 deletions(-) diff --git a/lib/s3_helper.rb b/lib/s3_helper.rb index 94c4093d10..f9f8fe17bf 100644 --- a/lib/s3_helper.rb +++ b/lib/s3_helper.rb @@ -23,6 +23,12 @@ class S3Helper # * presigned put_object URLs for direct S3 uploads UPLOAD_URL_EXPIRES_AFTER_SECONDS ||= 10.minutes.to_i + ## + # It takes ~1s to fetch bucket CORS rules from S3, so instead of having + # to take that hit every time in ensure_cors! we cache the rules for a + # day, and break the cache when a new rule is added. + BUCKET_CORS_RULE_CACHE_SECONDS ||= 1.day.to_i + def initialize(s3_bucket_name, tombstone_prefix = '', options = {}) @s3_client = options.delete(:client) @s3_options = default_s3_options.merge(options) @@ -38,6 +44,7 @@ class S3Helper else tombstone_prefix end + @cors_rule_cache_key = "#{@s3_bucket_name}_bucket_cors" end def self.get_bucket_and_folder_path(s3_bucket_name) @@ -142,24 +149,38 @@ class S3Helper rules = [S3CorsRulesets::ASSETS] if rules.nil? rules = [rules] if !rules.is_a?(Array) existing_rules = [] + cached_rules = Discourse.redis.get(@cors_rule_cache_key) - begin - existing_rules = s3_resource.client.get_bucket_cors( - bucket: @s3_bucket_name - ).cors_rules&.map(&:to_h) || [] - rescue Aws::S3::Errors::NoSuchCORSConfiguration - # no rule + if cached_rules.blank? + existing_rules = fetch_bucket_cors_rules + retrieved_rules_from_cache = false + else + existing_rules = JSON.parse(cached_rules, symbolize_names: true) + retrieved_rules_from_cache = true end + new_rules = rules - existing_rules + if new_rules.empty? + cache_bucket_cors_rules(existing_rules) if !retrieved_rules_from_cache + return + end + + # we want to make sure we aren't stepping on any toes if there + # are new rules but we had retrieved the existing ones from cache + if retrieved_rules_from_cache + existing_rules = fetch_bucket_cors_rules + end new_rules = rules - existing_rules return if new_rules.empty? + final_rules = existing_rules + new_rules s3_resource.client.put_bucket_cors( bucket: @s3_bucket_name, cors_configuration: { - cors_rules: existing_rules + new_rules + cors_rules: final_rules } ) + cache_bucket_cors_rules(final_rules) end def update_lifecycle(id, days, prefix: nil, tag: nil) @@ -275,6 +296,22 @@ class S3Helper private + def fetch_bucket_cors_rules + begin + s3_resource.client.get_bucket_cors( + bucket: @s3_bucket_name + ).cors_rules&.map(&:to_h) || [] + rescue Aws::S3::Errors::NoSuchCORSConfiguration + # no rule + [] + end + end + + def cache_bucket_cors_rules(rules) + Rails.logger.debug("Caching bucket cors rules for #{@s3_bucket_name}. Rules: #{rules}") + Discourse.redis.setex(@cors_rule_cache_key, BUCKET_CORS_RULE_CACHE_SECONDS, rules.to_json) + end + def default_s3_options if SiteSetting.enable_s3_uploads? options = self.class.s3_options(SiteSetting) diff --git a/spec/components/s3_helper_spec.rb b/spec/components/s3_helper_spec.rb index fc0e98f5b1..411e1ca2d0 100644 --- a/spec/components/s3_helper_spec.rb +++ b/spec/components/s3_helper_spec.rb @@ -193,5 +193,30 @@ describe "S3Helper" do ) s3_helper.ensure_cors!([new_rule]) end + + it "returns the cached cors data from the S3 bucket, and does not get it if the rule change is a noop" do + Discourse.redis.setex("test-bucket_bucket_cors", 1.hour, [S3CorsRulesets::ASSETS].to_json) + s3_helper.s3_client.expects(:get_bucket_cors).never + s3_helper.ensure_cors!([ + S3CorsRulesets::ASSETS + ]) + end + + it "returns the cached cors data from the S3 bucket, and breaks the cache and re-fetches if the rule change is not a noop" do + Discourse.redis.setex("test-bucket_bucket_cors", 1.hour, [S3CorsRulesets::ASSETS].to_json) + s3_helper.s3_client.stub_responses(:get_bucket_cors, { + cors_rules: [S3CorsRulesets::ASSETS] + }) + s3_helper.ensure_cors!([ + S3CorsRulesets::DIRECT_UPLOAD + ]) + expect(JSON.parse(Discourse.redis.get("test-bucket_bucket_cors"), symbolize_names: true)).to eq( + [S3CorsRulesets::ASSETS, S3CorsRulesets::DIRECT_UPLOAD] + ) + end + + after do + Discourse.redis.flushdb + end end end