From 3632b8d8d6580e6630f20f81dfd6c1e0f4e9bbdd Mon Sep 17 00:00:00 2001 From: Sam Date: Fri, 13 Apr 2018 14:58:33 +1000 Subject: [PATCH] FEATURE: provide extra signal about content age to crawlers Adds Last-Modified field to help teach crawlers not to crawl old content --- app/controllers/topics_controller.rb | 8 ++++++++ spec/requests/topics_controller_spec.rb | 15 +++++++++++++++ 2 files changed, 23 insertions(+) diff --git a/app/controllers/topics_controller.rb b/app/controllers/topics_controller.rb index c9e551ebbf..9f4fe20b35 100644 --- a/app/controllers/topics_controller.rb +++ b/app/controllers/topics_controller.rb @@ -117,6 +117,14 @@ class TopicsController < ApplicationController canonical_url UrlHelper.absolute_without_cdn(@topic_view.canonical_path) + # provide hint to crawlers only for now + # we would like to give them a bit more signal about age of data + if use_crawler_layout? + if last_modified = @topic_view.posts&.map { |p| p.updated_at }&.max&.httpdate + response.headers['Last-Modified'] = last_modified + end + end + perform_show_response rescue Discourse::InvalidAccess => ex diff --git a/spec/requests/topics_controller_spec.rb b/spec/requests/topics_controller_spec.rb index 8476558c02..64d11da2c4 100644 --- a/spec/requests/topics_controller_spec.rb +++ b/spec/requests/topics_controller_spec.rb @@ -554,11 +554,21 @@ RSpec.describe TopicsController do context "when a crawler" do it "renders with the crawler layout, and handles proper pagination" do + page1_time = 3.months.ago + page2_time = 2.months.ago + page3_time = 1.month.ago + + freeze_time page1_time + topic = Fabricate(:topic) Fabricate(:post, topic_id: topic.id) Fabricate(:post, topic_id: topic.id) + + freeze_time page2_time Fabricate(:post, topic_id: topic.id) Fabricate(:post, topic_id: topic.id) + + freeze_time page3_time Fabricate(:post, topic_id: topic.id) # ugly, but no inteface to set this and we don't want to create @@ -575,15 +585,20 @@ RSpec.describe TopicsController do expect(body).to_not have_tag(:meta, with: { name: 'fragment' }) expect(body).to include(' user_agent } body = response.body + expect(response.headers['Last-Modified']).to eq(page2_time.httpdate) + expect(body).to include(' user_agent } body = response.body + expect(response.headers['Last-Modified']).to eq(page3_time.httpdate) expect(body).to include('