From 4c22f3a0e2cf85acc82860ed02fc81d01590e037 Mon Sep 17 00:00:00 2001 From: Jakub Macina Date: Fri, 9 Jun 2017 13:56:18 +0200 Subject: [PATCH 1/9] Add file extension column to TopicLinks. --- db/migrate/20170609115401_add_extension_to_topic_links.rb | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 db/migrate/20170609115401_add_extension_to_topic_links.rb diff --git a/db/migrate/20170609115401_add_extension_to_topic_links.rb b/db/migrate/20170609115401_add_extension_to_topic_links.rb new file mode 100644 index 0000000000..8360b0ef08 --- /dev/null +++ b/db/migrate/20170609115401_add_extension_to_topic_links.rb @@ -0,0 +1,5 @@ +class AddExtensionToTopicLinks < ActiveRecord::Migration + def change + add_column :topic_links, :extension, :string, limit: 5 + end +end From eaf46431d49aebda75a8ef5eba0e2b7e4ede082b Mon Sep 17 00:00:00 2001 From: Jakub Macina Date: Mon, 19 Jun 2017 17:09:54 +0200 Subject: [PATCH 2/9] Add extraction of file extension in TopicLink and related rspec tests. --- app/models/topic_link.rb | 4 +++- spec/models/topic_link_spec.rb | 8 ++++++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/app/models/topic_link.rb b/app/models/topic_link.rb index 12ae7dcaab..c9abf160c4 100644 --- a/app/models/topic_link.rb +++ b/app/models/topic_link.rb @@ -164,6 +164,7 @@ SQL added_urls << url unless TopicLink.exists?(topic_id: post.topic_id, post_id: post.id, url: url) + file_extension = File.extname(parsed.path)[1..5].downcase unless File.extname(parsed.path).empty? begin TopicLink.create!(post_id: post.id, user_id: post.user_id, @@ -173,7 +174,8 @@ SQL internal: internal, link_topic_id: topic_id, link_post_id: reflected_post.try(:id), - quote: link.is_quote) + quote: link.is_quote, + extension: file_extension) rescue ActiveRecord::RecordNotUnique, PG::UniqueViolation # it's fine end diff --git a/spec/models/topic_link_spec.rb b/spec/models/topic_link_spec.rb index 9ee96750f2..f9f0c9d309 100644 --- a/spec/models/topic_link_spec.rb +++ b/spec/models/topic_link_spec.rb @@ -193,7 +193,7 @@ http://b.com/#{'a'*500} end context "link to a local attachments" do - let(:post) { topic.posts.create(user: user, raw: 'ruby.rb') } + let(:post) { topic.posts.create(user: user, raw: 'ruby.rb') } it "extracts the link" do TopicLink.extract_from(post) @@ -203,9 +203,11 @@ http://b.com/#{'a'*500} # is set to internal expect(link).to be_internal # has the correct url - expect(link.url).to eq("/uploads/default/208/87bb3d8428eb4783.rb") + expect(link.url).to eq("/uploads/default/208/87bb3d8428eb4783.rb?foo=bar") # should not be the reflection expect(link).not_to be_reflection + # should have file extension + expect(link.extension).to eq('rb') end end @@ -224,6 +226,8 @@ http://b.com/#{'a'*500} expect(link.url).to eq("//s3.amazonaws.com/bucket/2104a0211c9ce41ed67989a1ed62e9a394c1fbd1446.rb") # should not be the reflection expect(link).not_to be_reflection + # should have file extension + expect(link.extension).to eq('rb') end end From 67ce4b70a67ccefb03874375bf73e904f9d6cb8e Mon Sep 17 00:00:00 2001 From: Jakub Macina Date: Tue, 20 Jun 2017 13:01:31 +0200 Subject: [PATCH 3/9] Add index to extension column in TopicLink. --- db/migrate/20170609115401_add_extension_to_topic_links.rb | 1 + 1 file changed, 1 insertion(+) diff --git a/db/migrate/20170609115401_add_extension_to_topic_links.rb b/db/migrate/20170609115401_add_extension_to_topic_links.rb index 8360b0ef08..fbe8b9c184 100644 --- a/db/migrate/20170609115401_add_extension_to_topic_links.rb +++ b/db/migrate/20170609115401_add_extension_to_topic_links.rb @@ -1,5 +1,6 @@ class AddExtensionToTopicLinks < ActiveRecord::Migration def change add_column :topic_links, :extension, :string, limit: 5 + add_index :topic_links, :extension end end From f87d32ac6d6636912d0b8d559493333f4768abd7 Mon Sep 17 00:00:00 2001 From: Jakub Macina Date: Tue, 20 Jun 2017 21:20:06 +0200 Subject: [PATCH 4/9] Add backend code for searching by filetypes. --- lib/search.rb | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/lib/search.rb b/lib/search.rb index 543149b18e..f3ea5ccd82 100644 --- a/lib/search.rb +++ b/lib/search.rb @@ -456,6 +456,15 @@ class Search )", tags) end + advanced_filter(/filetypes?:([a-zA-Z0-9,\-_]+)/) do |posts, match| + file_extensions = match.split(",") + + posts.where("posts.id IN ( + SELECT post_id FROM topic_links + WHERE extension IN (?) + )", file_extensions) + end + private From aad1c9bef2851d5dcc13ba4dddd40d818eb49424 Mon Sep 17 00:00:00 2001 From: Jakub Macina Date: Tue, 20 Jun 2017 21:21:56 +0200 Subject: [PATCH 5/9] Add rspec tests for searching by a filetype. --- spec/components/search_spec.rb | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/spec/components/search_spec.rb b/spec/components/search_spec.rb index d215c8835c..c7c2c5609e 100644 --- a/spec/components/search_spec.rb +++ b/spec/components/search_spec.rb @@ -703,6 +703,24 @@ describe Search do expect(Search.execute('green tags:eggs').posts.map(&:id)).to eq([post2.id]) expect(Search.execute('green tags:plants').posts.size).to eq(0) end + + it "can find posts which contains filetypes" do + # Must be posts with real images + post1 = Fabricate(:post, + raw: "https://www.discourse.org/a/img/favicon.png") + post2 = Fabricate(:post, + raw: "Discourse logo\n"\ + "https://www.discourse.org/a/img/favicon.png\n"\ + "https://www.discourse.org/a/img/trust-1x.jpg") + Fabricate(:post) + + TopicLink.extract_from(post1) + TopicLink.extract_from(post2) + + expect(Search.execute('filetype:jpg').posts.map(&:id)).to eq([post2.id]) + expect(Search.execute('filetype:png').posts.map(&:id)).to contain_exactly(post1.id, post2.id) + expect(Search.execute('logo filetype:jpg').posts.map(&:id)).to eq([post2.id]) + end end it 'can parse complex strings using ts_query helper' do From bb392973ca6b57510d26ad4a6ae44356b84b1233 Mon Sep 17 00:00:00 2001 From: Jakub Macina Date: Mon, 3 Jul 2017 19:06:54 +0200 Subject: [PATCH 6/9] Add migration with extension column to uploads. --- .../20170609115401_add_extension_to_topic_links.rb | 2 +- db/migrate/20170703115216_add_extension_to_uploads.rb | 11 +++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) create mode 100644 db/migrate/20170703115216_add_extension_to_uploads.rb diff --git a/db/migrate/20170609115401_add_extension_to_topic_links.rb b/db/migrate/20170609115401_add_extension_to_topic_links.rb index fbe8b9c184..7c05a12d1a 100644 --- a/db/migrate/20170609115401_add_extension_to_topic_links.rb +++ b/db/migrate/20170609115401_add_extension_to_topic_links.rb @@ -1,6 +1,6 @@ class AddExtensionToTopicLinks < ActiveRecord::Migration def change - add_column :topic_links, :extension, :string, limit: 5 + add_column :topic_links, :extension, :string, limit: 10 add_index :topic_links, :extension end end diff --git a/db/migrate/20170703115216_add_extension_to_uploads.rb b/db/migrate/20170703115216_add_extension_to_uploads.rb new file mode 100644 index 0000000000..76aa08fa11 --- /dev/null +++ b/db/migrate/20170703115216_add_extension_to_uploads.rb @@ -0,0 +1,11 @@ +class AddExtensionToUploads < ActiveRecord::Migration + def up + add_column :uploads, :extension, :string, limit: 10 + execute "CREATE INDEX index_uploads_on_extension ON uploads(lower(extension))" + end + + def down + remove_column :uploads, :extension + execute "DROP INDEX index_uploads_on_extension" + end +end From f0a674d620587af1cb2dae802d2520827c193af8 Mon Sep 17 00:00:00 2001 From: Jakub Macina Date: Mon, 3 Jul 2017 19:08:59 +0200 Subject: [PATCH 7/9] Add extraction of upload extension. Add rspec test for search of post with upload by extension. --- app/models/topic_link.rb | 2 +- lib/upload_creator.rb | 1 + spec/components/search_spec.rb | 6 ++++-- spec/fabricators/upload_fabricator.rb | 1 + 4 files changed, 7 insertions(+), 3 deletions(-) diff --git a/app/models/topic_link.rb b/app/models/topic_link.rb index c9abf160c4..48c7c279c0 100644 --- a/app/models/topic_link.rb +++ b/app/models/topic_link.rb @@ -164,7 +164,7 @@ SQL added_urls << url unless TopicLink.exists?(topic_id: post.topic_id, post_id: post.id, url: url) - file_extension = File.extname(parsed.path)[1..5].downcase unless File.extname(parsed.path).empty? + file_extension = File.extname(parsed.path)[1..10].downcase unless File.extname(parsed.path).empty? begin TopicLink.create!(post_id: post.id, user_id: post.user_id, diff --git a/lib/upload_creator.rb b/lib/upload_creator.rb index a00b13f08f..503c0fadef 100644 --- a/lib/upload_creator.rb +++ b/lib/upload_creator.rb @@ -73,6 +73,7 @@ class UploadCreator @upload.sha1 = sha1 @upload.url = "" @upload.origin = @opts[:origin][0...1000] if @opts[:origin] + @upload.extension = File.extname(@filename)[1..10] if FileHelper.is_image?(@filename) @upload.width, @upload.height = ImageSizer.resize(*@image_info.size) diff --git a/spec/components/search_spec.rb b/spec/components/search_spec.rb index c7c2c5609e..7faf1bac43 100644 --- a/spec/components/search_spec.rb +++ b/spec/components/search_spec.rb @@ -712,14 +712,16 @@ describe Search do raw: "Discourse logo\n"\ "https://www.discourse.org/a/img/favicon.png\n"\ "https://www.discourse.org/a/img/trust-1x.jpg") + post_with_upload = Fabricate(:post) + post_with_upload.uploads = [Fabricate(:upload)] Fabricate(:post) TopicLink.extract_from(post1) TopicLink.extract_from(post2) expect(Search.execute('filetype:jpg').posts.map(&:id)).to eq([post2.id]) - expect(Search.execute('filetype:png').posts.map(&:id)).to contain_exactly(post1.id, post2.id) - expect(Search.execute('logo filetype:jpg').posts.map(&:id)).to eq([post2.id]) + expect(Search.execute('filetype:png').posts.map(&:id)).to contain_exactly(post1.id, post2.id, post_with_upload.id) + expect(Search.execute('logo filetype:png').posts.map(&:id)).to eq([post2.id]) end end diff --git a/spec/fabricators/upload_fabricator.rb b/spec/fabricators/upload_fabricator.rb index a2afdf7af0..7d0c7f43be 100644 --- a/spec/fabricators/upload_fabricator.rb +++ b/spec/fabricators/upload_fabricator.rb @@ -6,6 +6,7 @@ Fabricator(:upload) do width 100 height 200 url { sequence(:url) { |n| "/uploads/default/#{n}/1234567890123456.png" } } + extension "png" end Fabricator(:upload_s3, from: :upload) do From 8c445e9f17f6ba51a5698a3e948ad46d339615ee Mon Sep 17 00:00:00 2001 From: Jakub Macina Date: Tue, 4 Jul 2017 17:50:08 +0200 Subject: [PATCH 8/9] Fix backend code for searching by a filetype as a combination of uploads and topic links. Add rspec test for extracting file extension in upload. --- app/models/upload.rb | 4 ---- lib/file_store/base_store.rb | 2 +- lib/search.rb | 6 +++++- spec/models/upload_spec.rb | 5 +++++ 4 files changed, 11 insertions(+), 6 deletions(-) diff --git a/app/models/upload.rb b/app/models/upload.rb index d6e1916c72..f0eae0896b 100644 --- a/app/models/upload.rb +++ b/app/models/upload.rb @@ -52,10 +52,6 @@ class Upload < ActiveRecord::Base end end - def extension - File.extname(original_filename) - end - def self.generate_digest(path) Digest::SHA1.file(path).hexdigest end diff --git a/lib/file_store/base_store.rb b/lib/file_store/base_store.rb index b79282fe68..b4c8bfe47c 100644 --- a/lib/file_store/base_store.rb +++ b/lib/file_store/base_store.rb @@ -95,7 +95,7 @@ module FileStore end def get_path_for_upload(upload) - get_path_for("original".freeze, upload.id, upload.sha1, upload.extension) + get_path_for("original".freeze, upload.id, upload.sha1, File.extname(upload.original_filename)) end def get_path_for_optimized_image(optimized_image) diff --git a/lib/search.rb b/lib/search.rb index f3ea5ccd82..159de71b8d 100644 --- a/lib/search.rb +++ b/lib/search.rb @@ -462,7 +462,11 @@ class Search posts.where("posts.id IN ( SELECT post_id FROM topic_links WHERE extension IN (?) - )", file_extensions) + UNION + SELECT post_uploads.post_id FROM uploads + JOIN post_uploads ON post_uploads.upload_id = uploads.id + WHERE lower(uploads.extension) IN (?) + )", file_extensions, file_extensions) end private diff --git a/spec/models/upload_spec.rb b/spec/models/upload_spec.rb index c187ddbe67..ceef5bf669 100644 --- a/spec/models/upload_spec.rb +++ b/spec/models/upload_spec.rb @@ -46,6 +46,11 @@ describe Upload do end + it "extracts file extension" do + created_upload = UploadCreator.new(image, image_filename).create_for(user_id) + expect(created_upload.extension).to eq("png") + end + context ".get_from_url" do let(:url) { "/uploads/default/original/3X/1/0/10f73034616a796dfd70177dc54b6def44c4ba6f.png" } let(:upload) { Fabricate(:upload, url: url) } From 677267ae786bae753655039d7c2e3deba58b0077 Mon Sep 17 00:00:00 2001 From: Jakub Macina Date: Thu, 6 Jul 2017 19:11:32 +0200 Subject: [PATCH 9/9] Add onceoff job for uploads migration of column extension. Simplify filetype search and related rspec tests. --- app/jobs/onceoff/migrate_upload_extensions.rb | 11 +++++++++++ lib/file_store/base_store.rb | 2 +- lib/search.rb | 8 ++++---- spec/components/search_spec.rb | 12 +++++------- 4 files changed, 21 insertions(+), 12 deletions(-) create mode 100644 app/jobs/onceoff/migrate_upload_extensions.rb diff --git a/app/jobs/onceoff/migrate_upload_extensions.rb b/app/jobs/onceoff/migrate_upload_extensions.rb new file mode 100644 index 0000000000..38bcd3a1ea --- /dev/null +++ b/app/jobs/onceoff/migrate_upload_extensions.rb @@ -0,0 +1,11 @@ +module Jobs + + class MigrateUploadExtensions < Jobs::Onceoff + def execute_onceoff(args) + Upload.find_each do |upload| + upload.extension = File.extname(upload.original_filename)[1..10] + upload.save + end + end + end +end diff --git a/lib/file_store/base_store.rb b/lib/file_store/base_store.rb index b4c8bfe47c..7b17fb2d85 100644 --- a/lib/file_store/base_store.rb +++ b/lib/file_store/base_store.rb @@ -95,7 +95,7 @@ module FileStore end def get_path_for_upload(upload) - get_path_for("original".freeze, upload.id, upload.sha1, File.extname(upload.original_filename)) + get_path_for("original".freeze, upload.id, upload.sha1, "."+upload.extension) end def get_path_for_optimized_image(optimized_image) diff --git a/lib/search.rb b/lib/search.rb index 159de71b8d..5a6c1c2d78 100644 --- a/lib/search.rb +++ b/lib/search.rb @@ -457,16 +457,16 @@ class Search end advanced_filter(/filetypes?:([a-zA-Z0-9,\-_]+)/) do |posts, match| - file_extensions = match.split(",") + file_extensions = match.split(",").map(&:downcase) posts.where("posts.id IN ( SELECT post_id FROM topic_links - WHERE extension IN (?) + WHERE extension IN (:file_extensions) UNION SELECT post_uploads.post_id FROM uploads JOIN post_uploads ON post_uploads.upload_id = uploads.id - WHERE lower(uploads.extension) IN (?) - )", file_extensions, file_extensions) + WHERE lower(uploads.extension) IN (:file_extensions) + )", {file_extensions: file_extensions}) end private diff --git a/spec/components/search_spec.rb b/spec/components/search_spec.rb index 7faf1bac43..c09e3f8d43 100644 --- a/spec/components/search_spec.rb +++ b/spec/components/search_spec.rb @@ -705,21 +705,19 @@ describe Search do end it "can find posts which contains filetypes" do - # Must be posts with real images post1 = Fabricate(:post, - raw: "https://www.discourse.org/a/img/favicon.png") + raw: "http://example.com/image.png") post2 = Fabricate(:post, raw: "Discourse logo\n"\ - "https://www.discourse.org/a/img/favicon.png\n"\ - "https://www.discourse.org/a/img/trust-1x.jpg") - post_with_upload = Fabricate(:post) - post_with_upload.uploads = [Fabricate(:upload)] + "http://example.com/logo.png\n"\ + "http://example.com/vector_image.svg") + post_with_upload = Fabricate(:post, uploads: [Fabricate(:upload)]) Fabricate(:post) TopicLink.extract_from(post1) TopicLink.extract_from(post2) - expect(Search.execute('filetype:jpg').posts.map(&:id)).to eq([post2.id]) + expect(Search.execute('filetype:svg').posts).to eq([post2]) expect(Search.execute('filetype:png').posts.map(&:id)).to contain_exactly(post1.id, post2.id, post_with_upload.id) expect(Search.execute('logo filetype:png').posts.map(&:id)).to eq([post2.id]) end