From 59223b283a0de158bcfd2ef3262346efba16251e Mon Sep 17 00:00:00 2001
From: Gerhard Schlager
Date: Tue, 22 Jun 2021 21:00:51 +0200
Subject: [PATCH] WIP bulk importer

---
 Gemfile                            |   3 +
 Gemfile.lock                       |   6 +-
 script/bulk_import/generic_bulk.rb | 126 ++++++++++++++++++++++++++++++
 3 files changed, 134 insertions(+), 1 deletion(-)
 create mode 100644 script/bulk_import/generic_bulk.rb

diff --git a/Gemfile b/Gemfile
index e09839e88a..c493a03daf 100644
--- a/Gemfile
+++ b/Gemfile
@@ -258,3 +258,6 @@ gem 'colored2', require: false
 gem 'maxminddb'
 
 gem 'rails_failover', require: false
+
+gem 'redcarpet'
+gem "sqlite3"
diff --git a/Gemfile.lock b/Gemfile.lock
index 53803ed4d0..f9c736a972 100644
--- a/Gemfile.lock
+++ b/Gemfile.lock
@@ -345,6 +345,7 @@ GEM
       msgpack (>= 0.4.3)
       optimist (>= 3.0.0)
     rchardet (1.8.0)
+    redcarpet (3.5.1)
     redis (4.5.1)
     redis-namespace (1.8.1)
       redis (>= 3.0.4)
@@ -446,6 +447,7 @@ GEM
       actionpack (>= 4.0)
       activesupport (>= 4.0)
       sprockets (>= 3.0.0)
+    sqlite3 (1.4.2)
     sshkey (2.0.0)
     stackprof (0.2.17)
     test-prof (1.0.7)
@@ -576,6 +578,7 @@ DEPENDENCIES
   rb-fsevent
   rbtrace
   rchardet
+  redcarpet
   redis
   redis-namespace
   rinku
@@ -600,6 +603,7 @@ DEPENDENCIES
   simplecov
   sprockets (= 3.7.2)
   sprockets-rails
+  sqlite3
   sshkey
   stackprof
   test-prof
@@ -613,4 +617,4 @@ DEPENDENCIES
   yaml-lint
 
 BUNDLED WITH
-   2.2.26
+   2.2.29
diff --git a/script/bulk_import/generic_bulk.rb b/script/bulk_import/generic_bulk.rb
new file mode 100644
index 0000000000..19d34e2a91
--- /dev/null
+++ b/script/bulk_import/generic_bulk.rb
@@ -0,0 +1,126 @@
+# frozen_string_literal: true
+
+require_relative "base"
+require "sqlite3"
+
+# Bulk importer that reads groups and users from an intermediate SQLite
+# database and feeds them to the create_* helpers, which are not defined in
+# this file (presumably inherited from BulkImport::Base -- verify).
+#
+# Usage: pass the path of the SQLite database as the first argument.
+class BulkImport::Generic < BulkImport::Base
+  # Read from the environment; not referenced anywhere else in this file.
+  AVATAR_DIRECTORY = ENV["AVATAR_DIRECTORY"]
+  UPLOAD_DIRECTORY = ENV["UPLOAD_DIRECTORY"]
+
+  # @param db_path [String] path of the SQLite database to import from
+  def initialize(db_path)
+    super()
+    @db = create_connection(db_path)
+  end
+
+  # Runs the import and afterwards invokes the "import:ensure_consistency"
+  # Rake task.
+  def start
+    run # will call execute, and then "complete" the migration
+
+    # Now that the migration is complete, do some more work:
+
+    Discourse::Application.load_tasks
+    Rake::Task["import:ensure_consistency"].invoke
+  end
+
+  # Imports every record type this importer supports, groups before users.
+  def execute
+    import_groups
+    import_users
+  end
+
+  # Imports the rows of the groups table whose ROWID is greater than
+  # @last_imported_group_id (not assigned in this file; presumably set by
+  # the base class before #execute runs -- verify).
+  def import_groups
+    puts "Importing groups..."
+
+    groups = @db.execute(<<~SQL, last_row_id: @last_imported_group_id)
+      SELECT *
+      FROM groups
+      WHERE ROWID > :last_row_id
+      ORDER BY ROWID
+    SQL
+
+    create_groups(groups) do |row|
+      {
+        imported_id: row["id"],
+        # Normalize the group's own name, not the literal string "name".
+        name: normalize_text(row["name"])
+      }
+    end
+  end
+
+  # Imports the rows of the users table whose ROWID is greater than
+  # @last_imported_user_id (not assigned in this file; presumably set by
+  # the base class before #execute runs -- verify).
+  def import_users
+    puts "Importing users..."
+
+    users = @db.execute(<<~SQL, last_row_id: @last_imported_user_id)
+      SELECT ROWID, *
+      FROM users
+      WHERE ROWID > :last_row_id
+      ORDER BY ROWID
+    SQL
+
+    create_users(users) do |row|
+      {
+        imported_id: row["id"],
+        username: row["username"],
+        created_at: to_datetime(row["created_at"]),
+        name: row["name"],
+        email: row["email"],
+        # Rows are hashes keyed by column-name Strings (results_as_hash),
+        # so a Symbol key would never match; use a String like the siblings.
+        last_seen_at: to_datetime(row["last_seen_at"]),
+        bio_raw: row["bio"],
+        location: row["location"],
+        admin: to_boolean(row["admin"]),
+        moderator: to_boolean(row["moderator"])
+      }
+    end
+  end
+
+  # Opens the SQLite database at +path+, configures the connection and
+  # returns it. Result rows are returned as hashes (results_as_hash).
+  #
+  # @param path [String] path of the SQLite database file
+  # @return [SQLite3::Database]
+  def create_connection(path)
+    sqlite = SQLite3::Database.new(path, results_as_hash: true)
+    sqlite.busy_timeout = 60000 # 60 seconds
+    sqlite.auto_vacuum = "full"
+    sqlite.foreign_keys = true
+    sqlite.journal_mode = "wal"
+    sqlite.synchronous = "normal"
+    sqlite
+  end
+
+  # @param text [String, nil] a date in a format Date.parse accepts
+  # @return [Date, nil] nil unless +text+ is present?
+  def to_date(text)
+    text.present? ? Date.parse(text) : nil
+  end
+
+  # @param text [String, nil] a timestamp in a format DateTime.parse accepts
+  # @return [DateTime, nil] nil unless +text+ is present?
+  def to_datetime(text)
+    text.present? ? DateTime.parse(text) : nil
+  end
+
+  # @param value [Object] raw column value
+  # @return [Boolean] true only when +value+ equals 1
+  def to_boolean(value)
+    value == 1
+  end
+end
+
+BulkImport::Generic.new(ARGV.first).start