WIP bulk importer

This commit is contained in:
Gerhard Schlager 2021-06-22 21:00:51 +02:00
parent 12295e1f7a
commit 59223b283a
3 changed files with 104 additions and 1 deletions

View File

@ -258,3 +258,6 @@ gem 'colored2', require: false
gem 'maxminddb'
gem 'rails_failover', require: false
gem 'redcarpet'
# Only needed by the bulk import script, which requires it explicitly,
# so don't let Bundler auto-require it when the application boots.
gem 'sqlite3', require: false

View File

@ -345,6 +345,7 @@ GEM
msgpack (>= 0.4.3)
optimist (>= 3.0.0)
rchardet (1.8.0)
redcarpet (3.5.1)
redis (4.5.1)
redis-namespace (1.8.1)
redis (>= 3.0.4)
@ -446,6 +447,7 @@ GEM
actionpack (>= 4.0)
activesupport (>= 4.0)
sprockets (>= 3.0.0)
sqlite3 (1.4.2)
sshkey (2.0.0)
stackprof (0.2.17)
test-prof (1.0.7)
@ -576,6 +578,7 @@ DEPENDENCIES
rb-fsevent
rbtrace
rchardet
redcarpet
redis
redis-namespace
rinku
@ -600,6 +603,7 @@ DEPENDENCIES
simplecov
sprockets (= 3.7.2)
sprockets-rails
sqlite3
sshkey
stackprof
test-prof
@ -613,4 +617,4 @@ DEPENDENCIES
yaml-lint
BUNDLED WITH
2.2.26
2.2.29

View File

@ -0,0 +1,96 @@
# frozen_string_literal: true
require_relative "base"
require "sqlite3"
# Bulk importer that reads groups and users from a SQLite database and hands
# them to the `create_groups` / `create_users` helpers inherited from
# BulkImport::Base.
class BulkImport::Generic < BulkImport::Base
  AVATAR_DIRECTORY = ENV["AVATAR_DIRECTORY"]
  UPLOAD_DIRECTORY = ENV["UPLOAD_DIRECTORY"]

  # @param db_path [String] path of the SQLite database to import from
  def initialize(db_path)
    super()
    @db = create_connection(db_path)
  end

  # Runs the import and afterwards invokes the `import:ensure_consistency`
  # Rake task.
  def start
    run # will call execute, and then "complete" the migration

    # Now that the migration is complete, do some more work:
    Discourse::Application.load_tasks
    Rake::Task["import:ensure_consistency"].invoke
  end

  # Imports all supported record types, groups first and then users.
  def execute
    import_groups
    import_users
  end

  # Imports every row of the `groups` table whose ROWID is greater than
  # @last_imported_group_id (presumably set by the base class -- it is not
  # assigned anywhere in this class).
  def import_groups
    puts "Importing groups..."

    groups = @db.execute(<<~SQL, last_row_id: @last_imported_group_id)
      SELECT *
        FROM groups
       WHERE ROWID > :last_row_id
       ORDER BY ROWID
    SQL

    create_groups(groups) do |row|
      {
        imported_id: row["id"],
        # Normalize the group's actual name, not the literal column name.
        name: normalize_text(row["name"])
      }
    end
  end

  # Imports every row of the `users` table whose ROWID is greater than
  # @last_imported_user_id (presumably set by the base class -- it is not
  # assigned anywhere in this class).
  def import_users
    puts "Importing users..."

    users = @db.execute(<<~SQL, last_row_id: @last_imported_user_id)
      SELECT ROWID, *
        FROM users
       WHERE ROWID > :last_row_id
       ORDER BY ROWID
    SQL

    create_users(users) do |row|
      {
        imported_id: row["id"],
        username: row["username"],
        created_at: to_datetime(row["created_at"]),
        name: row["name"],
        email: row["email"],
        # Rows are hashes keyed by column name Strings (results_as_hash),
        # so a Symbol key would always yield nil here.
        last_seen_at: to_datetime(row["last_seen_at"]),
        bio_raw: row["bio"],
        location: row["location"],
        admin: to_boolean(row["admin"]),
        moderator: to_boolean(row["moderator"])
      }
    end
  end

  # Opens the SQLite database at +path+ with rows returned as hashes and
  # applies the connection settings used by the importer.
  #
  # @param path [String] path of the SQLite database file
  # @return [SQLite3::Database]
  def create_connection(path)
    sqlite = SQLite3::Database.new(path, results_as_hash: true)
    sqlite.busy_timeout = 60000 # 60 seconds
    sqlite.auto_vacuum = "full"
    sqlite.foreign_keys = true
    sqlite.journal_mode = "wal"
    sqlite.synchronous = "normal"
    sqlite
  end

  # @param text [String, nil]
  # @return [Date, nil] the parsed date, or nil when +text+ is blank
  def to_date(text)
    text.present? ? Date.parse(text) : nil
  end

  # @param text [String, nil]
  # @return [DateTime, nil] the parsed timestamp, or nil when +text+ is blank
  def to_datetime(text)
    text.present? ? DateTime.parse(text) : nil
  end

  # @param value [Object] a column value; only the Integer 1 counts as true
  # @return [Boolean]
  def to_boolean(value)
    value == 1
  end
end
# Script entry point: the first command-line argument is the path of the
# SQLite database to import from. Fail early with a usage hint instead of
# passing nil on to the SQLite driver.
db_path = ARGV.first
abort "Usage: #{$PROGRAM_NAME} <path to SQLite database>" if db_path.nil? || db_path.empty?

BulkImport::Generic.new(db_path).start