diff --git a/Gemfile b/Gemfile
index ba6165e350..61d79c3097 100644
--- a/Gemfile
+++ b/Gemfile
@@ -194,4 +194,5 @@ if ENV["IMPORT"] == "1"
gem 'mysql2'
gem 'redcarpet'
gem 'sqlite3', '~> 1.3.13'
+ gem 'ruby-bbcode-to-md', :github => 'nlalonde/ruby-bbcode-to-md'
end
diff --git a/lib/tasks/import.rake b/lib/tasks/import.rake
index a0ec50de6c..f4542d51a3 100644
--- a/lib/tasks/import.rake
+++ b/lib/tasks/import.rake
@@ -428,3 +428,19 @@ def exec_sql(sql)
ActiveRecord::Base.exec_sql(sql)
end
end
+
+task "import:create_phpbb_permalinks" => :environment do
+ log 'Creating Permalinks...'
+
+ # /[^\/]+\/.*-t(\d+).html
+ SiteSetting.permalink_normalizations = '/[^\/]+\/.*-t(\d+).html/thread/\1'
+
+ Topic.listable_topics.find_each do |topic|
+ tcf = topic.custom_fields
+ if tcf && tcf["import_id"]
+ Permalink.create(url: "thread/#{tcf["import_id"]}", topic_id: topic.id) rescue nil
+ end
+ end
+
+ log "Done!"
+end
diff --git a/script/bulk_import/base.rb b/script/bulk_import/base.rb
index 189d154740..6ef8e3b011 100644
--- a/script/bulk_import/base.rb
+++ b/script/bulk_import/base.rb
@@ -229,8 +229,8 @@ class BulkImport::Base
group[:name] = group_name
end
- group[:title] = group[:title].scrub.strip.presence
- group[:bio_raw] = group[:bio_raw].scrub.strip.presence
+ group[:title] = group[:title].scrub.strip.presence if group[:title].present?
+ group[:bio_raw] = group[:bio_raw].scrub.strip.presence if group[:bio_raw].present?
group[:bio_cooked] = pre_cook(group[:bio_raw]) if group[:bio_raw].present?
group[:created_at] ||= NOW
group[:updated_at] ||= group[:created_at]
@@ -307,6 +307,7 @@ class BulkImport::Base
def process_user_profile(user_profile)
user_profile[:bio_raw] = (user_profile[:bio_raw].presence || "").scrub.strip.presence
user_profile[:bio_cooked] = pre_cook(user_profile[:bio_raw]) if user_profile[:bio_raw].present?
+ user_profile[:views] ||= 0
user_profile
end
@@ -526,6 +527,9 @@ class BulkImport::Base
value: imported_id,
}
end
+ rescue => e
+ puts e.message
+ puts e.backtrace.join("\n")
end
def create_custom_fields(table, name, rows)
diff --git a/script/bulk_import/phpbb_postgresql.rb b/script/bulk_import/phpbb_postgresql.rb
new file mode 100644
index 0000000000..932a96a31b
--- /dev/null
+++ b/script/bulk_import/phpbb_postgresql.rb
@@ -0,0 +1,482 @@
+require_relative "base"
+require "pg"
+require "htmlentities"
+require 'ruby-bbcode-to-md'
+
+class BulkImport::PhpBB < BulkImport::Base
+
+ SUSPENDED_TILL ||= Date.new(3000, 1, 1)
+ TABLE_PREFIX ||= ENV['TABLE_PREFIX'] || "phpbb_"
+ CHARSET_MAP = {
+ "armscii8" => nil,
+ "ascii" => Encoding::US_ASCII,
+ "big5" => Encoding::Big5,
+ "binary" => Encoding::ASCII_8BIT,
+ "cp1250" => Encoding::Windows_1250,
+ "cp1251" => Encoding::Windows_1251,
+ "cp1256" => Encoding::Windows_1256,
+ "cp1257" => Encoding::Windows_1257,
+ "cp850" => Encoding::CP850,
+ "cp852" => Encoding::CP852,
+ "cp866" => Encoding::IBM866,
+ "cp932" => Encoding::Windows_31J,
+ "dec8" => nil,
+ "eucjpms" => Encoding::EucJP_ms,
+ "euckr" => Encoding::EUC_KR,
+ "gb2312" => Encoding::EUC_CN,
+ "gbk" => Encoding::GBK,
+ "geostd8" => nil,
+ "greek" => Encoding::ISO_8859_7,
+ "hebrew" => Encoding::ISO_8859_8,
+ "hp8" => nil,
+ "keybcs2" => nil,
+ "koi8r" => Encoding::KOI8_R,
+ "koi8u" => Encoding::KOI8_U,
+ "latin1" => Encoding::ISO_8859_1,
+ "latin2" => Encoding::ISO_8859_2,
+ "latin5" => Encoding::ISO_8859_9,
+ "latin7" => Encoding::ISO_8859_13,
+ "macce" => Encoding::MacCentEuro,
+ "macroman" => Encoding::MacRoman,
+ "sjis" => Encoding::SHIFT_JIS,
+ "swe7" => nil,
+ "tis620" => Encoding::TIS_620,
+ "ucs2" => Encoding::UTF_16BE,
+ "ujis" => Encoding::EucJP_ms,
+ "utf8" => Encoding::UTF_8,
+ }
+
+ def initialize
+ super
+
+ charset = ENV["DB_CHARSET"] || "utf8"
+ database = ENV["DB_NAME"] || "flightaware"
+ password = ENV["DB_PASSWORD"] || "discourse"
+
+ @html_entities = HTMLEntities.new
+ @encoding = CHARSET_MAP[charset]
+
+ @client = PG.connect(dbname: database, password: password)
+
+ @smiley_map = {}
+ add_default_smilies
+ end
+
+ def execute
+ import_groups
+ import_users
+ import_group_users
+
+ import_user_emails
+ import_user_profiles
+
+ import_categories
+ import_topics
+ import_posts
+
+ import_private_topics
+ import_topic_allowed_users
+ import_private_posts
+ end
+
+ def import_groups
+ puts "Importing groups..."
+
+ groups = psql_query <<-SQL
+ SELECT group_id, group_name, group_desc
+ FROM #{TABLE_PREFIX}groups
+ WHERE group_id > #{@last_imported_group_id}
+ ORDER BY group_id
+ SQL
+
+ create_groups(groups) do |row|
+ {
+ imported_id: row["group_id"],
+ name: normalize_text(row["group_name"]),
+ bio_raw: normalize_text(row["group_desc"])
+ }
+ end
+ end
+
+ def import_users
+ puts "Importing users..."
+
+ users = psql_query <<-SQL
+ SELECT u.user_id, u.username, u.user_email, u.user_regdate, u.user_lastvisit, u.user_ip,
+ u.user_type, u.user_inactive_reason, g.group_id, g.group_name, b.ban_start, b.ban_end, b.ban_reason,
+ u.user_posts, u.user_website, u.user_from, u.user_birthday, u.user_avatar_type, u.user_avatar
+ FROM #{TABLE_PREFIX}users u
+ LEFT OUTER JOIN #{TABLE_PREFIX}groups g ON (g.group_id = u.group_id)
+ LEFT OUTER JOIN #{TABLE_PREFIX}banlist b ON (
+ u.user_id = b.ban_userid AND b.ban_exclude = 0 AND
+ b.ban_end = 0
+ )
+ WHERE u.user_id > #{@last_imported_user_id}
+ ORDER BY u.user_id
+ SQL
+
+ create_users(users) do |row|
+ u = {
+ imported_id: row["user_id"],
+ username: normalize_text(row["username"]),
+ created_at: Time.zone.at(row["user_regdate"].to_i),
+ last_seen_at: row["user_lastvisit"] == 0 ? Time.zone.at(row["user_regdate"].to_i) : Time.zone.at(row["user_lastvisit"].to_i),
+ trust_level: row["user_posts"] == 0 ? TrustLevel[0] : TrustLevel[1],
+ date_of_birth: parse_birthday(row["user_birthday"]),
+ primary_group_id: group_id_from_imported_id(row["group_id"])
+ }
+ u[:ip_address] = row["user_ip"][/\b(?:\d{1,3}\.){3}\d{1,3}\b/] if row["user_ip"].present?
+ if row["ban_start"]
+ u[:suspended_at] = Time.zone.at(row["ban_start"].to_i)
+ u[:suspended_till] = row["ban_end"].to_i > 0 ? Time.zone.at(row["ban_end"].to_i) : SUSPENDED_TILL
+ end
+ u
+ end
+ end
+
+ def import_user_emails
+ puts "Importing user emails..."
+
+ users = psql_query <<-SQL
+ SELECT user_id, user_email, user_regdate
+ FROM #{TABLE_PREFIX}users u
+ WHERE user_id > #{@last_imported_user_id}
+ ORDER BY user_id
+ SQL
+
+ create_user_emails(users) do |row|
+ {
+ imported_id: row["user_id"],
+ imported_user_id: row["user_id"],
+ email: row["user_email"],
+ created_at: Time.zone.at(row["user_regdate"].to_i)
+ }
+ end
+ end
+
+ def import_group_users
+ puts "Importing group users..."
+
+ group_users = psql_query <<-SQL
+ SELECT user_id, group_id
+ FROM #{TABLE_PREFIX}users u
+ WHERE user_id > #{@last_imported_user_id}
+ SQL
+
+ create_group_users(group_users) do |row|
+ {
+ group_id: group_id_from_imported_id(row["group_id"]),
+ user_id: user_id_from_imported_id(row["user_id"]),
+ }
+ end
+ end
+
+ def import_user_profiles
+ puts "Importing user profiles..."
+
+ user_profiles = psql_query <<-SQL
+ SELECT user_id, user_website, user_from
+ FROM #{TABLE_PREFIX}users
+ WHERE user_id > #{@last_imported_user_id}
+ ORDER BY user_id
+ SQL
+
+ create_user_profiles(user_profiles) do |row|
+ {
+ user_id: user_id_from_imported_id(row["user_id"]),
+ website: (URI.parse(row["user_website"]).to_s rescue nil),
+ location: row["user_from"],
+ }
+ end
+ end
+
+ def import_categories
+ puts "Importing categories..."
+
+ categories = psql_query(<<-SQL
+ SELECT forum_id, parent_id, forum_name, forum_desc
+ FROM #{TABLE_PREFIX}forums
+ WHERE forum_id > #{@last_imported_category_id}
+ ORDER BY parent_id, left_id
+ SQL
+ ).to_a
+
+ return if categories.empty?
+
+ parent_categories = categories.select { |c| c["parent_id"].to_i == 0 }
+ children_categories = categories.select { |c| c["parent_id"].to_i != 0 }
+
+ puts "Importing parent categories..."
+ create_categories(parent_categories) do |row|
+ {
+ imported_id: row["forum_id"],
+ name: normalize_text(row["forum_name"]),
+ description: normalize_text(row["forum_desc"])
+ }
+ end
+
+ puts "Importing children categories..."
+ create_categories(children_categories) do |row|
+ {
+ imported_id: row["forum_id"],
+ name: normalize_text(row["forum_name"]),
+ description: normalize_text(row["forum_desc"]),
+ parent_category_id: category_id_from_imported_id(row["parent_id"])
+ }
+ end
+ end
+
+ def import_topics
+ puts "Importing topics..."
+
+ topics = psql_query <<-SQL
+ SELECT topic_id, topic_title, forum_id, topic_poster, topic_time, topic_views
+ FROM #{TABLE_PREFIX}topics
+ WHERE topic_id > #{@last_imported_topic_id}
+ AND EXISTS (SELECT 1 FROM #{TABLE_PREFIX}posts WHERE #{TABLE_PREFIX}posts.topic_id = #{TABLE_PREFIX}topics.topic_id)
+ ORDER BY topic_id
+ SQL
+
+ create_topics(topics) do |row|
+ {
+ imported_id: row["topic_id"],
+ title: normalize_text(row["topic_title"]),
+ category_id: category_id_from_imported_id(row["forum_id"]),
+ user_id: user_id_from_imported_id(row["topic_poster"]),
+ created_at: Time.zone.at(row["topic_time"].to_i),
+ views: row["topic_views"]
+ }
+ end
+ end
+
+ def import_posts
+ puts "Importing posts..."
+
+ posts = psql_query <<-SQL
+ SELECT p.post_id, p.topic_id, p.poster_id, p.post_time, p.post_text
+ FROM #{TABLE_PREFIX}posts p
+ JOIN #{TABLE_PREFIX}topics t ON t.topic_id = p.topic_id
+ WHERE p.post_id > #{@last_imported_post_id}
+ ORDER BY p.post_id
+ SQL
+
+ create_posts(posts) do |row|
+ {
+ imported_id: row["post_id"],
+ topic_id: topic_id_from_imported_id(row["topic_id"]),
+ user_id: user_id_from_imported_id(row["poster_id"]),
+ created_at: Time.zone.at(row["post_time"].to_i),
+ raw: process_raw_text(row["post_text"]),
+ }
+ end
+ end
+
+ def import_private_topics
+ puts "Importing private topics..."
+
+ @imported_topics = {}
+
+ topics = psql_query <<-SQL
+ SELECT msg_id, message_subject, author_id, to_address, message_time
+ FROM #{TABLE_PREFIX}privmsgs
+ WHERE msg_id > (#{@last_imported_private_topic_id - PRIVATE_OFFSET})
+ ORDER BY msg_id
+ SQL
+
+ create_topics(topics) do |row|
+ user_ids = get_message_recipients(row["author_id"], row["to_address"])
+ title = extract_pm_title(row["message_subject"])
+ key = [title, user_ids]
+
+ next if @imported_topics.has_key?(key) || title.blank?
+ @imported_topics[key] = row["msg_id"].to_i + PRIVATE_OFFSET
+
+ {
+ archetype: Archetype.private_message,
+ imported_id: row["msg_id"].to_i + PRIVATE_OFFSET,
+ title: normalize_text(title),
+ user_id: user_id_from_imported_id(row["author_id"].to_i),
+ created_at: Time.zone.at(row["message_time"].to_i)
+ }
+ end
+ end
+
+ def import_topic_allowed_users
+ puts "Importing topic allowed users..."
+
+ allowed_users = []
+
+ psql_query(<<-SQL
+ SELECT msg_id, author_id, to_address
+ FROM #{TABLE_PREFIX}privmsgs
+ WHERE msg_id > (#{@last_imported_private_topic_id - PRIVATE_OFFSET})
+ ORDER BY msg_id
+ SQL
+ ).each do |row|
+ next unless topic_id = topic_id_from_imported_id(row["msg_id"].to_i + PRIVATE_OFFSET)
+
+ user_ids = get_message_recipients(row["author_id"], row["to_address"])
+ user_ids.each do |id|
+ next unless user_id = user_id_from_imported_id(id.to_i)
+ allowed_users << [topic_id, user_id]
+ end
+ end
+
+ create_topic_allowed_users(allowed_users) do |row|
+ {
+ topic_id: row[0],
+ user_id: row[1]
+ }
+ end
+ end
+
+ def import_private_posts
+ puts "Importing private posts..."
+
+ posts = psql_query <<-SQL
+ SELECT msg_id, message_subject, author_id, to_address, message_time, message_text
+ FROM #{TABLE_PREFIX}privmsgs
+ WHERE msg_id > (#{@last_imported_private_topic_id - PRIVATE_OFFSET})
+ ORDER BY msg_id
+ SQL
+
+ create_posts(posts) do |row|
+ user_ids = get_message_recipients(row["author_id"], row["to_address"])
+ title = extract_pm_title(row["message_subject"])
+ key = [title, user_ids]
+
+ next unless topic_id = topic_id_from_imported_id(@imported_topics[key])
+ {
+ imported_id: row["msg_id"].to_i + PRIVATE_OFFSET,
+ topic_id: topic_id,
+ user_id: user_id_from_imported_id(row["author_id"].to_i),
+ created_at: Time.zone.at(row["message_time"].to_i),
+ raw: process_raw_text(row["message_text"])
+ }
+ end
+ end
+
+ def get_message_recipients(from, to)
+ user_ids = to.split(':')
+ user_ids.map! { |u| u[2..-1].to_i }
+ user_ids.push(from.to_i)
+ user_ids.uniq!
+ user_ids = user_ids.flatten.map(&:to_i).sort
+ user_ids
+ end
+
+ def extract_pm_title(title)
+ pm_title = CGI.unescapeHTML(title)
+ pm_title = title.gsub(/^Re\s*:\s*/i, "") rescue nil
+ pm_title
+ end
+
+ def normalize_text(text)
+ return nil unless text.present?
+ @html_entities.decode(normalize_charset(text.presence || "").scrub)
+ end
+
+ def normalize_charset(text)
+ return text if @encoding == Encoding::UTF_8
+ return text && text.encode(@encoding).force_encoding(Encoding::UTF_8)
+ end
+
+ def parse_birthday(birthday)
+ return if birthday.blank?
+ date_of_birth = Date.strptime(birthday.gsub(/[^\d-]+/, ""), "%m-%d-%Y") rescue nil
+ return if date_of_birth.nil?
+ date_of_birth.year < 1904 ? Date.new(1904, date_of_birth.month, date_of_birth.day) : date_of_birth
+ end
+
+ def psql_query(sql)
+ @client.query(sql)
+ end
+
+ def process_raw_text(raw)
+ return "" if raw.blank?
+ text = raw.dup
+ text = CGI.unescapeHTML(text)
+
+ text.gsub!(/:(?:\w{8})\]/, ']')
+
+ text = bbcode_to_md(text)
+
+ # Some links look like this: http://www.onegameamonth.com
+ text.gsub!(/(.+)<\/a>/i, '[\2](\1)')
+
+ # phpBB shortens link text like this, which breaks our markdown processing:
+ # [http://answers.yahoo.com/question/index ... 223AAkkPli](http://answers.yahoo.com/question/index?qid=20070920134223AAkkPli)
+ #
+ # Work around it for now:
+ text.gsub!(/\[http(s)?:\/\/(www\.)?/i, '[')
+
+ # convert list tags to ul and list=1 tags to ol
+ # list=a is not supported, so handle it like list=1
+ # list=9 and list=x have the same result as list=1 and list=a
+ text.gsub!(/\[list\](.*?)\[\/list:u\]/mi, '[ul]\1[/ul]')
+ text.gsub!(/\[list=.*?\](.*?)\[\/list:o\]/mi, '[ol]\1[/ol]')
+
+ # convert *-tags to li-tags so bbcode-to-md can do its magic on phpBB's lists:
+ text.gsub!(/\[\*\](.*?)\[\/\*:m\]/mi, '[li]\1[/li]')
+
+ # [QUOTE=""] -- add newline
+ text.gsub!(/(\[quote="[a-zA-Z\d]+"\])/i) { "#{$1}\n" }
+
+ # [/QUOTE] -- add newline
+ text.gsub!(/(\[\/quote\])/i) { "\n#{$1}" }
+
+ # :) is encoded as
+ text.gsub!(/
/) do
+ smiley = $1
+ @smiley_map.fetch(smiley) do
+ # upload_smiley(smiley, $2, $3, $4) || smiley_as_text(smiley)
+ @smiley_map[smiley] = smiley
+ end
+ end
+
+ text
+ end
+
+ protected
+
+ def bbcode_to_md(text)
+ begin
+ text.bbcode_to_md(false)
+ rescue => e
+ puts "Problem converting \n#{text}\n using ruby-bbcode-to-md"
+ text
+ end
+ end
+
+ def add_default_smilies
+ {
+ [':D', ':-D', ':grin:'] => ':smiley:',
+ [':)', ':-)', ':smile:'] => ':slight_smile:',
+ [';)', ';-)', ':wink:'] => ':wink:',
+ [':(', ':-(', ':sad:'] => ':frowning:',
+ [':o', ':-o', ':eek:'] => ':astonished:',
+ [':shock:'] => ':open_mouth:',
+ [':?', ':-?', ':???:'] => ':confused:',
+ ['8-)', ':cool:'] => ':sunglasses:',
+ [':lol:'] => ':laughing:',
+ [':x', ':-x', ':mad:'] => ':angry:',
+ [':P', ':-P', ':razz:'] => ':stuck_out_tongue:',
+ [':oops:'] => ':blush:',
+ [':cry:'] => ':cry:',
+ [':evil:'] => ':imp:',
+ [':twisted:'] => ':smiling_imp:',
+ [':roll:'] => ':unamused:',
+ [':!:'] => ':exclamation:',
+ [':?:'] => ':question:',
+ [':idea:'] => ':bulb:',
+ [':arrow:'] => ':arrow_right:',
+ [':|', ':-|'] => ':neutral_face:',
+ [':geek:'] => ':nerd:'
+ }.each do |smilies, emoji|
+ smilies.each { |smiley| @smiley_map[smiley] = emoji }
+ end
+ end
+
+end
+
+BulkImport::PhpBB.new.run