FEATURE: Lots of improvements to the phpBB3 importer

- Extensive refactoring of the existing importer
- Configuration of import with settings.yml instead of editing code
- Supports importing from phpBB 3.0.x and 3.1.x
- Imports all attachments (not just the ones embedded with [attachment])
  from posts and private messages
- Imports all existing attachments without the need to configure allowed
  file extensions or file sizes
- Imports polls
- Imports bookmarks
- Imports sticky topics and (global) announcements as pinned topics
- Imports categories in the original order and sets the content of the
  category description topic
- Sets the creation date of category description topics to the creation
  date of the first topic in each category
- Imports additional user attributes: last seen date, registration
  IP address, website, date of birth, location
- Optionally sets the user's name to their username
- Users that didn't activate their account in phpBB3 are imported as
  inactive users
- All imported, active users are automatically approved
- Users that were deactivated in phpBB3 get suspended for 200 years
  during the import
- Anonymous users can be imported as suspended users instead of as the
  system user
- Forums of type "link" are not imported as categories anymore
- Internal links to posts get rewritten during the import (previously
  only links to topics got rewritten)
- Ordered lists with BBCode [list=a] (which are unsupported in
  Discourse) get imported as if they were [list=1]
- Importing of avatars, attachments, private messages, polls and
  bookmarks can be disabled via configuration file
- Optional fixing of private messages for forums that have been upgraded
  from phpBB2 prevents the import of duplicate messages and tries to
  group related messages into topics
- Table prefix (default: phpbb) is configurable
- Most of phpBB's default smilies are mapped to Emojis and all other
  smilies get uploaded and embedded as images. Smiley mappings can be
  added or overridden in the settings.yml file.
This commit is contained in:
Gerhard Schlager
2015-07-05 23:17:03 +02:00
parent 2dd01c61b0
commit 1cb45861c5
20 changed files with 1696 additions and 484 deletions
@@ -0,0 +1,35 @@
module ImportScripts::PhpBB3
# Named values used by the phpBB3 importer so that the rest of the code does
# not have to work with bare numbers and strings.
# NOTE(review): the values presumably mirror phpBB's own constants and
# database columns - confirm against the phpBB version being imported.
class Constants
  # Activation state of a user and the reason for being inactive.
  ACTIVE_USER = 0
  INACTIVE_REGISTER = 1 # Newly registered account
  INACTIVE_PROFILE = 2 # Profile details changed
  INACTIVE_MANUAL = 3 # Account deactivated by administrator
  INACTIVE_REMIND = 4 # Forced user account reactivation

  # Group names. Frozen so that the shared string constants cannot be
  # mutated by accident.
  GROUP_ADMINISTRATORS = 'ADMINISTRATORS'.freeze
  GROUP_MODERATORS = 'GLOBAL_MODERATORS'.freeze

  # User types.
  # https://wiki.phpbb.com/Table.phpbb_users
  USER_TYPE_NORMAL = 0
  USER_TYPE_INACTIVE = 1
  USER_TYPE_IGNORE = 2
  USER_TYPE_FOUNDER = 3

  # Avatar types.
  AVATAR_TYPE_UPLOADED = 1
  AVATAR_TYPE_REMOTE = 2
  AVATAR_TYPE_GALLERY = 3

  # Forum types.
  FORUM_TYPE_CATEGORY = 0
  FORUM_TYPE_POST = 1
  FORUM_TYPE_LINK = 2

  # Topic states.
  TOPIC_UNLOCKED = 0
  TOPIC_LOCKED = 1
  TOPIC_MOVED = 2

  # Topic/post types (normal, sticky, announcement, global announcement).
  POST_NORMAL = 0
  POST_STICKY = 1
  POST_ANNOUNCE = 2
  POST_GLOBAL = 3
end
end
@@ -0,0 +1,78 @@
require 'yaml'
module ImportScripts::PhpBB3
# Import configuration read from a YAML settings file.
#
# The YAML document is expected to contain an 'import' mapping (with nested
# 'avatars' and 'site_prefix' mappings) and a 'database' mapping.
class Settings
  # Reads the given YAML file and builds a settings object from it.
  #
  # @param filename [String] path of the YAML settings file
  # @return [Settings]
  def self.load(filename)
    new(YAML.load_file(filename))
  end

  attr_reader :import_anonymous_users
  attr_reader :import_attachments
  attr_reader :import_private_messages
  attr_reader :import_polls
  attr_reader :import_bookmarks

  attr_reader :import_uploaded_avatars
  attr_reader :import_remote_avatars
  attr_reader :import_gallery_avatars

  attr_reader :fix_private_messages
  attr_reader :use_bbcode_to_md

  attr_reader :original_site_prefix
  attr_reader :new_site_prefix
  attr_reader :base_dir

  attr_reader :username_as_name
  attr_reader :emojis

  attr_reader :database

  # @param yaml [Hash] parsed settings with the keys 'import' and 'database'
  def initialize(yaml)
    import_settings = yaml['import']

    @import_anonymous_users = import_settings['anonymous_users']
    @import_attachments = import_settings['attachments']
    @import_private_messages = import_settings['private_messages']
    @import_polls = import_settings['polls']
    @import_bookmarks = import_settings['bookmarks']

    avatar_settings = import_settings['avatars']
    @import_uploaded_avatars = avatar_settings['uploaded']
    @import_remote_avatars = avatar_settings['remote']
    @import_gallery_avatars = avatar_settings['gallery']

    @fix_private_messages = import_settings['fix_private_messages']
    @use_bbcode_to_md = import_settings['use_bbcode_to_md']

    site_prefix = import_settings['site_prefix']
    @original_site_prefix = site_prefix['original']
    @new_site_prefix = site_prefix['new']
    @base_dir = import_settings['phpbb_base_dir']

    @username_as_name = import_settings['username_as_name']

    # A key without entries ("emojis:" followed only by comments) is parsed
    # as nil, so a plain fetch with a default is not enough: fall back to an
    # empty mapping for both a missing and an empty key.
    @emojis = import_settings['emojis'] || {}

    @database = DatabaseSettings.new(yaml['database'])
  end
end
# Connection details taken from the 'database' section of the settings.
class DatabaseSettings
  attr_reader :type, :host, :username, :password, :schema, :table_prefix, :batch_size

  # @param yaml [Hash] the 'database' mapping of the settings file;
  #   keys that are not present leave the matching attribute nil
  def initialize(yaml)
    @type, @host, @username, @password, @schema, @table_prefix, @batch_size =
      yaml.values_at('type', 'host', 'username', 'password', 'schema', 'table_prefix', 'batch_size')
  end
end
end
@@ -0,0 +1,90 @@
module ImportScripts::PhpBB3
# Replaces the HTML that phpBB stores for smilies with Emoji codes. Smilies
# without a mapping are uploaded and embedded as images; if the upload fails
# the plain smiley code is used instead.
class SmileyProcessor
  # Matches a stored smiley, for example
  # <!-- s:) --><img src="{SMILIES_PATH}/icon_e_smile.gif" alt=":)" title="Smile" /><!-- s:) -->
  # Captures: 1 = smiley code, 2 = image file, 3 = alt text, 4 = title.
  SMILEY_REGEXP = /<!-- s(\S+) --><img src="\{SMILIES_PATH\}\/(.+?)" alt="(.*?)" title="(.*?)" \/><!-- s(?:\S+) -->/

  # @param uploader [ImportScripts::Uploader]
  # @param settings [ImportScripts::PhpBB3::Settings]
  # @param phpbb_config [Hash]
  def initialize(uploader, settings, phpbb_config)
    @uploader = uploader
    @smilies_path = File.join(settings.base_dir, phpbb_config[:smilies_path])

    @smiley_map = {}
    add_default_smilies
    # Configured mappings are added last so that they override the defaults.
    add_configured_smilies(settings.emojis)
  end

  # Replaces all smilies in +text+ in place.
  #
  # @param text [String] text that gets modified
  # @return [String, nil] the modified text, or nil when it contained no smilies
  def replace_smilies(text)
    text.gsub!(SMILEY_REGEXP) do
      # Keep the match data in a local: it is needed inside the fetch block.
      match = Regexp.last_match
      smiley = match[1]

      @smiley_map.fetch(smiley) do
        upload_smiley(smiley, match[2], match[3], match[4]) || smiley_as_text(smiley)
      end
    end
  end

  protected

  # Registers the built-in mapping of smiley codes to Emoji codes.
  def add_default_smilies
    {
      [':D', ':-D', ':grin:'] => ':smiley:',
      [':)', ':-)', ':smile:'] => ':smile:',
      [';)', ';-)', ':wink:'] => ':wink:',
      [':(', ':-(', ':sad:'] => ':frowning:',
      [':o', ':-o', ':eek:'] => ':astonished:',
      [':shock:'] => ':open_mouth:',
      [':?', ':-?', ':???:'] => ':confused:',
      ['8-)', ':cool:'] => ':sunglasses:',
      [':lol:'] => ':laughing:',
      [':x', ':-x', ':mad:'] => ':angry:',
      [':P', ':-P', ':razz:'] => ':stuck_out_tongue:',
      [':oops:'] => ':blush:',
      [':cry:'] => ':cry:',
      [':evil:'] => ':imp:',
      [':twisted:'] => ':smiling_imp:',
      [':roll:'] => ':unamused:',
      [':!:'] => ':exclamation:',
      [':?:'] => ':question:',
      [':idea:'] => ':bulb:',
      [':arrow:'] => ':arrow_right:',
      [':|', ':-|'] => ':neutral_face:'
    }.each do |smilies, emoji|
      smilies.each { |smiley| @smiley_map[smiley] = emoji }
    end
  end

  # Registers the mappings from the settings file.
  #
  # @param emojis [Hash, nil] Emoji name => one smiley code or a list of
  #   smiley codes; nil (no mappings configured) is ignored
  def add_configured_smilies(emojis)
    return if emojis.nil?

    emojis.each do |emoji, smilies|
      # Kernel#Array accepts both a single smiley code and a list of codes.
      Array(smilies).each { |smiley| @smiley_map[smiley] = ":#{emoji}:" }
    end
  end

  # Uploads the image of a smiley and remembers the resulting HTML so that
  # the same smiley is uploaded only once.
  #
  # @return [String, nil] HTML of the embedded image, or nil if the upload failed
  def upload_smiley(smiley, path, alt_text, title)
    path = File.join(@smilies_path, path)
    filename = File.basename(path)
    upload = @uploader.create_upload(Discourse::SYSTEM_USER_ID, path, filename)

    if upload.nil? || !upload.valid?
      puts "Failed to upload #{path}"
      puts upload.errors.inspect if upload
      return nil
    end

    @smiley_map[smiley] = embedded_image_html(upload, alt_text, title)
  end

  # Builds the <img> tag for an uploaded smiley. Width and height are capped
  # by the max_image_width and max_image_height site settings.
  def embedded_image_html(upload, alt_text, title)
    image_width = [upload.width, SiteSetting.max_image_width].compact.min
    image_height = [upload.height, SiteSetting.max_image_height].compact.min
    %Q[<img src="#{upload.url}" width="#{image_width}" height="#{image_height}" alt="#{alt_text}" title="#{title}"/>]
  end

  # Maps the smiley to its own code so that a failed upload is not retried
  # for every further occurrence.
  def smiley_as_text(smiley)
    @smiley_map[smiley] = smiley
  end
end
end
@@ -0,0 +1,133 @@
module ImportScripts::PhpBB3
# Converts the raw text of phpBB posts and private messages: unescapes HTML,
# strips the hashes from BBCode tags, replaces smilies, rewrites links and
# lists and inserts attachments.
class TextProcessor
  # @param lookup [ImportScripts::LookupContainer]
  # @param database [ImportScripts::PhpBB3::Database_3_0 | ImportScripts::PhpBB3::Database_3_1]
  # @param smiley_processor [ImportScripts::PhpBB3::SmileyProcessor]
  # @param settings [ImportScripts::PhpBB3::Settings]
  def initialize(lookup, database, smiley_processor, settings)
    @lookup = lookup
    @database = database
    @smiley_processor = smiley_processor

    @new_site_prefix = settings.new_site_prefix
    create_internal_link_regexps(settings.original_site_prefix)
  end

  # Converts text without attachments.
  #
  # @param raw [String] text as stored by phpBB; it is not modified
  # @return [String] the converted text
  def process_raw_text(raw)
    # Work on a copy: all following steps modify the text in place.
    text = CGI.unescapeHTML(raw.dup)

    clean_bbcodes(text)
    process_smilies(text)
    process_links(text)
    process_lists(text)

    text
  end

  # Converts the text of a post.
  #
  # @param raw [String] text as stored by phpBB
  # @param attachments [Array<String>, nil] replacement text for each
  #   attachment, indexed like the [attachment=N] BBCodes
  # @return [String] the converted text
  def process_post(raw, attachments)
    process_text_with_attachments(raw, attachments)
  end

  # Converts the text of a private message.
  #
  # @param raw [String] text as stored by phpBB
  # @param attachments [Array<String>, nil] replacement text for each
  #   attachment, indexed like the [attachment=N] BBCodes
  # @return [String] the converted text
  def process_private_msg(raw, attachments)
    process_text_with_attachments(raw, attachments)
  end

  protected

  # Shared implementation of process_post and process_private_msg.
  def process_text_with_attachments(raw, attachments)
    text = process_raw_text(raw)
    text = process_attachments(text, attachments) if attachments.present?
    text
  end

  def clean_bbcodes(text)
    # Many phpbb bbcode tags have a hash attached to them. Examples:
    #   [url=https&#58;//google&#46;com:1qh1i7ky]click here[/url:1qh1i7ky]
    #   [quote=&quot;cybereality&quot;:b0wtlzex]Some text.[/quote:b0wtlzex]
    text.gsub!(/:(?:\w{8})\]/, ']')
  end

  def process_smilies(text)
    @smiley_processor.replace_smilies(text)
  end

  def process_links(text)
    # Internal forum links can have these forms:
    # for topics: <!-- l --><a class="postlink-local" href="https://example.com/forums/viewtopic.php?f=26&amp;t=3412">viewtopic.php?f=26&amp;t=3412</a><!-- l -->
    # for posts: <!-- l --><a class="postlink-local" href="https://example.com/forums/viewtopic.php?p=1732#p1732">viewtopic.php?p=1732#p1732</a><!-- l -->
    text.gsub!(@long_internal_link_regexp) do |link|
      match = Regexp.last_match
      replace_internal_link(link, match[1], match[2])
    end

    # Some links look like this: <!-- m --><a class="postlink" href="http://www.onegameamonth.com">http://www.onegameamonth.com</a><!-- m -->
    # The pattern stays inside a single tag ([^>]) and uses lazy quantifiers,
    # otherwise several links on one line would be merged into one.
    text.gsub!(/<!-- \w --><a[^>]+?href="(\S+?)"[^>]*>(.+?)<\/a><!-- \w -->/i, '[\2](\1)')

    # Replace internal forum links that aren't in the <!-- l --> format
    text.gsub!(@short_internal_link_regexp) do |link|
      match = Regexp.last_match
      replace_internal_link(link, match[1], match[2])
    end

    # phpBB shortens link text like this, which breaks our markdown processing:
    #   [http://answers.yahoo.com/question/index ... 223AAkkPli](http://answers.yahoo.com/question/index?qid=20070920134223AAkkPli)
    #
    # Work around it for now:
    text.gsub!(/\[http(s)?:\/\/(www\.)?/i, '[')
  end

  # Returns the new URL for an internal link, or +link+ unchanged if the
  # target is unknown. A post id takes precedence over a topic id.
  def replace_internal_link(link, import_topic_id, import_post_id)
    if import_post_id.nil?
      replace_internal_topic_link(link, import_topic_id)
    else
      replace_internal_post_link(link, import_post_id)
    end
  end

  # A topic link is resolved through the first post of that topic.
  def replace_internal_topic_link(link, import_topic_id)
    import_post_id = @database.get_first_post_id(import_topic_id)
    return link if import_post_id.nil?

    replace_internal_post_link(link, import_post_id)
  end

  def replace_internal_post_link(link, import_post_id)
    topic = @lookup.topic_lookup_from_imported_post_id(import_post_id)
    topic ? "#{@new_site_prefix}#{topic[:url]}" : link
  end

  def process_lists(text)
    # convert list tags to ul and list=1 tags to ol
    # list=a is not supported, so handle it like list=1
    # list=9 and list=x have the same result as list=1 and list=a
    text.gsub!(/\[list\](.*?)\[\/list:u\]/mi, '[ul]\1[/ul]')
    text.gsub!(/\[list=.*?\](.*?)\[\/list:o\]/mi, '[ol]\1[/ol]')

    # convert *-tags to li-tags so bbcode-to-md can do its magic on phpBB's lists:
    text.gsub!(/\[\*\](.*?)\[\/\*:m\]/mi, '[li]\1[/li]')
  end

  # This replaces existing [attachment] BBCodes with the corresponding HTML tags for Discourse.
  # All attachments that haven't been referenced in the text are appended to the end of the text.
  #
  # @param text [String] converted text; it is not modified
  # @param attachments [Array<String>] replacement text for each attachment
  # @return [String] a new string with the attachments inserted
  def process_attachments(text, attachments)
    # The whole number has to be captured: a repeated single-digit group
    # would keep only the last digit of indexes >= 10.
    attachment_regexp = /\[attachment=(\d+)\]<!-- \w+ -->([^<]+)<!-- \w+ -->\[\/attachment\]?/i
    unreferenced_attachments = attachments.dup

    text = text.gsub(attachment_regexp) do
      match = Regexp.last_match
      index = match[1].to_i
      real_filename = match[2]

      unreferenced_attachments[index] = nil
      # Unknown indexes fall back to the original file name.
      attachments.fetch(index, real_filename)
    end

    unreferenced_attachments = unreferenced_attachments.compact
    text << "\n" << unreferenced_attachments.join("\n") unless unreferenced_attachments.empty?
    text
  end

  # Builds the regexps for links to topics and posts of the old forum.
  # Capture 1 is the topic id (t=), capture 2 the post id (p=).
  def create_internal_link_regexps(original_site_prefix)
    # Escape every regexp metacharacter of the configured prefix, not only dots.
    host = Regexp.escape(original_site_prefix)
    link_regex = "http(?:s)?://#{host}/viewtopic\\.php\\?(?:\\S*)(?:t=(\\d+)|p=(\\d+)(?:#p\\d+)?)"

    # Restricted to a single <a> element so that several links on one line
    # are replaced one by one.
    @long_internal_link_regexp = Regexp.new(%Q|<!-- l --><a[^>]+?href="#{link_regex}"[^>]*>.*?</a><!-- l -->|, Regexp::IGNORECASE)
    @short_internal_link_regexp = Regexp.new(link_regex, Regexp::IGNORECASE)
  end
end
end