FEATURE: Lots of improvements to the phpBB3 importer
- Extensive refactoring of the existing importer - Configuration of import with settings.yml instead of editing code - Supports importing from phpBB 3.0.x and 3.1.x - Imports all attachments (not just the ones embedded with [attachment]) from posts and private messages - Imports all existing attachments without the need to configure allowed file extensions or file sizes - Imports polls - Imports bookmarks - Imports sticky topics and (global) announcements as pinned topics - Imports categories in the original order and sets the content of the category description topic - Sets the creation date of category description topics to the creation date of the first topic in each category - Imports additional user attributes: last seen date, registration IP address, website, date of birth, location - Optionally sets the user's name to their username - Users that didn't activate their account in phpBB3 are imported as inactive users - All imported, active users are automatically approved - Users that were deactivated in phpBB3 get suspended for 200 years during the import - Anonymous users can be imported as suspended users instead of the system user - Forums of type "link" are not imported as categories anymore - Internal links to posts get rewritten during the import (previously only links to topics got rewritten) - Ordered lists with BBCode [list=a] (which are unsupported in Discourse) get imported as if they were [list=1] - Importing of avatars, attachments, private messages, polls and bookmarks can be disabled via configuration file - Optional fixing of private messages for forums that have been upgraded from phpBB2 prevents the import of duplicate messages and tries to group related messages into topics - Table prefix (default: phpbb) is configurable - Most of phpBB's default smilies are mapped to Emojis and all other smilies get uploaded and embedded as images. Smiley mappings can be added or overridden in the settings.yml file.
This commit is contained in:
@@ -0,0 +1,35 @@
|
||||
module ImportScripts::PhpBB3
|
||||
# Named values used by the phpBB3 importer instead of magic numbers/strings.
class Constants
  # ACTIVE_USER / INACTIVE_* values
  # (presumably phpBB's "inactive reason" of a user account -- confirm against the schema).
  ACTIVE_USER = 0
  INACTIVE_REGISTER = 1 # Newly registered account
  INACTIVE_PROFILE = 2 # Profile details changed
  INACTIVE_MANUAL = 3 # Account deactivated by administrator
  INACTIVE_REMIND = 4 # Forced user account reactivation

  # Group names. Frozen so that the shared constant strings cannot be
  # modified in place by accident.
  GROUP_ADMINISTRATORS = 'ADMINISTRATORS'.freeze
  GROUP_MODERATORS = 'GLOBAL_MODERATORS'.freeze

  # User types
  # https://wiki.phpbb.com/Table.phpbb_users
  USER_TYPE_NORMAL = 0
  USER_TYPE_INACTIVE = 1
  USER_TYPE_IGNORE = 2
  USER_TYPE_FOUNDER = 3

  # Avatar types
  AVATAR_TYPE_UPLOADED = 1
  AVATAR_TYPE_REMOTE = 2
  AVATAR_TYPE_GALLERY = 3

  # Forum types
  FORUM_TYPE_CATEGORY = 0
  FORUM_TYPE_POST = 1
  FORUM_TYPE_LINK = 2

  # Topic states
  TOPIC_UNLOCKED = 0
  TOPIC_LOCKED = 1
  TOPIC_MOVED = 2

  # POST_* values (normal, sticky, announcement, global announcement)
  POST_NORMAL = 0
  POST_STICKY = 1
  POST_ANNOUNCE = 2
  POST_GLOBAL = 3
end
|
||||
end
|
||||
@@ -0,0 +1,78 @@
|
||||
require 'yaml'
|
||||
|
||||
module ImportScripts::PhpBB3
|
||||
# Importer configuration read from a YAML settings file.
#
# The values of the `import` section are exposed through readers; the
# `database` section is wrapped in a DatabaseSettings object.
class Settings
  # Reads the YAML file and builds a Settings instance from its content.
  #
  # @param filename [String] path of the YAML settings file
  # @return [Settings]
  def self.load(filename)
    new(YAML.load_file(filename))
  end

  attr_reader :import_anonymous_users,
              :import_attachments,
              :import_private_messages,
              :import_polls,
              :import_bookmarks

  attr_reader :import_uploaded_avatars,
              :import_remote_avatars,
              :import_gallery_avatars

  attr_reader :fix_private_messages,
              :use_bbcode_to_md

  attr_reader :original_site_prefix,
              :new_site_prefix,
              :base_dir

  attr_reader :username_as_name,
              :emojis

  attr_reader :database

  # @param yaml [Hash] parsed settings with the top-level keys `import` and `database`
  def initialize(yaml)
    import_settings = yaml['import']
    @import_anonymous_users = import_settings['anonymous_users']
    @import_attachments = import_settings['attachments']
    @import_private_messages = import_settings['private_messages']
    @import_polls = import_settings['polls']
    @import_bookmarks = import_settings['bookmarks']

    avatar_settings = import_settings['avatars']
    @import_uploaded_avatars = avatar_settings['uploaded']
    @import_remote_avatars = avatar_settings['remote']
    @import_gallery_avatars = avatar_settings['gallery']

    @fix_private_messages = import_settings['fix_private_messages']
    @use_bbcode_to_md = import_settings['use_bbcode_to_md']

    site_prefix = import_settings['site_prefix']
    @original_site_prefix = site_prefix['original']
    @new_site_prefix = site_prefix['new']
    @base_dir = import_settings['phpbb_base_dir']

    @username_as_name = import_settings['username_as_name']

    # An `emojis:` key without any entries is parsed as nil by YAML, which
    # `fetch` with a default would pass through. Always expose a mapping
    # (emoji name => smiley code(s)) so that consumers can iterate over it.
    @emojis = import_settings['emojis'] || {}

    @database = DatabaseSettings.new(yaml['database'])
  end
end
|
||||
|
||||
# Database options read from a settings hash with string keys.
class DatabaseSettings
  attr_reader :type, :host, :username, :password, :schema, :table_prefix, :batch_size

  # Copies the known keys of the given hash into instance variables.
  # Keys that are absent simply yield nil.
  #
  # @param yaml [Hash] hash with the keys type, host, username, password,
  #   schema, table_prefix and batch_size
  def initialize(yaml)
    @type, @host, @username, @password, @schema, @table_prefix, @batch_size =
      yaml.values_at('type', 'host', 'username', 'password', 'schema', 'table_prefix', 'batch_size')
  end
end
|
||||
end
|
||||
@@ -0,0 +1,90 @@
|
||||
module ImportScripts::PhpBB3
|
||||
require 'cgi' # CGI.escapeHTML is used when building <img> tags

# Replaces the smiley markup stored by phpBB with emoji codes. Smilies without
# an emoji mapping are uploaded and embedded as images; if that fails, the
# smiley code is kept as plain text.
class SmileyProcessor
  # @param uploader [ImportScripts::Uploader]
  # @param settings [ImportScripts::PhpBB3::Settings]
  # @param phpbb_config [Hash]
  def initialize(uploader, settings, phpbb_config)
    @uploader = uploader
    @smilies_path = File.join(settings.base_dir, phpbb_config[:smilies_path])

    # Maps a smiley code to its replacement (emoji code, <img> tag or the code itself).
    @smiley_map = {}
    add_default_smilies
    add_configured_smilies(settings.emojis)
  end

  # Replaces all smilies in the given text in place.
  #
  # @param text [String] text that gets modified
  # @return [String, nil] the text, or nil when nothing was replaced (see String#gsub!)
  def replace_smilies(text)
    # :) is encoded as <!-- s:) --><img src="{SMILIES_PATH}/icon_e_smile.gif" alt=":)" title="Smile" /><!-- s:) -->
    text.gsub!(/<!-- s(\S+) --><img src="\{SMILIES_PATH\}\/(.+?)" alt="(.*?)" title="(.*?)" \/><!-- s(?:\S+) -->/) do
      # Read all captures immediately instead of relying on $2..$4 still
      # belonging to this match inside the nested block.
      smiley, path, alt_text, title = Regexp.last_match.captures

      @smiley_map.fetch(smiley) do
        upload_smiley(smiley, path, alt_text, title) || smiley_as_text(smiley)
      end
    end
  end

  protected

  # Registers the emoji replacements for phpBB's default smiley codes.
  def add_default_smilies
    {
      [':D', ':-D', ':grin:'] => ':smiley:',
      [':)', ':-)', ':smile:'] => ':smile:',
      [';)', ';-)', ':wink:'] => ':wink:',
      [':(', ':-(', ':sad:'] => ':frowning:',
      [':o', ':-o', ':eek:'] => ':astonished:',
      [':shock:'] => ':open_mouth:',
      [':?', ':-?', ':???:'] => ':confused:',
      ['8-)', ':cool:'] => ':sunglasses:',
      [':lol:'] => ':laughing:',
      [':x', ':-x', ':mad:'] => ':angry:',
      [':P', ':-P', ':razz:'] => ':stuck_out_tongue:',
      [':oops:'] => ':blush:',
      [':cry:'] => ':cry:',
      [':evil:'] => ':imp:',
      [':twisted:'] => ':smiling_imp:',
      [':roll:'] => ':unamused:',
      [':!:'] => ':exclamation:',
      [':?:'] => ':question:',
      [':idea:'] => ':bulb:',
      [':arrow:'] => ':arrow_right:',
      [':|', ':-|'] => ':neutral_face:'
    }.each do |smilies, emoji|
      smilies.each { |smiley| @smiley_map[smiley] = emoji }
    end
  end

  # Adds or overrides mappings with the configured ones.
  #
  # @param emojis [Hash, nil] emoji name => a smiley code or a list of smiley codes;
  #   nil (e.g. an empty `emojis:` entry in the settings) is treated as "no mappings"
  def add_configured_smilies(emojis)
    (emojis || {}).each do |emoji, smilies|
      # Kernel#Array turns a single code, a list of codes or nil into a list.
      Array(smilies).each { |smiley| @smiley_map[smiley] = ":#{emoji}:" }
    end
  end

  # Uploads the smiley image and remembers the resulting <img> tag for the smiley code.
  #
  # @return [String, nil] the HTML of the embedded image or nil if the upload failed
  def upload_smiley(smiley, path, alt_text, title)
    path = File.join(@smilies_path, path)
    filename = File.basename(path)
    upload = @uploader.create_upload(Discourse::SYSTEM_USER_ID, path, filename)

    if upload.nil? || !upload.valid?
      puts "Failed to upload #{path}"
      puts upload.errors.inspect if upload
      return nil
    end

    @smiley_map[smiley] = embedded_image_html(upload, alt_text, title)
  end

  # Builds the <img> tag for an uploaded smiley. The image size is capped by
  # the site's maximum image dimensions.
  def embedded_image_html(upload, alt_text, title)
    image_width = [upload.width, SiteSetting.max_image_width].compact.min
    image_height = [upload.height, SiteSetting.max_image_height].compact.min

    # alt and title are captured from post text and may contain quotes or
    # other markup characters, so they must not end up in the tag unescaped.
    alt_attribute = CGI.escapeHTML(alt_text.to_s)
    title_attribute = CGI.escapeHTML(title.to_s)

    %Q[<img src="#{upload.url}" width="#{image_width}" height="#{image_height}" alt="#{alt_attribute}" title="#{title_attribute}"/>]
  end

  # Fallback: keeps the smiley code as text and remembers that decision.
  def smiley_as_text(smiley)
    @smiley_map[smiley] = smiley
  end
end
|
||||
end
|
||||
@@ -0,0 +1,133 @@
|
||||
module ImportScripts::PhpBB3
|
||||
# Converts the raw text of phpBB posts and private messages: removes BBCode
# hashes, replaces smilies, rewrites links and lists and embeds attachments.
class TextProcessor
  # @param lookup [ImportScripts::LookupContainer]
  # @param database [ImportScripts::PhpBB3::Database_3_0 | ImportScripts::PhpBB3::Database_3_1]
  # @param smiley_processor [ImportScripts::PhpBB3::SmileyProcessor]
  # @param settings [ImportScripts::PhpBB3::Settings]
  def initialize(lookup, database, smiley_processor, settings)
    @lookup = lookup
    @database = database
    @smiley_processor = smiley_processor

    @new_site_prefix = settings.new_site_prefix
    create_internal_link_regexps(settings.original_site_prefix)
  end

  # Unescapes HTML entities and applies all text conversions except attachments.
  #
  # @param raw [String] text as stored by phpBB; it is not modified
  # @return [String] the converted text
  def process_raw_text(raw)
    text = CGI.unescapeHTML(raw.dup)

    clean_bbcodes(text)
    process_smilies(text)
    process_links(text)
    process_lists(text)

    text
  end

  # @param raw [String] text as stored by phpBB
  # @param attachments [Array<String>, nil] replacement text for each attachment,
  #   indexed like the [attachment=N] BBCodes
  # @return [String] the converted text including the attachments
  def process_post(raw, attachments)
    text = process_raw_text(raw)
    text = process_attachments(text, attachments) if attachments.present?
    text
  end

  # Private messages are processed exactly like posts.
  def process_private_msg(raw, attachments)
    process_post(raw, attachments)
  end

  protected

  def clean_bbcodes(text)
    # Many phpbb bbcode tags have a hash attached to them. Examples:
    # [url=https://google.com:1qh1i7ky]click here[/url:1qh1i7ky]
    # [quote="cybereality":b0wtlzex]Some text.[/quote:b0wtlzex]
    text.gsub!(/:(?:\w{8})\]/, ']')
  end

  def process_smilies(text)
    @smiley_processor.replace_smilies(text)
  end

  def process_links(text)
    # Internal forum links can have these forms:
    # for topics: <!-- l --><a class="postlink-local" href="https://example.com/forums/viewtopic.php?f=26&t=3412">viewtopic.php?f=26&t=3412</a><!-- l -->
    # for posts: <!-- l --><a class="postlink-local" href="https://example.com/forums/viewtopic.php?p=1732#p1732">viewtopic.php?p=1732#p1732</a><!-- l -->
    text.gsub!(@long_internal_link_regexp) do |link|
      replace_internal_link(link, $1, $2)
    end

    # Some links look like this: <!-- m --><a class="postlink" href="http://www.onegameamonth.com">http://www.onegameamonth.com</a><!-- m -->
    # Every part is matched as narrowly as possible; greedy wildcards would
    # merge several links on the same line into a single one.
    text.gsub!(%r{<!-- \w --><a[^>]+?href="([^"\s]+)"[^>]*>(.+?)</a><!-- \w -->}i, '[\2](\1)')

    # Replace internal forum links that aren't in the <!-- l --> format
    text.gsub!(@short_internal_link_regexp) do |link|
      replace_internal_link(link, $1, $2)
    end

    # phpBB shortens link text like this, which breaks our markdown processing:
    # [http://answers.yahoo.com/question/index ... 223AAkkPli](http://answers.yahoo.com/question/index?qid=20070920134223AAkkPli)
    #
    # Work around it for now:
    text.gsub!(/\[http(s)?:\/\/(www\.)?/i, '[')
  end

  # @param link [String] the matched text; returned unchanged if no target is found
  # @param import_topic_id [String, nil] value of the link's t parameter
  # @param import_post_id [String, nil] value of the link's p parameter
  def replace_internal_link(link, import_topic_id, import_post_id)
    if import_post_id.nil?
      replace_internal_topic_link(link, import_topic_id)
    else
      replace_internal_post_link(link, import_post_id)
    end
  end

  # Links to topics are resolved via the first post of the topic.
  def replace_internal_topic_link(link, import_topic_id)
    import_post_id = @database.get_first_post_id(import_topic_id)
    return link if import_post_id.nil?

    replace_internal_post_link(link, import_post_id)
  end

  def replace_internal_post_link(link, import_post_id)
    topic = @lookup.topic_lookup_from_imported_post_id(import_post_id)
    topic ? "#{@new_site_prefix}#{topic[:url]}" : link
  end

  def process_lists(text)
    # convert list tags to ul and list=1 tags to ol
    # list=a is not supported, so handle it like list=1
    # list=9 and list=x have the same result as list=1 and list=a
    text.gsub!(/\[list\](.*?)\[\/list:u\]/mi, '[ul]\1[/ul]')
    text.gsub!(/\[list=.*?\](.*?)\[\/list:o\]/mi, '[ol]\1[/ol]')

    # convert *-tags to li-tags so bbcode-to-md can do its magic on phpBB's lists:
    text.gsub!(/\[\*\](.*?)\[\/\*:m\]/mi, '[li]\1[/li]')
  end

  # This replaces existing [attachment] BBCodes with the corresponding HTML tags for Discourse.
  # All attachments that haven't been referenced in the text are appended to the end of the text.
  def process_attachments(text, attachments)
    # The whole number has to be captured: a repeated single-digit group
    # would keep only the last digit of indexes >= 10.
    attachment_regexp = /\[attachment=(\d+)\]<!-- \w+ -->([^<]+)<!-- \w+ -->\[\/attachment\]?/i
    unreferenced_attachments = attachments.dup

    text = text.gsub(attachment_regexp) do
      index = $1.to_i
      real_filename = $2
      unreferenced_attachments[index] = nil
      # Unknown indexes fall back to the filename found in the text.
      attachments.fetch(index, real_filename)
    end

    unreferenced_attachments = unreferenced_attachments.compact
    text << "\n" << unreferenced_attachments.join("\n") unless unreferenced_attachments.empty?
    text
  end

  # Builds the regular expressions for links to topics and posts of the
  # original forum. Capture 1 is the topic id, capture 2 the post id.
  def create_internal_link_regexps(original_site_prefix)
    host = Regexp.escape(original_site_prefix)

    # * t= / p= must directly follow "?" or "&". Otherwise parameters like
    #   start=20 or hilit=5 would be mistaken for the topic id.
    # * The prefix is greedy, so the last t=/p= parameter of the URL wins.
    # * Remaining parameters and the anchor are consumed, so that they don't
    #   end up appended to the new URL.
    link_regexp = %r{https?://#{host}/viewtopic\.php\?(?:[^\s"<>]*&(?:amp;)?)?(?:t=(\d+)|p=(\d+))(?:[&#][^\s"<>\[\]()]*)?}i

    # Lazy and negated matches keep several links on one line apart.
    @long_internal_link_regexp = %r{<!-- l --><a[^>]+?href="#{link_regexp.source}"[^>]*>.*?</a><!-- l -->}i
    @short_internal_link_regexp = link_regexp
  end
end
|
||||
end
|
||||
Reference in New Issue
Block a user