Improvements to phpBB3 import script (#10999)
* FEATURE: Import attachments
* FEATURE: Add support for importing multiple forums in one
* FEATURE: Add support for category and tag mapping
* FEATURE: Import groups
* FIX: Add spaces around images
* FEATURE: Custom mapping of user rank to trust levels
* FIX: Do not fail import if it cannot import polls
* FIX: Optimize existing records lookup

Co-authored-by: Gerhard Schlager <mail@gerhard-schlager.at>
Co-authored-by: Jarek Radosz <jradosz@gmail.com>
This commit is contained in:
@@ -0,0 +1,88 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
module ImportScripts; end
module ImportScripts::PhpBB3; end

module ImportScripts::PhpBB3::BBCode
  # Linebreak styles a node can request before (prefix) or after (postfix)
  # its own output. New nodes start out with LINEBREAK_AUTO.
  LINEBREAK_AUTO = :auto
  LINEBREAK_HARD = :hard
  LINEBREAK_HTML = :html

  # One node of the intermediate tree that is built while converting XML into
  # Markdown. A node records the XML element it was created for, the text and
  # decorations (prefix/postfix) to emit, the linebreaks it needs around
  # itself, and its position in the tree (parent, children, siblings).
  class MarkdownNode
    # @return [String] name of the XML element this node was created for
    attr_reader :xml_node_name

    # @return [MarkdownNode, nil] +nil+ for the root node
    attr_reader :parent

    # @return [Array<MarkdownNode>, nil] +nil+ once {#skip_children} was called
    attr_reader :children

    # @return [MarkdownNode, nil] the node created just before this one under the same parent
    attr_accessor :previous_sibling

    # @return [MarkdownNode, nil] the node created just after this one under the same parent
    attr_accessor :next_sibling

    # @return [String]
    attr_accessor :text

    # @return [String]
    attr_accessor :prefix

    # @return [String]
    attr_accessor :postfix

    # @return [Integer]
    attr_accessor :prefix_linebreaks

    # @return [Integer]
    attr_accessor :postfix_linebreaks

    # @return [Symbol] one of the LINEBREAK_* constants
    attr_accessor :prefix_linebreak_type

    # @return [Symbol] one of the LINEBREAK_* constants
    attr_accessor :postfix_linebreak_type

    # @return [String, nil] text for descendants of this node; +nil+ until explicitly set
    attr_accessor :prefix_children

    # Creates the node and, when a parent is given, appends it to the parent's
    # children and links it with the previously last child as siblings.
    #
    # @param xml_node_name [String]
    # @param parent [MarkdownNode, nil]
    def initialize(xml_node_name:, parent:)
      @xml_node_name = xml_node_name

      # Unfrozen strings: callers append to them with `<<`.
      @text = +""
      @prefix = +""
      @postfix = +""

      @prefix_linebreaks = 0
      @postfix_linebreaks = 0

      @prefix_linebreak_type = LINEBREAK_AUTO
      @postfix_linebreak_type = LINEBREAK_AUTO

      @parent = parent
      @children = []

      if @parent
        @previous_sibling = @parent.children.last
        @previous_sibling.next_sibling = self if @previous_sibling
        @parent.children << self
      end
    end

    # Uses the same text as prefix and postfix, e.g. "**" for bold.
    #
    # @param text [String]
    def enclosed_with=(text)
      @prefix = @postfix = text
    end

    # Marks this node as a leaf: +children+ becomes +nil+.
    def skip_children
      @children = nil
    end

    # @return [String] one-line summary of the node for debugging
    def to_s
      # `children` is nil after #skip_children, so it must not be sent #size directly.
      child_count = children ? children.size : 0
      "name: #{xml_node_name}, prefix: #{prefix}, text: #{text}, children: #{child_count}, postfix: #{postfix}"
    end
  end
end
|
||||
@@ -0,0 +1,356 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
require 'nokogiri'
|
||||
require_relative 'markdown_node'
|
||||
|
||||
module ImportScripts::PhpBB3::BBCode
|
||||
# Converts the XML representation of a phpBB post into Markdown.
#
# The conversion runs in three steps: #preprocess_xml cleans up the parsed
# document, #visit builds a tree of MarkdownNode objects by dispatching to a
# `visit_<element name>` method for every XML node, and #to_markdown renders
# that tree into a string.
class XmlToMarkdown
  # @param xml [String] XML source of the post
  # @param opts [Hash] optional callbacks and flags
  # @option opts [#call] :username_from_user_id called with the `user_id` of a quote, returns a username
  # @option opts [#call] :smilie_to_emoji called with the content of an <E> element, returns its replacement text
  # @option opts [#call] :quoted_post_from_post_id called with the `post_id` of a quote, returns a Hash
  #   with :username, :post_number and :topic_id (or nil)
  # @option opts [#call] :upload_md_from_file called with (filename, index) of an attachment, returns its text
  # @option opts [#call] :url_replacement called with every link URL, returns the URL to emit
  # @option opts [Boolean] :allow_inline_code (false) render single-line code with backticks
  # @option opts [Boolean] :traditional_linebreaks (false) emit every linebreak as backslash + newline
  def initialize(xml, opts = {})
    @username_from_user_id = opts[:username_from_user_id]
    @smilie_to_emoji = opts[:smilie_to_emoji]
    @quoted_post_from_post_id = opts[:quoted_post_from_post_id]
    @upload_md_from_file = opts[:upload_md_from_file]
    @url_replacement = opts[:url_replacement]
    @allow_inline_code = opts.fetch(:allow_inline_code, false)
    @traditional_linebreaks = opts.fetch(:traditional_linebreaks, false)

    @doc = Nokogiri::XML(xml)
    @list_stack = []
  end

  # @return [String] the Markdown for the whole document, without trailing whitespace
  def convert
    preprocess_xml

    md_root = MarkdownNode.new(xml_node_name: "ROOT", parent: nil)
    visit(@doc.root, md_root)
    to_markdown(md_root).rstrip
  end

  private

  # Elements that are removed from the document before conversion.
  IGNORED_ELEMENTS = %w[s e i].freeze
  # A text node that is the first child of one of these elements gets its leading whitespace removed.
  ELEMENTS_WITHOUT_LEADING_WHITESPACES = %w[LIST LI].freeze
  # Repeated <br> directly inside one of these elements uses hard linebreaks.
  ELEMENTS_WITH_HARD_LINEBREAKS = %w[B I U].freeze
  # More consecutive linebreaks than this are written explicitly (see #add_linebreaks!).
  EXPLICIT_LINEBREAK_THRESHOLD = 2

  # Normalizes text nodes (drops leading newlines, drops nodes that end up
  # empty) and removes the elements listed in IGNORED_ELEMENTS.
  def preprocess_xml
    @doc.traverse do |node|
      if node.is_a? Nokogiri::XML::Text
        node.content = node.content.gsub(/\A\n+\s*/, "")
        node.content = node.content.lstrip if remove_leading_whitespaces?(node)
        node.remove if node.content.empty?
      elsif IGNORED_ELEMENTS.include?(node.name)
        node.remove
      end
    end
  end

  # @return [Boolean] true when +xml_node+ is the first child of a list or list item element
  def remove_leading_whitespaces?(xml_node)
    parent = xml_node.parent
    return false unless parent

    ELEMENTS_WITHOUT_LEADING_WHITESPACES.include?(parent.name) &&
      parent.children.first == xml_node
  end

  # Converts +xml_node+ and, recursively, its children.
  #
  # A MarkdownNode is only created when a `visit_<name>` method exists and
  # the Markdown parent still accepts children (see MarkdownNode#skip_children).
  # Otherwise the children are attached to +md_parent+ instead.
  #
  # @param xml_node [Nokogiri::XML::Node]
  # @param md_parent [MarkdownNode]
  def visit(xml_node, md_parent)
    visitor = "visit_#{xml_node.name}"
    md_node = nil

    # `true` includes private methods; all visitors are private.
    if md_parent.children && respond_to?(visitor, true)
      md_node = create_node(xml_node, md_parent)
      send(visitor, xml_node, md_node)
    end

    xml_node.children.each { |xml_child| visit(xml_child, md_node || md_parent) }

    # The after hook undoes state set up by the visitor (after_LIST pops what
    # visit_LIST pushed), so it must only run when the visitor actually ran.
    after_hook = "after_#{xml_node.name}"
    send(after_hook, xml_node, md_node) if md_node && respond_to?(after_hook, true)
  end

  # Creates the Markdown node for +xml_node+. Consecutive <br> elements share
  # one node so that #visit_br can count them.
  #
  # @return [MarkdownNode]
  def create_node(xml_node, md_parent)
    if xml_node.name == "br"
      last_child = md_parent.children.last
      return last_child if last_child&.xml_node_name == "br"
    end

    MarkdownNode.new(xml_node_name: xml_node.name, parent: md_parent)
  end

  def visit_text(xml_node, md_node)
    md_node.text << text(xml_node)
  end

  # Bold. A <B> directly inside another <B> is not decorated again.
  def visit_B(xml_node, md_node)
    if xml_node.parent&.name != 'B'
      md_node.enclosed_with = "**"
    end
  end

  # Italic. A <I> directly inside another <I> is not decorated again.
  def visit_I(xml_node, md_node)
    if xml_node.parent&.name != 'I'
      md_node.enclosed_with = "_"
    end
  end

  # Underline is kept as BBCode. A <U> directly inside another <U> is not decorated again.
  def visit_U(xml_node, md_node)
    if xml_node.parent&.name != 'U'
      md_node.prefix = "[u]"
      md_node.postfix = "[/u]"
    end
  end

  # Code becomes a fenced block, or inline code when inline code is allowed
  # and the content has no newline. The element's content is used verbatim.
  def visit_CODE(xml_node, md_node)
    content = xml_node.content

    if !@allow_inline_code || content.include?("\n")
      md_node.prefix = "```text\n"
      md_node.postfix = "\n```"
    else
      md_node.enclosed_with = "`"
    end

    md_node.text = content.rstrip
    md_node.skip_children
    md_node.prefix_linebreaks = md_node.postfix_linebreaks = 2
    md_node.prefix_linebreak_type = LINEBREAK_HTML
  end

  # Opens a list: top-level lists are surrounded by two linebreaks, nested
  # lists by one. A list without a `type` attribute is unordered.
  def visit_LIST(xml_node, md_node)
    md_node.prefix_linebreaks = md_node.postfix_linebreaks = @list_stack.size == 0 ? 2 : 1
    md_node.prefix_linebreak_type = LINEBREAK_HTML if @list_stack.size == 0

    @list_stack << {
      unordered: xml_node.attribute('type').nil?,
      item_count: 0
    }
  end

  # Closes the list opened by #visit_LIST.
  def after_LIST(xml_node, md_node)
    @list_stack.pop
  end

  # List item: "*" for unordered lists, "<n>." for ordered ones, indented by
  # two spaces per nesting level.
  def visit_LI(xml_node, md_node)
    list = @list_stack.last
    # A list item outside of any list has no list state; emit it without a bullet.
    return if list.nil?

    depth = @list_stack.size - 1
    list[:item_count] += 1

    indentation = ' ' * 2 * depth
    symbol = list[:unordered] ? '*' : "#{list[:item_count]}."

    md_node.prefix = "#{indentation}#{symbol} "
    md_node.postfix_linebreaks = 1
  end

  # Image: rendered as a Markdown image without alt text, pointing at the
  # element's `src` attribute.
  def visit_IMG(xml_node, md_node)
    md_node.text = +"![](#{xml_node.attribute('src')})"
    md_node.prefix_linebreaks = md_node.postfix_linebreaks = 2
    md_node.skip_children
  end

  # Link: emitted as a bare URL when the link text equals the URL, otherwise
  # as a Markdown link around the converted children.
  def visit_URL(xml_node, md_node)
    original_url = xml_node.attribute('url').to_s
    url = CGI.unescapeHTML(original_url)
    url = @url_replacement.call(url) if @url_replacement

    if xml_node.content.strip == original_url
      md_node.text = url
      md_node.skip_children
    else
      md_node.prefix = "["
      md_node.postfix = "](#{url})"
    end
  end

  def visit_EMAIL(xml_node, md_node)
    md_node.prefix = "<"
    md_node.postfix = ">"
  end

  # Counts the linebreak on the (possibly shared, see #create_node) <br> node.
  def visit_br(xml_node, md_node)
    md_node.postfix_linebreaks += 1

    if md_node.postfix_linebreaks > 1 && ELEMENTS_WITH_HARD_LINEBREAKS.include?(xml_node.parent&.name)
      md_node.postfix_linebreak_type = LINEBREAK_HARD
    end
  end

  # Smilie: replaced by the result of the :smilie_to_emoji callback, if one was given.
  def visit_E(xml_node, md_node)
    if @smilie_to_emoji
      md_node.text = @smilie_to_emoji.call(xml_node.content)
      md_node.skip_children
    end
  end

  # Quote: a [quote] tag that references the quoted post or at least the
  # author when either is known, otherwise a Markdown blockquote ("> ").
  def visit_QUOTE(xml_node, md_node)
    if post = quoted_post(xml_node)
      md_node.prefix = %Q{[quote="#{post[:username]}, post:#{post[:post_number]}, topic:#{post[:topic_id]}"]\n}
      md_node.postfix = "\n[/quote]"
    elsif username = quoted_username(xml_node)
      md_node.prefix = %Q{[quote="#{username}"]\n}
      md_node.postfix = "\n[/quote]"
    else
      md_node.prefix_children = "> "
    end

    md_node.prefix_linebreaks = md_node.postfix_linebreaks = 2
    md_node.prefix_linebreak_type = LINEBREAK_HTML
  end

  # @return [Object, nil] result of the :quoted_post_from_post_id callback for
  #   the quote's numeric `post_id`; nil without callback or usable id
  def quoted_post(xml_node)
    if @quoted_post_from_post_id
      post_id = to_i(xml_node.attr("post_id"))
      @quoted_post_from_post_id.call(post_id) if post_id
    end
  end

  # @return [String, nil] username looked up by the quote's `user_id`, falling
  #   back to the quote's `author` attribute
  def quoted_username(xml_node)
    if @username_from_user_id
      user_id = to_i(xml_node.attr("user_id"))
      username = @username_from_user_id.call(user_id) if user_id
    end

    username = xml_node.attr("author") unless username
    username
  end

  # @param string [String, nil]
  # @return [Integer, nil] the integer value, or nil unless +string+ consists of digits only
  def to_i(string)
    string.to_i if string&.match?(/\A\d+\z/)
  end

  # Attachment: replaced by the result of the :upload_md_from_file callback, if one was given.
  def visit_ATTACHMENT(xml_node, md_node)
    filename = xml_node.attr("filename")
    index = to_i(xml_node.attr("index"))

    md_node.text = @upload_md_from_file.call(filename, index) if @upload_md_from_file
    md_node.prefix_linebreaks = md_node.postfix_linebreaks = 1
    md_node.skip_children
  end

  # Font size: 1-99 becomes <small>, 101-200 becomes <big>, anything else is left undecorated.
  def visit_SIZE(xml_node, md_node)
    size = to_i(xml_node.attr("size"))
    return if size.nil?

    if size.between?(1, 99)
      md_node.prefix = '<small>'
      md_node.postfix = '</small>'
    elsif size.between?(101, 200)
      md_node.prefix = '<big>'
      md_node.postfix = '</big>'
    end
  end

  # @return [String] the node's text with HTML entities decoded.
  #   +escape_markdown+ currently has no effect because escaping is disabled.
  def text(xml_node, escape_markdown: true)
    text = CGI.unescapeHTML(xml_node.text)
    # text.gsub!(/[\\`*_{}\[\]()#+\-.!~]/) { |c| "\\#{c}" } if escape_markdown
    text
  end

  # Renders the children of +md_parent+ (recursively) into Markdown.
  #
  # @param md_parent [MarkdownNode]
  # @return [String]
  def to_markdown(md_parent)
    markdown = +""

    md_parent.children.each do |md_node|
      prefix = md_node.prefix
      text = md_node.children&.any? ? to_markdown(md_node) : md_node.text
      postfix = md_node.postfix

      parent_prefix = prefix_from_parent(md_parent)

      if parent_prefix && md_node.xml_node_name != "br" && (md_parent.prefix_children || !markdown.empty?)
        prefix = "#{parent_prefix}#{prefix}"
      end

      # Whitespace inside code must stay exactly where it is.
      if md_node.xml_node_name != "CODE"
        text, prefix, postfix = hoist_whitespaces!(markdown, text, prefix, postfix)
      end

      add_linebreaks!(markdown, md_node.prefix_linebreaks, md_node.prefix_linebreak_type, parent_prefix)
      markdown << prefix
      markdown << text
      markdown << postfix
      add_linebreaks!(markdown, md_node.postfix_linebreaks, md_node.postfix_linebreak_type, parent_prefix)
    end

    markdown
  end

  # Moves whitespace at the edges of +text+ outside of the surrounding
  # prefix/postfix, so that " bold " enclosed with "**" becomes " **bold** "
  # instead of "** bold **".
  #
  # @return [Array<String>] the adjusted [text, prefix, postfix]
  def hoist_whitespaces!(markdown, text, prefix, postfix)
    text = text.lstrip if markdown.end_with?("\n")

    unless prefix.empty?
      if starts_with_whitespace?(text) && !ends_with_whitespace?(markdown)
        prefix = "#{text[0]}#{prefix}"
      end
      text = text.lstrip
    end

    unless postfix.empty?
      if ends_with_whitespace?(text)
        postfix = "#{postfix}#{text[-1]}"
      end
      text = text.rstrip
    end

    [text, prefix, postfix]
  end

  # @return [String, nil] the `prefix_children` of +md_parent+ or of its closest ancestor that has one
  def prefix_from_parent(md_parent)
    while md_parent
      return md_parent.prefix_children if md_parent.prefix_children
      md_parent = md_parent.parent
    end
  end

  # Makes sure +markdown+ ends with at least +required_linebreak_count+
  # linebreaks, replacing the linebreaks that are already there. Nothing is
  # added to an empty string.
  #
  # @param markdown [String] modified in place
  # @param required_linebreak_count [Integer]
  # @param linebreak_type [Symbol] one of the LINEBREAK_* constants
  # @param prefix [String, nil] text repeated in front of every linebreak but the first
  def add_linebreaks!(markdown, required_linebreak_count, linebreak_type, prefix = nil)
    return if required_linebreak_count == 0 || markdown.empty?

    existing_linebreak_count = markdown[/(?:\\?\n|<br>\n)*\z/].count("\n")

    if linebreak_type == LINEBREAK_HTML
      max_linebreak_count = [existing_linebreak_count, required_linebreak_count - 1].max + 1
      required_linebreak_count = max_linebreak_count if max_linebreak_count > EXPLICIT_LINEBREAK_THRESHOLD
    end

    return if existing_linebreak_count >= required_linebreak_count

    rstrip!(markdown)
    alternative_linebreak_start_index = required_linebreak_count > EXPLICIT_LINEBREAK_THRESHOLD ? 1 : 2

    required_linebreak_count.times do |index|
      linebreak = linebreak(linebreak_type, index, alternative_linebreak_start_index, required_linebreak_count)

      markdown << (linebreak == "\n" ? prefix.rstrip : prefix) if prefix && index > 0
      markdown << linebreak
    end
  end

  # Removes trailing whitespace and trailing linebreaks of any flavor from +markdown+ in place.
  def rstrip!(markdown)
    markdown.gsub!(/\s*(?:\\?\n|<br>\n)*\z/, '')
  end

  # Picks the text for a single linebreak.
  #
  # @return [String] "<br>\n" for the last linebreak of an HTML-type run that
  #   reached the alternative index; backslash + newline for hard linebreaks,
  #   traditional linebreaks or any linebreak at/after the alternative index;
  #   a plain newline otherwise
  def linebreak(linebreak_type, linebreak_index, alternative_linebreak_start_index, required_linebreak_count)
    use_alternative_linebreak = linebreak_index >= alternative_linebreak_start_index
    is_last_linebreak = linebreak_index + 1 == required_linebreak_count

    return "<br>\n" if linebreak_type == LINEBREAK_HTML &&
      use_alternative_linebreak && is_last_linebreak

    return "\\\n" if linebreak_type == LINEBREAK_HARD ||
      @traditional_linebreaks || use_alternative_linebreak

    "\n"
  end

  def starts_with_whitespace?(text)
    text.match?(/\A\s/)
  end

  def ends_with_whitespace?(text)
    text.match?(/\s\z/)
  end
end
|
||||
end
|
||||
@@ -1,14 +1,23 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
require 'csv'
|
||||
require 'yaml'
|
||||
require_relative '../../base'
|
||||
|
||||
module ImportScripts::PhpBB3
|
||||
class Settings
|
||||
# Reads the importer settings from a YAML file.
#
# The parsed YAML is converted to string keys with indifferent access before
# it is handed to the constructor.
#
# @param filename [String] path of the YAML file
# @return [Settings]
def self.load(filename)
  yaml = YAML::load_file(filename)
  Settings.new(yaml.deep_stringify_keys.with_indifferent_access)
end
|
||||
|
||||
attr_reader :site_name
|
||||
|
||||
attr_reader :new_categories
|
||||
attr_reader :category_mappings
|
||||
attr_reader :tag_mappings
|
||||
attr_reader :rank_mapping
|
||||
|
||||
attr_reader :import_anonymous_users
|
||||
attr_reader :import_attachments
|
||||
attr_reader :import_private_messages
|
||||
@@ -34,6 +43,14 @@ module ImportScripts::PhpBB3
|
||||
|
||||
def initialize(yaml)
|
||||
import_settings = yaml['import']
|
||||
|
||||
@site_name = import_settings['site_name']
|
||||
|
||||
@new_categories = import_settings['new_categories']
|
||||
@category_mappings = import_settings['category_mappings']
|
||||
@tag_mappings = import_settings['tag_mappings']
|
||||
@rank_mapping = import_settings['rank_mapping']
|
||||
|
||||
@import_anonymous_users = import_settings['anonymous_users']
|
||||
@import_attachments = import_settings['attachments']
|
||||
@import_private_messages = import_settings['private_messages']
|
||||
@@ -58,6 +75,20 @@ module ImportScripts::PhpBB3
|
||||
|
||||
@database = DatabaseSettings.new(yaml['database'])
|
||||
end
|
||||
|
||||
# Namespaces +val+ with the configured site name ("<site_name>:<val>").
#
# @param val [Object] value to prefix
# @return [Object] the prefixed string, or +val+ itself when either the site
#   name or +val+ is blank
def prefix(val)
  return val unless @site_name.present? && val.present?

  "#{@site_name}:#{val}"
end
|
||||
|
||||
# Determines the trust level for +rank+ from the configured rank mapping.
#
# Every mapping entry whose value is less than or equal to +rank+ contributes
# the number in its key ("trust_level_<n>"); the highest of those and the
# given +trust_level+ wins.
#
# @param rank [Integer] value compared against the mapping's thresholds
#   (presumably a post count -- confirm with the caller)
# @param trust_level [Integer] lower bound for the result
# @return [Integer]
def trust_level_for_posts(rank, trust_level: 0)
  return trust_level unless @rank_mapping.present?

  @rank_mapping.reduce(trust_level) do |highest, (level_name, minimum_rank)|
    next highest unless rank >= minimum_rank

    [highest, level_name.gsub('trust_level_', '').to_i].max
  end
end
|
||||
end
|
||||
|
||||
class DatabaseSettings
|
||||
|
||||
@@ -3,10 +3,12 @@
|
||||
module ImportScripts::PhpBB3
|
||||
class SmileyProcessor
|
||||
# @param uploader [ImportScripts::Uploader]
|
||||
# @param database [ImportScripts::PhpBB3::Database_3_0 | ImportScripts::PhpBB3::Database_3_1]
|
||||
# @param settings [ImportScripts::PhpBB3::Settings]
|
||||
# @param phpbb_config [Hash]
|
||||
def initialize(uploader, settings, phpbb_config)
|
||||
def initialize(uploader, database, settings, phpbb_config)
|
||||
@uploader = uploader
|
||||
@database = database
|
||||
@smilies_path = File.join(settings.base_dir, phpbb_config[:smilies_path])
|
||||
|
||||
@smiley_map = {}
|
||||
@@ -16,12 +18,16 @@ module ImportScripts::PhpBB3
|
||||
|
||||
# Replaces every smiley that phpBB stored as an <img> wrapped in marker
# comments with the result of #emoji for the smiley's code.
#
# @param text [String] modified in place
# @return [String, nil] +text+, or +nil+ when nothing was replaced (String#gsub!)
def replace_smilies(text)
  # :) is encoded as <!-- s:) --><img src="{SMILIES_PATH}/icon_e_smile.gif" alt=":)" title="Smile" /><!-- s:) -->
  # The closing marker repeats the smiley code, which does not have to start
  # with ":" (e.g. "8-)"), so it is matched as "s" followed by any code.
  text.gsub!(/<!-- s(\S+) --><img src="\{SMILIES_PATH\}\/.+?" alt=".*?" title=".*?" \/><!-- s(?:\S+) -->/) do
    emoji(Regexp.last_match(1))
  end
end
|
||||
# Looks up the replacement text for a phpBB smiley code.
#
# Codes already present in the smiley map are answered from the map. Other
# codes are looked up in the database and uploaded; when there is no database
# record or the upload yields nothing, the textual fallback is used.
#
# @param smiley_code [String]
# @return [String]
def emoji(smiley_code)
  return @smiley_map[smiley_code] if @smiley_map.key?(smiley_code)

  record = @database.get_smiley(smiley_code)
  uploaded = record && upload_smiley(smiley_code, record[:smiley_url], smiley_code, record[:emotion])

  uploaded || smiley_as_text(smiley_code)
end
|
||||
|
||||
@@ -36,7 +42,7 @@ module ImportScripts::PhpBB3
|
||||
[':o', ':-o', ':eek:'] => ':astonished:',
|
||||
[':shock:'] => ':open_mouth:',
|
||||
[':?', ':-?', ':???:'] => ':confused:',
|
||||
['8-)', ':cool:'] => ':sunglasses:',
|
||||
['8)', '8-)', ':cool:'] => ':sunglasses:',
|
||||
[':lol:'] => ':laughing:',
|
||||
[':x', ':-x', ':mad:'] => ':angry:',
|
||||
[':P', ':-P', ':razz:'] => ':stuck_out_tongue:',
|
||||
|
||||
@@ -1,48 +1,75 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
require_relative 'bbcode/xml_to_markdown'
|
||||
|
||||
module ImportScripts::PhpBB3
|
||||
class TextProcessor
|
||||
# @param lookup [ImportScripts::LookupContainer]
|
||||
# @param database [ImportScripts::PhpBB3::Database_3_0 | ImportScripts::PhpBB3::Database_3_1]
|
||||
# @param smiley_processor [ImportScripts::PhpBB3::SmileyProcessor]
|
||||
# @param settings [ImportScripts::PhpBB3::Settings]
|
||||
# @param phpbb_config [Hash] must contain :phpbb_version; versions starting
#   with "3.2" are converted with BBCode::XmlToMarkdown
def initialize(lookup, database, smiley_processor, settings, phpbb_config)
  @lookup = lookup
  @database = database
  @smiley_processor = smiley_processor
  @he = HTMLEntities.new
  @use_xml_to_markdown = phpbb_config[:phpbb_version].start_with?('3.2')

  @settings = settings
  @new_site_prefix = settings.new_site_prefix
  create_internal_link_regexps(settings.original_site_prefix)
end
|
||||
|
||||
# Converts the raw text of a phpBB post or message into Markdown.
#
# When XML-to-Markdown conversion is enabled the text is converted with
# BBCode::XmlToMarkdown, short internal links are rewritten and attachments
# that the text does not reference are appended. Otherwise the text goes
# through the BBCode pipeline (entity decoding, BBCode cleanup, optional
# BBCode-to-Markdown, smilies, links, lists, code, Markdown fixes,
# attachments).
#
# @param raw [String] text as stored by phpBB
# @param attachments [Array<String>, nil] text for each attachment, indexed
#   like the attachment references in +raw+ (NOTE(review): inferred from the
#   use of #fetch, #compact and #join -- confirm with the caller)
# @return [String]
def process_raw_text(raw, attachments = nil)
  if @use_xml_to_markdown
    unreferenced_attachments = attachments&.dup

    converter = BBCode::XmlToMarkdown.new(
      raw,
      username_from_user_id: lambda { |user_id| @lookup.find_username_by_import_id(user_id) },
      smilie_to_emoji: lambda { |smilie| @smiley_processor.emoji(smilie).dup },
      quoted_post_from_post_id: lambda { |post_id| @lookup.topic_lookup_from_imported_post_id(post_id) },
      upload_md_from_file: (lambda do |filename, index|
        # The converter passes nil for a missing or non-numeric index;
        # indexing with nil would raise a TypeError.
        next filename.to_s.dup if index.nil?

        unreferenced_attachments[index] = nil
        attachments.fetch(index, filename).dup
      end if attachments),
      url_replacement: nil,
      allow_inline_code: false
    )

    text = converter.convert

    text.gsub!(@short_internal_link_regexp) do |link|
      replace_internal_link(link, $1, $2)
    end

    add_unreferenced_attachments(text, unreferenced_attachments)
  else
    text = raw.dup
    text = CGI.unescapeHTML(text)

    clean_bbcodes(text)
    if @settings.use_bbcode_to_md
      text = bbcode_to_md(text)
    end
    process_smilies(text)
    process_links(text)
    process_lists(text)
    process_code(text)
    fix_markdown(text)
    process_attachments(text, attachments) if attachments.present?

    text
  end
end
|
||||
|
||||
# Converts the raw text of a post, including its attachments.
#
# Conversion is best effort: when it raises a StandardError the unprocessed
# +raw+ text is returned instead, so a single broken post does not abort the
# import.
#
# @param raw [String]
# @param attachments [Array<String>, nil]
# @return [String]
def process_post(raw, attachments)
  process_raw_text(raw, attachments)
rescue StandardError
  raw
end
|
||||
|
||||
# Converts the raw text of a private message, including its attachments.
#
# Conversion is best effort: when it raises a StandardError the unprocessed
# +raw+ text is returned instead, so a single broken message does not abort
# the import.
#
# @param raw [String]
# @param attachments [Array<String>, nil]
# @return [String]
def process_private_msg(raw, attachments)
  process_raw_text(raw, attachments)
rescue StandardError
  raw
end
|
||||
|
||||
protected
|
||||
@@ -139,6 +166,12 @@ module ImportScripts::PhpBB3
|
||||
attachments.fetch(index, real_filename)
|
||||
end
|
||||
|
||||
add_unreferenced_attachments(text, unreferenced_attachments)
|
||||
end
|
||||
|
||||
def add_unreferenced_attachments(text, unreferenced_attachments)
|
||||
return text unless unreferenced_attachments
|
||||
|
||||
unreferenced_attachments = unreferenced_attachments.compact
|
||||
text << "\n" << unreferenced_attachments.join("\n") unless unreferenced_attachments.empty?
|
||||
text
|
||||
@@ -161,6 +194,7 @@ module ImportScripts::PhpBB3
|
||||
|
||||
# Normalizes the spacing around quote tags and stand-alone images.
#
# @param text [String] modified in place
# @return [String] the same +text+ object
def fix_markdown(text)
  # Every [quote ...] / [/quote] tag ends up surrounded by exactly one newline on each side.
  text.gsub!(/(\n*\[\/?quote.*?\]\n*)/mi) do |quote_tag|
    "\n#{quote_tag.strip}\n"
  end

  # space out images single on line
  text.gsub!(/^!\[[^\]]*\]\([^\]]*\)$/i) do |image|
    "\n#{image.strip}\n"
  end

  text
end
|
||||
end
|
||||
|
||||
Reference in New Issue
Block a user