From 70f00e31dfeff7d49ab89900815b07beec73b3d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9gis=20Hanol?= Date: Mon, 9 Mar 2015 12:32:37 +0100 Subject: [PATCH] FIX: pre-hoist code blocks & spans --- .../discourse/dialects/code_dialect.js | 25 +++--- .../javascripts/discourse/dialects/dialect.js | 78 +++++++++++++++++-- test/javascripts/lib/markdown-test.js.es6 | 38 +++++++-- .../mdtest/fixtures/Backslash escapes.text | 4 +- .../mdtest/fixtures/Code Spans.text | 2 +- .../fixtures/Ordered and unordered lists.text | 2 +- .../Ordered and unordered lists.xhtml | 2 +- 7 files changed, 120 insertions(+), 31 deletions(-) diff --git a/app/assets/javascripts/discourse/dialects/code_dialect.js b/app/assets/javascripts/discourse/dialects/code_dialect.js index 0eb5c738a0..9131775a34 100644 --- a/app/assets/javascripts/discourse/dialects/code_dialect.js +++ b/app/assets/javascripts/discourse/dialects/code_dialect.js @@ -10,7 +10,7 @@ var acceptableCodeClasses = "perl", "php", "profile", "python", "r", "rib", "rsl", "ruby", "rust", "scala", "smalltalk", "sql", "tex", "text", "vala", "vbscript", "vhdl"]; -var textCodeClasses = ["text", "pre"]; +var textCodeClasses = ["text", "pre", "plain"]; function flattenBlocks(blocks) { var result = ""; @@ -39,6 +39,17 @@ Discourse.Dialect.replaceBlock({ } }); +Discourse.Dialect.replaceBlock({ + start: /(]*\>)([\s\S]*)/igm, + stop: /<\/pre>/igm, + rawContents: true, + skipIfTradtionalLinebreaks: true, + + emitter: function(blockContents) { + return ['p', ['pre', flattenBlocks(blockContents)]]; + } +}); + // Ensure that content in a code block is fully escaped. This way it's not white listed // and we can use HTML and Javascript examples. Discourse.Dialect.on('parseNode', function (event) { @@ -51,7 +62,6 @@ Discourse.Dialect.on('parseNode', function (event) { if (path && path[path.length-1] && path[path.length-1][0] && path[path.length-1][0] === "pre") { regexp = / +$/g; - } else { regexp = /^ +| +$/g; } @@ -59,17 +69,6 @@ Discourse.Dialect.on('parseNode', function (event) { } }); -Discourse.Dialect.replaceBlock({ - start: /(]*\>)([\s\S]*)/igm, - stop: /<\/pre>/igm, - rawContents: true, - skipIfTradtionalLinebreaks: true, - - emitter: function(blockContents) { - return ['p', ['pre', flattenBlocks(blockContents)]]; - } -}); - // Whitelist the language classes var regexpSource = "^lang-(" + acceptableCodeClasses.join('|') + ")$"; Discourse.Markdown.whiteListTag('code', 'class', new RegExp(regexpSource, "i")); diff --git a/app/assets/javascripts/discourse/dialects/dialect.js b/app/assets/javascripts/discourse/dialects/dialect.js index 66b72994b4..34ab5b5f04 100644 --- a/app/assets/javascripts/discourse/dialects/dialect.js +++ b/app/assets/javascripts/discourse/dialects/dialect.js @@ -12,7 +12,8 @@ var parser = window.BetterMarkdown, initialized = false, emitters = [], hoisted, - preProcessors = []; + preProcessors = [], + escape = Handlebars.Utils.escapeExpression; /** Initialize our dialects for processing. @@ -162,6 +163,69 @@ function hoister(t, target, replacement) { return t; } +function outdent(t) { + return t.replace(/^([ ]{4}|\t)/gm, ""); +} + +function hideBackslashEscapedCharacters(t) { + return t.replace(/\\\\/g, "\u1E800") + .replace(/\\`/g, "\u1E8001"); +} + +function showBackslashEscapedCharacters(t) { + return t.replace(/\u1E8001/g, "\\`") + .replace(/\u1E800/g, "\\\\"); +} + +function hoistCodeBlocksAndSpans(text) { + // replace all "\`" with a single character + text = hideBackslashEscapedCharacters(text); + + //
...
code blocks + text = text.replace(/(^\n*|\n\n)
([\s\S]*?)<\/pre>/ig, function(_, before, content) {
+    var hash = md5(content);
+    hoisted[hash] = escape(showBackslashEscapedCharacters(content.trim()));
+    return before + "
" + hash + "
"; + }); + + // markdown code blocks + text = text.replace(/(^\n*|\n\n)((?:(?:[ ]{4}|\t).*\n*)+)/g, function(match, before, content, index) { + // make sure we aren't in a list + var previousLine = text.slice(0, index).trim().match(/.*$/); + if (previousLine && previousLine[0].length) { + previousLine = previousLine[0].trim(); + if (/^(?:\*|\+|-|\d+\.)\s+/.test(previousLine)) { + return match; + } + } + // we can safely hoist the code block + var hash = md5(content); + // only remove trailing whitespace + content = content.replace(/\s+$/, ""); + hoisted[hash] = escape(outdent(showBackslashEscapedCharacters(content))); + return before + " " + hash + "\n"; + }); + + // fenced code blocks (AKA GitHub code blocks) + text = text.replace(/(^\n*|\n\n)```([a-z0-9\-]*)\n([\s\S]*?)\n```/g, function(_, before, language, content) { + var hash = md5(content); + hoisted[hash] = escape(showBackslashEscapedCharacters(content.trim())); + return before + "```" + language + "\n" + hash + "\n```"; + }); + + // code spans (double & single `) + ["``", "`"].forEach(function(delimiter) { + var regexp = new RegExp("(^|[^`])" + delimiter + "([^`\\n]+?)" + delimiter + "([^`]|$)", "g"); + text = text.replace(regexp, function(_, before, content, after) { + var hash = md5(content); + hoisted[hash] = escape(showBackslashEscapedCharacters(content.trim())); + return before + delimiter + hash + delimiter + after; + }); + }); + + // replace back all weird character with "\`" + return showBackslashEscapedCharacters(text); +} /** An object used for rendering our dialects. @@ -183,14 +247,19 @@ Discourse.Dialect = { cook: function(text, opts) { if (!initialized) { initializeDialects(); } + dialect.options = opts; + // Helps us hoist out HTML hoisted = {}; + // pre-hoist all code-blocks/spans + text = hoistCodeBlocksAndSpans(text); + + // pre-processors preProcessors.forEach(function(p) { text = p(text, hoister); }); - dialect.options = opts; var tree = parser.toHTMLTree(text, 'Discourse'), result = parser.renderJsonML(parseTree(tree)); @@ -203,12 +272,11 @@ Discourse.Dialect = { // If we hoisted out anything, put it back var keys = Object.keys(hoisted); if (keys.length) { - keys.forEach(function(k) { - result = result.replace(new RegExp(k,"g"), hoisted[k]); + keys.forEach(function(key) { + result = result.replace(new RegExp(key, "g"), hoisted[key]); }); } - hoisted = {}; return result.trim(); }, diff --git a/test/javascripts/lib/markdown-test.js.es6 b/test/javascripts/lib/markdown-test.js.es6 index 7c6cf4e408..a2bfb9faef 100644 --- a/test/javascripts/lib/markdown-test.js.es6 +++ b/test/javascripts/lib/markdown-test.js.es6 @@ -216,15 +216,33 @@ test("Mentions", function() { "

@codinghorror

", "it doesn't do link mentions within links"); - cooked("Hello @EvilTrout", "

Hello @EvilTrout

", "adds a mention class"); - cooked("robin@email.host", "

robin@email.host

", "won't add mention class to an email address"); - cooked("hanzo55@yahoo.com", "

hanzo55@yahoo.com

", "won't be affected by email addresses that have a number before the @ symbol"); - cooked("@EvilTrout yo", "

@EvilTrout yo

", "it handles mentions at the beginning of a string"); - cooked("yo\n@EvilTrout", "

yo
@EvilTrout

", "it handles mentions at the beginning of a new line"); + cooked("Hello @EvilTrout", + "

Hello @EvilTrout

", + "adds a mention class"); + + cooked("robin@email.host", + "

robin@email.host

", + "won't add mention class to an email address"); + + cooked("hanzo55@yahoo.com", + "

hanzo55@yahoo.com

", + "won't be affected by email addresses that have a number before the @ symbol"); + + cooked("@EvilTrout yo", + "

@EvilTrout yo

", + "it handles mentions at the beginning of a string"); + + cooked("yo\n@EvilTrout", + "

yo
@EvilTrout

", + "it handles mentions at the beginning of a new line"); + cooked("`evil` @EvilTrout `trout`", "

evil @EvilTrout trout

", "deals correctly with multiple blocks"); - cooked("```\na @test\n```", "

a @test

", "should not do mentions within a code block."); + + cooked("```\na @test\n```", + "

a @test

", + "should not do mentions within a code block."); cooked("> foo bar baz @eviltrout", "

foo bar baz @eviltrout

", @@ -357,7 +375,9 @@ test("Code Blocks", function() { "

<header>hello</header>

", "it escapes code in the code block"); - cooked("```text\ntext\n```", "

text

", "handles text by adding nohighlight"); + cooked("```text\ntext\n```", + "

text

", + "handles text by adding nohighlight"); cooked("```ruby\n# cool\n```", "

# cool

", @@ -403,7 +423,9 @@ test("Code Blocks", function() { "
[quote]test[/quote]
", "it does not parse other block types in markdown code blocks"); - cooked("## a\nb\n```\nc\n```", "

a

\n\n

c

", "it handles headings with code blocks after them."); + cooked("## a\nb\n```\nc\n```", + "

a

\n\n

c

", + "it handles headings with code blocks after them."); }); test("sanitize", function() { diff --git a/test/javascripts/mdtest/fixtures/Backslash escapes.text b/test/javascripts/mdtest/fixtures/Backslash escapes.text index 5b014cb33d..e30e965489 100755 --- a/test/javascripts/mdtest/fixtures/Backslash escapes.text +++ b/test/javascripts/mdtest/fixtures/Backslash escapes.text @@ -73,7 +73,7 @@ Nor should these, which occur in code spans: Backslash: `\\` -Backtick: `` \` `` +Backtick: `\`` Asterisk: `\*` @@ -113,7 +113,7 @@ other Markdown constructs: \`backticks\` -This is a code span with a literal backslash-backtick sequence: `` \` `` +This is a code span with a literal backslash-backtick sequence: `\`` This is a tag with unescaped backticks bar. diff --git a/test/javascripts/mdtest/fixtures/Code Spans.text b/test/javascripts/mdtest/fixtures/Code Spans.text index 5c229c7ad3..fbef1f5f1d 100755 --- a/test/javascripts/mdtest/fixtures/Code Spans.text +++ b/test/javascripts/mdtest/fixtures/Code Spans.text @@ -2,4 +2,4 @@ Fix for backticks within HTML tag: like this -Here's how you put `` `backticks` `` in a code span. \ No newline at end of file +Here's how you put `` `backticks` `` in a code span. diff --git a/test/javascripts/mdtest/fixtures/Ordered and unordered lists.text b/test/javascripts/mdtest/fixtures/Ordered and unordered lists.text index 37dee0fa98..4c7421c07a 100755 --- a/test/javascripts/mdtest/fixtures/Ordered and unordered lists.text +++ b/test/javascripts/mdtest/fixtures/Ordered and unordered lists.text @@ -86,7 +86,7 @@ Multiple paragraphs: 1. Item 1, graf one. - Item 2. graf two. The quick brown fox jumped over the lazy dog's + Item 1. graf two. The quick brown fox jumped over the lazy dog's back. 2. Item 2. diff --git a/test/javascripts/mdtest/fixtures/Ordered and unordered lists.xhtml b/test/javascripts/mdtest/fixtures/Ordered and unordered lists.xhtml index ba71eab395..ea55846e6b 100755 --- a/test/javascripts/mdtest/fixtures/Ordered and unordered lists.xhtml +++ b/test/javascripts/mdtest/fixtures/Ordered and unordered lists.xhtml @@ -91,7 +91,7 @@
  1. Item 1, graf one.

    -

    Item 2. graf two. The quick brown fox jumped over the lazy dog's +

    Item 1. graf two. The quick brown fox jumped over the lazy dog's back.

  2. Item 2.

  3. Item 3.