s around + // "paragraphs" that are wrapped in non-block-level tags, such as anchors, + // phrase emphasis, and spans. The list of tags we're looking for is + // hard-coded: + var block_tags_a = + "p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|ins|del"; + var block_tags_b = + "p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math"; + + // First, look for nested blocks, e.g.: + //
tags around block-level tags.
+ text = _HashHTMLBlocks(text);
+ text = _FormParagraphs(text, doNotUnhash);
+
+ return text;
+ }
+
+ function _RunSpanGamut(text) {
+ //
+ // These are all the transformations that occur *within* block-level
+ // tags like paragraphs, headers, and list items.
+ //
+
+ text = _DoCodeSpans(text);
+ text = _EscapeSpecialCharsWithinTagAttributes(text);
+ text = _EncodeBackslashEscapes(text);
+
+ // Process anchor and image tags. Images must come first,
+ // because ![foo][f] looks like an anchor.
+ text = _DoImages(text);
+ text = _DoAnchors(text);
+
+ // Make links out of things like ` Just type tags
+ //
+
+ // Strip leading and trailing lines:
+ text = text.replace(/^\n+/g, "");
+ text = text.replace(/\n+$/g, "");
+
+ var grafs = text.split(/\n{2,}/g);
+ var grafsOut = [];
+
+ var markerRe = /~K(\d+)K/;
+
+ //
+ // Wrap tags.
+ //
+ var end = grafs.length;
+ for (var i = 0; i < end; i++) {
+ var str = grafs[i];
+
+ // if this is an HTML marker, copy it
+ if (markerRe.test(str)) {
+ grafsOut.push(str);
+ } else if (/\S/.test(str)) {
+ str = _RunSpanGamut(str);
+ str = str.replace(/^([ \t]*)/g, " ");
+ str += "
\n");
+
+ return text;
+ }
+
+ function _EscapeSpecialCharsWithinTagAttributes(text) {
+   //
+   // Within tags -- meaning between < and > -- encode [\ ` * _] so they
+   // don't conflict with their use in Markdown for code, italics and strong.
+   //
+   // Params:
+   //   text - span-level text that may contain raw HTML tags or comments
+   // Returns:
+   //   the text with those characters escaped inside every tag/comment
+   //
+
+   // Build a regex to find HTML tags and comments. See Friedl's
+   // "Mastering Regular Expressions", 2nd Ed., pp. 200-201.
+
+   // SE: changed the comment part of the regex
+
+   // Alternative 1: an HTML tag; quoted attribute values may contain ">".
+   // Alternative 2: an HTML comment, <!-- ... -->.
+   var regex =
+     /(<[a-z\/!$]("[^"]*"|'[^']*'|[^'">])*>|<!(--(?:|(?:[^>-]|-[^>])(?:[^-]|-[^-])*)--)>)/gi;
+
+   text = text.replace(regex, function (wholeMatch) {
+     // A <code> or </code> sitting inside the matched tag is turned back
+     // into a literal backtick before escaping.
+     var tag = wholeMatch.replace(/(.)<\/?code>(?=.)/g, "$1`");
+     tag = escapeCharacters(
+       tag,
+       wholeMatch.charAt(1) == "!" ? "\\`*_/" : "\\`*_"
+     ); // also escape slashes in comments to prevent autolinking there -- http://meta.stackoverflow.com/questions/95987
+     return tag;
+   });
+
+   return text;
+ }
+
+ function _DoAnchors(text) {
+   //
+   // Turn Markdown link shortcuts into XHTML <a> tags.
+   //
+   // Three passes run in order; every match is handed to writeAnchorTag:
+   //   1. reference-style links:    [link text] [id]
+   //   2. inline-style links:       [link text](url "optional title")
+   //   3. reference-style shortcuts: [link text]
+   //
+   // Returns the text with each match replaced by writeAnchorTag's result.
+   //
+
+   //
+   // First, handle reference-style links: [link text] [id]
+   //
+
+   /*
+   text = text.replace(/
+     (                   // wrap whole match in $1
+       \[
+       (
+         (?:
+           \[[^\]]*\]    // allow brackets nested one level
+           |
+           [^\[\]]       // or anything else
+         )*
+       )
+       \]
+       [ ]?              // one optional space
+       (?:\n[ ]*)?       // one optional newline followed by spaces
+       \[
+       (.*?)             // id = $3
+       \]
+     )
+     ()()()()            // pad remaining backreferences
+   /g, writeAnchorTag);
+   */
+   text = text.replace(
+     /(\[((?:\[[^\]]*\]|[^\[\]])*)\][ ]?(?:\n[ ]*)?\[(.*?)\])()()()()/g,
+     writeAnchorTag
+   );
+
+   //
+   // Next, inline-style links: [link text](url "optional title")
+   //
+
+   /*
+   text = text.replace(/
+     (                   // wrap whole match in $1
+       \[
+       (
+         (?:
+           \[[^\]]*\]    // allow brackets nested one level
+           |
+           [^\[\]]       // or anything else
+         )*
+       )
+       \]
+       \(                // literal paren
+       [ \t]*
+       ()                // no id, so leave $3 empty
+       <?(               // href = $4, optionally wrapped in <...>
+         (?:
+           \([^)]*\)     // allow one level of (correctly nested) parens (think MSDN)
+           |
+           [^()\s]
+         )*?
+       )>?
+       [ \t]*
+       (                 // $5
+         (['"])          // quote char = $6
+         (.*?)           // Title = $7
+         \6              // matching quote
+         [ \t]*          // ignore any spaces/tabs between closing quote and )
+       )?                // title is optional
+       \)
+     )
+   /g, writeAnchorTag);
+   */
+
+   // The optional "<" before the href pairs with the optional ">" after it,
+   // so that [text](<url>) yields "url" rather than "<url" as the href.
+   text = text.replace(
+     /(\[((?:\[[^\]]*\]|[^\[\]])*)\]\([ \t]*()<?((?:\([^)]*\)|[^()\s])*?)>?[ \t]*((['"])(.*?)\6[ \t]*)?\))/g,
+     writeAnchorTag
+   );
+
+   //
+   // Last, handle reference-style shortcuts: [link text]
+   // These must come last in case you've also got [link test][1]
+   // or [link test](/foo)
+   //
+
+   /*
+   text = text.replace(/
+     (                   // wrap whole match in $1
+       \[
+       ([^\[\]]+)        // link text = $2; can't contain '[' or ']'
+       \]
+     )
+     ()()()()()          // pad rest of backreferences
+   /g, writeAnchorTag);
+   */
+   text = text.replace(/(\[([^\[\]]+)\])()()()()()/g, writeAnchorTag);
+
+   return text;
+ }
+
+ function writeAnchorTag(wholeMatch, m1, m2, m3, m4, m5, m6, m7) {
+   //
+   // Replacement callback shared by the three passes in _DoAnchors.
+   //
+   // Params (regex capture groups):
+   //   m1 - the whole matched link source
+   //   m2 - link text
+   //   m3 - reference id (empty for inline links and shortcuts)
+   //   m4 - inline url (empty for reference links and shortcuts)
+   //   m7 - inline title (may be undefined)
+   // Returns:
+   //   the <a> tag, or the untouched source when a reference id is unknown
+   //
+   if (m7 == undefined) m7 = "";
+   var whole_match = m1;
+   var link_text = m2.replace(/:\/\//g, "~P"); // to prevent auto-linking within the link. will be converted back after the auto-linker runs
+   var link_id = m3.toLowerCase();
+   var url = m4;
+   var title = m7;
+
+   if (url == "") {
+     if (link_id == "") {
+       // lower-case and turn embedded newlines into spaces
+       link_id = link_text.toLowerCase().replace(/ ?\n/g, " ");
+     }
+     url = "#" + link_id;
+
+     if (g_urls.get(link_id) != undefined) {
+       url = g_urls.get(link_id);
+       if (g_titles.get(link_id) != undefined) {
+         title = g_titles.get(link_id);
+       }
+     } else {
+       if (whole_match.search(/\(\s*\)$/m) > -1) {
+         // Special case for explicit empty url
+         url = "";
+       } else {
+         // Unknown reference: leave the source text as it was written.
+         return whole_match;
+       }
+     }
+   }
+   url = encodeProblemUrlChars(url);
+   url = escapeCharacters(url, "*_");
+   var result = '<a href="' + url + '"';
+
+   // Only emit a title attribute when there is a title; it goes through
+   // attributeEncode because it ends up inside an attribute value.
+   if (title != "") {
+     title = attributeEncode(title);
+     title = escapeCharacters(title, "*_");
+     result += ' title="' + title + '"';
+   }
+
+   result += ">" + link_text + "</a>";
+
+   return result;
+ }
+
+ function _DoImages(text) {
+   //
+   // Turn Markdown image shortcuts into <img> tags.
+   //
+   // Two passes run in order; every match is handed to writeImageTag:
+   //   1. reference-style images: ![alt text][id]
+   //   2. inline images:          ![alt text](url "optional title")
+   //
+   // Returns the text with each match replaced by writeImageTag's result.
+   //
+
+   //
+   // First, handle reference-style labeled images: ![alt text][id]
+   //
+
+   /*
+   text = text.replace(/
+     (                   // wrap whole match in $1
+       !\[
+       (.*?)             // alt text = $2
+       \]
+       [ ]?              // one optional space
+       (?:\n[ ]*)?       // one optional newline followed by spaces
+       \[
+       (.*?)             // id = $3
+       \]
+     )
+     ()()()()            // pad rest of backreferences
+   /g, writeImageTag);
+   */
+   text = text.replace(
+     /(!\[(.*?)\][ ]?(?:\n[ ]*)?\[(.*?)\])()()()()/g,
+     writeImageTag
+   );
+
+   //
+   // Next, handle inline images: ![alt text](url "optional title")
+   // Don't forget: encode * and _
+
+   /*
+   text = text.replace(/
+     (                   // wrap whole match in $1
+       !\[
+       (.*?)             // alt text = $2
+       \]
+       \s?               // One optional whitespace character
+       \(                // literal paren
+       [ \t]*
+       ()                // no id, so leave $3 empty
+       <?(\S+?)>?        // src url = $4, optionally wrapped in <...>
+       [ \t]*
+       (                 // $5
+         (['"])          // quote char = $6
+         (.*?)           // title = $7
+         \6              // matching quote
+         [ \t]*
+       )?                // title is optional
+       \)
+     )
+   /g, writeImageTag);
+   */
+   // The optional "<" before the src pairs with the optional ">" after it,
+   // so that ![alt](<url>) yields "url" rather than "<url" as the src.
+   text = text.replace(
+     /(!\[(.*?)\]\s?\([ \t]*()<?(\S+?)>?[ \t]*((['"])(.*?)\6[ \t]*)?\))/g,
+     writeImageTag
+   );
+
+   return text;
+ }
+
+ function attributeEncode(text) {
+ // unconditionally replace angle brackets here -- what ends up in an attribute (e.g. alt or title)
+ // never makes sense to have verbatim HTML in it (and the sanitizer would totally break it)
+ return text
+ .replace(/>/g, ">")
+ .replace(/";
+
+ return result;
+ }
+
+ function _DoHeaders(text) {
+ // Setext-style headers:
+ // Header 1
+ // ========
+ //
+ // Header 2
+ // --------
+ //
+ text = text.replace(
+ /^(.+)[ \t]*\n=+[ \t]*\n+/gm,
+ function (wholeMatch, m1) {
+ return "
" + _RunSpanGamut(m1) + "
\n\n";
+ }
+ );
+
+ text = text.replace(
+ /^(.+)[ \t]*\n-+[ \t]*\n+/gm,
+ function (matchFound, m1) {
+ return "" + _RunSpanGamut(m1) + "
\n\n";
+ }
+ );
+
+ // atx-style headers:
+ // # Header 1
+ // ## Header 2
+ // ## Header 2 with closing hashes ##
+ // ...
+ // ###### Header 6
+ //
+
+ /*
+ text = text.replace(/
+ ^(\#{1,6}) // $1 = string of #'s
+ [ \t]*
+ (.+?) // $2 = Header text
+ [ \t]*
+ \#* // optional closing #'s (not counted)
+ \n+
+ /gm, function() {...});
+ */
+
+ text = text.replace(
+ /^(\#{1,6})[ \t]*(.+?)[ \t]*\#*\n+/gm,
+ function (wholeMatch, m1, m2) {
+ var h_level = m1.length;
+ return (
+ "` blocks.
+ //
+
+ /*
+ text = text.replace(/
+ (?:\n\n|^)
+ ( // $1 = the code block -- one or more lines, starting with a space/tab
+ (?:
+ (?:[ ]{4}|\t) // Lines must start with a tab or a tab-width of spaces - attacklab: g_tab_width
+ .*\n+
+ )+
+ )
+ (\n*[ ]{0,3}[^ \t\n]|(?=~0)) // attacklab: g_tab_width
+ /g ,function(){...});
+ */
+
+ // attacklab: sentinel workarounds for lack of \A and \Z, safari\khtml bug
+ text += "~0";
+
+ text = text.replace(
+ /(?:\n\n|^)((?:(?:[ ]{4}|\t).*\n+)+)(\n*[ ]{0,3}[^ \t\n]|(?=~0))/g,
+ function (wholeMatch, m1, m2) {
+ var codeblock = m1;
+ var nextChar = m2;
+
+ codeblock = _EncodeCode(_Outdent(codeblock));
+ codeblock = _Detab(codeblock);
+ codeblock = codeblock.replace(/^\n+/g, ""); // trim leading newlines
+ codeblock = codeblock.replace(/\n+$/g, ""); // trim trailing whitespace
+
+ codeblock = "
";
+
+ return "\n\n" + codeblock + "\n\n" + nextChar;
+ }
+ );
+
+ // attacklab: strip sentinel
+ text = text.replace(/~0/, "");
+
+ return text;
+ }
+
+ function hashBlock(text) {
+   // Store a finished block in g_html_blocks and return a "~K<index>K"
+   // placeholder (padded with blank lines) that stands in for it.
+   var trimmed = text.replace(/(^\n+|\n+$)/g, ""); // drop leading/trailing newlines
+   var index = g_html_blocks.push(trimmed) - 1; // push() returns the new length
+   return "\n\n~K" + index + "K\n\n";
+ }
+
+ function _DoCodeSpans(text) {
+   //
+   // * Backtick quotes are used for <code></code> spans.
+   //
+   // * You can use multiple backticks as the delimiters if you want to
+   //   include literal backticks in the code span. So, this input:
+   //
+   //     Just type ``foo `bar` baz`` at the prompt.
+   //
+   //   Will translate to:
+   //
+   //     Just type <code>foo `bar` baz</code> at the prompt.
+   //
+   // * You can use spaces to get literal backticks at the edges:
+   //
+   //     ... type `` `bar` `` ...
+   //
+   //   Turns to:
+   //
+   //     ... type <code>`bar`</code> ...
+   //
+   // Returns the text with every code span replaced by its <code> element.
+   //
+
+   /*
+   text = text.replace(/
+     (^|[^\\])       // Character before opening ` can't be a backslash
+     (`+)            // $2 = Opening run of `
+     (               // $3 = The code block
+       [^\r]*?
+       [^`]          // attacklab: work around lack of lookbehind
+     )
+     \2              // Matching closer
+     (?!`)
+   /gm, function(){...});
+   */
+
+   text = text.replace(
+     /(^|[^\\])(`+)([^\r]*?[^`])\2(?!`)/gm,
+     function (wholeMatch, m1, m2, m3) {
+       var c = m3;
+       c = c.replace(/^([ \t]*)/g, ""); // leading whitespace
+       c = c.replace(/[ \t]*$/g, ""); // trailing whitespace
+       c = _EncodeCode(c);
+       c = c.replace(/:\/\//g, "~P"); // to prevent auto-linking. Not necessary in code *blocks*, but in code spans. Will be converted back after the auto-linker runs.
+       // m1 is the character consumed before the opening backticks; put it back.
+       return m1 + "<code>" + c + "</code>";
+     }
+   );
+
+   return text;
+ }
+
+ function _EncodeCode(text) {
+   //
+   // Encode/escape certain characters inside Markdown code runs.
+   // The point is that in code, these characters are literals,
+   // and lose their special Markdown meanings.
+   //
+   // Returns the encoded text.
+   //
+   // Encode all ampersands; HTML entities are not
+   // entities within a Markdown code span.
+   // This must run first so the entities produced below are not re-encoded.
+   text = text.replace(/&/g, "&amp;");
+
+   // Do the angle bracket song and dance:
+   text = text.replace(/</g, "&lt;");
+   text = text.replace(/>/g, "&gt;");
+
+   // Now, escape characters that are magic in Markdown:
+   text = escapeCharacters(text, "*_{}[]\\", false);
+
+   // jj the line above breaks this:
+   //---
+
+   //* Item
+
+   // 1. Subitem
+
+   // special char: *
+   //---
+
+   return text;
+ }
+
+ function _DoItalicsAndBold(text) {
+   //
+   // Turn **strong** / __strong__ and *em* / _em_ into <strong> and <em>.
+   //
+   // In both patterns $1 and $4 are the delimiting characters (or the
+   // start/end of the text) around the marked run; they are put back
+   // unchanged. $3 is the emphasized content.
+   //
+   // Returns the text with the emphasis markup applied.
+   //
+
+   // <strong> must go first:
+   text = text.replace(
+     /([\W_]|^)(\*\*|__)(?=\S)([^\r]*?\S[\*_]*)\2([\W_]|$)/g,
+     "$1<strong>$3</strong>$4"
+   );
+
+   text = text.replace(
+     /([\W_]|^)(\*|_)(?=\S)([^\r\*_]*?\S)\2([\W_]|$)/g,
+     "$1<em>$3</em>$4"
+   );
+
+   return text;
+ }
+
+ function _DoBlockQuotes(text) {
+   //
+   // Turn runs of lines quoted with a leading ">" into <blockquote> blocks.
+   // The quoted content is processed recursively with _RunBlockGamut, and
+   // the finished block is replaced by the placeholder hashBlock returns.
+   //
+
+   /*
+   text = text.replace(/
+     (                       // Wrap whole match in $1
+       (
+         ^[ \t]*>[ \t]?      // '>' at the start of a line
+         .+\n                // rest of the first line
+         (.+\n)*             // subsequent consecutive lines
+         \n*                 // blanks
+       )+
+     )
+   /gm, function(){...});
+   */
+
+   text = text.replace(
+     /((^[ \t]*>[ \t]?.+\n(.+\n)*\n*)+)/gm,
+     function (wholeMatch, m1) {
+       var bq = m1;
+
+       // attacklab: hack around Konqueror 3.5.4 bug:
+       // "----------bug".replace(/^-/g,"") == "bug"
+
+       bq = bq.replace(/^[ \t]*>[ \t]?/gm, "~0"); // trim one level of quoting
+
+       // attacklab: clean up hack
+       bq = bq.replace(/~0/g, "");
+
+       bq = bq.replace(/^[ \t]+$/gm, ""); // trim whitespace-only lines
+       bq = _RunBlockGamut(bq); // recurse
+
+       // Indent every line of the quoted content.
+       // NOTE(review): the indent added here must be exactly what the
+       // /^ /gm replace below strips again inside <pre> -- keep them in sync.
+       bq = bq.replace(/(^|\n)/g, "$1 ");
+       // These leading spaces screw with <pre> content, so we need to fix that:
+       bq = bq.replace(
+         /(\s*<pre>[^\r]+?<\/pre>)/gm,
+         function (wholeMatch, m1) {
+           var pre = m1;
+           // attacklab: hack around Konqueror 3.5.4 bug:
+           pre = pre.replace(/^ /gm, "~0");
+           pre = pre.replace(/~0/g, "");
+           return pre;
+         }
+       );
+
+       return hashBlock("<blockquote>\n" + bq + "\n</blockquote>");
+     }
+   );
+   return text;
+ }
+
+ function _FormParagraphs(text, doNotUnhash) {
+ //
+ // Params:
+ // $text - string to process with html ]*>`([\s\S]*)`<\/pre>/gi,
+ function (str, innerHTML) {
+ //innerHTML = innerHTML.replace(/^\t+/g, ' '); // convert tabs to spaces (you know it makes sense)
+ innerHTML = innerHTML.replace(/\n/g, "\n ");
+ return "\n\n " + innerHTML + "\n";
+ }
+ );
+
+ // Lists
+
+ // Escape numbers that could trigger an ol: "1. " becomes "1\. ".
+ // The dot is escaped in the pattern so that only a literal "." matches;
+ // an unescaped "." would also match e.g. "12 " and rewrite it to "1\. ".
+ string = string.replace(/(\d+)\. /g, "$1\\. ");
+
+ // Converts lists that have no child lists (of same type) first, then works it's way up
+ var noChildrenRegex = /<(ul|ol)\b[^>]*>(?:(?!/gi;
+ while (string.match(noChildrenRegex)) {
+ string = string.replace(noChildrenRegex, function (str) {
+ return replaceLists(str);
+ });
+ }
+
+ function replaceLists(html) {
+ html = html.replace(
+ /<(ul|ol)\b[^>]*>([\s\S]*?)<\/\1>/gi,
+ function (str, listType, innerHTML) {
+ var lis = innerHTML.split("");
+ lis.splice(lis.length - 1, 1);
+
+ for (i = 0, len = lis.length; i < len; i++) {
+ if (lis[i]) {
+ var prefix = listType === "ol" ? i + 1 + ". " : "* ";
+ lis[i] = lis[i].replace(
+ /\s*
]*>((?:(?!
/gi;
+ while (string.match(deepest)) {
+ string = string.replace(deepest, function (str) {
+ return replaceBlockquotes(str);
+ });
+ }
+
+ function replaceBlockquotes(html) {
+   // Rewrite every <blockquote>...</blockquote> element in `html` as
+   // Markdown quote lines and return the resulting string.
+   html = html.replace(
+     /<blockquote\b[^>]*>([\s\S]*?)<\/blockquote>/gi,
+     function (str, inner) {
+       inner = inner.replace(/^\s+|\s+$/g, ""); // trim the element's content
+       inner = cleanUp(inner);
+       inner = inner.replace(/^/gm, "> "); // prefix every line with "> "
+       // Collapse the padded markers produced by already-converted inner quotes.
+       inner = inner.replace(/^(>([ \t]{2,}>)+)/gm, "> >");
+       return inner;
+     }
+   );
+   return html;
+ }
+
+ function cleanUp(string) {
+   // Normalise whitespace, in three steps applied in order:
+   //   1. strip tabs / CRs / newlines from both ends,
+   //   2. turn whitespace-only gaps between two newlines into one blank line,
+   //   3. limit consecutive linebreaks to 2.
+   return string
+     .replace(/^[\t\r\n]+|[\t\r\n]+$/g, "")
+     .replace(/\n\s+\n/g, "\n\n")
+     .replace(/\n{3,}/g, "\n\n");
+ }
+
+ return cleanUp(string);
+};
+
+var converter = new Markdown.Converter();
+
+/**
+ * Convert Markdown source to HTML with the module-level converter.
+ * Returns whatever converter.makeHtml returns for `data`.
+ */
+var Markdown2HTML = function (data) {
+  var html = converter.makeHtml(data);
+  return html;
+};
+
+/**
+ * Convert HTML to Markdown with the module-level converter.
+ * Returns whatever converter.makeMarkdown returns for `data`.
+ */
+var HTML2Markdown = function (data) {
+  var markdown = converter.makeMarkdown(data);
+  return markdown;
+};
+
+export { Markdown2HTML, HTML2Markdown };
diff --git a/src/events.ts b/src/events.ts
index 869634a..5719566 100644
--- a/src/events.ts
+++ b/src/events.ts
@@ -1684,6 +1684,42 @@ class AddonEvents extends AddonBase {
"Better Notes",
"Image copied to clipboard."
);
+ } else if (message.type == "convertMD") {
+ // Handle a "convertMD" message: take the text currently on the clipboard,
+ // convert it from Markdown to HTML, and put the HTML back on the clipboard.
+ /*
+ message.content = {}
+ */
+ // Read the clipboard as plain text; this is used as the Markdown source.
+ const source = Zotero.Utilities.Internal.getClipboard("text/unicode");
+ if (!source) {
+ // Nothing usable on the clipboard: tell the user and stop here.
+ this._Addon.views.showProgressWindow(
+ "Better Notes",
+ "No MarkDown found."
+ );
+ return;
+ }
+ const html = this._Addon.parse.parseMDToHTML(source);
+ // NOTE(review): this logs the full clipboard text and the converted HTML --
+ // looks like leftover debug output; confirm it is intended.
+ console.log(source, html);
+ // Build a transferable that carries the HTML under the "text/html" flavor.
+ let transferable = Components.classes[
+ "@mozilla.org/widget/transferable;1"
+ ].createInstance(Components.interfaces.nsITransferable);
+ let clipboardService = Components.classes[
+ "@mozilla.org/widget/clipboard;1"
+ ].getService(Components.interfaces.nsIClipboard);
+ const str = Components.classes[
+ "@mozilla.org/supports-string;1"
+ ].createInstance(Components.interfaces.nsISupportsString);
+ str.data = html;
+ transferable.addDataFlavor("text/html");
+ // NOTE(review): the length passed is html.length * 2 -- presumably the
+ // byte length of the UTF-16 string data; confirm against nsITransferable.
+ transferable.setTransferData("text/html", str, html.length * 2);
+
+ // Publish the transferable on the global clipboard (second argument is null).
+ clipboardService.setData(
+ transferable,
+ null,
+ Components.interfaces.nsIClipboard.kGlobalClipboard
+ );
+ // Let the user know the converted content is ready to paste.
+ this._Addon.views.showProgressWindow(
+ "Better Notes",
+ "Converted MarkDown is updated to the clipboard. You can paste them in the note."
+ );
} else {
Zotero.debug(`Knowledge4Zotero: message not handled.`);
}
diff --git a/src/parse.ts b/src/parse.ts
index c430fe5..94c783c 100644
--- a/src/parse.ts
+++ b/src/parse.ts
@@ -1,4 +1,5 @@
import { AddonBase } from "./base";
+import { HTML2Markdown, Markdown2HTML } from "./convertMD";
const TreeModel = require("./treemodel");
class AddonParse extends AddonBase {
@@ -340,6 +341,14 @@ class AddonParse extends AddonBase {
return "";
}
}
+
+ /**
+  * Convert a Markdown string to HTML by delegating to Markdown2HTML.
+  *
+  * @param str Markdown source text.
+  * @returns The string produced by Markdown2HTML for `str`.
+  */
+ parseMDToHTML(str: string): string {
+   const html = Markdown2HTML(str);
+   return html;
+ }
+
+ /**
+  * Convert an HTML string to Markdown by delegating to HTML2Markdown.
+  *
+  * @param str HTML source text.
+  * @returns The string produced by HTML2Markdown for `str`.
+  */
+ parseHTMLToMD(str: string): string {
+   const markdown = HTML2Markdown(str);
+   return markdown;
+ }
}
export default AddonParse;