diff --git a/package-lock.json b/package-lock.json
index cd82af0..675f276 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -10,12 +10,15 @@
"license": "SEE LICENSE FILE",
"dependencies": {
"abort-controller": "^3.0.0",
+ "commonmark": "^0.31.2",
"fake-indexeddb": "^6.2.5",
"fluent-ffmpeg": "^2.1.2",
"fs-extra": "^11.1.0",
"got": "^12.0.2",
"image-size": "^1.0.2",
"isomorphic-webcrypto": "^2.3.8",
+ "linkifyjs": "^4.3.3",
+ "lodash.escape": "^4.0.1",
"matrix-js-sdk": "^41.5.0",
"mime": "^3.0.0",
"node-fetch": "^3.3.0",
@@ -7314,6 +7317,23 @@
"optional": true,
"peer": true
},
+ "node_modules/commonmark": {
+ "version": "0.31.2",
+ "resolved": "https://registry.npmjs.org/commonmark/-/commonmark-0.31.2.tgz",
+ "integrity": "sha512-2fRLTyb9r/2835k5cwcAwOj0DEc44FARnMp5veGsJ+mEAZdi52sNopLu07ZyElQUz058H43whzlERDIaaSw4rg==",
+ "license": "BSD-2-Clause",
+ "dependencies": {
+ "entities": "~3.0.1",
+ "mdurl": "~1.0.1",
+ "minimist": "~1.2.8"
+ },
+ "bin": {
+ "commonmark": "bin/commonmark"
+ },
+ "engines": {
+ "node": "*"
+ }
+ },
"node_modules/compare-versions": {
"version": "3.6.0",
"resolved": "https://registry.npmjs.org/compare-versions/-/compare-versions-3.6.0.tgz",
@@ -7880,6 +7900,18 @@
"once": "^1.4.0"
}
},
+ "node_modules/entities": {
+ "version": "3.0.1",
+ "resolved": "https://registry.npmjs.org/entities/-/entities-3.0.1.tgz",
+ "integrity": "sha512-WiyBqoomrwMdFG1e0kqvASYfnlb0lp8M5o5Fw2OFq1hNZxxcNk8Ik0Xm7LxzBhuidnZB/UtBqVCgUz3kBOP51Q==",
+ "license": "BSD-2-Clause",
+ "engines": {
+ "node": ">=0.12"
+ },
+ "funding": {
+ "url": "https://github.com/fb55/entities?sponsor=1"
+ }
+ },
"node_modules/env-editor": {
"version": "0.4.2",
"resolved": "https://registry.npmjs.org/env-editor/-/env-editor-0.4.2.tgz",
@@ -10350,6 +10382,12 @@
"optional": true,
"peer": true
},
+ "node_modules/linkifyjs": {
+ "version": "4.3.3",
+ "resolved": "https://registry.npmjs.org/linkifyjs/-/linkifyjs-4.3.3.tgz",
+ "integrity": "sha512-P8aEP5U/D1/IlTY2OeYsErdwh9bGuLE30NcXtKEjgdHcahveQoQwM2yZNsioQHsWFz0P7KKudisbrzCgR0sDHg==",
+ "license": "MIT"
+ },
"node_modules/locate-path": {
"version": "6.0.0",
"resolved": "https://registry.npmjs.org/locate-path/-/locate-path-6.0.0.tgz",
@@ -10379,6 +10417,12 @@
"optional": true,
"peer": true
},
+ "node_modules/lodash.escape": {
+ "version": "4.0.1",
+ "resolved": "https://registry.npmjs.org/lodash.escape/-/lodash.escape-4.0.1.tgz",
+ "integrity": "sha512-nXEOnb/jK9g0DYMr1/Xvq6l5xMD7GDG55+GSYIYmS0G4tBk/hURD4JR9WCavs04t33WmJx9kCyp9vJ+mr4BOUw==",
+ "license": "MIT"
+ },
"node_modules/lodash.throttle": {
"version": "4.1.1",
"resolved": "https://registry.npmjs.org/lodash.throttle/-/lodash.throttle-4.1.1.tgz",
@@ -10794,6 +10838,12 @@
"optional": true,
"peer": true
},
+ "node_modules/mdurl": {
+ "version": "1.0.1",
+ "resolved": "https://registry.npmjs.org/mdurl/-/mdurl-1.0.1.tgz",
+ "integrity": "sha512-/sKlQJCBYVY9Ers9hqzKou4H6V5UWc/M59TH2dvkt+84itfnq7uFOMLpOiOS4ujvHP4etln18fmIxA5R5fll0g==",
+ "license": "MIT"
+ },
"node_modules/media-typer": {
"version": "0.3.0",
"resolved": "https://registry.npmjs.org/media-typer/-/media-typer-0.3.0.tgz",
@@ -11836,8 +11886,6 @@
"version": "1.2.8",
"resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.8.tgz",
"integrity": "sha512-2yyAR8qBkN3YuheJanUpWC5U3bb5osDywNB8RzDVlDwDHbocAJveqqj1u8+SVD7jkWT4yvsHCpWqqWqAxb0zCA==",
- "optional": true,
- "peer": true,
"funding": {
"url": "https://github.com/sponsors/ljharb"
}
@@ -20839,6 +20887,16 @@
"optional": true,
"peer": true
},
+ "commonmark": {
+ "version": "0.31.2",
+ "resolved": "https://registry.npmjs.org/commonmark/-/commonmark-0.31.2.tgz",
+ "integrity": "sha512-2fRLTyb9r/2835k5cwcAwOj0DEc44FARnMp5veGsJ+mEAZdi52sNopLu07ZyElQUz058H43whzlERDIaaSw4rg==",
+ "requires": {
+ "entities": "~3.0.1",
+ "mdurl": "~1.0.1",
+ "minimist": "~1.2.8"
+ }
+ },
"compare-versions": {
"version": "3.6.0",
"resolved": "https://registry.npmjs.org/compare-versions/-/compare-versions-3.6.0.tgz",
@@ -21287,6 +21345,11 @@
"once": "^1.4.0"
}
},
+ "entities": {
+ "version": "3.0.1",
+ "resolved": "https://registry.npmjs.org/entities/-/entities-3.0.1.tgz",
+ "integrity": "sha512-WiyBqoomrwMdFG1e0kqvASYfnlb0lp8M5o5Fw2OFq1hNZxxcNk8Ik0Xm7LxzBhuidnZB/UtBqVCgUz3kBOP51Q=="
+ },
"env-editor": {
"version": "0.4.2",
"resolved": "https://registry.npmjs.org/env-editor/-/env-editor-0.4.2.tgz",
@@ -23163,6 +23226,11 @@
"optional": true,
"peer": true
},
+ "linkifyjs": {
+ "version": "4.3.3",
+ "resolved": "https://registry.npmjs.org/linkifyjs/-/linkifyjs-4.3.3.tgz",
+ "integrity": "sha512-P8aEP5U/D1/IlTY2OeYsErdwh9bGuLE30NcXtKEjgdHcahveQoQwM2yZNsioQHsWFz0P7KKudisbrzCgR0sDHg=="
+ },
"locate-path": {
"version": "6.0.0",
"resolved": "https://registry.npmjs.org/locate-path/-/locate-path-6.0.0.tgz",
@@ -23186,6 +23254,11 @@
"optional": true,
"peer": true
},
+ "lodash.escape": {
+ "version": "4.0.1",
+ "resolved": "https://registry.npmjs.org/lodash.escape/-/lodash.escape-4.0.1.tgz",
+ "integrity": "sha512-nXEOnb/jK9g0DYMr1/Xvq6l5xMD7GDG55+GSYIYmS0G4tBk/hURD4JR9WCavs04t33WmJx9kCyp9vJ+mr4BOUw=="
+ },
"lodash.throttle": {
"version": "4.1.1",
"resolved": "https://registry.npmjs.org/lodash.throttle/-/lodash.throttle-4.1.1.tgz",
@@ -23524,6 +23597,11 @@
"optional": true,
"peer": true
},
+ "mdurl": {
+ "version": "1.0.1",
+ "resolved": "https://registry.npmjs.org/mdurl/-/mdurl-1.0.1.tgz",
+ "integrity": "sha512-/sKlQJCBYVY9Ers9hqzKou4H6V5UWc/M59TH2dvkt+84itfnq7uFOMLpOiOS4ujvHP4etln18fmIxA5R5fll0g=="
+ },
"media-typer": {
"version": "0.3.0",
"resolved": "https://registry.npmjs.org/media-typer/-/media-typer-0.3.0.tgz",
@@ -24351,9 +24429,7 @@
"minimist": {
"version": "1.2.8",
"resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.8.tgz",
- "integrity": "sha512-2yyAR8qBkN3YuheJanUpWC5U3bb5osDywNB8RzDVlDwDHbocAJveqqj1u8+SVD7jkWT4yvsHCpWqqWqAxb0zCA==",
- "optional": true,
- "peer": true
+ "integrity": "sha512-2yyAR8qBkN3YuheJanUpWC5U3bb5osDywNB8RzDVlDwDHbocAJveqqj1u8+SVD7jkWT4yvsHCpWqqWqAxb0zCA=="
},
"minipass": {
"version": "3.1.6",
diff --git a/package.json b/package.json
index e77b2e9..d005328 100644
--- a/package.json
+++ b/package.json
@@ -4,12 +4,15 @@
"description": "Matrix chat server client for Node-RED",
"dependencies": {
"abort-controller": "^3.0.0",
+ "commonmark": "^0.31.2",
"fake-indexeddb": "^6.2.5",
"fluent-ffmpeg": "^2.1.2",
"fs-extra": "^11.1.0",
"got": "^12.0.2",
"image-size": "^1.0.2",
"isomorphic-webcrypto": "^2.3.8",
+ "linkifyjs": "^4.3.3",
+ "lodash.escape": "^4.0.1",
"matrix-js-sdk": "^41.5.0",
"mime": "^3.0.0",
"node-fetch": "^3.3.0",
diff --git a/src/matrix-markdown.js b/src/matrix-markdown.js
new file mode 100644
index 0000000..e833042
--- /dev/null
+++ b/src/matrix-markdown.js
@@ -0,0 +1,385 @@
+// Markdown -> HTML converter for matrix messages.
+//
+// Ported from matrix-react-sdk's `src/Markdown.ts` (now living at
+// element-hq/element-web `apps/web/src/Markdown.ts`) so the HTML this module
+// generates lines up with what Element produces for the same markdown source.
+//
+// Keep this in sync with element-web's Markdown.ts when noticeable changes
+// land there. Source of truth:
+// https://github.com/element-hq/element-web/blob/develop/apps/web/src/Markdown.ts
+//
+// Copyright 2024 New Vector Ltd.
+// Copyright 2021 The Matrix.org Foundation C.I.C.
+// Copyright 2016 OpenMarket Ltd
+//
+// SPDX-License-Identifier: AGPL-3.0-only OR GPL-3.0-only OR LicenseRef-Element-Commercial
+
+const commonmark = require("commonmark");
+const escape = require("lodash.escape");
+const linkify = require("linkifyjs");
+
+// Tag names that isAllowedHtmlTag accepts for raw HTML nodes. Each entry is
+// compared against everything between `<` (or `</`) and `>`, which is why
+// "br/" is listed separately: it is what a self-closing `<br/>` yields.
+const ALLOWED_HTML_TAGS = ["sub", "sup", "del", "s", "u", "br", "br/"];
+
+// These types of node are definitely text
+// (isPlainText skips over them without treating the message as formatted).
+const TEXT_NODES = ["text", "softbreak", "linebreak", "paragraph", "document"];
+
+/**
+ * Decides whether a raw HTML node should be passed through as HTML.
+ *
+ * Accepted literals are the maths wrappers (`<div data-mx-maths="...">`,
+ * `<span data-mx-maths="...">` and their closing tags) and any opening or
+ * closing tag whose name is listed in ALLOWED_HTML_TAGS.
+ *
+ * @param node node whose `literal` holds the raw HTML text.
+ * @returns true when the literal is one of the accepted tags, false otherwise
+ *     (including when the node has no literal).
+ */
+function isAllowedHtmlTag(node) {
+    const { literal } = node;
+    if (!literal) {
+        return false;
+    }
+
+    const mathsMatch = literal.match('^<((div|span) data-mx-maths="[^"]*"|/(div|span))>$');
+    if (mathsMatch !== null) {
+        return true;
+    }
+
+    // The capture is the whole text between the brackets, so a tag carrying
+    // attributes never equals an allow-list entry. That is fine: the tags we
+    // allow shouldn't really have any attributes anyway.
+    const tagMatch = /^<\/?(.*)>$/.exec(literal);
+    if (!tagMatch || tagMatch.length !== 2) {
+        return false;
+    }
+
+    return ALLOWED_HTML_TAGS.includes(tagMatch[1]);
+}
+
+/*
+ * Reports whether the tree that `node` belongs to has more than one
+ * top-level child, i.e. whether the parse output spans several block
+ * level elements (lines) rather than a single one.
+ */
+function isMultiLine(node) {
+    // Climb the parent chain until we reach the node without a parent.
+    let root = node;
+    for (let ancestor = root.parent; ancestor; ancestor = ancestor.parent) {
+        root = ancestor;
+    }
+
+    // Loose comparison on purpose: null and undefined child pointers compare as equal.
+    return root.firstChild != root.lastChild;
+}
+
+/**
+ * Collects the text that follows `node` up to the first space or line break.
+ *
+ * Starting at `node` and moving along `next`, the literal of every text node
+ * is appended to the result. The scan stops at a softbreak/linebreak node, at
+ * the end of the sibling chain, or at the first space found in a text literal
+ * (the text before that space is still included).
+ *
+ * @param node first node to inspect; may be null/undefined.
+ * @returns the collected text, or an empty string when there is none.
+ */
+function getTextUntilEndOrLinebreak(node) {
+    let text = "";
+    for (
+        let current = node;
+        current && current.type !== "softbreak" && current.type !== "linebreak";
+        current = current.next
+    ) {
+        const { literal, type } = current;
+        if (type !== "text" || !literal) {
+            // Non-text nodes add nothing themselves but do not end the scan.
+            continue;
+        }
+
+        const spaceIndex = literal.indexOf(" ");
+        if (spaceIndex !== -1) {
+            // A space ends the current word: keep what precedes it and stop.
+            text += literal.slice(0, spaceIndex);
+            break;
+        }
+        text += literal;
+    }
+    return text;
+}
+
+// Markdown delimiter that repairLinks wraps around the inner text of an
+// "emph" / "strong" node when it writes that node back out as plain text.
+const formattingChangesByNodeType = {
+ emph: "_",
+ strong: "__",
+};
+
+/**
+ * Concatenates the literals of all text nodes reached by walking `node`
+ * (the node itself and everything below it), in walk order.
+ */
+const innerNodeLiteral = (node) => {
+    const pieces = [];
+    const walker = node.walker();
+
+    for (let step = walker.next(); step; step = walker.next()) {
+        const { entering, node: visited } = step;
+        // Only count a text node once, on the way in, and skip empty literals.
+        if (entering && visited.type === "text" && visited.literal) {
+            pieces.push(visited.literal);
+        }
+    }
+
+    return pieces.join("");
+};
+
+// True when the node has no previous sibling, no next sibling and no child.
+const emptyItemWithNoSiblings = (node) => !(node.prev || node.next || node.firstChild);
+
+/**
+ * Class that wraps commonmark, adding the ability to see whether
+ * a given message actually uses any markdown syntax or whether
+ * it's plain text.
+ */
+class Markdown {
+ // Keeps the raw markdown on `this.input` and stores its parsed,
+ // link-repaired commonmark tree on `this.parsed`.
+ constructor(input) {
+     this.input = input;
+
+     const ast = new commonmark.Parser().parse(input);
+     this.parsed = this.repairLinks(ast);
+ }
+
+ /**
+ * This method modifies the parsed AST so that links are always properly
+ * linkified instead of sometimes being wrongly emphasised, which happens
+ * if you write a link like the example below:
+ * https://my_weird-link_domain.domain.com
+ * ^ this link would be parsed to something like this:
+ * https://myweird-linkdomain.domain.com
+ * This method makes it so the link gets properly modified to a version where it is
+ * not emphasised until it actually ends.
+ * See: https://github.com/vector-im/element-web/issues/4674
+ *
+ * @param parsed root node of the commonmark tree; it is modified in place.
+ * @returns the same `parsed` node that was passed in.
+ */
+ repairLinks(parsed) {
+ const walker = parsed.walker();
+ let event = null;
+ // Text gathered since the last space or line break; this is what gets scanned for links
+ let text = "";
+ let isInPara = false;
+ // Node of the previous walker event
+ let previousNode = null;
+ // Set when a formatting node was rewritten as text on entering, so that it is
+ // unlinked when a later leaving event for a formatting node is handled below
+ let shouldUnlinkFormattingNode = false;
+ while ((event = walker.next())) {
+ const { node } = event;
+ // Track whether we are between the entering and leaving events of a paragraph
+ if (node.type === "paragraph") {
+ isInPara = !!event.entering;
+ }
+ if (isInPara) {
+ // Clear saved string when line ends
+ if (
+ node.type === "softbreak" ||
+ node.type === "linebreak" ||
+ // Also start calculating the text from the beginning on any spaces
+ (node.type === "text" && node.literal === " ")
+ ) {
+ text = "";
+ continue;
+ }
+
+ // Break up text nodes on spaces, so that we don't shoot past them without resetting
+ if (node.type === "text" && node.literal) {
+ // The capturing group keeps every single space as its own array element
+ const [thisPart, ...nextParts] = node.literal.split(/( )/);
+ node.literal = thisPart;
+ text += thisPart;
+
+ // Add the remaining parts as siblings
+ // (iterated in reverse because each part is inserted directly after `node`)
+ nextParts.reverse().forEach((part) => {
+ if (part) {
+ const nextNode = new commonmark.Node("text");
+ nextNode.literal = part;
+ node.insertAfter(nextNode);
+ // Make the iterator aware of the newly inserted node
+ walker.resumeAt(nextNode, true);
+ }
+ });
+ }
+
+ // We should not do this if previous node was not a textnode, as we can't combine it then.
+ if (
+ (node.type === "emph" || node.type === "strong") &&
+ previousNode && previousNode.type === "text"
+ ) {
+ if (event.entering) {
+ // Look for links in the text gathered in front of this formatting node
+ const foundLinks = linkify.find(text);
+ for (const { value } of foundLinks) {
+ if (node && node.firstChild && node.firstChild.literal) {
+ /**
+ * NOTE: This technically should unlink the emph node and create LINK nodes instead, adding all the next elements as siblings
+ * but this solution seems to work well and is hopefully slightly easier to understand too
+ */
+ const format = formattingChangesByNodeType[node.type];
+ // The formatting node's text with its markdown delimiters put back around it
+ const nonEmphasizedText = `${format}${innerNodeLiteral(node)}${format}`;
+ // Text after the formatting node, up to the next space or line break
+ const f = getTextUntilEndOrLinebreak(node);
+ const newText = value + nonEmphasizedText + f;
+ const newLinks = linkify.find(newText);
+ // Should always find only one link here, if it finds more it means that the algorithm is broken
+ if (newLinks.length === 1) {
+ const emphasisTextNode = new commonmark.Node("text");
+ emphasisTextNode.literal = nonEmphasizedText;
+ previousNode.insertAfter(emphasisTextNode);
+ // Blank the original child text; the new text node above now carries it
+ node.firstChild.literal = "";
+ event = node.walker().next();
+ if (event) {
+ // Remove `em` opening and closing nodes
+ node.unlink();
+ previousNode.insertAfter(event.node);
+ shouldUnlinkFormattingNode = true;
+ }
+ } else {
+ console.warn(
+ "matrix-chat markdown: link escaping found too many links for text:",
+ text,
+ "modified:",
+ newText,
+ );
+ }
+ }
+ }
+ } else {
+ // Leaving event of a formatting node: finish the removal flagged on entering
+ if (shouldUnlinkFormattingNode) {
+ node.unlink();
+ shouldUnlinkFormattingNode = false;
+ }
+ }
+ }
+ }
+ previousNode = node;
+ }
+ return parsed;
+ }
+
+ /**
+  * Walks the parsed tree and reports whether the input uses no markdown
+  * that needs HTML to render.
+  *
+  * @returns true when every node is plain text (or is treated as such),
+  *     false as soon as a node that requires formatting is found.
+  */
+ isPlainText() {
+     const walker = this.parsed.walker();
+
+     for (let step = walker.next(); step; step = walker.next()) {
+         const { node } = step;
+
+         if (TEXT_NODES.includes(node.type)) {
+             // definitely text
+             continue;
+         }
+
+         switch (node.type) {
+             // Special handling for inputs like `+`, `*`, `-` and `2021.` which
+             // would otherwise be treated as a list of a single empty item.
+             // See https://github.com/vector-im/element-web/issues/7631
+             case "list":
+                 if (node.firstChild && emptyItemWithNoSiblings(node.firstChild)) {
+                     // A list with a single empty item is treated as plain text.
+                     continue;
+                 }
+                 // Everything else is actual lists and therefore not plaintext.
+                 return false;
+
+             case "item":
+                 if (emptyItemWithNoSiblings(node)) {
+                     // An empty list item with no sibling items is treated as plain text.
+                     continue;
+                 }
+                 // Everything else is actual lists and therefore not plaintext.
+                 return false;
+
+             case "html_inline":
+             case "html_block":
+                 // An allowed html tag has to be rendered, so HTML is needed.
+                 // A tag that is not allowed is not HTML for our purposes:
+                 // it will just be treated as text.
+                 if (isAllowedHtmlTag(node)) {
+                     return false;
+                 }
+                 break;
+
+             default:
+                 return false;
+         }
+     }
+
+     return true;
+ }
+
+ toHTML({ externalLinks = false } = {}) {
+ const renderer = new commonmark.HtmlRenderer({
+ safe: false,
+
+ // Set soft breaks to hard HTML breaks: commonmark
+ // puts softbreaks in for multiple lines in a blockquote,
+ // so if these are just newline characters then the
+ // block quote ends up all on one line
+ // (https://github.com/vector-im/element-web/issues/3154)
+ softbreak: "<br />",
+ });
+
+ // Trying to strip out the wrapping
msg.payload if not defined). This only affects HTML messages.msg.payload if not defined). This only affects messages sent in HTML format — in Markdown mode the formatted body is generated from the markdown source.null for plain text and 'html' for HTML.null for plain text, 'markdown' for markdown (converted to HTML the same way Element does), or 'html' for HTML.