1 files changed, 94 insertions, 0 deletions
diff --git a/src/services/tiny_post_html_processor/tiny_post_html_processor.service.js b/src/services/tiny_post_html_processor/tiny_post_html_processor.service.js
new file mode 100644
index 00000000..de6f20ef
--- /dev/null
+++ b/src/services/tiny_post_html_processor/tiny_post_html_processor.service.js
@@ -0,0 +1,94 @@
+/**
+ * This is a tiny purpose-built HTML parser/processor. It detects any type of visual newline (<p>, <div> and <br>
+ * tags in any letter case, plus literal "\n") and allows each line to be processed, useful for greentexting, mostly
+ *
+ * known issue: doesn't handle CDATA so nested CDATA might not work well
+ *
+ * @param {string} html - html to process
+ * @param {(string) => string} processor - function that will be called on every non-blank line
+ * @return {string} processed html
+ */
+export const processHtml = (html, processor) => {
+  const handledTags = new Set(['p', 'br', 'div'])
+  const openCloseTags = new Set(['p', 'div'])
+
+  let buffer = '' // Current output buffer
+  const level = [] // Lowercased names of the handled tags that are currently open, innermost last
+  let textBuffer = '' // Current line content
+  let tagBuffer = null // Current tag buffer, if null = we are not currently reading a tag
+
+  // Extracts lowercased tag name from the start of a tag, ignoring attributes, i.e. <SPAN a="b/c"> => span
+  const getTagName = (tag) => {
+    const result = /^<\/?(\w+)(?=[\s/>])/.exec(tag) // lookahead: name must end at whitespace, "/" or ">"
+    return result && result[1].toLowerCase()
+  }
+
+  const flush = () => { // Processes current line buffer, adds it to output buffer and clears line buffer
+    if (textBuffer.trim().length > 0) {
+      buffer += processor(textBuffer)
+    } else {
+      buffer += textBuffer // whitespace-only lines are copied through without calling the processor
+    }
+    textBuffer = ''
+  }
+
+  const handleBr = (tag) => { // handles single newlines/linebreaks/selfclosing
+    flush()
+    buffer += tag
+  }
+
+  const handleOpen = (tag, name) => { // handles opening tags
+    flush()
+    buffer += tag
+    level.push(name) // store the name, not the raw tag, so the matching closing tag can pop it
+  }
+
+  const handleClose = (tag, name) => { // handles closing tags
+    flush()
+    buffer += tag
+    if (level[level.length - 1] === name) {
+      level.pop()
+    }
+  }
+
+  for (let i = 0; i < html.length; i++) {
+    const char = html[i]
+    if (char === '<' && tagBuffer === null) {
+      tagBuffer = char
+    } else if (char !== '>' && tagBuffer !== null) {
+      tagBuffer += char
+    } else if (char === '>' && tagBuffer !== null) {
+      tagBuffer += char
+      const tagFull = tagBuffer
+      tagBuffer = null
+      const tagName = getTagName(tagFull)
+      if (handledTags.has(tagName)) {
+        if (tagName === 'br') {
+          handleBr(tagFull)
+        } else if (openCloseTags.has(tagName)) {
+          if (tagFull[1] === '/') {
+            handleClose(tagFull, tagName)
+          } else if (tagFull[tagFull.length - 2] === '/') {
+            // self-closing
+            handleBr(tagFull)
+          } else {
+            handleOpen(tagFull, tagName)
+          }
+        }
+      } else {
+        textBuffer += tagFull // any other tag stays part of the current line
+      }
+    } else if (char === '\n') {
+      handleBr(char)
+    } else {
+      textBuffer += char
+    }
+  }
+  if (tagBuffer) { // unterminated tag at the end of input is kept as plain text
+    textBuffer += tagBuffer
+  }
+
+  flush()
+
+  return buffer
+}