From cc00af7a3102034b05ebcd4aa1fd01c6f467184a Mon Sep 17 00:00:00 2001
From: Henry Jameson <me@hjkos.com>
Date: Thu, 10 Jun 2021 18:52:01 +0300
Subject: Hellthread(tm) Certified

---
 .../html_converter/html_line_converter.service.js  | 102 ++++++++++++++
 .../html_converter/html_tree_converter.service.js  | 146 +++++++++++++++++++++
 .../mini_html_converter.service.js                 | 138 -------------------
 .../tiny_post_html_processor.service.js            |  94 -------------
 4 files changed, 248 insertions(+), 232 deletions(-)
 create mode 100644 src/services/html_converter/html_line_converter.service.js
 create mode 100644 src/services/html_converter/html_tree_converter.service.js
 delete mode 100644 src/services/mini_html_converter/mini_html_converter.service.js
 delete mode 100644 src/services/tiny_post_html_processor/tiny_post_html_processor.service.js

(limited to 'src/services')
diff --git a/src/services/html_converter/html_line_converter.service.js b/src/services/html_converter/html_line_converter.service.js
new file mode 100644
index 00000000..80482c9a
--- /dev/null
+++ b/src/services/html_converter/html_line_converter.service.js
@@ -0,0 +1,102 @@
+/**
+ * This is a tiny purpose-built HTML parser/processor. It detects every kind of
+ * visual newline (br, p and div tags, '\n') and converts the HTML into a flat array.
+ *
+ * Non-blank visual lines are represented as objects with a single property -
+ * text - holding the line (inline tags included); line-breaking tags, '\n' and
+ * blank lines are plain strings. Intended usage is to .map() the array so every
+ * item becomes a string, then convert it back to html with a .join('').
+ *
+ * Generally this isn't very useful except for when you really need to either
+ * modify visual lines (greentext, i.e. simple quoting) or do something with
+ * the first/last line.
+ *
+ * known issue: doesn't handle CDATA so nested CDATA might not work well
+ *
+ * @param {string} html - HTML source to split into visual lines
+ * @return {(string|{ text: string })[]} processed html in form of a list.
+ */
+export const convertHtmlToLines = (html) => {
+  const handledTags = new Set(['p', 'br', 'div']) // Tags that end a visual line
+  const openCloseTags = new Set(['p', 'div']) // Handled tags that have opener/closer forms
+
+  let buffer = [] // Current output buffer
+  const level = [] // Stack of opening tags seen so far (bookkeeping only, never read for output)
+  let textBuffer = '' // Current line content
+  let tagBuffer = null // Current tag buffer, if null = we are not currently reading a tag
+
+  // Extracts tag name from tag, i.e. <span a="b"> => span; yields null when nothing matches
+  const getTagName = (tag) => {
+    const result = /(?:<\/(\w+)>|<(\w+)\s?[^/]*?\/?>)/gi.exec(tag)
+    return result && (result[1] || result[2])
+  }
+
+  const flush = () => { // Processes current line buffer, adds it to output buffer and clears line buffer
+    if (textBuffer.trim().length > 0) { // non-blank line: wrapped so consumers can tell it apart
+      buffer.push({ text: textBuffer })
+    } else { // blank or empty line: pushed as a raw string, even when it is ''
+      buffer.push(textBuffer)
+    }
+    textBuffer = ''
+  }
+
+  const handleBr = (tag) => { // handles single newlines/linebreaks/selfclosing
+    flush()
+    buffer.push(tag)
+  }
+
+  const handleOpen = (tag) => { // handles opening tags
+    flush()
+    buffer.push(tag)
+    level.push(tag)
+  }
+
+  const handleClose = (tag) => { // handles closing tags
+    flush()
+    buffer.push(tag)
+    // NOTE(review): level stores openers while tag here is a closer, so this equality looks like it can never hold - confirm
+    if (level[level.length - 1] === tag) {
+      level.pop()
+    }
+  }
+
+  for (let i = 0; i < html.length; i++) {
+    const char = html[i]
+    if (char === '<' && tagBuffer === null) { // start of a tag
+      tagBuffer = char
+    } else if (char !== '>' && tagBuffer !== null) { // inside a tag: keep accumulating
+      tagBuffer += char
+    } else if (char === '>' && tagBuffer !== null) { // end of a tag: classify it
+      tagBuffer += char
+      const tagFull = tagBuffer
+      tagBuffer = null
+      const tagName = getTagName(tagFull)
+      if (handledTags.has(tagName)) {
+        if (tagName === 'br') {
+          handleBr(tagFull)
+        } else if (openCloseTags.has(tagName)) {
+          if (tagFull[1] === '/') {
+            handleClose(tagFull)
+          } else if (tagFull[tagFull.length - 2] === '/') {
+            // self-closing
+            handleBr(tagFull)
+          } else {
+            handleOpen(tagFull)
+          }
+        }
+      } else { // any other tag is inline: it stays part of the current line
+        textBuffer += tagFull
+      }
+    } else if (char === '\n') { // a literal newline also ends the visual line
+      handleBr(char)
+    } else {
+      textBuffer += char
+    }
+  }
+  if (tagBuffer) { // unterminated tag at end of input is kept as text
+    textBuffer += tagBuffer
+  }
+
+  flush()
+
+  return buffer
+}
diff --git a/src/services/html_converter/html_tree_converter.service.js b/src/services/html_converter/html_tree_converter.service.js
new file mode 100644
index 00000000..badd473a
--- /dev/null
+++ b/src/services/html_converter/html_tree_converter.service.js
@@ -0,0 +1,146 @@
+/**
+ * This is a not-so-tiny purpose-built HTML parser/processor. This parses html
+ * and converts it into a tree structure representing tag openers/closers and
+ * children.
+ *
+ * Structure follows this pattern: [opener, [...children], closer] except root
+ * node which is just [...children]. Self-closing/empty elements are [tag].
+ * Text nodes can only be within children and are represented as strings.
+ *
+ * Intended use is to convert HTML structure and then recursively iterate over it
+ * most likely using a map. Very useful for dynamically rendering html replacing
+ * tags with JSX elements in a render function.
+ *
+ * known issue: doesn't handle CDATA so CDATA might not work well
+ * known issue: doesn't handle HTML comments
+ *
+ * @param {string} html - HTML source to parse
+ * @return {Array} root children: strings (text) and nested arrays (elements)
+ */
+export const convertHtmlToTree = (html) => {
+  // Elements that are implicitly self-closing
+  // https://developer.mozilla.org/en-US/docs/Glossary/empty_element
+  const emptyElements = new Set([
+    'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input',
+    'keygen', 'link', 'meta', 'param', 'source', 'track', 'wbr'
+  ])
+  // TODO For future - also parse HTML5 multi-source components?
+
+  const buffer = [] // Root children list; this is what gets returned
+  const levels = [['', buffer]] // Stack of open nodes [opener, children]; the root uses '' as its opener
+  let textBuffer = '' // Current line content
+  let tagBuffer = null // Current tag buffer, if null = we are not currently reading a tag
+
+  const getCurrentBuffer = () => { // Children list of the innermost open node
+    return levels[levels.length - 1][1]
+  }
+
+  const flushText = () => { // Appends pending text (if any) to the current children list and clears it
+    if (textBuffer === '') return
+    getCurrentBuffer().push(textBuffer)
+    textBuffer = ''
+  }
+
+  const handleSelfClosing = (tag) => { // Childless element: stored as a one-item array
+    getCurrentBuffer().push([tag])
+  }
+
+  const handleOpen = (tag) => { // New node becomes a child of the current one and the new innermost level
+    const curBuf = getCurrentBuffer()
+    const newLevel = [tag, []]
+    levels.push(newLevel)
+    curBuf.push(newLevel)
+  }
+
+  const handleClose = (tag) => { // Closer matching the innermost opener completes that node; any other closer is kept as a plain child
+    const currentTag = levels[levels.length - 1]
+    // NOTE(review): the root opener is '' (name null); a nameless closer such as </> also yields null and would pop the root level - confirm
+    if (getTagName(levels[levels.length - 1][0]) === getTagName(tag)) {
+      currentTag.push(tag) // node becomes [opener, children, closer]
+      levels.pop()
+    } else {
+      getCurrentBuffer().push(tag) // unmatched closer
+    }
+  }
+
+  for (let i = 0; i < html.length; i++) {
+    const char = html[i]
+    if (char === '<' && tagBuffer === null) { // start of a tag: text collected so far is complete
+      flushText()
+      tagBuffer = char
+    } else if (char !== '>' && tagBuffer !== null) { // inside a tag: keep accumulating
+      tagBuffer += char
+    } else if (char === '>' && tagBuffer !== null) { // end of a tag: classify it
+      tagBuffer += char
+      const tagFull = tagBuffer
+      tagBuffer = null
+      const tagName = getTagName(tagFull)
+      if (tagFull[1] === '/') {
+        handleClose(tagFull)
+      } else if (emptyElements.has(tagName) || tagFull[tagFull.length - 2] === '/') {
+        // self-closing
+        handleSelfClosing(tagFull)
+      } else {
+        handleOpen(tagFull)
+      }
+    } else {
+      textBuffer += char
+    }
+  }
+  if (tagBuffer) { // unterminated tag at end of input is kept as text
+    textBuffer += tagBuffer
+  }
+
+  flushText()
+  return buffer
+}
+
+// Extracts tag name from tag, i.e. <span a="b"> => span; returns null when the tag does not match
+export const getTagName = (tag) => {
+  const result = /(?:<\/(\w+)>|<(\w+)\s?.*?\/?>)/gi.exec(tag)
+  return result && (result[1] || result[2])
+}
+
+export const processTextForEmoji = (text, emojis, processor) => { // Splits text into plain chunks and processor(emoji) results for each :shortcode: found
+  const buffer = [] // Output: text chunks interleaved with processor() results
+  let textBuffer = '' // Text accumulated since the last emoji
+  for (let i = 0; i < text.length; i++) {
+    const char = text[i]
+    if (char === ':') { // possible start of a :shortcode:
+      const next = text.slice(i + 1) // everything after the opening colon
+      let found = false // becomes the matching emoji object, if any
+      for (let emoji of emojis) { // first emoji in iteration order that matches wins
+        if (next.slice(0, emoji.shortcode.length + 1) === (emoji.shortcode + ':')) {
+          found = emoji
+          break
+        }
+      }
+      if (found) {
+        buffer.push(textBuffer) // pushed even when empty, e.g. when text starts with an emoji
+        textBuffer = ''
+        buffer.push(processor(found))
+        i += found.shortcode.length + 1 // jump to the closing colon; the loop increment then moves past it
+      } else { // lone colon: ordinary text
+        textBuffer += char
+      }
+    } else {
+      textBuffer += char
+    }
+  }
+  if (textBuffer) buffer.push(textBuffer) // trailing text, if any
+  return buffer
+}
+
+export const getAttrs = tag => { // Parses the attributes of a tag string into a plain object; attributes without a captured value map to true
+  const innertag = tag
+    .substring(1, tag.length - 1) // drop the first and last character (the angle brackets)
+    .replace(new RegExp('^' + getTagName(tag)), '') // drop the leading tag name
+    .replace(/\/?$/, '') // drop a trailing slash, if present
+    .trim()
+  const attrs = Array.from(innertag.matchAll(/([a-z0-9-]+)(?:=("[^"]+?"|'[^']+?'))?/gi))
+    .map(([trash, key, value]) => [key, value]) // keep only the capture groups: name and optional quoted value
+    .map(([k, v]) => {
+      if (!v) return [k, true] // no quoted value captured
+      return [k, v.substring(1, v.length - 1)] // strip the surrounding quotes
+    })
+  return Object.fromEntries(attrs)
+}
diff --git a/src/services/mini_html_converter/mini_html_converter.service.js b/src/services/mini_html_converter/mini_html_converter.service.js
deleted file mode 100644
index 900752cd..00000000
--- a/src/services/mini_html_converter/mini_html_converter.service.js
+++ /dev/null
@@ -1,138 +0,0 @@
-/**
- * This is a not-so-tiny purpose-built HTML parser/processor. It was made for use
- * with StatusBody component for purpose of replacing tags with vue components
- *
- * known issue: doesn't handle CDATA so nested CDATA might not work well
- *
- * @param {Object} input - input data
- * @param {(string) => string} lineProcessor - function that will be called on every line
- * @param {{ key[string]: (string) => string}} tagProcessor - map of processors for tags
- * @return {string} processed html
- */
-export const convertHtml = (html) => {
-  // Elements that are implicitly self-closing
-  // https://developer.mozilla.org/en-US/docs/Glossary/empty_element
-  const emptyElements = new Set([
-    'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input',
-    'keygen', 'link', 'meta', 'param', 'source', 'track', 'wbr'
-  ])
-  // TODO For future - also parse HTML5 multi-source components?
-
-  const buffer = [] // Current output buffer
-  const levels = [['', buffer]] // How deep we are in tags and which tags were there
-  let textBuffer = '' // Current line content
-  let tagBuffer = null // Current tag buffer, if null = we are not currently reading a tag
-
-  const getCurrentBuffer = () => {
-    return levels[levels.length - 1][1]
-  }
-
-  const flushText = () => { // Processes current line buffer, adds it to output buffer and clears line buffer
-    if (textBuffer === '') return
-    getCurrentBuffer().push(textBuffer)
-    textBuffer = ''
-  }
-
-  const handleSelfClosing = (tag) => {
-    getCurrentBuffer().push([tag])
-  }
-
-  const handleOpen = (tag) => {
-    const curBuf = getCurrentBuffer()
-    const newLevel = [tag, []]
-    levels.push(newLevel)
-    curBuf.push(newLevel)
-  }
-
-  const handleClose = (tag) => {
-    const currentTag = levels[levels.length - 1]
-    if (getTagName(levels[levels.length - 1][0]) === getTagName(tag)) {
-      currentTag.push(tag)
-      levels.pop()
-    } else {
-      getCurrentBuffer().push(tag)
-    }
-  }
-
-  for (let i = 0; i < html.length; i++) {
-    const char = html[i]
-    if (char === '<' && tagBuffer === null) {
-      flushText()
-      tagBuffer = char
-    } else if (char !== '>' && tagBuffer !== null) {
-      tagBuffer += char
-    } else if (char === '>' && tagBuffer !== null) {
-      tagBuffer += char
-      const tagFull = tagBuffer
-      tagBuffer = null
-      const tagName = getTagName(tagFull)
-      if (tagFull[1] === '/') {
-        handleClose(tagFull)
-      } else if (emptyElements.has(tagName) || tagFull[tagFull.length - 2] === '/') {
-        // self-closing
-        handleSelfClosing(tagFull)
-      } else {
-        handleOpen(tagFull)
-      }
-    } else {
-      textBuffer += char
-    }
-  }
-  if (tagBuffer) {
-    textBuffer += tagBuffer
-  }
-
-  flushText()
-  return buffer
-}
-
-// Extracts tag name from tag, i.e. <span a="b"> => span
-export const getTagName = (tag) => {
-  const result = /(?:<\/(\w+)>|<(\w+)\s?.*?\/?>)/gi.exec(tag)
-  return result && (result[1] || result[2])
-}
-
-export const processTextForEmoji = (text, emojis, processor) => {
-  const buffer = []
-  let textBuffer = ''
-  for (let i = 0; i < text.length; i++) {
-    const char = text[i]
-    if (char === ':') {
-      const next = text.slice(i + 1)
-      let found = false
-      for (let emoji of emojis) {
-        if (next.slice(0, emoji.shortcode.length + 1) === (emoji.shortcode + ':')) {
-          found = emoji
-          break
-        }
-      }
-      if (found) {
-        buffer.push(textBuffer)
-        textBuffer = ''
-        buffer.push(processor(found))
-        i += found.shortcode.length + 1
-      } else {
-        textBuffer += char
-      }
-    } else {
-      textBuffer += char
-    }
-  }
-  if (textBuffer) buffer.push(textBuffer)
-  return buffer
-}
-
-export const getAttrs = tag => {
-  const innertag = tag
-    .substring(1, tag.length - 1)
-    .replace(new RegExp('^' + getTagName(tag)), '')
-    .replace(/\/?$/, '')
-    .trim()
-  const attrs = Array.from(innertag.matchAll(/([a-z0-9-]+)(?:=("[^"]+?"|'[^']+?'))?/gi))
-    .map(([trash, key, value]) => [key, value])
-    .map(([k, v]) => {
-      if (!v) return [k, true]
-      return [k, v.substring(1, v.length - 1)]
-    })
-  return Object.fromEntries(attrs)
-}
diff --git a/src/services/tiny_post_html_processor/tiny_post_html_processor.service.js b/src/services/tiny_post_html_processor/tiny_post_html_processor.service.js
deleted file mode 100644
index de6f20ef..00000000
--- a/src/services/tiny_post_html_processor/tiny_post_html_processor.service.js
+++ /dev/null
@@ -1,94 +0,0 @@
-/**
- * This is a tiny purpose-built HTML parser/processor. This basically detects any type of visual newline and
- * allows it to be processed, useful for greentexting, mostly
- *
- * known issue: doesn't handle CDATA so nested CDATA might not work well
- *
- * @param {Object} input - input data
- * @param {(string) => string} processor - function that will be called on every line
- * @return {string} processed html
- */
-export const processHtml = (html, processor) => {
-  const handledTags = new Set(['p', 'br', 'div'])
-  const openCloseTags = new Set(['p', 'div'])
-
-  let buffer = '' // Current output buffer
-  const level = [] // How deep we are in tags and which tags were there
-  let textBuffer = '' // Current line content
-  let tagBuffer = null // Current tag buffer, if null = we are not currently reading a tag
-
-  // Extracts tag name from tag, i.e. <span a="b"> => span
-  const getTagName = (tag) => {
-    const result = /(?:<\/(\w+)>|<(\w+)\s?[^/]*?\/?>)/gi.exec(tag)
-    return result && (result[1] || result[2])
-  }
-
-  const flush = () => { // Processes current line buffer, adds it to output buffer and clears line buffer
-    if (textBuffer.trim().length > 0) {
-      buffer += processor(textBuffer)
-    } else {
-      buffer += textBuffer
-    }
-    textBuffer = ''
-  }
-
-  const handleBr = (tag) => { // handles single newlines/linebreaks/selfclosing
-    flush()
-    buffer += tag
-  }
-
-  const handleOpen = (tag) => { // handles opening tags
-    flush()
-    buffer += tag
-    level.push(tag)
-  }
-
-  const handleClose = (tag) => { // handles closing tags
-    flush()
-    buffer += tag
-    if (level[level.length - 1] === tag) {
-      level.pop()
-    }
-  }
-
-  for (let i = 0; i < html.length; i++) {
-    const char = html[i]
-    if (char === '<' && tagBuffer === null) {
-      tagBuffer = char
-    } else if (char !== '>' && tagBuffer !== null) {
-      tagBuffer += char
-    } else if (char === '>' && tagBuffer !== null) {
-      tagBuffer += char
-      const tagFull = tagBuffer
-      tagBuffer = null
-      const tagName = getTagName(tagFull)
-      if (handledTags.has(tagName)) {
-        if (tagName === 'br') {
-          handleBr(tagFull)
-        } else if (openCloseTags.has(tagName)) {
-          if (tagFull[1] === '/') {
-            handleClose(tagFull)
-          } else if (tagFull[tagFull.length - 2] === '/') {
-            // self-closing
-            handleBr(tagFull)
-          } else {
-            handleOpen(tagFull)
-          }
-        }
-      } else {
-        textBuffer += tagFull
-      }
-    } else if (char === '\n') {
-      handleBr(char)
-    } else {
-      textBuffer += char
-    }
-  }
-  if (tagBuffer) {
-    textBuffer += tagBuffer
-  }
-
-  flush()
-
-  return buffer
-}
-- 
cgit v1.2.3-70-g09d2