aboutsummaryrefslogtreecommitdiff
path: root/src/services/html_converter
diff options
context:
space:
mode:
authorHenry Jameson <me@hjkos.com>2021-06-12 19:47:23 +0300
committerHenry Jameson <me@hjkos.com>2021-06-12 19:54:30 +0300
commitcd4455675024a3dfc8930184114d5f92438d0466 (patch)
tree14a11f546ab86fc1f5a2d288344f3ff7c7e246a5 /src/services/html_converter
parentca6c7d5b10e48299dcb0ee65248de14f27ed78c8 (diff)
restructure and tests
squash! restructure and tests
Diffstat (limited to 'src/services/html_converter')
-rw-r--r--src/services/html_converter/html_line_converter.service.js8
-rw-r--r--src/services/html_converter/html_tree_converter.service.js53
-rw-r--r--src/services/html_converter/utility.service.js73
3 files changed, 77 insertions, 57 deletions
diff --git a/src/services/html_converter/html_line_converter.service.js b/src/services/html_converter/html_line_converter.service.js
index d8f5ecb8..e448d5cd 100644
--- a/src/services/html_converter/html_line_converter.service.js
+++ b/src/services/html_converter/html_line_converter.service.js
@@ -1,3 +1,5 @@
+import { getTagName } from './utility.service.js'
+
/**
* This is a tiny purpose-built HTML parser/processor. This basically detects
* any type of visual newline and converts entire HTML into a array structure.
@@ -26,12 +28,6 @@ export const convertHtmlToLines = (html) => {
let textBuffer = '' // Current line content
let tagBuffer = null // Current tag buffer, if null = we are not currently reading a tag
- // Extracts tag name from tag, i.e. <span a="b"> => span
- const getTagName = (tag) => {
- const result = /(?:<\/(\w+)>|<(\w+)\s?[^/]*?\/?>)/gi.exec(tag)
- return result && (result[1] || result[2])
- }
-
const flush = () => { // Processes current line buffer, adds it to output buffer and clears line buffer
if (textBuffer.trim().length > 0 && !level.some(l => ignoredTags.has(l))) {
buffer.push({ text: textBuffer })
diff --git a/src/services/html_converter/html_tree_converter.service.js b/src/services/html_converter/html_tree_converter.service.js
index badd473a..804d35d7 100644
--- a/src/services/html_converter/html_tree_converter.service.js
+++ b/src/services/html_converter/html_tree_converter.service.js
@@ -1,3 +1,5 @@
+import { getTagName } from './utility.service.js'
+
/**
* This is a not-so-tiny purpose-built HTML parser/processor. This parses html
* and converts it into a tree structure representing tag openers/closers and
@@ -93,54 +95,3 @@ export const convertHtmlToTree = (html) => {
flushText()
return buffer
}
-
-// Extracts tag name from tag, i.e. <span a="b"> => span
-export const getTagName = (tag) => {
- const result = /(?:<\/(\w+)>|<(\w+)\s?.*?\/?>)/gi.exec(tag)
- return result && (result[1] || result[2])
-}
-
-export const processTextForEmoji = (text, emojis, processor) => {
- const buffer = []
- let textBuffer = ''
- for (let i = 0; i < text.length; i++) {
- const char = text[i]
- if (char === ':') {
- const next = text.slice(i + 1)
- let found = false
- for (let emoji of emojis) {
- if (next.slice(0, emoji.shortcode.length + 1) === (emoji.shortcode + ':')) {
- found = emoji
- break
- }
- }
- if (found) {
- buffer.push(textBuffer)
- textBuffer = ''
- buffer.push(processor(found))
- i += found.shortcode.length + 1
- } else {
- textBuffer += char
- }
- } else {
- textBuffer += char
- }
- }
- if (textBuffer) buffer.push(textBuffer)
- return buffer
-}
-
-export const getAttrs = tag => {
- const innertag = tag
- .substring(1, tag.length - 1)
- .replace(new RegExp('^' + getTagName(tag)), '')
- .replace(/\/?$/, '')
- .trim()
- const attrs = Array.from(innertag.matchAll(/([a-z0-9-]+)(?:=("[^"]+?"|'[^']+?'))?/gi))
- .map(([trash, key, value]) => [key, value])
- .map(([k, v]) => {
- if (!v) return [k, true]
- return [k, v.substring(1, v.length - 1)]
- })
- return Object.fromEntries(attrs)
-}
diff --git a/src/services/html_converter/utility.service.js b/src/services/html_converter/utility.service.js
new file mode 100644
index 00000000..4d0c36c2
--- /dev/null
+++ b/src/services/html_converter/utility.service.js
@@ -0,0 +1,73 @@
+/**
+ * Extract tag name from tag opener/closer.
+ *
+ * @param {String} tag - tag string, e.g. '<a href="...">'
+ * @return {String|null} - tag name, e.g. "a"; null if the string does not match a tag
+ */
+export const getTagName = (tag) => {
+ const result = /(?:<\/(\w+)>|<(\w+)\s?.*?\/?>)/gi.exec(tag)
+ return result && (result[1] || result[2])
+}
+
+/**
+ * Extract attributes from tag opener.
+ *
+ * @param {String} tag - tag string, e.g. '<a href="...">'
+ * @return {Object} - map of attributes: key = attribute name, value = attribute value;
+ * attributes without a value are represented as boolean true
+ */
+export const getAttrs = tag => {
+ const innertag = tag
+ .substring(1, tag.length - 1)
+ .replace(new RegExp('^' + getTagName(tag)), '')
+ .replace(/\/?$/, '')
+ .trim()
+ const attrs = Array.from(innertag.matchAll(/([a-z0-9-]+)(?:=("[^"]+?"|'[^']+?'))?/gi))
+ .map(([trash, key, value]) => [key, value])
+ .map(([k, v]) => {
+ if (!v) return [k, true]
+ return [k, v.substring(1, v.length - 1)]
+ })
+ return Object.fromEntries(attrs)
+}
+
+/**
+ * Finds shortcodes in text
+ *
+ * @param {String} text - original text to find emojis in
+ * @param {{ url: String, shortcode: String }[]} emojis - list of shortcodes to find
+ * @param {Function} processor - function to call on each encountered emoji,
+ * function is passed single object containing matching emoji ({ url, shortcode })
+ * return value will be inserted into resulting array instead of :shortcode:
+ * @return {Array} resulting array with non-emoji parts of text and whatever {processor}
+ * returned for emoji
+ */
+export const processTextForEmoji = (text, emojis, processor) => {
+ const buffer = []
+ let textBuffer = ''
+ for (let i = 0; i < text.length; i++) {
+ const char = text[i]
+ if (char === ':') {
+ const next = text.slice(i + 1)
+ let found = false
+ for (let emoji of emojis) {
+ if (next.slice(0, emoji.shortcode.length + 1) === (emoji.shortcode + ':')) {
+ found = emoji
+ break
+ }
+ }
+ if (found) {
+ buffer.push(textBuffer)
+ textBuffer = ''
+ buffer.push(processor(found))
+ i += found.shortcode.length + 1
+ } else {
+ textBuffer += char
+ }
+ } else {
+ textBuffer += char
+ }
+ }
+ if (textBuffer) buffer.push(textBuffer)
+ return buffer
+}