From 50dc9df8a44d408dd83ae4b17c407fa36c85cf8e Mon Sep 17 00:00:00 2001
From: Henry Jameson <me@hjkos.com>
Date: Thu, 14 Nov 2019 00:18:14 +0200
Subject: adds greentext, also small fixes

---
 .../tiny_post_html_processor.service.js            | 84 ++++++++++++++++++++++
 1 file changed, 84 insertions(+)
 create mode 100644 src/services/tiny_post_html_processor/tiny_post_html_processor.service.js

(limited to 'src/services/tiny_post_html_processor')
diff --git a/src/services/tiny_post_html_processor/tiny_post_html_processor.service.js b/src/services/tiny_post_html_processor/tiny_post_html_processor.service.js
new file mode 100644
index 00000000..c9ff81e1
--- /dev/null
+++ b/src/services/tiny_post_html_processor/tiny_post_html_processor.service.js
@@ -0,0 +1,84 @@
+/**
+ * This is a tiny purpose-built HTML parser/processor. This basically detects any type of visual newline and
+ * allows it to be processed, useful for greentexting, mostly
+ *
+ * @param {Object} input - input data
+ * @param {(string) => string} processor - function that will be called on every line
+ * @return {string} processed html
+ */
+export const processHtml = (html, processor) => {
+  const handledTags = new Set(['p', 'br', 'div'])
+  const openCloseTags = new Set(['p', 'div'])
+  const tagRegex = /(?:<\/(\w+)>|<(\w+)\s?[^/]*?\/?>)/gi
+
+  let buffer = '' // Current output buffer
+  const level = [] // How deep we are in tags and which tags were there
+  let textBuffer = '' // Current line content
+  let tagBuffer = null // Current tag buffer, if null = we are not currently reading a tag
+
+  // Extracts tagname from tag, i.e. <span a="b"> => span
+  const getTagName = (tag) => {
+    // eslint-disable-next-line no-unused-vars
+    const result = tagRegex.exec(tag)
+    return result && (result[1] || result[2])
+  }
+
+  const flush = () => { // Processes current line buffer, adds it to output buffer and clears line buffer
+    buffer += processor(textBuffer)
+    textBuffer = ''
+  }
+
+  const handleBr = (tag) => { // handles single newlines/linebreaks
+    flush()
+    buffer += tag
+  }
+
+  const handleOpen = (tag) => { // handles opening tags: flushes the current line, emits the tag, records it in level
+    flush()
+    buffer += tag
+    level.push(tag) // stores the full tag text as passed in, not just the tag name
+  }
+
+  const handleClose = (tag) => { // handles closing tags: flushes the current line and emits the tag
+    flush()
+    buffer += tag
+    if (level[level.length - 1] === tag) { // NOTE(review): level holds full opening-tag text, so a closing tag's text may never match - confirm
+      level.pop()
+    }
+  }
+
+  for (let i = 0; i < html.length; i++) {
+    const char = html[i]
+    if (char === '<' && tagBuffer !== null) {
+      tagBuffer = char
+    } else if (char !== '>' && tagBuffer !== null) {
+      tagBuffer += char
+    } else if (char === '>' && tagBuffer !== null) {
+      tagBuffer += char
+      const tagName = getTagName(tagBuffer)
+      if (handledTags.has(tagName)) {
+        if (tagName === 'br') {
+          handleBr(tagBuffer)
+        }
+        if (openCloseTags.has(tagBuffer)) {
+          if (tagBuffer[1] === '/') {
+            handleClose(tagBuffer)
+          } else {
+            handleOpen(tagBuffer)
+          }
+        }
+      } else {
+        textBuffer += tagBuffer
+      }
+      tagBuffer = null
+    } else if (char === '\n') {
+      handleBr(char)
+    } else {
+      textBuffer += char
+    }
+  }
+
+  flush()
+
+  return buffer
+}
-- 
cgit v1.2.3-70-g09d2


From 692ee0e95a852b1f803b7ae92d65cbf4f3ce3445 Mon Sep 17 00:00:00 2001
From: Henry Jameson <me@hjkos.com>
Date: Thu, 14 Nov 2019 00:41:14 +0200
Subject: Fix regex, tag detector condition

---
 src/components/status/status.js                    |  2 +-
 .../tiny_post_html_processor.service.js            | 25 +++++++++++-----------
 2 files changed, 13 insertions(+), 14 deletions(-)

(limited to 'src/services/tiny_post_html_processor')

diff --git a/src/components/status/status.js b/src/components/status/status.js
index 6dbb2199..416aa36a 100644
--- a/src/components/status/status.js
+++ b/src/components/status/status.js
@@ -43,7 +43,7 @@ const Status = {
       showingTall: this.inConversation && this.focused,
       showingLongSubject: false,
       error: null,
-      // Initial state
+      // not a computed property because it only sets the initial state, which will be changed later
       expandingSubject: !this.$store.getters.mergedConfig.collapseMessageWithSubject,
     }
   },
diff --git a/src/services/tiny_post_html_processor/tiny_post_html_processor.service.js b/src/services/tiny_post_html_processor/tiny_post_html_processor.service.js
index c9ff81e1..b96c1ccf 100644
--- a/src/services/tiny_post_html_processor/tiny_post_html_processor.service.js
+++ b/src/services/tiny_post_html_processor/tiny_post_html_processor.service.js
@@ -9,17 +9,15 @@
 export const processHtml = (html, processor) => {
   const handledTags = new Set(['p', 'br', 'div'])
   const openCloseTags = new Set(['p', 'div'])
-  const tagRegex = /(?:<\/(\w+)>|<(\w+)\s?[^/]*?\/?>)/gi
 
   let buffer = '' // Current output buffer
   const level = [] // How deep we are in tags and which tags were there
   let textBuffer = '' // Current line content
   let tagBuffer = null // Current tag buffer, if null = we are not currently reading a tag
 
-  // Extracts tagname from tag, i.e. <span a="b"> => span
+  // Extracts tag name from tag, i.e. <span a="b"> => span
   const getTagName = (tag) => {
-    // eslint-disable-next-line no-unused-vars
-    const result = tagRegex.exec(tag)
+    const result = /(?:<\/(\w+)>|<(\w+)\s?[^/]*?\/?>)/gi.exec(tag)
     return result && (result[1] || result[2])
   }
 
@@ -49,28 +47,29 @@ export const processHtml = (html, processor) => {
 
   for (let i = 0; i < html.length; i++) {
     const char = html[i]
-    if (char === '<' && tagBuffer !== null) {
+    if (char === '<' && tagBuffer === null) {
       tagBuffer = char
     } else if (char !== '>' && tagBuffer !== null) {
       tagBuffer += char
     } else if (char === '>' && tagBuffer !== null) {
       tagBuffer += char
-      const tagName = getTagName(tagBuffer)
+      const tagFull = tagBuffer
+      tagBuffer = null
+      const tagName = getTagName(tagFull)
       if (handledTags.has(tagName)) {
         if (tagName === 'br') {
-          handleBr(tagBuffer)
+          handleBr(tagFull)
         }
-        if (openCloseTags.has(tagBuffer)) {
-          if (tagBuffer[1] === '/') {
-            handleClose(tagBuffer)
+        if (openCloseTags.has(tagFull)) {
+          if (tagFull[1] === '/') {
+            handleClose(tagFull)
           } else {
-            handleOpen(tagBuffer)
+            handleOpen(tagFull)
           }
         }
       } else {
-        textBuffer += tagBuffer
+        textBuffer += tagFull
       }
-      tagBuffer = null
     } else if (char === '\n') {
       handleBr(char)
     } else {
-- 
cgit v1.2.3-70-g09d2


From bd2a682b83743311645241fe644e853e1a359b67 Mon Sep 17 00:00:00 2001
From: Henry Jameson <me@hjkos.com>
Date: Thu, 14 Nov 2019 22:40:20 +0200
Subject: tests + updates

---
 .../tiny_post_html_processor.service.js            | 19 ++++-
 .../tiny_post_html_processor.spec.js               | 96 ++++++++++++++++++++++
 2 files changed, 111 insertions(+), 4 deletions(-)
 create mode 100644 test/unit/specs/services/tiny_post_html_processor/tiny_post_html_processor.spec.js

(limited to 'src/services/tiny_post_html_processor')

diff --git a/src/services/tiny_post_html_processor/tiny_post_html_processor.service.js b/src/services/tiny_post_html_processor/tiny_post_html_processor.service.js
index b96c1ccf..de6f20ef 100644
--- a/src/services/tiny_post_html_processor/tiny_post_html_processor.service.js
+++ b/src/services/tiny_post_html_processor/tiny_post_html_processor.service.js
@@ -2,6 +2,8 @@
  * This is a tiny purpose-built HTML parser/processor. This basically detects any type of visual newline and
  * allows it to be processed, useful for greentexting, mostly
  *
+ * Known issue: CDATA sections are not handled specially, so nested CDATA might not work well
+ *
- * @param {Object} input - input data
+ * @param {string} html - HTML string to process
  * @param {(string) => string} processor - function that will be called on every line
  * @return {string} processed html
@@ -22,11 +24,15 @@ export const processHtml = (html, processor) => {
   }
 
   const flush = () => { // Processes current line buffer, adds it to output buffer and clears line buffer
-    buffer += processor(textBuffer)
+    if (textBuffer.trim().length > 0) {
+      buffer += processor(textBuffer)
+    } else {
+      buffer += textBuffer
+    }
     textBuffer = ''
   }
 
-  const handleBr = (tag) => { // handles single newlines/linebreaks
+  const handleBr = (tag) => { // handles single newlines/linebreaks/selfclosing
     flush()
     buffer += tag
   }
@@ -59,10 +65,12 @@ export const processHtml = (html, processor) => {
       if (handledTags.has(tagName)) {
         if (tagName === 'br') {
           handleBr(tagFull)
-        }
-        if (openCloseTags.has(tagFull)) {
+        } else if (openCloseTags.has(tagName)) {
           if (tagFull[1] === '/') {
             handleClose(tagFull)
+          } else if (tagFull[tagFull.length - 2] === '/') {
+            // self-closing
+            handleBr(tagFull)
           } else {
             handleOpen(tagFull)
           }
@@ -76,6 +84,9 @@ export const processHtml = (html, processor) => {
       textBuffer += char
     }
   }
+  if (tagBuffer) {
+    textBuffer += tagBuffer
+  }
 
   flush()
 
diff --git a/test/unit/specs/services/tiny_post_html_processor/tiny_post_html_processor.spec.js b/test/unit/specs/services/tiny_post_html_processor/tiny_post_html_processor.spec.js
new file mode 100644
index 00000000..f301429d
--- /dev/null
+++ b/test/unit/specs/services/tiny_post_html_processor/tiny_post_html_processor.spec.js
@@ -0,0 +1,96 @@
+import { processHtml } from 'src/services/tiny_post_html_processor/tiny_post_html_processor.service.js'
+
+describe('TinyPostHTMLProcessor', () => {
+  describe('with processor that keeps original line should not make any changes to HTML when', () => {
+    const processorKeep = (line) => line // identity processor: the output must equal the input
+    it('fed with regular HTML with newlines', () => {
+      const inputOutput = '1<br/>2<p class="lol">3 4</p> 5 \n 6 <p > 7 <br> 8 </p> <br>\n<br/>'
+      expect(processHtml(inputOutput, processorKeep)).to.eql(inputOutput)
+    })
+
+    it('fed with possibly broken HTML with invalid tags/composition', () => {
+      const inputOutput = '<feeee dwdwddddddw> <i>ayy<b>lm</i>ao</b> </section>'
+      expect(processHtml(inputOutput, processorKeep)).to.eql(inputOutput)
+    })
+
+    it('fed with very broken HTML with broken composition', () => {
+      const inputOutput = '</p> lmao what </div> whats going on <div> wha <p>'
+      expect(processHtml(inputOutput, processorKeep)).to.eql(inputOutput)
+    })
+
+    it('fed with sorta valid HTML but tags aren\'t closed', () => {
+      const inputOutput = 'just leaving a <div> hanging'
+      expect(processHtml(inputOutput, processorKeep)).to.eql(inputOutput)
+    })
+
+    it('fed with not really HTML at this point... tags that aren\'t finished', () => {
+      const inputOutput = 'do you expect me to finish this <div class='
+      expect(processHtml(inputOutput, processorKeep)).to.eql(inputOutput)
+    })
+
+    it('fed with dubiously valid HTML (p within p and also div inside p)', () => {
+      const inputOutput = 'look ma <p> p \nwithin <p> p! </p> and a <br/><div>div!</div></p>'
+      expect(processHtml(inputOutput, processorKeep)).to.eql(inputOutput)
+    })
+
+    it('fed with maybe valid HTML? self-closing divs and ps', () => {
+      const inputOutput = 'a <div class="what"/> what now <p aria-label="wtf"/> ?'
+      expect(processHtml(inputOutput, processorKeep)).to.eql(inputOutput)
+    })
+
+    it('fed with valid XHTML containing a CDATA', () => {
+      const inputOutput = 'Yes, it is me, <![CDATA[DIO]]>' // only checks that the CDATA text round-trips unchanged
+      expect(processHtml(inputOutput, processorKeep)).to.eql(inputOutput)
+    })
+  })
+  describe('with processor that replaces lines with word "_" should match expected line when', () => {
+    const processorReplace = (line) => '_' // ignores the line content; every processed line becomes "_"
+    it('fed with regular HTML with newlines', () => {
+      const input = '1<br/>2<p class="lol">3 4</p> 5 \n 6 <p > 7 <br> 8 </p> <br>\n<br/>'
+      const output = '_<br/>_<p class="lol">_</p>_\n_<p >_<br>_</p> <br>\n<br/>' // whitespace-only lines are emitted as-is, not replaced
+      expect(processHtml(input, processorReplace)).to.eql(output)
+    })
+
+    it('fed with possibly broken HTML with invalid tags/composition', () => {
+      const input = '<feeee dwdwddddddw> <i>ayy<b>lm</i>ao</b> </section>'
+      const output = '_' // unhandled tags stay inside the line text, so the whole input is one line
+      expect(processHtml(input, processorReplace)).to.eql(output)
+    })
+
+    it('fed with very broken HTML with broken composition', () => {
+      const input = '</p> lmao what </div> whats going on <div> wha <p>'
+      const output = '</p>_</div>_<div>_<p>'
+      expect(processHtml(input, processorReplace)).to.eql(output)
+    })
+
+    it('fed with sorta valid HTML but tags aren\'t closed', () => {
+      const input = 'just leaving a <div> hanging'
+      const output = '_<div>_'
+      expect(processHtml(input, processorReplace)).to.eql(output)
+    })
+
+    it('fed with not really HTML at this point... tags that aren\'t finished', () => {
+      const input = 'do you expect me to finish this <div class='
+      const output = '_' // the unfinished tag is kept as text of the single line
+      expect(processHtml(input, processorReplace)).to.eql(output)
+    })
+
+    it('fed with dubiously valid HTML (p within p and also div inside p)', () => {
+      const input = 'look ma <p> p \nwithin <p> p! </p> and a <br/><div>div!</div></p>'
+      const output = '_<p>_\n_<p>_</p>_<br/><div>_</div></p>'
+      expect(processHtml(input, processorReplace)).to.eql(output)
+    })
+
+    it('fed with maybe valid HTML? self-closing divs and ps', () => {
+      const input = 'a <div class="what"/> what now <p aria-label="wtf"/> ?'
+      const output = '_<div class="what"/>_<p aria-label="wtf"/>_'
+      expect(processHtml(input, processorReplace)).to.eql(output)
+    })
+
+    it('fed with valid XHTML containing a CDATA', () => {
+      const input = 'Yes, it is me, <![CDATA[DIO]]>'
+      const output = '_' // the CDATA markup is not a handled tag, so it stays part of the single line
+      expect(processHtml(input, processorReplace)).to.eql(output)
+    })
+  })
+})
-- 
cgit v1.2.3-70-g09d2