aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTianhao Wang <tianhao.wang2@mailbox.tu-dresden.de>2023-10-14 00:04:45 +0200
committerTianhao Wang <tianhao.wang2@mailbox.tu-dresden.de>2023-10-14 00:04:45 +0200
commit2580e02140f112e8797807620e36712489039754 (patch)
tree4956d918ecb3d79ff772c52576bfdee20e88a811
init
-rw-r--r--.gitignore3
-rw-r--r--config.py32
-rw-r--r--examples/hugo_timeline_partial.html22
-rw-r--r--examples/syncblog.sh22
-rwxr-xr-xtimelinebot.py75
-rw-r--r--utils.py9
6 files changed, 163 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..8f59883
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+*.secret
+__pycache__
+mastodon
diff --git a/config.py b/config.py
new file mode 100644
index 0000000..b5f37cd
--- /dev/null
+++ b/config.py
@@ -0,0 +1,32 @@
+# Name under which the bot registers itself as an application, and the base
+# URL of the instance it talks to.
+APPNAME = 'bot_id'
+BASEURL = 'https://_instance_'
+
+# Only statuses whose author `acct` equals FOLLOW are kept.
+FOLLOW = "yourself"
+# Keep only statuses whose visibility is 'public'.
+PUBLIC_ONLY = True
+# Drop reblogs (boosts).
+NO_REBLOG = True
+# Date format (strftime syntax) intended for the `created_at` values in the
+# output.
+DATEFORMAT = "%m/%d/%Y"
+# File the resulting timeline is written to.
+OUTPUT_JSON_FILE = 'tl.json'
+
+# TODO add caching options
+
+# Maximum number of statuses requested per fetch. Setting this higher does
+# not guarantee that the WHOLE timeline is fetched, because the result also
+# depends on the server-side rate limit.
+LIMIT = 40
+
+# Bot account username and password.
+# Priority: plaintext UNAME/PW set in this file > interactive input at runtime
+# (asked for when either of them is left empty).
+UNAME = ""
+PW = ""
+
+# File names used to store the client credentials and the access token.
+# You don't need to modify these.
+CLIENTID = 'client.secret'
+TOKEN = 'token.secret'
+
+def get_secrets_from_input():
+    """Interactively ask for the bot account's credentials.
+
+    Returns:
+        A ``(username, password)`` tuple of the strings typed by the user.
+    """
+    # Imported locally so the top of this file stays plain configuration.
+    from getpass import getpass
+
+    print("account secret not found, please manually input: ")
+    # Local names deliberately differ from the module-level UNAME/PW so it is
+    # obvious that this function does not modify the configuration itself.
+    username = input("username or email: ")
+    # getpass() reads the password without echoing it on the terminal.
+    password = getpass("password: ")
+    return (username, password)
+
+
diff --git a/examples/hugo_timeline_partial.html b/examples/hugo_timeline_partial.html
new file mode 100644
index 0000000..251ccce
--- /dev/null
+++ b/examples/hugo_timeline_partial.html
@@ -0,0 +1,22 @@
+{{/* Renders the entries of the `tl` data file (site.Data.tl) as a borderless
+     two-column table: author + permalink on the left, date + text on the right. */}}
+<style type="text/css">
+table.tidy, table.tidy * {
+  border: none;
+  padding-top:0;
+  padding-bottom:0.2em;
+  text-align: left;
+  vertical-align: top;
+}
+</style>
+
+<h1>Timeline</h1>
+<table class="tidy" align="left">
+  <tbody>
+    {{ $tl := site.Data.tl }}
+    {{ range $tl }}
+    <tr>
+      <td> @{{.account}}<a href="{{.url}}">🔗</a><br> </td>
+      <td> [{{.created_at}}] {{ .content }}<br></td>
+    </tr>
+    {{ end }}
+  </tbody>
+</table>
diff --git a/examples/syncblog.sh b/examples/syncblog.sh
new file mode 100644
index 0000000..5661923
--- /dev/null
+++ b/examples/syncblog.sh
@@ -0,0 +1,22 @@
+#!/usr/bin/bash
+
+# THIS IS AN EXAMPLE
+# DON'T RUN ME WITHOUT MODIFYING!
+
+# FOOLPROOF: stop before anything is touched. Remove these two lines once
+# every <PLACEHOLDER> below has been replaced with a real path.
+# (`exit 1`, not `exit()`: the latter starts a function definition in bash
+# instead of leaving the script.)
+echo "do you know what you are doing?"
+exit 1
+
+# Fetch the timeline and hand the JSON file to the site's data directory.
+# Abort if the directory is wrong rather than running in the wrong place.
+cd <PATH_TO_TIMELINE_BOT> || exit 1
+echo "Fetching Timeline from fedi..."
+./timelinebot.py
+cp tl.json <PATH_TO_SITE_ROOT_DATA>
+
+cd <PATH_TO_SITE_ROOT> || exit 1
+echo "Rendering blog..."
+hugo
+
+# Mirror the rendered site to the web server; --delete-after removes remote
+# files that no longer exist locally.
+echo "Sync blog to remote server..."
+rsync -avh -og --chown=http:http --info=progress2 --info=name0 --update --delete-after \
+    <PATH_TO_SITE_ROOT_PUBLIC> \
+    user@remote_server:<PATH_TO_WEB_DOC_ROOT>
diff --git a/timelinebot.py b/timelinebot.py
new file mode 100755
index 0000000..8cf6635
--- /dev/null
+++ b/timelinebot.py
@@ -0,0 +1,75 @@
+#!/usr/bin/env python
+from mastodon import Mastodon
+import config as cfg
+import time
+import re
+import os
+import re
+import utils
+import json
+
+class TLBot():
+    """Fetch an account's home timeline and reduce it to plain records.
+
+    Typical use: construct with a configuration object, call
+    ``init_session()`` to obtain an authenticated API client, then call
+    ``get_sanitized_timeline_json()``.
+    """
+
+    def __init__(self, config=cfg):
+        """Store ``config`` (the ``config`` module by default)."""
+        # Use the argument, not the global module, so that a custom
+        # configuration object passed by the caller is actually honoured.
+        self.config = config
+        self.session = None
+
+    def _new_session(self):
+        """Build an API client from the stored client and token files."""
+        return Mastodon(client_id=self.config.CLIENTID,
+                        access_token=self.config.TOKEN,
+                        feature_set="pleroma")
+
+    def init_app(self):
+        """Register the application and return an API client for it.
+
+        ``Mastodon.create_app`` writes the client credentials to
+        ``config.CLIENTID`` and returns those credentials, not an API
+        client, so the client is built from the written file afterwards.
+        """
+        Mastodon.create_app(self.config.APPNAME,
+                            api_base_url=self.config.BASEURL,
+                            to_file=self.config.CLIENTID)
+        self.session = self._new_session()
+        return self.session
+
+    def init_session(self):
+        """Create ``self.session`` and make sure it is authenticated.
+
+        Registers the application first when no client file exists, then
+        verifies the stored token and falls back to a password login.
+
+        Raises:
+            SystemExit: with status 1 when logging in is not possible.
+        """
+        if not os.path.isfile(self.config.CLIENTID):
+            print("[booting] client data doesn't exist..creating...")
+            self.init_app()
+        else:
+            self.session = self._new_session()
+        try:
+            self.session.account_verify_credentials()
+        except Exception:
+            # Missing, expired or revoked token: fall back to the password.
+            print("invalid credentials, trying to log in with pw")
+            try:
+                self.login()
+            except Exception as e:
+                print("can't log in, abort")
+                print(e)
+                # Non-zero status so calling scripts can detect the failure.
+                raise SystemExit(1)
+
+    def login(self):
+        """Log in with username and password and store the access token.
+
+        Credentials come from ``config.UNAME``/``config.PW``; when either is
+        empty they are requested interactively and kept on the config object.
+
+        Raises:
+            SystemExit: with status 1 when the server rejects the login.
+        """
+        print("trying manual login")
+        if not self.config.UNAME or not self.config.PW:
+            # Prefer the prompt of the active config; fall back to the
+            # default module for config objects that do not provide one.
+            ask = getattr(self.config, "get_secrets_from_input",
+                          cfg.get_secrets_from_input)
+            (self.config.UNAME, self.config.PW) = ask()
+        try:
+            self.session.log_in(username=self.config.UNAME,
+                                password=self.config.PW,
+                                to_file=self.config.TOKEN)
+        except Exception as e:
+            print("log in failed, check your credentials")
+            print(e)
+            raise SystemExit(1)
+
+    def get_sanitized_timeline_json(self):
+        """Return the filtered home timeline as a list of plain dicts.
+
+        Statuses are dropped when they are not public (``PUBLIC_ONLY``), not
+        written by ``FOLLOW``, or are reblogs (``NO_REBLOG``).
+
+        Returns:
+            A list of dicts with the keys ``account``, ``content`` (HTML
+            tags removed), ``created_at`` (formatted string) and ``url``,
+            newest status first.
+        """
+        # Honour the configured values; the defaults keep config objects
+        # without these options working as before.
+        limit = getattr(self.config, "LIMIT", 40)
+        date_format = getattr(self.config, "DATEFORMAT", "%m/%d/%Y")
+
+        results = []
+        for status in self.session.timeline_home(limit=limit):
+            if self.config.PUBLIC_ONLY and status['visibility'] != 'public':
+                continue
+            if status['account']['acct'] != self.config.FOLLOW:
+                continue
+            if self.config.NO_REBLOG and status['reblog'] is not None:
+                continue
+            results.append({
+                'account': status['account']['acct'],
+                'content': utils.sanitize_html(status['content']),
+                'created_at': status['created_at'],
+                'url': status['url'],
+            })
+
+        # Sort on the original timestamps first; the formatted strings would
+        # not sort chronologically.
+        results.sort(key=lambda d: d['created_at'], reverse=True)
+        for s in results:
+            s['created_at'] = s['created_at'].strftime(date_format)
+        return results
+
+if __name__ == "__main__":
+    # Script entry point: authenticate, collect the filtered timeline and
+    # write it to the configured JSON file.
+    timeline_bot = TLBot(cfg)
+    timeline_bot.init_session()
+    statuses = timeline_bot.get_sanitized_timeline_json()
+    output_path = cfg.OUTPUT_JSON_FILE
+    # ensure_ascii=False keeps non-ASCII text readable in the output file.
+    with open(output_path, 'w', encoding='utf8') as out:
+        json.dump(statuses, out, ensure_ascii=False)
diff --git a/utils.py b/utils.py
new file mode 100644
index 0000000..aa0f4ed
--- /dev/null
+++ b/utils.py
@@ -0,0 +1,9 @@
+import re
+import html
+
+# Matches a single HTML tag. DOTALL lets '.' cross line breaks, so a tag
+# that is wrapped over several lines is matched as well.
+CLEANR = re.compile(r'<.*?>', re.DOTALL)
+
+def sanitize_html(raw_html):
+    """Return ``raw_html`` as plain text.
+
+    All HTML tags are removed, then character references such as
+    ``&amp;`` are decoded.
+    """
+    cleantext = CLEANR.sub('', raw_html)
+    # Decode after stripping so an escaped '&lt;b&gt;' survives as text.
+    return html.unescape(cleantext)
+