diff options
| -rw-r--r-- | .gitignore | 3 | ||||
| -rw-r--r-- | config.py | 32 | ||||
| -rw-r--r-- | examples/hugo_timeline_partial.html | 22 | ||||
| -rw-r--r-- | examples/syncblog.sh | 22 | ||||
| -rwxr-xr-x | timelinebot.py | 75 | ||||
| -rw-r--r-- | utils.py | 9 |
6 files changed, 163 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..8f59883
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+*.secret
+__pycache__
+mastodon
diff --git a/config.py b/config.py
new file mode 100644
index 0000000..b5f37cd
--- /dev/null
+++ b/config.py
@@ -0,0 +1,39 @@
+APPNAME = 'bot_id'
+BASEURL = 'https://_instance_'
+
+FOLLOW = "yourself"
+PUBLIC_ONLY = True
+NO_REBLOG = True
+DATEFORMAT = "%m/%d/%Y"
+OUTPUT_JSON_FILE = 'tl.json'
+
+# TODO add caching options
+
+# Maximum number of statuses per fetch. Setting this higher does not
+# guarantee that ALL statuses of the timeline are fetched, because the
+# result also depends on the server-side rate limit.
+LIMIT = 40
+
+# bot account username and password
+# Priority: plaintext UNAME/PW in this config > input at runtime
+UNAME = ""
+PW = ""
+
+# you don't need to modify these
+CLIENTID = 'client.secret'
+TOKEN = 'token.secret'
+
+def get_secrets_from_input():
+    """Ask for the bot account credentials on the terminal.
+
+    Returns:
+        A ``(username, password)`` tuple holding the entered strings.
+    """
+    # Imported locally: only this interactive fallback needs it.
+    import getpass
+
+    print("account secret not found, please manually input: ")
+    uname = input("username or email: ")
+    # getpass does not echo the typed password, unlike input().
+    pw = getpass.getpass("password: ")
+    return (uname, pw)
diff --git a/examples/hugo_timeline_partial.html b/examples/hugo_timeline_partial.html
new file mode 100644
index 0000000..251ccce
--- /dev/null
+++ b/examples/hugo_timeline_partial.html
@@ -0,0 +1,22 @@
+<style type="text/css">
+table.tidy, table.tidy * {
+  border: none;
+  padding-top:0;
+  padding-bottom:0.2em;
+  text-align: left;
+  vertical-align: top;
+}
+</style>
+
+<h1>Timeline</h1>
+<table class="tidy" align=left>
+  <tbody>
+  {{ $tl := site.Data.tl }}
+  {{ range $tl }}
+  <tr>
+    <td> @{{.account}}<a href={{.url}}>🔗</a><br> </td>
+    <td> [{{.created_at}}] {{ .content }}<br></td>
+  </tr>
+  {{ end }}
+  </tbody>
+</table>
diff --git a/examples/syncblog.sh b/examples/syncblog.sh
new file mode 100644
index 0000000..5661923
--- /dev/null
+++ b/examples/syncblog.sh
@@ -0,0 +1,22 @@
+#!/usr/bin/bash
+
+# THIS IS AN EXAMPLE
+# DON'T RUN ME WITHOUT MODIFYING!
+
+# FOOLPROOF
+echo "do you know what you are doing?"
+exit 1
+
+cd <PATH_TO_TIMELINE_BOT>
+echo "Fetching Timeline from fedi..."
+./timelinebot.py
+cp tl.json <PATH_TO_SITE_ROOT_DATA>
+
+cd <PATH_TO_SITE_ROOT>
+echo "Rendering blog..."
+hugo
+
+echo "Sync blog to remote server..."
+rsync -avh -og --chown=http:http --info=progress2 --info=name0 --update --delete-after \
+    <PATH_TO_SITE_ROOT_PUBLIC> \
+    user@remote_server:<PATH_TO_WEB_DOC_ROOT>
diff --git a/timelinebot.py b/timelinebot.py
new file mode 100755
index 0000000..8cf6635
--- /dev/null
+++ b/timelinebot.py
@@ -0,0 +1,117 @@
+#!/usr/bin/env python
+from mastodon import Mastodon
+import config as cfg
+import time
+import re
+import os
+import re
+import utils
+import json
+
+class TLBot():
+    """Export a filtered copy of a fediverse home timeline.
+
+    Every setting (instance, credentials, filters, file names) is read
+    from the ``config`` object handed to the constructor.
+    """
+
+    def __init__(self, config=cfg):
+        """Remember *config*; defaults to the imported ``config`` module."""
+        # Keep the object that was passed in rather than the module default.
+        self.config = config
+        self.session = None
+
+    def init_app(self):
+        """Register the bot as an application on the instance.
+
+        The client credentials are written to ``config.CLIENTID``.
+
+        Returns:
+            Whatever ``Mastodon.create_app`` returns.
+        """
+        return Mastodon.create_app(self.config.APPNAME, api_base_url=self.config.BASEURL, to_file=self.config.CLIENTID)
+
+    def init_session(self):
+        """Create ``self.session`` and make sure it is logged in.
+
+        Registers the application first when the client file is missing,
+        then checks the stored token and falls back to a password login.
+        Exits the process with status 1 when logging in fails.
+        """
+        if not os.path.isfile(self.config.CLIENTID):
+            print("[booting] client data doesn't exist..creating...")
+            self.init_app()
+        # Build the client in both cases: registering the app alone does
+        # not give an object that API calls can be made on.
+        self.session = Mastodon(client_id=self.config.CLIENTID, access_token=self.config.TOKEN, feature_set="pleroma")
+        try:
+            self.session.account_verify_credentials()
+        except Exception:
+            print("invalid credentials, trying to log in with pw")
+            try:
+                self.login()
+            except Exception as e:
+                print("can't log in, abort")
+                print(e)
+                raise SystemExit(1)
+
+    def login(self):
+        """Log in with username and password.
+
+        Credentials that are empty in the config are asked for on the
+        terminal and stored on the config object. The access token is
+        written to ``config.TOKEN``. Exits with status 1 on failure.
+        """
+        print("trying manual login")
+        if self.config.UNAME == "" or self.config.PW == "":
+            (self.config.UNAME, self.config.PW) = self.config.get_secrets_from_input()
+        try:
+            self.session.log_in(username=self.config.UNAME, password=self.config.PW, to_file=self.config.TOKEN)
+        except Exception as e:
+            print("log in failed, check your credentials")
+            print(e)
+            raise SystemExit(1)
+
+    def get_sanitized_timeline_json(self):
+        """Return the filtered home timeline as a list of plain dicts.
+
+        A status is dropped when PUBLIC_ONLY is set and it is not public,
+        when its author is not FOLLOW, or when NO_REBLOG is set and it is
+        a reblog.
+
+        Returns:
+            Dicts with the keys ``account``, ``content``, ``created_at``
+            (formatted with DATEFORMAT) and ``url``, newest first.
+        """
+        # Fall back to the formerly hard-coded values when the config
+        # object does not define these two settings.
+        limit = getattr(self.config, "LIMIT", 40)
+        dateformat = getattr(self.config, "DATEFORMAT", "%m/%d/%Y")
+        tl = self.session.timeline_home(limit=limit)
+        results = []
+        for status in tl:
+            if self.config.PUBLIC_ONLY and status['visibility'] != 'public':
+                continue
+            if status['account']['acct'] != self.config.FOLLOW:
+                continue
+            if self.config.NO_REBLOG and status['reblog'] is not None:
+                continue
+            results.append({
+                'account': status['account']['acct'],
+                'content': utils.sanitize_html(status['content']),
+                'created_at': status['created_at'],
+                'url': status['url'],
+            })
+        # Sort on the raw created_at values before turning them into text.
+        sorted_list = sorted(results, key=lambda d: d['created_at'], reverse=True)
+        for s in sorted_list:
+            s['created_at'] = s['created_at'].strftime(dateformat)
+        return sorted_list
+
+if __name__ == "__main__":
+    bot = TLBot(cfg)
+    bot.init_session()
+    sl = bot.get_sanitized_timeline_json()
+    file = cfg.OUTPUT_JSON_FILE
+    with open(file, 'w', encoding='utf8') as json_file:
+        json.dump(sl, json_file, ensure_ascii=False)
diff --git a/utils.py b/utils.py
new file mode 100644
index 0000000..aa0f4ed
--- /dev/null
+++ b/utils.py
@@ -0,0 +1,19 @@
+import re
+import html
+
+# Matches any tag; DOTALL so that tags broken across lines are matched too.
+CLEANR = re.compile(r'<.*?>', re.DOTALL)
+
+
+def sanitize_html(raw_html):
+    """Strip HTML tags from *raw_html* and decode its character references.
+
+    Args:
+        raw_html: HTML fragment as a string.
+
+    Returns:
+        The text with every ``<...>`` span removed and entities such as
+        ``&amp;`` unescaped.
+    """
+    cleantext = CLEANR.sub('', raw_html)
+    return html.unescape(cleantext)
