summary refs log tree commit diff
path: root/telegram.py
diff options
context:
space:
mode:
Diffstat (limited to 'telegram.py')
-rwxr-xr-x telegram.py 72
1 files changed, 72 insertions, 0 deletions
diff --git a/telegram.py b/telegram.py
new file mode 100755
index 0000000..3058339
--- /dev/null
+++ b/telegram.py
@@ -0,0 +1,72 @@
+#!/usr/bin/env python3
+
+from urllib.request import urlopen, Request
+from datetime import datetime
+from bs4 import BeautifulSoup
+import sys
+
+def _format_date(dt):
+ """convert a datetime into an RFC 822 formatted date
+ Input date must be in GMT.
+ Stolen from PyRSS2Gen.
+ """
+ # Looks like:
+ # Sat, 07 Sep 2002 00:00:01 GMT
+ # Can't use strftime because that's locale dependent
+ #
+ # Isn't there a standard way to do this for Python? The
+ # rfc822 and email.Utils modules assume a timestamp. The
+ # following is based on the rfc822 module.
+ return "%s, %02d %s %04d %02d:%02d:%02d GMT" % (
+ ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"][dt.weekday()],
+ dt.day,
+ ["Jan", "Feb", "Mar", "Apr", "May", "Jun",
+ "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"][dt.month-1],
+ dt.year, dt.hour, dt.minute, dt.second)
+
def telegram(channel):
    """Scrape the public web preview of a Telegram channel.

    Fetches ``https://t.me/s/<channel>`` and walks every
    ``div.tgme_widget_message_bubble`` element found in the page.

    Args:
        channel: Channel name, appended verbatim to the t.me URL.

    Yields:
        ``(title, description, link, date)`` tuples of strings, where
        *title* is the text of the message's
        ``tgme_widget_message_text`` element (or ``'No text'`` when the
        bubble has none), *description* is the HTML of the whole bubble,
        *link* is the ``href`` of the bubble's ``tgme_widget_message_date``
        anchor and *date* is the unmodified ``datetime`` attribute of the
        bubble's ``<time class="time">`` element.

        Bubbles lacking either the timestamp or the link are skipped,
        since they cannot be turned into a complete feed item.
    """
    url = 'https://t.me/s/' + channel

    # Parse while the response is open, then release the connection
    # before any item is handed to the caller.
    with urlopen(Request(url)) as res:
        soup = BeautifulSoup(res, features="html.parser")

    bubbles = soup.find_all('div', attrs={'class': 'tgme_widget_message_bubble'})

    for bubble in bubbles:
        time_tag = bubble.find('time', attrs={'class': 'time'})
        anchor = bubble.find('a', attrs={'class': 'tgme_widget_message_date'})

        # find() returns None when the element is absent; subscripting
        # that would raise TypeError and abort the whole feed.
        date = time_tag.get('datetime') if time_tag is not None else None
        link = anchor.get('href') if anchor is not None else None
        if date is None or link is None:
            continue

        text = bubble.find('div', attrs={'class': 'tgme_widget_message_text'})
        title = text.text if text else 'No text'
        description = str(bubble)
        yield title, description, link, date
+
def _cdata(text):
    """Wrap *text* in a CDATA section, splitting any embedded ``]]>``."""
    # A literal "]]>" would end the section early, so close the section
    # after "]]" and reopen it before ">".
    return '<![CDATA[' + text.replace(']]>', ']]]]><![CDATA[>') + ']]>'


def _rss_date(value):
    """Return *value* as an RFC 822 date when it parses as ISO 8601.

    A value that cannot be parsed is returned unchanged, so an item is
    never dropped merely because of its timestamp.
    """
    from datetime import timezone

    try:
        parsed = datetime.fromisoformat(value)
    except ValueError:
        return value
    # _format_date always labels its output "GMT", so aware timestamps
    # are normalised to UTC first; naive ones are passed through as-is.
    if parsed.tzinfo is not None:
        parsed = parsed.astimezone(timezone.utc)
    return _format_date(parsed)


def main(channel):
    """Print an RSS 2.0 feed for a Telegram channel to standard output.

    Args:
        channel: Channel name, as used in ``https://t.me/s/<channel>``.

    The channel header is printed first, followed by one ``<item>`` per
    tuple yielded by ``telegram(channel)``.  Text placed directly in
    element content is XML-escaped; titles and descriptions are emitted
    as CDATA sections.
    """
    from datetime import timezone
    from xml.sax.saxutils import escape

    url = 'https://t.me/s/' + channel
    safe_channel = escape(channel)
    # Use the current time in UTC: _format_date appends "GMT" regardless
    # of the machine's local timezone.
    build_date = _format_date(datetime.now(timezone.utc))

    print('<?xml version="1.0" encoding="UTF-8"?>')
    print('<rss version="2.0">')
    print('  <channel>')
    print('    <title>Telegram: ' + safe_channel + '</title>')
    print('    <link>' + escape(url) + '</link>')
    print('    <description>The latest entries of the telegram channel of '
          + safe_channel + '</description>')
    print('    <lastBuildDate>' + build_date + '</lastBuildDate>')

    for title, description, link, date in telegram(channel):
        print('    <item>')
        print('      <title>' + _cdata(title) + '</title>')
        print('      <link>' + escape(link) + '</link>')
        print('      <description>' + _cdata(description) + '</description>')
        print('      <pubDate>' + escape(_rss_date(date)) + '</pubDate>')
        print('    </item>')

    print('  </channel>')
    print('</rss>')
+
if __name__ == "__main__":
    # Exactly one argument is expected: the Telegram channel name.
    if len(sys.argv) != 2:
        # The feed itself is written to stdout, so the usage message goes
        # to stderr to keep it out of any redirected feed file.
        print('Usage:', sys.argv[0], '<telegram channel>', file=sys.stderr)
        sys.exit(1)
    main(sys.argv[1])