#!/usr/bin/env python3
"""Print a feed of a public Telegram channel's latest messages to stdout.

The channel's web preview page (``https://t.me/s/<channel>``) is fetched
and parsed with BeautifulSoup; one entry is printed per message found.

Usage: pass the channel name as the single command-line argument.
"""
from urllib.request import urlopen, Request
from datetime import datetime, timezone
import sys

from bs4 import BeautifulSoup

# English day/month abbreviations, indexed by datetime.weekday() and
# datetime.month - 1.  Kept as explicit tables because strftime's names
# are locale dependent.
_WEEKDAYS = ("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun")
_MONTHS = ("Jan", "Feb", "Mar", "Apr", "May", "Jun",
           "Jul", "Aug", "Sep", "Oct", "Nov", "Dec")


def _format_date(dt):
    """Convert a datetime into an RFC 822 formatted date.

    Input date must be in GMT: the fields of ``dt`` are printed as-is and
    a literal ``GMT`` suffix is appended, no conversion is performed.
    Stolen from PyRSS2Gen.

    The result looks like ``Sat, 07 Sep 2002 00:00:01 GMT``.
    """
    return "%s, %02d %s %04d %02d:%02d:%02d GMT" % (
        _WEEKDAYS[dt.weekday()],
        dt.day,
        _MONTHS[dt.month - 1],
        dt.year, dt.hour, dt.minute, dt.second)


def _cdata_safe(text):
    """Return ``text`` with every ``]]>`` split across two CDATA sections.

    A raw ``]]>`` inside a CDATA section terminates it early; closing and
    immediately reopening the section in the middle of the sequence keeps
    the character data identical while staying well-formed.
    """
    return text.replace(']]>', ']]]]><![CDATA[>')


def telegram(channel):
    """Yield ``(title, description, link, date)`` for each channel message.

    channel -- channel name, appended to ``https://t.me/s/``.

    For every ``div.tgme_widget_message_bubble`` element on the page:
      title       -- text of the message's ``tgme_widget_message_text``
                     div, or ``'No text'`` when the message has none
      description -- the whole bubble element serialised as HTML
      link        -- ``href`` of the ``a.tgme_widget_message_date`` element
      date        -- ``datetime`` attribute of the ``time.time`` element,
                     passed through unchanged

    Bubbles lacking the time or link element are skipped.  Errors raised
    by ``urlopen`` propagate to the caller.
    """
    url = 'https://t.me/s/' + channel
    # The soup is built while the response is still open (the parser
    # consumes it up front); the connection is then released.
    with urlopen(Request(url)) as res:
        soup = BeautifulSoup(res, features="html.parser")

    bubbles = soup.find_all('div', attrs={'class': 'tgme_widget_message_bubble'})
    for message in bubbles:
        time_tag = message.find('time', attrs={'class': 'time'})
        link_tag = message.find('a', attrs={'class': 'tgme_widget_message_date'})
        if time_tag is None or link_tag is None:
            # Without a date or permalink there is nothing usable to emit;
            # skip this bubble instead of aborting the whole feed.
            continue
        text_tag = message.find('div', attrs={'class': 'tgme_widget_message_text'})
        title = text_tag.text if text_tag else 'No text'
        yield title, str(message), link_tag['href'], time_tag['datetime']


def main(channel):
    """Print the feed header, one entry per message, and the feed footer.

    channel -- channel name, forwarded to ``telegram()``.

    NOTE(review): the string literals below are reproduced exactly as they
    appear in this copy of the file.  They contain no XML element markup,
    so the output is not well-formed XML by itself -- confirm against the
    intended feed layout.
    NOTE(review): indentation was not recoverable from this copy; the last
    two prints are assumed to run once, after the loop -- confirm.
    """
    url = 'https://t.me/s/' + channel
    # _format_date() labels its input as GMT, so hand it the current UTC
    # time rather than the host's local time.
    build_date = _format_date(datetime.now(timezone.utc))
    print(""" Telegram: """ + channel + """ """ + url
          + """ The latest entries of the telegram channel of """ + channel
          + """ """ + build_date + """""")
    # The description element of each tuple is not printed here.
    for title, _description, link, date in telegram(channel):
        print(' ')
        print(' <![CDATA[' + _cdata_safe(title) + ']]>')
        print(' ' + link + '')
        print(' ')
        print(' ' + date + '')
        print(' ')
    print(' ')
    print('')


if __name__ == "__main__":
    if len(sys.argv) != 2:
        print('Usage:', sys.argv[0], '')
        sys.exit(1)
    main(sys.argv[1])