# cgit navigation: summary | refs | log | tree | commit | diff
# path: root/telegram.py
# blob: 305833922e636c4133153965d1f8fe61935386fb (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
#!/usr/bin/env python3

import sys
from datetime import datetime, timezone
from urllib.request import urlopen, Request
from xml.sax.saxutils import escape

from bs4 import BeautifulSoup

def _format_date(dt):
    """Convert a datetime into an RFC 822 formatted date string.

    Naive datetimes are assumed to already be in GMT.  Timezone-aware
    datetimes are converted to UTC first, so the trailing "GMT" label is
    always correct.  Originally taken from PyRSS2Gen.

    Args:
        dt: A ``datetime.datetime`` instance.

    Returns:
        A string that looks like ``"Sat, 07 Sep 2002 00:00:01 GMT"``.
    """
    # An aware datetime in another zone would otherwise be printed with its
    # local wall-clock fields while still being labelled "GMT".
    if dt.tzinfo is not None and dt.utcoffset() is not None:
        dt = dt.astimezone(timezone.utc)

    # strftime is avoided on purpose: its day and month names are locale
    # dependent, so the English abbreviations are spelled out here.
    weekdays = ("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun")
    months = ("Jan", "Feb", "Mar", "Apr", "May", "Jun",
              "Jul", "Aug", "Sep", "Oct", "Nov", "Dec")
    return "%s, %02d %s %04d %02d:%02d:%02d GMT" % (
            weekdays[dt.weekday()],
            dt.day,
            months[dt.month - 1],
            dt.year, dt.hour, dt.minute, dt.second)

def telegram(channel):
    """Scrape the public web preview of a Telegram channel.

    Fetches ``https://t.me/s/<channel>`` and yields one tuple for every
    message bubble found in the page.

    Args:
        channel: Name of the channel as it appears in the t.me URL.

    Yields:
        ``(title, description, link, date)`` tuples of strings:
        *title* is the plain text of the message (``'No text'`` when the
        bubble has no text element), *description* is the HTML of the whole
        bubble, *link* is the ``href`` of the message's date link and *date*
        is the ``datetime`` attribute of the message's ``<time>`` element.

    Raises:
        urllib.error.URLError: If the page cannot be fetched.
    """
    url = 'https://t.me/s/' + channel

    # The response is read completely while parsing, so the connection can
    # be released before any message is yielded.
    with urlopen(Request(url)) as res:
        soup = BeautifulSoup(res, features="html.parser")

    bubbles = soup.find_all('div', attrs={'class': 'tgme_widget_message_bubble'})

    for message in bubbles:
        time_tag = message.find('time', attrs={'class': 'time'})
        anchor = message.find('a', attrs={'class': 'tgme_widget_message_date'})
        date = time_tag.get('datetime') if time_tag is not None else None
        link = anchor.get('href') if anchor is not None else None
        if not date or not link:
            # A bubble without a timestamp or permalink cannot become a
            # feed item; skip it instead of failing on a None lookup.
            continue

        text = message.find('div', attrs={'class': 'tgme_widget_message_text'})
        title = text.text if text is not None else 'No text'
        description = str(message)
        yield title, description, link, date

def _cdata(text):
    """Wrap *text* in a CDATA section without letting ``]]>`` end it early."""
    # A literal "]]>" would close the section, so it is split across two
    # adjacent CDATA sections.
    return '<![CDATA[' + text.replace(']]>', ']]]]><![CDATA[>') + ']]>'


def _pub_date(date):
    """Convert an ISO 8601 timestamp into the RFC 822 form used by RSS 2.0."""
    try:
        parsed = datetime.fromisoformat(date)
    except ValueError:
        # Unrecognised format: pass the value through unchanged rather than
        # dropping the item.
        return date
    if parsed.tzinfo is not None:
        parsed = parsed.astimezone(timezone.utc)
    return _format_date(parsed)


def main(channel):
    """Print an RSS 2.0 feed for a Telegram channel to standard output.

    The feed header is built from the channel name, followed by one
    ``<item>`` for every message yielded by ``telegram(channel)``.

    Args:
        channel: Name of the channel as it appears in the t.me URL.
    """
    url = 'https://t.me/s/' + channel
    # The channel name ends up in element text, so XML-special characters
    # must be escaped.
    safe_channel = escape(channel)

    print("""<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0">
  <channel>
    <title>Telegram: """ + safe_channel + """</title>
    <link>""" + escape(url) + """</link>
    <description>The latest entries of the telegram channel of """ + safe_channel + """</description>
    <lastBuildDate>""" + _format_date(datetime.now(timezone.utc)) + """</lastBuildDate>""")

    for title, description, link, date in telegram(channel):
        print('    <item>')
        print('      <title>' + _cdata(title) + '</title>')
        print('      <link>' + escape(link) + '</link>')
        print('      <description>' + _cdata(description) + '</description>')
        print('      <pubDate>' + escape(_pub_date(date)) + '</pubDate>')
        print('    </item>')

    print('  </channel>')
    print('</rss>')

if __name__ == "__main__":
    # Standard output carries the feed itself, so the usage text must not be
    # written there: sys.exit() with a string prints it to stderr and exits
    # with status 1.
    if len(sys.argv) != 2:
        sys.exit('Usage: ' + sys.argv[0] + ' <telegram channel>')
    main(sys.argv[1])