summaryrefslogtreecommitdiff
path: root/telegram.py
blob: d95ce34d14ffb5399c15e8d11dbeed629dc6b14e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
#!/usr/bin/env python3

from urllib.request import urlopen, Request
from datetime import datetime
from bs4 import BeautifulSoup
import sys

from rss import _format_date


def telegram(channel):
    """Yield the messages shown on a Telegram channel's public web preview.

    Fetches ``https://t.me/s/<channel>`` and scrapes every
    ``tgme_widget_message_bubble`` element from the returned HTML.

    Args:
        channel: Channel name as it appears after ``t.me/s/`` in the URL.

    Yields:
        ``(title, description, link, date)`` tuples of strings: ``title`` is
        the plain text of the message (``"No text"`` when the bubble has no
        text element), ``description`` is the bubble's full HTML, ``link`` is
        the ``href`` of the message's date anchor and ``date`` is the raw
        ``datetime`` attribute of its ``<time>`` element.

    Raises:
        urllib.error.URLError: If the page cannot be fetched.
    """
    url = "https://t.me/s/" + channel
    # Parse inside the ``with`` so the HTTP response is closed as soon as the
    # document has been read instead of lingering until garbage collection.
    with urlopen(Request(url)) as res:
        soup = BeautifulSoup(res, features="html.parser")

    messages = soup.find_all("div", attrs={"class": "tgme_widget_message_bubble"})

    for message in messages:
        time_tag = message.find("time", attrs={"class": "time"})
        anchor = message.find("a", attrs={"class": "tgme_widget_message_date"})
        # ``find`` returns None when nothing matches; a bubble without a
        # timestamp or permalink cannot become a feed item, so skip it rather
        # than abort the whole feed with a TypeError.
        if time_tag is None or anchor is None:
            continue
        date = time_tag.get("datetime")
        link = anchor.get("href")
        if date is None or link is None:
            continue

        text = message.find("div", attrs={"class": "tgme_widget_message_text"})
        title = text.text if text else "No text"
        description = str(message)
        yield title, description, link, date


def _cdata(text):
    """Wrap *text* in a CDATA section, splitting any embedded ``]]>``."""
    # A literal "]]>" would terminate the section early; closing and
    # reopening the section in the middle of it keeps the payload intact.
    return "<![CDATA[" + text.replace("]]>", "]]]]><![CDATA[>") + "]]>"


def _rfc822_date(date):
    """Convert an ISO 8601 timestamp string to an RFC 822 style date.

    Returns *date* unchanged when it cannot be parsed, so an unexpected
    format degrades to the raw value instead of aborting the feed.
    """
    from email.utils import format_datetime

    try:
        return format_datetime(datetime.fromisoformat(date))
    except ValueError:
        return date


def main(channel):
    """Print an RSS 2.0 feed of *channel*'s latest messages to stdout.

    Args:
        channel: Telegram channel name, as accepted by ``telegram()``.

    The channel name, the channel URL and each item link are XML-escaped,
    titles and descriptions are emitted as CDATA, and each item's date is
    converted for ``<pubDate>``.
    """
    from xml.sax.saxutils import escape

    url = "https://t.me/s/" + channel
    safe_channel = escape(channel)

    print(
        """<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0">
  <channel>
    <title>Telegram: """
        + safe_channel
        + """</title>
    <link>"""
        + escape(url)
        + """</link>
    <description>The latest entries of the telegram channel of """
        + safe_channel
        + """</description>
    <lastBuildDate>"""
        + _format_date(datetime.now())
        + """</lastBuildDate>"""
    )

    for title, description, link, date in telegram(channel):
        print("    <item>")
        print("      <title>" + _cdata(title) + "</title>")
        print("      <link>" + escape(link) + "</link>")
        print("      <description>" + _cdata(description) + "</description>")
        print("      <pubDate>" + escape(_rfc822_date(date)) + "</pubDate>")
        print("    </item>")

    print("  </channel>")
    print("</rss>")


if __name__ == "__main__":
    # Script entry point: exactly one argument, the channel name, is expected.
    argv = sys.argv
    if len(argv) == 2:
        main(argv[1])
    else:
        print("Usage:", argv[0], "<telegram channel>")
        sys.exit(1)