diff options
author | André Glüpker <git@wgmd.de> | 2021-05-05 20:09:30 +0200 |
---|---|---|
committer | André Glüpker <git@wgmd.de> | 2021-05-05 20:09:30 +0200 |
commit | 5774dbfb2caa42cb55bafab98a40e47f395e44d9 (patch) | |
tree | 8294b7b6fefebc1befeed4104f3b5604683999a8 /netto.py | |
download | rss-feeds-5774dbfb2caa42cb55bafab98a40e47f395e44d9.tar.gz rss-feeds-5774dbfb2caa42cb55bafab98a40e47f395e44d9.tar.bz2 rss-feeds-5774dbfb2caa42cb55bafab98a40e47f395e44d9.zip |
Initial commit of RSS converter application
Diffstat (limited to 'netto.py')
-rwxr-xr-x | netto.py | 69 |
1 files changed, 69 insertions, 0 deletions
#!/usr/bin/env python3
"""Print an RSS 2.0 feed that links to the current Netto offers PDF.

The store's "Online-Prospekte" page is scraped for the flipbook PDF link and
a single-item feed is written to stdout.
"""

from urllib.request import urlopen, Request
from datetime import datetime, timezone
from xml.sax.saxutils import escape
import sys

# Page listing the online prospectuses; the store id is appended to it.
_PROSPECT_URL = 'https://www.netto-online.de/ueber-netto/Online-Prospekte.chtm/'

# Seconds to wait for the web server before giving up.
_HTTP_TIMEOUT = 30

_FEED_TEMPLATE = """<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0">
  <channel>
    <title>Netto Angebote {store_id}</title>
    <link>{channel_link}</link>
    <description>PDF der neuen Netto Angebote für den Laden um die Ecke.</description>
    <lastBuildDate>{build_date}</lastBuildDate>
    <item>
      <title><![CDATA[Angebote für {title}]]></title>
      <link>{item_link}</link>
    </item>
  </channel>
</rss>"""


def _format_date(dt):
    """Convert a datetime into an RFC 822 formatted date.

    A timezone-aware ``dt`` is converted to UTC first; a naive ``dt`` is
    assumed to already be in GMT. Adapted from PyRSS2Gen.

    The result looks like ``Sat, 07 Sep 2002 00:00:01 GMT``. ``strftime`` is
    deliberately avoided because its day and month names depend on the locale.
    """
    if dt.utcoffset() is not None:
        dt = dt.astimezone(timezone.utc)
    return "%s, %02d %s %04d %02d:%02d:%02d GMT" % (
        ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"][dt.weekday()],
        dt.day,
        ["Jan", "Feb", "Mar", "Apr", "May", "Jun",
         "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"][dt.month - 1],
        dt.year, dt.hour, dt.minute, dt.second)


def _extract_title(url, year):
    """Return the 7 characters of ``url`` starting at ``year``.

    If ``year`` does not occur in ``url`` the last path segment (the file
    name) is returned instead, so the feed item never gets an empty or
    arbitrary title.
    """
    start = url.find(year)
    if start == -1:
        return url.rstrip('/').rsplit('/', 1)[-1]
    return url[start:start + 7]


def _build_feed(store_id, title, link, build_date):
    """Render the complete RSS document as a string.

    ``store_id`` and both links are XML-escaped. ``title`` is placed in a
    CDATA section, so only a literal ``]]>`` needs to be split up.
    """
    return _FEED_TEMPLATE.format(
        store_id=escape(str(store_id)),
        channel_link=escape(_PROSPECT_URL + str(store_id)),
        build_date=_format_date(build_date),
        title=title.replace(']]>', ']]]]><![CDATA[>'),
        item_link=escape(link),
    )


def netto(store_id):
    """Scrape the prospectus page of ``store_id`` for the offers PDF.

    Returns:
        A ``(title, url)`` tuple: ``url`` is the PDF link without its query
        string, ``title`` is the part of that link starting at the current
        year (see ``_extract_title``).

    Raises:
        LookupError: the page contains no flipbook PDF link.
        urllib.error.URLError: the page could not be fetched.
    """
    # Imported here so the formatting helpers stay importable on systems
    # where the third-party bs4 package is not installed.
    from bs4 import BeautifulSoup

    page_url = _PROSPECT_URL + str(store_id)
    # The context manager closes the HTTP response once parsing is done.
    with urlopen(Request(page_url), timeout=_HTTP_TIMEOUT) as res:
        soup = BeautifulSoup(res, features="html.parser")

    anchor = soup.find('a', attrs={'class': 'flipbook_pdf_flipbook'})
    href = anchor.get('href') if anchor is not None else None
    if not href:
        raise LookupError('no flipbook PDF link found on ' + page_url)

    pdf_url = href.split('?')[0]
    return _extract_title(pdf_url, str(datetime.now().year)), pdf_url


def main(store_id=9110):
    """Print the RSS feed for ``store_id`` to stdout."""
    # Scrape before printing anything so that a failure does not leave a
    # truncated XML document on stdout. Pass the store id itself: netto()
    # builds the page URL on its own.
    title, link = netto(store_id)
    print(_build_feed(store_id, title, link, datetime.now(timezone.utc)))


if __name__ == "__main__":
    if len(sys.argv) > 2:
        print('Usage:', sys.argv[0], '[store_id]', file=sys.stderr)
        sys.exit(1)
    # With no argument the default store id of main() is used.
    main(*sys.argv[1:])