1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
|
#!/usr/bin/env python3
from urllib.request import urlopen, Request
from datetime import datetime
from bs4 import BeautifulSoup
import sys
def _format_date(dt):
    """Render a datetime as an RFC 822 date string.

    Example result: ``Sat, 07 Sep 2002 00:00:01 GMT``.

    The ``GMT`` suffix is appended unconditionally and no timezone
    conversion is performed, so the caller must pass a value that is
    already expressed in GMT.

    Day and month names come from fixed English tables instead of
    ``strftime``, whose output depends on the active locale.
    (Approach taken from PyRSS2Gen.)
    """
    day_names = ("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun")
    month_names = (
        "Jan", "Feb", "Mar", "Apr", "May", "Jun",
        "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
    )
    weekday = day_names[dt.weekday()]  # weekday(): Monday == 0
    month = month_names[dt.month - 1]  # month is 1-based
    return "{}, {:02d} {} {:04d} {:02d}:{:02d}:{:02d} GMT".format(
        weekday, dt.day, month, dt.year, dt.hour, dt.minute, dt.second
    )
def netto(store_id):
    """Look up the current flyer link on the Netto online-flyer page.

    Downloads the page for ``store_id`` and takes the ``href`` of the first
    ``<a class="flipbook_pdf_flipbook">`` element, with any query string
    removed.

    Args:
        store_id: Store identifier; converted with ``str()`` and appended
            to the flyer page URL.

    Returns:
        A ``(title, url)`` tuple. ``url`` is the flyer link without its
        query string. ``title`` is the 7-character slice of ``url`` that
        starts at the first occurrence of the current year, or ``''`` when
        the year does not occur in the link.
        NOTE(review): the slice presumably captures a year-plus-week token;
        confirm against real flyer links.

    Raises:
        RuntimeError: If the page has no flyer anchor or the anchor has no
            ``href``.
        urllib.error.URLError: Propagated from ``urlopen`` on network or
            HTTP failures.
    """
    page_url = 'https://www.netto-online.de/ueber-netto/Online-Prospekte.chtm/' + str(store_id)
    # Close the HTTP response deterministically. BeautifulSoup reads the
    # whole body inside its constructor, so parsing happens within the block.
    with urlopen(Request(page_url)) as res:
        soup = BeautifulSoup(res, features="html.parser")

    anchor = soup.find('a', attrs={'class': 'flipbook_pdf_flipbook'})
    href = anchor.get('href') if anchor is not None else None
    if not href:
        # find() returns None when nothing matches; fail with a clear
        # message instead of an opaque subscript error on None.
        raise RuntimeError('no flyer link (a.flipbook_pdf_flipbook) found on ' + page_url)

    url = href.split('?')[0]
    year = str(datetime.now().year)
    start = url.find(year)
    # str.find() returns -1 when the year is absent; slicing from -1 would
    # yield an arbitrary fragment, so fall back to an empty title.
    title = url[start:start + 7] if start != -1 else ''
    return title, url
def main(store_id=9110):
    """Print an RSS 2.0 feed with the current Netto flyer to stdout.

    The feed has a channel header (title, link, description,
    lastBuildDate) followed by a single ``<item>`` whose title and link
    come from ``netto(store_id)``.

    Args:
        store_id: Store identifier used for the channel title, the channel
            link and the flyer lookup. Defaults to 9110.

    Raises:
        Whatever ``netto`` raises when the flyer page cannot be fetched or
        parsed. The channel header has already been printed at that point.
    """
    # Function-scope import: ``timezone`` is not in the file's import block.
    from datetime import timezone

    url = 'https://www.netto-online.de/ueber-netto/Online-Prospekte.chtm/' + str(store_id)
    # _format_date() labels its result "GMT" without converting, so it must
    # be given UTC rather than local time.
    build_date = _format_date(datetime.now(timezone.utc))
    header_lines = [
        '<?xml version="1.0" encoding="UTF-8"?>',
        '<rss version="2.0">',
        '<channel>',
        '<title>Netto Angebote ' + str(store_id) + '</title>',
        '<link>' + url + '</link>',
        '<description>PDF der neuen Netto Angebote für den Laden um die Ecke.</description>',
        '<lastBuildDate>' + build_date + '</lastBuildDate>',
    ]
    print("\n".join(header_lines))

    # netto() builds the page URL from the store id itself; passing the
    # already-built ``url`` would make it request a doubled URL.
    title, link = netto(store_id)
    print(' <item>')
    print(' <title><![CDATA[Angebote für ' + title + ']]></title>')
    print(' <link>' + link + '</link>')
    # No <description>, <pubDate> or media element is emitted for the item.
    print(' </item>')
    print(' </channel>')
    print('</rss>')
if __name__ == "__main__":
    # Script entry point: emit the feed for main()'s default store id.
    # Command-line arguments are not read.
    main()
|