#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import html.parser
import json
import re
import sys
from datetime import datetime, timedelta
from urllib.request import urlopen
### Config ###
# URL of the Studentenwerk S-H page carrying the weekly Mensa Lübeck plan.
mensa_url = 'http://www.uni-kiel.de/stwsh/seiten_essen/plan_mensa_luebeck.html'
# The page is served as Latin-1, not UTF-8.
encoding = 'ISO-8859-1'
# Labels for the meal rows, in the order the row pairs appear in the plan table.
meal_types = ['Eintopf', 'Hauptgericht', 'Vegetarisches Hauptgericht', 'Beilagen']
### Config end ###
class Mensa:
    """Parse the weekly Mensa Lübeck meal plan out of the Studentenwerk HTML page.

    The page is scraped with regular expressions: a table whose header cell
    carries the week's date range ("DD.MM. - DD.MM.YYYY"), followed by pairs
    of rows (meal names, then prices), one pair per meal type, with five
    columns (Monday..Friday) each.
    """

    def __init__(self):
        # Whole-page match: captures day, month, year of the week's first day
        # plus everything between the weekday header row and </table>.
        self._re_data_match = re.compile(
            r'^.*?<table[^>]*>.*?<td[^>]*>.*?(\d+)\.(\d+)\. ?- ?\d+\.\d+\.(\d+).*?</td>'
            r'.*?<td[^>]*>Freitag</td>\s*</tr>(.*?)</table>.*$',
            re.DOTALL | re.IGNORECASE)
        # Two consecutive <tr> rows: meal-name row, then price row.
        self._re_day_match = re.compile(
            r'.*?<tr[^>]*>(.*?)</tr>.*?<tr[^>]*>(.*?)</tr>',
            re.DOTALL | re.IGNORECASE)
        # A single <td> cell; used repeatedly with an advancing position.
        self._re_meal_match = re.compile(
            r'.*?<td[^>]*>(.*?)</td>', re.DOTALL | re.IGNORECASE)

    def _unescape_strip(self, s):
        """Turn one HTML table cell into clean plain text.

        Decodes entities, collapses whitespace, joins words the page
        hyphenated across a line break, keeps the "Bio" logo as text, and
        strips all remaining markup.
        """
        # html.unescape replaces HTMLParser().unescape, which was removed
        # in Python 3.9; `import html.parser` already binds `html`.
        s = html.unescape(s)
        s = s.replace('<br />', ' ')
        # "Gemüse-\neintopf" -> "Gemüse-eintopf" (drop break after hyphen) ...
        s = re.sub(r'-\s+', '-', s)
        s = re.sub(r'\s+', ' ', s)
        # ... then drop the soft hyphen between two lowercase letters entirely.
        s = re.sub(r'([a-zäöüß])-([a-zäöüß])', r'\1\2', s)
        # Keep the organic-food logo as the word "Bio" before tags are stripped.
        s = re.sub(r'<img[^>]*logo_bio[^>]*>', 'Bio', s)
        s = re.sub(r'<[^>]*>', '', s)
        return s.strip()

    def _handle_data_row(self, data, pos):
        """Parse one meal-type entry (a name row plus a price row) at *pos*.

        Returns ([(name, price)] * 5 weekdays, end position), or None when
        the expected two-row structure is not found.
        """
        match = self._re_day_match.match(data, pos)
        if not match:
            return None
        days = []
        mealpos = 0
        pricepos = 0
        for _ in range(5):  # Monday..Friday
            mealmatch = self._re_meal_match.match(match.group(1), mealpos)
            pricematch = self._re_meal_match.match(match.group(2), pricepos)
            if not mealmatch or not pricematch:
                return None
            days.append((self._unescape_strip(mealmatch.group(1)),
                         self._unescape_strip(pricematch.group(1))))
            mealpos = mealmatch.end()
            pricepos = pricematch.end()
        return (days, match.end())

    def handle_data(self, data):
        """Parse the full plan page.

        Returns a list of five day dicts
        ``{'date': 'YYYY-MM-DD', 'meals': {type: {'name': ..., 'price': ...}}}``,
        or None when the page does not match the expected layout.
        """
        match = self._re_data_match.match(data)
        if not match:
            return None
        # Monday's date, reassembled from the "DD.MM. - DD.MM.YYYY" range.
        firstday = datetime.strptime(
            '%s-%s-%s' % (match.group(3), match.group(2), match.group(1)),
            '%Y-%m-%d').date()
        data = match.group(4)
        meals = []
        pos = 0
        for _ in meal_types:
            row = self._handle_data_row(data, pos)
            if not row:
                break
            meals.append(row[0])
            pos = row[1]
        if not meals:
            return None
        # Pad missing meal-type rows with empty (name, price) tuples so the
        # per-day loop below can index every row uniformly.
        while len(meals) < len(meal_types):
            meals.append([(None, None)] * 5)
        ret = []
        for weekday in range(5):
            mealdict = {}
            for meal_type, rows in zip(meal_types, meals):
                name, price = rows[weekday]
                if name:
                    mealdict[meal_type] = {'name': name, 'price': price}
            # Every weekday is emitted, even if no meals were parsed for it
            # (the original code's append condition was always true).
            ret.append({
                'date': (firstday + timedelta(weekday)).strftime('%Y-%m-%d'),
                'meals': mealdict,
            })
        return ret
if __name__ == '__main__':
    # Fetch and decode the plan page.  URLError/HTTPError are OSError
    # subclasses; a decode failure raises ValueError (UnicodeDecodeError).
    # The previous bare `except:` also swallowed KeyboardInterrupt/SystemExit.
    try:
        with urlopen(mensa_url, None, 20) as response:
            data = response.read().decode(encoding)
    except (OSError, ValueError):
        data = None
    if not data:
        print("Could not read Mensa data", file=sys.stderr)
        sys.exit(1)
    meals = Mensa().handle_data(data)
    if not meals:
        print("No or invalid Mensa data was returned", file=sys.stderr)
        sys.exit(1)
    # ensure_ascii=False keeps umlauts readable in the JSON output.
    json.dump(meals, sys.stdout, ensure_ascii=False)