#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import html.parser
import json
import re
import sys
from datetime import datetime, timedelta
from urllib.request import urlopen
# --- Configuration ---
# Address the meal plan page is downloaded from.
mensa_url = 'http://www.uni-kiel.de/stwsh/seiten_essen/plan_mensa_luebeck.html'
# Character encoding used to decode the downloaded page.
encoding = 'ISO-8859-1'
# Meal categories; they are assigned, in this order, to the consecutive
# row pairs parsed from the plan.
meal_types = [
    'Eintopf',
    'Hauptgericht',
    'Vegetarisches Hauptgericht',
    'Beilagen',
]
# --- End of configuration ---
class Mensa:
    """Extract the weekly meal plan from the HTML page of the Mensa.

    The page is taken apart with regular expressions.  The parser looks for
    a header cell carrying the week's date range (``DD.MM. - DD.MM.YYYY``),
    a header cell reading ``Freitag`` and, after it, pairs of table rows
    (meal names first, prices second) with one cell per weekday.
    """

    # One plan covers Monday to Friday.
    _WEEKDAYS = 5

    def __init__(self):
        """Compile the patterns used to take the page apart."""
        # NOTE(review): the HTML tag fragments inside these patterns (and
        # inside `_unescape_strip`) were missing from the reviewed copy of
        # this file, which left the string literals unterminated.  The tag
        # names used below (`table`, `tr`, `td`/`th`, `br`, `img`) are a
        # reconstruction from the surviving text -- verify them against
        # version control / the live page markup.

        # Groups: 1 = day and 2 = month of the first day, 3 = year,
        # 4 = everything between the "Freitag" header cell and </table>.
        self._re_data_match = re.compile(
            r'^.*?<table[^>]*>.*?<t[dh][^>]*>.*?'
            r'(\d+)\.(\d+)\. ?- ?\d+\.\d+\.(\d+).*?</t[dh]>'
            r'.*?<t[dh][^>]*>Freitag</t[dh]>\s*(.*?)</table>.*$',
            re.DOTALL | re.IGNORECASE)
        # Two consecutive rows: group 1 = meal cells, group 2 = price cells.
        self._re_day_match = re.compile(
            r'.*?<tr[^>]*>(.*?)</tr>.*?<tr[^>]*>(.*?)</tr>',
            re.DOTALL | re.IGNORECASE)
        # The content of the next table cell.
        self._re_meal_match = re.compile(
            r'.*?<td[^>]*>(.*?)</td>',
            re.DOTALL | re.IGNORECASE)

    def _unescape_strip(self, s):
        """Turn the raw HTML of one table cell into plain text.

        Character references are decoded, line breaks and runs of
        whitespace collapse to single spaces, hyphenation between two
        lowercase letters is joined, the organic-food logo becomes the
        word ``Bio`` and all remaining tags are dropped.
        """
        # HTMLParser.unescape() was removed in Python 3.9; html.unescape()
        # is the documented replacement.
        s = html.unescape(s)
        s = re.sub(r'<br\s*/?>', ' ', s, flags=re.IGNORECASE)
        # "Linsen- suppe" -> "Linsen-suppe" -> "Linsensuppe"
        s = re.sub(r'-\s+', '-', s)
        s = re.sub(r'\s+', ' ', s)
        s = re.sub(r'([a-zäöüß])-([a-zäöüß])', r'\1\2', s)
        # Must run before the generic tag removal below.
        s = re.sub(r'<img[^>]*logo_bio[^>]*>', 'Bio', s)
        s = re.sub(r'<[^>]*>', '', s)
        return s.strip()

    def _handle_data_row(self, data, pos):
        """Parse one meal row together with the price row following it.

        :param data: HTML of the table body.
        :param pos: offset in ``data`` at which to start looking.
        :returns: ``(pairs, end)`` where ``pairs`` is a list of five
            ``(name, price)`` tuples (Monday to Friday) and ``end`` is the
            offset just behind the price row; ``None`` if no complete row
            pair with five cells each is found.
        """
        match = self._re_day_match.match(data, pos)
        if not match:
            return None
        meal_cells, price_cells = match.group(1), match.group(2)
        pairs = []
        mealpos = pricepos = 0
        for _ in range(self._WEEKDAYS):
            mealmatch = self._re_meal_match.match(meal_cells, mealpos)
            pricematch = self._re_meal_match.match(price_cells, pricepos)
            if not mealmatch or not pricematch:
                return None
            pairs.append((self._unescape_strip(mealmatch.group(1)),
                          self._unescape_strip(pricematch.group(1))))
            mealpos = mealmatch.end()
            pricepos = pricematch.end()
        return (pairs, match.end())

    def handle_data(self, data):
        """Parse the complete page into one entry per weekday.

        The module-level ``meal_types`` list names the row pairs in the
        order in which they are read from the table.

        :param data: decoded HTML of the plan page.
        :returns: a list of five dicts ``{'date': 'YYYY-MM-DD', 'meals':
            {meal_type: {'name': ..., 'price': ...}}}`` (cells with an empty
            name are left out of ``meals``), or ``None`` if the page does
            not match, carries an impossible date or has no meal row.
        """
        match = self._re_data_match.match(data)
        if not match:
            return None
        day, month, year = match.group(1), match.group(2), match.group(3)
        try:
            firstday = datetime.strptime(
                '%s-%s-%s' % (year, month, day), '%Y-%m-%d').date()
        except ValueError:
            # e.g. "31.02." -- treat like any other invalid page
            return None

        body = match.group(4)
        rows = []
        pos = 0
        for _ in meal_types:
            row = self._handle_data_row(body, pos)
            if not row:
                break
            pairs, pos = row
            rows.append(pairs)
        if not rows:
            return None

        ret = []
        for weekday in range(self._WEEKDAYS):
            mealdict = {}
            # zip() stops at the last row that was actually parsed, so
            # meal types without a row simply do not show up.
            for meal_type, pairs in zip(meal_types, rows):
                name, price = pairs[weekday]
                if name:
                    mealdict[meal_type] = {
                        'name': name,
                        'price': price,
                    }
            # Days without any meal are kept, with an empty 'meals' dict.
            ret.append({
                'date': (firstday + timedelta(weekday)).strftime('%Y-%m-%d'),
                'meals': mealdict,
            })
        return ret
if __name__ == '__main__':
    # Download and decode the plan page.  Any failure (network error,
    # timeout, undecodable bytes) is reported as "could not read" below.
    # `except Exception` -- unlike the former bare `except:` -- lets
    # KeyboardInterrupt and SystemExit through, and the `with` block makes
    # sure the HTTP response is closed.
    try:
        with urlopen(mensa_url, None, 20) as response:
            data = response.read().decode(encoding)
    except Exception:
        data = None
    if not data:
        print("Could not read Mensa data", file=sys.stderr)
        sys.exit(1)

    meals = Mensa().handle_data(data)
    if not meals:
        print("No or invalid Mensa data was returned", file=sys.stderr)
        sys.exit(1)

    # Write the parsed plan as JSON, keeping umlauts unescaped.
    json.dump(meals, sys.stdout, ensure_ascii=False)