#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Fetch the weekly Mensa (canteen) plan and print it as JSON.

Run as a script, this downloads ``mensa_url``, decodes it with ``encoding``,
hands the text to :meth:`Mensa.handle_data` and dumps the result to stdout.
Exit status is 1 when the page cannot be read or cannot be parsed.

Provenance: "Initial version" of mensahl.py, commit
b98e064c24562de8401d720c09d3be5f34a79a1a by Matthias Schiffer,
Wed, 15 Aug 2012 16:06:33 +0200 (recovered from a cgit v1.2.3 patch view).

NOTE(review): the patch view this file was recovered from appears to have
stripped HTML tags out of the string literals.  The regular expressions
below are reproduced exactly as they were visible; the recurring ``]*>``
fragments look like the tails of ``<tag[^>]*>`` patterns, and the ``'\\n'``
replaced in ``_unescape_strip`` was a raw line break inside the quotes
(possibly a rendered line-break tag).  As written, group 4 of
``_re_data_match`` is a lazy group followed by ``.*$`` and therefore always
captures the empty string, so ``handle_data`` can only return ``None``.
Restore the patterns from the upstream repository before relying on this.
"""

import html
import json
import re
import sys

from datetime import datetime, timedelta
from urllib.request import urlopen


### Config ###
mensa_url = 'http://www.uni-kiel.de/stwsh/seiten_essen/plan_mensa_luebeck.html'
encoding = 'ISO-8859-1'
meal_types = ['Eintopf', 'Hauptgericht', 'Vegetarisches Hauptgericht', 'Beilagen']
### Config end ###

# Number of consecutive days, counted from the first date found in the page,
# that one plan covers.
_DAYS_PER_WEEK = 5


class Mensa:
    """Regex-based parser turning the plan page into per-day meal dicts."""

    def __init__(self):
        # NOTE(review): patterns kept byte-for-byte as visible in the patch
        # view; they look extraction-damaged (see the module docstring).
        self._re_data_match = re.compile(r'^.*?]*>.*?]*>.*?(\d+)\.(\d+)\. ?- ?\d+\.\d+\.(\d+).*?.*?]*>Freitag\s*(.*?).*$', re.DOTALL|re.IGNORECASE)
        self._re_day_match = re.compile(r'.*?]*>(.*?).*?]*>(.*?)', re.DOTALL|re.IGNORECASE)
        self._re_meal_match = re.compile(r'.*?]*>(.*?)', re.DOTALL|re.IGNORECASE)

    def _unescape_strip(self, s):
        """Return *s* as plain text.

        Character references are decoded, a hyphen followed by whitespace
        is glued to the next word, whitespace runs collapse to one space,
        a hyphen between two lowercase letters is dropped (re-joining words
        that were split across lines), the bio-logo pattern is replaced by
        the text ``Bio``, remaining ``<...>`` tags are removed and the
        result is stripped.
        """
        # html.unescape replaces HTMLParser.unescape, which no longer exists
        # in Python 3.9 and later.
        s = html.unescape(s)
        # NOTE(review): the original literal contained a raw line break;
        # presumably a line-break tag before extraction - confirm upstream.
        s = s.replace('\n', ' ')
        s = re.sub(r'-\s+', '-', s)
        s = re.sub(r'\s+', ' ', s)
        s = re.sub(r'([a-zäöüß])-([a-zäöüß])', r'\1\2', s)
        # NOTE(review): pattern looks truncated at its start - confirm upstream.
        s = re.sub(r']*logo_bio[^>]*>', 'Bio', s)
        s = re.sub(r'<[^>]*>', '', s)
        return s.strip()

    def _handle_data_row(self, data, pos):
        """Parse one meal row of *data* starting at offset *pos*.

        ``_re_day_match`` yields two chunks (names and prices); each must
        contain ``_DAYS_PER_WEEK`` cells matched by ``_re_meal_match``.

        Returns ``(cells, end)`` where *cells* is a list of cleaned
        ``(name, price)`` tuples, one per day, and *end* is the offset just
        past the row; returns ``None`` when the row or any cell is missing.
        """
        match = self._re_day_match.match(data, pos)
        if not match:
            return None

        names, prices = match.group(1), match.group(2)
        cells = []
        name_pos = price_pos = 0

        for _ in range(_DAYS_PER_WEEK):
            name_match = self._re_meal_match.match(names, name_pos)
            price_match = self._re_meal_match.match(prices, price_pos)

            if not name_match or not price_match:
                return None

            cells.append((self._unescape_strip(name_match.group(1)),
                          self._unescape_strip(price_match.group(1))))
            name_pos = name_match.end()
            price_pos = price_match.end()

        return (cells, match.end())

    def handle_data(self, data):
        """Parse the decoded page text *data* into a list of day dicts.

        Returns one dict per day, ``_DAYS_PER_WEEK`` in total, of the form
        ``{'date': 'YYYY-MM-DD', 'meals': {meal_type: {'name': ...,
        'price': ...}}}``.  Rows are assigned to ``meal_types`` in order;
        a day is emitted even when it has no meals.  Returns ``None`` when
        the page does not match, contains no complete meal row, or carries
        a start date that is not a valid calendar date.
        """
        match = self._re_data_match.match(data)
        if not match:
            return None

        day, month, year = match.group(1), match.group(2), match.group(3)
        try:
            firstday = datetime.strptime('%s-%s-%s' % (year, month, day), '%Y-%m-%d').date()
        except ValueError:
            # Digits that do not form a real date are invalid data, not a crash.
            return None

        body = match.group(4)
        rows = []
        pos = 0

        # At most one row per configured meal type; stop at the first row
        # that cannot be parsed.
        for _ in meal_types:
            row = self._handle_data_row(body, pos)
            if not row:
                break
            cells, pos = row
            rows.append(cells)

        if not rows:
            return None

        # Pad missing trailing rows so every meal type has an entry per day.
        while len(rows) < len(meal_types):
            rows.append([(None, None)] * _DAYS_PER_WEEK)

        ret = []
        for weekday in range(_DAYS_PER_WEEK):
            mealdict = {}
            for meal_type, cells in zip(meal_types, rows):
                name, price = cells[weekday]
                if name:
                    mealdict[meal_type] = {
                        'name': name,
                        'price': price,
                    }

            ret.append({
                'date': (firstday + timedelta(weekday)).strftime('%Y-%m-%d'),
                'meals': mealdict,
            })

        return ret


def main():
    """Download, parse and print the plan; return the process exit status."""
    try:
        # The context manager closes the HTTP response even if read() fails.
        with urlopen(mensa_url, None, 20) as response:
            data = response.read().decode(encoding)
    except Exception as err:
        # Top-level boundary: report the reason instead of hiding it, and
        # let KeyboardInterrupt/SystemExit propagate.
        print("Could not read Mensa data: %s" % err, file=sys.stderr)
        return 1

    if not data:
        print("Could not read Mensa data", file=sys.stderr)
        return 1

    meals = Mensa().handle_data(data)
    if not meals:
        print("No or invalid Mensa data was returned", file=sys.stderr)
        return 1

    json.dump(meals, sys.stdout, ensure_ascii=False)
    return 0


if __name__ == '__main__':
    sys.exit(main())