summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Matthias Schiffer <mschiffer@universe-factory.net>, 2012-08-15 16:06:33 +0200
committer: Matthias Schiffer <mschiffer@universe-factory.net>, 2012-08-15 16:06:33 +0200
commit: b98e064c24562de8401d720c09d3be5f34a79a1a (patch)
tree: 73bb2ee59e12538bb946bff55530cd3ca7b2409b
download: mensahl-master.tar
          mensahl-master.zip
Initial version (HEAD, master)
-rwxr-xr-x mensahl.py 126
1 files changed, 126 insertions, 0 deletions
diff --git a/mensahl.py b/mensahl.py
new file mode 100755
index 0000000..9f167a3
--- /dev/null
+++ b/mensahl.py
@@ -0,0 +1,126 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
import html
import html.parser
import json
import re
import sys

from datetime import datetime, timedelta
from urllib.request import urlopen
+
+
### Config ###
# Address of the page that carries the weekly menu table.
mensa_url = 'http://www.uni-kiel.de/stwsh/seiten_essen/plan_mensa_luebeck.html'
# Character set used to decode the downloaded page.
encoding = 'ISO-8859-1'
# Labels for the meal rows of the table, in the order the rows are parsed;
# these become the keys of each day's 'meals' dictionary in the output.
meal_types = [
    'Eintopf',
    'Hauptgericht',
    'Vegetarisches Hauptgericht',
    'Beilagen',
]
### Config end ###
+
+
class Mensa:
    """Extract a week of meals from the HTML of the Mensa menu page.

    The page is expected to contain a table whose heading cell holds a date
    range of the form ``dd.mm. - dd.mm.yyyy``, followed by a header row that
    ends in a ``Freitag`` cell.  After that header, the table is read as
    pairs of rows: one row of meal names and one row of prices, each with
    (at least) five cells, one per weekday.
    """

    # Number of weekday columns (Monday to Friday) read from every row.
    _WEEKDAYS = 5

    def __init__(self):
        # Groups: 1 = start day, 2 = start month, 3 = end day, 4 = end month,
        # 5 = year (written after the end date), 6 = table body that follows
        # the weekday header row.
        self._re_data_match = re.compile(
            r'^.*?<table[^>]*>.*?<td[^>]*>.*?'
            r'(\d+)\.(\d+)\. ?- ?(\d+)\.(\d+)\.(\d+)'
            r'.*?</td>.*?<td[^>]*>Freitag</td>\s*</tr>(.*?)</table>.*$',
            re.DOTALL | re.IGNORECASE)
        # Two consecutive table rows: group 1 = meal cells, group 2 = prices.
        self._re_day_match = re.compile(
            r'.*?<tr[^>]*>(.*?)</tr>.*?<tr[^>]*>(.*?)</tr>',
            re.DOTALL | re.IGNORECASE)
        # The content of the next table cell.
        self._re_meal_match = re.compile(
            r'.*?<td[^>]*>(.*?)</td>', re.DOTALL | re.IGNORECASE)

    def _unescape_strip(self, s):
        """Turn the HTML content of one table cell into plain text.

        Character references are decoded, ``<br />`` becomes a space,
        hyphenation across whitespace is joined, the organic logo image is
        replaced by the word ``Bio`` and all remaining tags are dropped.

        :param s: raw HTML content of a cell
        :returns: the cleaned text without leading or trailing whitespace
        """
        # html.unescape() replaces HTMLParser.unescape(), which was removed
        # in Python 3.9.
        s = html.unescape(s)
        s = s.replace('<br />', ' ')
        # "Kartoffel- suppe" -> "Kartoffel-suppe" ...
        s = re.sub(r'-\s+', '-', s)
        s = re.sub(r'\s+', ' ', s)
        # ... -> "Kartoffelsuppe" (only between two lower-case letters).
        s = re.sub(r'([a-zäöüß])-([a-zäöüß])', r'\1\2', s)
        s = re.sub(r'<img[^>]*logo_bio[^>]*>', 'Bio', s)
        s = re.sub(r'<[^>]*>', '', s)
        return s.strip()

    def _handle_data_row(self, data, pos):
        """Parse the next meal row and its price row, starting at *pos*.

        :param data: HTML of the table body
        :param pos: offset in *data* at which to start looking
        :returns: ``(cells, end)`` where *cells* is a list with one
            ``(name, price)`` tuple per weekday and *end* is the offset in
            *data* just behind the price row; ``None`` if no pair of rows is
            found or one of the rows has fewer cells than weekdays
        """
        match = self._re_day_match.match(data, pos)
        if not match:
            return None

        meal_cells, price_cells = match.group(1, 2)

        ret = []
        mealpos = 0
        pricepos = 0

        for _ in range(self._WEEKDAYS):
            mealmatch = self._re_meal_match.match(meal_cells, mealpos)
            pricematch = self._re_meal_match.match(price_cells, pricepos)

            if not mealmatch or not pricematch:
                return None

            ret.append((self._unescape_strip(mealmatch.group(1)),
                        self._unescape_strip(pricematch.group(1))))
            mealpos = mealmatch.end()
            pricepos = pricematch.end()

        return (ret, match.end())

    def handle_data(self, data, row_types=None):
        """Parse the menu page and return the meals of the week.

        :param data: decoded HTML of the menu page
        :param row_types: labels for the meal rows in table order; defaults
            to the module-level ``meal_types`` configuration
        :returns: a list with one dictionary per weekday of the form
            ``{'date': 'YYYY-MM-DD', 'meals': {label: {'name': ...,
            'price': ...}}}``; cells with an empty name are left out of
            ``'meals'``.  ``None`` is returned if the page does not match
            the expected layout, the date in the heading is not a valid
            date, or not a single meal row could be parsed.
        """
        if row_types is None:
            row_types = meal_types

        match = self._re_data_match.match(data)
        if not match:
            return None

        start_day, start_month, end_month, year = match.group(1, 2, 4, 5)

        try:
            firstday = datetime.strptime(
                '%s-%s-%s' % (year, start_month, start_day),
                '%Y-%m-%d').date()
            # The year is written after the end date.  When the start month
            # is later than the end month, the week begins in the old year.
            if int(start_month) > int(end_month):
                firstday = firstday.replace(year=firstday.year - 1)
        except ValueError:
            # The heading looked like a date range but is not a real date;
            # treat it like any other invalid page.
            return None

        table = match.group(6)

        meals = []
        pos = 0

        for _ in row_types:
            row = self._handle_data_row(table, pos)
            if row is None:
                break

            cells, pos = row
            meals.append(cells)

        if not meals:
            return None

        # Rows missing at the end of the table count as "nothing offered".
        empty_row = [(None, None)] * self._WEEKDAYS
        meals.extend([empty_row] * (len(row_types) - len(meals)))

        ret = []

        for weekday in range(self._WEEKDAYS):
            mealdict = {}

            for meal_type, row in zip(row_types, meals):
                name, price = row[weekday]
                if name:
                    mealdict[meal_type] = {
                        'name': name,
                        'price': price,
                    }

            # Every weekday is reported, even when no meal is listed for it.
            ret.append({
                'date': (firstday + timedelta(weekday)).strftime('%Y-%m-%d'),
                'meals': mealdict,
            })

        return ret
+
+
# Script entry point: download the menu page, parse it and write the week's
# meals as JSON to standard output.  Exits with status 1 (and a message on
# standard error) if the page cannot be fetched or parsed.
if __name__ == '__main__':
    try:
        # The ``with`` block closes the HTTP response as soon as the body
        # has been read; 20 is the timeout in seconds.
        with urlopen(mensa_url, None, 20) as response:
            data = response.read().decode(encoding)
    except Exception:
        # Top-level boundary: any download or decoding failure is reported
        # below.  Unlike a bare ``except:``, this does not swallow
        # KeyboardInterrupt or SystemExit.
        data = None

    if not data:
        print("Could not read Mensa data", file=sys.stderr)
        sys.exit(1)

    meals = Mensa().handle_data(data)
    if not meals:
        print("No or invalid Mensa data was returned", file=sys.stderr)
        sys.exit(1)

    # Keep umlauts readable instead of escaping them as \uXXXX.
    json.dump(meals, sys.stdout, ensure_ascii=False)