// Recovered from commit 94de356834be40b1e14c85a016a2e0a9f1768053
// (Matthias Schiffer, Sun, 11 Jul 2010 08:28:49 +0200,
// "Added support for parsing strings to complex terms"),
// which created src/Parse.vala.

namespace Eva {
    /**
     * Parses a string holding exactly one term.
     *
     * Trailing whitespace is stripped first. The remaining text must be
     * consumed completely by a single term.
     *
     * @param str the text to parse
     * @return the parsed term, or null if the text is empty, malformed, or
     *         has anything left over after the first term
     */
    internal static Term? parse(string str) {
        string s = str.chomp();

        if(s.length == 0)
            return null;

        Term? ret = parse_term(ref s);

        // Leftover input means the text was more than one single term.
        if(s.length != 0)
            return null;

        return ret;
    }

    /**
     * Parses one term from the front of str, dispatching on its first
     * non-whitespace character.
     *
     * @param str in: text to parse; out: the text following the term
     *            (its content is unspecified when null is returned)
     * @return the parsed term, or null on a syntax error
     */
    private static Term? parse_term(ref string str) {
        str = str.chug();
        unichar c = str.get_char();

        if(c.isdigit() || c == '-') {
            return parse_number(ref str);
        }
        else if(c.islower() || c == '\'') {
            return parse_atom(ref str);
        }
        else if(c == '"') {
            return parse_string(ref str);
        }
        else if(c.isupper() || c == '_') {
            return parse_var(ref str);
        }
        else if(c == '<') {
            return parse_binary(ref str);
        }
        else if(c == '{') {
            return parse_tuple(ref str);
        }
        else if(c == '[') {
            return parse_list(ref str);
        }
        else {
            // Also reached for empty input, where get_char() yields 0.
            return null;
        }
    }

    /**
     * Parses a number: an optional '-', digits, and an optional fractional
     * part introduced by a single '.' that must be followed by a digit.
     *
     * @param str in: text starting at the number; out: the text after it
     * @return a Double if a fractional part is present, otherwise a UInt
     *         for an unsigned literal or an Int for a literal with '-';
     *         null on a syntax error
     */
    private static Term? parse_number(ref string str) {
        bool positive = true;
        bool fractional = false;

        unowned string s = str;

        if(s.get_char() == '-') {
            positive = false;
            s = s.next_char();

            // A sign must be followed directly by a digit.
            if(s.length == 0 || !s.get_char().isdigit())
                return null;
        }

        if(s.get_char() == '.')
            return null;

        while(s.length > 0) {
            unichar c = s.get_char();

            if(c == '.') {
                // Only one decimal point is allowed.
                if(fractional)
                    return null;

                fractional = true;

                s = s.next_char();
                if(s.length == 0 || !s.get_char().isdigit())
                    return null;

                continue;
            }

            if(!c.isdigit())
                break;

            s = s.next_char();
        }

        // Copy the literal before str is reassigned: s points into str.
        string number = str.substring(0, str.pointer_to_offset(s));
        str = s;

        if(fractional) {
            return new Double(number.to_double());
        }
        else if(positive) {
            return new UInt(number.to_ulong());
        }
        else {
            return new Int(number.to_long());
        }
    }

    /**
     * Parses an atom.
     *
     * A quoted atom is delimited by single quotes; a backslash makes the
     * following character literal, and characters above code point 255 are
     * rejected. An unquoted atom is a run of alphanumeric characters and
     * underscores.
     *
     * @param str in: text starting at the atom; out: the text after it
     * @return the Atom, or null on a syntax error
     */
    private static Term? parse_atom(ref string str) {
        if(str.get_char() == '\'') {
            string atom = "";
            unowned string s = str.next_char();

            while(s.length > 0) {
                unichar c = s.get_char();

                if(c == '\'')
                    break;

                if(c == '\\') {
                    // Take the escaped character verbatim.
                    s = s.next_char();
                    if(s.length == 0)
                        return null;
                    c = s.get_char();
                }

                if(c > 255) {
                    return null;
                }

                // Six zero bytes are room for any UTF-8 encoded character.
                string buf = string.nfill(6, 0);
                c.to_utf8(buf);
                atom += buf;

                s = s.next_char();
            }

            // Input ended before the closing quote.
            if(s.length == 0)
                return null;

            Term ret = new Atom(atom);
            str = s.next_char();

            return ret;
        }
        else {
            unowned string s = str;

            while(s.length > 0) {
                unichar c = s.get_char();

                if(c != '_' && !c.isalnum()) {
                    break;
                }

                s = s.next_char();
            }

            Term ret = new Atom(str.substring(0, str.pointer_to_offset(s)));
            str = s;

            return ret;
        }
    }

    /**
     * Parses a double-quoted string into a String term.
     *
     * @param str in: text starting at the opening quote; out: the text
     *            after the closing quote
     * @return the String term, or null on a syntax error
     */
    private static Term? parse_string(ref string str) {
        string? ret = do_parse_string(ref str);
        if(ret == null)
            return null;

        return new String(ret);
    }

    /**
     * Reads a double-quoted string and returns its content.
     *
     * A backslash makes the following character literal.
     *
     * @param str in: text starting at the opening quote; out: the text
     *            after the closing quote
     * @return the unquoted content, or null if str does not start with a
     *         quote or the closing quote is missing
     */
    private static string? do_parse_string(ref string str) {
        if(str.get_char() != '"') {
            return null;
        }

        string retstr = "";
        unowned string s = str.next_char();

        while(s.length > 0) {
            unichar c = s.get_char();

            if(c == '"')
                break;

            if(c == '\\') {
                // Take the escaped character verbatim.
                s = s.next_char();
                if(s.length == 0)
                    return null;
                c = s.get_char();
            }

            // Six zero bytes are room for any UTF-8 encoded character.
            string buf = string.nfill(6, 0);
            c.to_utf8(buf);
            retstr += buf;

            s = s.next_char();
        }

        // Input ended before the closing quote.
        if(s.length == 0)
            return null;

        str = s.next_char();

        return retstr;
    }

    /**
     * Parses a variable name: a run of alphanumeric characters and
     * underscores.
     *
     * @param str in: text starting at the name; out: the text after it
     * @return the Var term
     */
    private static Term? parse_var(ref string str) {
        unowned string s = str;

        while(s.length > 0) {
            unichar c = s.get_char();

            if(c != '_' && !c.isalnum()) {
                break;
            }

            s = s.next_char();
        }

        Term ret = new Var(str.substring(0, str.pointer_to_offset(s)));
        str = s;

        return ret;
    }

    /**
     * Parses a tuple: '{', comma-separated terms, '}'. "{}" is the empty
     * tuple; a trailing comma is a syntax error.
     *
     * @param str in: text starting at '{'; out: the text after '}'
     * @return the Tuple, or null on a syntax error
     */
    private static Term? parse_tuple(ref string str) {
        Gee.ArrayList<Term> list = new Gee.ArrayList<Term>();

        if(str.get_char() != '{')
            return null;

        unowned string s = str.next_char();

        while(s.length > 0 && s.get_char().isspace()) {
            s = s.next_char();
        }

        if(s.length == 0)
            return null;

        if(s.get_char() == '}') {
            str = s.next_char();
            return new Tuple({});
        }

        while(s.length > 0) {
            if(s.get_char().isspace()) {
                s = s.next_char();
                continue;
            }

            // Hand the rest to parse_term, then resume after the element.
            str = s;
            Term? t = parse_term(ref str);
            if(t == null)
                return null;
            list.add(t);

            s = str;

            while(s.length > 0 && s.get_char().isspace()) {
                s = s.next_char();
            }

            // At end of input get_char() yields 0, which is rejected below.
            unichar c = s.get_char();
            if(c == '}') {
                str = s.next_char();
                Term[] terms = list.to_array();
                return new Tuple(terms);
            }
            else if(c != ',') {
                return null;
            }
            else {
                s = s.next_char();
            }
        }

        // Input ended after a ',' without a closing '}'.
        return null;
    }

    /**
     * Parses a list: '[' followed by the list body.
     *
     * @param str in: text starting at '['; out: the text after ']'
     * @return the list term, or null on a syntax error
     */
    private static Term? parse_list(ref string str) {
        if(str.get_char() != '[')
            return null;

        str = str.next_char();
        return parse_list_tail(ref str);
    }

    /**
     * Parses what follows the opening '[': either the closing ']' of an
     * empty list or at least one element.
     *
     * @param str in: text after '['; out: the text after ']'
     * @return List.empty, the parsed list, or null on a syntax error
     */
    private static Term? parse_list_tail(ref string str) {
        str = str.chug();

        if(str.length == 0)
            return null;

        if(str.get_char() == ']') {
            str = str.next_char();
            return List.empty;
        }

        return parse_list_elements(ref str);
    }

    /**
     * Parses one list element and everything after it up to and including
     * the closing ']'.
     *
     * After a ',' another element is required, so a trailing comma such as
     * "[a,]" is rejected, in line with parse_tuple. After a '|' exactly one
     * term and the closing ']' must follow.
     *
     * @param str in: text starting at an element; out: the text after ']'
     * @return the list starting at this element, or null on a syntax error
     */
    private static Term? parse_list_elements(ref string str) {
        Term? head = parse_term(ref str);
        if(head == null)
            return null;

        str = str.chug();
        if(str.length == 0)
            return null;
        unichar c = str.get_char();
        str = str.next_char();

        switch(c) {
        case ']':
            return new List(head);
        case ',':
            // A failed remainder must fail the whole list rather than
            // being stored as a null tail.
            Term? rest = parse_list_elements(ref str);
            if(rest == null)
                return null;

            return new List(head, rest);
        case '|':
            Term? tail = parse_term(ref str);
            if(tail == null)
                return null;

            str = str.chug();
            if(str.length == 0 || str.get_char() != ']')
                return null;
            str = str.next_char();

            return new List(head, tail);
        default:
            return null;
        }
    }

    /**
     * Parses a binary: "<<>>", a quoted string between "<<" and ">>", or a
     * comma-separated list of integers between "<<" and ">>".
     *
     * Each integer is converted with to_int() and cast to char.
     *
     * @param str in: text starting at "<<"; out: the text after ">>"
     * @return the Binary, or null on a syntax error
     */
    private static Term? parse_binary(ref string str) {
        // The shortest valid binary is "<<>>".
        if(str.length < 4)
            return null;

        if(str[0:2] != "<<")
            return null;

        str = str.offset(2).chug();

        // Guard the slice: fewer than two characters may be left here.
        if(str.length >= 2 && str[0:2] == ">>") {
            str = str.offset(2);
            return new Binary({});
        }

        if(str.get_char() == '"') {
            string? ret = do_parse_string(ref str);
            if(ret == null)
                return null;

            str = str.chug();

            if(str.length < 2 || str[0:2] != ">>")
                return null;

            str = str.offset(2);
            char[] binary = string_to_binary(ret);
            return new Binary(binary);
        }
        else {
            Gee.ArrayList<char> data = new Gee.ArrayList<char>();

            while(str.length > 0) {
                unichar c = str.get_char();

                if(c != '-' && !c.isdigit())
                    return null;

                unowned string beg = str;
                unowned string end = str.next_char();

                // A '-' with no digit after it is not a number.
                if(c == '-' && (end.length == 0 || !end.get_char().isdigit()))
                    return null;

                while(end.length > 0 && end.get_char().isdigit())
                    end = end.next_char();

                data.add((char)beg.substring(0, beg.pointer_to_offset(end)).to_int());

                str = end.chug();

                if(str.length >= 2 && str[0:2] == ">>") {
                    str = str.offset(2);

                    // Workaround for libgee to_array bug 597737
                    char[] array = new char[data.size];
                    int index = 0;
                    foreach(char i in data) {
                        array[index++] = i;
                    }

                    return new Binary(array);
                }
                else if(str.length != 0 && str.get_char() == ',') {
                    str = str.next_char().chug();
                }
                else {
                    return null;
                }
            }

            // Input ended before the closing ">>".
            return null;
        }
    }
}