[svn] commit: r906 - /experiments/each-zoneload/parse.py

Mon Feb 22 04:08:43 UTC 2010

Author: each
Date: Mon Feb 22 04:08:42 2010
New Revision: 906

Log:
cleaned up, added comments

Modified:
    experiments/each-zoneload/parse.py

Modified: experiments/each-zoneload/parse.py
==============================================================================

--- experiments/each-zoneload/parse.py (original)
+++ experiments/each-zoneload/parse.py Mon Feb 22 04:08:42 2010
@@ -1,84 +1,48 @@
 #!/usr/bin/python3
 import re, string;
 
+#########################################################################
+# define exceptions
+#########################################################################
 class ParseError(Exception):
     def __init__(self, value):
         self.value = value
     def __str__(self):
         return repr(self.value)
 
+#########################################################################
 # global variables
-decomment = re.compile('\s*(?:;.*)+')
-
-origin='.'
-isname = re.compile('[-\w\$\d\/*]+(?:\.[-\w\$\d\/]+)*\.?')
-
-isttl = re.compile('[0-9]+[wdhms]?', re.I)
+#########################################################################
 maxttl = 0x7fffffff;
 defttl = -1
-
-rrtypes = set(['a', 'aaaa', 'afsdb', 'apl', 'cert', 'cname', 'dhcid',
-               'dlv', 'dname', 'dnskey', 'ds', 'gpos', 'hinfo', 'hip',
-               'ipseckey', 'isdn', 'key', 'kx', 'loc', 'mb', 'md',
-               'mf', 'mg', 'minfo', 'mr', 'mx', 'naptr', 'ns', 'nsap',
-               'nsap-ptr', 'nsec', 'nsec3', 'nsec3param', 'null',
-               'nxt', 'opt', 'ptr', 'px', 'rp', 'rrsig', 'rt', 'sig',
-               'soa', 'spf', 'srv', 'sshfp', 'tkey', 'tsig', 'txt',
-               'x25', 'wks'])
-rrclasses = set(['in', 'ch', 'chaos', 'hs', 'hesiod'])
+origin='.'
 defclass = 'IN'
 
+#########################################################################
+# cleanup: removes excess content from zone files, including comments
+# and extra whitespace
+# input:
+#   a line of text
+# returns:
+#   the same line, with comments removed, leading and trailing
+#   whitespace removed, and all other whitespace compressed to
+#   single spaces
+#########################################################################
+decomment = re.compile('\s*(?:;.*)+')
 def cleanup(s):
     global decomment
     s = s.strip().expandtabs()
     s = decomment.sub('', s)
     return ' '.join(s.split())
 
-def parse_ttl(s):
-    m = re.match('([0-9]+)(.*)', s)
-    if not m:
-        raise ParseError('Invalid TTL: ' + s)
-    ttl, suffix = int(m.group(1)), m.group(2)
-    if suffix.lower() == 'w':
-        ttl *= 604800
-    elif suffix.lower() == 'd':
-        ttl *= 86400
-    elif suffix.lower() == 'h':
-        ttl *= 3600
-    elif suffix.lower() == 'm':
-        ttl *= 60
-    return ttl
-
-def isdirective(s):
-    global origin, defttl, maxttl, isname
-    first, xx, more = s.partition(' ')
-    second, xx, more = more.partition(' ')
-    if re.match('\$origin', first, re.I):
-        if not isname.match(second):
-            raise ParseError('Invalid $ORIGIN')
-        if more:
-            raise ParseError('Invalid $ORIGIN')
-        if second[-1] == '.':
-            origin = second
-        else:
-            origin = second + '.' + origin
-        return True
-    elif re.match('\$ttl', first, re.I):
-        if not isttl.match(second):
-            raise ParseError('Invalid $TTL: ' + second)
-        if more:
-            raise ParseError('Invalid $TTL statement')
-        defttl = parse_ttl(second)
-        if defttl > maxttl:
-            raise ParseError('TTL too high: ' + second)
-        return True
-    elif re.match('\$include', first, re.I):
-        raise ParseError('$INCLUDE not yet implemented')
-    elif re.match('\$generate', first, re.I):
-        raise ParseError('$GENERATE not yet implemented')
-    else:
-        return False
-
+#########################################################################
+# records: generator function to return complete RRs from the zone file,
+# combining lines when necessary because of parentheses
+# input:
+#   zonedata as an array of lines
+# yields:
+#   complete RR
+#########################################################################
 def records(data):
     record = []
     complete = True
@@ -110,15 +74,29 @@
         record = []
         yield ret
 
+#########################################################################
+# pop: remove the first word from a line
+# input: a line
+# returns: first word, rest of the line
+#########################################################################
 def pop(line):
     list = line.split()
     first = list[0]
     rest = ' '.join(list[1:])
     return first, rest
 
-def unpop(word, line):
-    return ' '.join(line.split().insert(0, word))
-
+#########################################################################
+# istype: check whether a string is a known RR type.
+# returns: boolean
+#########################################################################
+rrtypes = set(['a', 'aaaa', 'afsdb', 'apl', 'cert', 'cname', 'dhcid',
+               'dlv', 'dname', 'dnskey', 'ds', 'gpos', 'hinfo', 'hip',
+               'ipseckey', 'isdn', 'key', 'kx', 'loc', 'mb', 'md',
+               'mf', 'mg', 'minfo', 'mr', 'mx', 'naptr', 'ns', 'nsap',
+               'nsap-ptr', 'nsec', 'nsec3', 'nsec3param', 'null',
+               'nxt', 'opt', 'ptr', 'px', 'rp', 'rrsig', 'rt', 'sig',
+               'soa', 'spf', 'srv', 'sshfp', 'tkey', 'tsig', 'txt',
+               'x25', 'wks'])
 def istype(s):
     global rrtypes
     if s.lower() in rrtypes:
@@ -126,6 +104,12 @@
     else:
         return False
 
+#########################################################################
+# isclass: check whether a string is a known RR class.  (only 'IN' is
+# supported, but the others must still be recognizable.)
+# returns: boolean
+#########################################################################
+rrclasses = set(['in', 'ch', 'chaos', 'hs', 'hesiod'])
 def isclass(s):
     global rrclasses
     if s.lower() in rrclasses:
@@ -133,37 +117,144 @@
     else:
         return False
 
+#########################################################################
+# isname: check whether a string is a valid DNS name.
+# returns: boolean
+#########################################################################
+name_regex = re.compile('[-\w\$\d\/*]+(?:\.[-\w\$\d\/]+)*\.?')
+def isname(s):
+    global name_regex
+    if name_regex.match(s):
+        return True
+    else:
+        return False
+
+#########################################################################
+# isttl: check whether a string is a valid TTL specifier.
+# returns: boolean
+#########################################################################
+ttl_regex = re.compile('[0-9]+[wdhms]?', re.I)
+def isttl(s):
+    global ttl_regex
+    if ttl_regex.match(s):
+        return True
+    else:
+        return False
+
+#########################################################################
+# parse_ttl: convert a TTL field into an integer TTL value
+# (multiplying as needed for minutes, hours, etc.)
+# input:
+#   string
+# returns:
+#   int
+# throws:
+#   ParseError
+#########################################################################
+def parse_ttl(s):
+    m = re.match('([0-9]+)(.*)', s)
+    if not m:
+        raise ParseError('Invalid TTL: ' + s)
+    ttl, suffix = int(m.group(1)), m.group(2)
+    if suffix.lower() == 'w':
+        ttl *= 604800
+    elif suffix.lower() == 'd':
+        ttl *= 86400
+    elif suffix.lower() == 'h':
+        ttl *= 3600
+    elif suffix.lower() == 'm':
+        ttl *= 60
+    return ttl
+
+#########################################################################
+# directive: handle $ORIGIN, $TTL, $INCLUDE and $GENERATE directives
+# (currently only $ORIGIN and $TTL are implemented)
+# input:
+#   a line from a zone file
+# returns:
+#   a boolean indicating whether a directive was found
+# throws:
+#   ParseError
+#########################################################################
+def directive(s):
+    global origin, defttl, maxttl
+    first, xx, more = s.partition(' ')
+    second, xx, more = more.partition(' ')
+    if re.match('\$origin', first, re.I):
+        if not isname(second):
+            raise ParseError('Invalid $ORIGIN')
+        if more:
+            raise ParseError('Invalid $ORIGIN')
+        if second[-1] == '.':
+            origin = second
+        else:
+            origin = second + '.' + origin
+        return True
+    elif re.match('\$ttl', first, re.I):
+        if not isttl(second):
+            raise ParseError('Invalid $TTL: ' + second)
+        if more:
+            raise ParseError('Invalid $TTL statement')
+        defttl = parse_ttl(second)
+        if defttl > maxttl:
+            raise ParseError('TTL too high: ' + second)
+        return True
+    elif re.match('\$include', first, re.I):
+        raise ParseError('$INCLUDE not yet implemented')
+    elif re.match('\$generate', first, re.I):
+        raise ParseError('$GENERATE not yet implemented')
+    else:
+        return False
+
+#########################################################################
+# four: try parsing on the assumption that name, ttl and class are
+# all specified (in fields 1-3), with the RR type specified in
+# field 4
+# input:
+#   a record to parse, and the most recent name found in prior records
+# returns:
+#   empty string if parse failed, else name, ttl, class, type, rdata
+# throws:
+#   ParseError
+#########################################################################
 def four(record, curname):
     ret = ''
     list = record.split()
     if len(list) <= 4:
         return ret
     if istype(list[3]):
-        if isclass(list[2]) and isttl.match(list[1]) \
-                            and isname.match(list[0]):
+        if isclass(list[2]) and isttl(list[1]) and isname(list[0]):
             name, ttl, rrclass, rrtype = list[0:3]
             rdata = ' '.join(list[4:])
             ret = name, ttl, rrclass, rrtype, rdata
     return ret
 
+#########################################################################
+# three: try parsing on the assumption that the RR type is specified in
+# field 3, and one of name, ttl, or class has been omitted
+# input:
+#   a record to parse, and the most recent name found in prior records
+# returns:
+#   empty string if parse failed, else name, ttl, class, type, rdata
+# throws:
+#   ParseError
+#########################################################################
 def three(record, curname):
-    global isttl, defttl, defclass
+    global defttl, defclass
     ret = ''
     list = record.split()
     if len(list) <= 3:
         return ret
     if istype(list[2]):
-        if isclass(list[1]) and not isttl.match(list[0]) \
-                            and isname.match(list[0]):
+        if isclass(list[1]) and not isttl(list[0]) and isname(list[0]):
             rrclass = list[1]
             ttl = defttl
             name = list[0]
-        elif not isclass(list[1]) and isttl.match(list[1]) \
-                                  and isname.match(list[0]):
+        elif not isclass(list[1]) and isttl(list[1]) and isname(list[0]):
             rrclass = defclass
             ttl = parse_ttl(list[1])
             name = list[0]
-        elif curname and isclass(list[1]) and isttl.match(list[0]):
+        elif curname and isclass(list[1]) and isttl(list[0]):
             rrclass = defclass
             ttl = parse_ttl(list[1])
             name = curname
@@ -175,14 +266,24 @@
         ret = name, ttl, rrclass, rrtype, rdata
     return ret
 
+#########################################################################
+# two: try parsing on the assumption that the RR type is specified in
+# field 2, and field 1 is name, with ttl and class omitted.
+# input:
+#   a record to parse, and the most recent name found in prior records
+# returns:
+#   empty string if parse failed, else name, ttl, class, type, rdata
+# throws:
+#   ParseError
+#########################################################################
 def two(record, curname):
-    global isttl, defttl, defclass
+    global defttl, defclass
     ret = ''
     list = record.split()
     if len(list) <= 2:
         return ret
     if istype(list[1]):
-        if isname.match(list[0]):
+        if isname(list[0]):
             name = list[0]
         else:
             raise ParseError("Cannot parse RR: " + record)
@@ -194,18 +295,18 @@
         ret = name, ttl, rrclass, rrtype, rdata
     return ret
 
-def main():
+#########################################################################
+# parse_zonefile: parse a zone master file and return it as an array of
+# tuples
+#########################################################################
+def parse_zonefile(file):
     global defttl, defclass
-    data = open('testfile').read().splitlines()
+    data = open(file).read().splitlines()
+    zone = []
     name = ''
 
-    print ('---------------------')
-
     for record in records(data):
-        if isdirective(record):
-            print('ORIGIN: ' + origin)
-            print('TTL: ' + str(defttl))
-            print ('---------------------')
+        if directive(record):
             continue;
 
         first = record.split()[0]
@@ -238,11 +339,23 @@
 
         # add origin to rdata if necessary
         if rrtype.lower() in ('cname', 'dname', 'ns'):
-            if not isname.match(rdata):
+            if not isname(rdata):
                 raise ParseError("Invalid " + rrtype + ": " + rdata)
             if rdata[-1] != '.':
                 rdata += '.' + origin
 
+        zone.append((name, ttl, rrclass, rrtype, rdata))
+
+    return zone
+
+#########################################################################
+# main: used for testing; parse a zone file and print out each record
+# broken up into separate name, ttl, class, type, and rdata fields
+#########################################################################
+def main():
+    print ('---------------------')
+    zone = parse_zonefile('testfile')
+    for name, ttl, rrclass, rrtype, rdata in zone:
         print ('name: ' + name)
         print ('ttl: ' + str(ttl))
         print ('rrclass: ' + rrclass)