djbdns/io.py: fix test that fails in other timezones.

[djbdns-logparse.git] / bin / djbdns-logparse
diff --git a/bin/djbdns-logparse b/bin/djbdns-logparse

index 998f5475c6127306a7ff3a5b5e32e1837a768aa0..2c88462bd2ec479753ad5030f7e6da0859c1aa4d 100755 (executable)
--- a/bin/djbdns-logparse
+++ b/bin/djbdns-logparse
@@ -1,257 +1,42 @@
  #!/usr/bin/python3
-"""
+r"""
  Convert tinydns and dnscache logs to human-readable form
  """
-
-#
-# Reads log files from tinydns and/or dnscache and prints them out in
-# human-readable form. Logs can be supplied on stdin, or listed on the
-# command line:
-#
-#   $ cat @*.s | djbdns-logparse
-#   $ djbdns-logparse @*.s
-#   $ tail -f current | djbdns-logparse
-#
-# Pipes each log file through tai64nlocal, which must be on your path.
-#
-# Acknowledgments:
-#
-# * The log format descriptions by Rob Mayoff were invaluable:
-#   ** http://dqd.com/~mayoff/notes/djbdns/tinydns-log.html
-#   ** http://dqd.com/~mayoff/notes/djbdns/dnscache-log.html
+# Avoid clobbering the top-level exit() built-in.
+import sys
+
+from signal import signal, SIGINT
+from argparse import ArgumentParser, FileType
+from djbdns.io import parse_logfile
+
+# Create an argument parser using the file's docstring as its
+# description.
+parser = ArgumentParser(description = __doc__)
+
+# Parse zero or more positional arguments into a list of
+# "logfiles". If none are given, read from stdin instead.
+parser.add_argument("logfiles",
+                    metavar="LOGFILE",
+                    type=FileType("r"),
+                    nargs="*",
+                    default=[sys.stdin],
+                    help="djbdns logfile to process (default: stdin)")
+
+# Warning: argparse automatically opens its file arguments here,
+# and they only get closed when the program terminates. There's no
+# real benefit to closing them one-at-a-time after calling
+# parse_logfile(), because the "scarce" resource of open file
+# descriptors gets consumed immediately, before any processing has
+# happened. In other words, if you're going to run out of file
+# descriptors, it's going to happen right now.
  #
-# * Faried Nawaz's dnscache log parser was the original inspiration:
-#   ** http://www.hungry.com/~fn/dnscache-log.pl.txt
-#
-
-import re
-from struct import pack
-from time import strftime, gmtime
-from subprocess import Popen, PIPE
-
-
-# common components of line-matching regexes
-timestamp_pat = r'[\d-]+ [\d:\.]+'      # output of tai64nlocal
-hex4_pat = r'[0-9a-f]{4}'
-ip_pat = r'[0-9a-f]{8,32}'              # IPv4 or IPv6 addresses in hex
-
-# discriminate between dnscache and tinydns log lines
-tinydns_log_re = re.compile(
-    r'(%s) (%s):(%s):(%s) ([\+\-IC/]) (%s) (.*)'
-    % (timestamp_pat, ip_pat, hex4_pat, hex4_pat, hex4_pat))
-dnscache_log_re = re.compile(r'(%s) (\w+)(.*)' % timestamp_pat)
-
-query_type = {
-      1: "a",
-      2: "ns",
-      5: "cname",
-      6: "soa",
-     12: "ptr",
-     13: "hinfo",
-     15: "mx",
-     16: "txt",
-     17: "rp",
-     24: "sig",
-     25: "key",
-     28: "aaaa",
-     38: "a6",
-    252: "axfr",
-    255: "any",
-}
-
-# for tinydns only
-query_drop_reason = {
-    "-": "no authority",
-    "I": "invalid query",
-    "C": "invalid class",
-    }
-
-
-def convert_ip(ip):
-    """Convert a hex string representing an IP address to conventional
-    human-readable form, ie. dotted-quad decimal for IPv4, and
-    8 colon-separated hex shorts for IPv6.
-    """
-    if len(ip) == 8:
-        # IPv4, eg. "7f000001" -> "127.0.0.1"
-        return "%d.%d.%d.%d" % tuple(pack(">L", int(ip, 16)))
-    elif len(ip) == 32:
-        # IPv6 is actually simpler -- it's just a string-slicing operation,
-        # eg. "00000000000000000000ffff7f000001" ->
-        # "0000:0000:0000:0000:0000:ffff:7f00:0001"
-        return ":".join([ip[(4*i) : (4*i+4)] for i in range(8)])
-
-
-def _cvt_ip(match):
-    return convert_ip(match.group(1))
-
-def _cvt_port(match):
-    return ":" + str(int(match.group(1), 16))
-
-def decode_client(words, i):
-    chunks = words[i].split(":")
-    if len(chunks) == 2:                # ip:port
-        words[i] = "%s:%d" % (convert_ip(chunks[0]), int(chunks[1], 16))
-    elif len(chunks) == 3:
-        words[i] = "%s:%d (id %d)" % (convert_ip(chunks[0]),
-                                      int(chunks[1], 16),
-                                      int(chunks[2], 16))
-
-def decode_ip(words, i):
-    words[i] = convert_ip(words[i])
-
-def decode_ttl(words, i):
-    words[i] = "TTL=%s" % words[i]
-
-def decode_serial(words, i):
-    serial = int(words[i])
-    words[i] = "#%d" % serial
-
-def decode_type(words, i):
-    qt = words[i]
-    words[i] = query_type.get(int(qt), qt)
-
-def handle_dnscache_log(line, match):
-    (timestamp, event, data) = match.groups()
-
-    words = data.split()
-    if event == "cached":
-        if words[0] not in ("cname", "ns", "nxdomain"):
-            decode_type(words, 0)
-
-    elif event == "drop":
-        decode_serial(words, 0)
-
-    elif event == "lame":
-        decode_ip(words, 0)
-
-    elif event == "nodata":
-        decode_ip(words, 0)
-        decode_ttl(words, 1)
-        decode_type(words, 2)
-
-    elif event == "nxdomain":
-        decode_ip(words, 0)
-        decode_ttl(words, 1)
-
-    elif event == "query":
-        decode_serial(words, 0)
-        decode_client(words, 1)
-        decode_type(words, 2)
-
-    elif event == "rr":
-        decode_ip(words, 0)
-        decode_ttl(words, 1)
-        if words[2] not in ("cname", "mx", "ns", "ptr", "soa"):
-            decode_type(words, 2)
-            if words[2] == "a":         # decode answer to an A query
-                decode_ip(words, 4)
-            if words[2] == "txt":       # text record
-                response = words[4]
-                if response.endswith("..."):
-                    ellipsis = "..."
-                    response = response[0:-3]
-                else:
-                    ellipsis = ""
-                length = int(response[0:2], 16)
-                chars = []
-                for i in range(1, len(response)/2):
-                    chars.append(chr(int(response[2*i : (2*i)+2], 16)))
-                words[4] = "%d:\"%s%s\"" % (length, "".join(chars), ellipsis)
-
-    elif event == "sent":
-        decode_serial(words, 0)
-
-    elif event == "stats":
-        words[0] = "count=%s" % words[0]
-        words[1] = "motion=%s" % words[1]
-        words[2] = "udp-active=%s" % words[2]
-        words[3] = "tcp-active=%s" % words[3]
-
-    elif event == "tx":
-        words[0] = "g=%s" % words[0]
-        decode_type(words, 1)
-        # words[2] = name
-        # words[3] = control (domain for which these servers are believed
-        #            to be authoritative)
-        for i in range(4, len(words)):
-            decode_ip(words, i)
-
-    elif event in ("tcpopen", "tcpclose"):
-        decode_client(words, 0)
-
-    print(timestamp, event, " ".join(words))
-
-
-def handle_tinydns_log(line, match):
-    (timestamp, ip, port, id, code, type, name) = match.groups()
-    ip = convert_ip(ip)
-    port = int(port, 16)
-    id = int(id, 16)
-    type = int(type, 16)                # "001c" -> 28
-    type = query_type.get(type, type)   # 28 -> "aaaa"
-
-    print(timestamp, end=' ')
-
-    if code == "+":
-        print ("sent response to %s:%s (id %s): %s %s"
-               % (ip, port, id, type, name))
-    elif code in ("-", "I", "C"):
-        reason = query_drop_reason[code]
-        print ("dropped query (%s) from %s:%s (id %s): %s %s"
-               % (reason, ip, port, id, type, name))
-    elif code == "/":
-        print ("dropped query (couldn't parse) from %s:%s"
-               % (ip, port))
-    else:
-        print ("%s from %s:%s (id %s): %s %s"
-               % (code, ip, port, id, type, name))
-
-
-def parse_logfile(file):
-    # Open pipe to tai64nlocal: we will write lines of our input (the
-    # raw log file) to it, and read log lines with readable timestamps
-    # from it.
-    tai = Popen(["tai64nlocal"], stdin=PIPE, stdout=PIPE, text=True, bufsize=0)
-
-    for line in file:
-        tai.stdin.write(line)
-        line = tai.stdout.readline()
-
-        match = tinydns_log_re.match(line)
-        if match:
-            handle_tinydns_log(line, match)
-            continue
-
-        match = dnscache_log_re.match(line)
-        if match:
-            handle_dnscache_log(line, match)
-            continue
-
-        print(line)
-
-def main():
-    # Create an argument parser using the file's docsctring as its
-    # description.
-    from argparse import ArgumentParser, FileType
-    parser = ArgumentParser(description = __doc__)
-
-    # Parse zero or more positional arguments into a list of
-    # "logfiles". If none are given, read from stdin instead.
-    from sys import stdin
-    parser.add_argument("logfiles",
-                        metavar="LOGFILE",
-                        type=FileType("r"),
-                        nargs="*",
-                        default=[stdin],
-                        help="djbdns logfile to process (default: stdin)")
-
-    args = parser.parse_args()
-    for f in args.logfiles:
-        parse_logfile(f)
-
-
+# So anyway, don't run this on several million logfiles.
+args = parser.parse_args()
  
+# Install a SIGINT handler so that we don't spit out a stack trace when
+# the user accidentally starts the program with no arguments and then
+# hits Ctrl-C to kill it.
+signal(SIGINT, lambda s,f: sys.exit(0))
  
-if __name__ == "__main__":
-    main()
+for f in args.logfiles:
+    parse_logfile(f)