X-Git-Url: https://gitweb.michael.orlitzky.com/?a=blobdiff_plain;f=bin%2Fdjbdns-logparse.py;h=2a7f6cab5bb4a1f4411d8de82c9ac88db46c0be4;hb=f035bb9069431e0364a2dfd326e2787297a05cae;hp=9af836262e78f1e485188a65a6854072d0de084b;hpb=2b9cc3504eabf66e04b6d0eace01427ae72bcefe;p=djbdns-logparse.git diff --git a/bin/djbdns-logparse.py b/bin/djbdns-logparse.py index 9af8362..2a7f6ca 100755 --- a/bin/djbdns-logparse.py +++ b/bin/djbdns-logparse.py @@ -6,7 +6,6 @@ Convert tinydns and dnscache logs to human-readable form import re, typing from struct import pack from time import strftime, gmtime -from subprocess import Popen, PIPE ## Regular expressions for matching tinydns/dnscache log lines. We @@ -70,19 +69,37 @@ query_type = { # tinydns can drop a query for one of three reasons; this dictionary # maps the symbol that gets logged in each case to a human-readable -# reason. +# reason. We include the "+" case here, indicating that the query was +# NOT dropped, to avoid a special case later on when we're formatting +# the human-readable output. query_drop_reason = { + "+": None, "-": "no authority", "I": "invalid query", "C": "invalid class", + "/": "couldn't parse" } -def convert_ip(ip : str): +def convert_ip(ip : str) -> str: """ - Convert a hex string representing an IP address to conventional - human-readable form, ie. dotted-quad decimal for IPv4, and - 8 colon-separated hex shorts for IPv6. + Convert a hex string representing an IP address to + human-readable form. + + Parameters + ---------- + + ip : str + The hexadecimal representation of either an IPv4 or an IPv6 + address. + + Returns + ------- + + The usual decimal dotted-quad representation is returned for an + IPv4 address. IPv6 addresses are returned almost as-is, but with + colons inserted in the appropriate places, between every four + characters. Examples -------- @@ -91,40 +108,128 @@ def convert_ip(ip : str): '127.0.0.1' >>> convert_ip("00000000000000000000ffff7f000001") '0000:0000:0000:0000:0000:ffff:7f00:0001' - """ if len(ip) == 8: # IPv4, eg. "7f000001" -> "7f 00 00 01" -> "127.0.0.1" - return "%d.%d.%d.%d" % tuple(pack(">L", int(ip, 16))) + return ".".join(map(str, pack(">L", int(ip, 16)))) elif len(ip) == 32: # IPv6 is actually simpler -- it's just a string-slicing operation. return ":".join([ip[(4*i) : (4*i+4)] for i in range(8)]) -def decode_client(words, i): +def decode_client(words : list, i : int): + r""" + Helper function to decode the client field in a dnscache log + entry. + + There are two possible formats for the client field, + + 1. clientip:clientport, used by tcpopen/tcpclose entries, + 2. clientip:clientport:id, used by "query" entries. + + Parameters + ---------- + + words : list + The ``words`` list (a list of fields) from + :func:`handle_dnscache_log`. + + i : int + The index of the client field within ``words`` + + Returns + ------- + + Nothing; the ``i``th entry in the ``words`` list is modified + in-place. + + Examples + -------- + + >>> words = ["foo", "bar", "7f000001:9253", "quux"] + >>> decode_client(words, 2) + >>> words + ['foo', 'bar', '127.0.0.1:37459', 'quux'] + + >>> words = ["foo", "7f000001:a3db:4fb9", "bar", "quux"] + >>> decode_client(words, 1) + >>> words + ['foo', '127.0.0.1:41947 (id 20409)', 'bar', 'quux'] + + """ chunks = words[i].split(":") - if len(chunks) == 2: # ip:port - words[i] = "%s:%d" % (convert_ip(chunks[0]), int(chunks[1], 16)) - elif len(chunks) == 3: - words[i] = "%s:%d (id %d)" % (convert_ip(chunks[0]), - int(chunks[1], 16), - int(chunks[2], 16)) + + ip = convert_ip(chunks[0]) + port = int(chunks[1], 16) + words[i] = f"{ip}:{port}" + + if len(chunks) == 3: + # For a "query" entry's clientip:clientport:id field. + id = int(chunks[2], 16) + words[i] += f" (id {id})" def decode_ip(words, i): words[i] = convert_ip(words[i]) def decode_ttl(words, i): - words[i] = "TTL=%s" % words[i] + words[i] = f"TTL={words[i]}" def decode_serial(words, i): serial = int(words[i]) - words[i] = "#%d" % serial + words[i] = f"#{serial}" def decode_type(words, i): qt = words[i] words[i] = query_type.get(int(qt), qt) -def handle_dnscache_log(line, match): +def handle_dnscache_log(line) -> typing.Optional[str]: + """ + Handle a single log line if it matches the ``dnscache_log_re`` regex. + + Parameters + ---------- + + line : string + The log line that might match ``dnscache_log_re``. + + Returns + ------- + + Either the human-readable string if the log line was handled (that + is, if it was really a dnscache log line), or ``None`` if it was + not. + + Examples + -------- + + >>> line = "2022-09-15 18:37:33.863805500 query 1 7f000001:a3db:4fb9 1 www.example.com." + >>> handle_dnscache_log(line) + '2022-09-15 18:37:33.863805500 query #1 127.0.0.1:41947 (id 20409) a www.example.com.' + + >>> line = "2022-09-15 18:37:33.863874500 tx 0 1 www.example.com. . c0a80101" + >>> handle_dnscache_log(line) + '2022-09-15 18:37:33.863874500 tx g=0 a www.example.com. . 192.168.1.1' + + >>> line = "2022-09-15 18:37:33.878529500 rr c0a80101 20865 1 www.example.com. 5db8d822" + >>> handle_dnscache_log(line) + '2022-09-15 18:37:33.878529500 rr 192.168.1.1 TTL=20865 a www.example.com. 93.184.216.34' + + >>> line = "2022-09-15 18:37:33.878532500 stats 1 43 1 0" + >>> handle_dnscache_log(line) + '2022-09-15 18:37:33.878532500 stats count=1 motion=43 udp-active=1 tcp-active=0' + + >>> line = "2022-09-15 18:37:33.878602500 sent 1 49" + >>> handle_dnscache_log(line) + '2022-09-15 18:37:33.878602500 sent #1 49' + + >>> line = "this line is nonsense" + >>> handle_dnscache_log(line) + + """ + match = dnscache_log_re.match(line) + if not match: + return None + (timestamp, event, data) = match.groups() words = data.split() @@ -168,21 +273,22 @@ def handle_dnscache_log(line, match): ellipsis = "" length = int(response[0:2], 16) chars = [] - for i in range(1, len(response)/2): + for i in range(1, len(response)//2): chars.append(chr(int(response[2*i : (2*i)+2], 16))) - words[4] = "%d:\"%s%s\"" % (length, "".join(chars), ellipsis) + txt = "".join(chars) + words[4] = f"{length}:\"{txt}{ellipsis}\"" elif event == "sent": decode_serial(words, 0) elif event == "stats": - words[0] = "count=%s" % words[0] - words[1] = "motion=%s" % words[1] - words[2] = "udp-active=%s" % words[2] - words[3] = "tcp-active=%s" % words[3] + words[0] = f"count={words[0]}" + words[1] = f"motion={words[1]}" + words[2] = f"udp-active={words[2]}" + words[3] = f"tcp-active={words[3]}" elif event == "tx": - words[0] = "g=%s" % words[0] + words[0] = f"g={words[0]}" decode_type(words, 1) # words[2] = name # words[3] = control (domain for which these servers are believed @@ -193,32 +299,42 @@ def handle_dnscache_log(line, match): elif event in ("tcpopen", "tcpclose"): decode_client(words, 0) - print(timestamp, event, " ".join(words)) + return f"{timestamp} {event} " + " ".join(words) -def handle_tinydns_log(line : str, match: re.Match): + +def handle_tinydns_log(line : str) -> typing.Optional[str]: """ - Handle a line that matched the ``tinydns_log_re`` regex. + Handle a single log line if it matches the ``tinydns_log_re`` regex. Parameters ---------- line : string - The tinydns log line that matched ``tinydns_log_re``. + The log line that might match ``tinydns_log_re``. + + Returns + ------- - match : re.Match - The match object that was returned when ``line`` was - tested against ``tinydns_log_re``. + Either the human-readable string if the log line was handled (that + is, if it was really a tinydns log line), or ``None`` if it was + not. Examples -------- >>> line = "2022-09-14 21:04:40.206516500 7f000001:9d61:be69 - 0001 www.example.com" - >>> match = tinydns_log_re.match(line) - >>> handle_tinydns_log(line, match) - 2022-09-14 21:04:40.206516500 dropped query (no authority) from 127.0.0.1:40289 (id 48745): a www.example.com + >>> handle_tinydns_log(line) + '2022-09-14 21:04:40.206516500 dropped query (no authority) from 127.0.0.1:40289 (id 48745): a www.example.com' + + >>> line = "this line is nonsense" + >>> handle_tinydns_log(line) """ + match = tinydns_log_re.match(line) + if not match: + return None + (timestamp, ip, port, id, code, type, name) = match.groups() ip = convert_ip(ip) port = int(port, 16) @@ -230,25 +346,29 @@ def handle_tinydns_log(line : str, match: re.Match): type = int(type, 16) # "001c" -> 28 type = query_type.get(type, type) # 28 -> "aaaa" - print(timestamp, end=' ') + line_tpl = "{timestamp} " + reason = query_drop_reason[code] if code == "+": - print ("sent response to %s:%s (id %s): %s %s" - % (ip, port, id, type, name)) - elif code in ("-", "I", "C"): - reason = query_drop_reason[code] - print ("dropped query (%s) from %s:%s (id %s): %s %s" - % (reason, ip, port, id, type, name)) - elif code == "/": - print ("dropped query (couldn't parse) from %s:%s" - % (ip, port)) + line_tpl += "sent response to {ip}:{port} (id {id}): {type} {name}" else: - print ("%s from %s:%s (id %s): %s %s" - % (code, ip, port, id, type, name)) + line_tpl += "dropped query ({reason}) from {ip}:{port}" + if code != "/": + # If the query can actually be parsed, the log line is a + # bit more informative than it would have been otherwise. + line_tpl += " (id {id}): {type} {name}" + + return line_tpl.format(timestamp=timestamp, + reason=reason, + ip=ip, + port=port, + id=id, + type=type, + name=name) def parse_logfile(file : typing.TextIO): - """ + r""" Process a single log ``file``. Parameters @@ -257,29 +377,46 @@ def parse_logfile(file : typing.TextIO): file : typing.TextIO An open log file, or stdin. + Examples + -------- + + >>> line = "@4000000063227a320c4f3114 7f000001:9d61:be69 - 0001 www.example.com\n" + >>> from tempfile import NamedTemporaryFile + >>> with NamedTemporaryFile(mode="w", delete=False) as f: + ... _ = f.write(line) + >>> f = open(f.name, 'r') + >>> parse_logfile(f) + 2022-09-14 21:04:40.206516500 dropped query (no authority) from 127.0.0.1:40289 (id 48745): a www.example.com + >>> f.close() + >>> from os import remove + >>> remove(f.name) + """ - # Open pipe to tai64nlocal: we will write lines of our input (the - # raw log file) to it, and read log lines with readable timestamps - # from it. + # Open a pipe to tai64nlocal. We'll write lines of our input file + # (the log file) to it, and read back the same lines but with + # friendly timestamps in them. + from subprocess import Popen, PIPE tai = Popen(["tai64nlocal"], stdin=PIPE, stdout=PIPE, text=True, bufsize=0) for line in file: tai.stdin.write(line) line = tai.stdout.readline() - match = tinydns_log_re.match(line) - if match: - handle_tinydns_log(line, match) - continue + friendly_line = handle_tinydns_log(line) + if not friendly_line: + friendly_line = handle_dnscache_log(line) + if not friendly_line: + friendly_line = line - match = dnscache_log_re.match(line) - if match: - handle_dnscache_log(line, match) - continue - - print(line) + print(friendly_line) def main(): + r""" + The entry point to the program. + + This function is responsible only for parsing any command-line + arguments, and then calling :func`parse_logfile` on them. + """ # Create an argument parser using the file's docsctring as its # description. from argparse import ArgumentParser, FileType