From: Michael Orlitzky Date: Thu, 15 Sep 2022 13:30:18 +0000 (-0400) Subject: bin/djbdns-logparse.py: use f-strings for regex patterns; add comments. X-Git-Tag: 0.0.1~50 X-Git-Url: http://gitweb.michael.orlitzky.com/?a=commitdiff_plain;h=855a443bce100c00e4753780957ad02a78a8b8f4;p=djbdns-logparse.git bin/djbdns-logparse.py: use f-strings for regex patterns; add comments. --- diff --git a/bin/djbdns-logparse.py b/bin/djbdns-logparse.py index ed85381..8380124 100755 --- a/bin/djbdns-logparse.py +++ b/bin/djbdns-logparse.py @@ -9,17 +9,41 @@ from time import strftime, gmtime from subprocess import Popen, PIPE -# common components of line-matching regexes -timestamp_pat = r'[\d-]+ [\d:\.]+' # output of tai64nlocal +## Regular expressions for matching tinydns/dnscache log lines. We +## compile these once here rather than within the corresponding +## matching functions, because the latter get executed repeatedly. + +# This first pattern is used to match the timestamp format that the +# tai64nlocal program produces. It appears in both dnscache and +# tinydns lines, after they've been piped through tai64nlocal, of +# course. +timestamp_pat = r'[\d-]+ [\d:\.]+' + +# The regex to match dnscache log lines. +dnscache_log_re = re.compile(fr'({timestamp_pat}) (\w+)(.*)') + +# The "hex4" pattern matches a string of four hexadecimal digits. This +# is used, for example, by tinydns to encode the query type +# identifier. hex4_pat = r'[0-9a-f]{4}' -ip_pat = r'[0-9a-f]{8,32}' # IPv4 or IPv6 addresses in hex -# discriminate between dnscache and tinydns log lines -tinydns_log_re = re.compile( - r'(%s) (%s):(%s):(%s) ([\+\-IC/]) (%s) (.*)' - % (timestamp_pat, ip_pat, hex4_pat, hex4_pat, hex4_pat)) -dnscache_log_re = re.compile(r'(%s) (\w+)(.*)' % timestamp_pat) +# The IP pattern matches a string of 8 to 32 hexadecimal characters; +# lengths 8 and 32 correspond to IPv4 and IPv6 addresses, +# respectively, in tinydns logs. +ip_pat = r'[0-9a-f]{8,32}' +# The regex to match tinydns log lines. 
+tinydns_log_re = re.compile( + rf'({timestamp_pat}) ({ip_pat}):({hex4_pat}):({hex4_pat}) ([\+\-IC/]) ({hex4_pat}) (.*)' +) + +# A dictionary mapping query type identifiers, in decimal, to their +# friendly names for tinydns. Reference: +# +# https://en.wikipedia.org/wiki/List_of_DNS_record_types +# +# Note that the mapping here is non-exhaustive, and that tinydns will +# log responses for record types that it does not know about. query_type = { 1: "a", 2: "ns", @@ -44,12 +68,14 @@ query_type = { 257: "caa" } -# for tinydns only +# tinydns can drop a query for one of three reasons; this dictionary +# maps the symbol that gets logged in each case to a human-readable +# reason. query_drop_reason = { "-": "no authority", "I": "invalid query", "C": "invalid class", - } +} def convert_ip(ip : str):