]>
gitweb.michael.orlitzky.com - djbdns-logparse.git/blob - bin/djbdns-logparse.py
ed8538174e6b232773d5a64ddc38eec58cf278a7
3 Convert tinydns and dnscache logs to human-readable form
7 from struct
import pack
8 from time
import strftime
, gmtime
9 from subprocess
import Popen
, PIPE
12 # common components of line-matching regexes
13 timestamp_pat
= r
'[\d-]+ [\d:\.]+' # output of tai64nlocal
14 hex4_pat
= r
'[0-9a-f]{4}'
15 ip_pat
= r
'[0-9a-f]{8,32}' # IPv4 or IPv6 addresses in hex
17 # discriminate between dnscache and tinydns log lines
18 tinydns_log_re
= re
.compile(
19 r
'(%s) (%s):(%s):(%s) ([\+\-IC/]) (%s) (.*)'
20 % (timestamp_pat
, ip_pat
, hex4_pat
, hex4_pat
, hex4_pat
))
21 dnscache_log_re
= re
.compile(r
'(%s) (\w+)(.*)' % timestamp_pat
)
def convert_ip(ip: str):
    """
    Convert a hex string representing an IP address to conventional
    human-readable form, ie. dotted-quad decimal for IPv4, and
    8 colon-separated hex shorts for IPv6.

    Parameters
    ----------

    ip : string
        The address as a string of hex digits: 8 digits for IPv4,
        32 digits for IPv6.

    Returns
    -------

    The formatted address, or ``None`` if ``ip`` is neither 8 nor 32
    characters long.

    Examples
    --------

        >>> convert_ip("7f000001")
        '127.0.0.1'

        >>> convert_ip("00000000000000000000ffff7f000001")
        '0000:0000:0000:0000:0000:ffff:7f00:0001'

    """
    if len(ip) == 8:
        # IPv4, eg. "7f000001" -> "7f 00 00 01" -> "127.0.0.1"
        # Packing as a big-endian unsigned long yields the four
        # octets in network order; iterating bytes gives ints.
        return "%d.%d.%d.%d" % tuple(pack(">L", int(ip, 16)))

    if len(ip) == 32:
        # IPv6 is actually simpler -- it's just a string-slicing operation.
        return ":".join(ip[4 * i:4 * i + 4] for i in range(8))

    # Unrecognised length: return None explicitly rather than feeding
    # an oversized value to pack() (which would raise struct.error).
    return None
79 return convert_ip(match
.group(1))
82 return ":" + str(int(match
.group(1), 16))
def decode_client(words, i):
    """
    Rewrite the client field at ``words[i]`` in human-readable form,
    in place.

    The field is split on ":". A two-part field is treated as
    ``ip:port`` and a three-part field as ``ip:port:id``; the IP is
    passed through ``convert_ip`` and the remaining parts are parsed
    as hexadecimal integers. Fields with any other number of parts
    are left untouched.

    Parameters
    ----------

    words : list
        The fields of a log line; modified in place.

    i : int
        Index of the client field within ``words``.

    """
    chunks = words[i].split(":")
    if len(chunks) == 2:  # ip:port
        words[i] = "%s:%d" % (convert_ip(chunks[0]), int(chunks[1], 16))
    elif len(chunks) == 3:  # ip:port:id
        # The format string takes three values; the port and id are
        # hex-encoded, as in the two-part case.
        words[i] = "%s:%d (id %d)" % (convert_ip(chunks[0]),
                                      int(chunks[1], 16),
                                      int(chunks[2], 16))
def decode_ip(words, i):
    """
    Replace the hex-encoded address at ``words[i]`` with the result
    of ``convert_ip``, in place.

    Parameters
    ----------

    words : list
        The fields of a log line; modified in place.

    i : int
        Index of the address field within ``words``.

    """
    raw_address = words[i]
    words[i] = convert_ip(raw_address)
def decode_ttl(words, i):
    """
    Prefix the field at ``words[i]`` with ``TTL=``, in place.

    Parameters
    ----------

    words : list
        The fields of a log line; modified in place.

    i : int
        Index of the TTL field within ``words``.

    Examples
    --------

        >>> fields = ["7f000001", "3600"]
        >>> decode_ttl(fields, 1)
        >>> fields
        ['7f000001', 'TTL=3600']

    """
    ttl = words[i]
    words[i] = "TTL=%s" % ttl
def decode_serial(words, i):
    """
    Rewrite the decimal serial number at ``words[i]`` as ``#<n>``,
    in place.

    The field is parsed with ``int`` first, so leading zeros and
    surrounding whitespace are dropped; a non-numeric field raises
    ``ValueError``.

    Parameters
    ----------

    words : list
        The fields of a log line; modified in place.

    i : int
        Index of the serial-number field within ``words``.

    Examples
    --------

        >>> fields = ["0042", "example.com"]
        >>> decode_serial(fields, 0)
        >>> fields
        ['#42', 'example.com']

    """
    words[i] = "#%d" % int(words[i])
def decode_type(words, i):
    """
    Replace the numeric query type at ``words[i]`` with its name from
    the ``query_type`` dictionary, in place.

    If the number has no entry in ``query_type``, the field keeps its
    original text. A field that is not a decimal integer raises
    ``ValueError``.

    Parameters
    ----------

    words : list
        The fields of a log line; modified in place.

    i : int
        Index of the query-type field within ``words``.

    """
    # Bind the raw field once: it is both the lookup key (as an int)
    # and the fallback value when the type id is unknown.
    qt = words[i]
    words[i] = query_type.get(int(qt), qt)
107 def handle_dnscache_log(line
, match
):
108 (timestamp
, event
, data
) = match
.groups()
111 if event
== "cached":
112 if words
[0] not in ("cname", "ns", "nxdomain"):
113 decode_type(words
, 0)
115 elif event
== "drop":
116 decode_serial(words
, 0)
118 elif event
== "lame":
121 elif event
== "nodata":
124 decode_type(words
, 2)
126 elif event
== "nxdomain":
130 elif event
== "query":
131 decode_serial(words
, 0)
132 decode_client(words
, 1)
133 decode_type(words
, 2)
138 if words
[2] not in ("cname", "mx", "ns", "ptr", "soa"):
139 decode_type(words
, 2)
140 if words
[2] == "a": # decode answer to an A query
142 if words
[2] == "txt": # text record
144 if response
.endswith("..."):
146 response
= response
[0:-3]
149 length
= int(response
[0:2], 16)
151 for i
in range(1, len(response
)/2):
152 chars
.append(chr(int(response
[2*i
: (2*i
)+2], 16)))
153 words
[4] = "%d:\"%s%s\"" % (length
, "".join(chars
), ellipsis
)
155 elif event
== "sent":
156 decode_serial(words
, 0)
158 elif event
== "stats":
159 words
[0] = "count=%s" % words
[0]
160 words
[1] = "motion=%s" % words
[1]
161 words
[2] = "udp-active=%s" % words
[2]
162 words
[3] = "tcp-active=%s" % words
[3]
165 words
[0] = "g=%s" % words
[0]
166 decode_type(words
, 1)
168 # words[3] = control (domain for which these servers are believed
169 # to be authoritative)
170 for i
in range(4, len(words
)):
173 elif event
in ("tcpopen", "tcpclose"):
174 decode_client(words
, 0)
176 print(timestamp
, event
, " ".join(words
))
179 def handle_tinydns_log(line
: str, match
: re
.Match
):
181 Handle a line that matched the ``tinydns_log_re`` regex.
187 The tinydns log line that matched ``tinydns_log_re``.
190 The match object that was returned when ``line`` was
191 tested against ``tinydns_log_re``.
196 >>> line = "2022-09-14 21:04:40.206516500 7f000001:9d61:be69 - 0001 www.example.com"
197 >>> match = tinydns_log_re.match(line)
198 >>> handle_tinydns_log(line, match)
199 2022-09-14 21:04:40.206516500 dropped query (no authority) from 127.0.0.1:40289 (id 48745): a www.example.com
202 (timestamp
, ip
, port
, id, code
, type, name
) = match
.groups()
207 # Convert the "type" field to a human-readable record type name
208 # using the query_type dictionary. If the right name isn't present
209 # in the dictionary, we use the (decimal) type id instead.
210 type = int(type, 16) # "001c" -> 28
211 type = query_type
.get(type, type) # 28 -> "aaaa"
213 print(timestamp
, end
=' ')
216 print ("sent response to %s:%s (id %s): %s %s"
217 % (ip
, port
, id, type, name
))
218 elif code
in ("-", "I", "C"):
219 reason
= query_drop_reason
[code
]
220 print ("dropped query (%s) from %s:%s (id %s): %s %s"
221 % (reason
, ip
, port
, id, type, name
))
223 print ("dropped query (couldn't parse) from %s:%s"
226 print ("%s from %s:%s (id %s): %s %s"
227 % (code
, ip
, port
, id, type, name
))
230 def parse_logfile(file):
231 # Open pipe to tai64nlocal: we will write lines of our input (the
232 # raw log file) to it, and read log lines with readable timestamps
234 tai
= Popen(["tai64nlocal"], stdin
=PIPE
, stdout
=PIPE
, text
=True, bufsize
=0)
237 tai
.stdin
.write(line
)
238 line
= tai
.stdout
.readline()
240 match
= tinydns_log_re
.match(line
)
242 handle_tinydns_log(line
, match
)
245 match
= dnscache_log_re
.match(line
)
247 handle_dnscache_log(line
, match
)
253 # Create an argument parser using the file's docstring as its
255 from argparse
import ArgumentParser
, FileType
256 parser
= ArgumentParser(description
= __doc__
)
258 # Parse zero or more positional arguments into a list of
259 # "logfiles". If none are given, read from stdin instead.
260 from sys
import stdin
261 parser
.add_argument("logfiles",
266 help="djbdns logfile to process (default: stdin)")
268 args
= parser
.parse_args()
269 for f
in args
.logfiles
:
275 if __name__
== "__main__":