]> gitweb.michael.orlitzky.com - djbdns-logparse.git/blob - bin/djbdns-logparse
32c7ba106139916df2cd0241c2e50956e824e815
[djbdns-logparse.git] / bin / djbdns-logparse
1 #!/usr/bin/python3
2 """
3 Convert tinydns and dnscache logs to human-readable form
4 """
5
6 #
7 # Reads log files from tinydns and/or dnscache and prints them out in
8 # human-readable form. Logs can be supplied on stdin, or listed on the
9 # command line:
10 #
11 # $ cat @*.s | djbdns-logparse
12 # $ djbdns-logparse @*.s
13 # $ tail -f current | djbdns-logparse
14 #
15 # Pipes each log file through tai64nlocal, which must be on your path.
16 #
17 # Acknowledgments:
18 #
19 # * The log format descriptions by Rob Mayoff were invaluable:
20 # ** http://dqd.com/~mayoff/notes/djbdns/tinydns-log.html
21 # ** http://dqd.com/~mayoff/notes/djbdns/dnscache-log.html
22 #
23 # * Faried Nawaz's dnscache log parser was the original inspiration:
24 # ** http://www.hungry.com/~fn/dnscache-log.pl.txt
25 #
26
27 import sys, re
28 from struct import pack
29 from time import strftime, gmtime
30 from subprocess import Popen, PIPE
31
32
# common components of line-matching regexes
timestamp_pat = r'[\d-]+ [\d:\.]+'  # output of tai64nlocal
hex4_pat = r'[0-9a-f]{4}'  # exactly four lowercase hex digits
ip_pat = r'[0-9a-f]{8,32}'  # IPv4 or IPv6 addresses in hex

# discriminate between dnscache and tinydns log lines
#
# A tinydns line yields seven groups, which handle_tinydns_log() unpacks
# as: timestamp, client ip, port, id, result code (one of + - I C /),
# query type, and the remainder of the line (the queried name).
tinydns_log_re = re.compile(
    r'(%s) (%s):(%s):(%s) ([\+\-IC/]) (%s) (.*)'
    % (timestamp_pat, ip_pat, hex4_pat, hex4_pat, hex4_pat))
# A dnscache line yields three groups, which handle_dnscache_log()
# unpacks as: timestamp, event name, and the remainder of the line.
# This pattern is much looser than the tinydns one, so parse_logfile()
# tries the tinydns pattern first.
dnscache_log_re = re.compile(r'(%s) (\w+)(.*)' % timestamp_pat)
43
# Map numeric DNS record (query) types to the lowercase mnemonics shown
# in our output. Lookups go through dict.get() with the number itself as
# the fallback, so a type missing from this table is simply printed as
# a number rather than causing an error.
query_type = {
    1: "a",
    2: "ns",
    5: "cname",
    6: "soa",
    12: "ptr",
    13: "hinfo",
    15: "mx",
    16: "txt",
    17: "rp",
    24: "sig",
    25: "key",
    28: "aaaa",
    29: "loc",
    33: "srv",
    35: "naptr",
    38: "a6",
    39: "dname",
    41: "opt",
    43: "ds",
    44: "sshfp",
    46: "rrsig",
    47: "nsec",
    48: "dnskey",
    50: "nsec3",
    51: "nsec3param",
    52: "tlsa",
    64: "svcb",
    65: "https",
    99: "spf",
    251: "ixfr",
    252: "axfr",
    255: "any",
    257: "caa",
}
61
# for tinydns only
#
# Maps the single-character result code of a tinydns log line to the
# reason the query was dropped. handle_tinydns_log() consults this table
# for the codes "-", "I" and "C"; the remaining codes ("+" and "/") are
# handled separately there.
query_drop_reason = {
    "-": "no authority",
    "I": "invalid query",
    "C": "invalid class",
}
68
69
def warn(filename, msg):
    """Write a one-line warning about *filename* to standard error.

    The line has the form ``warning: <filename>: <msg>`` and is
    terminated by a newline.
    """
    text = "warning: %s: %s\n" % (filename, msg)
    sys.stderr.write(text)
72
def convert_ip(ip):
    """Convert a hex string representing an IP address to conventional
    human-readable form, ie. dotted-quad decimal for IPv4, and
    8 colon-separated hex shorts for IPv6.

    An 8-character string is treated as IPv4 and a 32-character string
    as IPv6. A string of any other length is not an address we know how
    to decode and is returned unchanged, so that callers always get a
    string back.

    Raises ValueError if an 8-character string is not valid hex.
    """
    if len(ip) == 8:
        # IPv4, eg. "7f000001" -> "127.0.0.1"
        # Iterating over the packed bytes yields the four octets as ints.
        return "%d.%d.%d.%d" % tuple(pack(">L", int(ip, 16)))

    if len(ip) == 32:
        # IPv6 is actually simpler -- it's just a string-slicing operation,
        # eg. "00000000000000000000ffff7f000001" ->
        #     "0000:0000:0000:0000:0000:ffff:7f00:0001"
        return ":".join(ip[(4*i) : (4*i+4)] for i in range(8))

    # Unrecognized length. Falling off the end here would return None,
    # which later breaks " ".join(words) or shows up as "None" in the
    # output; passing the raw text through keeps the line printable.
    return ip
86
87
def _cvt_ip(match):
    """Return the first group of *match*, a hex-encoded IP address,
    converted to human-readable form by convert_ip().

    Takes a regex match object, so this is presumably meant to be used
    as a substitution callback; no caller is visible in this file.
    """
    hex_ip = match.group(1)
    return convert_ip(hex_ip)
90
def _cvt_port(match):
    """Return the first group of *match*, a hex-encoded port number,
    as a colon followed by the port in decimal (eg. "0035" -> ":53").

    Takes a regex match object, so this is presumably meant to be used
    as a substitution callback; no caller is visible in this file.
    """
    port = int(match.group(1), 16)
    return ":" + str(port)
93
def decode_client(words, i):
    """Rewrite words[i], a colon-separated hex client description, in
    place in human-readable form.

    Two fields (ip:port) become "<ip>:<port>"; three fields
    (ip:port:id) become "<ip>:<port> (id <id>)". The port and id are
    shown in decimal. Any other number of fields leaves words[i]
    untouched.
    """
    fields = words[i].split(":")

    if len(fields) == 2:  # ip:port
        hex_ip, hex_port = fields
        words[i] = "%s:%d" % (convert_ip(hex_ip), int(hex_port, 16))
    elif len(fields) == 3:  # ip:port:id
        hex_ip, hex_port, hex_id = fields
        words[i] = "%s:%d (id %d)" % (convert_ip(hex_ip),
                                      int(hex_port, 16),
                                      int(hex_id, 16))
102
def decode_ip(words, i):
    """Replace words[i], a hex-encoded IP address, in place with the
    result of convert_ip().
    """
    hex_ip = words[i]
    words[i] = convert_ip(hex_ip)
105
def decode_ttl(words, i):
    """Prefix words[i] in place with "TTL=" so the field is labelled in
    the output. The value itself is not converted.
    """
    ttl = words[i]
    words[i] = "TTL=%s" % ttl
108
def decode_serial(words, i):
    """Replace words[i], a decimal serial number, in place with the
    same number prefixed by "#".

    Raises ValueError if words[i] is not a decimal integer.
    """
    words[i] = "#%d" % int(words[i])
112
def decode_type(words, i):
    """Replace words[i], a decimal record-type number, in place with
    its name from the query_type table.

    A number that is not in the table is left as the original string.
    Raises ValueError if words[i] is not a decimal integer.
    """
    raw = words[i]
    number = int(raw)
    words[i] = query_type.get(number, raw)
116
def _decode_txt_rdata(response):
    """Decode the hex-encoded data of a "txt" rr line into readable text.

    The first byte (two hex digits) is reported as a length; every
    following byte is turned into the character with that code. A
    trailing "..." on the input is not part of the hex data and is
    carried over to the end of the result.

    Returns a string of the form ``<length>:"<text>"``.
    """
    if response.endswith("..."):
        ellipsis = "..."
        response = response[0:-3]
    else:
        ellipsis = ""
    length = int(response[0:2], 16)
    # Floor division is required here: in Python 3, "/" produces a
    # float, and range() raises TypeError when given one.
    chars = [chr(int(response[2*i : (2*i)+2], 16))
             for i in range(1, len(response) // 2)]
    return "%d:\"%s%s\"" % (length, "".join(chars), ellipsis)


def handle_dnscache_log(line, match):
    """Print one dnscache log line in human-readable form.

    ``match`` is a match object whose three groups are the timestamp,
    the event name, and the remainder of the line (see
    dnscache_log_re). The remainder is split on whitespace and the
    fields that are known for the given event are decoded in place;
    events not listed below are printed with their fields untouched.
    ``line`` is accepted for symmetry with handle_tinydns_log() but is
    not used.

    Raises IndexError if a known event has fewer fields than expected,
    and ValueError if a numeric field cannot be parsed.
    """
    (timestamp, event, data) = match.groups()

    words = data.split()
    if event == "cached":
        # The first field is either one of these keywords or a numeric
        # record type.
        if words[0] not in ("cname", "ns", "nxdomain"):
            decode_type(words, 0)

    elif event == "drop":
        decode_serial(words, 0)

    elif event == "lame":
        decode_ip(words, 0)

    elif event == "nodata":
        decode_ip(words, 0)
        decode_ttl(words, 1)
        decode_type(words, 2)

    elif event == "nxdomain":
        decode_ip(words, 0)
        decode_ttl(words, 1)

    elif event == "query":
        decode_serial(words, 0)
        decode_client(words, 1)
        decode_type(words, 2)

    elif event == "rr":
        decode_ip(words, 0)
        decode_ttl(words, 1)
        # These types already appear by name; anything else is numeric.
        if words[2] not in ("cname", "mx", "ns", "ptr", "soa"):
            decode_type(words, 2)
            if words[2] == "a":  # decode answer to an A query
                decode_ip(words, 4)
            if words[2] == "txt":  # text record
                words[4] = _decode_txt_rdata(words[4])

    elif event == "sent":
        decode_serial(words, 0)

    elif event == "stats":
        words[0] = "count=%s" % words[0]
        words[1] = "motion=%s" % words[1]
        words[2] = "udp-active=%s" % words[2]
        words[3] = "tcp-active=%s" % words[3]

    elif event == "tx":
        words[0] = "g=%s" % words[0]
        decode_type(words, 1)
        # words[2] = name
        # words[3] = control (domain for which these servers are believed
        #            to be authoritative)
        for i in range(4, len(words)):
            decode_ip(words, i)

    elif event in ("tcpopen", "tcpclose"):
        decode_client(words, 0)

    print(timestamp, event, " ".join(words))
187
188
def handle_tinydns_log(line, match):
    """Print one tinydns log line in human-readable form.

    ``match`` is a match object whose seven groups are the timestamp,
    client ip, port, query id, result code, query type and queried name
    (see tinydns_log_re). The ip, port, id and type are hex-encoded in
    the log and are decoded before printing. ``line`` is accepted for
    symmetry with handle_dnscache_log() but is not used.

    The timestamp and the description are printed together on a single
    line.
    """
    # Local names avoid shadowing the builtins id() and type().
    (timestamp, ip, port, query_id, code, qtype, name) = match.groups()
    ip = convert_ip(ip)
    port = int(port, 16)
    query_id = int(query_id, 16)
    qtype = int(qtype, 16)                # "001c" -> 28
    qtype = query_type.get(qtype, qtype)  # 28 -> "aaaa"

    if code == "+":
        message = ("sent response to %s:%s (id %s): %s %s"
                   % (ip, port, query_id, qtype, name))
    elif code in ("-", "I", "C"):
        reason = query_drop_reason[code]
        message = ("dropped query (%s) from %s:%s (id %s): %s %s"
                   % (reason, ip, port, query_id, qtype, name))
    elif code == "/":
        message = ("dropped query (couldn't parse) from %s:%s"
                   % (ip, port))
    else:
        message = ("%s from %s:%s (id %s): %s %s"
                   % (code, ip, port, query_id, qtype, name))

    # A bare print(timestamp,) ends the line after the timestamp in
    # Python 3 (the trailing comma only suppressed the newline in
    # Python 2), which would split every entry across two lines.
    print(timestamp, message)
212
213
def parse_logfile(file):
    """Read raw log lines from *file* and print them in readable form.

    ``file`` is an iterable of text lines (an open file or stdin). Each
    line is piped through a ``tai64nlocal`` subprocess to make its
    timestamp readable, then handed to handle_tinydns_log() or
    handle_dnscache_log() depending on which pattern it matches. Lines
    matching neither are written to stdout as tai64nlocal returned
    them.

    Raises FileNotFoundError if ``tai64nlocal`` is not on the path.
    """
    # Open pipe to tai64nlocal: we will write lines of our input (the
    # raw log file) to it, and read log lines with readable timestamps
    # from it. The "with" block closes both pipes and waits for the
    # child on the way out, so we don't leave one lingering process
    # (and two open descriptors) behind for every file parsed.
    with Popen(["tai64nlocal"], stdin=PIPE, stdout=PIPE,
               text=True, bufsize=0) as tai:
        for line in file:
            # We read back exactly one line for every line written, so
            # each line we send must be newline-terminated. A final
            # line lacking its newline would otherwise leave readline()
            # waiting forever.
            if not line.endswith("\n"):
                line += "\n"

            tai.stdin.write(line)
            line = tai.stdout.readline()

            # Try tinydns first: its pattern is the more specific one.
            match = tinydns_log_re.match(line)
            if match:
                handle_tinydns_log(line, match)
                continue

            match = dnscache_log_re.match(line)
            if match:
                handle_dnscache_log(line, match)
                continue

            sys.stdout.write(line)
235
def main():
    """Command-line entry point: parse each log file named on the
    command line, or stdin if none are given.
    """
    # Create an argument parser using the file's docstring as its
    # description.
    from argparse import ArgumentParser, FileType
    parser = ArgumentParser(description=__doc__)

    # Parse zero or more positional arguments into a list of
    # "logfiles". If none are given, read from stdin instead.
    parser.add_argument("logfiles",
                        metavar="LOGFILE",
                        type=FileType("r"),
                        nargs="*",
                        default=[sys.stdin],
                        help="djbdns logfile to process (default: stdin)")

    args = parser.parse_args()
    for f in args.logfiles:
        try:
            parse_logfile(f)
        finally:
            # argparse opened these files for us, so nobody else will
            # close them. Leave stdin alone: we didn't open it.
            if f is not sys.stdin:
                f.close()
254
255
256
257
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()