]> gitweb.michael.orlitzky.com - djbdns-logparse.git/blob - bin/djbdns-logparse.py
bfb29135d0fcd6d7a0a769b31f9f4f2f5bd4b02b
[djbdns-logparse.git] / bin / djbdns-logparse.py
1 #!/usr/bin/python3
2 """
3 Convert tinydns and dnscache logs to human-readable form
4 """
5
6 import re
7 from struct import pack
8 from time import strftime, gmtime
9 from subprocess import Popen, PIPE
10
11
# Building blocks shared by the line-matching regexes below.
timestamp_pat = r'[\d-]+ [\d:\.]+'      # output of tai64nlocal
hex4_pat = r'[0-9a-f]{4}'               # four hex digits
ip_pat = r'[0-9a-f]{8,32}'              # IPv4 or IPv6 addresses in hex

# The two regexes below are what tell tinydns and dnscache log lines
# apart. The tinydns one is the more specific of the two.
#
# tinydns groups: timestamp, client ip, port, id, result code, query
# type, and the remainder of the line.
tinydns_log_re = re.compile(
    r'(%s) (%s):(%s):(%s) ([\+\-IC/]) (%s) (.*)' % (
        timestamp_pat,
        ip_pat,
        hex4_pat,
        hex4_pat,
        hex4_pat,
    )
)

# dnscache groups: timestamp, event name, and the remainder of the line.
dnscache_log_re = re.compile(r'(%s) (\w+)(.*)' % timestamp_pat)
22
# Map numeric query/record type codes, as they appear in the logs, to
# the short lowercase names printed in their place. Codes missing
# from this table are printed as-is by the code that consults it.
query_type = {
    1: "a",
    2: "ns",
    5: "cname",
    6: "soa",
    12: "ptr",
    13: "hinfo",
    15: "mx",
    16: "txt",
    17: "rp",
    24: "sig",
    25: "key",
    28: "aaaa",
    33: "srv",
    35: "naptr",
    38: "a6",
    48: "dnskey",
    52: "tlsa",
    65: "https",
    252: "axfr",
    255: "any",
    257: "caa",
}
46
# For tinydns only: explanation printed for each of the single-character
# result codes that mean a query was dropped.
query_drop_reason = {
    "-": "no authority",
    "I": "invalid query",
    "C": "invalid class",
}
53
54
def convert_ip(ip : str) -> str:
    """
    Convert a hex string representing an IP address to conventional
    human-readable form, ie. dotted-quad decimal for IPv4, and
    8 colon-separated hex shorts for IPv6.

    Parameters
    ----------
    ip : str
        The address as a string of hex digits: 8 digits for IPv4,
        32 digits for IPv6.

    Returns
    -------
    str
        The readable address. A string that is neither 8 nor 32
        characters long is returned unchanged rather than silently
        becoming ``None``, so that callers can still print or join
        the result.

    Raises
    ------
    ValueError
        If an 8-character string is not valid hexadecimal.

    Examples
    --------

    >>> convert_ip("7f000001")
    '127.0.0.1'
    >>> convert_ip("00000000000000000000ffff7f000001")
    '0000:0000:0000:0000:0000:ffff:7f00:0001'
    >>> convert_ip("abcd")
    'abcd'

    """
    if len(ip) == 8:
        # IPv4, eg. "7f000001" -> "127.0.0.1". Packing the integer as
        # a big-endian 32-bit value yields its four octets in order.
        return "%d.%d.%d.%d" % tuple(pack(">L", int(ip, 16)))

    if len(ip) == 32:
        # IPv6 is actually simpler -- it's just a string-slicing
        # operation, eg. "00000000000000000000ffff7f000001" ->
        # "0000:0000:0000:0000:0000:ffff:7f00:0001"
        return ":".join(ip[i : i + 4] for i in range(0, 32, 4))

    # Not a length we know how to decode; hand it back untouched.
    return ip
78
79
def _cvt_ip(match):
    """
    Return the human-readable form of the hex IP address captured in
    group 1 of the regex match object ``match``.
    """
    hex_ip = match.group(1)
    return convert_ip(hex_ip)
82
83 def _cvt_port(match):
84 return ":" + str(int(match.group(1), 16))
85
def decode_client(words, i):
    """
    Rewrite ``words[i]`` in place from a colon-separated hex client
    description into readable form.

    Two fields ("ip:port") become "ip:port" with a decimal port;
    three fields ("ip:port:id") additionally get " (id N)" appended.
    Any other number of fields leaves the entry untouched.
    """
    fields = words[i].split(":")
    if len(fields) not in (2, 3):
        return

    address = convert_ip(fields[0])
    port = int(fields[1], 16)

    if len(fields) == 2:
        words[i] = "%s:%d" % (address, port)
    else:
        query_id = int(fields[2], 16)
        words[i] = "%s:%d (id %d)" % (address, port, query_id)
94
def decode_ip(words, i):
    """
    Replace the hex IP address at ``words[i]`` with its readable form,
    in place.
    """
    readable = convert_ip(words[i])
    words[i] = readable
97
def decode_ttl(words, i):
    """
    Prefix the entry at ``words[i]`` with "TTL=", in place.
    """
    ttl = words[i]
    words[i] = "TTL=%s" % ttl
100
def decode_serial(words, i):
    """
    Replace the decimal serial number at ``words[i]`` with the same
    number prefixed by "#", in place. Raises ValueError if the entry
    is not an integer.
    """
    words[i] = "#%d" % int(words[i])
104
def decode_type(words, i):
    """
    Replace the numeric query type at ``words[i]`` with its name from
    ``query_type``, in place. A number with no entry in the table is
    left as the original string. Raises ValueError if the entry is not
    an integer.
    """
    raw = words[i]
    code = int(raw)
    words[i] = query_type.get(code, raw)
108
def _decode_txt_rdata(response):
    """
    Decode the hex-encoded data of a TXT record into ``N:"text"``.

    The first hex byte is reported as the decimal length N, and each
    following byte is turned into one character. A trailing "..." on
    the input is carried over to the end of the decoded text, inside
    the quotes.
    """
    if response.endswith("..."):
        ellipsis = "..."
        response = response[0:-3]
    else:
        ellipsis = ""

    length = int(response[0:2], 16)

    # Floor division is essential here: range() rejects the float
    # that "/" produces under Python 3.
    chars = [chr(int(response[2*i : (2*i)+2], 16))
             for i in range(1, len(response) // 2)]
    return "%d:\"%s%s\"" % (length, "".join(chars), ellipsis)


def handle_dnscache_log(line, match):
    """
    Print one dnscache log line in human-readable form.

    Parameters
    ----------
    line : str
        The complete log line. Unused; everything needed comes from
        ``match``.
    match : re.Match
        The result of matching ``dnscache_log_re`` against the line.
        Its groups are the timestamp, the event name, and the rest
        of the line.

    The rest of the line is split on whitespace and the individual
    words are decoded in place according to the event name. Events
    that are not recognized are printed with their words unchanged.
    """
    (timestamp, event, data) = match.groups()

    words = data.split()
    if event == "cached":
        if words[0] not in ("cname", "ns", "nxdomain"):
            decode_type(words, 0)

    elif event == "drop":
        decode_serial(words, 0)

    elif event == "lame":
        decode_ip(words, 0)

    elif event == "nodata":
        decode_ip(words, 0)
        decode_ttl(words, 1)
        decode_type(words, 2)

    elif event == "nxdomain":
        decode_ip(words, 0)
        decode_ttl(words, 1)

    elif event == "query":
        decode_serial(words, 0)
        decode_client(words, 1)
        decode_type(words, 2)

    elif event == "rr":
        decode_ip(words, 0)
        decode_ttl(words, 1)
        if words[2] not in ("cname", "mx", "ns", "ptr", "soa"):
            # The type is numeric here; once it has been translated
            # we know how to decode the answer for some types.
            decode_type(words, 2)
            if words[2] == "a":         # decode answer to an A query
                decode_ip(words, 4)
            if words[2] == "txt":       # text record
                words[4] = _decode_txt_rdata(words[4])

    elif event == "sent":
        decode_serial(words, 0)

    elif event == "stats":
        words[0] = "count=%s" % words[0]
        words[1] = "motion=%s" % words[1]
        words[2] = "udp-active=%s" % words[2]
        words[3] = "tcp-active=%s" % words[3]

    elif event == "tx":
        words[0] = "g=%s" % words[0]
        decode_type(words, 1)
        # words[2] = name
        # words[3] = control (domain for which these servers are believed
        #            to be authoritative)
        for i in range(4, len(words)):
            decode_ip(words, i)

    elif event in ("tcpopen", "tcpclose"):
        decode_client(words, 0)

    print(timestamp, event, " ".join(words))
179
180
def handle_tinydns_log(line, match):
    """
    Print one tinydns log line in human-readable form.

    Parameters
    ----------
    line : str
        The complete log line. Unused; everything needed comes from
        ``match``.
    match : re.Match
        The result of matching ``tinydns_log_re`` against the line.
        Its groups are the timestamp, the client's ip, port and query
        id (all in hex), the one-character result code, the query
        type (in hex), and the name that was queried.
    """
    (timestamp, hex_ip, hex_port, hex_id,
     code, hex_type, name) = match.groups()

    client_ip = convert_ip(hex_ip)
    client_port = int(hex_port, 16)
    query_id = int(hex_id, 16)
    type_number = int(hex_type, 16)                         # "001c" -> 28
    type_name = query_type.get(type_number, type_number)    # 28 -> "aaaa"

    print(timestamp, end=' ')

    if code == "+":
        message = ("sent response to %s:%s (id %s): %s %s"
                   % (client_ip, client_port, query_id, type_name, name))
    elif code in ("-", "I", "C"):
        reason = query_drop_reason[code]
        message = ("dropped query (%s) from %s:%s (id %s): %s %s"
                   % (reason, client_ip, client_port, query_id,
                      type_name, name))
    elif code == "/":
        message = ("dropped query (couldn't parse) from %s:%s"
                   % (client_ip, client_port))
    else:
        message = ("%s from %s:%s (id %s): %s %s"
                   % (code, client_ip, client_port, query_id,
                      type_name, name))

    print(message)
204
205
def parse_logfile(file):
    """
    Convert every line of a tinydns or dnscache log to human-readable
    form and print it.

    Parameters
    ----------
    file : iterable of str
        The raw log, one line per item; typically an open text file.

    Each line is first passed through an external ``tai64nlocal``
    process to make its timestamp readable, and is then handed to
    whichever of the tinydns or dnscache handlers matches it. Lines
    that match neither are printed as they came back from
    ``tai64nlocal``.
    """
    # Open pipe to tai64nlocal: we will write lines of our input (the
    # raw log file) to it, and read log lines with readable timestamps
    # from it. The "with" block closes both pipes and waits for the
    # child to exit, so that no process is left behind per log file.
    with Popen(["tai64nlocal"],
               stdin=PIPE,
               stdout=PIPE,
               text=True,
               bufsize=0) as tai:
        for line in file:
            tai.stdin.write(line)
            line = tai.stdout.readline()

            match = tinydns_log_re.match(line)
            if match:
                handle_tinydns_log(line, match)
                continue

            match = dnscache_log_re.match(line)
            if match:
                handle_dnscache_log(line, match)
                continue

            # readline() keeps the trailing newline and print() adds
            # its own, so strip one to avoid a spurious blank line.
            print(line.rstrip("\n"))
227
def main():
    """
    Command-line entry point: parse the arguments and process each
    of the named log files in turn, or stdin if none are named.
    """
    from argparse import ArgumentParser, FileType
    from sys import stdin

    # The file's docstring doubles as the description in the usage
    # message.
    parser = ArgumentParser(description=__doc__)

    # Zero or more positional arguments are collected into the list
    # "logfiles". When there are none, that list is just [stdin].
    parser.add_argument(
        "logfiles",
        metavar="LOGFILE",
        type=FileType("r"),
        nargs="*",
        default=[stdin],
        help="djbdns logfile to process (default: stdin)",
    )

    for logfile in parser.parse_args().logfiles:
        parse_logfile(logfile)
247
248
249
250
# Run the command-line interface only when this file is executed as a
# script, and not when it is imported as a module.
if __name__ == "__main__":
    main()