]> gitweb.michael.orlitzky.com - djbdns-logparse.git/blob - bin/djbdns-logparse
Initial commit, just starting to organize everything.
[djbdns-logparse.git] / bin / djbdns-logparse
1 #!/usr/bin/python3
#
# Reads log files from tinydns and/or dnscache and prints them out in
# human-readable form. Logs can be supplied on stdin, or listed on the
# command line:
#
#   $ cat @*.s | djbdns-logparse
#   $ djbdns-logparse @*.s
#   $ tail -f current | djbdns-logparse
#
# Each log file is piped through tai64nlocal, which must be on your PATH.
#
# Acknowledgments:
#
# * The log format descriptions by Rob Mayoff were invaluable:
# ** http://dqd.com/~mayoff/notes/djbdns/tinydns-log.html
# ** http://dqd.com/~mayoff/notes/djbdns/dnscache-log.html
#
# * Faried Nawaz's dnscache log parser was the original inspiration:
# ** http://www.hungry.com/~fn/dnscache-log.pl.txt
#
22
23 import sys, re
24 from struct import pack
25 from time import strftime, gmtime
26 from subprocess import Popen, PIPE
27
# Building blocks shared by the line-matching regexes below.
timestamp_pat = r'[\d-]+ [\d:\.]+'      # output of tai64nlocal
hex4_pat = r'[0-9a-f]{4}'
ip_pat = r'[0-9a-f]{8,32}'              # IPv4 or IPv6 addresses in hex

# A line is tried against the tinydns pattern first; the dnscache pattern
# is much looser (timestamp, one word, then anything), so it acts as the
# fallback when telling the two log formats apart.
tinydns_log_re = re.compile(
    rf'({timestamp_pat}) ({ip_pat}):({hex4_pat}):({hex4_pat})'
    rf' ([\+\-IC/]) ({hex4_pat}) (.*)'
)
dnscache_log_re = re.compile(rf'({timestamp_pat}) (\w+)(.*)')
38
# Numeric DNS query/record type codes mapped to the lowercase mnemonic
# that is printed in their place. Codes missing from this table are
# printed as-is by the callers.
query_type = {
    1: "a",         # IPv4 host address
    2: "ns",        # authoritative name server
    5: "cname",     # canonical name (alias)
    6: "soa",       # start of authority
    12: "ptr",      # domain name pointer
    13: "hinfo",    # host information
    15: "mx",       # mail exchange
    16: "txt",      # text strings
    17: "rp",       # responsible person
    24: "sig",      # signature
    25: "key",      # key
    28: "aaaa",     # IPv6 host address
    38: "a6",       # A6 (IPv6 address, obsolete record type)
    252: "axfr",    # zone transfer request
    255: "any",     # request for all records
}
56
# tinydns only: the one-character result code logged for a dropped
# query, mapped to the explanation printed in its place.
query_drop_reason = {
    "-": "no authority",
    "I": "invalid query",
    "C": "invalid class",
}
63
64
def warn(filename, msg):
    """Write a one-line warning about *filename* to standard error.

    The emitted line has the form ``warning: <filename>: <msg>``.
    """
    print("warning: %s: %s\n" % (filename, msg), end="", file=sys.stderr)
67
def convert_ip(ip):
    """Convert a hex string representing an IP address to conventional
    human-readable form, i.e. dotted-quad decimal for IPv4, and
    8 colon-separated hex shorts for IPv6.

    Parameters:
        ip: the address as a string of hex digits; 8 digits are treated
            as IPv4 and 32 digits as IPv6.

    Returns:
        The formatted address. A string of any other length is returned
        unchanged rather than yielding None, so that callers which store
        the result in a list of words and later join them never end up
        with a non-string entry.

    Raises:
        ValueError: if an 8-character input is not valid hexadecimal.
    """
    if len(ip) == 8:
        # IPv4, eg. "7f000001" -> "127.0.0.1". Packing as a big-endian
        # 32-bit integer yields the four octets in network order.
        return "%d.%d.%d.%d" % tuple(pack(">L", int(ip, 16)))

    if len(ip) == 32:
        # IPv6 is actually simpler -- it's just a string-slicing operation,
        # eg. "00000000000000000000ffff7f000001" ->
        # "0000:0000:0000:0000:0000:ffff:7f00:0001"
        return ":".join(ip[4 * i : 4 * i + 4] for i in range(8))

    # Neither an IPv4 nor an IPv6 length: leave the text as we found it.
    return ip
81
82
def _cvt_ip(match):
    """Return the readable form of the hex IP address captured in group 1
    of *match* (a regex match object)."""
    hex_address = match.group(1)
    return convert_ip(hex_address)
85
def _cvt_port(match):
    """Return ":<port>" where the port is group 1 of *match* (a regex
    match object) converted from hexadecimal to decimal."""
    port_number = int(match.group(1), 16)
    return ":" + str(port_number)
88
def decode_client(words, i):
    """Rewrite words[i] in place from hex "ip:port" or "ip:port:id" form
    to readable form.

    The IP goes through convert_ip(); the port and the optional id are
    converted from hexadecimal to decimal. A word that does not split
    into exactly two or three colon-separated pieces is left untouched.
    """
    pieces = words[i].split(":")
    count = len(pieces)
    if count not in (2, 3):
        return

    address = convert_ip(pieces[0])
    port = int(pieces[1], 16)
    if count == 2:                      # ip:port
        words[i] = "%s:%d" % (address, port)
    else:                               # ip:port:id
        words[i] = "%s:%d (id %d)" % (address, port, int(pieces[2], 16))
97
def decode_ip(words, i):
    """Replace the hex IP address at words[i] with the result of
    convert_ip(), modifying *words* in place."""
    hex_address = words[i]
    words[i] = convert_ip(hex_address)
100
def decode_ttl(words, i):
    """Prefix the value at words[i] with "TTL=", modifying *words* in
    place."""
    ttl = words[i]
    words[i] = "TTL=%s" % ttl
103
def decode_serial(words, i):
    """Rewrite the decimal serial number at words[i] as "#<number>",
    modifying *words* in place.

    Raises ValueError if words[i] is not a decimal integer.
    """
    words[i] = "#%d" % int(words[i])
107
def decode_type(words, i):
    """Replace the numeric query type at words[i] with its name from
    query_type, modifying *words* in place.

    A number that has no entry in query_type is left as the original
    text. Raises ValueError if words[i] is not a decimal integer.
    """
    words[i] = query_type.get(int(words[i]), words[i])
111
def handle_dnscache_log(line, match):
    """Decode one dnscache log line and print it in readable form.

    Parameters:
        line: the full log line (currently unused; the needed pieces are
            taken from *match*).
        match: a regex match whose three groups are the timestamp, the
            event name, and the remaining text of the line.

    The remaining text is split on whitespace, selected fields are
    rewritten in place according to the event name, and the result is
    printed as "<timestamp> <event> <fields...>". Events that are not
    recognised are printed with their fields untouched.
    """
    (timestamp, event, data) = match.groups()

    words = data.split()
    if event == "cached":
        # Some "cached" lines carry a symbolic first word instead of a
        # numeric type; only the numeric ones need decoding.
        if words[0] not in ("cname", "ns", "nxdomain"):
            decode_type(words, 0)

    elif event == "drop":
        decode_serial(words, 0)

    elif event == "lame":
        decode_ip(words, 0)

    elif event == "nodata":
        decode_ip(words, 0)
        decode_ttl(words, 1)
        decode_type(words, 2)

    elif event == "nxdomain":
        decode_ip(words, 0)
        decode_ttl(words, 1)

    elif event == "query":
        decode_serial(words, 0)
        decode_client(words, 1)
        decode_type(words, 2)

    elif event == "rr":
        decode_ip(words, 0)
        decode_ttl(words, 1)
        if words[2] not in ("cname", "mx", "ns", "ptr", "soa"):
            decode_type(words, 2)
            if words[2] == "a":         # decode answer to an A query
                decode_ip(words, 4)
            if words[2] == "txt":       # text record
                response = words[4]
                if response.endswith("..."):
                    ellipsis = "..."
                    response = response[0:-3]
                else:
                    ellipsis = ""
                # The first hex pair is the length byte; every following
                # pair is one character of the text.
                length = int(response[0:2], 16)
                # Integer division is required here: range() rejects the
                # float that "/" produces on Python 3.
                chars = [chr(int(response[2 * k : 2 * k + 2], 16))
                         for k in range(1, len(response) // 2)]
                words[4] = "%d:\"%s%s\"" % (length, "".join(chars), ellipsis)

    elif event == "sent":
        decode_serial(words, 0)

    elif event == "stats":
        words[0] = "count=%s" % words[0]
        words[1] = "motion=%s" % words[1]
        words[2] = "udp-active=%s" % words[2]
        words[3] = "tcp-active=%s" % words[3]

    elif event == "tx":
        words[0] = "g=%s" % words[0]
        decode_type(words, 1)
        # words[2] = name
        # words[3] = control (domain for which these servers are believed
        #            to be authoritative)
        for k in range(4, len(words)):
            decode_ip(words, k)

    elif event in ("tcpopen", "tcpclose"):
        decode_client(words, 0)

    print(timestamp, event, " ".join(words))
182
183
def handle_tinydns_log(line, match):
    """Decode one tinydns log line and print it in readable form.

    Parameters:
        line: the full log line (currently unused; the needed pieces are
            taken from *match*).
        match: a regex match whose seven groups are, in order: the
            timestamp, client IP (hex), client port (hex), query id
            (hex), one-character result code, query type (hex), and the
            name queried.

    The timestamp and the decoded message are printed together on a
    single line.
    """
    (timestamp, ip, port, query_id, code, qtype, name) = match.groups()
    ip = convert_ip(ip)
    port = int(port, 16)
    query_id = int(query_id, 16)
    qtype = int(qtype, 16)                  # "001c" -> 28
    qtype = query_type.get(qtype, qtype)    # 28 -> "aaaa"

    if code == "+":
        message = ("sent response to %s:%s (id %s): %s %s"
                   % (ip, port, query_id, qtype, name))
    elif code in ("-", "I", "C"):
        reason = query_drop_reason[code]
        message = ("dropped query (%s) from %s:%s (id %s): %s %s"
                   % (reason, ip, port, query_id, qtype, name))
    elif code == "/":
        message = ("dropped query (couldn't parse) from %s:%s"
                   % (ip, port))
    else:
        message = ("%s from %s:%s (id %s): %s %s"
                   % (code, ip, port, query_id, qtype, name))

    # One print call keeps the timestamp and the message on the same
    # line, separated by a single space.
    print(timestamp, message)
207
208
def parse_logfile(file, filename):
    """Translate every line of an open log file and print the result.

    Parameters:
        file: an iterable of text lines (an open file or sys.stdin).
        filename: a display name for *file*; accepted for the caller's
            convenience but not currently used here.

    Each line is written to a tai64nlocal child process and the
    corresponding line with a readable timestamp is read back. That line
    is then handed to the tinydns handler if it matches the tinydns
    format, otherwise to the dnscache handler if it matches that format,
    and otherwise echoed unchanged.
    """
    # Open pipe to tai64nlocal: we will write lines of our input (the
    # raw log file) to it, and read log lines with readable timestamps
    # from it. Using the process as a context manager closes both pipes
    # and waits for the child when we are done, so no process or file
    # descriptor is left behind for each log file parsed.
    with Popen(["tai64nlocal"], stdin=PIPE, stdout=PIPE,
               text=True, bufsize=0) as tai:
        for line in file:
            # We read back exactly one line per line written. A final
            # line with no trailing newline would leave the child with
            # an incomplete line and readline() below waiting on it,
            # so terminate the line ourselves.
            if not line.endswith("\n"):
                line += "\n"

            tai.stdin.write(line)
            line = tai.stdout.readline()

            match = tinydns_log_re.match(line)
            if match:
                handle_tinydns_log(line, match)
                continue

            match = dnscache_log_re.match(line)
            if match:
                handle_dnscache_log(line, match)
                continue

            # Not a format we recognise: pass it through untouched.
            sys.stdout.write(line)
230
def main():
    """Parse every log file named on the command line.

    An argument of "-" means standard input. With no arguments at all,
    standard input is parsed once.
    """
    # Treating "no arguments" as a single "-" lets one loop serve both
    # cases.
    targets = sys.argv[1:] or ["-"]
    for target in targets:
        if target == "-":
            parse_logfile(sys.stdin, "(stdin)")
            continue
        with open(target) as handle:
            parse_logfile(handle, target)
241
242
243
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()