]> gitweb.michael.orlitzky.com - djbdns-logparse.git/blob - djbdns/dnscache.py
a67060bb545ed0d57dbb5ab7da6345e9dc7a2fe3
[djbdns-logparse.git] / djbdns / dnscache.py
1 # Don't clobber the global compile() with a named import.
2 import re
3
4 from typing import Optional
5 from djbdns.common import convert_ip, query_type_name, timestamp_pat
6
# The regex to match dnscache log lines. It captures three groups, in
# order: the timestamp, the event name (a single word), and whatever
# follows the event name on the line (the event's fields, including
# the whitespace that precedes them). handle_dnscache_log() unpacks
# exactly three groups, so timestamp_pat is presumably free of
# capturing groups of its own -- confirm in djbdns.common.
dnscache_log_re = re.compile(fr'({timestamp_pat}) (\w+)(.*)')


def decode_client(words : list, i : int):
    r"""
    Rewrite the client field of a dnscache log entry in a
    human-readable form.

    The client field comes in one of two colon-separated shapes:

      1. clientip:clientport, as found in tcpopen/tcpclose entries,
      2. clientip:clientport:id, as found in "query" entries.

    The IP address is passed through ``convert_ip``, and the port
    (and the packet id, when there is one) is converted from hex to
    decimal. In the three-part shape, the packet id is split off from
    the address and shown in parentheses after it.

    Parameters
    ----------

    words : list
        The list of fields (``words``) built by
        :func:`handle_dnscache_log`.

    i : int
        The position of the client field in ``words``.

    Returns
    -------

    Nothing; ``words[i]`` is overwritten in-place.

    Examples
    --------

        >>> words = ["foo", "bar", "7f000001:9253", "quux"]
        >>> decode_client(words, 2)
        >>> words
        ['foo', 'bar', '127.0.0.1:37459', 'quux']

        >>> words = ["foo", "7f000001:a3db:4fb9", "bar", "quux"]
        >>> decode_client(words, 1)
        >>> words
        ['foo', '127.0.0.1:41947 (id 20409)', 'bar', 'quux']

    """
    parts = words[i].split(":")

    address = convert_ip(parts[0])
    port = int(parts[1], 16)
    endpoint = f"{address}:{port}"

    # The address:port form is stored first; the packet id, if any,
    # is appended to it afterwards.
    words[i] = endpoint
    if len(parts) != 3:
        return

    # Only a "query" entry has the third (packet id) component.
    packet_id = int(parts[2], 16)
    words[i] = f"{endpoint} (id {packet_id})"

def decode_ip(words : list, i : int):
    r"""
    Rewrite the ip field of a dnscache log entry in a human-readable
    form.

    The lame, nodata, nxdomain, and rr entry types each carry one
    "serverip" field written in hex. It is replaced by whatever
    ``convert_ip`` returns for it; the examples below show the result
    for a four-byte and for a sixteen-byte address.

    Parameters
    ----------

    words : list
        The list of fields (``words``) built by
        :func:`handle_dnscache_log`.

    i : int
        The position of the ip field in ``words``.

    Returns
    -------

    Nothing; ``words[i]`` is overwritten in-place.

    Examples
    --------

        >>> words = ["foo", "bar", "7f000001", "quux"]
        >>> decode_ip(words, 2)
        >>> words
        ['foo', 'bar', '127.0.0.1', 'quux']

        >>> words = ["foo", "00000000000000000000ffff7f000001", "bar", "quux"]
        >>> decode_ip(words, 1)
        >>> words
        ['foo', '0000:0000:0000:0000:0000:ffff:7f00:0001', 'bar', 'quux']
    """
    hex_address = words[i]
    words[i] = convert_ip(hex_address)

def decode_ttl(words : list, i : int):
    r"""
    Label the ttl field of a dnscache log entry.

    The nodata, nxdomain, and rr entry types each carry one "ttl"
    field. Its value is kept exactly as logged and tagged with a
    "TTL=" prefix, so that a reader of the human-readable log can
    tell what the number means.

    Parameters
    ----------

    words : list
        The list of fields (``words``) built by
        :func:`handle_dnscache_log`.

    i : int
        The position of the ttl field in ``words``.

    Returns
    -------

    Nothing; ``words[i]`` is overwritten in-place.

    Examples
    --------

        >>> words = ["c0a80101", "20865", "1", "www.example.com.", "5db8d822"]
        >>> decode_ttl(words, 1)
        >>> words
        ['c0a80101', 'TTL=20865', '1', 'www.example.com.', '5db8d822']

    """
    ttl = words[i]
    words[i] = "TTL={}".format(ttl)

def decode_serial(words : list, i : int):
    r"""
    Label the serial field of a dnscache log entry.

    The drop and query entry types each carry one "serial" field.
    The value is logged in decimal already, so no conversion takes
    place; it is only given a leading hash mark.

    Parameters
    ----------

    words : list
        The list of fields (``words``) built by
        :func:`handle_dnscache_log`.

    i : int
        The position of the serial field in ``words``.

    Returns
    -------

    Nothing; ``words[i]`` is overwritten in-place.

    Examples
    --------

        >>> words = ["1", "7f000001:a3db:4fb9", "1", "www.example.com."]
        >>> decode_serial(words, 0)
        >>> words
        ['#1', '7f000001:a3db:4fb9', '1', 'www.example.com.']

    """
    serial = words[i]
    words[i] = "#{}".format(serial)

def decode_type(words : list, i : int):
    r"""
    Replace the numeric type field of a dnscache log entry with the
    name of the query type.

    The cached, nodata, query, rr, and tx entries each carry one
    "type" field. Unlike with tinydns entries, dnscache logs this
    field in decimal, so all that is needed is a lookup in the query
    type map. A number that has no entry in the map is left exactly
    as it was logged.

    Parameters
    ----------

    words : list
        A list with the "type" string at index ``i``.

    i : int
        The position of the type field in ``words``.

    Returns
    -------

    Nothing; ``words[i]`` is overwritten in-place.

    Examples
    --------

        >>> words = ["2", "7f000001:b848:0f0b", "16", "example.com."]
        >>> decode_type(words, 2)
        >>> words
        ['2', '7f000001:b848:0f0b', 'txt', 'example.com.']

    """
    logged_type = words[i]
    type_number = int(logged_type)
    # Fall back on the logged text when the number has no known name.
    words[i] = query_type_name.get(type_number, logged_type)

def _decode_txt_data(response : str) -> str:
    r"""
    Helper function to decode the hex-encoded data of a TXT record
    taken from an "rr" entry.

    The first byte is shown as a decimal length, and every following
    byte is turned into the character with that code point. A
    trailing "..." on the hex string is carried over to the end of
    the decoded text, inside the quotes.

    Parameters
    ----------

    response : str
        The hex string from the data field of the "rr" entry.

    Returns
    -------

    A string of the form ``length:"text"``.

    Raises
    ------

    ValueError
        If ``response`` does not start with two hex digits, or if it
        contains a pair of characters that is not valid hex.

    Examples
    --------

        >>> _decode_txt_data("0568656c6c6f")
        '5:"hello"'

        >>> _decode_txt_data("0568656c...")
        '5:"hel..."'

    """
    if response.endswith("..."):
        ellipsis = "..."
        response = response[0:-3]
    else:
        ellipsis = ""

    length = int(response[0:2], 16)

    # Byte 0 is the length shown above; the text begins at byte 1. An
    # unpaired hex digit at the very end is ignored.
    # NOTE(review): the characters are emitted verbatim, so record
    # data containing control characters will show up as such in the
    # output.
    txt = "".join(chr(int(response[2*i : (2*i)+2], 16))
                  for i in range(1, len(response)//2))
    return f"{length}:\"{txt}{ellipsis}\""


def _decode_dnscache_fields(event : str, words : list):
    r"""
    Helper function to decode, in-place, the fields of one dnscache
    log entry according to its event type.

    Parameters
    ----------

    event : str
        The event name that follows the timestamp on the log line.

    words : list
        The fields that follow the event name. Entries are replaced
        in-place by their human-readable forms. Events that are not
        recognized leave ``words`` untouched.

    Returns
    -------

    Nothing; ``words`` is modified in-place.

    Raises
    ------

    IndexError
        If the entry has fewer fields than its event type calls for.

    ValueError
        If a numeric field cannot be parsed.

    """
    if event == "cached":
        # These three appear in place of a numeric type.
        if words[0] not in ("cname", "ns", "nxdomain"):
            decode_type(words, 0)

    elif event == "drop":
        decode_serial(words, 0)

    elif event == "lame":
        decode_ip(words, 0)

    elif event == "nodata":
        decode_ip(words, 0)
        decode_ttl(words, 1)
        decode_type(words, 2)

    elif event == "nxdomain":
        decode_ip(words, 0)
        decode_ttl(words, 1)

    elif event == "query":
        decode_serial(words, 0)
        decode_client(words, 1)
        decode_type(words, 2)

    elif event == "rr":
        decode_ip(words, 0)
        decode_ttl(words, 1)
        # These five are logged by name rather than by number.
        if words[2] not in ("cname", "mx", "ns", "ptr", "soa"):
            decode_type(words, 2)
            # An entry whose record data is empty has no fifth field
            # at all, in which case there is nothing more to decode.
            if len(words) > 4:
                if words[2] == "a":
                    # Decode the response to an 'A' query
                    decode_ip(words, 4)
                if words[2] == "txt":
                    # Decode the TXT record's data from hex to ASCII.
                    words[4] = _decode_txt_data(words[4])

    elif event == "sent":
        decode_serial(words, 0)

    elif event == "stats":
        words[0] = f"count={words[0]}"
        words[1] = f"motion={words[1]}"
        words[2] = f"udp-active={words[2]}"
        words[3] = f"tcp-active={words[3]}"

    elif event == "tx":
        words[0] = f"g={words[0]}"
        decode_type(words, 1)
        # words[2] = name
        # words[3] = control (domain for which these servers are believed
        #            to be authoritative)
        for i in range(4, len(words)):
            decode_ip(words, i)

    elif event in ("tcpopen", "tcpclose"):
        decode_client(words, 0)


def handle_dnscache_log(line : str) -> Optional[str]:
    r"""
    Handle a single log line if it matches the ``dnscache_log_re`` regex.

    The fields that follow the event name are decoded according to
    the event type. A line with an unrecognized event, or one whose
    fields cannot be decoded (too few of them, or a number that does
    not parse), is still considered handled: it is returned with its
    fields untouched and separated by single spaces.

    Parameters
    ----------

    line : string
        The log line that might match ``dnscache_log_re``.

    Returns
    -------

    Either the human-readable string if the log line was handled (that
    is, if it was really a dnscache log line), or ``None`` if it was
    not.

    Examples
    --------

        >>> line = "2022-09-15 18:37:33.863805500 query 1 7f000001:a3db:4fb9 1 www.example.com."
        >>> handle_dnscache_log(line)
        '2022-09-15 18:37:33.863805500 query #1 127.0.0.1:41947 (id 20409) a www.example.com.'

        >>> line = "2022-09-15 18:37:33.863874500 tx 0 1 www.example.com. . c0a80101"
        >>> handle_dnscache_log(line)
        '2022-09-15 18:37:33.863874500 tx g=0 a www.example.com. . 192.168.1.1'

        >>> line = "2022-09-15 18:37:33.878529500 rr c0a80101 20865 1 www.example.com. 5db8d822"
        >>> handle_dnscache_log(line)
        '2022-09-15 18:37:33.878529500 rr 192.168.1.1 TTL=20865 a www.example.com. 93.184.216.34'

        >>> line = "2022-09-15 18:37:33.878532500 stats 1 43 1 0"
        >>> handle_dnscache_log(line)
        '2022-09-15 18:37:33.878532500 stats count=1 motion=43 udp-active=1 tcp-active=0'

        >>> line = "2022-09-15 18:37:33.878602500 sent 1 49"
        >>> handle_dnscache_log(line)
        '2022-09-15 18:37:33.878602500 sent #1 49'

        >>> line = "this line is nonsense"
        >>> handle_dnscache_log(line)

    An "rr" entry without any record data is decoded as far as it
    goes:

        >>> line = "2022-09-15 18:37:33.878529500 rr c0a80101 20865 1 www.example.com."
        >>> handle_dnscache_log(line)
        '2022-09-15 18:37:33.878529500 rr 192.168.1.1 TTL=20865 a www.example.com.'

    An entry that is missing fields is passed through undecoded
    rather than raising an error:

        >>> line = "2022-09-15 18:37:33.878532500 stats 1 43"
        >>> handle_dnscache_log(line)
        '2022-09-15 18:37:33.878532500 stats 1 43'

    """
    match = dnscache_log_re.match(line)
    if not match:
        return None

    (timestamp, event, data) = match.groups()

    words = data.split()
    try:
        _decode_dnscache_fields(event, words)
    except (IndexError, ValueError):
        # A truncated or garbled entry. Some of "words" may have been
        # rewritten before the failure, so start over from the raw
        # fields and pass them through the same way we do for an
        # event that we don't recognize.
        words = data.split()

    # Reconstitute "data" (i.e. everything after the timestamp and the
    # event) from "words", which was originally obtained by splitting
    # "data".
    data = " ".join(words)
    return f"{timestamp} {event} {data}"