]> gitweb.michael.orlitzky.com - djbdns-logparse.git/blob - djbdns/dnscache.py
3dd6d5f4ac6282bd6aff3aa6610d4a84ef797715
[djbdns-logparse.git] / djbdns / dnscache.py
1 r"""
2 Functions and data specific to dnscache logs.
3 """
4 # Don't clobber the global compile() with a named import.
5 import re
6
7 from typing import Optional
8 from djbdns.common import QUERY_TYPE_NAME, TIMESTAMP_PAT, convert_ip
9
# The regex to match dnscache log lines. It captures three groups: the
# timestamp (whatever TIMESTAMP_PAT matches), the event name (a run of
# word characters following a single space), and the remainder of the
# line, which handle_dnscache_log() later splits into fields.
DNSCACHE_LOG_RE = re.compile(fr'({TIMESTAMP_PAT}) (\w+)(.*)')
12
13
def decode_client(words : list, i : int) -> None:
    r"""
    Rewrite the client field of a dnscache log entry in readable form.

    The client field comes in one of two colon-separated shapes:

      1. ``clientip:clientport`` (tcpopen and tcpclose entries),
      2. ``clientip:clientport:id`` ("query" entries).

    Every component is logged in hex. The address is passed through
    ``convert_ip``, the port is converted to decimal, and, for the
    three-part shape, the packet id is converted to decimal and
    appended in parentheses.

    Parameters
    ----------

    words : list
        The ``words`` list (a list of fields) from
        :func:`handle_dnscache_log`.

    i : int
        The index of the client field within ``words``

    Returns
    -------

    Nothing; the ``i``th entry in the ``words`` list is modified
    in-place.

    Examples
    --------

        >>> words = ["foo", "bar", "7f000001:9253", "quux"]
        >>> decode_client(words, 2)
        >>> words
        ['foo', 'bar', '127.0.0.1:37459', 'quux']

        >>> words = ["foo", "7f000001:a3db:4fb9", "bar", "quux"]
        >>> decode_client(words, 1)
        >>> words
        ['foo', '127.0.0.1:41947 (id 20409)', 'bar', 'quux']

    """
    fields = words[i].split(":")

    address = convert_ip(fields[0])
    port_number = int(fields[1], 16)
    endpoint = "{}:{}".format(address, port_number)

    # The address:port form is stored first, so it is already in place
    # by the time the optional packet id is parsed.
    words[i] = endpoint

    if len(fields) != 3:
        # clientip:clientport only; there is no packet id to append.
        return

    # A "query" entry: the third component is the packet id.
    query_id = int(fields[2], 16)
    words[i] = "{} (id {})".format(endpoint, query_id)
67
def decode_ip(words : list, i : int) -> None:
    r"""
    Rewrite a hex-encoded IP address field of a dnscache log entry.

    The lame, nodata, nxdomain, and rr entry types each carry a
    single "serverip" field. The field is replaced by whatever
    ``convert_ip`` returns for it.

    Parameters
    ----------

    words : list
        The ``words`` list (a list of fields) from
        :func:`handle_dnscache_log`.

    i : int
        The index of the ip field within ``words``

    Returns
    -------

    Nothing; the ``i``th entry in the ``words`` list is modified
    in-place.

    Examples
    --------

        >>> words = ["foo", "bar", "7f000001", "quux"]
        >>> decode_ip(words, 2)
        >>> words
        ['foo', 'bar', '127.0.0.1', 'quux']

        >>> words = ["foo", "00000000000000000000ffff7f000001", "bar", "quux"]
        >>> decode_ip(words, 1)
        >>> words
        ['foo', '0000:0000:0000:0000:0000:ffff:7f00:0001', 'bar', 'quux']
    """
    hex_address = words[i]
    words[i] = convert_ip(hex_address)
106
def decode_ttl(words : list, i : int) -> None:
    r"""
    Label the ttl field of a dnscache log entry.

    The nodata, nxdomain, and rr entry types each carry a single
    "ttl" field. Its value is left untouched; it is only prefixed
    with "TTL=" so that a reader of the human-readable log can tell
    what the number means.

    Parameters
    ----------

    words : list
        The ``words`` list (a list of fields) from
        :func:`handle_dnscache_log`.

    i : int
        The index of the ttl field within ``words``

    Returns
    -------

    Nothing; the ``i``th entry in the ``words`` list is modified
    in-place.

    Examples
    --------

        >>> words = ["c0a80101", "20865", "1", "www.example.com.", "5db8d822"]
        >>> decode_ttl(words, 1)
        >>> words
        ['c0a80101', 'TTL=20865', '1', 'www.example.com.', '5db8d822']

    """
    ttl = words[i]
    words[i] = "TTL={}".format(ttl)
142
def decode_serial(words : list, i : int) -> None:
    r"""
    Label the serial field of a dnscache log entry.

    The drop and query entry types each carry a single "serial"
    field. The value is kept exactly as logged and merely gets a
    leading hash mark.

    Parameters
    ----------

    words : list
        The ``words`` list (a list of fields) from
        :func:`handle_dnscache_log`.

    i : int
        The index of the serial field within ``words``

    Returns
    -------

    Nothing; the ``i``th entry in the ``words`` list is modified
    in-place.

    Examples
    --------

        >>> words = ["1", "7f000001:a3db:4fb9", "1", "www.example.com."]
        >>> decode_serial(words, 0)
        >>> words
        ['#1', '7f000001:a3db:4fb9', '1', 'www.example.com.']

    """
    serial = words[i]
    words[i] = "#{}".format(serial)
177
def decode_type(words : list, i : int) -> None:
    r"""
    Replace a numeric query type in a dnscache log entry by its name.

    The cached, nodata, query, rr, and tx entries each carry a single
    "type" field. The field is parsed as a decimal integer and looked
    up in ``QUERY_TYPE_NAME``; when the map has no entry for that
    number, the field is left as it was logged.

    Parameters
    ----------

    words : list
        A list with the "type" string at index ``i``

    i : int
        The index of the type field within ``words``

    Returns
    -------

    Nothing; the ``i``th entry in the ``words`` list is modified
    in-place.

    Examples
    --------

        >>> words = ["2", "7f000001:b848:0f0b", "16", "example.com."]
        >>> decode_type(words, 2)
        >>> words
        ['2', '7f000001:b848:0f0b', 'txt', 'example.com.']

    """
    logged_type = words[i]
    # Fall back to the logged text itself when the number has no name.
    type_name = QUERY_TYPE_NAME.get(int(logged_type), logged_type)
    words[i] = type_name
214
def _decode_txt_rdata(response : str) -> str:
    r"""
    Helper function to render the hex-encoded data of a TXT record.

    The first byte is shown as a decimal length and every following
    byte is turned into the character with that code point. A
    trailing "..." on the logged data is carried over to the end of
    the decoded text.

    Parameters
    ----------

    response : str
        The hex string from the data field of an "rr" entry,
        possibly followed by "...".

    Returns
    -------

    A string of the form ``length:"text"``. If there are fewer than
    two hex digits (so no length byte can be read), the field is
    returned as it was given.

    Examples
    --------

        >>> _decode_txt_rdata("0568656c6c6f")
        '5:"hello"'

        >>> _decode_txt_rdata("ff68656c6c6f...")
        '255:"hello..."'

        >>> _decode_txt_rdata("")
        ''

    """
    if response.endswith("..."):
        ellipsis = "..."
        response = response[0:-3]
    else:
        ellipsis = ""

    if len(response) < 2:
        # Not even a length byte; there is nothing to decode.
        return response + ellipsis

    length = int(response[0:2], 16)
    # Walk the remaining hex digits two at a time. A dangling odd
    # digit at the very end is ignored.
    # NOTE(review): bytes are emitted verbatim, control characters
    # included; the data comes from remote servers.
    txt = "".join(chr(int(response[k : k+2], 16))
                  for k in range(2, len(response) - 1, 2))
    return f"{length}:\"{txt}{ellipsis}\""


def handle_dnscache_log(line : str) -> Optional[str]:
    r"""
    Handle a single log line if it matches the ``DNSCACHE_LOG_RE`` regex.

    The line is split into a timestamp, an event name, and the
    event's whitespace-separated fields. Depending on the event, the
    fields are decoded in-place (addresses, serials, TTLs, query
    types...) and the line is put back together. Events that are not
    listed below are passed through with their fields untouched.

    For "rr" entries, the record data is only decoded when the data
    field is actually present; an entry logged without any data is
    returned without it rather than raising an ``IndexError``.

    Parameters
    ----------

    line : string
        The log line that might match ``DNSCACHE_LOG_RE``.

    Returns
    -------

    Either the human-readable string if the log line was handled (that
    is, if it was really a dnscache log line), or ``None`` if it was
    not.

    Examples
    --------

        >>> line = "2022-09-15 18:37:33.863805500 query 1 7f000001:a3db:4fb9 1 www.example.com."
        >>> handle_dnscache_log(line)
        '2022-09-15 18:37:33.863805500 query #1 127.0.0.1:41947 (id 20409) a www.example.com.'

        >>> line = "2022-09-15 18:37:33.863874500 tx 0 1 www.example.com. . c0a80101"
        >>> handle_dnscache_log(line)
        '2022-09-15 18:37:33.863874500 tx g=0 a www.example.com. . 192.168.1.1'

        >>> line = "2022-09-15 18:37:33.878529500 rr c0a80101 20865 1 www.example.com. 5db8d822"
        >>> handle_dnscache_log(line)
        '2022-09-15 18:37:33.878529500 rr 192.168.1.1 TTL=20865 a www.example.com. 93.184.216.34'

        >>> line = "2022-09-15 18:37:33.878529500 rr c0a80101 20865 1 www.example.com."
        >>> handle_dnscache_log(line)
        '2022-09-15 18:37:33.878529500 rr 192.168.1.1 TTL=20865 a www.example.com.'

        >>> line = "2022-09-15 18:37:33.878529500 rr c0a80101 20865 16 example.com. 0568656c6c6f"
        >>> handle_dnscache_log(line)
        '2022-09-15 18:37:33.878529500 rr 192.168.1.1 TTL=20865 txt example.com. 5:"hello"'

        >>> line = "2022-09-15 18:37:33.878532500 stats 1 43 1 0"
        >>> handle_dnscache_log(line)
        '2022-09-15 18:37:33.878532500 stats count=1 motion=43 udp-active=1 tcp-active=0'

        >>> line = "2022-09-15 18:37:33.878602500 sent 1 49"
        >>> handle_dnscache_log(line)
        '2022-09-15 18:37:33.878602500 sent #1 49'

        >>> line = "this line is nonsense"
        >>> handle_dnscache_log(line)

    """
    match = DNSCACHE_LOG_RE.match(line)
    if not match:
        return None

    (timestamp, event, data) = match.groups()

    words = data.split()
    if event == "cached":
        # These three are logged by name; anything else is a number.
        if words[0] not in ("cname", "ns", "nxdomain"):
            decode_type(words, 0)

    elif event == "drop":
        decode_serial(words, 0)

    elif event == "lame":
        decode_ip(words, 0)

    elif event == "nodata":
        decode_ip(words, 0)
        decode_ttl(words, 1)
        decode_type(words, 2)

    elif event == "nxdomain":
        decode_ip(words, 0)
        decode_ttl(words, 1)

    elif event == "query":
        decode_serial(words, 0)
        decode_client(words, 1)
        decode_type(words, 2)

    elif event == "rr":
        decode_ip(words, 0)
        decode_ttl(words, 1)
        # These record types are logged by name; the rest are numeric.
        if words[2] not in ("cname", "mx", "ns", "ptr", "soa"):
            decode_type(words, 2)

        # The data field is missing when the record carried no data,
        # so only decode it when it is there.
        if len(words) > 4:
            if words[2] == "a":
                # Decode the response to an 'A' query
                decode_ip(words, 4)
            elif words[2] == "txt":
                # Decode the TXT record's data from hex to ASCII.
                words[4] = _decode_txt_rdata(words[4])

    elif event == "sent":
        decode_serial(words, 0)

    elif event == "stats":
        words[0] = f"count={words[0]}"
        words[1] = f"motion={words[1]}"
        words[2] = f"udp-active={words[2]}"
        words[3] = f"tcp-active={words[3]}"

    elif event == "tx":
        words[0] = f"g={words[0]}"
        decode_type(words, 1)
        # words[2] = name
        # words[3] = control (domain for which these servers are believed
        #            to be authoritative)
        # Everything from index 4 onwards is a server address.
        for i in range(4, len(words)):
            decode_ip(words, i)

    elif event in ("tcpopen", "tcpclose"):
        decode_client(words, 0)

    # Reconstitute "data" (i.e. everything after the timestamp and the
    # event) from "words", which was originally obtained by splitting
    # "data".
    data = " ".join(words)
    return f"{timestamp} {event} {data}"