]> gitweb.michael.orlitzky.com - djbdns-logparse.git/blob - djbdns/dnscache.py
djbdns/dnscache.py: add docstring for decode_type().
[djbdns-logparse.git] / djbdns / dnscache.py
1 from re import compile
2 from typing import Optional
3 from djbdns.common import *
4
# The regex to match dnscache log lines. It captures the timestamp
# (whatever ``timestamp_pat`` matches), then a single space, then the
# event name (one run of word characters), and finally everything
# that follows the event name. handle_dnscache_log() unpacks exactly
# three groups from a match, so ``timestamp_pat`` is presumably free
# of capture groups of its own -- confirm in djbdns.common.
dnscache_log_re = compile(fr'({timestamp_pat}) (\w+)(.*)')
7
8
def decode_client(words : list, i : int):
    r"""
    Rewrite the client field of a dnscache log entry in readable form.

    The client field comes in two colon-separated flavors,

      1. clientip:clientport, found in tcpopen/tcpclose entries,
      2. clientip:clientport:id, found in "query" entries.

    The address is passed through ``convert_ip`` and the port is
    converted from hex to decimal. When a third component is present,
    it is the packet id; it is also converted from hex to decimal and
    appended in parentheses after the address and port.

    Parameters
    ----------

    words : list
        The ``words`` list (a list of fields) from
        :func:`handle_dnscache_log`.

    i : int
        The index of the client field within ``words``

    Returns
    -------

    Nothing; the ``i``th entry in the ``words`` list is modified
    in-place.

    Examples
    --------

        >>> words = ["foo", "bar", "7f000001:9253", "quux"]
        >>> decode_client(words, 2)
        >>> words
        ['foo', 'bar', '127.0.0.1:37459', 'quux']

        >>> words = ["foo", "7f000001:a3db:4fb9", "bar", "quux"]
        >>> decode_client(words, 1)
        >>> words
        ['foo', '127.0.0.1:41947 (id 20409)', 'bar', 'quux']

    """
    parts = words[i].split(":")

    address = convert_ip(parts[0])
    port = int(parts[1], 16)
    words[i] = f"{address}:{port}"

    if len(parts) != 3:
        # Plain clientip:clientport (tcpopen/tcpclose); nothing more
        # to decode.
        return

    # A "query" entry's clientip:clientport:id field: the last part
    # is the packet id, in hex.
    packet_id = int(parts[2], 16)
    words[i] += f" (id {packet_id})"
62
def decode_ip(words : list, i : int):
    r"""
    Rewrite the ip field of a dnscache log entry in readable form.

    The lame, nodata, nxdomain, and rr entry types each carry one
    "serverip" field encoded in hex. The field is replaced by
    whatever ``convert_ip`` returns for it.

    Parameters
    ----------

    words : list
        The ``words`` list (a list of fields) from
        :func:`handle_dnscache_log`.

    i : int
        The index of the ip field within ``words``

    Returns
    -------

    Nothing; the ``i``th entry in the ``words`` list is modified
    in-place.

    Examples
    --------

        >>> words = ["foo", "bar", "7f000001", "quux"]
        >>> decode_ip(words, 2)
        >>> words
        ['foo', 'bar', '127.0.0.1', 'quux']

        >>> words = ["foo", "00000000000000000000ffff7f000001", "bar", "quux"]
        >>> decode_ip(words, 1)
        >>> words
        ['foo', '0000:0000:0000:0000:0000:ffff:7f00:0001', 'bar', 'quux']
    """
    hex_address = words[i]
    words[i] = convert_ip(hex_address)
101
def decode_ttl(words : list, i : int):
    r"""
    Label the ttl field of a dnscache log entry.

    The nodata, nxdomain, and rr entry types each carry one "ttl"
    field. Its value is left untouched; it is only prefixed with
    "TTL=" so that a human reading the log can tell what the number
    means.

    Parameters
    ----------

    words : list
        The ``words`` list (a list of fields) from
        :func:`handle_dnscache_log`.

    i : int
        The index of the ttl field within ``words``

    Returns
    -------

    Nothing; the ``i``th entry in the ``words`` list is modified
    in-place.

    Examples
    --------

        >>> words = ["c0a80101", "20865", "1", "www.example.com.", "5db8d822"]
        >>> decode_ttl(words, 1)
        >>> words
        ['c0a80101', 'TTL=20865', '1', 'www.example.com.', '5db8d822']

    """
    ttl = words[i]
    words[i] = f"TTL={ttl}"
137
def decode_serial(words : list, i : int):
    r"""
    Label the serial field of a dnscache log entry.

    The drop and query entry types each carry one "serial" field.
    It is already a decimal number, so the only change made is to
    put a hash ("#") in front of it.

    Parameters
    ----------

    words : list
        The ``words`` list (a list of fields) from
        :func:`handle_dnscache_log`.

    i : int
        The index of the serial field within ``words``

    Returns
    -------

    Nothing; the ``i``th entry in the ``words`` list is modified
    in-place.

    Examples
    --------

        >>> words = ["1", "7f000001:a3db:4fb9", "1", "www.example.com."]
        >>> decode_serial(words, 0)
        >>> words
        ['#1', '7f000001:a3db:4fb9', '1', 'www.example.com.']

    """
    serial = words[i]
    words[i] = f"#{serial}"
172
def decode_type(words : list, i : int):
    r"""
    Replace the numeric type field of a dnscache log entry by its name.

    The cached, nodata, query, rr, and tx entries each carry one
    "type" field. In dnscache logs (unlike tinydns logs) it is
    already written in decimal, so it is parsed as an integer and
    looked up in the ``query_type`` map. A number that has no entry
    in the map is left exactly as it was.

    Parameters
    ----------

    words : list
        A list with the "type" string at index ``i``

    i : int
        The index of the type field within ``words``

    Returns
    -------

    Nothing; the ``i``th entry in the ``words`` list is modified
    in-place.

    Examples
    --------

        >>> words = ["2", "7f000001:b848:0f0b", "16", "example.com."]
        >>> decode_type(words, 2)
        >>> words
        ['2', '7f000001:b848:0f0b', 'txt', 'example.com.']

    """
    raw_type = words[i]
    type_number = int(raw_type)
    # Fall back to the original text when the number is unknown.
    words[i] = query_type.get(type_number, raw_type)
209
def _format_txt_rdata(rdata : str) -> str:
    r"""
    Render the hex-encoded data of a logged TXT record as text.

    The first byte (two hex digits) is shown as a decimal length.
    Every following byte is turned into the character with that code
    point. A trailing "..." on the input is not decoded; it is carried
    over to the end of the quoted text instead.

    Examples
    --------

        >>> _format_txt_rdata("06763d73706631")
        '6:"v=spf1"'

        >>> _format_txt_rdata("06763d7370...")
        '6:"v=sp..."'

    """
    if rdata.endswith("..."):
        suffix = "..."
        rdata = rdata[0:-3]
    else:
        suffix = ""

    length = int(rdata[0:2], 16)
    # Walk the hex digits two at a time, skipping the length byte; a
    # dangling odd digit at the end is ignored.
    text = "".join(
        chr(int(rdata[k : k + 2], 16))
        for k in range(2, 2 * (len(rdata) // 2), 2)
    )
    return f"{length}:\"{text}{suffix}\""


def handle_dnscache_log(line : str) -> Optional[str]:
    r"""
    Translate one dnscache log line into a human-readable line.

    The line is first matched against ``dnscache_log_re``, which
    splits it into a timestamp, an event name, and the remaining
    text. The remaining text is split on whitespace into fields, and
    the fields are decoded according to the event name. Events that
    are not specifically recognized keep their fields as they are.

    Parameters
    ----------

    line : string
        The log line that might match ``dnscache_log_re``.

    Returns
    -------

    ``None`` if the line does not match ``dnscache_log_re``.
    Otherwise, a string consisting of the timestamp, the event name,
    and the (possibly decoded) fields joined by single spaces.

    Examples
    --------

        >>> line = "2022-09-15 18:37:33.863805500 query 1 7f000001:a3db:4fb9 1 www.example.com."
        >>> handle_dnscache_log(line)
        '2022-09-15 18:37:33.863805500 query #1 127.0.0.1:41947 (id 20409) a www.example.com.'

        >>> line = "2022-09-15 18:37:33.863874500 tx 0 1 www.example.com. . c0a80101"
        >>> handle_dnscache_log(line)
        '2022-09-15 18:37:33.863874500 tx g=0 a www.example.com. . 192.168.1.1'

        >>> line = "2022-09-15 18:37:33.878529500 rr c0a80101 20865 1 www.example.com. 5db8d822"
        >>> handle_dnscache_log(line)
        '2022-09-15 18:37:33.878529500 rr 192.168.1.1 TTL=20865 a www.example.com. 93.184.216.34'

        >>> line = "2022-09-15 18:37:33.878532500 stats 1 43 1 0"
        >>> handle_dnscache_log(line)
        '2022-09-15 18:37:33.878532500 stats count=1 motion=43 udp-active=1 tcp-active=0'

        >>> line = "2022-09-15 18:37:33.878602500 sent 1 49"
        >>> handle_dnscache_log(line)
        '2022-09-15 18:37:33.878602500 sent #1 49'

        >>> line = "this line is nonsense"
        >>> handle_dnscache_log(line)

    """
    parsed = dnscache_log_re.match(line)
    if parsed is None:
        return None

    timestamp, event, data = parsed.groups()
    fields = data.split()

    if event in ("drop", "sent"):
        # Both start with a serial number and need nothing else.
        decode_serial(fields, 0)

    elif event in ("tcpopen", "tcpclose"):
        decode_client(fields, 0)

    elif event == "lame":
        decode_ip(fields, 0)

    elif event == "cached":
        # These three appear by name rather than by number.
        if fields[0] not in ("cname", "ns", "nxdomain"):
            decode_type(fields, 0)

    elif event == "query":
        decode_serial(fields, 0)
        decode_client(fields, 1)
        decode_type(fields, 2)

    elif event == "nxdomain":
        decode_ip(fields, 0)
        decode_ttl(fields, 1)

    elif event == "nodata":
        decode_ip(fields, 0)
        decode_ttl(fields, 1)
        decode_type(fields, 2)

    elif event == "rr":
        decode_ip(fields, 0)
        decode_ttl(fields, 1)
        # These record types appear by name rather than by number,
        # and their data is left alone.
        if fields[2] not in ("cname", "mx", "ns", "ptr", "soa"):
            decode_type(fields, 2)
            record_type = fields[2]
            if record_type == "a":
                # The answer to an A query is an address.
                decode_ip(fields, 4)
            elif record_type == "txt":
                fields[4] = _format_txt_rdata(fields[4])

    elif event == "stats":
        labels = ("count", "motion", "udp-active", "tcp-active")
        for pos, label in enumerate(labels):
            fields[pos] = f"{label}={fields[pos]}"

    elif event == "tx":
        fields[0] = f"g={fields[0]}"
        decode_type(fields, 1)
        # fields[2] is the name, and fields[3] is the control (the
        # domain for which these servers are believed to be
        # authoritative). Everything after that is a server address.
        for pos in range(4, len(fields)):
            decode_ip(fields, pos)

    # Put the fields back together to form the new "data", that is,
    # everything after the timestamp and the event.
    decoded = " ".join(fields)
    return f"{timestamp} {event} {decoded}"
329 # "data".
330 data = " ".join(words)
331 return f"{timestamp} {event} {data}"