]> gitweb.michael.orlitzky.com - djbdns-logparse.git/blob - djbdns/dnscache.py
*/*: reorganize for easy testing using setuptools or tox.
[djbdns-logparse.git] / djbdns / dnscache.py
1 from re import compile
2 from typing import Optional
3 from djbdns.common import *
4
# Compiled pattern for dnscache log lines. It captures three groups:
# the timestamp (whatever ``timestamp_pat`` matches), the event name
# (a single run of word characters following one space), and the
# remainder of the line, which holds the event-specific fields.
dnscache_log_re = compile(r'({}) (\w+)(.*)'.format(timestamp_pat))
7
8
def decode_client(words : list, i : int):
    r"""
    Decode the client field of a dnscache log entry, in-place.

    The client field comes in one of two colon-separated shapes,

      1. clientip:clientport, used by tcpopen/tcpclose entries,
      2. clientip:clientport:id, used by "query" entries.

    Every part is hexadecimal. The address is handed to
    ``convert_ip``, the port is converted to decimal, and, when the
    third part is present, the packet id is converted to decimal
    and appended in parentheses after the address and port.

    Parameters
    ----------

    words : list
        The ``words`` list (a list of fields) from
        :func:`handle_dnscache_log`.

    i : int
        The index of the client field within ``words``

    Returns
    -------

    Nothing; the ``i``th entry in the ``words`` list is modified
    in-place.

    Examples
    --------

        >>> words = ["foo", "bar", "7f000001:9253", "quux"]
        >>> decode_client(words, 2)
        >>> words
        ['foo', 'bar', '127.0.0.1:37459', 'quux']

        >>> words = ["foo", "7f000001:a3db:4fb9", "bar", "quux"]
        >>> decode_client(words, 1)
        >>> words
        ['foo', '127.0.0.1:41947 (id 20409)', 'bar', 'quux']

    """
    parts = words[i].split(":")

    address = convert_ip(parts[0])
    port = int(parts[1], 16)
    words[i] = f"{address}:{port}"

    if len(parts) != 3:
        # The two-part form (tcpopen/tcpclose) carries no packet id.
        return

    # The three-part form of a "query" entry ends with the packet id.
    packet_id = int(parts[2], 16)
    words[i] = f"{words[i]} (id {packet_id})"
62
def decode_ip(words : list, i : int):
    r"""
    Decode the ip field of a dnscache log entry, in-place.

    The lame, nodata, nxdomain, and rr entry types each carry a
    single hexadecimal "serverip" field. It is replaced by whatever
    ``convert_ip`` returns for it (see the examples below).

    Parameters
    ----------

    words : list
        The ``words`` list (a list of fields) from
        :func:`handle_dnscache_log`.

    i : int
        The index of the ip field within ``words``

    Returns
    -------

    Nothing; the ``i``th entry in the ``words`` list is modified
    in-place.

    Examples
    --------

        >>> words = ["foo", "bar", "7f000001", "quux"]
        >>> decode_ip(words, 2)
        >>> words
        ['foo', 'bar', '127.0.0.1', 'quux']

        >>> words = ["foo", "00000000000000000000ffff7f000001", "bar", "quux"]
        >>> decode_ip(words, 1)
        >>> words
        ['foo', '0000:0000:0000:0000:0000:ffff:7f00:0001', 'bar', 'quux']
    """
    hex_address = words[i]
    words[i] = convert_ip(hex_address)
101
def decode_ttl(words : list, i : int):
    r"""
    Label the ttl field of a dnscache log entry, in-place.

    The nodata, nxdomain, and rr entry types each carry a single
    "ttl" field. Its value is kept as-is and merely prefixed with
    "TTL=" so that a reader of the human-readable log can tell what
    the number means.

    Parameters
    ----------

    words : list
        The ``words`` list (a list of fields) from
        :func:`handle_dnscache_log`.

    i : int
        The index of the ttl field within ``words``

    Returns
    -------

    Nothing; the ``i``th entry in the ``words`` list is modified
    in-place.

    Examples
    --------

        >>> words = ["c0a80101", "20865", "1", "www.example.com.", "5db8d822"]
        >>> decode_ttl(words, 1)
        >>> words
        ['c0a80101', 'TTL=20865', '1', 'www.example.com.', '5db8d822']

    """
    ttl = words[i]
    words[i] = "TTL={}".format(ttl)
137
def decode_serial(words : list, i : int):
    r"""
    Label the serial field of a dnscache log entry, in-place.

    The drop, query, and sent entry types each carry a single
    "serial" field. No conversion is performed on its value; a hash
    mark is simply placed in front of it.

    Parameters
    ----------

    words : list
        The ``words`` list (a list of fields) from
        :func:`handle_dnscache_log`.

    i : int
        The index of the serial field within ``words``

    Returns
    -------

    Nothing; the ``i``th entry in the ``words`` list is modified
    in-place.

    Examples
    --------

        >>> words = ["1", "7f000001:a3db:4fb9", "1", "www.example.com."]
        >>> decode_serial(words, 0)
        >>> words
        ['#1', '7f000001:a3db:4fb9', '1', 'www.example.com.']

    """
    serial = words[i]
    words[i] = "#{}".format(serial)
172
def decode_type(words : list, i : int):
    r"""
    Helper function to decode the type field in a dnscache log
    entry.

    A numeric "type" field is decoded for the cached, nodata, query,
    rr, and tx entry types. We replace the number with the name it
    maps to in the ``query_type`` table. A number that has no entry
    in the table is left as-is, and so is a field that is not a
    decimal number at all (for example, a type that already appears
    in the log as a word like ``cname``).

    Parameters
    ----------

    words : list
        The ``words`` list (a list of fields) from
        :func:`handle_dnscache_log`.

    i : int
        The index of the type field within ``words``

    Returns
    -------

    Nothing; the ``i``th entry in the ``words`` list is modified
    in-place.

    Examples
    --------

        >>> words = ["1", "www.example.com."]
        >>> decode_type(words, 0)
        >>> words
        ['a', 'www.example.com.']

        >>> words = ["cname", "www.example.com."]
        >>> decode_type(words, 0)
        >>> words
        ['cname', 'www.example.com.']

    """
    qt = words[i]
    try:
        type_number = int(qt)
    except ValueError:
        # Not a number, so there is nothing to look up. Leave the
        # field untouched rather than aborting the whole log line.
        return

    # Fall back to the original text when the number is unknown.
    words[i] = query_type.get(type_number, qt)
176
def handle_dnscache_log(line : str) -> Optional[str]:
    r"""
    Translate one dnscache log line into its human-readable form.

    The line is matched against ``dnscache_log_re``, which splits it
    into a timestamp, an event name, and the remaining fields. The
    fields are then decoded according to the event name, and the
    line is put back together. Events that are not recognized have
    their fields passed through (rejoined with single spaces).

    Parameters
    ----------

    line : string
        The log line that might match ``dnscache_log_re``.

    Returns
    -------

    Either the human-readable string if the log line was handled (that
    is, if it matched ``dnscache_log_re``), or ``None`` if it was
    not.

    Examples
    --------

        >>> line = "2022-09-15 18:37:33.863805500 query 1 7f000001:a3db:4fb9 1 www.example.com."
        >>> handle_dnscache_log(line)
        '2022-09-15 18:37:33.863805500 query #1 127.0.0.1:41947 (id 20409) a www.example.com.'

        >>> line = "2022-09-15 18:37:33.863874500 tx 0 1 www.example.com. . c0a80101"
        >>> handle_dnscache_log(line)
        '2022-09-15 18:37:33.863874500 tx g=0 a www.example.com. . 192.168.1.1'

        >>> line = "2022-09-15 18:37:33.878529500 rr c0a80101 20865 1 www.example.com. 5db8d822"
        >>> handle_dnscache_log(line)
        '2022-09-15 18:37:33.878529500 rr 192.168.1.1 TTL=20865 a www.example.com. 93.184.216.34'

        >>> line = "2022-09-15 18:37:33.878532500 stats 1 43 1 0"
        >>> handle_dnscache_log(line)
        '2022-09-15 18:37:33.878532500 stats count=1 motion=43 udp-active=1 tcp-active=0'

        >>> line = "2022-09-15 18:37:33.878602500 sent 1 49"
        >>> handle_dnscache_log(line)
        '2022-09-15 18:37:33.878602500 sent #1 49'

        >>> line = "this line is nonsense"
        >>> handle_dnscache_log(line)

    """
    parsed = dnscache_log_re.match(line)
    if parsed is None:
        return None

    timestamp, event, data = parsed.groups()
    words = data.split()

    if event == "cached":
        # These three are already words; anything else is a numeric type.
        if words[0] not in ("cname", "ns", "nxdomain"):
            decode_type(words, 0)

    elif event in ("drop", "sent"):
        decode_serial(words, 0)

    elif event == "lame":
        decode_ip(words, 0)

    elif event in ("nodata", "nxdomain"):
        decode_ip(words, 0)
        decode_ttl(words, 1)
        if event == "nodata":
            # Only nodata entries have a type field after the ttl.
            decode_type(words, 2)

    elif event == "query":
        decode_serial(words, 0)
        decode_client(words, 1)
        decode_type(words, 2)

    elif event == "rr":
        decode_ip(words, 0)
        decode_ttl(words, 1)
        if words[2] not in ("cname", "mx", "ns", "ptr", "soa"):
            decode_type(words, 2)
            record_type = words[2]
            if record_type == "a":
                # The answer to an A query is an address.
                decode_ip(words, 4)
            elif record_type == "txt":
                # Text record: hex-encoded bytes, possibly cut short
                # and marked with a trailing "...".
                rdata = words[4]
                truncated = rdata.endswith("...")
                if truncated:
                    rdata = rdata[:-3]
                # The first byte is shown as a length; every later
                # byte becomes one character of the text.
                length = int(rdata[:2], 16)
                txt = "".join(
                    chr(int(rdata[k:k + 2], 16))
                    for k in range(2, 2 * (len(rdata) // 2), 2)
                )
                suffix = "..." if truncated else ""
                words[4] = f'{length}:"{txt}{suffix}"'

    elif event == "stats":
        labels = ("count", "motion", "udp-active", "tcp-active")
        for pos, label in enumerate(labels):
            words[pos] = f"{label}={words[pos]}"

    elif event == "tx":
        words[0] = f"g={words[0]}"
        decode_type(words, 1)
        # words[2] is the name and words[3] is the control (the
        # domain for which these servers are believed to be
        # authoritative); every field after those is a server address.
        for pos in range(4, len(words)):
            decode_ip(words, pos)

    elif event == "tcpopen" or event == "tcpclose":
        decode_client(words, 0)

    # Everything after the timestamp and the event is rebuilt from the
    # (possibly modified) fields that were split out of it above.
    return " ".join((timestamp, event, " ".join(words)))