Separate the Network.DNS.RBL.Weight module and fix the doctests.
[dead/harbl.git] / harbl / src / Network / DNS / RBL / Domain / Domain.hs
1 {-# LANGUAGE DoAndIfThenElse #-}
2
3 -- | The 'Domain' data type and its parser. A 'Domain' represents a
4 -- name in the domain name system (DNS) as described by
5 -- RFC1035. In particular, we enforce the restrictions from Section
6 -- 2.3.1 \"Preferred name syntax\". See for example,
7 --
8 -- <https://tools.ietf.org/html/rfc1035#section-2.3.1>
9 --
10 -- We basically work with strings and characters everywhere, even
11 -- though this isn't really correct. The length specifications in
12 -- the RFCs are all in terms of octets, so really a ByteString.Char8
13 -- would be more appropriate. With strings, for example, we could
14 -- have a unicode mumbo jumbo character that takes up two bytes
15 -- (octets).
16 --
17 module Network.DNS.RBL.Domain.Domain (
18 Domain(..),
19 domain )
20 where
21
22 import Text.Parsec (
23 (<|>),
24 string,
25 try )
26 import Text.Parsec.String ( Parser )
27
28 import Network.DNS.RBL.Domain.Subdomain ( Subdomain, subdomain )
29 import Network.DNS.RBL.Pretty ( Pretty(..) )
30 import Network.DNS.RBL.Reversible ( Reversible(..) )
31
32
33
34 -- | An RFC1035 domain. According to RFC1035 a domain can be either a
35 -- subdomain or \" \", which according to RFC2181
36 -- <https://tools.ietf.org/html/rfc2181#section-11> means the root:
37 --
38 -- The zero length full name is defined as representing the root
39 -- of the DNS tree, and is typically written and displayed as
40 -- \".\".
41 --
42 -- We let the 'Domain' type remain true to those RFCs, even though
43 -- they don't support an absolute domain name of e.g. a single dot.
44 --
45 -- ==== _Examples_
46 --
47 -- >>> DomainRoot
48 -- DomainRoot
49 --
50 -- >>> import Text.Parsec ( parse )
51 -- >>> let (Right r) = parse subdomain "" "x"
52 -- >>> DomainName r
53 -- DomainName (SubdomainSingleLabel (Label (LetDigLetter (Letter 'x')) Nothing))
54 --
55 data Domain =
56 DomainName Subdomain |
57 DomainRoot
58 deriving (Eq, Show)
59
60
61 -- | Pretty-print a 'Domain'.
62 --
63 -- ==== _Examples_
64 --
65 -- >>> pretty_show $ DomainRoot
66 -- ""
67 --
68 -- >>> import Text.Parsec ( parse )
69 -- >>> let (Right r) = parse subdomain "" "x"
70 -- >>> pretty_print $ DomainName r
71 -- x
72 --
73 instance Pretty Domain where
74 pretty_show DomainRoot = ""
75 pretty_show (DomainName s) = pretty_show s
76
77
78 -- | The maximum number of characters (octets, really) allowed in a
79 -- label. Quoting Section 3.1, \"Name space definitions\", of
80 -- RFC1035:
81 --
82 -- To simplify implementations, the total length of a domain name
83 -- (i.e., label octets and label length octets) is restricted to 255
84 -- octets or less.
85 --
86 domain_max_length :: Int
87 domain_max_length = 255
88
89
90 -- | Parse an RFC1035 \"domain\"
91 --
92 -- ==== _Examples_
93 --
94 -- >>> import Text.Parsec ( parse, parseTest )
95 --
96 -- Make sure we can parse a single character:
97 --
98 -- >>> pretty_print $ parse domain "" "a"
99 -- a
100 --
101 -- And the empty domain:
102 --
103 -- >>> parseTest domain ""
104 -- DomainRoot
105 --
106 -- We will in fact parse the \"empty\" domain off the front of
107 -- pretty much anything:
108 --
109 -- >>> parseTest domain "!8===D"
110 -- DomainRoot
111 --
112 -- Equality of domains is case-insensitive:
113 --
114 -- >>> let (Right r1) = parse domain "" "example.com"
115 -- >>> let (Right r2) = parse domain "" "ExaMPle.coM"
116 -- >>> r1 == r2
117 -- True
118 --
119 -- A single dot IS parsed as the root, but the dot isn't consumed:
120 --
121 -- >>> parseTest domain "."
122 -- DomainRoot
123 --
124 -- Anything over domain_max_length characters is an error, so the
125 -- root will be parsed:
126 --
127 -- >>> let big_l1 = replicate 63 'x'
128 -- >>> let big_l2 = replicate 63 'y' -- Avoid equal neighboring labels!
129 -- >>> let big_labels = big_l1 ++ "." ++ big_l2 ++ "."
130 -- >>> let big_subdomain = concat $ replicate 3 big_labels
131 -- >>> parseTest domain big_subdomain
132 -- DomainRoot
133 --
134 -- But exactly domain_max_length is allowed:
135 --
136 -- >>> import Data.List ( intercalate )
137 -- >>> let l1 = replicate 63 'w'
138 -- >>> let l2 = replicate 63 'x'
139 -- >>> let l3 = replicate 63 'y'
140 -- >>> let l4 = replicate 63 'z'
141 -- >>> let big_subdomain = intercalate "." [l1,l2,l3,l4]
142 -- >>> let (Right r) = parse domain "" big_subdomain
143 -- >>> length (pretty_show r)
144 -- 255
145 --
146 domain :: Parser Domain
147 domain = try parse_subdomain <|> parse_empty
148 where
149 parse_subdomain :: Parser Domain
150 parse_subdomain = do
151 s <- subdomain
152 if length (pretty_show s) <= domain_max_length
153 then return $ DomainName s
154 else fail $ "subdomains can be at most " ++
155 (show domain_max_length) ++
156 " characters"
157
158 parse_empty :: Parser Domain
159 parse_empty = string "" >> return DomainRoot
160
161
162 instance Reversible Domain where
163 -- | Reverse the labels of a 'Domain'.
164 --
165 -- -- ==== _Examples_
166 --
167 -- >>> import Text.Parsec ( parse )
168 --
169 -- The root reverses to itself:
170 --
171 -- >>> let (Right r) = parse domain "" ""
172 -- >>> backwards r
173 -- DomainRoot
174 --
175 -- >>> let (Right r) = parse domain "" "new.www.example.com"
176 -- >>> pretty_print $ backwards r
177 -- com.example.www.new
178 --
179 backwards DomainRoot = DomainRoot
180 backwards (DomainName s) = DomainName $ backwards s