Begin moving the name parsers to the Network.DNS.RBL.Domain namespace.
[dead/harbl.git] / harbl / src / Network / DNS / RBL / Domain / LdhStr.hs
1 -- | The 'LdhStr' type and a Parsec parser to parse one. We don't
2 -- export its constructor because then you could do something dumb
3 -- like stick a semicolon inside one.
4 --
5 -- These are defined in RFC1035, Section 2.3.1, \"Preferred name
6 -- syntax\" <https://tools.ietf.org/html/rfc1035#section-2.3.1>:
7 --
8 -- <ldh-str> ::= <let-dig-hyp> | <let-dig-hyp> <ldh-str>
9 --
10 -- We export our constructors so that we can pattern match to find
11 -- out whether or not we have a hyphen at the end of a label.
12 --
13 module Network.DNS.RBL.Domain.LdhStr (
14 LdhStr(..),
15 ldh_str,
16 ldh_str_length )
17 where
18
19 import Text.Parsec ( (<|>), try )
20 import Text.Parsec.String ( Parser )
21
22 import Network.DNS.RBL.Domain.LetDigHyp ( LetDigHyp, let_dig_hyp )
23 import Network.DNS.RBL.Pretty ( Pretty(..) )
24 import Network.DNS.RBL.Reversible ( Reversible(..) )
25
26
27 -- | A string of letters, digits, and hyphens from the RFC1035 grammar:
28 --
29 -- <ldh-str> ::= <let-dig-hyp> | <let-dig-hyp> <ldh-str>
30 --
31 -- These are represented as either a single instance of a
32 -- 'LetDigHyp', or a string of them (recursive).
33 --
34 -- ==== _Examples_
35 --
36 -- >>> import Text.Parsec ( parse )
37 --
38 -- We can create an 'LdhStrSingleLdh' from a single (let-dig-hyp)
39 -- character:
40 --
41 -- >>> let (Right r) = parse let_dig_hyp "" "x"
42 -- >>> LdhStrSingleLdh r
43 -- LdhStrSingleLdh (LetDigHypLetDig (LetDigLetter (Letter 'x')))
44 --
45 -- >>> let (Right r) = parse let_dig_hyp "" "1"
46 -- >>> LdhStrSingleLdh r
47 -- LdhStrSingleLdh (LetDigHypLetDig (LetDigDigit (Digit '1')))
48 --
49 -- >>> let (Right r) = parse let_dig_hyp "" "-"
50 -- >>> LdhStrSingleLdh r
51 -- LdhStrSingleLdh (LetDigHypHyphen (Hyphen '-'))
52 --
53 -- We can create an 'LdhStrMultipleLdh' from multiple (let-dig-hyp)
54 -- characters:
55 --
56 -- >>> let (Right r) = parse let_dig_hyp "" "x"
57 -- >>> let (Right r2) = parse let_dig_hyp "" "-"
58 -- >>> let (Right r3) = parse let_dig_hyp "" "1"
59 -- >>> let rs = LdhStrMultipleLdh r2 (LdhStrSingleLdh r3)
60 -- >>> pretty_print $ LdhStrMultipleLdh r rs
61 -- x-1
62 --
63 data LdhStr =
64 LdhStrSingleLdh LetDigHyp |
65 LdhStrMultipleLdh LetDigHyp LdhStr
66 deriving (Eq, Show)
67
68
69 -- | Pretty-printing for strings of letters, digits, and hyphens that
70 -- we've already parsed. Just shows/prints the underlying characters
71 -- (structural) recursively.
72 --
73 -- ==== _Examples_
74 --
75 -- >>> import Text.Parsec ( parse )
76 --
77 -- >>> let (Right r) = parse let_dig_hyp "" "x"
78 -- >>> pretty_print $ LdhStrSingleLdh r
79 -- x
80 --
81 -- >>> let (Right r) = parse let_dig_hyp "" "1"
82 -- >>> pretty_print $ LdhStrSingleLdh r
83 -- 1
84 --
85 -- >>> let (Right r) = parse let_dig_hyp "" "-"
86 -- >>> pretty_print $ LdhStrSingleLdh r
87 -- -
88 --
89 -- >>> let (Right r) = parse ldh_str "" "123"
90 -- >>> pretty_print $ r
91 -- 123
92 --
93 instance Pretty LdhStr where
94 pretty_show (LdhStrSingleLdh ldh) = pretty_show ldh
95 pretty_show (LdhStrMultipleLdh ldh s) = (pretty_show ldh) ++ (pretty_show s)
96
97
98 instance Reversible LdhStr where
99 -- | Reverse the characters of the given 'LdhStr'. We are bordering
100 -- on redundancy here, since the implementation of this is exactly
101 -- the same as for 'Subdomain'. However, if we were to generalize
102 -- 'LdhStr' to e.g. @Str LetDigHyp@ and 'Subdomain' to @Str
103 -- Label@, then we would get some semantic weirdness, like the
104 -- fact that the length of a subdomain includes the \".\"
105 -- characters. So pretty much only 'backwards' would be
106 -- generalizable.
107 --
108 -- ==== _Examples_
109 --
110 -- >>> import Text.Parsec ( parse )
111 --
112 -- Standard usage:
113 --
114 -- >>> let (Right r) = parse ldh_str "" "x"
115 -- >>> pretty_print $ backwards r
116 -- x
117 --
118 -- >>> let (Right r) = parse ldh_str "" "com"
119 -- >>> pretty_print $ backwards r
120 -- moc
121 --
122 -- >>> let (Right r) = parse ldh_str "" "example-com"
123 -- >>> pretty_print $ backwards r
124 -- moc-elpmaxe
125 --
126 -- >>> let (Right r) = parse ldh_str "" "www-example-com"
127 -- >>> pretty_print $ backwards r
128 -- moc-elpmaxe-www
129 --
130
131 -- The easy case, reversing a one-character string.
132 backwards s@(LdhStrSingleLdh _) = s
133
134 -- For multiple-character strings, we have two cases. The first is
135 -- where we have exactly two characters, and we just need to swap them.
136 backwards (LdhStrMultipleLdh l (LdhStrSingleLdh m)) =
137 LdhStrMultipleLdh m (LdhStrSingleLdh l)
138
139 -- And now the hard case. We do this in terms of another function,
140 -- 'build'. The 'build' function works on two strings at a time: the
141 -- first one, @dst@, is the one we're building. We start with @l@ as
142 -- our @dst@, and then append characters to it on the left from
143 -- another string. What's that other string? Just @s@! If we peel
144 -- things off the left of @s@ and stick them to the left of @l@ and
145 -- do that until we can't anymore, we will have reversed the string.
146 backwards (LdhStrMultipleLdh l s) = build (LdhStrSingleLdh l) s
147 where
148 -- Build up the first LdhStr on the left by peeling off elements
149 -- of the second from the left.
150 build :: LdhStr -> LdhStr -> LdhStr
151 build dst (LdhStrSingleLdh final) = LdhStrMultipleLdh final dst
152 build dst (LdhStrMultipleLdh leading rest) =
153 build (LdhStrMultipleLdh leading dst) rest
154
155
156 -- | Parse a string of letters, digits, and hyphens (an 'LdhStr').
157 --
158 -- ==== _Examples_
159 --
160 -- >>> import Text.Parsec ( parseTest )
161 --
162 -- Single letters, digits, and hyphens are parsed:
163 --
164 -- >>> parseTest ldh_str "a"
165 -- LdhStrSingleLdh (LetDigHypLetDig (LetDigLetter (Letter 'a')))
166 --
167 -- >>> parseTest ldh_str "0"
168 -- LdhStrSingleLdh (LetDigHypLetDig (LetDigDigit (Digit '0')))
169 --
170 -- >>> parseTest ldh_str "-"
171 -- LdhStrSingleLdh (LetDigHypHyphen (Hyphen '-'))
172 --
173 -- As well as strings of them:
174 --
175 -- >>> import Text.Parsec ( parse )
176 -- >>> pretty_print $ parse ldh_str "" "a0-b"
177 -- a0-b
178 --
179 ldh_str :: Parser LdhStr
180 ldh_str = try both <|> just_one
181 where
182 both :: Parser LdhStr
183 both = do
184 ldh1 <- let_dig_hyp
185 ldh_tail <- ldh_str
186 return $ LdhStrMultipleLdh ldh1 ldh_tail
187
188 just_one :: Parser LdhStr
189 just_one = fmap LdhStrSingleLdh let_dig_hyp
190
191
192
193
194 -- | Compute the length of an 'LdhStr'. It will be at least one, since
195 -- 'LdhStr's are non-empty. And if there's something other than the
196 -- first character present, we simply recurse.
197 --
198 -- ==== _Examples_
199 --
200 -- >>> import Text.Parsec ( parse )
201 --
202 -- >>> let (Right r) = parse ldh_str "" "a"
203 -- >>> ldh_str_length r
204 -- 1
205 --
206 -- >>> let (Right r) = parse ldh_str "" "abc-def"
207 -- >>> ldh_str_length r
208 -- 7
209 --
210 ldh_str_length :: LdhStr -> Int
211 ldh_str_length (LdhStrSingleLdh _) = 1
212 ldh_str_length (LdhStrMultipleLdh _ t) = 1 + (ldh_str_length t)