src/Domain.hs

   1 -- | The 'Domain' data type and its parser. A 'Domain' represents a
   2 --   name in the domain name system (DNS) as described by
   3 --   RFC1035. In particular, we enforce the restrictions from Section
   4 --   2.3.1 \"Preferred name syntax\". See for example,
   5 --
   6 --     <https://tools.ietf.org/html/rfc1035#section-2.3.1>
   7 --
   8 module Domain
   9 where
  10
  11 import Data.Char ( toLower )
  12 import Text.Parsec (
  13   ParseError,
  14   (<|>),
  15   alphaNum,
  16   char,
  17   eof,
  18   many1,
  19   option,
  20   optionMaybe,
  21   parse,
  22   string,
  23   try,
  24   unexpected )
  25 import qualified Text.Parsec as Parsec ( digit, letter)
  26 import Text.Parsec.String ( Parser )
  27
  28 newtype Domain = Domain String deriving Show
  29
  30
  31 -- | The derived instance of 'Eq' for domain names is incorrect. All
  32 --   comparisons are currently made case-insensitively. The following is
  33 --   an excerpt from RFC1035:
  34 --
  35 --     2.3.3. Character Case
  36 --
  37 --     For all parts of the DNS that are part of the official
  38 --     protocol, all comparisons between character strings (e.g.,
  39 --     labels, domain names, etc.)  are done in a case-insensitive
  40 --     manner...
  41 --
  42 --   So to compare two DNS names, we compare their lower-case
  43 --   counterparts.
  44 --
  45 instance Eq Domain where
  46   (Domain d1) == (Domain d2) =
  47     (map toLower d1) == (map toLower d2)
  48
  49
  50 -- * Digits
  51 newtype Digit = Digit Char deriving (Eq, Show)
  52
  53 digit :: Parser Digit
  54 digit = fmap Digit Parsec.digit
  55
  56
  57 -- * Letters
  58 newtype Letter = Letter Char deriving (Eq, Show)
  59
  60 letter :: Parser Letter
  61 letter = fmap Letter Parsec.letter
  62
  63
  64 -- * Letters/Digits
  65 data LetDig = LetDigLetter Letter | LetDigDigit Digit deriving (Eq, Show)
  66
  67 let_dig :: Parser LetDig
  68 let_dig = (fmap LetDigLetter letter) <|> (fmap LetDigDigit digit)
  69
  70
  71 -- * Hyphens
  72 newtype Hyphen = Hyphen Char deriving (Eq, Show)
  73
  74 hyphen :: Parser Hyphen
  75 hyphen = fmap Hyphen (char '-')
  76
  77
  78 -- * Letter, Digit, or Hyphen.
  79
  80 data LetDigHyp = LetDigHypLetDig LetDig
  81                | LetDigHypHyphen Hyphen
  82   deriving (Eq, Show)
  83
  84
  85 -- | The following is the simplest type in the domain grammar that
  86 --   isn't already implemented for us.
  87 --
  88 --     <let-dig> ::= <letter> | <digit>
  89 --
  90 --   ==== _Examples_
  91 --
  92 --   >>> import Text.Parsec ( parseTest )
  93 --
  94 --   Letters, digits, and hyphens are all parsed:
  95 --
  96 --   >>> parseTest let_dig_hyp "a"
  97 --   LetDigHypLetDig (LetDigLetter (Letter 'a'))
  98 --
  99 --   >>> parseTest let_dig_hyp "7"
 100 --   LetDigHypLetDig (LetDigDigit (Digit '7'))
 101 --
 102 --   >>> parseTest let_dig_hyp "-"
 103 --   LetDigHypHyphen (Hyphen '-')
 104 --
 105 --   However, an underscore (for example) is not:
 106 --
 107 --   >>> parseTest let_dig_hyp "_"
 108 --   parse error at (line 1, column 1):
 109 --   unexpected "_"
 110 --   expecting letter, digit or "-"
 111 --
 112 let_dig_hyp :: Parser LetDigHyp
 113 let_dig_hyp =
 114   parse_letdig <|> parse_hyphen
 115   where
 116     parse_letdig :: Parser LetDigHyp
 117     parse_letdig = fmap LetDigHypLetDig let_dig
 118
 119     parse_hyphen :: Parser LetDigHyp
 120     parse_hyphen = fmap LetDigHypHyphen hyphen
 121
 122
 123 -- * Letter/Digit/Hyphen strings
 124
 125 -- | A string of letters, digits, and hyphens from the RFC1035 grammar:
 126 --
 127 --     <ldh-str> ::= <let-dig-hyp> | <let-dig-hyp> <ldh-str>
 128 --
 129 --   These are represented as either a single instance of a
 130 --   'LetDigHyp', or a string of them (recursive).
 131 --
 132 --   ==== _Examples_
 133 --
 134 --   >>> import Text.Parsec ( parseTest )
 135 --
 136 --   Single letters, digits, and hyphens are parsed:
 137 --
 138 --   >>> parseTest ldh_str "a"
 139 --   LdhStrSingleLdh (LetDigHypLetDig (LetDigLetter (Letter 'a')))
 140 --
 141 --   >>> parseTest ldh_str "0"
 142 --   LdhStrSingleLdh (LetDigHypLetDig (LetDigDigit (Digit '0')))
 143 --
 144 --   >>> parseTest ldh_str "-"
 145 --   LdhStrSingleLdh (LetDigHypHyphen (Hyphen '-'))
 146 --
 147 --   As well as strings of them:
 148 --
 149 --   LdhStr (LetDigHypLetter (Letter 'a')) (LdhStr (LetDigHypDigit (Digit '0')) (LdhStr (LetDigHypHyphen (Hyphen '-')) (LdhStrSingleLdh (LetDigHypLetter (Letter 'b')))))
 150 --
 151 data LdhStr =
 152   LdhStrSingleLdh LetDigHyp | LdhStrMultipleLdh LetDigHyp LdhStr
 153   deriving (Eq, Show)
 154
 155 ldh_str :: Parser LdhStr
 156 ldh_str = try both <|> just_one
 157   where
 158     both :: Parser LdhStr
 159     both = do
 160       ldh1 <- let_dig_hyp
 161       ldh_tail <- ldh_str
 162       return $ LdhStrMultipleLdh ldh1 ldh_tail
 163
 164     just_one :: Parser LdhStr
 165     just_one = fmap LdhStrSingleLdh let_dig_hyp
 166
 167
 168
 169 -- | A version of 'last' that works on a 'LdhStr' rather than a
 170 --   list. That is, it returns the last 'LetDigHyp' in the
 171 --   string. Since 'LdhStr' contains at least one character, there's
 172 --   no \"nil\" case here.
 173 --
 174 --   ==== _Examples_
 175 --
 176 --   >>> let (Right r) = parse ldh_str "" "a"
 177 --   >>> last_ldh_str r
 178 --   LetDigHypLetDig (LetDigLetter (Letter 'a'))
 179 --
 180 --   >>> let (Right r) = parse ldh_str "" "abc-def"
 181 --   >>> last_ldh_str r
 182 --   LetDigHypLetDig (LetDigLetter (Letter 'f'))
 183 --
 184 last_ldh_str :: LdhStr -> LetDigHyp
 185 last_ldh_str (LdhStrSingleLdh x) = x
 186 last_ldh_str (LdhStrMultipleLdh _ x) = last_ldh_str x
 187
 188
 189 -- | A version of 'init' that works on a 'LdhStr' rather than a
 190 --   list. That is, it returns everything /except/ the last character in
 191 --   the string.
 192 --
 193 --   Since an 'LdhStr' must contain at least one character, this might
 194 --   not be opssible (when the input is of length one). So, we return
 195 --   a 'Maybe' value.
 196 --
 197 --   ==== _Examples_
 198 --
 199 --   >>> let (Right r) = parse ldh_str "" "a"
 200 --   >>> init_ldh_str r
 201 --   Nothing
 202 --
 203 --   >>> let (Right r) = parse ldh_str "" "ab"
 204 --   >>> init_ldh_str r
 205 --   Just (LdhStrSingleLdh (LetDigHypLetDig (LetDigLetter (Letter 'a'))))
 206 --
 207 --   >>> let (Right r) = parse ldh_str "" "abc-def"
 208 --   >>> init_ldh_str r
 209 --   Just (LdhStrMultipleLdh (LetDigHypLetDig (LetDigLetter (Letter 'a'))) (LdhStrMultipleLdh (LetDigHypLetDig (LetDigLetter (Letter 'b'))) (LdhStrMultipleLdh (LetDigHypLetDig (LetDigLetter (Letter 'c'))) (LdhStrMultipleLdh (LetDigHypHyphen (Hyphen '-')) (LdhStrMultipleLdh (LetDigHypLetDig (LetDigLetter (Letter 'd'))) (LdhStrSingleLdh (LetDigHypLetDig (LetDigLetter (Letter 'e')))))))))
 210 --
 211 init_ldh_str :: LdhStr -> Maybe LdhStr
 212 init_ldh_str (LdhStrSingleLdh _) = Nothing
 213 init_ldh_str (LdhStrMultipleLdh h t) =
 214   Just $ case (init_ldh_str t) of
 215            -- We just got the second-to-last character, we're done.
 216            Nothing   -> LdhStrSingleLdh h
 217
 218            -- There's still more stuff. Recurse.
 219            Just rest -> LdhStrMultipleLdh h rest
 220
 221
 222 -- | Compute the length of an 'LdhStr'. It will be at least one, since
 223 --   'LdhStr's are non-empty. And if there's something other than the
 224 --   first character present, we simply recurse.
 225 --
 226 --   ==== _Examples_
 227 --
 228 --   >>> let (Right r) = parse ldh_str "" "a"
 229 --   >>> length_ldh_str r
 230 --   1
 231 --
 232 --   >>> let (Right r) = parse ldh_str "" "abc-def"
 233 --   >>> length_ldh_str r
 234 --   7
 235 --
 236 length_ldh_str :: LdhStr -> Int
 237 length_ldh_str (LdhStrSingleLdh _) = 1
 238 length_ldh_str (LdhStrMultipleLdh _ t) = 1 + (length_ldh_str t)
 239
 240 -- * Letter/Digit/Hyphen string followed by a trailing Letter/Digit
 241
 242 -- | This type isn't explicitly part of the grammar, but it's what
 243 --   shows up in the square brackets of,
 244 --
 245 --     <label> ::= <letter> [ [ <ldh-str> ] <let-dig> ]
 246 --
 247 --   The ldh-str is optional, but if one is present, we must also have
 248 --   a trailing let-dig to prevent the name from ending with a
 249 --   hyphen. This can be represented with a @Maybe LdhStrLetDig@,
 250 --   which is why we're about to define it.
 251 --
 252 data LdhStrLetDig = LdhStrLetDig (Maybe LdhStr) LetDig
 253   deriving (Eq, Show)
 254
 255 -- | Parse an 'LdhStrLetDig'. This isn't in the grammar, but we might
 256 --   as well define the parser for it independently since we gave it
 257 --   its own data type.
 258 --
 259 --   ==== _Examples_
 260 --
 261 --   >>> import Text.Parsec ( parseTest )
 262 --
 263 --   Make sure we can parse a single character:
 264 --
 265 --   >>> parseTest ldh_str_let_dig "a"
 266 --   LdhStrLetDig Nothing (LetDigLetter (Letter 'a'))
 267 --
 268 --   And longer strings:
 269 --
 270 --   >>> parseTest ldh_str_let_dig "ab"
 271 --   LdhStrLetDig (Just (LdhStrSingleLdh (LetDigHypLetDig (LetDigLetter (Letter 'a'))))) (LetDigLetter (Letter 'b'))
 272 --
 273 --   >>> parseTest ldh_str_let_dig "-b"
 274 --   LdhStrLetDig (Just (LdhStrSingleLdh (LetDigHypHyphen (Hyphen '-')))) (LetDigLetter (Letter 'b'))
 275 --
 276 --   >>> parseTest ldh_str_let_dig "b-"
 277 --   parse error at (line 1, column 3):
 278 --   label cannot end with a hyphen
 279 --
 280 ldh_str_let_dig :: Parser LdhStrLetDig
 281 ldh_str_let_dig = do
 282   -- This will happily eat up the trailing let-dig...
 283   full_ldh <- ldh_str
 284
 285   -- So we have to go back and see what happened.
 286   case (last_ldh_str full_ldh) of
 287     (LetDigHypHyphen _) -> fail "label cannot end with a hyphen"
 288     (LetDigHypLetDig ld) ->
 289       -- Ok, the label didn't end with a hyphen; now we need to split
 290       -- off the last letter/digit so we can pack it into our return
 291       -- type separately.
 292       return $ case (init_ldh_str full_ldh) of
 293                  -- We only parsed one letter/digit. This can happen
 294                  -- if the label contains two characters. For example,
 295                  -- if we try to parse the label "ab", then the "a"
 296                  -- will be eaten by the label parser, and this
 297                  -- function will be left with only "b".
 298                  Nothing -> LdhStrLetDig Nothing ld
 299
 300                  -- Usual case: there's was some leading let-dig-hyp junk,
 301                  -- return it too.
 302                  leading_ldhs -> LdhStrLetDig leading_ldhs ld
 303
 304
 305
 306 -- | Compute the length of a 'LdhStrLetDig'. It's at least one, since
 307 --   the let-dig at the end is always there. And when there's an
 308 --   ldh-str too, we add its length to one.
 309 --
 310 --   ==== _Examples_
 311 --
 312 --   >>> let (Right r) = parse ldh_str_let_dig "" "a"
 313 --   >>> length_ldh_str_let_dig r
 314 --   1
 315 --
 316 --   >>> let (Right r) = parse ldh_str_let_dig "" "abc-def"
 317 --   >>> length_ldh_str_let_dig r
 318 --   7
 319 --
 320 length_ldh_str_let_dig :: LdhStrLetDig -> Int
 321 length_ldh_str_let_dig (LdhStrLetDig Nothing _) = 1
 322 length_ldh_str_let_dig (LdhStrLetDig (Just ldhstring) _) =
 323   1 + (length_ldh_str ldhstring)
 324
 325
 326 -- * Labels
 327
 328 -- | The label type from the RFC1035 grammar:
 329 --
 330 --     <label> ::= <letter> [ [ <ldh-str> ] <let-dig> ]
 331 --
 332 data Label = Label Letter (Maybe LdhStrLetDig)
 333   deriving (Eq, Show)
 334
 335
 336 -- | Parse a 'Label'.
 337 --
 338 --   In addition to the grammar, there's another restriction on
 339 --   labels: their length must be 63 characters or less. Quoting
 340 --   Section 2.3.1, \"Preferred name syntax\", of RFC1035:
 341 --
 342 --     The labels must follow the rules for ARPANET host names.  They
 343 --     must start with a letter, end with a letter or digit, and have
 344 --     as interior characters only letters, digits, and hyphen.  There
 345 --     are also some restrictions on the length.  Labels must be 63
 346 --     characters or less.
 347 --
 348 --   We check this only after we have successfully parsed a label.
 349 --
 350 --   ==== _Examples_
 351 --
 352 --   >>> import Text.Parsec ( parseTest )
 353 --
 354 --   Make sure we can parse a single character:
 355 --
 356 --   >>> parseTest label "a"
 357 --   Label (Letter 'a') Nothing
 358 --
 359 --   And longer strings:
 360 --
 361 --   >>> parseTest label "abc-def"
 362 --   Label (Letter 'a') (Just (LdhStrLetDig (Just (LdhStrMultipleLdh (LetDigHypLetDig (LetDigLetter (Letter 'b'))) (LdhStrMultipleLdh (LetDigHypLetDig (LetDigLetter (Letter 'c'))) (LdhStrMultipleLdh (LetDigHypHyphen (Hyphen '-')) (LdhStrMultipleLdh (LetDigHypLetDig (LetDigLetter (Letter 'd'))) (LdhStrSingleLdh (LetDigHypLetDig (LetDigLetter (Letter 'e'))))))))) (LetDigLetter (Letter 'f'))))
 363 --
 364 --   But not anything ending in a hyphen:
 365 --
 366 --   >>> parseTest label "abc-"
 367 --   parse error at (line 1, column 5):
 368 --   label cannot end with a hyphen
 369 --
 370 --   Or anything over 63 characters:
 371 --
 372 --   >>> parseTest label (['a'..'z'] ++ ['a'..'z'] ++ ['a'..'z'])
 373 --   parse error at (line 1, column 79):
 374 --   labels must be 63 or fewer characters
 375 --
 376 --   However, /exactly/ 63 characters is acceptable:
 377 --
 378 --   TODO
 379 --
 380 label :: Parser Label
 381 label = do
 382   l <- letter -- Guaranteed to be there
 383   maybe_s <- optionMaybe ldh_str_let_dig -- Might not be there
 384   case maybe_s of
 385     -- It can only be one character long, from the letter...
 386     Nothing -> return $ Label l maybe_s
 387
 388     -- The letter gives us one character, so we check that the rest is
 389     -- less than 62 characters long. But in the error message we need
 390     -- to report 63.
 391     Just s  -> if (length_ldh_str_let_dig s) <= 62
 392               then return $ Label l maybe_s
 393               else fail "labels must be 63 or fewer characters"
 394
 395
 396 domain :: Parser Domain
 397 domain = undefined
 398
 399 --subdomain :: Parser Subdomain
 400 --subdomain = undefined