]> gitweb.michael.orlitzky.com - dead/htsn.git/blob - src/Main.hs
Bump version to 0.1.0.
[dead/htsn.git] / src / Main.hs
1 {-# LANGUAGE BangPatterns #-}
2 {-# LANGUAGE DoAndIfThenElse #-}
3
4 module Main
5 where
6
7 -- System imports.
8 import Control.Applicative ( (<$>) )
9 import Control.Concurrent ( threadDelay )
10 import Control.Exception ( bracket, throw )
11 import Control.Monad ( when )
12 import Data.List ( isPrefixOf )
13 import Data.Maybe ( isNothing )
14 import Data.Monoid ( (<>) )
15 import Network (
16 connectTo,
17 PortID (PortNumber) )
18 import Network.Services.TSN.Logging ( init_logging )
19 import Network.Services.TSN.Report (
20 report_debug,
21 report_info,
22 report_warning,
23 report_error )
24 import Network.Services.TSN.Terminal ( display_sent )
25 import System.Console.CmdArgs ( def )
26 import System.Directory ( doesFileExist )
27 import System.Exit ( ExitCode(..), exitWith )
28 import System.FilePath ( (</>) )
29 import System.IO (
30 BufferMode (NoBuffering),
31 Handle,
32 hClose,
33 hGetChar,
34 hGetLine,
35 hPutStr,
36 hSetBuffering,
37 stderr,
38 stdout )
39 import System.IO.Error ( catchIOError )
40 import System.Timeout ( timeout )
41
42 -- Local imports.
43 import CommandLine ( get_args )
44 import Configuration ( Configuration(..), merge_optional )
45 import ExitCodes (
46 exit_no_feed_hosts,
47 exit_no_password,
48 exit_no_username,
49 exit_pidfile_exists )
50 import FeedHosts ( FeedHosts(..) )
51 import qualified OptionalConfiguration as OC (
52 OptionalConfiguration(..),
53 from_rc )
54 import Xml ( parse_xmlfid )
55 import Unix ( full_daemonize )
56
57
-- | Read one line from the given 'Handle', pass it (with a newline
--   appended) to 'report_debug', and then hand the line itself back
--   to the caller unchanged.
--
recv_line :: Handle -> IO String
recv_line h =
  hGetLine h >>= \received -> do
    -- Record what came over the wire before returning it.
    report_debug (received ++ "\n")
    return received
66
67
-- | Write an XML document (given as a 'String') into the output
--   directory named by the 'Configuration'. The file name is the
--   'show'n result of 'parse_xmlfid' on the document, with \".xml\"
--   appended.
--
--   Nothing is thrown on purpose here: when 'parse_xmlfid' returns a
--   'Left', its message is reported at the info level and nothing is
--   written. (The original author notes that this happens when TSN
--   sends a non-integer XML File ID, which is unsupported rather than
--   an error -- hence info and not error.)
--
--   If the destination file already exists, a warning is logged and
--   the file is overwritten.
--
save_document :: Configuration
              -> String -- ^ String representation of an XML document
              -> IO ()
save_document cfg doc =
  either report_info write_to target
  where
    -- Either the parser's complaint, or the full destination path.
    target = fmap to_path (parse_xmlfid doc)

    -- Turn a parsed XML File ID into "<output_directory>/<id>.xml".
    to_path xmlfid = output_directory cfg </> (show xmlfid ++ ".xml")

    -- Warn if we are about to clobber something, then write and log.
    write_to path = do
      clobbering <- doesFileExist path
      when clobbering $
        report_warning ("File " ++ path ++ " already exists, overwriting.")
      writeFile path doc
      report_info ("Wrote file: " ++ path ++ ".")
98
99
-- | The low-level \"loop forever\" function that we stay in for as
--   long as we are connected to one feed. Lines are read one at a
--   time and pushed onto @buffer@; whenever a line beginning with
--   \</message\> arrives, the accumulated document is handed to
--   'save_document' and the buffer is reset.
--
--   According to the documentation at
--   <http://www.sportsnetworkdata.com/feeds/xml-levels.asp>,
--   \<message\> is always the root element of a document and
--   \</message\> is the last line transmitted for it, which is what
--   makes this simple line-based framing possible.
--
--   Each read is wrapped in a fifteen-minute 'timeout'. If it
--   expires, a warning is logged and we return to the caller, which
--   is expected to drop the connection and start a new one. The
--   original author added this after observing the feed go silent
--   without closing the connection.
--
--   The original author notes that the bang pattern (at least the
--   one on @buffer@) is necessary for performance reasons.
--
loop :: Configuration
     -> Handle -- ^ Handle to the feed (network connection)
     -> [String] -- ^ Current XML document buffer, line-by-line, in reverse
     -> IO ()
loop !cfg !h !buffer = do
  received <- timeout fifteen_minutes (recv_line h)
  case received of
    -- Nothing arrived in time: log it and fall back to the caller.
    Nothing -> report_warning "No data received for 15 minutes."

    Just line
      -- A prefix test sidesteps line-ending differences (and any
      -- junk after the closing tag on the same line).
      | "</message>" `isPrefixOf` line -> do
          -- The buffer is newest-first, so flip it before gluing the
          -- lines together into a single document.
          save_document cfg (concat (reverse grown))
          loop cfg h [] -- Start the next document with an empty buffer.

      -- Mid-document: keep the line and read the next one.
      | otherwise -> loop cfg h grown
      where
        grown = line : buffer
  where
    -- Fifteen minutes, in the microseconds that 'timeout' expects.
    fifteen_minutes :: Int
    fifteen_minutes = 15 * 60 * 1000 * 1000
154
155
-- | Log in to a feed that we have just connected to. The protocol is
--   undocumented; per the original author, the following was guessed
--   and appears to work:
--
--   1. Read 10 characters and compare them with \"Username: \".
--
--   2. If they match, send the username followed by CR LF.
--
--   3. Read 10 more characters and compare them with \"Password: \".
--
--   4. If they match, send the password followed by CR LF.
--
--   5. Read (and discard) one more line -- the original author says
--      TSN sends \"The Sports Network\" here before the feed starts.
--
--   If either prompt is not what we expect, an error is reported and
--   the function returns without sending anything further.
--
--   NOTE(review): a failed prompt check returns @()@ exactly like a
--   successful login does, so the caller cannot tell them apart from
--   the result alone.
--
log_in :: Configuration -> Handle -> IO ()
log_in cfg h = do
  first_prompt <- recv_prompt h

  if first_prompt == username_prompt
    then do
      send_cred h (username cfg)
      second_prompt <- recv_prompt h

      if second_prompt == password_prompt
        then do
          send_cred h (password cfg)
          _ <- recv_line h -- "The Sports Network"
          report_info ("Logged in as " ++ username cfg ++ ".")
        else report_error "Didn't receive password prompt."
    else report_error "Didn't receive username prompt."
  where
    username_prompt = "Username: "
    password_prompt = "Password: "

    -- Send one credential, terminated by CR LF. The original author
    -- stresses that the carriage return is essential. What was sent
    -- goes to 'display_sent' and not to a report_* function; the
    -- original note reads "Don't log the username/password!".
    send_cred :: Handle -> String -> IO ()
    send_cred h' s = hPutStr h' terminated >> display_sent terminated
      where
        terminated = s ++ "\r\n"

    -- Read exactly @n@ characters, one at a time, and debug-log them.
    recv_chars :: Int -> Handle -> IO String
    recv_chars n h' = do
      chars <- mapM (const (hGetChar h')) [1..n]
      report_debug chars
      return chars

    -- Both prompts are ten characters long.
    recv_prompt :: Handle -> IO String
    recv_prompt = recv_chars 10
215
216
-- | Connect to @host@, log in, and process the feed for as long as
--   the connection stays useful. When anything goes wrong this
--   returns (or an exception escapes), and the caller is expected to
--   invoke it again later, probably with a different @host@.
--
--   Steps:
--
--   1. Connect to @host@ on port 4500.
--
--   2. Attempt to log in, giving up after five seconds.
--
--   3. Enter the read/save 'loop'.
--
--   The connection is managed with 'bracket', so the handle is closed
--   whether the action finishes normally or throws.
--
connect_and_parse :: Configuration
                  -> String -- ^ Hostname to connect to
                  -> IO ()
connect_and_parse cfg host =
  bracket open_feed close_feed session
  where
    -- Five seconds, in the microseconds that 'timeout' expects.
    login_timeout :: Int
    login_timeout = 5 * 1000 * 1000

    open_feed = do
      report_info ("Connecting to " ++ host ++ ".")
      connectTo host (PortNumber 4500)

    close_feed feed = do
      report_info ("Closing connection to " ++ host ++ ".")
      hClose feed

    session feed = do
      -- No buffering anywhere.
      hSetBuffering feed NoBuffering

      -- The original author reports that the feed often goes quiet
      -- after the username is sent (it happens over telnet as well),
      -- and that a packet dump shows XML arriving in the middle of
      -- the log-in exchange. It may be a bug on the remote end, or --
      -- since <http://www.sportsnetworkdata.com/feeds/xml-levels.asp>
      -- says only one connection per username is allowed per host --
      -- an unfriendly rejection. Either way, the simplest remedy is
      -- to give up quickly, disconnect, and try again.
      outcome <- timeout login_timeout (log_in cfg feed)

      -- If 'loop' ever returns (because of its own timeout), this
      -- action ends and 'close_feed' runs.
      maybe login_timed_out (const (loop cfg feed [])) outcome

    login_timed_out =
      report_info ("Login timed out (5 seconds). "
                   ++ "Waiting 5 seconds to reconnect.")
276
277
-- | Suspend the current thread for a number of seconds. This is
--   'threadDelay' with its argument scaled from seconds to
--   microseconds.
--
thread_sleep :: Int -- ^ Number of seconds for which to sleep.
             -> IO ()
thread_sleep seconds = threadDelay (seconds * microseconds_per_second)
  where
    microseconds_per_second :: Int
    microseconds_per_second = 1000000
286
287
-- | The entry point of the program: build the configuration, set up
--   logging, validate the required options, and then start cycling
--   through the feed hosts (as a daemon if so configured).
--
main :: IO ()
main = do
  rc_cfg <- OC.from_rc
  cmd_cfg <- get_args

  -- Combine the config file options with the command-line ones (the
  -- command-line ones are meant to take precedence), and lay the
  -- result over a default configuration.
  let opt_config = rc_cfg <> cmd_cfg
      cfg = (def :: Configuration) `merge_optional` opt_config

  -- Logging comes up before the checks below so that their failures
  -- can be logged.
  init_logging (log_level cfg) (log_file cfg) (syslog cfg)

  -- Required options are checked against the optional config, since
  -- that is where we can see whether they were supplied at all.
  when (isNothing (OC.password opt_config)) $
    bail "No password supplied." exit_no_password

  when (isNothing (OC.username opt_config)) $
    bail "No username supplied." exit_no_username

  -- The original author considers this case impossible: an empty
  -- list of feed hosts, whether from the command line or from the
  -- config file, is supposed to be replaced by the default list.
  when (null (get_feed_hosts (feed_hosts cfg))) $
    bail "No feed hosts supplied." exit_no_feed_hosts

  -- A PID file may be left behind by an unclean shutdown. It only
  -- matters when we are going to run as a daemon.
  when (daemonize cfg) $ do
    stale <- doesFileExist (pidfile cfg)
    when stale $
      bail ("PID file " ++ pidfile cfg ++ " already exists. "
            ++ "Refusing to start.")
           exit_pidfile_exists

  -- Possibly superstition (stderr is believed to be unbuffered
  -- already), but harmless.
  hSetBuffering stderr NoBuffering
  hSetBuffering stdout NoBuffering

  -- Everything else happens in here: connect to the first feed host
  -- and carry on round-robin from there.
  let run_program = round_robin cfg 0

  -- Daemonize if asked to; otherwise run in the foreground.
  if daemonize cfg
    then try_daemonize cfg run_program
    else run_program

  where
    -- Report an error and terminate with the given failure code.
    bail :: String -> Int -> IO ()
    bail msg code = report_error msg >> exitWith (ExitFailure code)

    -- The top-level \"loop forever\" function. An IOError raised
    -- while talking to a host is logged and otherwise ignored; then
    -- we wait five seconds and move on to the next host in the list,
    -- wrapping around at the end.
    round_robin :: Configuration -> Int -> IO ()
    round_robin cfg feed_host_idx = do
      catchIOError (connect_and_parse cfg current) (report_error . show)
      thread_sleep 5 -- Wait 5s before attempting to reconnect.
      round_robin cfg next_idx
      where
        hosts = get_feed_hosts (feed_hosts cfg)
        current = hosts !! feed_host_idx
        next_idx = (feed_host_idx + 1) `mod` length hosts
365
366
-- | An exception handler around 'full_daemonize'. If it fails with
--   an 'IOError', the error is reported and then re-raised, so the
--   program crashes. The original author considers this acceptable:
--   the program only needs to be resilient once it has actually
--   started.
--
--   The error is re-raised with 'ioError' and not the pure @throw@,
--   so that the rethrow is an ordinary IO action sequenced strictly
--   after the call to 'report_error'.
--
try_daemonize :: Configuration -> IO () -> IO ()
try_daemonize cfg program =
  catchIOError (full_daemonize cfg program) log_and_rethrow
  where
    -- Log first, then propagate the very same exception.
    log_and_rethrow :: IOError -> IO ()
    log_and_rethrow e = do
      report_error (show e)
      ioError e