File: Methods.hs

package info (click to toggle)
haskell-debian 3.64-3
  • links: PTS, VCS
  • area: main
  • in suites: wheezy
  • size: 364 kB
  • sloc: haskell: 3,226; ansic: 8; makefile: 3
file content (482 lines) | stat: -rw-r--r-- 19,748 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
{-# OPTIONS_GHC -fno-warn-missing-signatures -fno-warn-name-shadowing #-}
-- |an interface for using the methods in /var/lib/apt/methods
module Debian.Apt.Methods
    ( withMethodPath
    , withMethodURI
    , whichMethodPath
    , openMethod
    , closeMethod
    , recvStatus
    , sendCommand
    , getLastModified
    , simpleFetch
    , fetch
    , FetchCallbacks(..)
    , emptyFetchCallbacks
    , cliFetchCallbacks
    , Command(..)
    , Status(..)
    , Message, Site, User, Password, Media, Drive, Header, ConfigItem
    )
    where

import Debian.Time
import Debian.URI

import Control.Exception
import Control.Monad.Error
import Data.Maybe
import Data.Time
import System.Directory
import System.Exit
import System.IO
import System.Posix.Files
import System.Process

type MethodHandle = (Handle, Handle, Handle, ProcessHandle)

capabilities, logMsg, status, uriStart, uriDone, uriFailure, generalFailure, authorizationRequired, mediaFailure, uriAcquire, configuration, authorizationCredentials, mediaChanged :: String
capabilities = "100"
logMsg = "101"
status = "102"
uriStart = "200"
uriDone = "201"
uriFailure = "400"
generalFailure = "401"
authorizationRequired = "402"
mediaFailure = "403"
uriAcquire = "600"
configuration = "601"
authorizationCredentials = "602"
mediaChanged = "603"

type Message = String
type Site = String
type User = String
type Password = String
type Media = String
type Drive = String

data Status
    = Capabilities { version :: String, singleInstance :: Bool, preScan :: Bool, pipeline :: Bool, sendConfig :: Bool
                   , needsCleanup :: Bool, localOnly :: Bool }
    | LogMsg Message
    | Status URI Message
    | URIStart { uri :: URI, size :: Maybe Integer, lastModified :: Maybe UTCTime, resumePoint :: Maybe Integer }
    | URIDone { uri :: URI, size :: Maybe Integer,  lastModified :: Maybe UTCTime, resumePoint :: Maybe Integer
              , filename :: Maybe FilePath, hashes :: Hashes, imsHit :: Bool }
    | URIFailure { uri :: URI, message :: Message }
    | GeneralFailure Message
    | AuthorizationRequired Site
    | MediaFailure Media Drive
      deriving (Show, Eq)

data Hashes 
    = Hashes { md5 :: Maybe String
             , sha1 :: Maybe String
             , sha256 :: Maybe String
             }
      deriving (Show, Eq)

emptyHashes = Hashes Nothing Nothing Nothing

data Command
    = URIAcquire URI FilePath (Maybe UTCTime)
    | Configuration [ConfigItem]
    | AuthorizationCredentials Site User Password
    | MediaChanged Media (Maybe Bool) -- I don't really understand the Fail field, I am assuming it is 'Fail: true'
      deriving (Show, Eq)

type Header = (String, String)
type ConfigItem = (String, String)

withMethodURI :: URI -> (MethodHandle -> IO a) -> IO a
withMethodURI uri f =
    do  mp <- liftM fromJust (whichMethodPath uri)
        withMethodPath mp f

-- |withMethod - run |methodPath| bracketed with
-- openMethod\/closeMethod. |f| gets the open handle.
withMethodPath :: FilePath -> (MethodHandle -> IO a) -> IO a
withMethodPath methodPath f =
    bracket (openMethod methodPath) closeMethod $ f

-- |whichMethodBinary - find the method executable associated with a URI
-- throws an exception on failure
whichMethodPath :: URI -> IO (Maybe FilePath)
whichMethodPath uri = 
    let scheme = init (uriScheme uri)
        path = "/usr/lib/apt/methods/" ++ scheme
    in
      doesFileExist path >>= return . bool Nothing (Just path)

{-
The flow of messages starts with the method sending out a 
100 Capabilities and APT sending out a 601 Configuration.

The flow is largely unsynchronized, but our function may have to
respond to things like authorization requests. Perhaps we do a
recvContents and then mapM_ over that ? Not all incoming messages
require a response, so...

-}

parseStatus :: [String] -> Status
parseStatus (code' : headers') =
    parseStatus' (take 3 code') (map parseHeader headers')
    where
      parseStatus' code headers
          | code == capabilities =
              foldr updateCapability defaultCapabilities headers
                  where
                    updateCapability (a,v) c
                        | a == "Version"         = c { version = v }
                        | a == "Single-Instance" = c { singleInstance = parseTrueFalse v }
                        | a == "Pre-Scan"        = c { preScan = parseTrueFalse v }
                        | a == "Pipeline"        = c { pipeline = parseTrueFalse v }
                        | a == "Send-Config"     = c { sendConfig = parseTrueFalse v }
                        | a == "Needs-Cleanup"   = c { needsCleanup = parseTrueFalse v }
                        | a == "Local-Only"	 = c { localOnly = parseTrueFalse v }
                        | otherwise = error $ "unknown capability: " ++ show (a,v)
                    defaultCapabilities = 
                        Capabilities { version = ""
                                     , singleInstance = False
                                     , preScan 	      = False 
                                     , pipeline	      = False
                                     , sendConfig     = False
                                     , needsCleanup   = False
                                     , localOnly      = False
                                     }
      parseStatus' code headers
          | code == logMsg =
              case headers of
                [("Message", msg)] -> LogMsg msg
          | code == status =
                Status (fromJust $ parseURI $ fromJust $ lookup "URI" headers) (fromJust $ lookup "Message" headers)
          | code == uriStart =
              foldr updateUriStart (URIStart undefined Nothing Nothing Nothing) headers
                  where
                    updateUriStart (a,v) u
                        | a == "URI" = u { uri = fromJust $ parseURI v }
                        | a == "Size" = u { size = Just (read v) }
                        | a == "Last-Modified" = u { lastModified = parseTimeRFC822 v } -- if the date is unparseable, we silently truncate. Is that bad ?
                        | a == "Resume-Point" = u { resumePoint = Just (read v) }
      parseStatus' code headers
          | code == uriDone =
              foldr updateUriDone (URIDone undefined Nothing Nothing Nothing Nothing emptyHashes False) headers
                  where
                    updateUriDone (a,v) u
                        | a == "URI" = u { uri = fromJust $ parseURI v }
                        | a == "Size" = u { size = Just (read v) }
                        | a == "Last-Modified" = u { lastModified = parseTimeRFC822 v } -- if the date is unparseable, we silently truncate. Is that bad ?
                        | a == "Filename" = u { filename = Just v }
                        | a == "MD5Sum-Hash" = u { hashes = (hashes u) { md5    = Just v } }
                        | a == "MD5-Hash" = u { hashes = (hashes u) { md5    = Just v } }
                        | a == "SHA1-Hash"   = u { hashes = (hashes u) { sha1   = Just v } }
                        | a == "SHA256-Hash" = u { hashes = (hashes u) { sha256 = Just v } }
                        | a == "Resume-Point" = u { resumePoint = Just (read v) }
                        | a == "IMS-Hit" && v == "true" = u { imsHit = True }
                        | otherwise = error $ "updateUriDone: unknown header: " ++ show (a,v)
      parseStatus' code headers
          | code == uriFailure =
              URIFailure (fromJust $ parseURI $ fromJust $ lookup "URI" headers) (fromJust $ lookup "Message" headers)
          | code == generalFailure =
              GeneralFailure (fromJust $ lookup "Message" headers)
          | code == authorizationRequired = 
              AuthorizationRequired (fromJust $ lookup "Site" headers)
          | code == mediaFailure =
              MediaFailure (fromJust $ lookup "Media" headers) (fromJust $ lookup "Drive" headers)


formatCommand :: Command -> [String]
formatCommand (URIAcquire uri filepath mLastModified) =
    [ uriAcquire ++ " URI Acquire"
    , "URI: " ++ uriToString' uri -- will this get credentials correct ? Or do we always pass those in seperately
    , "FileName: " ++ filepath
    ] ++ maybe [] (\lm -> ["Last-Modified: " ++ formatTimeRFC822 lm ]) mLastModified
formatCommand (Configuration configItems) =
    (configuration ++ " Configuration") : (map formatConfigItem configItems) 
    where
      formatConfigItem (a,v) = concat ["Config-Item: ", a, "=", v]
formatCommand (AuthorizationCredentials site user passwd) =
    (authorizationCredentials ++ " Authorization Credentials") :
    [ "Site: " ++ site
    , "User: " ++ user
    , "Password: " ++ passwd
    ]
formatCommand (MediaChanged media mFail) =
    [ mediaChanged ++ " Media Changed"
    , "Media: " ++ media
    ] ++ maybe [] (\b -> ["Fail: " ++ case b of True -> "true" ; False -> "false"]) mFail
      

parseTrueFalse :: String -> Bool
parseTrueFalse "true" = True
parseTrueFalse "false" = False
parseTrueFalse s = error $ "Invalid boolean string: " ++ s


recvStatus :: MethodHandle -> IO Status
recvStatus mh = liftM parseStatus $ recv mh

sendCommand :: MethodHandle -> Command -> IO ()
sendCommand mh cmd = sendMethod mh (formatCommand cmd)


parseHeader :: String -> Header
parseHeader str =
    let (a, r) = span (/= ':') str
        v = dropWhile (flip elem ": \t") r
    in 
      (a, v)
       
openMethod :: FilePath -> IO MethodHandle
openMethod methodBinary =
    do
      -- hPutStrLn stderr ("openMethod " ++ methodBinary)
      runInteractiveCommand methodBinary
      -- runInteractiveProcess methodBinary [] Nothing Nothing

sendMethod :: MethodHandle -> [String] -> IO ()
sendMethod (pIn, _pOut, _, _) strings =
    do
      -- hPutStrLn stderr "send:"
      mapM_ put strings
      hPutStrLn pIn ""
      hFlush pIn
    where
      put line = 
          do
            -- hPutStrLn stderr ("  " ++ line)
            hPutStrLn pIn line

closeMethod :: MethodHandle -> IO ExitCode
closeMethod (pIn, pOut, pErr, handle) =
    do
      -- hPutStrLn stderr "closeMethod"
      hClose pIn
      hClose pOut
      hClose pErr
      waitForProcess handle

recv :: MethodHandle -> IO [String]
recv (_pIn, pOut, _pErr, _pHandle) =
    do
      -- hPutStrLn stderr "recv:"
      readTillEmptyLine pOut
    where
      readTillEmptyLine pOut =
          do
            line <- hGetLine pOut
            case line of
              "" -> return []
              line -> 
                  do
                    -- hPutStrLn stderr ("  " ++ line)
                    tail <- readTillEmptyLine pOut
                    return $ line : tail
{-
The flow of messages starts with the method sending out a 
<em>100 Capabilities</> and APT sending out a <em>601 Configuration</>.

The flow is largely unsynchronized, but our function may have to
respond to things like authorization requests. Perhaps we do a
recvContents and then mapM_ over that ? Not all incoming messages
require a response. 

We probably also need to track state, for example, if we are
pipelining multiple downloads and want to show seperate progress bars
for each download.

If someone wants to use fetch, they will need to provide methods to:

 1. prompt for and provide authentication
 2. show progress
 3. show media change dialog
 4. Show log messages
 5. Show failures
 6. Send Configuration

pipeline vs non-pipeline mode.
what if different methods are being used ?

when pipelining, we probably don't want to have too many pipelines to
the same server. Perhaps there can be a limit, and for non-pipelinable
methods, we set the limit to 1.

Each method can run in a seperate thread, since methods do not
interact with each other. In fact, each unique method+uri can be a
seperate thread. We can use a MVar to track the global max download
count. Perhaps we also want a per host throttle, since it is the host
connect that is likely to max out, not the access method.

Plan:

partition fetches by (host,method).
fork off threads for each (host, method).
Use MVar to throttle per host, and total connections

We don't know if a method supports pipelining until we connect atleast
once. So if we have a non-pipelined method, we might want to start
multiple streams. On the other hand, for something like a CDROM, that
will just cause the system to thrash.

cdrom, file, etc, don't have a host, so that is not a unique key then.
Pipelining on local methods is tricky, because it is hard to tell if
the local methods point to the same device or not.

Even though we have multiple threads, the interactor can view the
incoming Stream as a single Stream because all the events are tagged
with the URI (i think). But, sending commands involves a fancy
router. We could include a reference to corresponding command for each
stream.

For now, let's serialize the transfers, but allow pipeling for methods
that really allow pipelining.

-}

data FetchCallbacks 
    = FetchCallbacks { logCB :: Message ->  IO ()
                     , statusCB :: URI -> Message -> IO ()
                     , uriStartCB :: URI -> Maybe Integer -> Maybe UTCTime -> Maybe Integer -> IO ()
                     , uriDoneCB ::  URI -> Maybe Integer -> Maybe UTCTime -> Maybe Integer -> Maybe FilePath -> Hashes -> Bool -> IO ()
                     , uriFailureCB :: URI -> Message -> IO ()
                     , generalFailureCB :: Message -> IO ()
                     , authorizationRequiredCB :: Site -> IO (Maybe (User, Password))
                     , mediaFailureCB :: Media -> Drive -> IO ()
                     , debugCB :: String -> IO ()
                     }

simpleFetch :: [ConfigItem] -> URI -> FilePath -> Maybe UTCTime -> IO Bool
simpleFetch = fetch cliFetchCallbacks

-- |fetch a single item, show console output
-- see also: getLastModified
fetch :: FetchCallbacks -> [ConfigItem] -> URI -> FilePath -> Maybe UTCTime -> IO Bool
fetch cb configItems uri fp lastModified =
    do withMethodURI uri $ \mh ->
        do s <- recvStatus mh
           debugCB cb ("<- " ++ show s)
           sendCommand' mh (URIAcquire uri fp lastModified)
           loop mh
    where
      sendCommand' mh c =
          do mapM_ (debugCB cb . ("-> " ++)) (formatCommand c)
             sendCommand mh c
      loop mh =
          do r <- recvStatus mh
             case r of
               Capabilities {} ->
                   do unless (null configItems) (sendCommand' mh (Configuration configItems))
                      loop mh
               LogMsg m -> 
                   do logCB cb m
                      loop mh
               Status uri m -> 
                   do statusCB cb uri m
                      loop mh
               URIStart uri size lastModified resumePoint -> 
                   uriStartCB cb uri size lastModified resumePoint >> loop mh
               URIDone uri size lastModified resumePoint filename hashes imsHit ->
                   uriDoneCB cb uri size lastModified resumePoint filename hashes imsHit >> return True
               URIFailure uri message ->
                   uriFailureCB cb uri message >> return False
               GeneralFailure m -> generalFailureCB cb m >> return False
               AuthorizationRequired site -> 
                   do mCredentials <- authorizationRequiredCB cb site
                      case mCredentials of
                        Nothing -> return False -- FIXME: do we need a force close option for closeMethod ?
                        Just (user, passwd) -> 
                            do sendCommand' mh (AuthorizationCredentials site user passwd)
                               loop mh
               MediaFailure media drive ->
                    do mediaFailureCB cb media drive
                       return False

-- |set of callbacks which do nothing.
-- suitable for non-interactive usage. In the case authorization is
-- required, no credentials will be supplied and the download should
-- abort.
emptyFetchCallbacks =
    FetchCallbacks { logCB = \ _m -> return ()
                   , statusCB = \ _uri _m -> return ()
                   , uriStartCB = \ _uri _size _lastModified _resumePoint -> return ()
                   , uriDoneCB = \ _uri _size _lastModified _resumePoint _filename _hashes _imsHit -> return ()
                   , uriFailureCB = \ _uri _message -> return ()
                   , generalFailureCB = \ _m -> return ()
                   , authorizationRequiredCB = \ _site -> return Nothing
                   , mediaFailureCB = \ _media _drive -> return ()
                   , debugCB = \ _m -> return ()
                   }

cliFetchCallbacks =
    emptyFetchCallbacks { statusCB = \uri m -> putStrLn $ uriToString' uri ++ " : " ++ m
                        , uriStartCB = \ uri _size lastModified _resumePoint -> putStrLn $ uriToString' uri ++ " started. " ++ show lastModified
                        , uriDoneCB = \uri _size _lastModified _resumePoint _filename _hashes imsHit -> putStrLn $ uriToString' uri ++ (if imsHit then " cached." else " downloaded.")
                        , uriFailureCB = \uri message -> hPutStrLn stderr $ "URI Failure: " ++ uriToString' uri ++ " : " ++ message
                        , generalFailureCB = \message -> hPutStrLn stderr $ "General Failure: " ++ message
                        , authorizationRequiredCB = \site ->
                                                    do putStrLn $ "Authorization Required for " ++ site
                                                       putStrLn "Username: " >> hFlush stdout
                                                       user <- getLine
                                                       putStrLn "Password: " >> hFlush stdout
                                                       passwd <- getLine -- TODO: write a getPasswd function which does not echo input
                                                       return (Just (user, passwd))
                        , mediaFailureCB = \media drive -> hPutStrLn stderr $ "Media Failure: media=" ++ media ++" drive="++ drive
                        , debugCB = \m -> print m
                        }

{-
    FetchCallbacks { logCB = \m -> hPutStrLn stderr m
                   , statusCB = \uri m -> putStrLn (show uri ++" : "++ m)
                   , uriStartCB = \uri 
                   }

defaultAuthenticate site =
    do putStrLn $ "Authorization Required for " ++ site
       putStrLn "Username: " >> hFlush stdout
       user <- getLine
       putStrLn "Password: " >> hFlush stdout
       passwd <- getLine -- TODO: write a getPasswd function which does not echo input
       return (user, passwd)
-}

{-
    let itemsByHost = groupOn (regName . fst) items
    in
      do totalQSem <- newQSem 16 -- max number of streams allowed for 
         forkIO 
    where
      regName = fmap uriRegName . uriAuthority
      withQSem :: QSem -> IO a -> IO a
      withQSem qSem f = bracket (waitQSem qSem) (const $ signalQSem qSem) (const f)

uris = map (fromJust . parseURI) [ "http://n-heptane.com/whee"
                                 , "file:/one/two/three"
                                 , "ssh://jeremy:aoeu@n-heptane.com"
                                 , "cdrom:/one"
                                 ]
-}    

-- * Misc Helper Functions

bool :: a -> a -> Bool -> a
bool f _ False = f
bool _ t True = t


getLastModified :: FilePath -> IO (Maybe UTCTime)
getLastModified fp =
    do e <- doesFileExist fp
       if e
          then getFileStatus fp >>= return . Just . epochTimeToUTCTime . modificationTime
          else return Nothing

{-
groupOn :: (Ord b) => (a -> b) -> [a] -> [[a]]
groupOn f = groupBy ((==) `on` f) . sortBy (compare `on` f)

on :: (a -> a -> b) -> (c -> a) -> c -> c -> b
on f g x y = f (g x) (g y)
-}