File: csvSelect.hs

package info (click to toggle)
haskell-lazy-csv 0.5.1-3
  • links: PTS
  • area: main
  • in suites: buster
  • size: 140 kB
  • sloc: haskell: 820; makefile: 6
file content (96 lines) | stat: -rw-r--r-- 4,085 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
module Main where

import           Text.CSV.Lazy.ByteString
import qualified Data.ByteString.Lazy.Char8 as BS
import           System.Environment (getArgs)
import           System.Console.GetOpt
import           System.Exit
import           Control.Monad (when, unless)
import           System.IO
import           Data.Char (isDigit)
import           Data.List (elemIndex)
import           Data.Maybe (fromJust)

version = "0.3"

-- lazily read a CSV file, select some columns, and print it out again.
main = do
  opts  <- cmdlineOpts =<< getArgs
  let delim = head $ [ c | Delimiter c <- opts ]++","

  when   (Version   `elem` opts) $ do hPutStrLn stderr $ "csvSelect "++version
                                      exitSuccess
  unless (Unchecked `elem` opts) $ do
      content <- lazyRead opts
      case csvErrors (parseDSV True delim content) of
        errs@(_:_)  -> do hPutStrLn stderr (unlines (map ppCSVError errs))
                          exitWith (ExitFailure 2)
        []          -> return ()
  out <- case [ f | Output f <- opts ] of
              []     -> return stdout
              [file] -> openBinaryFile file WriteMode
              _      -> do hPutStrLn stderr "Too many outputs: only one allowed"
                           exitWith (ExitFailure 3)
  content <- lazyRead opts
  case selectFieldMix [ e | Select e <- opts ]
                      (csvTableFull (parseDSV True delim content)) of
      Left err        -> do hPutStrLn stderr $ "CSV missing fields: "
                                               ++unwords err
                            exitWith (ExitFailure 4)
      Right selection -> do BS.hPut out $ ppCSVTable selection
                            hClose out

-- | The standard Data.CSV.Lazy.selectFields chooses only by field name.
--   This version chooses with any mixture of numeric index or field name.
selectFieldMix :: [ Either Int String ] -> CSVTable -> Either [String] CSVTable
selectFieldMix fields table
    | null table          = Left (map (either show id) fields)
    | not (null missing)  = Left missing
    | otherwise           = Right (map select table)
  where
    header     = map (BS.unpack . csvFieldContent) (head table)
    lenheader  = length header
    missing    = map show (filter (>lenheader)   [ i | Left i     <- fields ])
                 ++ filter (`notElem` header) [ name | Right name <- fields ]
    reordering = map (\e-> case e of Left i  -> i
                                     Right s -> fromJust $ elemIndex s header)
                     fields
    select fields = map (fields!!) reordering

-- | Read a single input file, or stdin.
lazyRead :: [Flag] -> IO BS.ByteString
lazyRead opts =
    case [ f | Input f <- opts ] of
         []     -> BS.hGetContents stdin
         [file] -> BS.readFile file
         _      -> do hPutStrLn stderr "Too many input files: only one allowed"
                      exitWith (ExitFailure 1)


-- Command-line options
data Flag 
     = Version | Input String | Output String | Unchecked
     | Delimiter Char | Select (Either Int String)
     deriving (Show,Eq)
    
options :: [OptDescr Flag]
options =
  [ Option ['v','V'] ["version"]   (NoArg Version)        "show version number"
  , Option ['o']     ["output"]    (ReqArg Output "FILE") "output FILE"
  , Option ['i']     ["input"]     (ReqArg Input  "FILE") "input FILE"
  , Option ['u']     ["unchecked"] (NoArg Unchecked)  "ignore CSV format errors"
  , Option ['d']     ["delimiter"] (ReqArg (Delimiter . head) "@")
                                                      "delimiter char is @"
  ]
    
cmdlineOpts :: [String] -> IO [Flag]
cmdlineOpts argv = 
   case getOpt Permute options argv of
     (o,fs,[] ) -> return (o ++ map field fs)
     (_,_,errs) -> ioError (userError (concat errs ++ usageInfo header options))
  where header = "Usage: csvSelect [OPTION...] (num|fieldname)...\n"
                 ++"    select numbered/named columns from a CSV file"

field :: String -> Flag
field str | all isDigit str = Select (Left (read str))
          | otherwise       = Select (Right str)