1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96
|
module Main where
import Text.CSV.Lazy.ByteString
import qualified Data.ByteString.Lazy.Char8 as BS
import System.Environment (getArgs)
import System.Console.GetOpt
import System.Exit
import Control.Monad (when, unless)
import System.IO
import Data.Char (isDigit)
import Data.List (elemIndex)
import Data.Maybe (fromJust)
version = "0.3"
-- lazily read a CSV file, select some columns, and print it out again.
main = do
opts <- cmdlineOpts =<< getArgs
let delim = head $ [ c | Delimiter c <- opts ]++","
when (Version `elem` opts) $ do hPutStrLn stderr $ "csvSelect "++version
exitSuccess
unless (Unchecked `elem` opts) $ do
content <- lazyRead opts
case csvErrors (parseDSV True delim content) of
errs@(_:_) -> do hPutStrLn stderr (unlines (map ppCSVError errs))
exitWith (ExitFailure 2)
[] -> return ()
out <- case [ f | Output f <- opts ] of
[] -> return stdout
[file] -> openBinaryFile file WriteMode
_ -> do hPutStrLn stderr "Too many outputs: only one allowed"
exitWith (ExitFailure 3)
content <- lazyRead opts
case selectFieldMix [ e | Select e <- opts ]
(csvTableFull (parseDSV True delim content)) of
Left err -> do hPutStrLn stderr $ "CSV missing fields: "
++unwords err
exitWith (ExitFailure 4)
Right selection -> do BS.hPut out $ ppCSVTable selection
hClose out
-- | The standard Data.CSV.Lazy.selectFields chooses only by field name.
-- This version chooses with any mixture of numeric index or field name.
selectFieldMix :: [ Either Int String ] -> CSVTable -> Either [String] CSVTable
selectFieldMix fields table
| null table = Left (map (either show id) fields)
| not (null missing) = Left missing
| otherwise = Right (map select table)
where
header = map (BS.unpack . csvFieldContent) (head table)
lenheader = length header
missing = map show (filter (>lenheader) [ i | Left i <- fields ])
++ filter (`notElem` header) [ name | Right name <- fields ]
reordering = map (\e-> case e of Left i -> i
Right s -> fromJust $ elemIndex s header)
fields
select fields = map (fields!!) reordering
-- | Read a single input file, or stdin.
lazyRead :: [Flag] -> IO BS.ByteString
lazyRead opts =
case [ f | Input f <- opts ] of
[] -> BS.hGetContents stdin
[file] -> BS.readFile file
_ -> do hPutStrLn stderr "Too many input files: only one allowed"
exitWith (ExitFailure 1)
-- Command-line options
data Flag
= Version | Input String | Output String | Unchecked
| Delimiter Char | Select (Either Int String)
deriving (Show,Eq)
options :: [OptDescr Flag]
options =
[ Option ['v','V'] ["version"] (NoArg Version) "show version number"
, Option ['o'] ["output"] (ReqArg Output "FILE") "output FILE"
, Option ['i'] ["input"] (ReqArg Input "FILE") "input FILE"
, Option ['u'] ["unchecked"] (NoArg Unchecked) "ignore CSV format errors"
, Option ['d'] ["delimiter"] (ReqArg (Delimiter . head) "@")
"delimiter char is @"
]
cmdlineOpts :: [String] -> IO [Flag]
cmdlineOpts argv =
case getOpt Permute options argv of
(o,fs,[] ) -> return (o ++ map field fs)
(_,_,errs) -> ioError (userError (concat errs ++ usageInfo header options))
where header = "Usage: csvSelect [OPTION...] (num|fieldname)...\n"
++" select numbered/named columns from a CSV file"
field :: String -> Flag
field str | all isDigit str = Select (Left (read str))
| otherwise = Select (Right str)
|