File: stripTable.hs

package info (click to toggle)
phybin 0.3-7
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 608 kB
  • sloc: haskell: 2,141; sh: 584; makefile: 71
file content (50 lines) | stat: -rw-r--r-- 1,318 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50


-- | This is a quick hack to split up a table in a tab-delimited file of
-- the format Irene produces.  The result is a simple two-column,
-- white-space delimited table of the kind phybin expects.


import System.Environment (getArgs)

import Control.Monad
import Data.List
import Data.List.Split

import System.IO


-- | Decide whether a line of the input file is a data row.
--
-- The line is broken into whitespace-separated words and rejected when it
-- is blank, when it opens with one of the known header word pairs
-- (@Roundup Orthology@, @Gene Cluster@, @Id Genome@), or when its second
-- and third words are @results found@.  Every other line counts as data.
isDataLine :: String -> Bool
isDataLine l
  | null ws              = False   -- blank / whitespace-only line
  | startsWithHeader     = False   -- one of the fixed two-word headers
  | isResultsSummaryLine = False   -- "<anything> results found ..."
  | otherwise            = True
  where
    ws = words l

    headerPrefixes =
      [ ["Roundup", "Orthology"]
      , ["Gene", "Cluster"]
      , ["Id", "Genome"]
      ]

    startsWithHeader = any (`isPrefixOf` ws) headerPrefixes

    -- The first word is arbitrary; only words two and three are checked.
    isResultsSummaryLine = ["results", "found"] `isPrefixOf` drop 1 ws

-- | Program entry point.
--
-- Expects exactly one command-line argument: the path of the input file.
-- The file is read, lines rejected by 'isDataLine' are dropped, and each
-- remaining line is split on tab characters.  Diagnostics (line counts,
-- the distinct token counts seen, and a sample of ten parsed rows) go to
-- stderr; the first two columns of every data row go to stdout, separated
-- by two spaces.
--
-- A data row with fewer than two tab-separated columns cannot be echoed;
-- it is reported on stderr and skipped rather than aborting the run with
-- a pattern-match failure.
main :: IO ()
main = do
  args <- getArgs
  let file = case args of
              [f] -> f
              _   -> error "Expects one argument!! [filename]"

  raw <- readFile file
  let lns  = lines raw
      filt = filter isDataLine lns
      toks = map (splitOn ['\t']) filt
      put  = hPutStrLn stderr

  put$"Read "++show (length lns)++" lines from file "++show file
  put$"     "++show (length filt)++" contain data"
  put$"  Distinct #toks found on data lines: " ++ show(nub (map length toks))
  put$"  A sample of ten parsed lines :"
  mapM_ (put .  ("    "++) . show) $ take 10 $ toks

  put$"  Echoing columns one and two in space-separated form:"

  forM_ toks $ \ row ->
    case row of
      (one:two:_) -> putStrLn$ one ++"  "++ two
      -- A line without any tab yields a single token; there is no second
      -- column to print, so warn and carry on with the remaining rows.
      _           -> put$"  WARNING: skipping data line with fewer than two tab-separated columns: " ++ show row