File: Genome.hs

package info (click to toggle)
haskell-attoparsec 0.14.4-4
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 880 kB
  • sloc: haskell: 4,749; ansic: 170; makefile: 22
file content (75 lines) | stat: -rw-r--r-- 2,881 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
{-# LANGUAGE OverloadedStrings #-}

module Genome
    (
      genome
    ) where

import Control.Applicative
import Test.Tasty.Bench
import Data.ByteString (ByteString)
import qualified Data.ByteString.Char8 as B8
import qualified Data.ByteString.Lazy.Char8 as L8
import Data.Attoparsec.ByteString.Char8 as B
import qualified Data.Attoparsec.ByteString.Lazy as BL
import Data.Text (Text)
import qualified Data.Text as T
import qualified Data.Text.Lazy as TL
import Data.Attoparsec.Text as T
import qualified Data.Attoparsec.Text.Lazy as TL
import Common (rechunkBS, rechunkT)

-- | Benchmarks comparing attoparsec's ByteString and Text parsers on a
-- short motif search over the 'gene' sequence.
--
-- Every benchmark maps a parser over all tails of the input, so the
-- parser is attempted once at each offset of the sequence.  Within each
-- group, @"s"@ runs the strict @parse@ over tails of the strict input and
-- @"l"@ runs the lazy @parse@ over tails of the rechunked input.  The
-- nested @"CI"@ groups repeat both runs with the @...CI@ parsers.
genome :: Benchmark
genome =
  bgroup "genome"
    [ bgroup "bytestring"
        [ bench "s" (nf (map (B.parse searchBS)) (B8.tails strictBytes))
        , bench "l" (nf (map (BL.parse searchBS)) (L8.tails lazyBytes))
        , bgroup "CI"
            [ bench "s" (nf (map (B.parse searchBSCI)) (B8.tails strictBytes))
            , bench "l" (nf (map (BL.parse searchBSCI)) (L8.tails lazyBytes))
            ]
        ]
    , bgroup "text"
        [ bench "s" (nf (map (T.parse searchT)) (T.tails strictText))
        , bench "l" (nf (map (TL.parse searchT)) (TL.tails lazyText))
        , bgroup "CI"
            [ bench "s" (nf (map (T.parse searchTCI)) (T.tails strictText))
            , bench "l" (nf (map (TL.parse searchTCI)) (TL.tails lazyText))
            ]
        ]
    ]
  where
    -- The same sequence in each of the four input representations.
    strictBytes = B8.pack gene
    strictText  = T.pack gene
    -- NOTE(review): rechunkBS / rechunkT live in Common; presumably the 4
    -- is the chunk size of the resulting lazy value -- confirm there.
    lazyBytes   = rechunkBS 4 strictBytes
    lazyText    = rechunkT 4 strictText

-- | Match the literal @caac@ immediately followed by either @aaca@ or
-- @aact@, returning the result of the second match (the first is
-- discarded by '*>').  The literals become parsers via OverloadedStrings.
searchBS :: B.Parser ByteString
searchBS = prefix *> suffix
  where
    prefix, suffix :: B.Parser ByteString
    prefix = "caac"
    suffix = "aaca" <|> "aact"

-- | Variant of the ByteString motif search built from 'B.stringCI':
-- @CAAC@ followed by either @AACA@ or @AACT@.  Only the result of the
-- second match is returned.
searchBSCI :: B.Parser ByteString
searchBSCI = ci "CAAC" *> (ci "AACA" <|> ci "AACT")
  where
    -- Local shorthand for the case-ignoring literal matcher.
    ci :: ByteString -> B.Parser ByteString
    ci = B.stringCI

-- | Text counterpart of the motif search: the literal @caac@ immediately
-- followed by either @aaca@ or @aact@.  The result of the second match is
-- returned; the literals become parsers via OverloadedStrings.
searchT :: T.Parser Text
searchT = prefix *> suffix
  where
    prefix, suffix :: T.Parser Text
    prefix = "caac"
    suffix = "aaca" <|> "aact"

-- | Variant of the Text motif search built from 'T.asciiCI': @CAAC@
-- followed by either @AACA@ or @AACT@.  Only the result of the second
-- match is returned.
searchTCI :: T.Parser Text
searchTCI = ci "CAAC" *> (ci "AACA" <|> ci "AACT")
  where
    -- Local shorthand for the ASCII case-ignoring literal matcher.
    ci :: Text -> T.Parser Text
    ci = T.asciiCI

-- Dictyostelium discoideum developmental protein DG1094 (gacT) gene,
-- partial cds. http://www.ncbi.nlm.nih.gov/nuccore/AF081586.1

-- | Nucleotide sequence used as the input for every benchmark in 'genome',
-- which packs it into ByteString and Text form before taking its tails.
--
-- The literal is split across source lines with string gaps (a backslash
-- at the end of one line and another at the start of the next); the gaps
-- contribute no characters, so the value is one contiguous string.
gene :: String
gene = "atcgatttagaaagatacaaagatagaaccatcaataataaacaagagaagagagcaagt\
       \agagatattaataaagagattgaaagagagattgaaaagaagagattatcaccaagagaa\
       \agattaaatttatttggtctttcttcctcatcttcatcagtgaattcaacattaacaaga\
       \tctacagcaaatattatctctacaatagacggtagtggaggtagtaatcgtaatagtaaa\
       \aattatggtaatggctcatcctcctcctcaaatagaagatatagtaatactattaatcaa\
       \caattacaaatgcaattacaacaacttcaaatccaacaacaacaatatcaacaaactcaa\
       \caatctcaaataccattacaatatcaacaacaacaacagcaacaacaacaacaaaccact\
       \acaactacaactacatcaagtggtagtaatagattctcttcaaatagatataaaccagtt\
       \gatcttacacaatcatcttcaaactttcgttattcacgtgaaatttatgatgatgattat\
       \tattcaaataataatttaatgatgtttggtaatgagcaaccaaatcaaacaccaatttct\
       \gtatcatcttcatctgcattcacacgtcaaagatctcaaagttgctttgaaccagagaat\
       \cttgtattgctacaacaacaatatcaacaatatcaacaacaacaacaacaacaacaacaa\
       \attccattccaagcaaatccacaatatagtaatgctgttattgaacaaaaattggatcaa\
       \attagagataccattaataatttacatagagataaccgagtctctaga"