File: EnglishTest.hs

package info (click to toggle)
haskell-minimorph 0.3.0.1-3
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 88 kB
  • sloc: haskell: 320; makefile: 6
file content (159 lines) | stat: -rw-r--r-- 4,861 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
module NLP.Minimorph.EnglishTest where

import           Data.Text (Text)
import qualified Data.Text as T

import Test.Framework
import Test.Framework.Providers.HUnit
import Test.HUnit

import NLP.Minimorph.English

suite :: Test.Framework.Test
suite = testGroup "NLP.Minimorph.English"
     [ t_defaultVerbStuff
     , t_defaultNounPlural
     , t_indefiniteDet
     , t_acronymWantsAn
     , t_ordinal
     , t_commas
     ]

-- ----------------------------------------------------------------------
-- tests
-- ----------------------------------------------------------------------

t_defaultVerbStuff :: Test.Framework.Test
t_defaultVerbStuff =
    testGroup "defaultVerbStuff" (map tc verbs)
  where
    tc (pl, sg3, pastP) = testCase summary $
        assertEqual summary (sg3, pastP) (defaultVerbStuff pl)
      where
        summary = T.unpack $ T.concat [ pl, " (", sg3, ", ", pastP, ")" ]

t_defaultNounPlural :: Test.Framework.Test
t_defaultNounPlural =
    testGroup "defaultNounPlural" (map tc nouns)
  where
    tc (sg, pl) = testCase summary $
        assertEqual summary pl (defaultNounPlural sg)
      where
        summary = T.unpack $ T.concat [sg, " (", pl, ")"]

t_indefiniteDet :: Test.Framework.Test
t_indefiniteDet = testGroup "indefiniteDet"
    [ tc "eu"          "a"    "eukaryote"
    , tc "eu"          "a"    "Eukaryote"
    , tc "ewe"         "a"    "ewe" -- google 33k 'a ewe' vs 9k 'an ewe'
    , tc "ewok"        "an"   "ewok"
    , tc "ewok"        "an"   "Ewok"
    , tc "7th"         "a"    "7th"
    , tc "8th"         "an"   "8th"
    , tc "18th"        "an"   "18th"
    , tc "xylophone"   "a"    "xylophone"
    , tc "universe"    "a"    "universe"
    , tc "urge"        "an"   "urge"
    , tc "user"        "a"    "user"
    , tc "usher"       "an"   "usher"
    , tc "xylophone"   "a"    "xylophone"
    , tc "u-turn"      "a"    "u-turn"
    , tc "u"           "a"    "u"
    , tc "x-ray"       "an"   "x-ray"
    , tc "g-ray"       "a"    "g-ray"
    , tc "y-chromo"    "a"    "y-chromosome"
    , tc "x-chromo"    "an"   "x-chromosome"
    , tc "x chromo"    "an"   "x chromosome"
    , tc "18-fold"     "an"   "18-fold"
    , tc "-fold ending"         "a"     "-fold ending"
    , tc "'-fold' ending"       "a"     "'-fold' ending"
    , tc "\"-fold\" ending"     "a"     "\"-fold\" ending"
    , tc "mvp award"   "an"   "MVP award"
    , tc "UUCP user"   "a"    "UUCP user"
    ]
  where
    tc msg res inp = testCase summary $
       assertEqual summary res (indefiniteDet inp)
      where
       summary = msg ++ " (" ++ T.unpack (T.unwords [res, inp]) ++ ")"

t_acronymWantsAn :: Test.Framework.Test
t_acronymWantsAn = testGroup "acronymWantsAn"
    [ tc "rgb"       True    "rgb"
    , tc "kml"       False   "kml"
    , tc "ac"        True    "ac"
    , tc "dc"        False   "dc"
    ]
  where
    tc msg res inp = testCase summary $
       assertEqual summary res (acronymWantsAn inp)
      where
       summary = msg ++ " (" ++ T.unpack inp ++ ")"

t_ordinal :: Test.Framework.Test
t_ordinal = testGroup "ordinal"
    [ tc "12th"                  12
    , tc "42nd"                  42
    , tc "44th"                  44
    , tc "41st"                  41
    , tc "-3rd"                  (-3)
    ]
  where
    tc res inp = testCase (show inp ++ " => " ++ T.unpack res) $
        assertEqual "" res (ordinal inp)

t_commas :: Test.Framework.Test
t_commas = testGroup "commas"
    [ tc "foo"                    ["foo"]
    , tc "foo and bar"            ["foo","bar"]
    , tc "foo, bar and baz"       ["foo","bar","baz"]
    , tc "foo, bar, baz and quux" ["foo","bar","baz","quux"]
    ]
  where
    tc res xs = testCase (show (length xs) ++ ": " ++ T.unpack res) $
        assertEqual "" res (commas "and" xs)

-- ----------------------------------------------------------------------
-- lexicon
-- ----------------------------------------------------------------------

nouns :: [(Text,Text)]
nouns =
    [ noun "star"   "stars"
    , noun "egg"    "eggs"
    , noun "patch"  "patches"
    , noun "boy"    "boys"
    , noun "spy"    "spies"
    , noun "thesis" "theses"
    , noun "elf"    "elves"
    , noun "ace"    "aces"
    , noun "5y"     "5ys"
    ]
  where
    noun s p = (s,p)

detNouns :: [(Text,Text)]
detNouns =
    [ noun "box" "boxes"
    , noun "cat" "cats"
    , noun "dog" "dogs"
    , noun "ant" "ants"
    , noun "egg" "eggs"
    ]
  where
    noun s p = (s,p)

verbs :: [(Text,Text,Text)]
verbs =
    [ verb "walk"  "walks"   "walked"
    , verb "push"  "pushes"  "pushed"
    , verb "pass"  "passes"  "passed"
    , verb "abuse" "abuses"  "abused"
    , verb "banjo" "banjoes" "banjoed"
    , verb "play"  "plays"   "played"
    , verb "cry"   "cries"   "cried"
    , verb "goto"  "gotoes"  "gotoed"
    , verb "boo"   "boos"    "booed"
    ]
  where
    verb x y z = (x, y, z)