1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157
|
#!/usr/bin/env python
#-*- coding:utf-8 -*-
#---------------------------------------------------------------
# PyNLPl - Test Units for CQL using Finite State Automata
# by Maarten van Gompel, Radboud University Nijmegen
# proycon AT anaproy DOT nl
#
# Licensed under GPLv3
#----------------------------------------------------------------
from __future__ import print_function
from __future__ import unicode_literals
from __future__ import division
from __future__ import absolute_import
import sys
# Python 2 needs the standard streams wrapped in a UTF-8 writer before
# unicode text is written to them; on Python 3 they are used as they are.
# The check uses sys.version_info, not the sys.version string: string
# comparison is lexicographic, so a version such as '10.0' would compare
# as smaller than '3' and wrongly take the Python 2 branch.
if sys.version_info[0] < 3:
    from codecs import getwriter
    stderr = getwriter('utf-8')(sys.stderr)
    stdout = getwriter('utf-8')(sys.stdout)
else:
    stderr = sys.stderr
    stdout = sys.stdout
import sys
import unittest
from pynlpl.formats import cql
# Test fixture: a single tokenised sentence. Every token is a dict with the
# keys 'word' (surface form), 'lemma' and 'pos'; the list is handed as-is to
# the compiled queries in the test cases.
tokens = [
    {'word': 'This', 'lemma': 'this', 'pos': 'det'},
    {'word': 'is', 'lemma': 'be', 'pos': 'v'},
    {'word': 'a', 'lemma': 'a', 'pos': 'det'},
    {'word': 'first', 'lemma': 'first', 'pos': 'a'},
    {'word': 'test', 'lemma': 'test', 'pos': 'n'},
    # The lemma here used to be 'dit', which does not belong to 'of'.
    {'word': 'of', 'lemma': 'of', 'pos': 'prep'},
    {'word': 'the', 'lemma': 'the', 'pos': 'det'},
    {'word': 'new', 'lemma': 'new', 'pos': 'a'},
    {'word': 'module', 'lemma': 'module', 'pos': 'n'},
    {'word': '.', 'lemma': '.', 'pos': 'punc'},
]
class Test1(unittest.TestCase):
    """Run CQL queries over the module-level ``tokens`` and check the matches."""

    # Words of the two three-token matches that test3 up to test7 all expect.
    _DET_X_NOUN = (
        ("a", "first", "test"),
        ("the", "new", "module"),
    )

    def _assert_words(self, query, expected):
        """Compile *query*, apply it to ``tokens`` and verify the matched words.

        *expected* contains one sequence of words per expected match. The
        number of matches has to equal ``len(expected)``; after that, every
        listed word is compared with the ``'word'`` value found at the same
        position of the corresponding match.
        """
        matches = cql.Query(query)(tokens)
        self.assertEqual(len(matches), len(expected))
        for index, words in enumerate(expected):
            for position, word in enumerate(words):
                self.assertEqual(matches[index][position]['word'], word)

    def test1(self):
        """A quoted word query gives one match of length one, the word 'the'."""
        matches = cql.Query('"the"')(tokens)
        self.assertEqual(len(matches), 1)     # exactly one match
        self.assertEqual(len(matches[0]), 1)  # that match spans one token
        self.assertEqual(matches[0][0]['word'], "the")

    def test2(self):
        """A single pos="det" constraint gives three matches: This, a, the."""
        self._assert_words('[ pos = "det" ]', (("This",), ("a",), ("the",)))

    def test3(self):
        """A det / a / n sequence gives the two expected three-token matches."""
        self._assert_words(
            '[ pos = "det" ] [ pos = "a" ] [ pos = "n" ]',
            self._DET_X_NOUN,
        )

    def test4(self):
        """Same expectation with the pos="a" token marked with '?'."""
        self._assert_words(
            '[ pos = "det" ] [ pos = "a" ]? [ pos = "n" ]',
            self._DET_X_NOUN,
        )

    def test5(self):
        """Same expectation with an unconstrained '[]?' token in the middle."""
        self._assert_words(
            '[ pos = "det" ] []? [ pos = "n" ]',
            self._DET_X_NOUN,
        )

    def test6(self):
        """Same expectation with an unconstrained '[]+' token in the middle."""
        self._assert_words(
            '[ pos = "det" ] []+ [ pos = "n" ]',
            self._DET_X_NOUN,
        )

    def test7(self):
        """Same expectation with an unconstrained '[]*' token in the middle."""
        self._assert_words(
            '[ pos = "det" ] []* [ pos = "n" ]',
            self._DET_X_NOUN,
        )
# Hand control to the unittest runner when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()
|