1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243
|
# Natural Language Toolkit: Generating referring expressions
#
# Author: Margaret Mitchell <itallow@u.washington.edu>
# URL: <http://www.nltk.org/>
# For license information, see LICENSE.TXT
import sys
import re
class IncrementalAlgorithm:
"""
An implementation of the Incremental Algorithm, introduced in:
Dale, Robert and Reiter, Ehud. (1995).
"Computational Interpretations of the Gricean Maxims
in the Generation of Referring Expressions".
Cognitive Science, 18, 233-263.
"""
def __init__(self, KB, r, contrast_set, preferred_attrs):
self.L = []
self.KB = KB
self.r = r
self.C = contrast_set
self.P = preferred_attrs
self.RE = self.make_referring_expression()
def make_referring_expression(self):
"""
This returns a list of attribute-value pairs which
specify a referring expression for the intended referent.
The attributes are tried in the order specified in self.P,
the preferred attributes list, and a value for 'type' is always
included, even if 'type' has no discriminatory power.
"""
for attr in self.P:
if attr in self.r:
value = self.find_best_value(attr, \
self.KB.basic_level_value(self.r, attr))
else:
continue
rules_out_list = self.rules_out((attr, value))
if rules_out_list != []:
self.L += [(attr, value)]
for distractor in rules_out_list:
self.C.remove(distractor)
if self.C == []:
head_noun = False
# Catch-all to make sure that the head noun, 'type',
# is included.
for attr_value in self.L:
if "type" == attr_value[0]:
head_noun = True
if head_noun == False:
self.L = [("type", \
self.KB.basic_level_value(self.r, "type"))] + self.L
else:
return self.L
def find_best_value(self, attr, init_val):
"""
Takes an attribute and an initial value; it returns a value
for that attribute that is subsumed by the initial value,
accurately describes the intended referent, rules out as many
distractors as possible, and, subject to these constraints,
is as close as possible in the taxonomy to the initial value.
"""
if self.KB.user_knows(self.r, (attr, init_val)):
value = init_val
else:
value = None
more_specific_value = self.KB.more_specific_value(self.r, attr, value)
if more_specific_value is not None:
new_value = self.find_best_value(attr, more_specific_value)
if new_value is not None and \
self.rules_out((attr, new_value)) > self.rules_out((attr, value)):
value = new_value
return value
def rules_out(self, attr_value_pair):
"""
Takes an attribute-value pair and returns the elements of the
set of remaining distractors that are ruled out by this
attribute-value pair.
"""
attr = attr_value_pair[0]
value = attr_value_pair[1]
if value is None:
return []
rules_out_list = []
for distractor in self.C:
if self.KB.user_knows(distractor, (attr, value)) is False:
rules_out_list += [distractor]
return rules_out_list
class BackgroundKnowledge:
"""
Interface functions required by the Incremental Algorithm.
Although these functions are included for use here,
their nature varies depending on the program using them,
and should in practice be created independently of this module.
"""
def __init__(self, specificity_hash, user_knowledge=None):
self.specificity_hash = specificity_hash
self.user_knowledge = user_knowledge
if user_knowledge is None:
self.user_knowledge = specificity_hash
def more_specific_value(self, entity, attr, value):
"""
Returns a new value for 'attr' where that value is
more specific than 'value'. If no more specific
value of 'attr' is known, returns None.
"""
if value in self.specificity_hash:
spec_tuples = self.specificity_hash[value]
val_list = []
found_vals = self._recurse_values(spec_tuples, val_list)
for found_val in found_vals:
if found_val == entity[attr] or len(found_vals) == 1:
return found_val
else:
for basic_value in self.specificity_hash:
spec_tuples = self.specificity_hash[basic_value]
spec_val = self._find_more_specific_val(spec_tuples, value)
if spec_val is not None:
return spec_val
return None
def _find_more_specific_val(self, spec_tuples, value):
"""
Recursively searches for a more specific value for a given value.
"""
x = 0
while x < len(spec_tuples):
spec_val = spec_tuples[x]
if spec_val == value:
new_val = spec_tuples[x + 1]
if new_val == str(new_val):
return new_val
elif spec_val == tuple(spec_val):
new_val = self._find_more_specific_val(spec_val, value)
if new_val is not None:
return new_val
x += 1
return None
def basic_level_value(self, entity, attr):
"""
Returns the basic-level value of an attribute of an object.
"""
value = entity[attr]
for basic_value in self.specificity_hash:
spec_tuples = self.specificity_hash[basic_value]
val_list = []
found_vals = self._recurse_values(spec_tuples, val_list)
for found_val in found_vals:
if found_val == value:
value = basic_value
break
return value
def _recurse_values(self, spec_tuples, val_list):
"""
Function used by basic_level_value to return a non-nested
list of values under a basic level. This is helpful when
it doesn't matter how specific the value is, what matters
is what its basic level value is.
"""
for sub_val in spec_tuples:
if sub_val == str(sub_val):
val_list += [sub_val]
if sub_val == tuple(sub_val):
return self._recurse_values(sub_val, val_list)
return val_list
# For now, the user knowledge is the same as the specificity list.
# This could be expanded to use wordnet as the wordnet modules expand.
def user_knows(self, entity, attr_value_pair):
"""
Returns true if the user knows or can easily determine
that the attribute-value pair applies to the object;
false if the user knows or can easily determine that the
attribute-value pair does not apply to the object;
and None otherwise.
"""
attr = attr_value_pair[0]
value = attr_value_pair[1]
if value == tuple(value):
for spec_value in value:
return self.user_knows(entity, (attr, spec_value))
elif attr in entity:
if value in entity[attr]:
return True
else:
for gen_value in self.user_knowledge:
if gen_value == value:
spec_values = self.user_knowledge[gen_value]
for spec_value in spec_values:
if self.user_knows(entity, (attr, spec_value)):
return True
return False
return None
def demo():
specificity_hash = {"dog" : (("chihuahua", "long-haired chihuahua"), "poodle"),
"cat" : ("siamese-cat", "tabby")}
KB = BackgroundKnowledge(specificity_hash)
Object1 = {"type":"chihuahua", "size":"small", "colour":"black"}
Object2 = {"type":"chihuahua", "size":"large", "colour":"white"}
Object3 = {"type":"siamese-cat", "size":"small", "colour":"black"}
print "Given an entity defined as: "
r = Object1
print r
preferred_attrs = ["type", "colour", "size"]
print "In a set defined as: "
contrast_set = [Object2, Object3]
print contrast_set
RE = IncrementalAlgorithm(KB, r, contrast_set, preferred_attrs).RE
print "The referring expression created to uniquely identify",
print "the referent is: "
print RE
RE_string = ""
for attr, val in RE:
RE_string = val + " " + RE_string
RE_string = "The " + RE_string
print "This can be surface-realized as:"
print RE_string
if __name__ == "__main__":
demo()
|