File: test_weightings.py

package info (click to toggle)
python-whoosh 2.7.4%2Bgit6-g9134ad92-10
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 3,804 kB
  • sloc: python: 38,552; makefile: 118
file content (81 lines) | stat: -rw-r--r-- 2,736 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
from __future__ import with_statement
import inspect
from random import choice, randint
import sys

from whoosh import fields, query, scoring
from whoosh.compat import u, xrange, permutations
from whoosh.filedb.filestore import RamStorage


def _weighting_classes(ignore):
    """Collect the weighting classes defined in ``whoosh.scoring``.

    Only classes that list ``scoring.Weighting`` among their *direct* bases
    (``__bases__``) are returned; deeper descendants are not picked up.

    :param ignore: container of classes to leave out of the result.
    :returns: list of matching classes, in the order
        ``inspect.getmembers`` yields them.
    """
    selected = []
    for _name, cls in inspect.getmembers(scoring, inspect.isclass):
        # Skip anything that does not inherit directly from Weighting
        if scoring.Weighting not in cls.__bases__:
            continue
        # Skip classes the caller explicitly excluded
        if cls in ignore:
            continue
        selected.append(cls)
    return selected


def test_all():
    """Smoke-test searching with every weighting class that is discovered.

    Builds an in-memory index of 100 documents, each made of 10 to 20 words
    picked at random from a small vocabulary, then instantiates each class
    returned by ``_weighting_classes`` and runs a term query for every
    vocabulary word with it. Nothing is asserted about the results; the
    test only requires that instantiation and searching do not raise.

    :raises TypeError: if a weighting class cannot be instantiated with the
        arguments configured for it (or with no arguments).
    """
    vocabulary = [u("alfa"), u("bravo"), u("charlie"), u("delta"), u("echo"),
                  u("foxtrot")]

    # Populate a RAM-backed index with random documents
    schema = fields.Schema(text=fields.TEXT)
    ix = RamStorage().create_index(schema)
    writer = ix.writer()
    for _doc in xrange(100):
        length = randint(10, 20)
        words = [choice(vocabulary) for _word in xrange(length)]
        writer.add_document(text=u(" ").join(words))
    writer.commit()

    # Abstract base classes that should be excluded from the test
    excluded = ()
    # Constructor arguments, keyed by class name, for the weighting classes
    # that cannot be built without any
    ctor_args = {
        "MultiWeighting": ([scoring.BM25F()], {"text": scoring.Frequency()}),
        "ReverseWeighting": ([scoring.BM25F()], {}),
    }

    for wclass in _weighting_classes(excluded):
        # Classes without an entry are constructed with no arguments
        args, kwargs = ctor_args.get(wclass.__name__, ((), {}))
        try:
            weighting = wclass(*args, **kwargs)
        except TypeError:
            err = sys.exc_info()[1]
            message = "Error instantiating %r: %s" % (wclass, err)
            raise TypeError(message)

        with ix.searcher(weighting=weighting) as searcher:
            try:
                for term in vocabulary:
                    searcher.search(query.Term("text", term))
            except Exception:
                # Attach a description naming the weighting class to the
                # exception, then re-raise the same exception
                err = sys.exc_info()[1]
                err.msg = "Error searching with %r: %s" % (wclass, err)
                raise


def test_compatibility():
    """Check that the legacy ``Weighting`` subclass API is still supported.

    Defines an old-style weighting class that overrides ``score`` and
    ``final`` (with ``use_final`` enabled), indexes every 3-word permutation
    of a 4-word vocabulary, and verifies the score of the top hit for the
    term "bravo".

    The expected value 2.25 equals ``(1.0 + 0.5) * 1.5``, i.e. ``final``
    applied to ``score`` -- presumably with a term weight of 1.0, since each
    document contains "bravo" at most once (TODO confirm against the
    scoring implementation).
    """
    from whoosh.scoring import Weighting

    # This is the old way of doing a custom weighting model, check that
    # it's still supported...
    class LegacyWeighting(Weighting):
        use_final = True

        def score(self, searcher, fieldname, text, docnum, weight):
            return weight + 0.5

        def final(self, searcher, docnum, score):
            return score * 1.5

    schema = fields.Schema(text=fields.TEXT)
    ix = RamStorage().create_index(schema)
    w = ix.writer()
    domain = "alfa bravo charlie delta".split()
    for ls in permutations(domain, 3):
        w.add_document(text=u(" ").join(ls))
    w.commit()

    # Use the searcher as a context manager (as test_all does) so that it is
    # closed even when the assertion fails, instead of being left open
    with ix.searcher(weighting=LegacyWeighting()) as s:
        r = s.search(query.Term("text", u("bravo")))
        assert r.score(0) == 2.25