File: test_bigsort.py

package info (click to toggle)
python-whoosh 2.7.4%2Bgit6-g9134ad92-5
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 3,656 kB
  • sloc: python: 38,517; makefile: 118
file content (63 lines) | stat: -rw-r--r-- 1,556 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import os.path, random, shutil
from datetime import datetime

from whoosh import fields, index, query
from whoosh.compat import text_type, xrange
from whoosh.util import now


def test_bigsort():
    times = 30000
    dirname = "testindex"

    df = fields.DATETIME(stored=True)
    schema = fields.Schema(id=fields.ID(stored=True), date=df)

    if os.path.exists(dirname):
        shutil.rmtree(dirname)
    os.mkdir(dirname)
    ix = index.create_in(dirname, schema)

    print("Writing...")
    t = now()
    w = ix.writer(limitmb=512)
    for i in xrange(times):
        dt = datetime.fromtimestamp(random.randint(15839593, 1294102139))
        w.add_document(id=text_type(i), date=dt)
    w.commit()
    print("Writing took ", now() - t)

    ix = index.open_dir(dirname)
    s = ix.searcher()
    q = query.Wildcard("id", "1?2*")

    t = now()
    x = list(df.sortable_terms(s.reader(), "date"))
    print(now() - t, len(x))

    t = now()
    for y in x:
        p = list(s.postings("date", y).all_ids())
    print(now() - t)



    t = now()
    r = s.search(q, limit=25, sortedby="date", reverse=True)
    print("Search 1 took", now() - t)
    print("len=", r.scored_length())

    t = now()
    r = s.search(q, limit=25, sortedby="date")
    print("Search 2 took", now() - t)

    t = now()
    r = s.search(q, limit=25, sortedby="date")
    print("Search 2 took", now() - t)

    from heapq import nlargest
    t = now()
    sf = s.stored_fields
    gen = ((sf(n)["date"], n) for n in q.docs(s))
    r = nlargest(25, gen)
    print(now() - t)