File: test_sqlite_index.py

package info (click to toggle)
sourmash 4.9.4-4
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 54,688 kB
  • sloc: python: 59,380; ansic: 332; makefile: 277; sh: 6
file content (921 lines) | stat: -rw-r--r-- 29,270 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
"Tests for SqliteIndex, SqliteCollectionManifest, and LCA_SqliteDatabase"

import os
import pytest
import shutil
import sqlite3

import sourmash
from sourmash.exceptions import IndexNotSupported
from sourmash.index.sqlite_index import (
    SqliteIndex,
    load_sqlite_index,
    SqliteCollectionManifest,
    LCA_SqliteDatabase,
)

from sourmash.index import StandaloneManifestIndex
from sourmash import SourmashSignature
from sourmash.sourmash_args import load_one_signature
from sourmash.picklist import SignaturePicklist, PickStyle
from sourmash.manifest import CollectionManifest
from sourmash.tax.tax_utils import MultiLineageDB

import sourmash_tst_utils as utils
from sourmash_tst_utils import SourmashCommandFailed
from sourmash import sqlite_utils


def test_sqlite_index_prefetch_empty():
    # check that an exception is raised upon for an empty database
    sig2 = utils.get_test_data("2.fa.sig")
    ss2 = load_one_signature(sig2, ksize=31)

    sqlidx = SqliteIndex.create(":memory:")

    # since this is a generator, we need to actually ask for a value to
    # get exception raised.
    g = sqlidx.prefetch(ss2, threshold_bp=0)
    with pytest.raises(ValueError) as e:
        next(g)

    assert "no signatures to search" in str(e.value)


def test_sqlite_index_bad_version(runtmp):
    # create a sqlite database with a bad index version in the
    # sourmash_internal table, see what happens :)

    dbfile = runtmp.output("xyz.sqldb")
    conn = sqlite3.connect(dbfile)
    c = conn.cursor()

    SqliteIndex._create_tables(c)

    # 0.9 doesn't exist/is bad version
    c.execute(
        "UPDATE sourmash_internal SET value=? WHERE key=?", ("0.9", "SqliteIndex")
    )

    conn.commit()

    with pytest.raises(IndexNotSupported):
        sourmash.load_file_as_index(dbfile)


def test_sqlite_index_bad_version_unique(runtmp):
    # try to insert duplicate sqlite index info into sourmash_internal; fail

    dbfile = runtmp.output("xyz.sqldb")
    conn = sqlite3.connect(dbfile)
    c = conn.cursor()

    SqliteIndex._create_tables(c)

    # can't insert duplicate key
    with pytest.raises(sqlite3.IntegrityError):
        c.execute(
            "INSERT INTO sourmash_internal (value, key) VALUES (?, ?)",
            ("1.1", "SqliteIndex"),
        )


def test_index_search_subj_scaled_is_lower():
    # check that subject sketches are appropriately downsampled
    sigfile = utils.get_test_data(
        "scaled100/GCF_000005845.2_ASM584v2_genomic.fna.gz.sig.gz"
    )
    ss = load_one_signature(sigfile)

    # double check :)
    assert ss.minhash.scaled == 100

    # build a new query that has a scaled of 1000
    qs = SourmashSignature(ss.minhash.downsample(scaled=1000))

    # create Index to search
    sqlidx = SqliteIndex.create(":memory:")
    sqlidx.insert(ss)

    # search!
    results = list(sqlidx.search(qs, threshold=0))
    assert len(results) == 1
    # original signature (not downsampled) is returned
    assert results[0].signature == ss


def test_sqlite_index_save_load(runtmp):
    sig2 = utils.get_test_data("2.fa.sig")
    sig47 = utils.get_test_data("47.fa.sig")
    sig63 = utils.get_test_data("63.fa.sig")

    ss2 = load_one_signature(sig2, ksize=31)
    ss47 = load_one_signature(sig47)
    ss63 = load_one_signature(sig63)

    filename = runtmp.output("foo")
    sqlidx = SqliteIndex.create(filename)
    sqlidx.insert(ss2)
    sqlidx.insert(ss47)
    sqlidx.insert(ss63)

    sqlidx.close()

    sqlidx2 = SqliteIndex.load(filename)

    # now, search for sig2
    sr = sqlidx2.search(ss2, threshold=1.0)
    print([s[1].name for s in sr])
    assert len(sr) == 1
    assert sr[0][1] == ss2


def test_sqlite_index_multik_select():
    # this loads three ksizes, 21/31/51
    sig2 = utils.get_test_data("2.fa.sig")
    siglist = sourmash.load_file_as_signatures(sig2)

    sqlidx = SqliteIndex.create(":memory:")
    for ss in siglist:
        sqlidx.insert(ss)

    # select most specifically
    sqlidx2 = sqlidx.select(ksize=31, moltype="DNA")
    assert len(sqlidx2) == 1

    # all are DNA:
    sqlidx2 = sqlidx.select(moltype="DNA")
    assert len(sqlidx2) == 3


def test_sqlite_index_num_select():
    # this will fail on 'num' select, which is not allowed
    sqlidx = SqliteIndex.create(":memory:")
    with pytest.raises(ValueError):
        sqlidx.select(num=100)


def test_sqlite_index_abund_select():
    # this will fail on 'track_abundance' select, which is not allowed
    sqlidx = SqliteIndex.create(":memory:")
    with pytest.raises(ValueError):
        sqlidx.select(track_abundance=True)


def test_sqlite_index_insert_num_fail():
    # cannot insert 'num' signatures
    sqlidx = SqliteIndex.create(":memory:")

    sig47 = utils.get_test_data("num/47.fa.sig")
    ss47 = load_one_signature(sig47, ksize=31)
    assert ss47.minhash.num != 0

    with pytest.raises(ValueError) as exc:
        sqlidx.insert(ss47)

    assert "cannot store 'num' signatures in SqliteIndex" in str(exc)


def test_sqlite_index_insert_abund_fail():
    # cannot insert 'num' signatures
    sqlidx = SqliteIndex.create(":memory:")

    sig47 = utils.get_test_data("track_abund/47.fa.sig")
    ss47 = load_one_signature(sig47, ksize=31)

    with pytest.raises(ValueError) as exc:
        sqlidx.insert(ss47)

    assert "cannot store signatures with abundance in SqliteIndex" in str(exc)


def test_sqlite_index_moltype_multi_fail():
    # check that we cannot store sigs with multiple scaled values.

    # this loads multiple ksizes (19, 31) and moltypes (DNA, protein, hp, etc)
    filename = utils.get_test_data("prot/all.zip")
    siglist = sourmash.load_file_as_signatures(filename)
    siglist = list(siglist)

    sqlidx = SqliteIndex.create(":memory:")

    sqlidx.insert(siglist[0])
    assert sqlidx.scaled == 100

    with pytest.raises(ValueError) as exc:
        for ss in siglist:
            sqlidx.insert(ss)

    assert "this database can only store scaled values=100" in str(exc)


def test_sqlite_index_picklist_select():
    # test select with a picklist

    # this loads three ksizes, 21/31/51
    sig2 = utils.get_test_data("2.fa.sig")
    siglist = sourmash.load_file_as_signatures(sig2)

    sqlidx = SqliteIndex.create(":memory:")
    for ss in siglist:
        sqlidx.insert(ss)

    # construct a picklist...
    picklist = SignaturePicklist("md5prefix8")
    picklist.init(["f3a90d4e"])

    # select on picklist
    sqlidx2 = sqlidx.select(picklist=picklist)
    assert len(sqlidx2) == 1
    ss = list(sqlidx2.signatures())[0]
    assert ss.minhash.ksize == 31
    assert ss.md5sum().startswith("f3a90d4e55")


def test_sqlite_index_picklist_select_exclude():
    # test select with a picklist, but exclude

    # this loads three ksizes, 21/31/51
    sig2 = utils.get_test_data("2.fa.sig")
    siglist = sourmash.load_file_as_signatures(sig2)

    sqlidx = SqliteIndex.create(":memory:")
    for ss in siglist:
        sqlidx.insert(ss)

    # construct a picklist...
    picklist = SignaturePicklist("md5prefix8", pickstyle=PickStyle.EXCLUDE)
    picklist.init(["f3a90d4e"])

    # select on picklist
    sqlidx2 = sqlidx.select(picklist=picklist)
    assert len(sqlidx2) == 2
    md5s = set()
    ksizes = set()
    for ss in list(sqlidx2.signatures()):
        md5s.add(ss.md5sum())
        ksizes.add(ss.minhash.ksize)
    assert md5s == set(
        ["f372e47893edd349e5956f8b0d8dcbf7", "43f3b48e59443092850964d355a20ac0"]
    )
    assert ksizes == set([21, 51])


def test_sqlite_jaccard_ordering():
    # this tests a tricky situation where for three sketches A, B, C,
    # |A intersect B| is greater than |A intersect C|
    # _but_
    # |A jaccard B| is less than |A intersect B|
    a = sourmash.MinHash(ksize=31, n=0, scaled=2)
    b = a.copy_and_clear()
    c = a.copy_and_clear()

    a.add_many([1, 2, 3, 4])
    b.add_many([1, 2, 3] + list(range(10, 30)))
    c.add_many([1, 5])

    def _intersect(x, y):
        return x.intersection_and_union_size(y)[0]

    print("a intersect b:", _intersect(a, b))
    print("a intersect c:", _intersect(a, c))
    print("a jaccard b:", a.jaccard(b))
    print("a jaccard c:", a.jaccard(c))
    assert _intersect(a, b) > _intersect(a, c)
    assert a.jaccard(b) < a.jaccard(c)

    # thresholds to use:
    assert a.jaccard(b) < 0.15
    assert a.jaccard(c) > 0.15

    # now - make signatures, try out :)
    ss_a = sourmash.SourmashSignature(a, name="A")
    ss_b = sourmash.SourmashSignature(b, name="B")
    ss_c = sourmash.SourmashSignature(c, name="C")

    sqlidx = SqliteIndex.create(":memory:")
    sqlidx.insert(ss_a)
    sqlidx.insert(ss_b)
    sqlidx.insert(ss_c)

    sr = sqlidx.search(ss_a, threshold=0.15)
    print(sr)
    assert len(sr) == 2
    assert sr[0].signature == ss_a
    assert sr[0].score == 1.0
    assert sr[1].signature == ss_c
    assert sr[1].score == 0.2


def test_sqlite_index_scaled1():
    # check on scaled=1 storage.
    sqlidx = SqliteIndex.create(":memory:")

    mh1 = sourmash.MinHash(0, 31, scaled=1)
    mh1.add_hash(2**64 - 1)
    mh1.add_hash(2**64 - 2)
    mh1.add_hash(2**64 - 3)
    ss1 = sourmash.SourmashSignature(mh1, name="ss 1")

    mh2 = sourmash.MinHash(0, 31, scaled=1)
    mh2.add_hash(2**64 - 1)
    mh2.add_hash(2**64 - 2)
    mh2.add_hash(2**64 - 3)
    mh2.add_hash(0)
    mh2.add_hash(1)
    mh2.add_hash(2)
    ss2 = sourmash.SourmashSignature(mh2, name="ss 2")

    sqlidx.insert(ss1)
    sqlidx.insert(ss2)

    # check jaccard search
    results = list(sqlidx.search(ss1, threshold=0))
    print(results)
    assert len(results) == 2
    assert results[0].signature == ss1
    assert results[0].score == 1.0
    assert results[1].signature == ss2
    assert results[1].score == 0.5

    results = list(sqlidx.search(ss1, threshold=0, do_containment=True))
    print(results)
    assert results[0].signature == ss1
    assert results[0].score == 1.0
    assert results[1].signature == ss2
    assert results[1].score == 1.0

    # minhashes retrieved successfully?
    assert len(results[0].signature.minhash) == 3
    assert len(results[1].signature.minhash) == 6


def test_sqlite_index_load_existing():
    # try loading an existing sqlite index
    filename = utils.get_test_data("sqlite/index.sqldb")
    sqlidx = sourmash.load_file_as_index(filename)
    assert isinstance(sqlidx, SqliteIndex)

    siglist = list(sqlidx.signatures())
    assert len(siglist) == 2


def test_sqlite_index_create_load_existing(runtmp):
    # try creating then loading an existing sqlite index; create from CLI
    filename = runtmp.output("idx.sqldb")
    sig1 = utils.get_test_data("47.fa.sig")
    sig2 = utils.get_test_data("63.fa.sig")

    runtmp.sourmash("sig", "cat", sig1, sig2, "-o", filename)

    sqlidx = sourmash.load_file_as_index(filename)
    assert isinstance(sqlidx, SqliteIndex)

    siglist = list(sqlidx.signatures())
    assert len(siglist) == 2


def test_sqlite_index_create_load_insert_existing(runtmp):
    # try creating, loading, inserting into an existing sqlite index
    filename = runtmp.output("idx.sqldb")
    sig1 = utils.get_test_data("47.fa.sig")
    sig2 = utils.get_test_data("63.fa.sig")
    sig3 = utils.get_test_data("2.fa.sig")

    runtmp.sourmash("sig", "cat", sig1, sig2, "-o", filename)

    sqlidx = sourmash.load_file_as_index(filename)
    assert isinstance(sqlidx, SqliteIndex)

    siglist = list(sqlidx.signatures())
    assert len(siglist) == 2

    ss3 = load_one_signature(sig3, ksize=31)
    sqlidx.insert(ss3)
    sqlidx.commit()

    runtmp.sourmash("sig", "describe", filename)
    print(runtmp.last_result.out)
    assert "md5: f3a90d4e5528864a5bcc8434b0d0c3b1" in runtmp.last_result.out


def test_sqlite_index_create_load_insert_existing_cli(runtmp):
    # try creating, loading, inserting into an existing sqlite index from cli
    # (aka "append" to existing database)
    filename = runtmp.output("idx.sqldb")
    sig1 = utils.get_test_data("47.fa.sig")
    sig2 = utils.get_test_data("63.fa.sig")
    sig3 = utils.get_test_data("2.fa.sig")

    runtmp.sourmash("sig", "cat", sig1, sig2, "-o", filename)

    sqlidx = sourmash.load_file_as_index(filename)
    assert isinstance(sqlidx, SqliteIndex)

    siglist = list(sqlidx.signatures())
    assert len(siglist) == 2

    # add a third
    runtmp.sourmash("sig", "cat", sig3, "-o", filename, "-k", "31")

    siglist = list(sqlidx.signatures())
    assert len(siglist) == 3


def test_sqlite_manifest_bad_version(runtmp):
    # create a sqlite database with a bad manifest version in the
    # sourmash_internal table, see what happens :)

    dbfile = runtmp.output("xyz.sqlmf")
    conn = sqlite3.connect(dbfile)
    c = conn.cursor()

    SqliteCollectionManifest._create_tables(c)

    # 0.9 doesn't exist/bad version
    c.execute(
        "UPDATE sourmash_internal SET value=? WHERE key=?", ("0.9", "SqliteManifest")
    )

    conn.commit()

    with pytest.raises(IndexNotSupported):
        CollectionManifest.load_from_filename(dbfile)


def test_sqlite_manifest_bad_version_unique(runtmp):
    # try to insert duplicate sqlite manifest info into sourmash_internal; fail

    dbfile = runtmp.output("xyz.sqldb")
    conn = sqlite3.connect(dbfile)
    c = conn.cursor()

    SqliteCollectionManifest._create_tables(c)

    # can't insert duplicate key
    with pytest.raises(sqlite3.IntegrityError):
        c.execute(
            "INSERT INTO sourmash_internal (value, key) VALUES (?, ?)",
            ("1.1", "SqliteManifest"),
        )


def test_sqlite_manifest_basic():
    # test some features of the SQLite-based manifest.
    sig2 = load_one_signature(utils.get_test_data("2.fa.sig"), ksize=31)
    sig47 = load_one_signature(utils.get_test_data("47.fa.sig"), ksize=31)
    sig63 = load_one_signature(utils.get_test_data("63.fa.sig"), ksize=31)

    sqlidx = SqliteIndex.create(":memory:")

    # empty manifest tests
    manifest = sqlidx.manifest
    assert not manifest
    assert len(manifest) == 0

    sqlidx.insert(sig47)
    sqlidx.insert(sig63)

    # ok, more full manifest tests!
    assert manifest
    assert len(manifest) == 2

    assert sig47 in manifest
    assert sig2 not in manifest

    # check that we can get a "standard" manifest out
    standard_mf = CollectionManifest.load_from_manifest(sqlidx.manifest)
    assert len(standard_mf) == 2

    picklist = manifest.to_picklist()
    assert sig47 in picklist
    assert sig2 not in picklist


def test_sqlite_manifest_round_trip():
    # check that we can go from regular mf -> sqlite mf -> regular again.
    sig2 = load_one_signature(utils.get_test_data("2.fa.sig"), ksize=31)
    sig47 = load_one_signature(utils.get_test_data("47.fa.sig"), ksize=31)
    sig63 = load_one_signature(utils.get_test_data("63.fa.sig"), ksize=31)

    rows = []
    rows.append(
        CollectionManifest.make_manifest_row(sig47, None, include_signature=False)
    )
    rows.append(
        CollectionManifest.make_manifest_row(sig63, None, include_signature=False)
    )
    nosql_mf = CollectionManifest(rows)

    sqlite_mf = SqliteCollectionManifest.load_from_manifest(nosql_mf)

    # test roundtrip
    round_mf = CollectionManifest.load_from_manifest(sqlite_mf)

    assert len(round_mf) == 2
    print(round_mf.rows, nosql_mf.rows)
    assert round_mf == nosql_mf

    for mf in (nosql_mf, sqlite_mf, round_mf):
        picklist = mf.to_picklist()
        assert sig47 in picklist
        assert sig2 not in picklist


def test_sqlite_manifest_create(runtmp):
    # test creation and summarization of a manifest of prot.zip
    zipfile = utils.get_test_data("prot/all.zip")

    # create manifest
    runtmp.sourmash("sig", "manifest", "-F", "sql", zipfile, "-o", "mf.sqlmf")

    sqlmf = runtmp.output("mf.sqlmf")
    assert os.path.exists(sqlmf)

    # verify it's loadable as the right type
    idx = load_sqlite_index(sqlmf)
    assert isinstance(idx, StandaloneManifestIndex)

    # summarize
    runtmp.sourmash("sig", "fileinfo", "mf.sqlmf")

    out = runtmp.last_result.out
    print(out)

    assert "2 sketches with dayhoff, k=19, scaled=100          7945 total hashes" in out
    assert "2 sketches with hp, k=19, scaled=100               5184 total hashes" in out
    assert "2 sketches with protein, k=19, scaled=100          8214 total hashes" in out
    assert "1 sketches with DNA, k=31, scaled=1000             5238 total hashes" in out

    assert "path filetype: StandaloneManifestIndex" in out
    assert "location: mf.sqlmf" in out
    assert "is database? yes" in out
    assert "has manifest? yes" in out
    assert "num signatures: 7" in out


def test_sqlite_manifest_create_noload_sigs(runtmp):
    # sigs should not be loadable from manifest this way...
    zipfile = utils.get_test_data("prot/all.zip")

    # create manifest
    runtmp.sourmash("sig", "manifest", "-F", "sql", zipfile, "-o", "mf.sqlmf")

    # 'describe' should not be able to load the sqlmf b/c prefix is wrong
    with pytest.raises(SourmashCommandFailed):
        runtmp.sourmash("sig", "describe", "mf.sqlmf")


def test_sqlite_manifest_create_yesload_sigs(runtmp):
    # should be able to load after copying files
    zipfile = utils.get_test_data("prot/all.zip")
    shutil.copytree(utils.get_test_data("prot"), runtmp.output("prot"))

    # create manifest
    runtmp.sourmash("sig", "manifest", "-F", "sql", zipfile, "-o", "prot/mf.sqlmf")

    # 'describe' should now be able to load the sqlmf, which is cool
    runtmp.sourmash("sig", "describe", "prot/mf.sqlmf")
    print(runtmp.last_result.out)


def test_sqlite_manifest_num(runtmp):
    # should be able to produce sql manifests with 'num' sketches in them
    numsig = utils.get_test_data("num/47.fa.sig")

    # create mf
    runtmp.sourmash("sig", "manifest", "-F", "sql", numsig, "-o", "mf.sqlmf")

    # do summarize:
    runtmp.sourmash("sig", "summarize", "mf.sqlmf")
    out = runtmp.last_result.out

    print(out)

    assert "1 sketches with DNA, k=21, num=500                 500 total hashes" in out
    assert "1 sketches with DNA, k=31, num=500                 500 total hashes" in out
    assert "1 sketches with DNA, k=51, num=500                 500 total hashes" in out


def test_sqlite_manifest_num_select(runtmp):
    # should be able to _select_ sql manifests with 'num' sketches in them
    numsig = utils.get_test_data("num/47.fa.sig")

    # create mf
    runtmp.sourmash("sig", "manifest", "-F", "sql", numsig, "-o", "mf.sqlmf")

    # load as index
    idx = sourmash.load_file_as_index(runtmp.output("mf.sqlmf"))

    # select
    print(list(idx.manifest.rows))
    idx = idx.select(num=500)
    print(list(idx.manifest.rows))
    assert len(idx) == 3


def test_sqlite_manifest_locations(runtmp):
    # check what locations returns... may return too many, that's ok.
    prot = utils.get_test_data("prot")

    runtmp.sourmash("sig", "manifest", "-F", "sql", prot, "-o", "mf.sqlmf")

    # load as index
    idx = sourmash.load_file_as_index(runtmp.output("mf.sqlmf"))

    picklist = SignaturePicklist("identprefix")
    picklist.pickset = set(["GCA_001593925"])
    idx = idx.select(picklist=picklist)

    sql_locations = set(idx.manifest.locations())
    row_locations = set(row["internal_location"] for row in idx.manifest.rows)

    assert sql_locations.issuperset(row_locations)

    assert "dna-sig.sig.gz" in sql_locations  # this is unnecessary...
    assert "dna-sig.sig.gz" not in row_locations  # ...this is correct :)


def test_sqlite_manifest_create_insert(runtmp):
    # try out creating a sqlite manifest and then running cli on it

    mfname = runtmp.output("some.sqlmf")
    mf = SqliteCollectionManifest.create(mfname)

    sigfile = utils.get_test_data("47.fa.sig")
    ss = load_one_signature(sigfile)

    mf._insert_row(mf.conn.cursor(), mf.make_manifest_row(ss, "some.sig"))
    mf.conn.commit()

    # copy sig in since we want it to resolve...
    shutil.copyfile(sigfile, runtmp.output("some.sig"))

    # 'describe' should work here, to resolve actual sigs.
    runtmp.sourmash("sig", "describe", mfname)
    print(runtmp.last_result.out)
    assert "md5: 09a08691ce52952152f0e866a59f6261" in runtmp.last_result.out


def test_sqlite_manifest_create_insert_2(runtmp):
    # try out creating a sqlite manifest from cli and then _insert_row into it

    # copy sig in since we want it to resolve...
    sigfile = utils.get_test_data("47.fa.sig")
    shutil.copyfile(sigfile, runtmp.output("some.sig"))

    runtmp.sourmash("sig", "manifest", "some.sig", "-F", "sql", "-o", "some.sqlmf")
    mfname = runtmp.output("some.sqlmf")

    mf = CollectionManifest.load_from_filename(mfname)
    ss = load_one_signature(runtmp.output("some.sig"))
    mf._insert_row(mf.conn.cursor(), mf.make_manifest_row(ss, "some.sig"))
    mf.conn.commit()

    # 'describe' should work here, to resolve actual sigs.
    runtmp.sourmash("sig", "describe", mfname)
    print(runtmp.last_result.out)
    assert "md5: 09a08691ce52952152f0e866a59f6261" in runtmp.last_result.out


def test_sqlite_manifest_existing(runtmp):
    # try out an existing sqlite manifest

    prefix = runtmp.output("protdir")
    mf = runtmp.output("protdir/prot.sqlmf")
    shutil.copytree(utils.get_test_data("prot"), prefix)
    shutil.copyfile(utils.get_test_data("sqlite/prot.sqlmf"), mf)

    runtmp.sourmash("sig", "describe", mf)
    print(runtmp.last_result.out)


def test_sqlite_manifest_existing_insert(runtmp):
    # try out an existing sqlite manifest - insert into it

    prefix = runtmp.output("protdir")
    shutil.copytree(utils.get_test_data("prot"), prefix)

    mfname = runtmp.output("protdir/prot.sqlmf")
    shutil.copyfile(utils.get_test_data("sqlite/prot.sqlmf"), mfname)
    mf = CollectionManifest.load_from_filename(mfname)
    assert isinstance(mf, SqliteCollectionManifest)

    sigfile = utils.get_test_data("47.fa.sig")
    ss = load_one_signature(sigfile)

    mf._insert_row(mf.conn.cursor(), mf.make_manifest_row(ss, "some.sig"))
    mf.conn.commit()

    # copy sig in since we want it to resolve...
    shutil.copyfile(sigfile, runtmp.output("protdir/some.sig"))

    # 'describe' should work here.
    runtmp.sourmash("sig", "describe", mfname)
    print(runtmp.last_result.out)


def test_sqlite_manifest_existing_mf_only(runtmp):
    # try out an existing sqlite manifest, but without underlying files -> fail

    mf = runtmp.output("prot.sqlmf")
    shutil.copyfile(utils.get_test_data("sqlite/prot.sqlmf"), mf)

    # 'fileinfo' should work...
    runtmp.sourmash("sig", "fileinfo", mf)
    print(runtmp.last_result.out)
    assert "num signatures: 7" in runtmp.last_result.out

    # ...but 'describe' should fail, since it needs actual sigs.
    with pytest.raises(SourmashCommandFailed):
        runtmp.sourmash("sig", "describe", mf)

    print(runtmp.last_result.err)
    assert "ERROR: Error while reading signatures from" in runtmp.last_result.err


def test_sqlite_manifest_existing_mfonly_insert(runtmp):
    # try out an existing sqlite manifest - insert into it, but fail describe

    mfname = runtmp.output("prot.sqlmf")
    shutil.copyfile(utils.get_test_data("sqlite/prot.sqlmf"), mfname)
    mf = CollectionManifest.load_from_filename(mfname)
    assert isinstance(mf, SqliteCollectionManifest)

    sigfile = utils.get_test_data("47.fa.sig")
    ss = load_one_signature(sigfile)

    mf._insert_row(mf.conn.cursor(), mf.make_manifest_row(ss, sigfile))
    mf.conn.commit()

    # 'fileinfo' should work...
    runtmp.sourmash("sig", "fileinfo", mfname)
    print(runtmp.last_result.out)
    assert "num signatures: 8" in runtmp.last_result.out

    # ...but 'describe' should fail, since it needs actual sigs.
    with pytest.raises(SourmashCommandFailed):
        runtmp.sourmash("sig", "describe", mfname)


def test_sqlite_manifest_load_existing_index():
    # try loading an existing sqlite index as a manifest
    filename = utils.get_test_data("sqlite/index.sqldb")
    mf = CollectionManifest.load_from_filename(filename)
    assert isinstance(mf, SqliteCollectionManifest)

    assert len(mf) == 2


def test_sqlite_manifest_load_existing_index_insert_fail():
    # try loading an existing sqlite index as a manifest; insert should fail
    filename = utils.get_test_data("sqlite/index.sqldb")
    mf = CollectionManifest.load_from_filename(filename)
    assert isinstance(mf, SqliteCollectionManifest)

    assert len(mf) == 2

    # try insert - should fail
    sigfile = utils.get_test_data("47.fa.sig")
    ss = load_one_signature(sigfile)

    with pytest.raises(Exception) as exc:
        mf._insert_row(mf.conn.cursor(), mf.make_manifest_row(ss, sigfile))

    assert "must use SqliteIndex.insert to add to this manifest" in str(exc)


def test_sqlite_manifest_create_load_empty(runtmp):
    # try creating an empty manifest, then loading
    mfname = runtmp.output("some.sqlmf")
    mf = SqliteCollectionManifest.create(mfname)
    mf.close()

    mf2 = load_sqlite_index(mfname)
    assert len(mf2) == 0


def test_sqlite_lca_db_load_existing():
    # try loading an existing sqlite index
    filename = utils.get_test_data("sqlite/lca.sqldb")
    sqlidx = sourmash.load_file_as_index(filename)
    assert isinstance(sqlidx, LCA_SqliteDatabase)

    siglist = list(sqlidx.signatures())
    assert len(siglist) == 2


def test_sqlite_lca_db_select():
    # try loading an existing sqlite index
    filename = utils.get_test_data("sqlite/lca.sqldb")
    sqlidx = sourmash.load_file_as_index(filename)
    assert isinstance(sqlidx, LCA_SqliteDatabase)

    sqlidx2 = sqlidx.select(ksize=31)
    list(sqlidx2.hashvals)  # only on LCA_SqliteDatabase
    assert isinstance(sqlidx2, LCA_SqliteDatabase)


def test_sqlite_lca_db_create_load_existing(runtmp):
    # try creating (from CLI) then loading (from API) an LCA db
    filename = runtmp.output("lca.sqldb")
    sig1 = utils.get_test_data("lca/TARA_ASE_MAG_00031.sig")
    sig2 = utils.get_test_data("lca/TARA_PSW_MAG_00136.sig")

    runtmp.sourmash("sig", "flatten", sig1, sig2, "-o", filename, "-k", "31")

    # load tax
    tax_csv = utils.get_test_data("sqlite/delmont-6.csv")
    runtmp.sourmash("tax", "prepare", "-t", tax_csv, "-o", filename, "-F", "sql")

    sqlidx = sourmash.load_file_as_index(filename)
    assert isinstance(sqlidx, LCA_SqliteDatabase)

    siglist = list(sqlidx.signatures())
    assert len(siglist) == 2


def test_sqlite_lca_db_load_empty(runtmp):
    # try creating then loading an _empty_ LCA_SqliteDatabase

    dbname = runtmp.output("empty.sqldb")

    # create empty SqliteIndex...
    runtmp.sourmash("sig", "cat", "-o", dbname)
    assert os.path.exists(dbname)

    # ...and create empty sourmash_taxonomy tables in there...
    empty_tax = utils.get_test_data("scaled/empty-lineage.csv")
    runtmp.sourmash("tax", "prepare", "-F", "sql", "-t", empty_tax, "-o", dbname)

    runtmp.sourmash("sig", "describe", dbname)
    assert "loaded 0 signatures" in runtmp.last_result.err


def test_sqlite_lca_db_create_readonly(runtmp):
    # try running 'prepare' on a read-only sqlite db, check error message.

    dbname = runtmp.output("empty.sqldb")

    # create empty SqliteIndex...
    runtmp.sourmash("sig", "cat", "-o", dbname)
    assert os.path.exists(dbname)

    # make it read only...
    from stat import S_IREAD, S_IRGRP, S_IROTH

    os.chmod(dbname, S_IREAD | S_IRGRP | S_IROTH)

    # ...and try creating empty sourmash_taxonomy tables in there...
    empty_tax = utils.get_test_data("scaled/empty-lineage.csv")

    with pytest.raises(SourmashCommandFailed):
        runtmp.sourmash("tax", "prepare", "-F", "sql", "-t", empty_tax, "-o", dbname)

    err = runtmp.last_result.err
    print(err)

    assert "taxonomy table already exists in" not in err
    assert "attempt to write a readonly database" in err


def test_sqlite_lca_db_try_load_sqlite_index():
    # try loading a SqliteIndex with no tax tables from .load classmethod
    dbname = utils.get_test_data("sqlite/index.sqldb")

    with pytest.raises(ValueError) as exc:
        LCA_SqliteDatabase.load(dbname)

    assert "not a taxonomy database" in str(exc)


def test_sqlite_lca_db_supply_lineage_db():
    # try creating an LCA_SqliteDatabase object with a separate lineage DB.
    dbname = utils.get_test_data("sqlite/index.sqldb")

    tax_csv = utils.get_test_data("sqlite/shewanella-lineage.csv")
    lineage_db = MultiLineageDB.load([tax_csv])

    db = LCA_SqliteDatabase(dbname, lineage_db=lineage_db)

    hashval = next(iter(db.hashvals))
    lineages = db.get_lineage_assignments(hashval)
    print(lineages)
    assert lineages[0][0].rank == "superkingdom"
    assert lineages[0][0].name == "d__Bacteria"
    assert lineages[0][-1].rank == "species"
    assert lineages[0][-1].name == "s__Shewanella baltica"
    assert lineages[1][0].rank == "superkingdom"
    assert lineages[1][0].name == "d__Bacteria"
    assert lineages[0][-1].rank == "species"
    assert lineages[0][-1].name == "s__Shewanella baltica"


def test_bad_sqlite_internal_version():
    # check get_sourmash_internal
    dbname = utils.get_test_data("sqlite/index.sqldb")

    conn = sqlite_utils.open_sqlite_db(dbname)
    c = conn.cursor()
    with pytest.raises(Exception):
        sqlite_utils.add_sourmash_internal(c, "SqliteIndex", "0.9")