File: sample_test.py

package info (click to toggle)
python-ncls 0.0.57%2Bds-1
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 376 kB
  • sloc: ansic: 2,412; python: 204; sh: 18; makefile: 5
file content (33 lines) | stat: -rw-r--r-- 1,202 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
from ncls import NCLS
import pandas as pd

def test_basic():
    """Exercise NCLS point lookup, bulk overlap queries and interval listing.

    Five subject intervals ``(i, i + 100)`` with ids ``i`` (``i`` in 0..4) are
    indexed. The test then checks:

    * ``find_overlap(0, 2)`` yields the records of the intervals with ids
      0 and 1 (each record is flattened into the comparison list);
    * ``all_overlaps_both`` pairs both query intervals with all five subject
      intervals, reporting the query ids on the left and the subject ids on
      the right;
    * those returned ids can be used as ``.loc`` labels on the matching
      DataFrames;
    * ``intervals()`` returns every stored ``(start, end, id)`` triple.
    """
    # Subject side: ids deliberately reuse the start coordinates (0..4).
    subject_starts = pd.Series(range(0, 5))
    subject_ends = subject_starts + 100
    subject_ids = subject_starts

    subject_df = pd.DataFrame(
        {"Start": subject_starts, "End": subject_ends},
        index=subject_ids,
    )
    ncls = NCLS(subject_starts.values, subject_ends.values, subject_ids.values)

    # Flatten every record produced by the single-range lookup.
    flattened_hits = [
        field
        for record in ncls.find_overlap(0, 2)
        for field in record
    ]
    assert flattened_hits == [0, 100, 0, 1, 101, 1]

    # Query side: two intervals carrying arbitrary, non-contiguous ids.
    query_starts = pd.Series([1, 3])
    query_ends = pd.Series([52, 14])
    query_ids = pd.Series([10000, 100])

    query_df = pd.DataFrame(
        {"Start": query_starts.values, "End": query_ends.values},
        index=query_ids.values,
    )

    query_hits, subject_hits = ncls.all_overlaps_both(
        query_starts.values, query_ends.values, query_ids.values
    )
    assert list(query_hits) == [10000] * 5 + [100] * 5
    assert list(subject_hits) == [0, 1, 2, 3, 4] * 2

    # The returned ids double as index labels into the source frames.
    matched_query_ends = list(query_df.loc[query_hits]["End"])
    assert matched_query_ends == [52] * 5 + [14] * 5
    matched_subject_ends = list(subject_df.loc[subject_hits]["End"])
    assert matched_subject_ends == [100, 101, 102, 103, 104] * 2

    stored = ncls.intervals()
    assert stored == [(0, 100, 0), (1, 101, 1), (2, 102, 2), (3, 103, 3), (4, 104, 4)]