1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33
|
from ncls import NCLS
import pandas as pd
def test_basic():
    """Exercise the basic NCLS queries on five intervals ``(i, i + 100)``.

    Builds an NCLS from starts 0..4, ends 100..104 and ids 0..4, then checks:

    * ``find_overlap`` for a single query interval,
    * ``all_overlaps_both`` for a batch of query intervals, including that the
      returned ids work as row labels into the query/subject DataFrames,
    * ``intervals`` for listing everything that was stored.
    """
    starts = pd.Series(range(5))
    ends = starts + 100
    ids = starts
    subject_df = pd.DataFrame({"Start": starts, "End": ends}, index=ids)

    tree = NCLS(starts.values, ends.values, ids.values)

    # Single query: every hit is flattened element by element, so the
    # expected list is (start, end, id) for id 0 followed by id 1.
    flattened_hits = [
        value for hit in tree.find_overlap(0, 2) for value in hit
    ]
    assert flattened_hits == [0, 100, 0, 1, 101, 1]

    # Batch query: the query ids are deliberately unrelated to the subject
    # ids so the two returned id arrays cannot be confused with each other.
    starts_query = pd.Series([1, 3])
    ends_query = pd.Series([52, 14])
    indexes_query = pd.Series([10000, 100])
    query_df = pd.DataFrame(
        {"Start": starts_query.values, "End": ends_query.values},
        index=indexes_query.values,
    )

    l_idxs, r_idxs = tree.all_overlaps_both(
        starts_query.values, ends_query.values, indexes_query.values
    )

    # Both queries are expected to hit all five subject intervals.
    assert list(l_idxs) == [10000] * 5 + [100] * 5
    assert list(r_idxs) == [0, 1, 2, 3, 4, 0, 1, 2, 3, 4]

    # The returned ids must be usable as labels to pull the matching rows.
    assert list(query_df.loc[l_idxs, "End"]) == [52] * 5 + [14] * 5
    assert list(subject_df.loc[r_idxs, "End"]) == [
        100, 101, 102, 103, 104,
        100, 101, 102, 103, 104,
    ]

    # Listing the stored intervals returns (start, end, id) tuples.
    assert tree.intervals() == [
        (0, 100, 0),
        (1, 101, 1),
        (2, 102, 2),
        (3, 103, 3),
        (4, 104, 4),
    ]
|