File: test_fncls.py

package info (click to toggle)
python-ncls 0.0.63-hotfix+ds-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, forky, sid, trixie
  • size: 372 kB
  • sloc: ansic: 2,412; python: 205; sh: 18; makefile: 5
file content (32 lines) | stat: -rw-r--r-- 955 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
from ncls import FNCLS
import numpy as np
# Fix the global NumPy RNG state so every run draws the same intervals.
np.random.seed(0)

import pandas as pd

# Number of intervals placed in each table.
size = 10 ** 4


def _random_intervals(count):
    """Draw ``count`` random intervals from NumPy's global RNG.

    Start coordinates are random integers in ``[0, 1e6)`` shifted by one
    shared random fraction, which makes them floats. Each end coordinate is
    its start plus a random integer length in ``[0, 1000)``.

    The three RNG calls happen in a fixed order (integer starts, shared
    fraction, integer lengths), so results depend only on the RNG state at
    call time.

    Returns:
        A ``(starts, ends)`` tuple of equally long float arrays.
    """
    lower = np.random.randint(0, high=10 ** 6, size=count) + np.random.random()
    upper = lower + np.random.randint(0, high=1000, size=count)
    return lower, upper


# First interval table.
starts, ends = _random_intervals(size)
df = pd.DataFrame({"Start": starts, "End": ends})

# Second interval table, drawn from the continued RNG stream.
starts, ends = _random_intervals(size)
df2 = pd.DataFrame({"Start": starts, "End": ends})

print(df, df2, sep="\n")

# perf_counter is a monotonic, high-resolution clock meant for measuring
# short durations; time() is wall-clock and can jump if the system clock is
# adjusted. `time` stays imported so the module-level name remains available.
from time import perf_counter, time  # noqa: F401

# Time construction of the FNCLS object from df's Start/End columns and its
# index values.
start = perf_counter()
fncls = FNCLS(df.Start.values, df.End.values, df.index.values)
end = perf_counter()
print("Time:", end - start)

# Time the overlap query of df2's intervals against the structure built above.
# The two returned arrays are used below as row labels: qx selects rows of
# df2 (the queried table) and sx selects rows of df (the table passed to
# FNCLS), position by position.
start = perf_counter()
qx, sx = fncls.all_overlaps_both(df2.Start.values, df2.End.values, df2.index.values)
end = perf_counter()
print("Time:", end - start)

# Suffix df2's column names so they do not collide with df's after the join.
# This renames df2's columns in place.
df2.columns = df2.columns + "_b"

# Line up each matched pair side by side: row i of the result holds the df
# row labelled sx[i] next to the df2 row labelled qx[i].
j = pd.concat([df.reindex(sx).reset_index(drop=True), df2.reindex(qx).reset_index(drop=True)], axis=1)

print(j.sort_values("Start"))