File: test_feature_spliced_seq.py

package info (click to toggle)
python-pyfaidx 0.8.1.3-2
  • links: PTS, VCS
  • area: main
  • in suites: sid, trixie
  • size: 712 kB
  • sloc: python: 3,001; makefile: 16; sh: 6
file content (72 lines) | stat: -rw-r--r-- 1,852 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import os
import pytest
from pyfaidx import Fasta

path = os.path.dirname(__file__)
os.chdir(path)

@pytest.fixture
def remove_index():
    yield
    fais = [
            "data/gene.bed12.fasta.fai",
            "data/chr17.hg19.part.fa.fai"
            ]
    for fai in fais:
        try:    
            os.remove(fai)
        except EnvironmentError:
            pass  # some tests may delete this file
    
def test_split_seq(remove_index):
    """ Fetch sequence by blocks """
    fa = Fasta('data/chr17.hg19.part.fa')
    
    gene = Fasta("data/gene.bed12.fasta")
    expect = gene[list(gene.keys())[0]][:].seq
    
    bed = "data/gene.bed12"
    with open(bed) as fi:
        record = fi.readline().strip().split("\t")

    chrom = record[0]
    start = int(record[1])
    strand = record[5]

    # parse bed12 format
    starts = [int(x) for x in record[11].split(",")[:-1]] 
    sizes = [int(x) for x in record[10].split(",")[:-1]]
    starts = [start + x  for x in starts]
    ends = [start + size  for start,size in zip(starts, sizes)] 
    
    # bed half-open
    if strand == "-":
        starts = [start + 1 for start in starts]
    else: 
        ends = [end - 1 for end in ends]
    
    intervals = zip(starts, ends) 
    result = fa.get_spliced_seq(chrom, intervals, rc=True)
    print(result.seq)
    print("====")
    print(expect)

    assert result.seq == expect

def test_get_seq_rc(remove_index):
    """ Check get_seq with rc argument """
    fa = Fasta('data/chr17.hg19.part.fa')
    
    result = fa.get_seq("chr17", 11, 20, rc=False)
    expect = "CCCTGTTCCT"
    print("normal")
    print(result.seq)
    print(expect)
    assert result.seq == expect
    
    result = fa.get_seq("chr17", 11, 20, rc=True)
    expect = "AGGAACAGGG"
    assert result.seq == expect
    print("rc")
    print(result.seq)
    print(expect)