File: fetch_test_data.py

package info (click to toggle)
epcr 2.3.12-1-11
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 920 kB
  • sloc: cpp: 5,730; ansic: 231; makefile: 45; python: 26; sh: 12
file content (44 lines) | stat: -rwxr-xr-x 1,027 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
#!/usr/bin/python3
# Fetch the data required for testing:
# a. a FASTA test file from NCBI Entrez
# b. a primer data file
from Bio import Entrez

# Where the downloaded FASTA record is written, and which Entrez database
# the search and fetch are run against.
fn = "data/gingko.fasta"
database = "nucleotide"

# Contact address set on the Bio.Entrez module for its requests.
Entrez.email = "unknown@debian.org"

# Entrez search term, kept as a one-element list (this is the object handed
# to fetch_ids() as the search term).  The NOT clause is meant to drop
# mitochondrial records; the term must be spelled "mitochondrion" in one
# piece, otherwise the exclusion does not match anything.
query = [
    '((chloroplast OR plastid) AND "complete genome" AND Embryophyta '
    'NOT (mitochondrion OR mitochondrial)) AND "Ginkgo biloba"'
]

# Primer record written verbatim to the primer file: three tab-separated
# fields (a name and two oligo sequences -- presumably forward and reverse;
# confirm against the e-PCR primer file format).
primer = "rbcL1/rbcLA\t TTGGCAGCATTYCGAGTAACTCC\t CCTTTRTAACGATCAAGRC"


def fetch_ids(dbs, qr):
    """Search Entrez and save the first hit as a FASTA file.

    Runs ``Entrez.esearch`` for *qr* against database *dbs*, downloads the
    first ID of the returned ``IdList`` with ``Entrez.efetch``
    (``rettype='fasta'``, ``retmode='text'``) and writes the text, with
    trailing newlines stripped, to the module-level path ``fn``.

    Args:
        dbs: Entrez database name, passed as ``db``.
        qr: Search term, passed as ``term``.

    Raises:
        IndexError: If the search returns no IDs.
    """
    print(dbs, qr)

    # Fetch the query IDs; always release the handle, even if parsing fails.
    handle = Entrez.esearch(db=dbs, term=qr)
    try:
        record = Entrez.read(handle)
    finally:
        handle.close()

    ids = record['IdList']
    if not ids:
        # Same exception type that ids[0] would raise, but with context.
        raise IndexError(
            'Entrez search returned no IDs for {!r} in {!r}'.format(qr, dbs))

    # Fetch the first result
    handle_fasta = Entrez.efetch(
        db=dbs, id=ids[0], rettype='fasta', retmode='text')
    try:
        fasta = handle_fasta.read().rstrip('\n')
    finally:
        handle_fasta.close()

    # Open the output only after the download succeeded, so a failed request
    # does not truncate or create an empty file, and close it deterministically.
    with open(fn, 'w') as ff:
        ff.write(fasta)


def create_primer(pr, path='data/rbcL-primer.txt'):
    """Write the primer record *pr*, followed by a newline, to *path*.

    Args:
        pr: Text of the primer record to write.
        path: Output file; defaults to ``data/rbcL-primer.txt``. The file is
            overwritten if it already exists.
    """
    # The context manager closes the file even if the write raises.
    with open(path, 'w') as pf:
        pf.write(pr)
        pf.write('\n')


# Produce both test inputs: first the FASTA download, then the primer file.
fetch_ids(dbs=database, qr=query)
create_primer(pr=primer)