File: part1_code.rst

package info (click to toggle)
python-ruffus 2.6.3%2Bdfsg-4
  • links: PTS, VCS
  • area: main
  • in suites: stretch
  • size: 20,828 kB
  • ctags: 2,843
  • sloc: python: 15,745; makefile: 180; sh: 14
file content (70 lines) | stat: -rw-r--r-- 2,074 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
.. include:: ../../global.inc
.. _examples_bioinformatics_part1_code:


###################################################################
Ruffus code
###################################################################

::
    
    import os, sys
    
    exe_path = os.path.split(os.path.abspath(sys.argv[0]))[0]
    sys.path.insert(0, os.path.abspath(os.path.join(exe_path,"..", "..","..")))
    
    from ruffus import *
    
    
    original_fasta = "original.fa"
    database_file  = "human.protein.faa"
    
    @split(original_fasta, "*.segment")
    def splitFasta (seqFile, segments):
        """Split sequence file into 
           as many fragments as appropriate
           depending on the size of original_fasta"""
        current_file_index = 0
        for line in open(original_fasta):
            # 
            # start a new file for each accession line
            # 
            if line[0] == '>':
                current_file_index += 1
                current_file = open("%d.segment" % current_file_index, "w")
            current_file.write(line)
    
    
    
    @transform(splitFasta, suffix(".segment"), ".blastResult")
    def runBlast(seqFile,  blastResultFile):
        """Run blast"""
        os.system("blastall -p blastp -d %s -i %s > %s" %
                    (database_file, seqFile, blastResultFile))
    
    
    @merge(runBlast, "final.blast_results")
    def combineBlastResults (blastResultFiles, combinedBlastResultFile):
        """Combine blast results"""
        output_file = open(combinedBlastResultFile,  "w")
        for i in blastResultFiles:
            output_file.write(open(i).read())
    
    
    pipeline_run([combineBlastResults], verbose = 2, multiprocess = 4)
    

    #
    #   Simulate interuption of the pipeline by 
    #        deleting the output of one of the BLAST jobs
    #
    os.unlink("4.blastResult")

    pipeline_printout(sys.stdout, [combineBlastResults], verbose = 4) 
    

    #
    #   Re-running the pipeline
    #
    pipeline_run([combineBlastResults], verbose = 2, multiprocess = 4)