File: Gene.py

package info (click to toggle)
rsem 1.3.3%2Bdfsg-3
  • links: PTS, VCS
  • area: main
  • in suites: sid, trixie
  • size: 37,700 kB
  • sloc: cpp: 19,230; perl: 1,326; python: 1,245; ansic: 547; makefile: 186; sh: 154
file content (143 lines) | stat: -rw-r--r-- 4,061 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
__doc__="""

  pliu 20131002

  module for gene
"""


class Gene:
  def __init__(self):
    self.gene_id = None;
   #self.rsem_result = None;

    self.chrom = None;
    self.strand = None;
    self.tss = None;   ## transcription starting sites, strand ori considered
    self.tes = None;   ## transcription ending sites, strand ori considered
    self.start = None; ## genomic starting position regardless of strand
                       ## direction, always have a number smaller than self.end
    self.end = None;   ## genomic ending position


    self.transcripts = [];
    self.gtfs = [];
    self.transcript_tss_groups = []; ## a list of list of transcripts having TSS
                                     ## within user-specified distance
    self.transcript_groups = [] ## a list of TranscriptionGroup objects


  def __str__(self):
    s = "%s" % self.gene_id;
    return s;


  ## should be moved to TranscriptGroup file
 #def groupTranscriptsByTSS(self):
 #  """
 #  put transcripts that have TSS within certain distance into a group
 #  """
 # #import Param;

 # #cutoff = 100; ## TSS within 100 bp
 # #cutoff = Param.TSS_GROUP_CUTOFF; ## cutoff for grouping TSS
 #  cutoff = 500 ## cutoff for grouping TSS

 #  group = [self.transcripts[0]]
 #  self.transcript_tss_groups.append(group)
 #  for tr in self.transcripts[1:]:
 #    is_assigned = False
 #    for grp in self.transcript_tss_groups:
 #      if (self.strand == '+') and (abs(tr.start - grp[0].start)<=cutoff):
 #        grp.append(tr)
 #        is_assigned = True;
 #      elif (self.strand == '-') and (abs(tr.end - grp[0].end)<=cutoff):
 #        grp.append(tr)
 #        is_assigned = True

 #      if is_assigned:
 #        break

 #    if not is_assigned:
 #      self.transcript_tss_groups.append([tr])


  ## should be moved to TranscriptGroup file
 #def constructTranscriptGroups(self):
 #  """
 #  construct a list of TranscriptGroup objects
 #  """
 #  import TranscriptGroup

 #  if len(self.transcript_tss_groups) == 0:
 #    self.groupTranscriptsByTSS()

 #  for transcripts in self.transcript_tss_groups:
 #    grp = TranscriptGroup.TranscriptGroup()
 #    grp.chrom = self.chrom
 #    grp.gene_id = self.gene_id
 #    grp.strand = self.strand
 #    grp.transcripts = transcripts
 #    self.transcript_groups.append(grp)



  def getStartEndTSSTESFromTranscripts(self):
    """
    define start and end from gene's transcripts
    start = min{all starts for transcripts};
    end = max{all ends for transcripts};
    """
    starts = [tr.start for tr in self.transcripts];
    ends = [tr.end for tr in self.transcripts];
    self.start = min(starts);
    self.end = max(ends);

    if self.strand == '+':
      self.tss = self.start;
      self.tes = self.end;
    elif self.strand == '-':
      self.tss = self.end;
      self.tes = self.start;


  def definePeakTypeByTranscriptGroups(self):
    """
    all:  all its transcript groups have peaks
    none: none of its transcript groups has peak
    mixed: some of its transcript groups have peaks, the others do not
    """
    has_tss_peaks = [grp.has_peak_around_TSS for grp in self.transcript_groups]
    if all(has_tss_peaks): ## all groups have peaks
      self.peak_type = 'all'
    else:
      if any(has_tss_peaks): ## some groups have peaks, the others not
        self.peak_type = 'mixed'
      else:                  ## no group has peak
        self.peak_type = 'no'


def constructGenesFromTranscripts(transcripts):
  """
  return a list of genes constructed from input transcripts
  """
  genes = []
  gene_dict_id = {}
  for tr in transcripts:
    if tr.gene_id in gene_dict_id:
      gene_dict_id[tr.gene_id].transcripts.append(tr)
      tr.gene = gene_dict_id[tr.gene_id]
    else:
      gene = Gene()
      gene.gene_id = tr.gene_id
      gene.chrom = tr.chrom
      gene.strand = tr.strand
      gene.transcripts.append(tr)
      genes.append(gene)
      gene_dict_id[tr.gene_id] = gene
      tr.gene = gene

  list(map(lambda gene: gene.getStartEndTSSTESFromTranscripts(), genes));

  return genes;