File: NonOverlapRegions.cpp

package info (click to toggle)
libstatgen 1.0.15-8
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 4,588 kB
  • sloc: cpp: 49,624; ansic: 1,408; makefile: 320; sh: 60
file content (254 lines) | stat: -rw-r--r-- 7,341 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
/*
 *  Copyright (C) 2011  Regents of the University of Michigan
 *
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

//////////////////////////////////////////////////////////////////////////

#include "NonOverlapRegions.h"
#include <iostream>

NonOverlapRegions::NonOverlapRegions()
    : myRegions()
{
}


NonOverlapRegions::~NonOverlapRegions()
{
    myRegions.clear();
}


void NonOverlapRegions::add(const char* chrom, int32_t start, int32_t end)
{
    // Add the region.
    myRegions[chrom].add(start, end);
}


bool NonOverlapRegions::inRegion(const char* chrom, int32_t pos)
{
    // Return whether or not the position was found within a region.
    // Note, this will create a NonOverlapRegion for this chrom if it
    // did not already exist, but it won't have any regions.
    return(myRegions[chrom].inRegion(pos));
}


NonOverlapRegionPos::NonOverlapRegionPos()
    : myRegions()
{
    myRegionIter = myRegions.begin();
    myTmpIter = myRegions.begin();
}


NonOverlapRegionPos::NonOverlapRegionPos(const NonOverlapRegionPos& reg)
    : myRegions()
{
    myRegionIter = myRegions.begin();
    myTmpIter = myRegions.begin();
}


NonOverlapRegionPos::~NonOverlapRegionPos()
{
    myRegionIter = myRegions.begin();
    myTmpIter = myRegions.begin();
    myRegions.clear();
}


void NonOverlapRegionPos::add(int32_t start, int32_t end)
{
    // Check to see if the start/end are valid in relation.
    if(start >= end)
    {
        std::cerr << "NonOverlapRegionPos::add: Invalid Range, "
                  << "start must be < end, but " << start << " >= " << end 
                  << std::endl;
        return;
    }

    bool added = false;
    // Locate the correct position in the region list for this start/end.
    if(inRegion(start))
    {
        // Check if the region end needs to be updated.
        if(end > myRegionIter->second)
        {
            myRegionIter->second = end;
        }
        added = true;
    }
    else
    {
        // Check to see if we are at the end.
        if(myRegionIter != myRegions.end())
        {
            // Not at the end.
            // Check to see if the region overlaps the current region.
            if(end >= myRegionIter->first)
            {
                // Overlaps, so update the start.
                myRegionIter->first = start;
                // Check if the end needs to be updated.
                if(myRegionIter->second < end)
                {
                    myRegionIter->second = end;
                }
                added = true;
            }
        }
    }

    // If we already added the record, check to see if the end of the
    // new region overlaps any additional regions (know that myRegionIter is
    // not at the end.
    if(added)
    {
        // Check to see if any other regions were overlapped by this record.
        myTmpIter = myRegionIter;
        ++myTmpIter;
        while(myTmpIter != myRegions.end())
        {
            // If the region starts before the end of this one, consume it.
            if(myTmpIter->first <= end)
            {
                if(myTmpIter->second > myRegionIter->second)
                {
                    // Update this region with the new end.
                    myRegionIter->second = myTmpIter->second;
                }
                
                myTmpIter = myRegions.erase(myTmpIter);
            }
            else
            {
                // This region is not overlapped by the new region, so stop.
                break;
            }
        }
    }
    else
    {
        // Add the region.
        myRegionIter = myRegions.insert(myRegionIter, 
                                         std::make_pair(start, end));
    }
}


bool NonOverlapRegionPos::inRegion(int32_t pos)
{
    // Return whether or not the position was found within a region.
    // If it is found within the region, myRegionIter will point to the region
    // otherwise myRegionIter will point to the region after the position 
    // or to the end if the position is after the last region.

    // Determine if it needs to search to the left
    //   a) it is at the end
    //   b) the region starts after the position.
    if(myRegionIter == myRegions.end())
    {
        // If the iterator is at the end, search to the left.
        return(findLeft(pos));
    }
    else if(pos < myRegionIter->first)
    {
        // Not at the end, so search left if the position is less
        // than this region's start.
        return(findLeft(pos));
    }
    else
    {
        return(findRight(pos));
    }
}


bool NonOverlapRegionPos::findRight(int32_t pos)
{
    // Keep looping until the end or until the position is found.
    while(myRegionIter != myRegions.end())
    {
        // Check to see if we have passed the position.
        if(pos < myRegionIter->first)
        {
            // stop here, position comes before this region,
            // so myRegionIter is pointing to just after it,
            // but was not found in the region.
            return(false);
        }
        else if(pos < myRegionIter->second)
        {
            // the position is in the region, so return true.
            return(true);
        }
        else
        {
            // The position is after this region, so increment.
            ++myRegionIter;
        }
    }
    // exited because we are at the end of the regions and the position was
    // not found.
    return(false);
}


bool NonOverlapRegionPos::findLeft(int32_t pos)
{
    if(myRegionIter == myRegions.end())
    {
        if(myRegionIter == myRegions.begin())
        {
            // There is nothing in this list, so just return.
            return(false);
        }
        // Move 1 lower than the end.
        --myRegionIter;
    }

    while(myRegionIter->first > pos)
    {
        // This region is before our position, so move to the previous region
        // unless this is the first region in the list.
        if(myRegionIter == myRegions.begin())
        {
            // Did not find the position and the iter is at the element
            // just after the position.
            return(false);
        }
        // Not yet to the beginning of the list, so decrement.
        --myRegionIter;
    }

    // At this point, the regions iter points to a region that starts
    // before the position.
    // Determine if the position is in the region by checking if it is 
    // less than the end of the region.
    if(pos < myRegionIter->second)
    {
        // in the region.
        return(true);
    }

    // This region ends before this position.  The iterator needs to point
    // to the region after the position, so increment it.
    ++myRegionIter;
    return(false);
}