1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160
|
/***
IIX = Interval IndeX.
Given a fixed set of intervals on a
sequence of length L, provides an efficient
way to retrieve all intervals in the set that
have non-zero overlap with given interval.
This implementation is designed to be simple
to implement and efficient for the typical
sets of intervals that I will be dealing
with -- this is not a good general solution
to the problem. The main assumption is that
there are few long intervals.
The sequence is divided into blocks of length B.
For each block, there is a list of intervals
that overlap that block. So an indexed interval
(ii) of length > B will appear in more than one
list, and in fact any ii of length > 1 may appear
in more than one list if it happens to cross
a block boundary. Intervals of length >> B are a
problem because they will appear in many lists,
but for my purposes they are rare.
Each interval is associated with a user-defined
integer. This can be used by the calling code to
map an ii to some data, e.g. a GFF record. How
this is done is up to the caller.
***/
#include "piler2.h"
IIX::IIX()
{
m_GlobalLength = 0;
m_BlockLength = 0;
m_BlockCount = 0;
m_Blocks = 0;
m_GLIX = 0;
}
IIX::~IIX()
{
Free();
}
void IIX::Free()
{
// TODO@@
m_GlobalLength = 0;
}
int IIX::PosToBlockIndex(int Pos) const
{
return Pos/m_BlockLength;
}
void IIX::Init(int GlobalLength, int BlockLength)
{
m_GlobalLength = GlobalLength;
m_BlockLength = BlockLength;
m_BlockCount = (GlobalLength + BlockLength - 1)/BlockLength;
m_Blocks = all(INTERVAL *, m_BlockCount);
zero(m_Blocks, INTERVAL *, m_BlockCount);
m_GLIX = 0;
}
bool IIX::ValidInterval(int From, int To) const
{
if (From < 0 || From >= m_GlobalLength)
return false;
if (To < 0 || To >= m_GlobalLength)
return false;
if (From > To)
return false;
return true;
}
void IIX::AddGlobal(int GlobalFrom, int GlobalTo, int User)
{
assert(ValidInterval(GlobalFrom, GlobalTo));
const int BlockFrom = PosToBlockIndex(GlobalFrom);
const int BlockTo = PosToBlockIndex(GlobalTo);
for (int BlockIndex = BlockFrom; BlockIndex <= BlockTo; ++BlockIndex)
{
INTERVAL *ii = all(INTERVAL, 1);
ii->From = GlobalFrom;
ii->To = GlobalTo;
ii->User = User;
ii->Next = m_Blocks[BlockIndex];
m_Blocks[BlockIndex] = ii;
}
}
void IIX::AddLocal(const char *Label, int LocalFrom, int LocalTo, int User)
{
if (0 == m_GLIX)
Quit("IIX::AddLocal: no GLIX");
int GlobalFrom = m_GLIX->LocalToGlobal(Label, LocalFrom);
int GlobalTo = GlobalFrom + (LocalTo - LocalFrom + 1);
return AddGlobal(GlobalFrom, GlobalTo, User);
}
int IIX::LookupGlobal(int GlobalFrom, int GlobalTo, int **ptrHits) const
{
*ptrHits = 0;
if (!ValidInterval(GlobalFrom, GlobalTo))
return 0;
int *Hits = 0;
int HitCount = 0;
int HitBufferSize = 0;
const int BlockFrom = PosToBlockIndex(GlobalFrom);
const int BlockTo = PosToBlockIndex(GlobalTo);
for (int BlockIndex = BlockFrom; BlockIndex <= BlockTo; ++BlockIndex)
{
for (INTERVAL *ii = m_Blocks[BlockIndex]; ii; ii = ii->Next)
{
if (Overlap(GlobalFrom, GlobalTo, ii->From, ii->To) > 0)
{
if (HitCount >= HitBufferSize)
{
HitBufferSize += 128;
reall(Hits, int, HitBufferSize);
}
Hits[HitCount++] = ii->User;
}
}
}
*ptrHits = Hits;
return HitCount;
}
int IIX::LookupLocal(const char *Label, int LocalFrom, int LocalTo, int **ptrHits) const
{
int GlobalFrom = m_GLIX->LocalToGlobalNoFail(Label, LocalFrom);
if (-1 == GlobalFrom)
return 0;
int GlobalTo = GlobalFrom + (LocalTo - LocalFrom + 1);
return LookupGlobal(GlobalFrom, GlobalTo, ptrHits);
}
void IIX::LogMe() const
{
Log("IIX %d blocks\n", m_BlockCount);
Log(" Block From To User\n");
Log("====== ========== ========== ==========\n");
for (int BlockIndex = 0; BlockIndex < m_BlockCount; ++BlockIndex)
{
for (INTERVAL *ii = m_Blocks[BlockIndex]; ii; ii = ii->Next)
Log("%6d %10d %10d %10d\n",
BlockIndex, ii->From, ii->To, ii->User);
}
}
|