/*
* Copyright (c) 2009-2010 by The Broad Institute, Inc.
* All Rights Reserved.
*
* This software is licensed under the terms of the GNU Lesser General Public License (LGPL), Version 2.1 which
* is available at http://www.opensource.org/licenses/lgpl-2.1.php.
*
* THE SOFTWARE IS PROVIDED "AS IS." THE BROAD AND MIT MAKE NO REPRESENTATIONS OR WARRANTIES OF
* ANY KIND CONCERNING THE SOFTWARE, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT
* OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. IN NO EVENT SHALL THE BROAD OR MIT, OR THEIR
* RESPECTIVE TRUSTEES, DIRECTORS, OFFICERS, EMPLOYEES, AND AFFILIATES BE LIABLE FOR ANY DAMAGES OF
* ANY KIND, INCLUDING, WITHOUT LIMITATION, INCIDENTAL OR CONSEQUENTIAL DAMAGES, ECONOMIC
* DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER THE BROAD OR MIT SHALL
* BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE
* FOREGOING.
*/
package org.broad.tribble.index.linear;
import org.broad.tribble.AbstractFeatureReader;
import org.broad.tribble.CloseableTribbleIterator;
import org.broad.tribble.FeatureReader;
import org.broad.tribble.TestUtils;
import org.broad.tribble.bed.BEDCodec;
import org.broad.tribble.bed.BEDFeature;
import org.broad.tribble.index.Block;
import org.broad.tribble.index.Index;
import org.broad.tribble.index.IndexFactory;
import org.testng.Assert;
import org.testng.annotations.BeforeTest;
import org.testng.annotations.Test;
import java.io.File;
import java.io.IOException;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
/**
 * Tests for {@link LinearIndex}.
 *
 * <p>The first group of tests works on a small index assembled by hand in
 * {@link #createTestIndex()}; the remaining tests build linear indexes from BED files
 * located under {@code TestUtils.DATA_DIR} and read features back through them.
 */
public class LinearIndexTest {
    /** Placeholder file handed to the hand-built index; the tests never open it. */
    private static final File RANDOM_FILE = new File("notMeaningful");

    // Blocks used to assemble the hand-built index (see the layout comment on createTestIndex).
    private static final Block CHR1_B1 = new Block(1, 10);
    private static final Block CHR1_B2 = new Block(10, 20);
    private static final Block CHR1_B3 = new Block(20, 30);
    private static final Block CHR2_B1 = new Block(1, 100);
    private static final Block CHR2_B2 = new Block(100, 200);

    /** Hand-built index shared by the tests; assigned in {@link #setup()}. */
    private LinearIndex idx;

    /** Creates the shared hand-built index. */
    @BeforeTest
    public void setup() {
        idx = createTestIndex();
    }

    // chr1 (0, 10]
    // chr1 (10, 20]
    // chr1 (20, 30]
    // chr2 (0, 100]
    // chr2 (100, 200]

    /**
     * Assembles a two-chromosome index by hand: chr1 gets three blocks and a longest feature
     * of 1, chr2 gets two blocks and a longest feature of 50. The index is attached to
     * {@link #RANDOM_FILE}.
     *
     * @return a freshly constructed index; every call returns a new instance
     */
    private static LinearIndex createTestIndex() {
        final LinearIndex.ChrIndex chr1 = new LinearIndex.ChrIndex("chr1", 10);
        chr1.addBlock(CHR1_B1);
        chr1.addBlock(CHR1_B2);
        chr1.addBlock(CHR1_B3);
        chr1.updateLongestFeature(1);

        final LinearIndex.ChrIndex chr2 = new LinearIndex.ChrIndex("chr2", 100);
        chr2.addBlock(CHR2_B1);
        chr2.addBlock(CHR2_B2);
        chr2.updateLongestFeature(50);

        final List<LinearIndex.ChrIndex> indices = Arrays.asList(chr1, chr2);
        return new LinearIndex(indices, RANDOM_FILE);
    }

    /** Checks type information, metadata flags, sequence names and block counts of the hand-built index. */
    @Test()
    public void testBasicFeatures() {
        Assert.assertEquals(idx.getChrIndexClass(), LinearIndex.ChrIndex.class);
        Assert.assertEquals(idx.getType(), LinearIndex.INDEX_TYPE);

        // Nothing beyond the chromosome indices and the file was supplied, so no metadata is expected.
        Assert.assertFalse(idx.hasFileSize());
        Assert.assertFalse(idx.hasTimestamp());
        Assert.assertFalse(idx.hasMD5());
        Assert.assertTrue(idx.isCurrentVersion());

        Assert.assertNotNull(idx.getSequenceNames());
        Assert.assertEquals(idx.getSequenceNames().size(), 2);
        Assert.assertTrue(idx.getSequenceNames().contains("chr1"));
        Assert.assertTrue(idx.getSequenceNames().contains("chr2"));

        Assert.assertTrue(idx.containsChromosome("chr1"));
        Assert.assertTrue(idx.containsChromosome("chr2"));
        Assert.assertFalse(idx.containsChromosome("chr3"));

        Assert.assertEquals(idx.getIndexedFile(), new File(RANDOM_FILE.getAbsolutePath()));

        Assert.assertNotNull(idx.getBlocks("chr1"));
        Assert.assertEquals(idx.getBlocks("chr1").size(), 3);
        Assert.assertNotNull(idx.getBlocks("chr2"));
        Assert.assertEquals(idx.getBlocks("chr2").size(), 2);
    }

    /** Checks {@code equals} and {@code equalsIgnoreProperties}, including after a timestamp change. */
    @Test()
    public void testEquals() {
        final LinearIndex idx2 = createTestIndex();

        Assert.assertEquals(idx, idx, "Identical indices are equal");
        Assert.assertTrue(idx.equalsIgnoreProperties(idx), "Identical indices are equalIgnoreTimeStamp");
        Assert.assertTrue(idx.equalsIgnoreProperties(idx2), "Indices constructed the same are equalIgnoreTimeStamp");

        idx2.setTS(123456789);
        // NOTE(review): assertNotSame compares references, so it holds for any two distinct
        // instances regardless of the timestamp. If the intent is "not equal", confirm how
        // LinearIndex.equals treats the timestamp before switching to assertNotEquals.
        Assert.assertNotSame(idx, idx2, "Indices with different timestamps are not the same");
        Assert.assertTrue(idx.equalsIgnoreProperties(idx2), "Indices with different timestamps are equalIgnoreTimeStamp");
    }

    // chr1 (0, 10]
    // chr1 (10, 20]
    // chr1 (20, 30]
    // chr2 (0, 100]
    // chr2 (100, 200]

    //@Test()
    // TODO -- this is not a useful test as written -- the linear index always returns a single block since by
    // TODO -- definition they are contiguous and can be collapsed to a single block.
    /** Disabled (no {@code @Test} annotation): block-level query expectations for the hand-built index. */
    public void testBasicQuery() {
        testQuery("chr1", 1, 1, CHR1_B1);
        testQuery("chr1", 1, 2, CHR1_B1);
        testQuery("chr1", 1, 9, CHR1_B1);
        testQuery("chr1", 10, 10, CHR1_B1);

        testQuery("chr1", 10, 11, CHR1_B1, CHR1_B2);
        testQuery("chr1", 11, 11, CHR1_B2);
        testQuery("chr1", 11, 12, CHR1_B2);
        testQuery("chr1", 11, 19, CHR1_B2);

        testQuery("chr1", 10, 19, CHR1_B1, CHR1_B2);
        testQuery("chr1", 10, 21, CHR1_B1, CHR1_B2, CHR1_B3);
        testQuery("chr1", 25, 30, CHR1_B3);
        testQuery("chr1", 35, 40);

        testQuery("chr2", 1, 1, CHR2_B1);
        testQuery("chr2", 100, 100, CHR2_B1);
        testQuery("chr2", 125, 125, CHR2_B1, CHR2_B2); // because of the 50 bp events
        testQuery("chr2", 151, 151, CHR2_B2); // because of the 50 bp events
        testQuery("chr2", 249, 249, CHR2_B2); // because of the 50 bp events
        testQuery("chr2", 251, 251); // just escaping the 50 bp longest event
    }

    /**
     * Queries the hand-built index and asserts that the returned blocks match the expected
     * ones, in order.
     *
     * @param chr                 chromosome to query
     * @param start               query start
     * @param stop                query stop
     * @param expectedBlocksArray blocks the query should return; empty when none are expected
     */
    private void testQuery(final String chr, final int start, final int stop, final Block... expectedBlocksArray) {
        final List<Block> qBlocks = idx.getBlocks(chr, start, stop);
        final List<Block> eBlocks = Arrays.asList(expectedBlocksArray);

        // Sizes are compared first so the element-wise loop below cannot run past eBlocks.
        Assert.assertEquals(qBlocks.size(), eBlocks.size(),
                String.format("Query %s:%d-%d returned %d blocks but we only expected %d.", chr, start, stop, qBlocks.size(), eBlocks.size()));
        for (int i = 0; i < qBlocks.size(); i++) {
            Assert.assertEquals(qBlocks.get(i), eBlocks.get(i));
        }
    }

    /** BED file read by {@link #oneEntryFirstChr()}. */
    File fakeBed = new File(TestUtils.DATA_DIR + "fakeBed.bed");

    /**
     * Builds a linear index over {@link #fakeBed} and checks that iterating the whole file
     * through an indexed reader yields 51 features.
     *
     * @throws Exception if the reader cannot be iterated or closed; reported by TestNG as a
     *                   test failure with the original stack trace
     */
    @Test
    public void oneEntryFirstChr() throws Exception {
        final BEDCodec code = new BEDCodec();
        final Index index = IndexFactory.createLinearIndex(fakeBed, code);
        final FeatureReader<BEDFeature> reader = AbstractFeatureReader.getFeatureReader(fakeBed.getAbsolutePath(), code, index);
        try {
            final CloseableTribbleIterator<BEDFeature> it = reader.iterator();
            try {
                int count = 0;
                while (it.hasNext()) {
                    it.next();
                    count++;
                }
                // TestNG argument order is (actual, expected).
                Assert.assertEquals(count, 51);
            } finally {
                it.close();
            }
        } finally {
            reader.close();
        }
    }

    /**
     * Queries a window on chr2 of the Unigene sample BED file through a linear index, first
     * with adaptive indexing disabled and then with it enabled, and expects the same six
     * features both times. Relevant records, as listed by the original author:
     *
     * <pre>
     * chr2 1 200000000 LONG_FEATURE
     * ...
     * chr2 179098961 179380395 Hs.134602
     * chr2 179209546 179287210 Hs.620337
     * chr2 179266309 179266748 Hs.609465
     * chr2 179296428 179300012 Hs.623987
     * chr2 179302952 179303488 Hs.594545
     * </pre>
     *
     * @throws Exception if the index cannot be built or the file cannot be queried
     */
    @Test
    public void testOverlappingFeatures() throws Exception {
        //chr2:179,222,066-179,262,059<- CONTAINS TTN
        final Set<String> names = new HashSet<String>(Arrays.asList("Hs.134602", "Hs.620337", "Hs.609465", "Hs.623987",
                "Hs.594545", "LONG_FEATURE"));
        final String bedFile = TestUtils.DATA_DIR + "bed/Unigene.sample.bed";
        final String chr = "chr2";
        final int start = 179266309;
        final int end = 179303488;
        final int expectedCount = 6;
        final int binSize = 1000;

        // enableAdaptiveIndexing is a global static switch: remember its value and restore it
        // even when an assertion fails, so this test does not leak state into other tests.
        final boolean adaptiveIndexingBefore = LinearIndex.enableAdaptiveIndexing;
        try {
            // Linear binned index
            LinearIndex.enableAdaptiveIndexing = false;
            Assert.assertEquals(countOverlappingFeatures(bedFile, binSize, chr, start, end, names), expectedCount);

            //Repeat with adaptive indexing
            LinearIndex.enableAdaptiveIndexing = true;
            Assert.assertEquals(countOverlappingFeatures(bedFile, binSize, chr, start, end, names), expectedCount);
        } finally {
            LinearIndex.enableAdaptiveIndexing = adaptiveIndexingBefore;
        }
    }

    /**
     * Builds a linear index for {@code bedFile} under the current
     * {@code LinearIndex.enableAdaptiveIndexing} setting, runs one interval query, and
     * verifies each returned feature.
     *
     * @param bedFile      path of the BED file to index and query
     * @param binSize      bin size passed to the index factory
     * @param chr          chromosome to query
     * @param start        query start
     * @param end          query end
     * @param allowedNames names a returned feature is allowed to have
     * @return the number of features returned by the query
     * @throws Exception if indexing, querying or closing the reader fails
     */
    private static int countOverlappingFeatures(final String bedFile, final int binSize, final String chr,
                                                final int start, final int end,
                                                final Set<String> allowedNames) throws Exception {
        final Index index = IndexFactory.createLinearIndex(new File(bedFile), new BEDCodec(), binSize);
        final FeatureReader<BEDFeature> reader = AbstractFeatureReader.getFeatureReader(bedFile, new BEDCodec(), index);
        try {
            final CloseableTribbleIterator<BEDFeature> iter = reader.query(chr, start, end);
            try {
                int count = 0;
                while (iter.hasNext()) {
                    final BEDFeature feature = iter.next();
                    // Every returned feature must overlap the query window and be one of the known names.
                    Assert.assertTrue(feature.getEnd() >= start && feature.getStart() <= end);
                    Assert.assertTrue(allowedNames.contains(feature.getName()));
                    count++;
                }
                return count;
            } finally {
                iter.close();
            }
        } finally {
            reader.close();
        }
    }
}
|