1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105
|
package jgi;
import java.util.Arrays;
import dna.AminoAcid;
import fileIO.ByteFile;
import fileIO.FileFormat;
import shared.Tools;
import structures.IntList;
/**
 * Holds summary statistics for one FASTA file: the length of each contig,
 * the total sequence length, header statistics, and a count of sequence
 * symbols grouped into the classes A, C, G, T, U, N, IUPAC and OTHER.
 * The file is read once, when the object is constructed.
 */
public class Assembly {

	/**
	 * Creates the object and immediately reads the named file.
	 * @param fname_ Path of the FASTA file to load.
	 */
	public Assembly(String fname_) {
		fname=fname_;
		load();
	}

	/**
	 * Reads {@link #fname} and fills in all statistics, discarding any
	 * previously loaded values.  Lines starting with '>' are headers; every
	 * other non-empty line is counted as sequence belonging to the current contig.
	 * Afterwards {@link #contigs} is ordered longest-first.
	 */
	void load() {
		FileFormat ff=FileFormat.testInput(fname, FileFormat.FASTA, null, true, true);
		assert(ff.fasta());
		ByteFile bf=ByteFile.makeByteFile(ff);
		clear();
		//One counter per symbol class, OTHER included; sizing this at 7 made
		//the OTHER slot (index 7) out of bounds.
		acgtnio=new long[SYMBOL_CLASSES];

		//NOTE(review): contig lengths are ints because IntList stores ints;
		//a single contig longer than Integer.MAX_VALUE would overflow.
		int contigLen=0;
		try {
			for(byte[] line=bf.nextLine(); line!=null; line=bf.nextLine()) {
				if(line.length==0) {continue;}//Blank line: nothing to classify, and line[0] would be out of bounds
				if(line[0]=='>') {
					headerLength+=(line.length-1);//Exclude the '>' itself
					if(firstHeader==null && line.length>1) {
						firstHeader=new String(line, 1, line.length-1);
					}
					if(contigLen>0) {
						contigs.add(contigLen);
					}
					contigLen=0;
				}else {
					addToACGTNIO(line);
					contigLen+=line.length;
				}
			}
		} finally {
			bf.close();//Release the file even if reading fails part-way
		}

		//The final contig has no following header to flush it, so record it here.
		if(contigLen>0) {
			contigs.add(contigLen);
		}

		//Descending order; lengthAtLeast() depends on this.
		contigs.sort();
		contigs.reverse();
		length=contigs.sumLong();
	}

	/**
	 * Increments the per-class counter for every byte of a sequence line.
	 * @param line Sequence bytes (not a header line).
	 */
	void addToACGTNIO(byte[] line) {
		for(byte b : line) {
			//Java bytes are signed: values of 0x80 and above are negative and
			//cannot index the 128-entry table, so count them as OTHER.
			final byte x=(b<0 ? OTHER : baseToACGTNIO[b]);
			acgtnio[x]++;
		}
	}

	/** Resets every statistic to its unloaded state. */
	void clear() {
		contigs.clear();
		length=0;
		headerLength=0;
		firstHeader=null;
		acgtnio=null;
	}

	/**
	 * @return (G+C)/(A+T+U+G+C) computed from the symbol counts; the
	 * denominator is clamped to at least 1, so the result is 0 when none
	 * of those symbols were seen.
	 */
	float gc() {
		float AT=acgtnio[A]+acgtnio[T]+acgtnio[U];
		float GC=acgtnio[G]+acgtnio[C];
		return GC/Tools.max(1, AT+GC);
	}

	/**
	 * Sums the lengths of all contigs that are at least a given size.
	 * Relies on {@link #contigs} being in descending order, as left by {@link #load()}.
	 * @param minimum Smallest contig length to include.
	 * @return Total length of the qualifying contigs.
	 */
	long lengthAtLeast(int minimum) {
		long sum=0;
		for(int i=0; i<contigs.size; i++) {
			int len=contigs.get(i);
			if(len<minimum) {break;}//Every later contig is no longer than this one
			sum+=len;
		}
		return sum;
	}

	/** Path of the file this object was loaded from. */
	final String fname;
	/** Length of each contig; longest first after load(). */
	IntList contigs=new IntList();
	/** Sum of all contig lengths. */
	long length=0;
	/** Total number of header characters, not counting the leading '>'. */
	long headerLength=0;
	/** Text of the first non-empty header, without the leading '>'; null if there was none. */
	String firstHeader=null;
	/** Symbol counts indexed by the class constants A, C, G, T, U, N, IUPAC, OTHER. */
	long[] acgtnio;

	/** Symbol class indices into {@link #acgtnio}. */
	private static final byte A=0, C=1, G=2, T=3, U=4, N=5, IUPAC=6, OTHER=7;
	/** Number of symbol classes, which is the length required for {@link #acgtnio}. */
	private static final int SYMBOL_CLASSES=OTHER+1;
	/** Maps a 7-bit symbol (0-127) to its class index. */
	public static final byte[] baseToACGTNIO=makeBaseToACGTUNIO();

	/**
	 * Builds the symbol-to-class table.  Upper and lower case a, c, g, t, u
	 * and n get their own classes.  Any other symbol for which
	 * AminoAcid.baseToNumberExtended is non-negative is classed as IUPAC
	 * (presumably the IUPAC ambiguity codes; confirm against AminoAcid).
	 * Everything else is OTHER.
	 * @return A 128-entry table of class indices.
	 */
	private static final byte[] makeBaseToACGTUNIO() {
		final byte[] array=new byte[128];
		Arrays.fill(array, OTHER);
		array['a']=array['A']=A;
		array['c']=array['C']=C;
		array['g']=array['G']=G;
		array['t']=array['T']=T;
		array['u']=array['U']=U;
		array['n']=array['N']=N;
		for(int i=0; i<array.length; i++) {
			if(AminoAcid.baseToNumberExtended[i]>=0 && array[i]==OTHER) {
				array[i]=IUPAC;
			}
		}
		return array;
	}
}
|