File: RefToIndex.java

package info (click to toggle)
bbmap 38.90%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 21,520 kB
  • sloc: java: 265,882; sh: 14,954; python: 5,247; ansic: 2,074; perl: 96; xml: 38; makefile: 37
file content (173 lines) | stat: -rwxr-xr-x 6,988 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
package align2;

import java.io.File;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;

import dna.ChromosomeArray;
import dna.Data;
import dna.FastaToChromArrays2;
import fileIO.FileFormat;
import fileIO.ReadWrite;
import fileIO.SummaryFile;
import shared.Shared;
import shared.Tools;

/**
 * Converts a reference fasta into the chromosome arrays used for indexing, reusing
 * or clearing any genome/index data already on disk for the same build.
 * All state is static; {@link #makeIndex} reads the public flags below and stores
 * its result in {@link #chromlist}.
 *
 * @author Brian Bushnell
 * @date Sep 25, 2013
 *
 */
public class RefToIndex {
	
	/** Drops the chromosome list retained by the most recent {@link #makeIndex} call. */
	public static final void clear(){
		chromlist=null;
	}
	
	/**
	 * Path of the genome summary file for a build.
	 * Derived from the index filename's parent directory with "ref/index/" swapped for "ref/genome/".
	 * @param build Genome build number
	 * @return Path ending in "/summary.txt", using forward slashes
	 */
	public static String summaryLoc(int build){
		final String indexDir=parentDir(IndexMaker4.fname(1, 1, 13, 1, build));
		return indexDir.replace("ref/index/", "ref/genome/")+"/summary.txt";
	}
	
	/**
	 * Path of the serialized bloom filter for a build.
	 * @param build Genome build number
	 * @return Data.ROOT_INDEX + build + "/bloom.serial"
	 */
	public static String bloomLoc(int build){
		return Data.ROOT_INDEX+build+"/bloom.serial";
	}
	
	/**
	 * Generates chromosome arrays from a reference fasta unless a matching genome
	 * has already been written.
	 * <p>
	 * If FORCE_READ_ONLY is set, or a summary file exists and SummaryFile.compare accepts it
	 * for this reference, nothing is regenerated.  Otherwise existing genome and index files
	 * are deleted (subject to {@link #overwrite}) and FastaToChromArrays2 is run; its result
	 * is stored in {@link #chromlist}.  When NODISK is set no files are inspected, deleted or logged.
	 * <p>
	 * Side effects: unset (negative) values of maxChromLen, minScaf, midPad, startPad and stopPad
	 * are replaced by their resolved defaults.  ReadWrite.ZIPLEVEL is temporarily raised to at
	 * least 4 and is restored afterward, even if generation fails.
	 *
	 * @param reference Path to the reference fasta; names starting with "stdin" skip the file checks
	 * @param build Genome build number, used in messages, log file names and passed to FastaToChromArrays2
	 * @param sysout Stream for user-facing notes
	 * @param keylen Kmer length used to form the index filename from which directories are derived
	 * @throws RuntimeException if the reference is unreadable or not fasta, or if existing
	 * genome/index data is present and may not be overwritten
	 */
	public static void makeIndex(String reference, int build, PrintStream sysout, int keylen){
		assert(reference!=null);
		validateReference(reference);

		//Note: this fname overload takes no build; the directory comes from IndexMaker4's own notion of the build.
		final String indexDir=parentDir(IndexMaker4.fname(1, 1, keylen, 1));
		final String genomeDir=indexDir.replace("ref/index/", "ref/genome/");
		final String args=(Shared.COMMAND_LINE==null ? "null" : Arrays.toString(Shared.COMMAND_LINE));
		final BuildLog log=new BuildLog(build, args, logBase(indexDir));
		final String sf=genomeDir+"/summary.txt";
		
		if(FORCE_READ_ONLY || (!NODISK && new File(sf).exists() && SummaryFile.compare(sf, reference))){
			log.write("Found an already-written genome");
			sysout.println("NOTE:\tIgnoring reference file because it already appears to have been processed.");
			sysout.println("NOTE:\tIf you wish to regenerate the index, please manually delete "+genomeDir+"/summary.txt");
			return;
		}
		
		if(!NODISK){
			//Delete old data if present
			clearGenomeDir(genomeDir, reference, sysout, log);
			clearIndexDir(indexDir, sysout, log);
			sysout.println("Writing reference.");
			log.write("Writing genome");
		}
		
		//Negative (or, for maxChromLen, non-positive) values mean "not set by the user".
		maxChromLen=maxChromLen>0 ? maxChromLen : AUTO_CHROMBITS ? FastaToChromArrays2.MAX_LENGTH : ((1L<<(31-(chrombits<0 ? 2 : chrombits)))-200000);
		minScaf=minScaf>-1 ? minScaf : FastaToChromArrays2.MIN_SCAFFOLD;
		midPad=midPad>-1 ? midPad : FastaToChromArrays2.MID_PADDING;
		startPad=startPad>-1 ? startPad : FastaToChromArrays2.START_PADDING;
		stopPad=stopPad>-1 ? stopPad : FastaToChromArrays2.END_PADDING;
		
		final String[] ftcaArgs=new String[] {reference, ""+build, "writeinthread=false", "genscaffoldinfo="+genScaffoldInfo, "retain", "waitforwriting=false",
				"gz="+(Data.CHROMGZ), "maxlen="+maxChromLen,
				"writechroms="+(!NODISK), "minscaf="+minScaf, "midpad="+midPad, "startpad="+startPad, "stoppad="+stopPad, "nodisk="+NODISK};
		
		final int oldzl=ReadWrite.ZIPLEVEL;
		ReadWrite.ZIPLEVEL=Tools.max(4, ReadWrite.ZIPLEVEL);
		try{
			chromlist=FastaToChromArrays2.main2(ftcaArgs);
		}finally{
			//ZIPLEVEL is global; put it back even when generation throws.
			ReadWrite.ZIPLEVEL=oldzl;
		}
	}
	
	/** Throws if the reference is neither a readable fasta file nor a "stdin" name. */
	private static void validateReference(String reference){
		final File f=new File(reference);
		if(!f.exists() || !f.isFile() || !f.canRead()){
			if(!reference.startsWith("stdin")){
				throw new RuntimeException("Cannot read file "+f.getAbsolutePath());
			}
			return;
		}
		final FileFormat ff=FileFormat.testInput(reference, FileFormat.FA, null, false, true, true);
		if(!ff.fasta()){
			throw new RuntimeException("Reference file is not in fasta format: "+reference+"\n"+ff);
		}
	}
	
	/** Parent directory of a path, normalized to forward slashes. */
	private static String parentDir(String path){
		return new File(path).getParent().replace('\\', '/');
	}
	
	/**
	 * Directory prefix under which log files are written: everything up to and including "ref/".
	 * Locating the "ref/index/" marker keeps this correct for multi-digit build numbers;
	 * the fixed 7-character trim ("index/" plus one digit) is kept only as a fallback.
	 */
	private static String logBase(String indexDir){
		final int marker=indexDir.lastIndexOf("ref/index/");
		if(marker>=0){return indexDir.substring(0, marker+"ref/".length());}
		return indexDir.substring(0, indexDir.length()-7);
	}
	
	/** Removes previously written genome files, or throws if they exist and may not be replaced. */
	private static void clearGenomeDir(String genomeDir, String reference, PrintStream sysout, BuildLog log){
		final File f=new File(genomeDir);
		final File[] contents=(f.exists() ? f.listFiles() : null);
		if(contents==null || contents.length<1){return;}
		
		final boolean replace=overwrite || contents[0].getAbsolutePath().equals(new File(reference).getAbsolutePath());
		if(!replace){
			sysout.println(Arrays.toString(contents));
			log.write("Failed to overwrite genome");
			throw new RuntimeException("\nThere is already a reference at location '"+f.getAbsolutePath()+"'.  " +
					"Please delete it (and the associated index), or use a different build ID, " +
					"or remove the 'reference=' parameter from the command line, or set overwrite=true.");
		}
		
		sysout.println("NOTE:\tDeleting contents of "+genomeDir+" because reference is specified and overwrite="+overwrite);
		log.write("Deleting genome");
		for(File old : contents){
			if(!old.isFile()){continue;}
			final String name=old.getName();
			final boolean genomeFile=name.contains(".chrom") || name.endsWith(".txt") || name.endsWith(".txt.gz");
			//Files ending in "list.txt" are deliberately kept.
			if(genomeFile && !name.endsWith("list.txt")){
				old.delete();//Best effort; failure to delete is not treated as fatal
			}
		}
	}
	
	/** Removes previously written index files, or throws if they exist and overwrite is false. */
	private static void clearIndexDir(String indexDir, PrintStream sysout, BuildLog log){
		final File f=new File(indexDir);
		final File[] contents=(f.exists() ? f.listFiles() : null);
		if(contents==null || contents.length<1){return;}
		
		if(!overwrite){
			log.write("Failed to overwrite index");
			throw new RuntimeException("\nThere is already an index at location '"+f.getAbsolutePath()+"'.  " +
					"Please delete it, or use a different build ID, or remove the 'reference=' parameter from the command line.");
		}
		
		sysout.println("NOTE:\tDeleting contents of "+indexDir+" because reference is specified and overwrite="+overwrite);
		log.write("Deleting index");
		for(File old : contents){
			if(old.isFile()){old.delete();}//Best effort; subdirectories are left alone
		}
	}
	
	/** Writes timestamped events for one makeIndex call to a single log file; inert unless LOG && !NODISK. */
	private static final class BuildLog {
		
		BuildLog(int build_, String args_, String base_){
			build=build_;
			args=args_;
			base=base_;
			//nanoTime plus a hash of the command line keeps concurrent runs from sharing a log file name.
			path=base+"build"+build+"_"+(System.nanoTime()&Long.MAX_VALUE)+"."+(args.hashCode()&Integer.MAX_VALUE)+".log";
		}
		
		/** Records "&lt;date&gt;\n&lt;event&gt; for build N.\n&lt;args&gt;\n", creating the log directory first if needed. */
		void write(String event){
			if(!LOG || NODISK){return;}
			final File dir=new File(base);
			if(!dir.exists()){dir.mkdirs();}
			ReadWrite.writeString(new Date()+"\n"+event+" for build "+build+".\n"+args+"\n", path, true);
		}
		
		private final int build;
		private final String args;
		private final String base;
		private final String path;
	}

	/** When true and maxChromLen is unset, FastaToChromArrays2.MAX_LENGTH is used instead of a chrombits-derived limit. */
	public static boolean AUTO_CHROMBITS=true;
	/** When true (and NODISK is false), makeIndex records its actions in a per-call log file. */
	public static boolean LOG=false;
	/** When true, makeIndex does not inspect, delete or log files; also forwarded as nodisk= and writechroms=. */
	public static boolean NODISK=false;
	/** When true, makeIndex always treats the reference as already processed. */
	public static boolean FORCE_READ_ONLY=false;
	/** Permits makeIndex to delete existing genome and index files. */
	public static boolean overwrite=true;
	/** Not read within this class. */
	public static boolean append=false;
	/** Forwarded to FastaToChromArrays2 as genscaffoldinfo=. */
	public static boolean genScaffoldInfo=true;
	
	/** Maximum chromosome length; non-positive means unset until resolved by makeIndex. */
	public static long maxChromLen=-1;
	
	/** Scaffold and padding settings; negative means unset, in which case makeIndex substitutes the FastaToChromArrays2 defaults. */
	public static int minScaf=-1, midPad=-1, stopPad=-1, startPad=-1;
	/** Used to derive maxChromLen when AUTO_CHROMBITS is false; negative is treated as 2. */
	public static int chrombits=-1;
	
	/** Result of the most recent FastaToChromArrays2 run started by makeIndex. */
	public static ArrayList<ChromosomeArray> chromlist=null;
	
}