File: VcfToGff.java

package info (click to toggle)
bbmap 39.20+dfsg-3
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 26,024 kB
  • sloc: java: 312,743; sh: 18,099; python: 5,247; ansic: 2,074; perl: 96; makefile: 39; xml: 38
file content (149 lines) | stat: -rwxr-xr-x 4,131 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
package gff;

import java.io.PrintStream;

import fileIO.ByteFile;
import fileIO.ByteStreamWriter;
import fileIO.FileFormat;
import shared.Parser;
import shared.PreParser;
import shared.Timer;
import shared.Tools;
import structures.ByteBuilder;
import tracker.ReadStats;
import var2.VCFLine;

/**
 * Command-line tool that translates a VCF file into a GFF (version 3) file.
 * Stripped out of GffLine into an independent class.
 * <p>
 * Arguments may be given as {@code key=value} pairs ({@code in=}/{@code vcf=}
 * for the input, {@code out=}/{@code gff=} for the output) or positionally as
 * {@code <input> <output>}.
 *
 * @author Brian Bushnell
 * @date Sep 12, 2018
 *
 */
public class VcfToGff {

	/** Initial capacity of the output buffer; slightly above the flush threshold. */
	private static final int BUFFER_CAPACITY=17000;

	/** Buffered output is handed to the writer once it reaches this many bytes. */
	private static final int FLUSH_THRESHOLD=16384;

	/** Column header emitted once, immediately before the first data line. */
	private static final String GFF_HEADER="#seqid\tsource\ttype\tstart\tend\tscore\tstrand\tphase\tattributes";

	/**
	 * Translates VCF to GFF.
	 * Parses the command line, validates the input and output paths, runs the
	 * translation, and reports the elapsed time.
	 *
	 * @param args Command-line arguments; see the class description.
	 * @throws RuntimeException If the output cannot be written, the input cannot
	 * be read, or the same file is named more than once.
	 */
	public static void main(String[] args){
		Timer t=new Timer();
		PrintStream outstream=System.err;
		{//Preparse block for help, config files, and outstream
			//The anonymous-class expression evaluates to this class (VcfToGff.class).
			PreParser pp=new PreParser(args, new Object() { }.getClass().getEnclosingClass(), false);
			args=pp.args;
			outstream=pp.outstream;
			t.outstream=outstream;
		}
		
		Parser parser=new Parser();
		String in=null;
		String out=null;
		
		//Parse each argument
		for(int i=0; i<args.length; i++){
			final String arg=args[i];
			
			//Break arguments into their constituent parts, in the form of "a=b".
			//Only the FIRST '=' separates key from value, so values containing '='
			//are kept whole, and a bare "=" no longer causes an index error.
			//An empty value ("a=") is still treated as null.
			final int eq=arg.indexOf('=');
			final String a=(eq<0 ? arg : arg.substring(0, eq)).toLowerCase();
			final String b=(eq<0 || eq==arg.length()-1) ? null : arg.substring(eq+1);
			
			if(a.equals("in") || a.equals("vcf")){
				in=b;
			}else if(a.equals("out") || a.equals("gff")){
				out=b;
			}else if(parser.parse(arg, a, b)){
				//do nothing; the shared parser accepted the argument
			}else if(in==null && b==null && i==0 && Tools.canRead(arg)){
				//First positional argument: a readable input file
				in=arg;
			}else if(out==null && b==null && i==1){
				//Second positional argument: the output file.
				//This must test out, not in; otherwise "in.vcf out.gff" is rejected
				//because in was already set by the first positional argument.
				out=arg;
			}else{
				outstream.println("Unknown parameter "+args[i]);
				assert(false) : "Unknown parameter "+args[i];
			}
		}
		
		//Process parser fields; the same values are also published to ReadStats
		final boolean overwrite=ReadStats.overwrite=parser.overwrite;
		final boolean append=ReadStats.append=parser.append;
		
		//Ensure output files can be written
		if(!Tools.testOutputFiles(overwrite, append, false, out)){
			outstream.println((out==null)+", "+out);
			throw new RuntimeException("\n\noverwrite="+overwrite+"; Can't write to output files "+out+"\n");
		}

		//Ensure input files can be read
		if(!Tools.testInputFiles(false, true, in)){
			throw new RuntimeException("\nCan't read some input files.\n");  
		}

		//Ensure that no file was specified multiple times
		if(!Tools.testForDuplicateFiles(true, in, out)){
			throw new RuntimeException("\nSome file names were specified multiple times.\n");
		}
		
		translate(in, out, overwrite, append);
		t.stop("Time: \t");
	}
	
	/**
	 * Translates VCF to GFF.
	 * <ul>
	 * <li>Lines of length 0 or 1 are ignored.</li>
	 * <li>Lines starting with '#' are skipped if they begin with
	 * {@code ##fileformat}, {@code ##FORMAT}, {@code ##INFO} or
	 * {@code #CHROM<tab>POS}; any other such line is copied with its leading
	 * run of '#' characters reduced to a single '#'.</li>
	 * <li>Every other line is parsed as a VCFLine, converted to a GffLine, and
	 * appended; the GFF column header is written once before the first of them.</li>
	 * </ul>
	 * If no output FileFormat is produced, the input is still read and parsed
	 * but nothing is written.
	 *
	 * @param in Input VCF path.
	 * @param out Output GFF path.
	 * @param overwrite Passed to the output FileFormat.
	 * @param append Passed to the output FileFormat.
	 */
	private static void translate(String in, String out, boolean overwrite, boolean append){
		//Create output FileFormat objects
		FileFormat ffout=FileFormat.testOutput(out, FileFormat.GFF, "gff", true, overwrite, append, false);

		//Create input FileFormat objects
		FileFormat ffin=FileFormat.testInput(in, FileFormat.VCF, "vcf", true, true);
		
		ByteFile bf=ByteFile.makeByteFile(ffin);
		ByteStreamWriter bsw=null;
		if(ffout!=null){
			bsw=new ByteStreamWriter(ffout);
			bsw.start();
		}
		
		try{
			ByteBuilder bb=new ByteBuilder(BUFFER_CAPACITY);
			bb.append("##gff-version 3\n");
			boolean headerPending=true;
			for(byte[] line=bf.nextLine(); line!=null; line=bf.nextLine()){
				if(line.length>1){
					if(line[0]=='#'){
						if(Tools.startsWith(line, "##fileformat") || Tools.startsWith(line, "##FORMAT") || 
								Tools.startsWith(line, "##INFO") || Tools.startsWith(line, "#CHROM\tPOS")){
							//skip VCF-specific header lines
						}else{
							//Find the last '#' of the leading run, so exactly one '#' is kept
							int i=1;
							while(i<line.length && line[i]=='#'){i++;}
							i--;
							bb.append(line, i, line.length-i);
							bb.nl();
						}
					}else{
						if(headerPending){
							bb.append(GFF_HEADER).append('\n');
							headerPending=false;
						}
						VCFLine vline=new VCFLine(line);
						GffLine gline=new GffLine(vline);
						gline.appendTo(bb);
						bb.nl();
					}
				}
				if(bb.length()>=FLUSH_THRESHOLD){
					flush(bsw, bb);
				}
			}
			if(bb.length()>0){
				flush(bsw, bb);
			}
		}finally{
			//Release the input and shut down the writer even when a line fails to
			//parse; previously an exception skipped both steps.
			try{
				bf.close();
			}finally{
				if(bsw!=null){bsw.poisonAndWait();}
			}
		}
	}
	
	/**
	 * Sends the buffered bytes to the writer, if there is one, then empties the buffer.
	 *
	 * @param bsw Destination writer; may be null, in which case the data is discarded.
	 * @param bb Buffer to print and clear.
	 */
	private static void flush(ByteStreamWriter bsw, ByteBuilder bb){
		if(bsw!=null){
			bsw.print(bb);
		}
		bb.clear();
	}
	
}