File: GbffFile.java

package info (click to toggle)
bbmap 39.20%2Bdfsg-3
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 26,024 kB
  • sloc: java: 312,743; sh: 18,099; python: 5,247; ansic: 2,074; perl: 96; makefile: 39; xml: 38
file content (97 lines) | stat: -rwxr-xr-x 2,774 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
package gff;

import java.util.ArrayList;

import fileIO.ByteFile;
import fileIO.ByteStreamWriter;
import fileIO.FileFormat;
import shared.Shared;
import shared.Tools;

/**
 * Reads a GenBank flat file (gbff) one record at a time and converts it to GFF.
 * <p>
 * A record starts at a line beginning with {@code "LOCUS "} and runs until the next
 * such line or the end of the file. The reader always holds one line of lookahead
 * in {@link #line}; that line is the first line of the next record to be returned,
 * or null once the input is exhausted.
 * <p>
 * NOTE(review): {@link #reset()} is synchronized but {@link #nextLocus()} is not,
 * so concurrent use of one instance from several threads is not protected here.
 */
public class GbffFile {
	
	/**
	 * Command-line entry point: converts a gbff file to a gff file.
	 * <p>
	 * Arguments are positional. Either one may be written as {@code key=value},
	 * in which case everything after the first '=' is used as the path.
	 * 
	 * @param args args[0] is the input gbff path (required);
	 *             args[1] is the output gff path (optional, defaults to "stdout.gff").
	 * @throws IllegalArgumentException if no input is given, or an argument of the
	 *             form {@code key=} has no value.
	 */
	public static void main(String[] args){
		if(args==null || args.length<1){
			throw new IllegalArgumentException("Usage: GbffFile <in.gbff> [out.gff]");
		}
		final String gbff=stripKey(args[0]);
		final String gff=(args.length>1 ? stripKey(args[1]) : "stdout.gff");
		
		FileFormat ffin=FileFormat.testInput(gbff, ".gbff", true);
		FileFormat ffout=FileFormat.testOutput(gff, FileFormat.GFF, null, true, true, false, false);
		GbffFile file=new GbffFile(ffin);
		ByteStreamWriter bsw=new ByteStreamWriter(ffout);
		bsw.start();
		file.toGff(bsw, true);
		bsw.poisonAndWait();
	}
	
	/**
	 * Removes an optional {@code key=} prefix from a command-line argument.
	 * <p>
	 * Only the first '=' is treated as the separator, so values that themselves
	 * contain '=' (for example a path such as {@code in=/data/a=b/x.gbff}) are kept whole.
	 * 
	 * @param arg The raw argument.
	 * @return The argument unchanged if it has no '=', otherwise the text after the first '='.
	 * @throws IllegalArgumentException if there is nothing after the '='.
	 */
	private static String stripKey(String arg){
		final int eq=arg.indexOf('=');
		if(eq<0){return arg;}
		final String value=arg.substring(eq+1);
		if(value.isEmpty()){
			throw new IllegalArgumentException("Missing value after '=' in argument '"+arg+"'");
		}
		return value;
	}
	
	
//	Example of the header of an NCBI-produced GFF file, kept for reference:
//	##gff-version 3
//	#!gff-spec-version 1.21
//	#!processor NCBI annotwriter
//	#!genome-build IMG-taxon 2724679794 annotated assembly
//	#!genome-build-accession NCBI_Assembly:GCF_900182635.1
//	#!annotation-date 07/14/2019 01:52:19
//	#!annotation-source NCBI RefSeq 
//	##sequence-region NZ_FXTD01000001.1 1 528269
//	##species https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=413815
	
	/**
	 * Writes every remaining record of this file to the given writer in GFF form.
	 * <p>
	 * Records are pulled with {@link #nextLocus()} until it returns null, so this
	 * consumes the input from its current position to the end.
	 * 
	 * @param bsw Destination writer; it is neither started nor terminated here.
	 * @param printHeader If true, a three-line header (gff version, BBTools version,
	 *             and column names) is written before the records.
	 */
	public void toGff(ByteStreamWriter bsw, boolean printHeader){
		if(printHeader){
			bsw.println("##gff-version 3".getBytes());
			bsw.println(("#BBTools "+Shared.BBTOOLS_VERSION_STRING+" GbffToGff").getBytes());
			bsw.println("#seqid	source	type	start	end	score	strand	phase	attributes".getBytes());
		}
		for(GbffLocus locus=nextLocus(); locus!=null; locus=nextLocus()){
			locus.toGff(bsw);
		}
	}
	
	/**
	 * Creates a reader over the given file and positions it at the first line.
	 * 
	 * @param ff_ Input file descriptor; asserted to be in GBFF format.
	 */
	public GbffFile(FileFormat ff_) {
		ff=ff_;
		assert(ff.format()==FileFormat.GBFF) : ff;
		reset();
	}
	
	/**
	 * (Re)opens the input and reads the first line into the lookahead.
	 * Any previously opened reader is closed first. If the input turns out to be
	 * empty, the new reader is closed immediately and {@link #nextLocus()} will return null.
	 */
	public synchronized void reset(){
		if(bf!=null){
			bf.close();
			bf=null;
		}
		bf=ByteFile.makeByteFile(ff, FileFormat.GBFF);
		line=bf.nextLine();
		if(line==null){bf.close();}//empty
	}
	
	/**
	 * Reads the next record from the input.
	 * <p>
	 * The lookahead line (expected to start with {@code "LOCUS "}) becomes the first
	 * line of the record. Following lines are collected until the next {@code "LOCUS "}
	 * line or end of input, with these exceptions, which are dropped:
	 * empty lines, lines starting with '/', the ORIGIN keyword line, and the
	 * space-indented lines that directly follow ORIGIN (the sequence itself).
	 * The underlying reader is closed when the end of input is reached.
	 * 
	 * @return The next record, or null if the input is exhausted.
	 */
	public GbffLocus nextLocus(){
		assert(bf!=null);
		if(line==null){return null;}
		assert(Tools.startsWith(line, "LOCUS ")) : "Expecting: 'LOCUS ...'\nGot: '"+new String(line)+"'";
		ArrayList<byte[]> lines=new ArrayList<byte[]>();
		lines.add(line);
		boolean sequence=false;//True while inside the sequence section that follows ORIGIN
		for(line=bf.nextLine(); line!=null && (line.length==0 || line[0]!='L' || !Tools.startsWith(line, "LOCUS ")); line=bf.nextLine()){
			if(line.length>0){
				final byte b=line[0];
				if(b=='/'){
					//skip
				}else if(b=='O' && isOriginLine(line)){
					sequence=true;
				}else if(b==' ' && sequence){
					//do nothing; sequence data is not needed for annotation
				}else{
					//Any other non-indented line ends the sequence section
					sequence=false;
					lines.add(line);
				}
			}
		}
		if(line==null){bf.close();}
		return new GbffLocus(lines);
	}
	
	/**
	 * Tests whether a line is the ORIGIN keyword line that introduces the sequence section.
	 * <p>
	 * Besides the padded form {@code "ORIGIN "}, a line consisting of exactly
	 * {@code "ORIGIN"} is accepted. Without this, a record with an unpadded keyword
	 * would have its ORIGIN line and every sequence line stored with the annotation lines.
	 * NOTE(review): unpadded ORIGIN lines are believed to be produced by some
	 * third-party GenBank writers; confirm against real inputs.
	 * 
	 * @param s A non-null line.
	 * @return True if the line is an ORIGIN keyword line.
	 */
	private static boolean isOriginLine(byte[] s){
		if(Tools.startsWith(s, "ORIGIN ")){return true;}
		return s.length==6 && s[0]=='O' && s[1]=='R' && s[2]=='I' && s[3]=='G' && s[4]=='I' && s[5]=='N';
	}
	
	/** Descriptor of the input file; used to (re)open the reader. */
	private final FileFormat ff;
	/** Reader over the input; replaced on every reset. */
	private ByteFile bf;
	/** One-line lookahead: the first line of the next record, or null at end of input. */
	private byte[] line=null;
	
}