File: UnicodeToAscii.java

package info (click to toggle)
bbmap 39.20%2Bdfsg-3
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 26,024 kB
  • sloc: java: 312,743; sh: 18,099; python: 5,247; ansic: 2,074; perl: 96; makefile: 39; xml: 38
file content (141 lines) | stat: -rwxr-xr-x 3,994 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
package jgi;

import java.io.File;
import java.io.PrintStream;
import java.io.UnsupportedEncodingException;
import java.nio.charset.StandardCharsets;

import fileIO.ReadWrite;
import fileIO.TextFile;
import fileIO.TextStreamWriter;
import shared.Parse;
import shared.Parser;
import shared.PreParser;
import shared.Shared;
import shared.Timer;
import shared.Tools;
import tracker.ReadStats;

/**
 * Command-line tool that copies one or two text files line by line.
 * Each line is re-decoded as UTF-8 and passed through {@code Tools.fixHeader}
 * before being written to the corresponding output file.
 * NOTE(review): the actual character substitution is done inside
 * {@code Tools.fixHeader}, which is not visible here - presumably it replaces
 * non-ASCII characters; confirm against that method.
 *
 * @author Brian Bushnell
 * @date Apr 21, 2015
 *
 */
public class UnicodeToAscii {
	
	/**
	 * Program entry point.  Builds a converter from the arguments, runs it,
	 * and then closes the message stream.
	 * @param args Command-line arguments.
	 */
	public static void main(String[] args){
		
		Timer t=new Timer();
		UnicodeToAscii x=new UnicodeToAscii(args);
		x.process(t);
		
		//Close the print stream if it was redirected
		Shared.closeStream(x.outstream);
	}
	
	/**
	 * Parses the command line and validates the resulting file names.
	 * Arguments are {@code key=value} pairs; additionally, the first argument
	 * may be a bare input name and the second a bare output name.
	 * A '#' in an input or output name is expanded to "1" and "2" to form a
	 * pair of file names.
	 * @param args Command-line arguments.
	 * @throws RuntimeException If no input is given, if the outputs cannot be
	 * written, or if the file-name checks fail.
	 */
	public UnicodeToAscii(String[] args){
		
		{//Preparse block for help, config files, and outstream
			PreParser pp=new PreParser(args, getClass(), false);
			args=pp.args;
			outstream=pp.outstream;
		}
		
		ReadWrite.USE_PIGZ=ReadWrite.USE_UNPIGZ=true;
		ReadWrite.setZipThreads(Shared.threads());
		
		Parser parser=new Parser();
		for(int i=0; i<args.length; i++){
			final String arg=args[i];
			
			//Split on the FIRST '=' only, so values that themselves contain '='
			//are kept whole, and a bare "=" argument no longer indexes into an
			//empty array.  An empty value ("key=") still yields null.
			final int eq=arg.indexOf('=');
			final String a=(eq<0 ? arg : arg.substring(0, eq)).toLowerCase();
			final String b=(eq<0 || eq==arg.length()-1) ? null : arg.substring(eq+1);
			
			if(parser.parse(arg, a, b)){
				//do nothing
			}else if(a.equals("null")){
				// do nothing
			}else if(a.equals("verbose")){
				verbose=Parse.parseBoolean(b);
			}else if(parser.in1==null && i==0 && Tools.looksLikeInputStream(arg)){
				//First positional argument is treated as the input
				parser.in1=arg;
			}else if(parser.out1==null && i==1 && !arg.contains("=")){
				//Second positional argument is treated as the output
				parser.out1=arg;
			}else{
				//Unknown parameters are reported; they only abort the program
				//when assertions are enabled.
				outstream.println("Unknown parameter "+args[i]);
				assert(false) : "Unknown parameter "+args[i];
			}
		}
		
		{//Process parser fields
			in1=parser.in1;
			in2=parser.in2;

			out1=parser.out1;
			out2=parser.out2;
			
			overwrite=parser.overwrite;
			append=parser.append;
		}
		
		//Expand '#' into a 1/2 pair, unless a file with the literal name exists
		if(in1!=null && in2==null && in1.indexOf('#')>-1 && !new File(in1).exists()){
			in2=in1.replace("#", "2");
			in1=in1.replace("#", "1");
		}
		if(out1!=null && out2==null && out1.indexOf('#')>-1){
			out2=out1.replace("#", "2");
			out1=out1.replace("#", "1");
		}
		
		if(in1==null){throw new RuntimeException("Error - at least one input file is required.");}
		
		//The literal name "null" means "no output"
		if(out1!=null && out1.equalsIgnoreCase("null")){out1=null;}
		if(out2!=null && out2.equalsIgnoreCase("null")){out2=null;}
		
		if(!Tools.testOutputFiles(overwrite, append, false, out1, out2)){
			outstream.println((out1==null)+", "+(out2==null)+", "+out1+", "+out2);
			throw new RuntimeException("\n\noverwrite="+overwrite+"; Can't write to output files "+out1+", "+out2+"\n");
		}
		if(!Tools.testForDuplicateFiles(true, in1, in2, out1, out2) || !ReadStats.testFiles(false)){
			throw new RuntimeException("Duplicate filenames are not allowed.");
		}
	}
	
	/**
	 * Converts each input/output pair for which both names are set.
	 * @param t Timer created by the caller; not used by this method.
	 */
	private void process(Timer t){

		if(in1!=null && out1!=null){process(in1, out1);}
		if(in2!=null && out2!=null){process(in2, out2);}
		
	}
	
	/**
	 * Reads {@code infile} line by line, re-decodes each line as UTF-8,
	 * passes it through {@code Tools.fixHeader}, and writes it to {@code outfile}.
	 * The input is closed and the writer is shut down even if a line fails.
	 * @param infile Name of the file to read.
	 * @param outfile Name of the file to write.
	 */
	private void process(String infile, String outfile){
		TextFile tf=new TextFile(infile, true);
		TextStreamWriter tsw=new TextStreamWriter(outfile, overwrite, append, true);
		tsw.start();
		try{
			for(String line=tf.readLine(false); line!=null; line=tf.readLine(false)){
				//NOTE(review): getBytes() encodes with the platform default charset;
				//this is kept unchanged to preserve existing behavior.
				//Decoding with a Charset constant cannot throw, so no fallback is needed.
				final String line2=new String(line.getBytes(), StandardCharsets.UTF_8);
				tsw.println(Tools.fixHeader(line2, false, true));
			}
		}finally{
			//Release both resources regardless of how the loop ended
			try{
				tf.close();
			}finally{
				tsw.poisonAndWait();
			}
		}
	}
	
	/** Destination for status and error messages; may be replaced by the preparser. */
	private PrintStream outstream=System.err;

	/** Primary and secondary input file names. */
	private String in1, in2;
	/** Primary and secondary output file names; null means no output. */
	private String out1, out2;
	/** Set by the "verbose" flag; currently not read anywhere in this class. */
	@SuppressWarnings("unused")
	private boolean verbose=false;
	/** Passed to the output-file check and to the writer; taken from the parser. */
	private boolean overwrite=true;
	/** Passed to the output-file check and to the writer; taken from the parser. */
	private boolean append=false;
	
}