1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61
|
package ml;
import fileIO.ByteFile;
import fileIO.ByteStreamWriter;
import shared.LineParser1;
import shared.Timer;
import shared.Tools;
import structures.IntList;
import structures.ListNum;
/**
 * Command-line tool that copies a selected subset of tab-delimited columns
 * from an input text file to an output file.
 *
 * <p>The output begins with a single header line of the form
 * {@code #dims<TAB>(number of selected columns - 1)<TAB>1}. Every input line
 * that starts with {@code '#'} is dropped; every other input line is written
 * with only the requested columns, in the order given on the command line,
 * separated by tabs.
 */
public class ReduceColumns {

	/** Message reported when the command line is malformed. */
	private static final String USAGE=
			"Usage: ReduceColumns <in> <out> <column> [<column> ...]";

	/**
	 * Program entry point.
	 *
	 * @param args {@code args[0]} is the input path, {@code args[1]} is the
	 *        output path, and {@code args[2..]} are the integer indices of the
	 *        columns to keep (passed unchanged to
	 *        {@code LineParser1.parseByteArray}; presumably zero-based - TODO confirm).
	 * @throws IllegalArgumentException if fewer than three arguments are given
	 *         or a column index is negative
	 * @throws NumberFormatException if a column argument is not an integer
	 */
	public static void main(String[] args) {
		Timer t=new Timer();

		// Fail early with a readable message instead of an
		// ArrayIndexOutOfBoundsException or a nonsensical "#dims -1 1" header.
		if(args==null || args.length<3) {
			throw new IllegalArgumentException(USAGE);
		}
		final String in=args[0];
		final String out=args[1];

		IntList columns=new IntList();
		for(int i=2; i<args.length; i++) {
			final int column=Integer.parseInt(args[i]);
			if(column<0) {
				throw new IllegalArgumentException(
						"Column index must be non-negative: "+args[i]+"\n"+USAGE);
			}
			columns.add(column);
		}
		columns.shrink();

		ByteFile bf=ByteFile.makeByteFile(in, true);
		ByteStreamWriter bsw=ByteStreamWriter.makeBSW(out, true, false, true);
		LineParser1 lp=new LineParser1('\t');

		// The header is always synthesized from the requested column count;
		// presumably (inputs, outputs) with the last selected column as the
		// single output - TODO confirm against the consumer of this format.
		bsw.print("#dims").tab().print(columns.size()-1).tab().print(1).nl();

		long linesIn=0, bytesIn=0;
		for(ListNum<byte[]> ln=bf.nextList(); ln!=null; ln=bf.nextList()) {
			for(byte[] line : ln) {
				// Header lines are counted in the statistics even though they
				// are not copied to the output.
				linesIn++;
				bytesIn+=line.length;
				lp.set(line);
				if(lp.startsWith('#')) {
					// Input header/comment lines are intentionally dropped.
					// TODO: derive the new dims from the input "#dims" line
					// instead of assuming a single trailing output column.
					continue;
				}
				for(int i=0; i<columns.size; i++) {
					if(i>0) {bsw.tab();}
					bsw.print(lp.parseByteArray(columns.get(i)));
				}
				bsw.println();
			}
		}

		// Signal end of output and block until the writer has finished.
		bsw.poisonAndWait();
		t.stop();
		System.err.println(Tools.timeLinesBytesProcessed(t, linesIn, bytesIn, 8));
	}
}
|