File: Assembler.java

package info (click to toggle)
rockhopper 2.0.3%2Bdfsg2-3
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 33,136 kB
  • sloc: java: 10,831; sh: 31; xml: 29; makefile: 14
file content (1195 lines) | stat: -rw-r----- 51,808 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
/*
 * Copyright 2014 Brian Tjaden
 *
 * This file is part of Rockhopper.
 *
 * Rockhopper is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * any later version.
 *
 * Rockhopper is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * (in the file gpl.txt) along with Rockhopper.  
 * If not, see <http://www.gnu.org/licenses/>.
 */

import java.util.ArrayList;
import java.util.Scanner;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.InputStreamReader;
import java.io.FileInputStream;
import java.util.zip.GZIPInputStream;
import java.util.concurrent.atomic.AtomicInteger;
import javax.swing.JTextArea;

public class Assembler {

    /********************************************
     **********   Instance Variables   **********
     ********************************************/

    private Dictionary kDict;
    private int PHRED_offset;
    private BufferedReader reader;
    private BufferedReader reader2;  // Mate-pair file if using paired-end reads
    private PrintWriter samWriter;  // Output reads to SAM file
    private boolean isCompressedFile;  // True if gzip file. False otherwise.
    private SamOps sam;  // Used if reads file is in SAM/BAM format
    private AtomicInteger nameCount;  // Used when outputting SAM file and reads have no name
    private boolean firstLineDone;  // Used when reading in the first line of FASTA files
    private long totalTime;  // Time taken to read in sequencing reads and assemble transcripts
    private boolean mapFullReadsToTranscriptome;  // First pass (false), second pass (true).
    private DeNovoTranscripts transcripts;

    // Results
    public AtomicInteger totalReads;
    private AtomicInteger invalidQualityReads;      // Reads containing an invalid quality score
    private AtomicInteger ambiguousReads;           // Reads with ambiguous nucleotides
    private AtomicInteger lowQualityReads;          // Reads with quality scores too low for mapping



    /*****************************************
     **********   Class Variables   **********
     *****************************************/

    // Parameters
    public static JTextArea output;  // Output to GUI or, if null, to System.out
    public static int k = 25;  // Size of k-mer
    private static int readsInputFormat;  // 0=FASTQ, 1=QSEQ, 2=FASTA, 3=SAM(single), 4=SAM(paired), 5=BAM(single), 6=BAM(paired)
    public static ArrayList<String> conditionFiles = new ArrayList<String>();
    public static boolean singleEndOrientationReverseComplement = false;  // RevComp reads
    public static int numThreads = 1;  // Number of threads
    public static int minReadLength = 35;  // Minimum allowed sequencing read length
    public static boolean stopAfterOneHit = true;
    public static boolean computeExpression = true;  // PARAM: compute differential expression
    public static int CAPACITY_POWER = 25;  // PARAM: capacity of hashtable is ~2^n
    public static int MIN_READS_MAPPING = 20;  // PARAM: Min # of full length reads mapping to do novo transcripts
    public static int minTranscriptLength;  // PARAM: Min transcript length: 2*k
    public static int minSeedExpression = 50;  // PARAM: Min expression to initiate a transcript
    public static int minExpression = 5;  // PARAM: Min number of occurrences to be part of transcript
    public static boolean unstranded = false;  // PARAM: Is RNA-seq data strand specific or ambiguous?
    public static String pairedEndOrientation = "fr";  // PARAM: ff, fr, rf, rr
    public static boolean isPairedEnd = false;  // Do we have paired-end reads?
    private static String pairedEndFile;  // Name of mate-pair file if using paired-end reads
    public static int maxPairedEndLength = 500;
    public static String[] labels;
    public static int readsFileIndex;  // Keep track of current sequencing reads file
    public static PrintWriter summaryWriter = null;  // For outputting summary file
    private static String summaryFile = "summary.txt";
    public static String expressionFile = "transcripts.txt";
    public static boolean outputSAM = false;  // Output reads to SAM file
    public static String output_DIR = "Assembler_Results/";  // PARAM: write output files to this directory
    public static boolean time = false;  // PARAM: output execution time of program
    public static boolean verbose = false;  // PARAM: verbose output



    /**************************************
     **********   Constructors   **********
     **************************************/

    public Assembler() {

	totalTime = System.currentTimeMillis();
	FileOps foo = new FileOps(getCompressedFileName(), conditionFiles);
	if (foo.isValid()) {
	    output("\n");
	    for (int i=0; i<conditionFiles.size(); i++) {
		String[] files = conditionFiles.get(i).split(",");
		for (int j=0; j<files.length; j++) {
		    String[] parse_readFiles = files[j].split("%");
		    if (parse_readFiles.length == 1) {  // Using single-end reads (except maybe SAM/BAM)
			String file1 = parse_readFiles[0];
			output("Assembling transcripts from reads in file:             \t" + removePath(file1) + "\n");
			Rockhopper.updateProgress();
			Rockhopper.updateProgress();
			Assembler.isPairedEnd = false;
		    } else {  // Using paired-end reads
			String file1 = parse_readFiles[0];
			String file2 = parse_readFiles[1];
			output("Assembling transcripts from reads in files:\n");
			output("\t" + removePath(file1) + "\n");
			output("\t" + removePath(file2) + "\n");
			Rockhopper.updateProgress();
			Rockhopper.updateProgress();
			Assembler.isPairedEnd = true;
		    }
		}
	    }
	    transcripts = new DeNovoTranscripts(foo.getTranscripts());
	    readsFileIndex = 0;
	    output("\n");
	    for (int i=0; i<conditionFiles.size(); i++) {
		String[] files = conditionFiles.get(i).split(",");
		for (int j=0; j<files.length; j++) {
		    String[] parse_readFiles = files[j].split("%");
		    if (parse_readFiles.length == 1) {  // Using single-end reads (except maybe SAM/BAM)
			String file1 = parse_readFiles[0];
			output("Aligning reads to assembled transcripts using file:\t" + removePath(file1) + "\n");
			Assembler.isPairedEnd = false;
			output("\n\tTotal reads in file:        \t" + DeNovoTranscripts.numReads[readsFileIndex] + "\n");
			output("\tPerfectly aligned reads:    \t" + DeNovoTranscripts.numMappingReads[readsFileIndex] + "\t" + Math.round((100.0*DeNovoTranscripts.numMappingReads[readsFileIndex])/DeNovoTranscripts.numReads[readsFileIndex]) + "%" + "\n\n");
			Rockhopper.updateProgress();
		    } else {  // Using paired-end reads
			String file1 = parse_readFiles[0];
			String file2 = parse_readFiles[1];
			output("Aligning reads to assembled transcripts using files:\n");
			output("\t" + removePath(file1) + "\n");
			output("\t" + removePath(file2) + "\n");
			Assembler.isPairedEnd = true;
			output("\n\tTotal reads in files:        \t" + DeNovoTranscripts.numReads[readsFileIndex] + "\n");
			output("\tPerfectly aligned reads:     \t" + DeNovoTranscripts.numMappingReads[readsFileIndex] + "\t" + Math.round((100.0*DeNovoTranscripts.numMappingReads[readsFileIndex])/DeNovoTranscripts.numReads[readsFileIndex]) + "%" + "\n\n");
			Rockhopper.updateProgress();
		    }
		    readsFileIndex++;
		}
	    }
	    if (outputSAM) output("Rockhopper did not output a SAM file since sequencing read alignments were previously cached. If you want a SAM file to be output, you must first clear the cache." + "\n\n");
	} else {

	    // Create k-mer dictionary and assemble transcripts de novo
	    kDict = new Dictionary(k);
	    totalReads = new AtomicInteger(0);
	    invalidQualityReads = new AtomicInteger(0);
	    ambiguousReads = new AtomicInteger(0);
	    lowQualityReads = new AtomicInteger(0);
	    mapFullReadsToTranscriptome = false;
	    readsFileIndex = 0;
	    output("\n");
	    for (int i=0; i<conditionFiles.size(); i++) {
		String[] files = conditionFiles.get(i).split(",");
		for (int j=0; j<files.length; j++) {
		    String[] parse_readFiles = files[j].split("%");
		    if (parse_readFiles.length == 1) {  // Using single-end reads (except maybe SAM/BAM)
			String file1 = parse_readFiles[0];
			output("Assembling transcripts from reads in file:             \t" + removePath(file1) + "\n");
			Assembler.isPairedEnd = false;
			Assembler.pairedEndFile = null;
			readsInputFormat = getReadsInputFileType(file1);
			this.PHRED_offset = getPhredOffset(file1);
			if (outputSAM) outputSamHeader(file1);
			int numThreads_OLD = numThreads;
			if ((readsInputFormat == 5) || (readsInputFormat == 6)) numThreads = 1;
			firstLineDone = false;
			readInSequencingReads(file1);  // Read in the sequencing reads
			numThreads = numThreads_OLD;
			if (samWriter != null) {
			    samWriter.close();
			    output("Sequencing reads written to SAM file:                  \t" + output_DIR + getFileNameBase(file1) + ".sam" + "\n");
			}
			Rockhopper.updateProgress();
			Rockhopper.updateProgress();
		    } else {  // Using paired-end reads
			String file1 = parse_readFiles[0];
			String file2 = parse_readFiles[1];
			output("Assembling transcripts from reads in files:\n");
			output("\t" + removePath(file1) + "\n");
			output("\t" + removePath(file2) + "\n");
			Assembler.isPairedEnd = true;
			Assembler.pairedEndFile = file2;
			readsInputFormat = getReadsInputFileType(file1);
			this.PHRED_offset = getPhredOffset(file1);
			if (outputSAM) outputSamHeader(file1);
			int numThreads_OLD = numThreads;
			if ((readsInputFormat == 5) || (readsInputFormat == 6)) numThreads = 1;
			firstLineDone = false;
			readInSequencingReads(file1);  // Read in the sequencing reads
			numThreads = numThreads_OLD;
			if (samWriter != null) {
			    samWriter.close();
			    output("Sequencing reads written to SAM file:                  \t" + output_DIR + getFileNameBase(file1) + ".sam" + "\n");
			}
			Rockhopper.updateProgress();
			Rockhopper.updateProgress();
		    }
		    readsFileIndex++;
		}
	    }
	    kDict.assembleTranscripts();  // Assemble transcripts for final time

	    // Map full length reads to transcripts
	    totalReads = new AtomicInteger(0);
	    invalidQualityReads = new AtomicInteger(0);
	    ambiguousReads = new AtomicInteger(0);
	    lowQualityReads = new AtomicInteger(0);
	    mapFullReadsToTranscriptome = true;
	    kDict.initializeReadMapping(stopAfterOneHit);
	    readsFileIndex = 0;
	    output("\n");
	    for (int i=0; i<conditionFiles.size(); i++) {
		String[] files = conditionFiles.get(i).split(",");
		for (int j=0; j<files.length; j++) {
		    String[] parse_readFiles = files[j].split("%");
		    if (parse_readFiles.length == 1) {  // Using single-end reads (except maybe SAM/BAM)
			String file1 = parse_readFiles[0];
			output("Aligning reads to assembled transcripts using file:\t" + removePath(file1) + "\n");
			Assembler.isPairedEnd = false;
			Assembler.pairedEndFile = null;
			readsInputFormat = getReadsInputFileType(file1);
			this.PHRED_offset = getPhredOffset(file1);
			int numThreads_OLD = numThreads;
			if ((readsInputFormat == 5) || (readsInputFormat == 6)) numThreads = 1;
			firstLineDone = false;
			readInSequencingReads(file1);  // Read in the sequencing reads
			numThreads = numThreads_OLD;
			if (unstranded) kDict.bwtIndex.halveReads(readsFileIndex);  // Fix read double counting
			output("\n\tTotal reads in file:        \t" + kDict.bwtIndex.getNumReads(readsFileIndex) + "\n");
			output("\tPerfectly aligned reads:    \t" + kDict.bwtIndex.getNumMappingReads(readsFileIndex) + "\t" + Math.round((100.0*kDict.bwtIndex.getNumMappingReads(readsFileIndex))/kDict.bwtIndex.getNumReads(readsFileIndex)) + "%" + "\n\n");
			Rockhopper.updateProgress();
		    } else {  // Using paired-end reads
			String file1 = parse_readFiles[0];
			String file2 = parse_readFiles[1];
			output("Aligning reads to assembled transcripts using files:\n");
			output("\t" + removePath(file1) + "\n");
			output("\t" + removePath(file2) + "\n");
			Assembler.isPairedEnd = true;
			Assembler.pairedEndFile = file2;
			readsInputFormat = getReadsInputFileType(file1);
			this.PHRED_offset = getPhredOffset(file1);
			int numThreads_OLD = numThreads;
			if ((readsInputFormat == 5) || (readsInputFormat == 6)) numThreads = 1;
			firstLineDone = false;
			readInSequencingReads(file1);  // Read in the sequencing reads
			numThreads = numThreads_OLD;
			output("\n\tTotal reads in files:        \t" + kDict.bwtIndex.getNumReads(readsFileIndex) + "\n");
			output("\tPerfectly aligned reads:     \t" + kDict.bwtIndex.getNumMappingReads(readsFileIndex) + "\t" + Math.round((100.0*kDict.bwtIndex.getNumMappingReads(readsFileIndex))/kDict.bwtIndex.getNumReads(readsFileIndex)) + "%" + "\n\n");
			Rockhopper.updateProgress();
		    }
		    readsFileIndex++;
		}
	    }
	    transcripts = new DeNovoTranscripts(kDict.bwtIndex);
	    if (computeExpression) transcripts.setMinDiffExpressionLevels(kDict.bwtIndex);
	    kDict.bwtIndex = null;  // Clear Index and Dictionary
	    kDict = null;           // Clear Index and Dictionary
	    System.gc();            // Clear Index and Dictionary
	    if (computeExpression) transcripts.computeDifferentialExpression();
	    FileOps.writeCompressedFile(getCompressedFileName(), conditionFiles, DeNovoTranscripts.numReads, DeNovoTranscripts.numMappingReads, transcripts);
	}

	// Incorporate results from SAM file
	if (sam != null) {
	    totalReads.addAndGet(sam.badReads.get());
	    invalidQualityReads.addAndGet(sam.badReads.get());
	}

	outputToFile(output_DIR + expressionFile, transcripts.toString(labels));
	totalTime = System.currentTimeMillis() - totalTime;
	outputResults();
	summaryWriter.close();
	Rockhopper.updateProgress();
    }



    /*************************************************
     **********   Public Instance Methods   **********
     *************************************************/

    /**
     * Read in sequencing reads from file.
     */
    public void processReads() {
	String line = "";;
	String name = "";
	String nextName = "";  // Special case (FASTA)
	String read = "";
	String quality = "";
	String quality_fasta = "";  // Special case (FASTA)
	int[] qualities = new int[0];
	String[] parse_line = null;

	// Using paired-end reads
	String line2 = "";;
	String name2 = "";
	String nextName2 = "";  // Special case (FASTA)
	String read2 = "";
	String quality2 = "";
	String quality_fasta2 = "";  // Special case (FASTA)
	int[] qualities2 = new int[0];
	String[] parse_line2 = null;

	try {

	    synchronized(reader) {
		if ((readsInputFormat == 2) && !firstLineDone) {  // Special case (FASTA)
		    line = reader.readLine();  // Special case (FASTA)
		    if (line.startsWith(">")) nextName = line.substring(1);
		    if (isPairedEnd) {
			line2 = reader2.readLine();
			if (line2.startsWith(">")) nextName2 = line2.substring(1);
		    }
		}
		firstLineDone = true;
	    }

	    while (line != null) {

		synchronized(reader) {  // Thread safe reading from file
		    if (readsInputFormat < 5) line = reader.readLine();  // Not a BAM file
		    if (line == null) continue;
		    if (readsInputFormat == 0) {  // FASTQ file
			name = line;
			read = reader.readLine();
			line = reader.readLine();
			quality = reader.readLine();
		    } else if (readsInputFormat == 1) {  // QSEQ file
			parse_line = line.split("\t");
			name = parse_line[0] + ":" + parse_line[2] + ":" + parse_line[3] + ":" + parse_line[4] + ":" + parse_line[5] + ":" + parse_line[6] + ":" + parse_line[7];
			read = parse_line[8];
			quality = parse_line[9];
		    } else if (readsInputFormat == 2) {  // FASTA file
			name = nextName;
			read = "";
			while ((line != null) && ((line.length() == 0) || (line.charAt(0) != '>'))) {
			    read += line;
			    line = reader.readLine();
			}
			if (line == null) nextName = "";
			else if (line.startsWith(">")) nextName = line.substring(1);
			else nextName = "";  // Case should never be reached
			if (quality_fasta.length() != read.length()) quality_fasta = new String(new char[read.length()]).replace('\0', '(');
			quality = quality_fasta;
		    } else if (readsInputFormat == 3) {  // SAM file (single-end)
			if (SamOps.isHeaderLine(line)) continue;
		    } else if (readsInputFormat == 4) {  // SAM file (paired-end)
			if (SamOps.isHeaderLine(line)) continue;
			line2 = reader.readLine();
		    } else if (readsInputFormat == 5) {  // BAM file (single-end)
			parse_line = sam.parseAlignmentLine_BAM();
			if (parse_line == null) {
			    line = null;
			    continue;
			}
			if (parse_line == SamOps.emptyStringArray) continue;
			if (parse_line[0].startsWith("U")) {
			    read = parse_line[9];
			    quality = mapPhred(parse_line[10]);
			} else if (samWriter != null) {
			    name = parse_line[0].substring(1);
			    while (name.startsWith("@")) name = name.substring(1);
			    int flag = 4;
			    synchronized(samWriter) {
				samWriter.println(name + "\t" + flag + "\t" + "*" + "\t" + "0" + "\t" + "255" + "\t" + "*" + "\t" + "*" + "\t" + "0" + "\t" + "0" + "\t" + parse_line[9] + "\t" + mapPhred(parse_line[10]));
				continue;
			    }
			} else continue;
		    } else if (readsInputFormat == 6) {  // BAM file (paired-end)
			parse_line = sam.parseAlignmentLine_BAM();
			if (parse_line == null) {
			    line = null;
			    continue;
			}
			parse_line2 = sam.parseAlignmentLine_BAM();
			if (parse_line2 == null) {
			    line = null;
			    continue;
			}
			if ((parse_line == SamOps.emptyStringArray) || (parse_line2 == SamOps.emptyStringArray)) continue;
			if ((parse_line[0].startsWith("U")) || parse_line2[0].startsWith("U")) {
			    read = parse_line[9];
			    quality = mapPhred(parse_line[10]);
			    read2 = parse_line2[9];
			    quality2 = mapPhred(parse_line2[10]);
			} else if (samWriter != null) {
			    name = parse_line[0].substring(1);
			    while (name.startsWith("@")) name = name.substring(1);
			    name2 = parse_line2[0].substring(1);
			    while (name2.startsWith("@")) name2 = name2.substring(1);
			    int flag = 77;    // 01001101
			    int flag2 = 141;  // 10001101
			    synchronized(samWriter) {
				samWriter.println(name + "\t" + flag + "\t" + "*" + "\t" + "0" + "\t" + "255" + "\t" + "*" + "\t" + "*" + "\t" + "0" + "\t" + "0" + "\t" + parse_line[9] + "\t" + mapPhred(parse_line[10]));
				samWriter.println(name2 + "\t" + flag2 + "\t" + "*" + "\t" + "0" + "\t" + "255" + "\t" + "*" + "\t" + "*" + "\t" + "0" + "\t" + "0" + "\t" + parse_line2[9] + "\t" + mapPhred(parse_line2[10]));
				continue;
			    }
			} else continue;
		    }

		    if (isPairedEnd) {  // Using paired-end reads
			synchronized(reader2) {  // Thread safe reading from file
			    line2 = reader2.readLine();
			    if (line2 == null) {
				output("Error - the two files of mate-pair reads contain different numbers of sequencing reads.");
				continue;
			    }
			    if (readsInputFormat == 0) {  // FASTQ file
				name2 = line;
				read2 = reader2.readLine();
				line2 = reader2.readLine();
				quality2 = reader2.readLine();
			    } else if (readsInputFormat == 1) {  // QSEQ file
				parse_line2 = line2.split("\t");
				name2 = parse_line2[0] + ":" + parse_line2[2] + ":" + parse_line2[3] + ":" + parse_line2[4] + ":" + parse_line2[5] + ":" + parse_line2[6] + ":" + parse_line2[7];
				read2 = parse_line2[8];
				quality2 = parse_line2[9];
			    } else if (readsInputFormat == 2) {  // FASTA file
				name2 = nextName2;
				read2 = "";
				while ((line2 != null) && ((line2.length() == 0) || (line2.charAt(0) != '>'))) {
				    read2 += line2;
				    line2 = reader2.readLine();
				}
				if (line2 == null) nextName2 = "";
				else if (line2.startsWith(">")) nextName2 = line2.substring(1);
				else nextName2 = "";  // Case should never be reached
				if (quality_fasta2.length() != read2.length()) quality_fasta2 = new String(new char[read2.length()]).replace('\0', '(');
				quality2 = quality_fasta2;
			    }
			}
		    }
		}

		if (readsInputFormat == 3) {  // SAM file (single-end)
		    parse_line = sam.parseAlignmentLine_SAM(line);
		    if (parse_line == SamOps.emptyStringArray) continue;
		    if (parse_line[0].startsWith("U")) {
			read = parse_line[9];
			quality = parse_line[10];
		    } else if (samWriter != null) {  // Output to SAM file
			name = parse_line[0].substring(1);
			while (name.startsWith("@")) name = name.substring(1);
			int flag = 4;
			synchronized(samWriter) {
			    samWriter.println(name + "\t" + flag + "\t" + "*" + "\t" + "0" + "\t" + "255" + "\t" + "*" + "\t" + "*" + "\t" + "0" + "\t" + "0" + "\t" + parse_line[9] + "\t" + parse_line[10]);
			}
			continue;
		    } else continue;
		} else if (readsInputFormat == 4) {  // SAM file (paired-end)
		    parse_line = sam.parseAlignmentLine_SAM(line);
		    parse_line2 = sam.parseAlignmentLine_SAM(line2);
		    if ((parse_line == SamOps.emptyStringArray) || (parse_line2 == SamOps.emptyStringArray)) continue;
		    if ((parse_line[0].startsWith("U")) || parse_line2[0].startsWith("U")) {
			read = parse_line[9];
			quality = parse_line[10];
			read2 = parse_line2[9];
			quality2 = parse_line2[10];
		    } else if (samWriter != null) {
			name = parse_line[0].substring(1);
			while (name.startsWith("@")) name = name.substring(1);
			name2 = parse_line2[0].substring(1);
			while (name2.startsWith("@")) name2 = name2.substring(1);
			int flag = 77;    // 01001101
			int flag2 = 141;  // 10001101
			synchronized(samWriter) {
			    samWriter.println(name + "\t" + flag + "\t" + "*" + "\t" + "0" + "\t" + "255" + "\t" + "*" + "\t" + "*" + "\t" + "0" + "\t" + "0" + "\t" + parse_line[9] + "\t" + parse_line[10]);
			    samWriter.println(name2 + "\t" + flag2 + "\t" + "*" + "\t" + "0" + "\t" + "255" + "\t" + "*" + "\t" + "*" + "\t" + "0" + "\t" + "0" + "\t" + parse_line2[9] + "\t" + parse_line2[10]);
			}
			continue;
		    } else continue;
		}

		// Output to SAM file
		if ((samWriter != null) && (!mapFullReadsToTranscriptome)) {
		    while (name.startsWith("@")) name = name.substring(1);
		    if (isPairedEnd || ((sam != null) && (sam.isPairedEnd()))) {  // Paired-end
			while (name2.startsWith("@")) name2 = name2.substring(1);
			if (name.length() == 0) {
			    name = "r" + nameCount.getAndIncrement();
			    name2 = name;
			}
			int flag = 77;    // 01001101
			int flag2 = 141;  // 10001101
			synchronized(samWriter) {
			    samWriter.println(name + "\t" + flag + "\t" + "*" + "\t" + "0" + "\t" + "255" + "\t" + "*" + "\t" + "*" + "\t" + "0" + "\t" + "0" + "\t" + read + "\t" + quality);
			    samWriter.println(name2 + "\t" + flag2 + "\t" + "*" + "\t" + "0" + "\t" + "255" + "\t" + "*" + "\t" + "*" + "\t" + "0" + "\t" + "0" + "\t" + read2 + "\t" + quality2);
			}
		    } else {  // Single-end
			if (name.length() == 0) name = "r" + nameCount.getAndIncrement();
			int flag = 4;
			synchronized(samWriter) {
			    samWriter.println(name + "\t" + flag + "\t" + "*" + "\t" + "0" + "\t" + "255" + "\t" + "*" + "\t" + "*" + "\t" + "0" + "\t" + "0" + "\t" + read + "\t" + quality);
			}
		    }
		}

		// Process read
		read = read.toUpperCase();
		boolean ambiguous = false;
		for (int i=0; i<read.length(); i++) {
		    if ((read.charAt(i) != 'A') && (read.charAt(i) != 'C') &&
			(read.charAt(i) != 'G') && (read.charAt(i) != 'T'))
			ambiguous = true;
		}
		if (isPairedEnd || ((sam != null) && (sam.isPairedEnd()))) {  // Using paired-end reads
		    read2 = read2.toUpperCase();
		    for (int i=0; i<read2.length(); i++) {
			if ((read2.charAt(i) != 'A') && (read2.charAt(i) != 'C') &&
			    (read2.charAt(i) != 'G') && (read2.charAt(i) != 'T'))
			    ambiguous = true;
		    }
		    // Orient paired-end reads (0=ff, 1=fr, 2=rf, 3=rr)
		    if (pairedEndOrientation.charAt(0) == 'r') {  // Reverse first read
			read = reverseComplement(read);
			quality = reverse(quality);
		    }
		    if (pairedEndOrientation.charAt(1) == 'r') {  // Reverse second read
			read2 = reverseComplement(read2);
			quality2 = reverse(quality2);
		    }
		} else {  // Using single-end reads
		    // Orient single-end reads (true=reverse_complement, false=nothing)
		    if (singleEndOrientationReverseComplement) {  // Reverse complement read
			read = reverseComplement(read);
			quality = reverse(quality);
		    }
		}

		// Process quality
		if (quality.length() != read.length()) {
		    output("Quality sequence has different length than read sequence.\n");
		    continue;
		}
		int endIndex = quality.length()-1;
		while ((endIndex >= 0) && ((int)(quality.charAt(endIndex))-PHRED_offset == 2)) endIndex--;
		quality = quality.substring(0, endIndex+1);
		read = read.substring(0, endIndex+1);
		if (qualities.length != read.length()) qualities = new int[read.length()];
		boolean invalid = false;
		for (int i=0; i<quality.length(); i++) {
		    qualities[i] = (int)(quality.charAt(i)) - PHRED_offset;
		    if ((qualities[i] < 0) || (qualities[i] > 93)) invalid = true;
		}
		if (isPairedEnd || ((sam != null) && (sam.isPairedEnd()))) {  // Using paired-end reads
		    if (quality2.length() != read2.length()) {
			output("Quality sequence has different length than read sequence.\n");
			continue;
		    }
		    int endIndex2 = quality2.length()-1;
		    while ((endIndex2 >= 0) && ((int)(quality2.charAt(endIndex2))-PHRED_offset == 2)) endIndex2--;
		    quality2 = quality2.substring(0, endIndex2+1);
		    read2 = read2.substring(0, endIndex2+1);
		    if (qualities2.length != read2.length()) qualities2 = new int[read2.length()];
		    for (int i=0; i<quality2.length(); i++) {
			qualities2[i] = (int)(quality2.charAt(i)) - PHRED_offset;
			if ((qualities2[i] < 0) || (qualities2[i] > 93)) invalid = true;
		    }
		}

		// Process sequencing reads.
		totalReads.getAndIncrement();
		if (ambiguous) ambiguousReads.getAndIncrement();
		else if ((quality.length() < minReadLength) || (isPairedEnd && (quality2.length() < minReadLength)) || ((sam != null) && (sam.isPairedEnd()) && (quality2.length() < minReadLength))) lowQualityReads.getAndIncrement();  // Minimum read length
		else if (invalid) invalidQualityReads.getAndIncrement();  // Invalid quality score
		else {
		    if (!mapFullReadsToTranscriptome) {  // First pass. Assemble transcriptome.
			kDict.add(read);  // Add read to dictionary!
			if (isPairedEnd || ((sam != null) && (sam.isPairedEnd()))) kDict.add(read2);  // Using paired-end reads
		    } else {  // Second pass. Map full length reads to assembled transcripts.
			if (isPairedEnd || ((sam != null) && (sam.isPairedEnd()))) {  // Using paired-end reads
			    kDict.mapFullLengthRead(read, read2);
			} else {  // Using single-end reads
			    kDict.mapFullLengthRead(read);
			    if (unstranded) kDict.mapFullLengthRead(reverseComplement(read));
			}
		    }
		}
	    }
	} catch (IOException e) {
	    output("Error - could not read in from sequencing reads file.\n\n");
	    System.exit(0);
	}
    }

    /**
     * Return the base file name for the compressed transcripts file.
     * For example, if A,B,C are replicates in conditions 1 and 
     * X,Y,Z are replicates in condition 2, then the base file name
     * would be ABC_XYZ__parameters
     */
    public String getCompressedFileName() {
	StringBuilder sb = new StringBuilder();
	for (int i=0; i<conditionFiles.size(); i++) {
	    String[] files = conditionFiles.get(i).split(",");
	    for (int j=0; j<files.length; j++) sb.append(encode(files[j]));
	    sb.append("_");
	}
	sb.append(parametersToString());
	return sb.toString();
    }



    /**********************************************
     **********   Public Class Methods   **********
     **********************************************/

    public static void output(String s) {
	if (summaryWriter != null) summaryWriter.print(s);
	if (output == null) System.out.print(s);
	else {
	    output.append(s);
	    output.setCaretPosition(output.getDocument().getLength());
	}
    }

    /**
     * Returns a reversed version of String s.
     */
    public static String reverse(String s) {
	return (new StringBuilder(s)).reverse().toString();
    }

    /**
     * Returns a reverse complemented version of String s.
     */
    public static String reverseComplement(String s) {
	StringBuilder sb = new StringBuilder(s.length());
	for (int i=s.length()-1; i>=0; i--) {
	    if (s.charAt(i) == 'A') sb.append('T');
	    else if (s.charAt(i) == 'C') sb.append('G');
	    else if (s.charAt(i) == 'G') sb.append('C');
	    else if (s.charAt(i) == 'T') sb.append('A');
	    else sb.append(s.charAt(i));
	}
	return sb.toString();
    }



    /**************************************************
     **********   Private Instance Methods   **********
     **************************************************/

    /**
     * Read in sequencing reads from file.
     */
    private void readInSequencingReads(String readsFile) {

        try {
	    if (isCompressedFile) reader = new BufferedReader(new InputStreamReader(new GZIPInputStream(new FileInputStream(readsFile))));
	    else reader = new BufferedReader(new FileReader(readsFile));
	    if (isPairedEnd) {
		if (FileOps.isGZIP(pairedEndFile)) reader2 = new BufferedReader(new InputStreamReader(new GZIPInputStream(new FileInputStream(pairedEndFile))));
		else reader2 = new BufferedReader(new FileReader(pairedEndFile));
	    }
	    Thread[] threads = new Thread[numThreads];
	    for (int i=0; i<numThreads; i++) {
		threads[i] = new ProcessReadsDeNovo_Thread(this);
		threads[i].start();
	    }
	    for (int i=0; i<numThreads; i++) threads[i].join();
	    reader.close();
	    if (isPairedEnd) reader2.close();
	} catch (IOException e) {
	    output("Error - could not read from " + readsFile + "\n\n");
	    System.exit(0);
	} catch (InterruptedException e) {
	    output("Error - thread execution was interrupted.\n\n");
	    System.exit(0);
	}
    }

    /**
     * Outputs to STDOUT stats on sequencing reads.
     */
    private void outputResults() {
	output("Total number of assembled transcripts:\t\t" + transcripts.getNumTranscripts() + "\n");
	output("\tAverage transcript length:             \t" + transcripts.getAverageTranscriptLength() + "\n");
	output("\tMedian transcript length:              \t" + transcripts.getMedianTranscriptLength() + "\n");
	output("\tTotal number of assembled bases:       \t" + transcripts.getTotalAssembledBases() + "\n");
	output("\n");
	output("Summary of results written to file:               \t" + output_DIR + summaryFile + "\n");
	output("Details of assembled transcripts written to file: \t" + output_DIR + expressionFile + "\n");
	output("\n");
	output("FINISHED.\n\n");
    }

    /**
     * Determines the file format of the input reads file.
     * Returns 0 if the file is in FASTQ format.
     * Returns 1 if the file is in QSEQ format.
     * Returns 2 if the file is in FASTA format.
     * Returns 3 if the file is in SAM single-end format.
     * Returns 4 if the file is in SAM paired-end format.
     * Returns 5 if the file is in BAM single-end format.
     * Returns 6 if the file is in BAM paired-end format.
     */
    private int getReadsInputFileType(String readsFile) {
	int MAX_LINES = 10000;
	int lineCounter = 0;
	int fastqCount = 0;
	int qseqCount = 0;
	int fastaCount = 0;
	try {
	    // First check if we have a SAM file
	    sam = new SamOps(readsFile, MAX_LINES, true);
	    if (sam.isValidSam() || sam.isValidBam()) {
		String[] replicons = sam.getReplicons();
		if (sam.isValidSam() && !sam.isPairedEnd()) return 3;  // Single-end SAM file
		else if (sam.isValidSam()) return 4;  // Paired-end SAM file
		else if (sam.isValidBam() && !sam.isPairedEnd()) return 5;  // Single-end BAM file
		else if (sam.isValidBam()) return 6;  // Paired-end BAM file
		else return -1;  // This case should never be reached
	    } else sam = null;

	    Scanner reader = null;
	    isCompressedFile = FileOps.isGZIP(readsFile);
	    if (isCompressedFile) reader = new Scanner(new GZIPInputStream(new FileInputStream(readsFile)));
	    else reader = new Scanner(new File(readsFile));
	    String line;
	    while (reader.hasNextLine() && (lineCounter < MAX_LINES)) {
		line = reader.nextLine();
		if (line.length() > 0) {
		    if (line.charAt(0) == '@') fastqCount++;
		    else if (line.split("\t").length >= 11) qseqCount++;
		    else if (line.charAt(0) == '>') fastaCount++;
		}
		lineCounter++;
	    }
	    reader.close();
	    if (fastqCount > Math.max(qseqCount, fastaCount)) return 0;
	    else if (qseqCount > fastaCount) return 1;
	    else if (fastaCount > 0) return 2;
	    else {
		output("Error - could not recognize format of file " + readsFile + "\n\n");
		System.exit(0);
	    }
	} catch (IOException e) {
	    output("\nError - could not open sequencing reads file: " + readsFile + "\n\n");
	    System.exit(0);
	}
	return -1;
    }

    /**
     * Depending on version of Solexa machine used, the quality
     * score may range from 35 (or 37) up to 73, or it may range
     * from 66 up to 104. To obtain the Phred score, we subtract 
     * either 33 or 64, depending on the version. Here, we get the
     * maximum quality score over all nucleotides in the first
     * 100,000 reads (it should be either 73 or 104). The offset 
     * is the number we must subtract from this maximum to 
     * obtain 40, i.e., either 33 or 64.
     */
    private int getPhredOffset(String readsFile) {
	int lineCounter = 0;
	try {
	    int maxQuality = Integer.MIN_VALUE;
	    if ((readsInputFormat == 5) || (readsInputFormat == 6)) {  // BAM file
		maxQuality = sam.getMaxQuality();
	    } else {
		Scanner reader = null;
		if (isCompressedFile) reader = new Scanner(new GZIPInputStream(new FileInputStream(readsFile)));
		else reader = new Scanner(new File(readsFile));
		String line;
		String read = "";
		String quality = "";
		while (reader.hasNextLine() && (lineCounter < 100000)) {

		    line = reader.nextLine();
		    if (readsInputFormat == 0) {  // FASTQ file
			read = reader.nextLine();
			line = reader.nextLine();
			quality = reader.nextLine();
		    } else if (readsInputFormat == 1) {  // QSEQ file
			String[] parse_line = line.split("\t");
			read = parse_line[8];
			quality = parse_line[9];
		    } else if (readsInputFormat == 2) {  // FASTA file
			read = reader.nextLine();
			quality = "(";  // ASCII value of 40
		    } else if ((readsInputFormat == 3) || (readsInputFormat == 4)) {  // SAM file
			if (line.startsWith("@")) {
			    read = "";
			    quality = "";
			} else {
			    String[] parse_line = line.split("\t");
			    read = parse_line[9];
			    quality = parse_line[10];
			    if (quality.equals("*")) quality = "";
			}
		    }

		    // Process quality
		    for (int i=0; i<quality.length(); i++)
			maxQuality = Math.max(maxQuality, (int)(quality.charAt(i)));

		    lineCounter++;
		}
		reader.close();
	    }

	    // Are we Phred+33 or Phred+64?
	    if (Math.abs(maxQuality - 40 - 33) < Math.abs(maxQuality - 40 - 64))
		return 33;
	    else 
		return 64;
	} catch (IOException e) {
	    output("\nError - could not open file " + readsFile + "\n\n");
	    System.exit(0);
	}
	return -1;
    }

    /**
     * Open SAM file for writing.
     * Output header information to SAM file.
     */
    private void outputSamHeader(String readsFile) {
	String samFileName = output_DIR + getFileNameBase(readsFile) + ".sam";
	if (readsFile.equals(samFileName)) return;  // Do not read and write same file at same time
	nameCount = new AtomicInteger(1);
	try {
	    samWriter = new PrintWriter(new File(samFileName));
	    samWriter.println("@HD" + "\t" + "VN:1.0" + "\t" + "SO:unsorted");
	    samWriter.println("@PG" + "\t" + "ID:Rockhopper" + "\t" + "PN:Rockhopper" + "\t" + "VN:" + Rockhopper.version);
	} catch (IOException f) {
	    samWriter = null;
	}
    }

    /**
     * Return a String representation of the parameter settings.
     */
    private String parametersToString() {
	return "_k" + k + "_e" + ("" + computeExpression).toUpperCase().charAt(0) + "_a" + ("" + stopAfterOneHit).toUpperCase().charAt(0) + "_d" + maxPairedEndLength + "_j" + minReadLength + "_n" + CAPACITY_POWER + "_b" + MIN_READS_MAPPING + "_u" + minTranscriptLength + "_w" + minSeedExpression + "_x" + minExpression + "_" + pairedEndOrientation + "_s" + ("" + unstranded).toUpperCase().charAt(0) + "_c" + ("" + singleEndOrientationReverseComplement).toUpperCase().charAt(0);
    }



    /***********************************************
     **********   Private Class Methods   **********
     ***********************************************/

    /**
     * Returns the base of the specified file name, i.e.,
     * the file extension and path to the file are excluded.
     */
    private static String getFileNameBase(String fileName) {
	int slashIndex = fileName.lastIndexOf(separator());
	if (slashIndex == -1) slashIndex = fileName.lastIndexOf("/");
	int periodIndex = fileName.indexOf('.', slashIndex+1);
	if (periodIndex == -1) periodIndex = fileName.length();
	return fileName.substring(slashIndex+1, periodIndex);
    }

    /**
     * Returns the system-dependent file separator that
     * can be used as a RegEx.
     */
    private static String separator() {
	if (File.separatorChar == '\\') return "\\\\";
	else return File.separator;
    }

    /**
     * Returns the file name with the leading path removed.
     */
    private static String removePath(String fileName) {
	int slashIndex = fileName.lastIndexOf(separator());
	if (slashIndex == -1) slashIndex = fileName.lastIndexOf("/");
	return fileName.substring(slashIndex+1);
    }

    /**
     * Returns a 2 character encoding of the String s.
     * First, s is converted to an integer via hashing.
     * Then, the integer is converted into a 2 character
     * encoding, where each character takes on one of
     * 52 different values (a-z, A-Z).
     */
    private static String encode(String s) {
	int encoding = s.hashCode() % 2704;  // 52*52=2704
	if (encoding < 0) encoding += 2704;
	int digit1 = encoding / 52;
	int digit2 = encoding % 52;
	if (digit2 < 0) digit2 += 52;
	char char1 = '?';
	char char2 = '?';
	if (digit1 < 26) char1 = (char)('A' + digit1);
	else char1 = (char)('a' - 26 + digit1);
	if (digit2 < 26) char2 = (char)('A' + digit2);
	else char2 = (char)('a' - 26 + digit2);
	return char1 + "" + char2;
    }

    /**
     * Output the specified string s to the specified file.
     */
    private static void outputToFile(String fileName, String s) {
	try {
	    PrintWriter writer = new PrintWriter(new File(fileName));
	    writer.print(s);
	    writer.close();
	} catch (FileNotFoundException e) {
	    output("\nError - could not open file " + fileName + "\n\n");
	}
    }

    /**
     * Checks if quality scores specified by parameter String
     * need to be adjusted by a Phred offset of 33. If not,
     * the original String is returned. If so, a new
     * adjusted String is returned.
     */
    private static String mapPhred(String qualities) {
	int offset = 33;
	int maxPhred = 104;
	boolean needsAdjusting = false;
	boolean needsResetting = false;
	for (int i=0; i<qualities.length(); i++) {
	    if ((int)(qualities.charAt(i)) < offset) needsAdjusting = true;
	    if ((int)(qualities.charAt(i)) > maxPhred) needsResetting = true;
	}
	if (!needsAdjusting && !needsResetting) return qualities;
	StringBuilder sb = new StringBuilder(qualities.length());
	for (int i=0; i<qualities.length(); i++) {
	    if (needsAdjusting) sb.append((char)(offset + qualities.charAt(i)));
	    else if (needsResetting) sb.append('(');
	}
	return sb.toString();
    }

    private static void commandLineArguments(String[] args) {
	if (args.length < 1) {
            output("\nThe Assembler application takes one or more files of RNA-seq reads and it assembles the reads (de novo) into a transcriptome map.\n");
	    output("\nThe Assembler application has one required command line argument.\n");
            output("\nREQUIRED ARGUMENT\n\n");
            output("\t" + "a FASTQ file of RNA-seq reads\n");

            output("\nOPTIONAL ARGUMENTS\n\n");
	    output("\t" + "-k <integer>" + "\t" + "size of k-mer, range of values is 15 to 31 (default is 25)\n");
	    output("\t" + "-c <boolean>" + "\t" + "reverse complement single-end reads (default is false)\n");
            output("\t" + "-ff/fr/rf/rr" + "\t" + "orientation of two mate reads for paired-end read, f=forward and r=reverse_complement (default is fr)\n");
	    output("\t" + "-s <boolean>" + "\t" + "RNA-seq experiments are strand specific (true) or strand ambiguous (false) (default is true)\n");
            output("\t" + "-p <integer>" + "\t" + "number of processors (default is self-identify)\n");
            output("\t" + "-e <boolean>" + "\t" + "compute differential expression for transcripts in pairs of experimental conditions (default is true)\n");
            output("\t" + "-a <boolean>" + "\t" + "report 1 alignment (true) or report all optimal alignments (false), (default is true)\n");
            output("\t" + "-d <integer>" + "\t" + "maximum number of bases between mate pairs for paired-end reads (default is 500)\n");
            output("\t" + "-j <integer>" + "\t" + "minimum length required to use a sequencing read after trimming/processing (default is 35)\n");
            output("\t" + "-n <integer>" + "\t" + "size of k-mer hashtable is ~ 2^n (default is 25). HINT: should normally be 25 or, if more memory is available, 26. WARNING: if increased above 25 then more than 1.2M of memory must be allocated\n");
            output("\t" + "-b <integer>" + "\t" + "minimum number of full length reads required to map to a de novo assembled trancript (default is 20)\n");
           output("\t" + "-u <integer>" + "\t" + "minimum length of de novo assembled transcripts (default is 2*k)\n");
           output("\t" + "-w <integer>" + "\t" + "minimum count of k-mer to use it to seed a new de novo assembled transcript (default is 50)\n");
           output("\t" + "-x <integer>" + "\t" + "minimum count of k-mer to use it to extend an existing de novo assembled transcript (default is 5)\n");
	    output("\t" + "-L <comma separated list>" + "\t" + "labels for each condition\n");
            output("\t" + "-v <boolean>" + "\t" + "verbose output including raw/normalized counts aligning to each transcript (default is false)\n");
	    output("\t" + "-TIME       " + "\t" + "output time taken to execute program\n");

	    output("\nEXAMPLE EXECUTION: SINGLE-END READS\n");
	    output("\njava Assembler <options> aerobic_replicate1.fastq,aerobic_replicate2.fastq anaerobic_replicate1.fastq,anaerobic_replicate2.fastq\n");
	    output("\nEXAMPLE EXECUTION: PAIRED-END READS\n");
	    output("\njava Assembler <options> aerobic_replicate1_pairedend1.fastq%aerobic_replicate1_pairedend2.fastq,aerobic_replicate2_pairedend1.fastq%aerobic_replicate2_pairedend2.fastq anaerobic_replicate1_pairedend1.fastq%anaerobic_replicate1_pairedend2.fastq,anaerobic_replicate2_pairedend1.fastq%anaerobic_replicate2_pairedend2.fastq\n");
	    output("\n");
            System.exit(0);
	}

	// Initially set the number of threads
	Assembler.numThreads = Runtime.getRuntime().availableProcessors();
	if (Assembler.numThreads > 4) Assembler.numThreads = (int)Math.min(Assembler.numThreads * 0.75, 8.0);

	int i=0;
	while (i < args.length) {
	    if (args[i].startsWith("-k")) {
                if ((i == args.length-2) || (args[i+1].startsWith("-"))) {
                    output("Error - command line argument -k must be followed by an integer.\n");
                    System.exit(0);
                }
		Assembler.k = Integer.parseInt(args[i+1]);
		if (Assembler.minTranscriptLength == 0) Assembler.minTranscriptLength = 2*Assembler.k;
                i += 2;
	    } else if (args[i].startsWith("-c")) {
                if ((i == args.length-2) || (args[i+1].startsWith("-"))) {
                    output("Error - command line argument -c must be followed by a boolean.\n");
                    System.exit(0);
                }
		Assembler.singleEndOrientationReverseComplement = Boolean.valueOf(args[i+1]);
                i += 2;
	    } else if (args[i].startsWith("-s")) {
                if ((i == args.length-2) || (args[i+1].startsWith("-"))) {
                    output("Error - command line argument -s must be followed by a boolean.\n");
                    System.exit(0);
                }
		Assembler.unstranded = !Boolean.valueOf(args[i+1]);
                i += 2;
	    } else if (args[i].startsWith("-p")) {
                if ((i == args.length-2) || (args[i+1].startsWith("-"))) {
                    output("Error - command line argument -p must be followed by an integer.\n");
                    System.exit(0);
                }
		Assembler.numThreads = Integer.parseInt(args[i+1]);
                i += 2;
	    } else if (args[i].startsWith("-e")) {
                if ((i == args.length-2) || (args[i+1].startsWith("-"))) {
                    output("Error - command line argument -e must be followed by a boolean.\n");
                    System.exit(0);
                }
		Assembler.computeExpression = Boolean.valueOf(args[i+1]);
                i += 2;
	    } else if (args[i].startsWith("-a")) {
                if ((i == args.length-2) || (args[i+1].startsWith("-"))) {
                    output("Error - command line argument -a must be followed by a boolean.\n");
                    System.exit(0);
                }
		Assembler.stopAfterOneHit = Boolean.valueOf(args[i+1]);
                i += 2;
	    } else if (args[i].startsWith("-d")) {
                if ((i == args.length-2) || (args[i+1].startsWith("-"))) {
                    output("Error - command line argument -d must be followed by an integer.\n");
                    System.exit(0);
                }
		Assembler.maxPairedEndLength = Integer.parseInt(args[i+1]);
		SamOps.maxPairedEndLength = Integer.parseInt(args[i+1]);
                i += 2;
	    } else if (args[i].startsWith("-j")) {
                if ((i == args.length-2) || (args[i+1].startsWith("-"))) {
                    output("Error - command line argument -j must be followed by an integer.\n");
                    System.exit(0);
                }
		Assembler.minReadLength = Integer.parseInt(args[i+1]);
                i += 2;
	    } else if (args[i].startsWith("-n")) {
                if ((i == args.length-2) || (args[i+1].startsWith("-"))) {
                    output("Error - command line argument -n must be followed by an integer.\n");
                    System.exit(0);
                }
		Assembler.CAPACITY_POWER = Integer.parseInt(args[i+1]);
                i += 2;
	    } else if (args[i].startsWith("-b")) {
                if ((i == args.length-2) || (args[i+1].startsWith("-"))) {
                    output("Error - command line argument -b must be followed by an integer.\n");
                    System.exit(0);
                }
		Assembler.MIN_READS_MAPPING = Integer.parseInt(args[i+1]);
                i += 2;
	    } else if (args[i].startsWith("-u")) {
                if ((i == args.length-2) || (args[i+1].startsWith("-"))) {
                    output("Error - command line argument -u must be followed by an integer.\n");
                    System.exit(0);
                }
		Assembler.minTranscriptLength = Integer.parseInt(args[i+1]);
                i += 2;
	    } else if (args[i].startsWith("-w")) {
                if ((i == args.length-2) || (args[i+1].startsWith("-"))) {
                    output("Error - command line argument -w must be followed by an integer.\n");
                    System.exit(0);
                }
		Assembler.minSeedExpression = Integer.parseInt(args[i+1]);
                i += 2;
	    } else if (args[i].startsWith("-x")) {
                if ((i == args.length-2) || (args[i+1].startsWith("-"))) {
                    output("Error - command line argument -x must be followed by an integer.\n");
                    System.exit(0);
                }
		Assembler.minExpression = Integer.parseInt(args[i+1]);
                i += 2;
	    } else if (args[i].startsWith("-L")) {
                if ((i == args.length-2) || (args[i+1].startsWith("-"))) {
                    output("Error - command line argument -L must be followed by a comma separated list of names for the conditions.\n");
                    System.exit(0);
                }
		Assembler.labels = args[i+1].split(",");
		i += 2;
	    } else if (args[i].startsWith("-v")) {
                if ((i == args.length-2) || (args[i+1].startsWith("-"))) {
                    output("Error - command line argument -v must be followed by a boolean.\n");
                    System.exit(0);
                }
		Assembler.verbose = Boolean.valueOf(args[i+1]);
                i += 2;
	    } else if (args[i].equals("-ff")) {
		Assembler.pairedEndOrientation = "ff";
		i++;
	    } else if (args[i].equals("-fr")) {
		Assembler.pairedEndOrientation = "fr";
		i++;
	    } else if (args[i].equals("-rf")) {
		Assembler.pairedEndOrientation = "rf";
		i++;
	    } else if (args[i].equals("-rr")) {
		Assembler.pairedEndOrientation = "rr";
		i++;
	    } else if (args[i].equals("-TIME")) {
		time = true;
		i++;
	    } else {  // Sequencing reads files
		Assembler.conditionFiles.add(args[i]);
		i++;
	    }
	}

	// Handle erroneous command line arguments
	if (conditionFiles.size() == 0) {
	    output("Error - sequencing reads files (in FASTQ, QSEQ, or FASTA format) are required as command line arguments.\n");
	    System.exit(0);
	}
	for (int j=0; j<conditionFiles.size(); j++) {
	    String[] files = conditionFiles.get(j).split(",");
	    for (int k=0; k<files.length; k++) {
		String[] pairedEnd_files = files[k].split("%");
		if (!(new File(pairedEnd_files[0]).exists())) {
		    output("Error - file " + pairedEnd_files[0] + " does not exist.\n");
		    System.exit(0);
		}
		if ((pairedEnd_files.length > 1) && (!(new File(pairedEnd_files[1]).exists()))) {
		    output("Error - file " + pairedEnd_files[1] + " does not exist.\n");
		    System.exit(0);
		}
		if (pairedEnd_files.length > 1) Assembler.unstranded = false;  // Assume paired-end reads are strand specific
	    }
	}

	// If not set by command line arguments, set de novo assembled transcript length
	if (Assembler.minTranscriptLength == 0) Assembler.minTranscriptLength = 2*Assembler.k;

	// Output directory does not exist. Create it.
	if (!(new File(output_DIR).exists())) {
	    if (!(new File(output_DIR).mkdir())) {
		output("Error - could not create directory " + output_DIR + "." + "\n");
		System.exit(0);
	    }
	}

	try {
	    summaryWriter = new PrintWriter(new File(output_DIR + summaryFile));
	} catch (FileNotFoundException e) {
	    output("Error - could not create file " + (output_DIR+summaryFile) + "\n");
	    System.exit(0);
	}
    }



    /*************************************
     **********   Main Method   **********
     *************************************/

    public static void main(String[] args) {

	commandLineArguments(args);
	Assembler a = new Assembler();

    }

}



/***************************************************
 **********   ProcessReadsDeNovo_Thread   **********
 ***************************************************/

class ProcessReadsDeNovo_Thread extends Thread {

    private Assembler a;

    public ProcessReadsDeNovo_Thread(Assembler a) {
	this.a = a;
    }

    public void run() {
	a.processReads();
    }
}