/**
 * Title:        ProAlign<p>
 * Description:  <p>
 * Copyright:    Copyright (c) Ari Loytynoja<p>
 * License:      GNU GENERAL PUBLIC LICENSE<p>
 * @see          http://www.gnu.org/copyleft/gpl.html
 * Company:      ULB<p>
 * @author Ari Loytynoja
 * @version 1.0
 */
package proalign;

import java.io.File;
import java.util.Iterator;
import java.util.ArrayList;
import java.util.HashMap;
import java.io.IOException;
import java.io.FileNotFoundException;

/**
 * Reads sequence files in FASTA, PIR, Phylip, MSF or Nexus format.
 * The sequences are collected into a map: String 'name' -> String 'data'.
 * Sequence data is stored in upper case with gap symbols removed.
 * Problems are collected as text (see getErrors()) instead of being thrown.
 */
public class SequenceReader {
    // Scratch fields of the earlier implementation. They are no longer used
    // by this class but stay declared because they are package-visible.
    String row, str1, str2 = new String();
    // Initialised here so that the read methods and warnError() also work
    // when they are called before fromFile().
    HashMap seqmap = new HashMap();
    boolean allFine;
    String alphabet; // not used within this class
    String errors = "\n";

    SequenceReader() { }


    /**
     * Tries to read sequences from a file. The format is chosen from the
     * first non-empty row of the file.
     *
     * @param infile path of the file to read
     * @return TRUE if at least one sequence was read and no error was
     *         reported, FALSE otherwise (see getErrors())
     */
    boolean fromFile(String infile) {

        allFine = true;
        seqmap = new HashMap();
        errors = "\n";

        ProAlign.log("SequenceReader: "+infile);

        String firstRow = null;
        try {
            InFile in = new InFile(infile);
            try {
                firstRow = firstNonEmptyLine(in);
            } finally {
                in.close();
            }
        } catch(FileNotFoundException e) {
            warnError("File not found:" + infile);
        } catch(IOException e) {
            warnError("Error reading file:" + infile);
        }

        // The file could not be opened or read: already reported, and there
        // is no first row to look at.
        if(!allFine) {
            return false;
        }
        // An empty file has no first row; it falls through to the
        // "unknown format" warning below.
        if(firstRow == null) {
            firstRow = "";
        }

        if(firstRow.startsWith(">")) {
            readFasta(infile);
        } else if(firstRow.length() > 0 && Character.isDigit(firstRow.charAt(0))) {
            readPhylip(infile);
        } else if(firstRow.equalsIgnoreCase("#nexus")) {
            readNexus(infile);
        } else if(firstRow.equalsIgnoreCase("PileUp")||
                  firstRow.equalsIgnoreCase("!!AA_MULTIPLE_ALIGNMENT")||
                  firstRow.equalsIgnoreCase("!!NA_MULTIPLE_ALIGNMENT")) {
            readMsf(infile);
        } else {
            warnError("     The file does not look like Fasta-,\n"+
                      "    PIR-, Phylip-, MSF-, or Nexus-format!");
        }

        return seqmap.size() > 0 && allFine;
    }


    /**
     * Reads a FASTA or PIR file. A record whose name starts with "DL;" or
     * "P1;" is read as PIR (one description row, data terminated by '*');
     * every other record is read as FASTA (data until the next '>' row).
     *
     * @param infile path of the file to read
     */
    void readFasta(String infile) {

        try {
            InFile in = new InFile(infile);
            try {
                String line = firstNonEmptyLine(in);

                while(line != null) {
                    line = line.trim();
                    if(line.length() == 0) {
                        line = in.readLine();
                        continue;
                    }
                    if(!line.startsWith(">")) {
                        warnError("  File '" + infile + "'\n"+
                                  "    is supposed to start with '>'!");
                        break;
                    }

                    String name = line.substring(1).trim();
                    boolean pir = name.startsWith("DL;") || name.startsWith("P1;");
                    if(pir) {
                        name = name.substring(3).trim();
                        in.readLine(); // skip the PIR description row
                    }

                    // Same sequence name given twice!!
                    if(seqmap.containsKey(name)) {
                        warnError("  Sequence " + name + "\n     is double defined!");
                    }

                    // Both body readers return the row that follows the
                    // record, or null when the file is exhausted.
                    if(pir) {
                        line = readPirBody(in, name);
                    } else {
                        line = readFastaBody(in, name);
                    }
                }
            } finally {
                in.close();
            }

        } catch(FileNotFoundException e) {
            warnError("File not found:" + infile);

        } catch(IOException e) {
            warnError("Error reading file:" + infile);
        }
    }

    /**
     * Reads the data rows of one PIR record and stores the sequence.
     * Rows are collected until one ends with '*'; the stored data is cut
     * at the first '*'.
     *
     * @return the row following the record, or null at the end of the file
     */
    private String readPirBody(InFile in, String name) throws IOException {

        StringBuffer data = new StringBuffer();
        String line;
        while((line = in.readLine()) != null) {

            line = removeSpace(removeChar(line,'-')); // remove "gap" signs
            if(line.length() == 0) {  // Skips empty lines.
                continue;
            }
            data.append(line);

            if(line.endsWith("*")) { // The asterisk is needed but not stored.
                String all = data.toString();
                store(name, all.substring(0,all.indexOf("*")).toUpperCase());
                return in.readLine();
            }
        }
        // End of file without the terminating asterisk: report the record
        // that is broken instead of silently dropping it.
        warnError("  Sequence " + name + "\n     is not terminated with '*'!");
        return null;
    }

    /**
     * Reads the data rows of one FASTA record and stores the sequence.
     * The header test is done on the untouched row, so '-' and spaces are
     * removed from sequence data only and never from the next name.
     *
     * @return the (trimmed) header row of the next record, or null at the
     *         end of the file
     */
    private String readFastaBody(InFile in, String name) throws IOException {

        StringBuffer data = new StringBuffer();
        String next = null;
        String line;
        while((line = in.readLine()) != null) {
            String trimmed = line.trim();
            if(trimmed.startsWith(">")) { // a new record starts
                next = trimmed;
                break;
            }
            data.append(removeSpace(removeChar(line,'-')));
        }
        store(name, data.toString().toUpperCase());
        return next;
    }


    /**
     * Reads a Phylip file. The first non-empty row gives the number of
     * sequences; the first block gives 'name data' rows and any further
     * non-empty rows are appended to the sequences in turn (interleaved).
     *
     * @param infile path of the file to read
     */
    void readPhylip(String infile) {

        String[] seqNames = new String[0];
        StringBuffer[] seqData = new StringBuffer[0];
        int namesRead = 0;

        try {
            InFile in = new InFile(infile);
            try {
                String header = firstNonEmptyLine(in);
                int numTaxa = 0;
                if(header != null) {
                    int cut = header.indexOf(' ');
                    String count = (cut < 0) ? header : header.substring(0,cut);
                    try {
                        numTaxa = Integer.parseInt(count);
                    } catch(NumberFormatException e) {
                        numTaxa = 0;
                    }
                }
                if(numTaxa < 1) {
                    warnError("  File '" + infile + "'\n"+
                              "    has no valid Phylip header!");
                    return;
                }

                seqNames = new String[numTaxa];
                seqData = new StringBuffer[numTaxa];

                int slot = 0;
                String line;
                while((line = in.readLine()) != null) {
                    line = line.trim();
                    if(line.length() == 0) {
                        continue;
                    }
                    if(namesRead < numTaxa) {
                        int cut = line.indexOf(' ');
                        if(cut < 0) {
                            // No 'name data' pair on this row; reported
                            // below as a missing sequence.
                            break;
                        }
                        seqNames[slot] = line.substring(0,cut);
                        seqData[slot] = new StringBuffer(line.substring(cut+1).trim());
                        namesRead++;
                    } else {
                        seqData[slot].append(line);
                    }
                    slot++;
                    if(slot == numTaxa) {
                        slot = 0;
                    }
                }
            } finally {
                in.close();
            }

        } catch(FileNotFoundException e) {
            warnError("File not found:" + infile);

        } catch(IOException e) {
            warnError("Error reading file:" + infile);
        }

        if(namesRead < seqNames.length) {
            warnError("  File '" + infile + "'\n"+
                      "    defines fewer sequences than its header declares!");
        }

        // Only the slots that were actually filled are stored.
        for(int i=0; i<namesRead; i++) {

            if(seqmap.containsKey(seqNames[i])) {
                warnError("  Sequence " +seqNames[i]+ "\n     is double defined!");
            }

            String seq = removeSpace(seqData[i].toString().toUpperCase());
            seq = removeChar(seq,'-');

            store(seqNames[i], seq);
        }
    }


    /**
     * Reads an MSF file. Sequence names are taken from the 'Name:' rows
     * before the '//' row; after it, every row that starts with a known
     * name contributes data to that sequence. The symbols '-', '.' and '~'
     * are removed from the stored sequences.
     *
     * @param infile path of the file to read
     */
    void readMsf(String infile) {

        ArrayList seqNames = new ArrayList(); // names in file order
        HashMap seqData = new HashMap();      // String name -> StringBuffer data

        try {
            InFile in = new InFile(infile);
            try {
                boolean dataFound = false;
                String line;
                while((line = in.readLine()) != null) {
                    line = line.trim();
                    if(line.startsWith("//")) {
                        dataFound = true;
                        break;
                    }
                    int at = line.indexOf("Name:");
                    if(at < 0) {
                        continue;
                    }
                    String name = line.substring(at+5).trim();
                    int cut = name.indexOf(' ');
                    if(cut > -1) {
                        name = name.substring(0,cut).trim();
                    }
                    if(seqData.containsKey(name)) {
                        warnError("  Sequence " +name+ "\n     is double defined!");
                    } else {
                        seqNames.add(name);
                        seqData.put(name, new StringBuffer());
                    }
                }
                if(!dataFound) {
                    warnError("  File '" + infile + "'\n"+
                              "    has no '//' row before the alignment!");
                }

                while((line = in.readLine()) != null) {
                    line = line.trim();
                    if(line.length() == 0) {
                        continue;
                    }
                    int cut = line.indexOf(' ');
                    if(cut < 0) { // a lone word cannot be 'name data'
                        continue;
                    }
                    StringBuffer data = (StringBuffer) seqData.get(line.substring(0,cut));
                    if(data != null) {
                        data.append(line.substring(cut+1).trim());
                    }
                }
            } finally {
                in.close();
            }

        } catch(FileNotFoundException e) {
            warnError("File not found:" + infile);

        } catch(IOException e) {
            warnError("Error reading file:" + infile);
        }

        for(int i=0; i<seqNames.size(); i++) {
            String name = (String) seqNames.get(i);

            if(seqmap.containsKey(name)) {
                warnError("  Sequence " +name+ "\n     is double defined!");
            }

            String seq = removeSpace(seqData.get(name).toString().toUpperCase());
            seq = removeChar(seq,'-');
            seq = removeChar(seq,'.');
            seq = removeChar(seq,'~');

            store(name, seq);
        }
//
        // Logs one digit per character of the raw first sequence:
        // 1 for an upper-case character, 0 for anything else.
        ProAlign.log.print("CORE ");
        if(seqNames.size() > 0) {
            String first = seqData.get(seqNames.get(0)).toString();
            for(int m=0; m<first.length(); m++) {
                if(Character.isUpperCase(first.charAt(m))) {
                    ProAlign.log.print("1");
                } else {
                    ProAlign.log.print("0");
                }
            }
        }
        ProAlign.log.println();
//
    }


    /**
     * Reads a Nexus file. NTAX and NCHAR are taken from the DIMENSIONS
     * row, the MISSING and GAP symbols and the INTERLEAVE flag from any row
     * before the matrix. Reading stops after the first MATRIX. The missing
     * symbol (default '?') and, when declared, the gap symbol are removed
     * from the stored sequences.
     *
     * @param infile path of the file to read
     */
    void readNexus(String infile) {

        String[] seqNames = new String[0];
        StringBuffer[] seqData = new StringBuffer[0];
        int taxaRead = 0;
        char missing = '?';
        char gap = '-';
        boolean gapDeclared = false;

        try {
            InFile in = new InFile(infile);
            try {
                int numChar = 0;
                int numTaxa = 0;
                boolean interleave = false;

                String line;
                while((line = in.readLine()) != null) {

                    String upper = line.toUpperCase().trim();

                    if(upper.startsWith("DIMENSIONS")) {
                        numTaxa = nexusInt(upper,"NTAX",numTaxa);
                        numChar = nexusInt(upper,"NCHAR",numChar);
                    }

                    // Both symbols may be declared on the same row, so
                    // they are looked up independently.
                    String value = nexusValue(upper,"MISSING");
                    if(value != null && value.length() > 0) {
                        missing = value.charAt(0);
                    }
                    value = nexusValue(upper,"GAP");
                    if(value != null && value.length() > 0) {
                        gap = value.charAt(0);
                        gapDeclared = true;
                    }
                    if(upper.indexOf("INTERLEAVE") > -1) {
                        interleave = !"NO".equals(nexusValue(upper,"INTERLEAVE"));
                    }

                    if(upper.startsWith("MATRIX")) {
                        if(numTaxa < 1) {
                            warnError("  File '" + infile + "'\n"+
                                      "    has no valid NTAX before the MATRIX!");
                            break;
                        }
                        seqNames = new String[numTaxa];
                        seqData = new StringBuffer[numTaxa];
                        if(interleave) {
                            taxaRead = readNexusInterleaved(in, seqNames, seqData, numChar);
                        } else {
                            taxaRead = readNexusSequential(in, seqNames, seqData, numChar);
                        }
                        if(taxaRead < numTaxa) {
                            warnError("  File '" + infile + "'\n"+
                                      "    defines fewer sequences than NTAX declares!");
                        }
                        break;
                    }
                }
            } finally {
                in.close();
            }

        } catch(FileNotFoundException e) {
            warnError("File not found:" + infile);

        } catch(IOException e) {
            warnError("Error reading file:" + infile);
        }

        for(int i=0; i<taxaRead; i++) {
            String name = seqNames[i].trim();

            if(seqmap.containsKey(name)) {
                warnError("  Sequence " +name+
                          "\n     is double defined!");
            }

            String seq = removeChar(seqData[i].toString().toUpperCase(),missing);
            if(gapDeclared) {
                seq = removeChar(seq,gap);
            }

            store(name, seq);
        }
    }

    /**
     * Reads a non-interleaved Nexus matrix: for every taxon a name,
     * followed on the same and/or the following rows by data, until
     * numChar characters have been collected.
     *
     * @return the number of taxa for which a name was read
     */
    private int readNexusSequential(InFile in, String[] names,
                                    StringBuffer[] data, int numChar)
        throws IOException {

        int read = 0;
        for(int i=0; i<names.length; i++) {
            String line = nextNexusRow(in);
            if(line == null) {
                break;
            }
            int cut = line.indexOf(' ');
            if(cut > -1) {
                names[i] = line.substring(0,cut);
                data[i] = new StringBuffer(removeSpace(line.substring(cut+1).toUpperCase()));
            } else {
                // Name alone on its row: the data starts on the next row.
                names[i] = line;
                data[i] = new StringBuffer();
                line = nextNexusRow(in);
                if(line != null) {
                    data[i].append(removeSpace(line.toUpperCase()));
                }
            }
            read++;

            while(line != null && data[i].length() < numChar) {
                line = nextNexusRow(in);
                if(line != null) {
                    data[i].append(removeSpace(line.toUpperCase()));
                }
            }
            if(line == null) { // matrix or file ended
                break;
            }
        }
        return read;
    }

    /**
     * Reads an interleaved Nexus matrix: a first block of 'name data'
     * rows, then further blocks whose rows are appended to the taxa in
     * order (the text up to the first space of each row is dropped) until
     * the first sequence holds numChar characters.
     *
     * @return the number of taxa for which a name was read
     */
    private int readNexusInterleaved(InFile in, String[] names,
                                     StringBuffer[] data, int numChar)
        throws IOException {

        int read = 0;
        for(int i=0; i<names.length; i++) {
            String line = nextNexusRow(in);
            if(line == null) {
                return read;
            }
            int cut = line.indexOf(' ');
            if(cut > -1) {
                names[i] = line.substring(0,cut);
                data[i] = new StringBuffer(removeSpace(line.substring(cut+1).toUpperCase()));
            } else {
                names[i] = line;
                data[i] = new StringBuffer();
            }
            read++;
        }

        boolean ended = false;
        while(!ended && data[0].length() < numChar) {
            for(int i=0; i<names.length; i++) {
                String line = nextNexusRow(in);
                if(line == null) {
                    ended = true;
                    break;
                }
                line = line.substring(line.indexOf(' ')+1).toUpperCase();
                data[i].append(removeSpace(line));
            }
        }
        return read;
    }

    /**
     * Returns the next usable row of a Nexus matrix: trimmed, with
     * bracketed comments removed and empty rows skipped.
     *
     * @return the row, or null at the end of the file or when a row
     *         starting with ';' closes the matrix
     */
    private String nextNexusRow(InFile in) throws IOException {
        String line;
        while((line = in.readLine()) != null) {
            line = line.trim();
            if(line.indexOf("[") > -1) {
                line = removeComment(line);
            }
            if(line.length() == 0) {
                continue;
            }
            if(line.startsWith(";")) {
                return null;
            }
            return line;
        }
        return null;
    }

    /**
     * Finds 'KEY = value' in a row and returns the value: the text after
     * the '=' up to the next space or ';'. Spaces around the '=' are
     * allowed. An occurrence of the key that is not followed by '=' is
     * skipped.
     *
     * @return the value, or null if the row holds no such assignment
     */
    private String nexusValue(String row, String key) {
        int from = 0;
        while(true) {
            int at = row.indexOf(key, from);
            if(at < 0) {
                return null;
            }
            int pos = at + key.length();
            while(pos < row.length() && row.charAt(pos) == ' ') {
                pos++;
            }
            if(pos < row.length() && row.charAt(pos) == '=') {
                pos++;
                while(pos < row.length() && row.charAt(pos) == ' ') {
                    pos++;
                }
                int end = pos;
                while(end < row.length()
                      && row.charAt(end) != ' ' && row.charAt(end) != ';') {
                    end++;
                }
                return row.substring(pos,end);
            }
            from = at + key.length();
        }
    }

    /**
     * Reads 'KEY = number' from a row.
     *
     * @return the number, or fallback if the key is absent; a value that
     *         is not a number is reported and also gives fallback
     */
    private int nexusInt(String row, String key, int fallback) {
        String value = nexusValue(row,key);
        if(value == null) {
            return fallback;
        }
        try {
            return Integer.parseInt(value);
        } catch(NumberFormatException e) {
            warnError("  Nexus value " + key + "=" + value + "\n     is not a number!");
            return fallback;
        }
    }

    /**
     * Returns the first row that is not empty after trimming.
     *
     * @return the trimmed row, or null if the file holds no such row
     */
    private String firstNonEmptyLine(InFile in) throws IOException {
        String line;
        while((line = in.readLine()) != null) {
            line = line.trim();
            if(line.length() > 0) {
                return line;
            }
        }
        return null;
    }

    /**
     * Stores one sequence in the map and writes it to the log.
     */
    private void store(String name, String seq) {
        seqmap.put(name, seq);
        ProAlign.log(">"+name+"\n"+seq);
    }


    /**
     * Returns the row without its space (' ') characters.
     * Other white space, such as tabs, is kept.
     */
    String removeSpace(String row) {
        return removeChar(row,' ');
    }

    /**
     * Returns the row without any occurrence of the given character.
     * The row itself is returned if the character does not occur.
     */
    String removeChar(String row,char rem) {
        if(row.indexOf(rem) < 0) {
            return row;
        }
        char[] kept = new char[row.length()];
        int n = 0;
        for(int j=0; j<row.length(); j++) {
            char c = row.charAt(j);
            if(c != rem) {
                kept[n++] = c;
            }
        }
        return new String(kept,0,n);
    }

    /**
     * Returns the row without its bracketed comments, trimmed.
     * Everything from a '[' up to and including the next ']' is dropped;
     * a comment left open runs to the end of the row.
     */
    String removeComment(String row) {

        StringBuffer content = new StringBuffer();
        boolean keep = true;
        for(int i=0; i<row.length(); i++) {
            char c = row.charAt(i);
            if(c == '[') {
                keep = false;
            } else if(c == ']') {
                keep = true;
            } else if(keep) {
                content.append(c);
            }
        }
        return content.toString().trim();
    }


    /**
     * Reading sequences returns just TRUE or FALSE.
     * Return sequences if reading the file was OK.
     */
    public HashMap getSequences() {
        return seqmap;
    }

    /**
     * Marks the reading as failed, writes the message to the log and
     * appends it to the collected error text.
     */
    void warnError(String msg) {
        allFine = false;
        ProAlign.log.println("SequenceReader: "+msg);
        errors += msg+"\n";
    }

    /**
     * Returns the error messages collected so far, one per row.
     */
    String getErrors() {
        return errors;
    }

/*
    public static void main(String[] args) {
        SequenceReader sr = new SequenceReader();
        if(sr.fromFile(args[0])) {

            HashMap seqs = sr.getSequences();
            Iterator it = seqs.keySet().iterator();
            while(it.hasNext()) {
                String name = (String) it.next();
                String seq = (String) seqs.get(name);
                System.out.println(name+"\n"+seq);
            }
        }
    }
//*/
}










