/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 */


package  org.biojava.bio.seq.io;

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.OutputStream;
import java.io.PrintStream;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.Vector;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.biojava.bio.BioException;
import org.biojava.bio.seq.DNATools;
import org.biojava.bio.seq.ProteinTools;
import org.biojava.bio.symbol.Alignment;
import org.biojava.bio.symbol.FiniteAlphabet;
import org.biojava.bio.symbol.IllegalSymbolException;
import org.biojava.bio.symbol.SimpleAlignment;
import org.biojava.bio.symbol.Symbol;

/**
 * Reads and writes sequence alignments in MSF format.
 *
 * <p>{@link #read(BufferedReader)} learns the sequence names from the
 * <code>Name:</code> lines of the header, collects the interleaved sequence
 * rows that follow and builds a <code>SimpleAlignment</code> of gapped DNA
 * or protein sequences.  {@link #write(OutputStream, Alignment, int)} emits
 * a PileUp-style header followed by rows of up to 50 symbols printed in
 * groups of 10.</p>
 *
 * @author raemig
 * @author Thomas Down
 * @author Keith James
 * @author Nimesh Singh
 * @author Mark Schreiber
 * @author Matthew Pocock
 * @author Bradford Powell
 */

public class MSFAlignmentFormat
       implements AlignmentFormat {
    private static final boolean DEBUGPRINT = false;
    private static final int DNA = 1;
    private static final int PROTEIN = 2;

    /** Matches a header line of the form <code>Name: &lt;name&gt; Len:</code> (or <code>oo</code>); group 2 is the name. */
    private static final Pattern NAME_LINE =
            Pattern.compile("(Name:|NAME:)\\s+(.*?)\\s+(oo|OO|Len:|LEN:)");
    /** Matches a single whitespace character; used to strip spacing from sequence rows. */
    private static final Pattern WHITESPACE = Pattern.compile("\\s");
    /** Residue letters whose presence makes {@link #read(BufferedReader)} treat the data as protein. */
    private static final String PROTEIN_ONLY_LETTERS = "FLIPQE";

    /** Number of symbol groups written on one output row. */
    private static final int GROUPS_PER_ROW = 5;
    /** Number of symbols written in one group. */
    private static final int SYMBOLS_PER_GROUP = 10;


    public MSFAlignmentFormat () {
    }

    /**
     * Quick manual test: reads the MSF file named by the first argument
     * (or <code>SimpleMSF.msf</code> when no argument is given).
     *
     * @param args optional file name in <code>args[0]</code>
     */
    public static void main (String[] args) {
        String filename;
        if (args.length < 1) {
            filename = "SimpleMSF.msf";         //change to your favorite
        }
        else {
            filename = args[0];
        }
        BufferedReader reader = null;
        try {
            reader = new BufferedReader(new FileReader(filename));
            new MSFAlignmentFormat().read(reader);
        } catch (java.io.IOException e) {
            // Report the failure instead of silently ignoring it.
            System.err.println("MSFAlignmentFormat: unable to read " + filename + ": " + e);
        } finally {
            if (reader != null) {
                try {
                    reader.close();
                } catch (java.io.IOException ignored) {
                    // nothing useful can be done if closing fails
                }
            }
        }
    }

    /**
     * Reads an MSF Alignment File.
     *
     * <p>Any failure (I/O error, missing <code>Name:</code> section, truncated
     * input, illegal symbols, ...) is reported on <code>System.err</code> and
     * results in a <code>null</code> return value; no exception is
     * propagated to the caller.</p>
     *
     * @param reader The file reader
     * @return Alignment A SimpleAlignment consisting of the sequences in the
     *         file, or <code>null</code> if the input could not be parsed.
     */
    public Alignment read (BufferedReader reader) {
        Vector sequenceNames = new Vector();
        try {
            String line = reader.readLine();
            //parse past header
            while (line != null && line.toUpperCase().indexOf("NAME:") == -1) {
                line = reader.readLine();
            }
            if (line == null) {
                throw new BioException("No \"Name:\" line found before end of input");
            }

            //read each name (between Name: and Len:); stop at "//", a blank
            //line or the first line that is not a name line
            while (line != null && line.indexOf("//") == -1 && line.trim().length() != 0) {
                Matcher matcher = NAME_LINE.matcher(line);
                if (!matcher.find()) {
                    break;
                }
                sequenceNames.add(matcher.group(2).trim());
                line = reader.readLine();
            }
            int seqCount = sequenceNames.size();
            if (seqCount == 0) {
                throw new BioException("No sequence names could be parsed from the \"Name:\" lines");
            }

            StringBuffer sequenceData[] = new StringBuffer[seqCount];
            for (int seq = 0; seq < seqCount; seq++) {
                sequenceData[seq] = new StringBuffer();
            }

            //skip until a line that mentions the first sequence
            String firstName = (String) sequenceNames.get(0);
            while (line != null && line.indexOf(firstName) == -1) {
                line = reader.readLine();
            }
            if (line == null) {
                throw new BioException("No sequence data found for \"" + firstName + "\"");
            }

            //now on the first row of a block of sequence data
            while (line != null) {
                for (int seq = 0; seq < seqCount; seq++) {
                    String name = (String) sequenceNames.get(seq);
                    int nameStart = line.indexOf(name);
                    if (nameStart == -1) {
                        break;          //row for this sequence is missing; resynchronise below
                    }
                    //everything after the name is sequence data once spacing is removed
                    String residues = line.substring(nameStart + name.length());
                    sequenceData[seq].append(WHITESPACE.matcher(residues).replaceAll(""));

                    line = reader.readLine();
                    if (seq < seqCount - 1) {
                        if (line == null) {
                            throw new BioException("Unexpected end of input: block ends before the row for \""
                                    + sequenceNames.get(seq + 1) + "\"");
                        }
                        if (line.trim().length() == 0) {
                            break;      //block ended early; could be an error
                        }
                    }
                }
                //skip until the next line that mentions the first sequence
                while (line != null && line.indexOf(firstName) == -1) {
                    line = reader.readLine();
                }
            }

            //print them out for testing
            if (DEBUGPRINT) {
                for (int seq = 0; seq < seqCount; seq++) {
                    System.out.println((String) sequenceNames.get(seq)
                            + ":" + sequenceData[seq]);
                }
            }

            //decide between DNA and protein, then create the gapped sequences
            boolean protein = looksLikeProtein(sequenceData);
            LinkedHashMap sequenceDataMap = new LinkedHashMap();
            for (int seq = 0; seq < seqCount; seq++) {
                String name = (String) sequenceNames.get(seq);
                //'~' and '.' are treated as gaps (sometimes they are a term signal, not a gap)
                String sd = sequenceData[seq].toString().replace('~', '-').replace('.', '-');
                if (protein) {
                    sequenceDataMap.put(name, ProteinTools.createGappedProteinSequence(sd, name));
                }
                else {
                    sequenceDataMap.put(name, DNATools.createGappedDNASequence(sd, name));
                }
            }
            return new SimpleAlignment(sequenceDataMap);
        } catch (Exception e) {
            //the interface gives no way to report a failure, so log it and return null
            e.printStackTrace();
            System.err.println("MSFFormatReader " + e.getMessage());
        }
        return null;
    }

    /**
     * Tells whether any of the collected sequence data contains a letter
     * (F, L, I, P, Q or E, in either case) that marks it as protein.
     * The case conversion is done per character so that the result does not
     * depend on the default locale.
     */
    private static boolean looksLikeProtein (StringBuffer[] sequenceData) {
        for (int seq = 0; seq < sequenceData.length; seq++) {
            StringBuffer data = sequenceData[seq];
            for (int i = 0; i < data.length(); i++) {
                char c = Character.toUpperCase(data.charAt(i));
                if (PROTEIN_ONLY_LETTERS.indexOf(c) != -1) {
                    return true;
                }
            }
        }
        return false;
    }

    /**
     * Writes an alignment in MSF format.
     *
     * <p>If the alphabet of the first sequence is the DNA alphabet or one of
     * the protein alphabets, that alphabet decides the output type and
     * <code>fileType</code> is ignored; otherwise <code>fileType</code> is
     * used.  When neither yields a known type a message is printed on
     * <code>System.out</code> and nothing is written.  The stream is flushed
     * but not closed.</p>
     *
     * @param os the stream to write to
     * @param align the alignment to write
     * @param fileType the type to fall back on (1 for DNA, 2 for protein)
     * @throws BioException if the tokenization for the alphabet is not available
     * @throws IllegalSymbolException if a symbol cannot be tokenized
     */
    public void write(OutputStream os, Alignment align, int fileType) throws BioException, IllegalSymbolException {
        PrintStream out = new PrintStream(os);
        Object labels[] = align.getLabels().toArray();
        int numSeqs = labels.length;
        Iterator seqIts[] = new Iterator[numSeqs];
        String labelText[] = new String[numSeqs];
        int maxLabelLength = 0;
        for (int i = 0; i < numSeqs; i++) {
            seqIts[i] = align.symbolListForLabel(labels[i]).iterator();
            //labels are arbitrary objects, so use their string form rather than a cast
            labelText[i] = String.valueOf(labels[i]);
            if (labelText[i].length() > maxLabelLength) {
                maxLabelLength = labelText[i].length();
            }
        }
        String nl = System.getProperty("line.separator");

        //determine the file type from the alphabet of the first sequence when possible
        if (numSeqs > 0) {
            Object alphabet = align.symbolListForLabel(labels[0]).getAlphabet();
            if (alphabet == DNATools.getDNA()) {
                fileType = DNA;
            }
            else if (alphabet == ProteinTools.getAlphabet()
                    || alphabet == ProteinTools.getTAlphabet()) {
                fileType = PROTEIN;
            }
        }

        SymbolTokenization toke;
        String typeCode;
        if (fileType == DNA) {
            typeCode = "N";
            toke = DNATools.getDNA().getTokenization("token");
        }
        else if (fileType == PROTEIN) {
            typeCode = "P";
            toke = ProteinTools.getTAlphabet().getTokenization("token");
        }
        else {
            System.out.println("MSFAlignment.write -- File type not recognized.");
            return;
        }

        //row labels are right-justified in a column one wider than the longest label
        String rowLabels[] = new String[numSeqs];
        for (int i = 0; i < numSeqs; i++) {
            StringBuffer padded = new StringBuffer();
            for (int p = labelText[i].length(); p < maxLabelLength + 1; p++) {
                padded.append(' ');
            }
            rowLabels[i] = padded.append(labelText[i]).toString();
        }

        try {
            //file header
            out.print("PileUp" + nl);
            out.print(nl);
            out.print(" MSF: " + align.length() + "  Type: " + typeCode
                    + "   Check: " + 0 + "   .." + nl);
            out.print(nl);

            //one Name: line per sequence, names padded to a common width
            for (int i = 0; i < numSeqs; i++) {
                out.print(" Name: " + labelText[i]);
                for (int p = labelText[i].length(); p < maxLabelLength; p++) {
                    out.print(" ");
                }
                out.print("  Len: " + align.length() + " \tCheck: " + 0 + "\tWeight: " + 0 + nl); //this really should be seq length?
            }

            out.println(nl + "//" + nl + nl);
            //now should print the numbering line

            //blocks of rows, driven by the first sequence
            while (numSeqs > 0 && seqIts[0].hasNext()) {
                for (int i = 0; i < numSeqs; i++) {
                    out.print(rowLabels[i] + " ");
                    boolean exhausted = false;
                    for (int group = 0; group < GROUPS_PER_ROW && !exhausted; group++) {
                        out.print(" ");
                        for (int k = 0; k < SYMBOLS_PER_GROUP; k++) {
                            if (!seqIts[i].hasNext()) {
                                exhausted = true;       //sequence finished; end this row
                                break;
                            }
                            out.print(toke.tokenizeSymbol((Symbol) seqIts[i].next()));
                        }
                    }
                    out.print(nl);
                }
                out.print(nl);
            }
        } finally {
            //make sure everything written so far reaches the caller's stream
            out.flush();
        }
    }

    /**
     * Writes an alignment in MSF format, using DNA as the type when the
     * alphabet of the first sequence is not recognised.
     *
     * @param os the stream to write to
     * @param align the alignment to write
     * @throws BioException if the tokenization for the alphabet is not available
     * @throws IllegalSymbolException if a symbol cannot be tokenized
     */
    public void writeDna(OutputStream os, Alignment align) throws BioException, IllegalSymbolException {
        write(os, align, DNA);
    }

    /**
     * Writes an alignment in MSF format, using protein as the type when the
     * alphabet of the first sequence is not recognised.
     *
     * @param os the stream to write to
     * @param align the alignment to write
     * @throws BioException if the tokenization for the alphabet is not available
     * @throws IllegalSymbolException if a symbol cannot be tokenized
     */
    public void writeProtein(OutputStream os, Alignment align) throws BioException, IllegalSymbolException {
        write(os, align, PROTEIN);
    }

}               //end class



