File: seqToolsPluginDlg.cpp

package info (click to toggle)
massxpert 2.3.6-1squeeze1
links: PTS, VCS
area: main
in suites: squeeze
size: 20,736 kB
ctags: 3,541
sloc: cpp: 44,108; xml: 7,381; sh: 604; makefile: 108; ansic: 7
file content (668 lines) | stat: -rw-r--r-- 16,809 bytes
parent folder | download | duplicates (3)
/* massXpert - the true massist's program.
   --------------------------------------
   Copyright (C) 2006,2007 Filippo Rusconi

   http://www.filomace.org/massXpert

   This file is part of the massXpert project.

   The massxpert project is the successor to the "GNU polyxmass"
   project that is an official GNU project package (see
   www.gnu.org). The massXpert project is not endorsed by the GNU
   project, although it is released ---in its entirety--- under the
   GNU General Public License. A huge part of the code in massXpert
   is actually a C++ rewrite of code in GNU polyxmass. As such
   massXpert was started at the Centre National de la Recherche
   Scientifique (FRANCE), that granted me the formal authorization to
   publish it under this Free Software License.

   This software is free software; you can redistribute it and/or
   modify it under the terms of the GNU  General Public
   License version 3, as published by the Free Software Foundation.
   

   This software is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this software; if not, write to the

   Free Software Foundation, Inc.,

   51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
*/

#include <QtGui>

#include "seqToolsPluginDlg.hpp"

SeqToolsPluginDlg::SeqToolsPluginDlg (QWidget *parent)
  : QDialog (parent, Qt::Dialog)
{
  // Build the dialog: a title label, the group box with the input and
  // output editors, and the group box with the action controls.

  // NOTE(review): parent is presumably the application main window
  // that emits aboutToClose (); this is an unchecked downcast.
  mp_parent = static_cast<QMainWindow *> (parent);
  
  // No dictionary is loaded yet. Give every piece of dictionary state
  // a defined value: the code lengths are only assigned by
  // loadDictionary () when the dictionary file has a length line, but
  // the input code length is read each time a sequence is parsed.
  m_strictCodeLength = false;
  m_strictCodeCase = false;
  m_dictionaryLoaded = false;
  m_inputCodeLength = 0;
  m_outputCodeLength = 0;
  
  QVBoxLayout *mainLayout = new QVBoxLayout;
  
  QLabel *label = new QLabel (tr ("Sequence Manipulation Tools Plugin"));
    
  mainLayout->addWidget (label, 0, Qt::AlignHCenter);
    
  createEditorGroupBox ();
  mainLayout->addWidget (mp_editorGroupBox);

  // The documents are members of this dialog; the editors only
  // display them.
  m_inputDocument.setParent (this);
  m_outputDocument.setParent (this);

  mp_inputEditor->setDocument (&m_inputDocument);
  mp_outputEditor->setDocument (&m_outputDocument);
  
  createActionGroupBox ();
  mainLayout->addWidget (mp_actionGroupBox);
  
  setLayout (mainLayout);

  setAttribute (Qt::WA_DeleteOnClose);
  
  setWindowTitle (tr ("Sequence Manipulation Tools Plugin"));

  // Close this dialog when the parent window announces it is closing.
  // Without a parent there is nothing to connect to.
  if (mp_parent)
    {
      connect (mp_parent, 
               SIGNAL (aboutToClose ()),
               this,
               SLOT (parentClosing ()));
    }
}


void
SeqToolsPluginDlg::parentClosing ()
{
  QDialog::reject ();
}


void 
SeqToolsPluginDlg::createEditorGroupBox ()
{
    mp_editorGroupBox = new QGroupBox (tr ("Manipulated Sequences"));

    QHBoxLayout *layout = new QHBoxLayout;

    mp_inputEditor = new QTextEdit;
    layout->addWidget (mp_inputEditor);

    mp_outputEditor = new QTextEdit;
    layout->addWidget (mp_outputEditor);
    
    mp_editorGroupBox->setLayout(layout);
}


void 
SeqToolsPluginDlg::createActionGroupBox ()
{
  mp_actionGroupBox = new QGroupBox (tr ("Actions"));

  QHBoxLayout *layout = new QHBoxLayout;

  mp_actionComboBox = new QComboBox ();

  QStringList menuItems = QStringList () 
    << tr ("To Upper Case")
    << tr ("To Lower Case")
    << tr ("Dictionary-based Conversion");
  
  mp_actionComboBox->addItems (menuItems);

  layout->addWidget (mp_actionComboBox);

  mp_executePushButton = new QPushButton (tr ("&Execute"));
  connect(mp_executePushButton, 
	  SIGNAL (clicked ()), 
	  this, 
	  SLOT (execute ()));

  layout->addWidget (mp_executePushButton);
  
  mp_loadDicPushButton = new QPushButton (tr ("&Load Dictionary"));

  connect(mp_loadDicPushButton, 
	  SIGNAL (clicked ()), 
	  this, 
	  SLOT (loadDictionary ()));

  layout->addWidget (mp_loadDicPushButton);

  mp_actionGroupBox->setLayout (layout);
}


void 
SeqToolsPluginDlg::loadDictionary ()
{

  // Reinitialize the two critical variables
  m_strictCodeLength = false;
  m_strictCodeCase = false;
  // as it is possible to load a different dictionary without having
  // to close the dialog.


  // Pop up a file chooser dialog, and load the file.
  // Syntax of the file is per line:
  
  // Arg>R
  
  // Pro>P
  
  QString filePath =
    QFileDialog::getOpenFileName (this, tr ("Open Dictionary File"),
				  QDir::homePath (),
				  tr ("Dictionary files (*.dic)"));
  
  if (!QFile::exists (filePath))
    return;
  
  // Load the file and for each line deconstruct the item into two
  // QString objects that are used to make a QHash entry in
  // QHash<QString, QString> m_dictionaryHash.

  qint64 lineLength;
  QString line;
  char buffer [1024];
  
  QFile file (filePath);

  if (!file.open (QFile::ReadOnly)) 
    {
      m_dictionaryLoaded = false;
      return;
    }
  
  lineLength = file.readLine (buffer, sizeof (buffer));
  
  while (lineLength != -1)
    {
      // The line is now in buffer, and we want to convert
      // it to Unicode by setting it in a QString.
      line = buffer;
      
      if (line.startsWith ('#'))
	{
	  lineLength = file.readLine (buffer, sizeof (buffer));
	  continue;
	}
      
      if (line.simplified() == "strictCodeLength")
	{
	  m_strictCodeLength = true;
	  
	  lineLength = file.readLine (buffer, sizeof (buffer));
	  continue;
	}

        if (line.simplified() == "strictCodeCase")
	{
	  m_strictCodeCase = true;
	  
	  lineLength = file.readLine (buffer, sizeof (buffer));
	  continue;
	}

      QStringList stringList = line.split (">", QString::SkipEmptyParts);
      
      if (!stringList.size () == 2)
	{
	  QMessageBox::warning (this,
				tr ("Sequence Manipulation Tools Plugin"),
				tr ("Failed to load dictionary."),
				QMessageBox::Ok);
	  
	  m_dictionaryLoaded = false;
	  file.close ();
	  return;
	}
      
      if (stringList.last ().endsWith ("\r\n"))
	stringList.last ().chop(2);
      else if (stringList.last ().endsWith ('\n'))
	stringList.last ().chop(1);

      if (stringList.first ().contains (QRegExp ("[0-9]")))
	{
	  bool ok = false;
	  m_inputCodeLength = stringList.first ().toInt (&ok, 10);
	  
	  if (!m_inputCodeLength && !ok)
	    {
	      QMessageBox::warning (this,
				    tr ("Sequence Manipulation Tools Plugin"),
				    tr ("Failed to load dictionary."),
				    QMessageBox::Ok);
	      
	      m_dictionaryLoaded = false;
	      file.close ();
	      return;
	    }
	  
	  ok = false;

	  m_outputCodeLength = stringList.last ().toInt (&ok, 10);
	  
	  if (!m_outputCodeLength && !ok)
	    {
	      QMessageBox::warning (this,
				    tr ("Sequence Manipulation Tools Plugin"),
				    tr ("Failed to load dictionary."),
				    QMessageBox::Ok);
	      
	      m_dictionaryLoaded = false;
	      file.close ();
	      return;
	    }

	  lineLength = file.readLine (buffer, sizeof (buffer));

	  continue;
	}
      
      // We can now feed the dictionary hash. But, if the character
      // case is not important, then set the first item of the hash to
      // uppercase. We will do the same while parsing the text to
      // process, so that indeed, the result is that we are allowing
      // whatever case for the text.

      if (!m_strictCodeCase)
	{
	  m_dictionaryHash.insert (stringList.first ().toUpper(), stringList.last ());
	}
      else
	{
	  m_dictionaryHash.insert (stringList.first (), stringList.last ());
	}
      
      lineLength = file.readLine (buffer, sizeof (buffer));
    }
  
  file.close ();
  
//   qDebug () << __FILE__ << __LINE__ 
// 	    << "input/output code lengths" 
// 	    << m_inputCodeLength << "/" << m_outputCodeLength;
  
  if (m_dictionaryHash.size ())
    m_dictionaryLoaded = true;
  else
    {
      QMessageBox::warning (this,
			    tr ("Sequence Manipulation Tools Plugin"),
			    tr ("Failed to load dictionary."),
			    QMessageBox::Ok);
      
      m_dictionaryLoaded = false;
      return;
    }
  
//   qDebug () << "done loadDictionary";
}


void 
SeqToolsPluginDlg::execute ()
{
  // What's the task to be performed?

  QString comboText = mp_actionComboBox->currentText ();
  
  if (comboText == tr ("To Upper Case"))
    {
      QString docText = mp_inputEditor->toPlainText ();
      
      docText = docText.toUpper ();
      
      mp_outputEditor->clear ();

      mp_outputEditor->setPlainText (docText);
    }
  else if (comboText == tr ("To Lower Case"))
    {
      QString docText = mp_inputEditor->toPlainText ();
      
      docText = docText.toLower ();
      
      mp_outputEditor->clear ();

      mp_outputEditor->setPlainText (docText);
    }
  else if (comboText == tr ("Dictionary-based Conversion"))
    {
      if (!m_dictionaryLoaded)
	{
	  QMessageBox::warning (this,
				tr ("Sequence Manipulation Tools Plugin"),
				tr ("Please, load a dictionary first."),
				QMessageBox::Ok);
	  return;
	}
      
      QString docText = mp_inputEditor->toPlainText ();
      
      QStringList codeList;
      
      if (!makeCodeList (docText, codeList))
	{
	  QMessageBox::warning (this,
				tr ("Sequence Manipulation Tools Plugin"),
				tr ("Failed to parse the input sequence."),
				QMessageBox::Ok);
	  return;
	}
      
      for (int iter = 0; iter < codeList.size (); iter++)
	{
	  QString curString = codeList.at (iter);
	  
	  QHash<QString, QString>::const_iterator hashIter = 
	    m_dictionaryHash.find (curString);

	  if (hashIter != m_dictionaryHash.end ())
	    codeList.replace (iter, hashIter.value ());
	  //curString = hashIter.value ();
	  else
	    {
	      QString message = QString (tr ("Failed to convert monomer code: %1")
					 .arg(curString));
	      
	      QMessageBox::warning (this,
				    tr ("Sequence Manipulation Tools Plugin"),
				    message,
				    QMessageBox::Ok);
	      return;
	    }
	}
      
      mp_outputEditor->clear ();

      docText.clear ();
      
      for (int iter = 0; iter < codeList.size (); iter++)
	docText += codeList.at (iter);
      
      mp_outputEditor->setPlainText (docText);
    }
}


int
SeqToolsPluginDlg::makeCodeList (QString &text, QStringList &stringList)
{
  // Split 'text' into its successive codes by calling nextCode ()
  // until it no longer returns a code, and append each code to
  // 'stringList'.
  //
  // Returns -1 if the last call to nextCode () returned -1, otherwise
  // the number of items in 'stringList'.

  int position = 0;
  int parsedLength = -1;
  QString currentCode;
  
  while ((parsedLength = nextCode (text, &currentCode, &position)) >= 1)
    {
      // When the character case is not important, the codes are
      // stored in uppercase: this is also how the input codes of the
      // dictionary get stored in that case.
      stringList.append (m_strictCodeCase 
                         ? currentCode : currentCode.toUpper ());
      
      // nextCode () left 'position' on the last character it parsed:
      // step over it so that the next call starts at the following
      // character.
      ++position;
    }

  return (parsedLength == -1) ? -1 : stringList.size ();
}


int
SeqToolsPluginDlg::nextCode (QString &text, QString *code, int *index)
{
  // We get a sequence of monomer codes (like "LysArgGlu" for example)
  // in 'text' and we have to extract the next code, starting at
  // position '*index'.
  //
  // Returns the number of characters in the extracted code, which is
  // copied to '*code'; '*index' is then updated to the index of the
  // last character parsed for that code, the caller being responsible
  // for incrementing it by one before the next call.
  //
  // Returns 0, with '*code' empty and '*index' unchanged, if there is
  // no code to extract, which is the case when '*index' is at or past
  // the end of the text.
  //
  // Returns -1, with '*code' empty and '*index' unchanged, if an
  // error occurs.

  Q_ASSERT (code);
  Q_ASSERT (index);

  code->clear ();
  
  QString newCode;
  const int textLength = text.length ();

  // At this point there are different situations:
  
  // 1. The input code length is strict, that is the rule is that the
  // number of characters in the input code has to be equal to what is
  // specified in the dictionary file.
  
  // 2. The input code length is *not* strict, in which case the rule
  // is that the number of characters in the input code has to be at
  // most equal to what is specified in the dictionary file and at
  // minimum 1.
  
  if (m_strictCodeLength)
    {
      // A code length smaller than 1 cannot be honoured: the index
      // would not move forward in the text and the caller would never
      // reach the end of it.

      if (m_inputCodeLength < 1)
        {
          qDebug () << __FILE__ << __LINE__
                    << "Invalid input code length:" << m_inputCodeLength;
          
          return -1;
        }
      
      // Reaching the end of the text is not an error: there is simply
      // no code left.

      if (*index >= textLength)
        return 0;
      
      // Test if we do not go out of the boundaries of the text:

      if (*index + m_inputCodeLength > textLength)
        {
          // There is some text left, but not enough of it to make a
          // full code.

          int residualCharacters = textLength - *index;
          
          newCode = text.mid (*index, residualCharacters);
          
          if (!newCode.isEmpty ())
            {
              qDebug ()  << __FILE__ << __LINE__
                         << "Incomplete remaining code: " << newCode;
            }
          
          return -1;
        }

      // Well we can get a full code.

      newCode = text.mid (*index, m_inputCodeLength);
      
      if (m_strictCodeCase)
        {
          // The first character of the code must not be lowercase
          // and the other ones must not be uppercase. Test this.

          for (int charIndex = 0; charIndex < newCode.length (); ++charIndex)
            {
              const QChar::Category category = 
                newCode.at (charIndex).category ();
              
              if (charIndex == 0 && category == QChar::Letter_Lowercase)
                {
                  qDebug () << __FILE__ << __LINE__
                            << "First character of code should be Uppercase:"
                            << newCode;
                  
                  return -1;
                }
              
              if (charIndex != 0 && category == QChar::Letter_Uppercase)
                {
                  qDebug () << __FILE__ << __LINE__
                            << "Non-first character of code should be Lowercase:"
                            << newCode;
                  
                  return -1;
                }
            }
        }

      // The code is now fully tested.

      *code = newCode;

      // Set index to point to the last character that was parsed.
      *index = *index + m_inputCodeLength - 1;
      
      return code->length ();
    }
  // End of
  // if (m_strictCodeLength)
        
  // If we are here, then we are handling codes that can be of
  // variable size: a code starts with a non-lowercase letter, which
  // is followed by lowercase letters, for a total of at most
  // m_inputCodeLength characters. The general idea is to iterate in
  // the text and check each character before adding it to the code.

  int parsedCount = 0;

  while (parsedCount < m_inputCodeLength 
         && *index + parsedCount < textLength)
    {
      QChar curChar = text.at (*index + parsedCount);
      
      if (!curChar.isLetter ())
        return -1;
      
      bool isLower = (curChar.category () == QChar::Letter_Lowercase);
      
      if (parsedCount == 0)
        {
          // The first character of a code cannot be lowercase.
          if (isLower)
            return -1;
        }
      else if (!isLower)
        {
          // This character starts the next code in the sequence,
          // which we must not "invade": the current code is complete.
          break;
        }
      
      newCode += curChar;
      ++parsedCount;
    }
  
  // We parsed at most m_inputCodeLength characters out of 'text'. If
  // at least one character was parsed, move the index to the last of
  // these characters. The caller is responsible for incrementing the
  // index by one so as not to reparse that last character.

  if (parsedCount > 0)
    *index = *index + parsedCount - 1;
  
  *code = newCode;
  
  return code->length ();
}