File: pm3argparse.py

package info (click to toggle)
promod3 3.4.2%2Bds-1
links: PTS, VCS
area: main
in suites: trixie
size: 966,596 kB
sloc: cpp: 55,820; python: 18,058; makefile: 85; sh: 51
file content (913 lines) | stat: -rw-r--r-- 40,055 bytes
parent folder | download | duplicates (3)
# Copyright (c) 2013-2020, SIB - Swiss Institute of Bioinformatics and
#                          Biozentrum - University of Basel
# 
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# 
#   http://www.apache.org/licenses/LICENSE-2.0
# 
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


"""
Extensions for the argparse module.
"""

import argparse
import sys
import os
import gzip
import tempfile
#try:
#    import ujson as json
#except ImportError:
import json

import ost
from ost import io, seq

from promod3.core import helper
from promod3 import loop, modelling

def _TmpForGZip(filename, suffix, msg_prefix):
    """Unpack a gzipped file into a named temporary file.

    The existence of *filename* is checked first (exit code 12 is handed to
    ``helper.FileExists``). The content is then decompressed completely into
    memory and written to a :class:`tempfile.NamedTemporaryFile`.

    :param filename: Path of the gzipped input file.
    :type filename: :class:`str`

    :param suffix: Suffix for the name of the temporary file, e.g. '.fas'.
    :type suffix: :class:`str`

    :param msg_prefix: Text put in front of error messages.
    :type msg_prefix: :class:`str`

    :returns: The still open temporary file with the unpacked content. The
              caller needs to keep a reference to it for as long as the file
              (``.name``) is used and should close it afterwards.

    Exits with code 14 (via ``helper.MsgErrorAndExit``) if the gzip file
    cannot be read.
    """
    helper.FileExists(msg_prefix, 12, filename)
    try:
        # context manager: the handle gets closed even if reading fails
        with gzip.open(filename) as zip_fh:
            unzip_content = zip_fh.read()
    except (IOError, EOFError) as ioe:
        # EOFError is raised by gzip for truncated archives
        helper.MsgErrorAndExit(msg_prefix + " gzip file '" + filename +
                               "' cannot be opened: " + str(ioe), 14)
    unzip_file = tempfile.NamedTemporaryFile(mode='wb', suffix=suffix)
    unzip_file.write(unzip_content)
    # make sure the content is on disk before somebody reads it by name
    unzip_file.flush()
    return unzip_file

def _CheckJSONAlnSeqKeyType(key_name, val_type, json_aln, seqtype, json_source):
    '''Check a key/value in a sequence exists and is of certain type.
    '''
    if key_name not in list(json_aln[seqtype].keys()):
        helper.MsgErrorAndExit("JSON 'alignmentlist' '%s' " % seqtype+
                               "from '%s' is " % json_source+
                               "missing the '%s' key" % key_name, 27)
    altype = type(json_aln[seqtype][key_name])

    if val_type is str or val_type is str:
        if not (altype is str or altype is str):
            helper.MsgErrorAndExit("JSON 'alignmentlist' '%s' " % seqtype+
                                   "'%s' from" % key_name+
                                   "'%s' is not a " % json_source+
                                   "%s" % str(val_type), 28)
    elif not altype is val_type:
        helper.MsgErrorAndExit("JSON 'alignmentlist' '%s' " % seqtype+
                               "'%s' from" % key_name+
                               "'%s' is not a " % json_source+
                               "%s" % str(val_type), 28)

def _GetAlnFromJSON(json_object, json_source):
    """Create alignments from a JSON object.

    Iterate the alignments in a JSON object and deliver OST alignments via the
    yield operator.
    """
    # alignments are stored via the 'alignmentlist' key
    if 'alignmentlist' not in list(json_object.keys()):
        helper.MsgErrorAndExit("JSON object from '%s' does not " % json_source+
                               "provide an 'alignmentlist' key.", 21)
    # alignments come as lists, to enable hetero oligos
    if not type(json_object['alignmentlist']) is list:
        helper.MsgErrorAndExit("JSON object from '%s' does not" % json_source+
                               "provide a list behind 'alignmentlist'.", 24)
    # take the alignments apart, each alignment is a dictionary
    for json_aln in json_object['alignmentlist']:
        # json_aln needs to be a dictionary
        if not type(json_aln) is dict:
            helper.MsgErrorAndExit("JSON 'alignmentlist' member from "+
                                   "'%s' is not a ' " %  json_source+
                                   " dictionary: %s" % json_aln, 25)
        # an alignment has a 'target' and a 'template' dictionary
        # each of them has a 'name' and a 'seqres' pair
        for flav in ['target', 'template']:
            if flav not in list(json_aln.keys()):
                helper.MsgErrorAndExit("JSON 'alignmentlist' from "+
                                       "'%s' does not " % json_source+
                                       "provide a '%s' key." % flav, 22)
            # check sequence to be dictionary
            if not type(json_aln[flav]) is dict:
                helper.MsgErrorAndExit("JSON 'alignmentlist' '%s' from" % flav+
                                       "'%s' is not a " % json_source+
                                       "dictionary: %s" % json_aln[flav], 26)
            # check for keys needed by both sequences:
            for aln_key in ['name', 'seqres']:
                _CheckJSONAlnSeqKeyType(aln_key, str, json_aln, flav,
                                        json_source)
        _CheckJSONAlnSeqKeyType('offset', int, json_aln, 'template',
                                json_source)
        # create and yield alignment
        trg_name = str(json_aln['target']['name']).strip()
        trg_seq = str(json_aln['target']['seqres'])
        tpl_name = str(json_aln['template']['name']).strip()
        tpl_seq = str(json_aln['template']['seqres'])
        new_aln = seq.CreateAlignment(seq.CreateSequence(trg_name, trg_seq),
                                      seq.CreateSequence(tpl_name, tpl_seq))
        new_aln.SetSequenceRole(0, 'TARGET')
        new_aln.SetSequenceRole(1, 'TEMPLATE')
        new_aln.SetSequenceOffset(1, json_aln['template']['offset'])
        yield new_aln

def _GetJSONOBject(json_input):
    """Get a JSON object out of a string which may be an object or a path.

    If the input string starts with '{', we assume its a JSON object. File names
    starting with '{' would be a bit weird.

    If we are looking at a file, check and load it.

    For a JSON object, check that everything is there. No checks for
    superfluous stuff.

    As returnvalue we only use JSON objects.
    """
    if json_input[0] != '{':
        helper.FileExists("JSON Alignment", 12, json_input)
        is_gz = helper.FileGzip("JSON alignment", 13, json_input)
        readfile = json_input
        if is_gz:
            unzip_file = _TmpForGZip(json_input, '.json', "JSON alignment")
            readfile = unzip_file.name
        try:
            jfh = open(readfile)
        except IOError as ioe:
            helper.MsgErrorAndExit("'--json' file '%s' " % json_input+
                                   "can not be processed: %s" % ioe.strerror,
                                   19)
        except:
            raise
        try:
            json_object = json.load(jfh)
        except json.JSONDecodeError as e:
            helper.MsgErrorAndExit("'--json' file '%s' could " % json_input+
                                   "not be processed into a JSON object, "+
                                   "probably it's empty.", 20)
        except:
            raise
        finally:
            jfh.close()
    else:
        try:
            json_object = json.loads(json_input)
        except ValueError as vae:
            helper.MsgErrorAndExit("'--json' string '%s' " % json_input+\
                                   "could not be decoded: %s" % str(vae), 23)
    return json_object

def _FetchAlnFromFile(seqfile, allow_multitemplate, format):
    """Load an alignment file and return it with the target sequence first.

    Plain and gzipped files are supported. The target is the sequence named
    "trg" or "target" (case insensitive); if there is none, the first sequence
    of the file stays in front. The returned alignment is newly created, has
    role 'TARGET' for the first and 'TEMPLATE' for all other sequences.

    :param seqfile: Path to the alignment file.
    :param allow_multitemplate: If False, more than 2 sequences are an error.
    :param format: Format handed to ``io.LoadAlignment`` and used as option
                   name in messages (e.g. "fasta", "clustal").

    Problems are reported via ``helper.MsgErrorAndExit`` with codes 15
    (empty file), 16 (unsupported number of sequences), 17 (multiple
    targets) and 18 (any other error when reading).
    """
    argstr = "'--" + format + " " + seqfile + "'"
    helper.FileExists("Alignment", 12, seqfile)
    # gzipped input gets unpacked into a temporary file before loading
    is_gz = helper.FileGzip("Alignment", 13, seqfile)
    if is_gz:
        unzip_file = _TmpForGZip(seqfile, '.fas', "Alignment")
        readfile = unzip_file.name
    else:
        readfile = seqfile
    try:
        aln = io.LoadAlignment(readfile, format=format)
    except Exception as exc: #pylint: disable=broad-except
        empty_file_msgs = ('Bad FASTA file: File is empty',
                           'Bad CLUSTAL file: File is empty')
        if str(exc) not in empty_file_msgs:
            helper.MsgErrorAndExit(argstr + ": error when reading alignment "+
                                   "file: " + str(exc), 18)
        else:
            helper.MsgErrorAndExit(argstr +  " refers to an empty file or " +
                                   "its in the wrong format.", 15)
    finally:
        if is_gz:
            unzip_file.close()
    # sanity checks on the number of sequences
    num_seqs = aln.GetCount()
    if num_seqs == 1:
        helper.MsgErrorAndExit(argstr + " points to an alignment with only " +
                               "1 sequence.", 16)
    if num_seqs > 2 and not allow_multitemplate:
        helper.MsgErrorAndExit(argstr + " points to an alignment with more " +
                               "than 2 sequences and we do not allow this.", 16)
    # look for the (single) sequence marked as target
    entries = [(s.name.strip(), s.string) for s in aln.sequences]
    target_idx = -1
    for idx, (name, _) in enumerate(entries):
        if name.lower() not in ('trg', 'target'):
            continue
        if target_idx >= 0:
            helper.MsgErrorAndExit(argstr + ": multiple targets found!", 17)
        target_idx = idx
    # move the target to the front, order of the rest is kept
    if target_idx > 0:
        entries.insert(0, entries.pop(target_idx))
    # build the alignment to be returned
    new_aln = seq.CreateAlignment()
    for name, string in entries:
        new_aln.AddSequence(seq.CreateSequence(name, string))
    new_aln.SetSequenceRole(0, 'TARGET')
    for idx in range(1, new_aln.GetCount()):
        new_aln.SetSequenceRole(idx, 'TEMPLATE')

    return new_aln

def _LoadPDB(filename):
    """Read the PDB file *filename* with ``io.LoadPDB`` and return the result.

    A missing file is reported with code 32 (via ``helper.FileExists``), any
    exception while loading with code 33 (via ``helper.MsgErrorAndExit``).
    """
    helper.FileExists("PDB Structure", 32, filename)
    try:
        ent = io.LoadPDB(filename)
    except Exception as exc: #pylint: disable=broad-except
        helper.MsgErrorAndExit("'--pdb %s': failure to parse PDB file: %s"
                               % (filename, str(exc)), 33)
    return ent

def _LoadEntity(filename):
    """Read the structure file *filename* with ``io.LoadEntity`` and return it.

    A missing file is reported with code 32 (via ``helper.FileExists``). If
    loading fails, code 34 is used when the message of the exception starts
    with 'no suitable entity io handler found' and code 33 otherwise.
    """
    helper.FileExists("Structure", 32, filename)
    try:
        ent = io.LoadEntity(filename)
    except Exception as exc: #pylint: disable=broad-except
        reason = str(exc)
        if reason.startswith('no suitable entity io handler found'):
            problem, exit_code = ": not a supported format ", 34
        else:
            problem, exit_code = ": failure to parse PDB file: ", 33
        helper.MsgErrorAndExit("'--entity %s'" % filename + problem + reason,
                               exit_code)
    return ent

def _FetchProfileFromFile(filename):
    """Read a sequence profile with ``io.LoadSequenceProfile`` and return it.

    A missing file is reported with code 51 (via ``helper.FileExists``), any
    exception while loading with code 52 (via ``helper.MsgErrorAndExit``).
    """
    helper.FileExists("Profile", 51, filename)
    try:
        prof = io.LoadSequenceProfile(filename)
    except Exception as exc: #pylint: disable=broad-except
        helper.MsgErrorAndExit("'--seqprof %s': failure to parse profile "
                               "file: %s" % (filename, str(exc)), 52)
    return prof

def _FetchPsipredFromFile(filename):
    """Load psipred prediction from filename and return it.

    The prediction is read with ``loop.PsipredPrediction.FromHHM``.

    :param filename: Path to the file holding the prediction.
    :type filename: :class:`str`

    A missing file is reported with code 51 (via ``helper.FileExists``), any
    exception while loading with code 56 (via ``helper.MsgErrorAndExit``).
    """
    helper.FileExists("Profile", 51, filename)
    try:
        pred = loop.PsipredPrediction.FromHHM(filename)
    except Exception as exc: #pylint: disable=broad-except
        # single separator between file name and message (used to be ": : ")
        helper.MsgErrorAndExit(filename + ": failure to parse psipred " +
                               "prediction: " + str(exc), 56)
    return pred


def _GetChains(structures, structure_sources):
    """Get chain id to entity view (single chain) mapping (dict)."""
    # IDs: (file_base = base file name with no extensions)
    # - file_base.chain_name
    # - file_base (iff only one chain in file)
    # - chain_name (iff only one file)
    # - note: single entry with key 'UNIQUE' created if only one chain in total!
    chain_entities = dict()
    single_file = len(structure_sources) == 1
    # parse structures
    for file_name, ent in zip(structure_sources, structures):
        # get pure file name with no extension
        file_base = os.path.basename(file_name)
        file_split = os.path.splitext(file_base)
        if file_split[1] == '.gz':
            file_base = os.path.splitext(file_split[0])[0]
        else:
            file_base = file_split[0]
        # get chainnames
        prot = ent.Select("peptide=True")
        single_chain = prot.chain_count == 1
        chain_names = [ch.name for ch in prot.chains]
        # fill dict
        if single_file and single_chain:
            chain_entities['UNIQUE'] = prot
        elif single_chain:
            chain_entities[file_base + '.' + chain_names[0]] = prot
            chain_entities[file_base] = prot
        else:
            for chain_name in chain_names:
                ch_ent = prot.Select("cname=" + chain_name)
                chain_entities[file_base + '.' + chain_name] = ch_ent
                if single_file:
                    chain_entities[chain_name] = ch_ent
    return chain_entities

def _AttachViewsToAln(aln, chain_entities):
    """Attach views to tpl. sequences in aln according to sequence names."""
    for i in range(1, aln.GetCount()):
        seq_name = aln.GetSequence(i).GetName()
        # extract offset
        my_split = seq_name.split('|')
        tpl_id = my_split[0].strip()
        if len(my_split) == 2 and my_split[1].strip().isdigit():
            # set offset
            tpl_offset = int(my_split[1].strip())
            # mismatch with existing one?
            old_offset = aln.GetSequenceOffset(i)
            if old_offset > 0 and old_offset != tpl_offset:
                helper.MsgErrorAndExit("Inconsistent offsets between seq. name"+
                                       " and seq. in alignment for " + seq_name,
                                       42)
            else:
                aln.SetSequenceOffset(i, tpl_offset)
        elif len(my_split) == 2 and not my_split[1].strip().isdigit():
            helper.MsgErrorAndExit("Non-integer offset defined in seq. name "+
                                   seq_name, 43)
        elif len(my_split) > 2:
            helper.MsgErrorAndExit("Too many '|' in seq. name " + seq_name, 44)
        # identify chain and attach view
        if len(chain_entities) == 1:
            aln.AttachView(i, chain_entities['UNIQUE'].CreateFullView())
        elif tpl_id in chain_entities:
            aln.AttachView(i, chain_entities[tpl_id].CreateFullView())
        else:
            helper.MsgErrorAndExit("Could not find chain with ID " + tpl_id +
                                   " (should be <FILE>.<CHAIN>) to attach to"+
                                   " sequence named " + seq_name, 45)

class PM3ArgumentParser(argparse.ArgumentParser):
    """
    This class is a child of :class:`argparse.ArgumentParser`. It provides a set
    of standard arguments which can be activated with :meth:`Add*` methods and
    then assembled with :meth:`AssembleParser`. This helps keeping up a common
    naming scheme throughout all |project| actions. As a real extension, this
    subclass provides checking of input parameters on :meth:`Parse`. Besides
    this, everything you can do with a 'real' :class:`~argparse.ArgumentParser`
    instance is possible here.

    Attributes beyond :class:`argparse.ArgumentParser`:

    .. attribute:: action

      Indicates if the calling script is a |project| action.

      :type: :class:`bool`
    """
    def __init__(self, description, action=True):
        """
        Create a new instance of :class:`~pm3argparse.PM3ArgumentParser`.

        :param description: Help text for this script, handed down to
                            |descattr|_ of |argpinit|_.
        :type description: :class:`str`

        :param action: Indicates if the calling script is a |project| action.
                       This influences |progattr|_ of
                       :class:`~argparse.ArgumentParser` by clipping of the
                       first 3 characters of the file name of the script. If
                       ``False``, default behaviour of
                       :class:`~argparse.ArgumentParser` kicks in.
        :type action: :class:`bool`

        :returns: :class:`argparse.ArgumentParser`.
        """
        prog = None
        if action:
            prog = os.path.basename(sys.argv[0])[3:]
        argparse.ArgumentParser.__init__(self, prog=prog,
                                         description=description,
                                         formatter_class=\
                                         argparse.RawDescriptionHelpFormatter)
        self.action = action
        self.activate = set()

    def _print_message(self, message, file=None):
        #pylint: disable=redefined-builtin
        """
        This is like a welcome message to the "country of bad style"... we are
        overwriting a "_" function from the parent-class. Those guys should not
        be used outside of the housing module, never... but here it is a single
        function to bend :mod:`argparse` to use :class:`ost.Logger`.
        """
        if message:
            no_nl_msg = message
            if message[-1] == '\n':
                no_nl_msg = message[:-1]
            if file is None or file is sys.stderr:
                ost.LogError(no_nl_msg)
            else:
                ost.LogScript(no_nl_msg)

    def Parse(self, args=None):
        """
        Parse an argument string and post process the activated option sets.
        See :meth:`Add*` methods.

        Options/arguments added by default: ``-h/--help`` shows usage.

        General exit codes:

        * 1 - an unhandled exception was raised
        * 2 - arguments cannot be parsed or required arguments are missing

        :param args: The argument string. As default |sysargv|_ is used.
        :type args: :class:`list`

        :returns: Namespace filled with attributes (see :meth:`Add*` methods).
        """
        # argparse fills our own namespace class, which then does the checks
        namespace = PM3OptionsNamespace()
        self.parse_args(args=args, namespace=namespace)
        namespace.PostProcess(self.activate)
        return namespace

    def AssembleParser(self):
        """
        When adding options via the :meth:`Add*` methods, call this after you
        are done. Everything before just tells the parser that it should
        contain those option sets but does not actually add anything.
        :meth:`AssembleParser` will put everything in place, in the right order
        and with the right constraints.
        """
        if 'ALIGNMENT' in self.activate:
            self._AssembleAlignment()
        if 'STRUCTURE' in self.activate:
            self._AssembleStructure()
        if 'PROFILE' in self.activate:
            self._AssembleProfile()
        if 'FRAGMENTS' in self.activate:
            self._AssembleFragments()

    def AddAlignment(self, allow_multitemplate=False):
        """Commandline options for alignments.

        Activate everything needed to load alignments to the argument parser.
        Command line arguments are then added in :meth:`AssembleParser` and the
        input is post processed and checked in :meth:`Parse`.

        :param allow_multitemplate: enable support for multitemplate alignments
        :type allow_multitemplate:  :class:`bool`

        Options/arguments added:

        * ``-f/--fasta <FILE>`` - Target-template alignment in FASTA format.
          Target sequence is either named "trg" or "target" or the first
          sequence is used. File can be plain or gzipped.

        * ``-c/--clustal <FILE>`` - Target-template alignment in CLUSTAL format.
          Target sequence is either named "trg" or "target" or the first
          sequence is used. File can be plain or gzipped.

        * ``-j/--json <OBJECT>|<FILE>`` - Alignments provided as JSON
          file/object. File can be plain or gzipped.

        See :ref:`here <promod-build-model>` for details on the file formats.

        Attributes added to the namespace returned by :meth:`Parse`:

        * :attr:`fasta` - filled with the input of the ``--fasta`` option, a
          :class:`list` of :class:`str` (filenames).

        * :attr:`clustal` - filled with the input of the ``--clustal`` option, a
          :class:`list` of :class:`str` (filenames).

        * :attr:`json` - filled with the input of the ``--json`` option, a
          :class:`list` of :class:`str`, where each string may be a filename
          or a JSON object string.

        * :attr:`alignments` - :class:`ost.AlignmentList`, same order as given.
          First sequence of the alignment is the target sequence, if in doubt,
          check for sequence roles ``TARGET`` or ``TEMPLATE``

        * :attr:`aln_sources` - :class:`list` of :class:`str` with the original
          source(s) of the alignment: may be filename(s) or JSON strings.

        Exit codes related to alignment input:

        * 12 - a given alignment file does not exist
        * 13 - never raised (parameter for checking gzip files)
        * 14 - gzip file cannot be opened
        * 15 - found an empty alignment file
        * 16 - unsupported number of sequences in alignment: only 1 sequence or
          (unless *allow_multitemplate* = True) more than 2 sequences
        * 17 - mutliple target sequences found in alignment
        * 18 - error when reading fasta/clustal file
        * 19 - problem with a JSON formatted file handed over to ``--json``
        * 20 - JSON file could not be decoded into a JSON object
        * 21 - JSON object has no 'alignmentlist' key
        * 22 - JSON object has no 'target'/'template' in the 'alignmentlist'
        * 23 - JSON string could not be decoded
        * 24 - JSON object 'alignmentlist' does not point to a list
        * 25 - JSON object 'alignmentlist' member is not a dictionary
        * 26 - JSON object 'alignmentlist' 'target'/'template' does not point
          to a dictionary
        * 27 - JSON  object 'alignmentlist' 'target'/'template' does not have
          a needed key
        * 28 - JSON  object 'alignmentlist' 'target'/'template' has a value of
          wrong type
        """
        self.activate.add('ALIGNMENT')
        if allow_multitemplate:
            self.activate.add('ALLOW_MULTITEMPLATE')

    def AddStructure(self, attach_views=False):
        """Commandline options for structures.

        Activate everything needed to load structures to the argument parser.
        Command line arguments are then added in :meth:`AssembleParser` and the
        input is post processed and checked in :meth:`Parse`.

        :param attach_views: if True: attach views to alignments. Requires call
                             to :meth:`AddAlignment`. Chains for each sequence
                             are identified based on the sequence name of the
                             templates in the alignments (see 
                             :ref:`here <promod-build-model>` for details).
        :type attach_views:  :class:`bool`

        Options/arguments added:

        * ``-p/--pdb <FILE>`` - Structure in PDB format. File can be plain or
          gzipped.

        * ``-e/--entity <FILE>`` - Structure in any format readable by the
          :meth:`ost.io.LoadEntity` method. Format is chosen by file ending.
          Recognized File Extensions: .ent, .pdb, .ent.gz, .pdb.gz, .cif, .cif.gz.

        Notes:

        * one of the inputs must be given and only one type of input acceptable

        * callable multiple times (structures appended in given order)

        Attributes added to the namespace returned by :meth:`Parse`:

        * :attr:`pdb` - filled with the input of the ``--pdb`` option, a
          :class:`list` of :class:`str` (filenames).

        * :attr:`entity` - filled with the input of the ``--entity`` option, a
          :class:`list` of :class:`str` (filenames).

        * :attr:`structures` - :class:`list` of :class:`ost.EntityHandle`, same
          order as given.

        * :attr:`structure_sources` - :class:`list` of :class:`str` with the
          original filenames of the structures.

        Exit codes related to alignment input:

        * 32 - a given structure file does not exist
        * 33 - failure to read a given structure file
        * 34 - file ending is not a supported format

        Exit codes if *attach_views* = True:

        * 41 - attach_views used without adding alignments
        * 42 - inconsistent offsets between seq. name and seq. in alignment
        * 43 - non-integer offset defined in seq. name
        * 44 - too many "|" in seq. name
        * 45 - chain to attach to sequence could not be identified
        """
        self.activate.add('STRUCTURE')
        if attach_views:
            self.activate.add('ATTACH_VIEWS')

    def AddProfile(self):
        """Commandline options for profiles

        Activate everything needed to load profiles to the argument parser.
        Command line arguments are then added in :meth:`AssembleParser` and the
        input is post processed and checked in :meth:`Parse`.

        Options/arguments added:

        * ``-s/--seqprof <FILE>`` - Sequence profile in any format readable
          by the :meth:`ost.io.LoadSequenceProfile` method. Format is chosen by 
          file ending. Recognized file extensions: .hhm, .hhm.gz, .pssm, 
          .pssm.gz. Consider to use 
          :meth:`ost.bindings.hhblits.HHblits.A3MToProfile` if you have a file 
          in a3m format at hand. 

        Notes:

        * the profiles are mapped based on exact matches towards the gapless
          target sequences, i.e. one profile is mapped to several chains in
          case of homo-oligomers

        * every profile must have a unique sequence to avoid ambiguities

        * all or nothing - you cannot provide profiles for only a subset of
          target sequences

        Attributes added to the namespace returned by :meth:`Parse`:

        * :attr:`profiles` - :class:`list` of :class:`ost.seq.ProfileHandle`, 
          ordered to match the target sequences.

        Exit codes related to profile input:

        * 51 - a given profile file does not exist
        * 52 - failure to read a given profile file 
        * 53 - a profile cannot be mapped to any target sequence
        * 54 - profile sequences are not unique
        * 55 - only subset of target sequences is covered by profile
        """
        self.activate.add('PROFILE')


    def AddFragments(self):
        """Commandline option for usage of Fragments

        Activate everything needed to setup 
        :class:`promod3.modelling.FraggerHandle` objects in the argument parser.
        Command line arguments are then added in :meth:`AssembleParser` and the
        input is post processed and checked in :meth:`Parse`.

        Options/arguments added:

        * ``-r/--use-fragments`` - Boolean flag whether to setup fragger handles.

        Notes:

        * Fragger handles are setup to identify fragments in a 
          :class:`promod3.loop.StructureDB`.

        * If no profiles are provided as additional argument 
          (``-s/--seqprof <FILE>``), fragments are identified based on BLOSUM62 
          sequence similarity.

        * If you provide profiles that are not in hhm format, fragments are 
          identified based on BLOSUM62 sequence similarity, sequence profile 
          scoring and structural profile scoring.

        * If you provide profiles in hhm format (optimal case), psipred 
          predictions are fetched from there and fragments are identified based
          on secondary structure agreement, secondary structure dependent
          torsion probabilities, sequence profile scoring and structure 
          profile scoring.

        Attributes added to the namespace returned by :meth:`Parse`:

        * :attr:`fragger_handles` - :class:`list` of 
          :class:`promod3.modelling.FraggerHandle`, ordered to match the target 
          sequences.

        Exit codes related to fragments input:

        * 56 - cannot read psipred prediction from hhm file
        """
        self.activate.add('FRAGMENTS')


    def _AssembleAlignment(self):
        """Actually add alignment arguments/options."""
        aln_grp = self.add_mutually_exclusive_group(required=True)
        # fasta input
        aln_grp.add_argument('-f', '--fasta', metavar=('<FILE>'),
                             help='Target-template alignment in FASTA format. '+
                             'Target sequence is either named "trg" or '+
                             '"target" or the first sequence is used. '+
                             'File can be plain or gzipped.',
                             action='append', default=list())
        # clustal input
        aln_grp.add_argument('-c', '--clustal', metavar=('<FILE>'),
                             help='Target-template alignment in CLUSTAL format. '+
                             'Target sequence is either named "trg" or '+
                             '"target" or the first sequence is used. '+
                             'File can be plain or gzipped.',
                             action='append', default=list())
        # JSON input
        aln_grp.add_argument('-j', '--json', metavar='<OBJECT>|<FILE>',
                             help='Alignments provided as JSON file/object. '+
                             'File can be plain or gzipped.',
                             action='append', default=list())

    def _AssembleStructure(self):
        """Actually add structure arguments/options."""
        aln_grp = self.add_mutually_exclusive_group(required=True)
        # pdb input
        aln_grp.add_argument('-p', '--pdb', metavar=('<FILE>'),
                             help='Structure in PDB format. '+
                             'File can be plain or gzipped.',
                             action='append', default=list())
        # any OST entity
        aln_grp.add_argument('-e', '--entity', metavar=('<FILE>'),
                             help="Structure in any format readable by OST's "+
                             "io.LoadEntity method. Format is chosen by file "+
                             "ending. Recognized File Extensions: .ent, .pdb, "+
                             ".ent.gz, .pdb.gz, .cif, .cif.gz.",
                             action='append', default=list())

    def _AssembleProfile(self):
        self.add_argument('-s', '--seqprof', metavar=('<FILE>'),
                          help="Sequence profile in any format readable by "+
                          "OST's io.LoadSequenceProfile method. Format is "+
                          "chosen by file ending. Recognized File Extensions: "+
                          ".hhm, .hhm.gz, .pssm, .pssm.gz", action='append',
                          default=list())

    def _AssembleFragments(self):
        self.add_argument('-r', '--use-fragments',
                          help="Use fragments instead of torsion angle "+
                          "based sampling for Monte Carlo approaches. "+
                          "For optimal performance you should provide "+
                          "sequence profiles in hhm format. (File "+
                          "extensions: .hhm or .hhm.gz). BUT: be aware of "+
                          "increased runtime.", action="store_true")

class PM3OptionsNamespace(object):
    # class will grow, so for the moment pylint is ignored
    #pylint: disable=too-few-public-methods
    """Output of :meth:`PM3ArgumentParser.Parse`.

    Like output of :meth:`argparse.ArgumentParser.parse_args` with additional
    functions for convenience.
    """
    def __init__(self):
        pass

    def PostProcess(self, activated):
        """Post processing of activated option packs.

        :param activated: Names of the activated option packs. Only membership
                          tests (``in``) are performed on it.
        """
        #pylint: disable=attribute-defined-outside-init
        self.allow_multitemplate = 'ALLOW_MULTITEMPLATE' in activated
        # The order matters: views, profiles and fragments are processed
        # after alignments/structures because they read what those set up.
        if 'ALIGNMENT' in activated:
            self._PostProcessAlignment()
        if 'STRUCTURE' in activated:
            self._PostProcessStructure()
        if 'ATTACH_VIEWS' in activated:
            self._AttachViews()
        if 'PROFILE' in activated:
            self._PostProcessProfile()
        if 'FRAGMENTS' in activated:
            self._PostProcessFragments()

    def _PostProcessAlignment(self):
        #pylint: disable=no-member
        #pylint: disable=attribute-defined-outside-init
        """Get alignments from command line input.

        Fills ``self.alignments`` with the alignments read from the fasta,
        clustal and JSON sources (in that order) and ``self.aln_sources`` with
        the sources they were read from. A JSON source may contribute several
        alignments but is listed only once in ``self.aln_sources``.
        """
        self.aln_sources = list()
        self.alignments = seq.AlignmentList()
        # parse fasta files
        for src in self.fasta:
            new_aln = _FetchAlnFromFile(src, self.allow_multitemplate, "fasta")
            self.alignments.append(new_aln)
            self.aln_sources.append(src)
        # parse clustal files
        for src in self.clustal:
            new_aln = _FetchAlnFromFile(src, self.allow_multitemplate, "clustal")
            self.alignments.append(new_aln)
            self.aln_sources.append(src)
        # parse JSON input
        for src in self.json:
            json_obj = _GetJSONOBject(src)
            for aln in _GetAlnFromJSON(json_obj, src):
                self.alignments.append(aln)
            self.aln_sources.append(src)

    def _PostProcessStructure(self):
        #pylint: disable=attribute-defined-outside-init
        """Get structures from command line input.

        Fills ``self.structures`` with the loaded structures (PDB input first,
        then generic entities) and ``self.structure_sources`` with the
        corresponding sources, index by index.
        """
        self.structures = list()
        self.structure_sources = list()
        # parse pdb files
        for src in self.pdb:
            self.structures.append(_LoadPDB(src))
            self.structure_sources.append(src)
        # parse generic structures
        for src in self.entity:
            self.structures.append(_LoadEntity(src))
            self.structure_sources.append(src)

    def _AttachViews(self):
        """Attach views to tpl. sequences according to sequence names."""
        if not (hasattr(self, 'structures') and hasattr(self, 'alignments')):
            helper.MsgErrorAndExit("Need to have structures and alignments to "+
                                   "attach views.", 41)
        # get chain id to entity view (single chain) mapping (dict)
        chain_entities = _GetChains(self.structures, self.structure_sources)
        # go through all templates in all alignments
        for aln in self.alignments:
            _AttachViewsToAln(aln, chain_entities)

    def _PostProcessProfile(self):
        #pylint: disable=attribute-defined-outside-init
        """Get Profiles from command line input.

        Sets ``self.profiles`` to one profile per alignment (same order as
        ``self.alignments``), chosen by exact match between the profile
        sequence and the gapless first sequence of the alignment. If no
        profile was given, ``self.profiles`` stays empty. Otherwise
        ``self.loaded_profiles`` holds the profiles in the order of
        ``self.seqprof``.
        """
        self.profiles = list()

        if len(self.seqprof) == 0:
            # no profiles provided, remember the all or nothing principle
            # so not having any profile is fine
            return

        self.loaded_profiles = [_FetchProfileFromFile(src)
                                for src in self.seqprof]

        prof_sequences = [p.sequence for p in self.loaded_profiles]

        # check uniqueness of loaded profiles
        if len(set(prof_sequences)) != len(prof_sequences):
            helper.MsgErrorAndExit("All sequence profiles must have unique " +
                                   "sequence.", 54)

        # map onto alignment target sequences
        trg_sequences = [aln.GetSequence(0).GetGaplessString()
                         for aln in self.alignments]
        for s in trg_sequences:
            # only the lookup can fail, and list.index signals a miss with
            # ValueError; anything else should not be hidden behind error 55
            try:
                prof_idx = prof_sequences.index(s)
            except ValueError:
                helper.MsgErrorAndExit("Could not find profile with sequence " +
                                       "that exactly matches trg seq: " + s, 55)
            else:
                self.profiles.append(self.loaded_profiles[prof_idx])

        # We found a profile for every target sequence. So if the size of unique
        # target sequences is not the same as for unique profile sequences,
        # we know that we have additional profiles that never got mapped
        if len(set(trg_sequences)) != len(set(prof_sequences)):
            helper.MsgErrorAndExit("Could not map every profile to a target " +
                                   "sequence", 53)

    def _PostProcessFragments(self):
        #pylint: disable=attribute-defined-outside-init
        """Set up fragger handles if fragments were requested.

        Sets ``self.fragger_handles`` to one handle per alignment (same order
        as ``self.alignments``). Alignments with identical gapless first
        sequence share the same handle object. The list stays empty if
        ``self.use_fragments`` is not set.
        """
        self.fragger_handles = list()

        if not self.use_fragments:
            # no fragments requested, so lets just return
            return

        trg_sequences = [aln.GetSequence(0).GetGaplessString()
                         for aln in self.alignments]

        # we only want to setup a Fragger for every unique target sequence;
        # dict.fromkeys drops duplicates while keeping first-seen order, so
        # the set-up order does not depend on string hashing
        unique_trg_sequences = list(dict.fromkeys(trg_sequences))

        # already setup variables, fill later if required data is present
        profiles = [None] * len(unique_trg_sequences)
        psipred_predictions = [None] * len(unique_trg_sequences)
        ts_coil = None
        ts_helix = None
        ts_extended = None

        # a structure db we need anyway. Load once and assign the same to all
        # fraggers to avoid memory explosion
        structure_db = loop.LoadStructureDB()

        # load the profiles
        if hasattr(self, "profiles") and len(self.profiles) > 0:
            profile_dict = {p.sequence: p for p in self.loaded_profiles}
            # as we already mapped the profiles in _PostProcessProfile,
            # the following is guaranteed to find the right profile
            # for every unique target sequence
            profiles = [profile_dict[s] for s in unique_trg_sequences]

            # For the psipred predictions we have to go back to the
            # input files. If they all end with .hhm or hhm.gz we're ready to go
            if all(src.endswith((".hhm", ".hhm.gz")) for src in self.seqprof):
                # lets load the torsion samplers now as they are only required
                # if we also add psipred handlers
                ts_coil = loop.LoadTorsionSamplerCoil()
                ts_extended = loop.LoadTorsionSamplerExtended()
                ts_helix = loop.LoadTorsionSamplerHelical()

                # to get the right filenames we use the sequences of the
                # loaded profiles that are in the same order as self.seqprof
                profile_sequences = [p.sequence for p in self.loaded_profiles]
                psipred_predictions = [
                    _FetchPsipredFromFile(
                        self.seqprof[profile_sequences.index(s)])
                    for s in unique_trg_sequences]

        # setup one fragger handle for each unique sequence
        fragger_by_seq = dict()
        for s, profile, psipred_pred in zip(unique_trg_sequences, profiles,
                                            psipred_predictions):
            fragger_by_seq[s] = modelling.FraggerHandle(
                s,
                profile=profile,
                psipred_pred=psipred_pred,
                rmsd_thresh=0.02,
                structure_db=structure_db,
                torsion_sampler_coil=ts_coil,
                torsion_sampler_helix=ts_helix,
                torsion_sampler_extended=ts_extended)
        # map them to the chains
        for s in trg_sequences:
            self.fragger_handles.append(fragger_by_seq[s])


#  LocalWords:  param attr prog argparse ArgumentParser bool sys os init str
#  LocalWords:  progattr descattr argpinit argv formatter meth args namespace
#  LocalWords:  ArgumentDefaultsHelpFormatter sysargv AssembleParser fasta io
#  LocalWords:  metavar trg tpl FastA gzip tempfile ost promod aln stderr src
#  LocalWords:  AssembleTrgTplAln CreateSequence SetSequenceOffset LogError
#  LocalWords:  LogScript OptionsNamespace PostProcess AssembleAlignment JSON
#  LocalWords:  AddAlignment AlignmentList SEQNAME whitespaces nargs trgname
#  LocalWords:  PostProcessAlignment startswith seqfile elif MsgErrorAndExit
#  LocalWords:  len FileExists gz FileGzip readfile fh NamedTemporaryFile fas
#  LocalWords:  LoadAlignment exc GetCount fst GetSequence snd