File: pm3argparse.py

# Copyright (c) 2013-2020, SIB - Swiss Institute of Bioinformatics and
#                          Biozentrum - University of Basel
# 
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# 
#   http://www.apache.org/licenses/LICENSE-2.0
# 
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


"""
Extensions for the argparse module.
"""

import argparse
import sys
import os
import gzip
import tempfile
#try:
#    import ujson as json
#except ImportError:
import json

import ost
from ost import io, seq

from promod3.core import helper
from promod3 import loop, modelling

def _TmpForGZip(filename, suffix, msg_prefix):
    """Unpack a file to a tmp file if gzipped.
    """
    helper.FileExists(msg_prefix, 12, filename)
    try:
        zip_fh = gzip.open(filename)
        unzip_content = zip_fh.read()
        zip_fh.close()
    except IOError as ioe:
        helper.MsgErrorAndExit(msg_prefix + " gzip file '" + filename +
                               "' cannot be opened: " + str(ioe), 14)
    unzip_file = tempfile.NamedTemporaryFile(mode='wb', suffix=suffix)
    unzip_file.write(unzip_content)
    unzip_file.flush()
    return unzip_file

def _CheckJSONAlnSeqKeyType(key_name, val_type, json_aln, seqtype, json_source):
    '''Check that a key exists in a sequence dict and that its value is of a
    certain type.
    '''
    if key_name not in json_aln[seqtype]:
        helper.MsgErrorAndExit("JSON 'alignmentlist' '%s' " % seqtype+
                               "from '%s' is " % json_source+
                               "missing the '%s' key" % key_name, 27)
    altype = type(json_aln[seqtype][key_name])
    if altype is not val_type:
        helper.MsgErrorAndExit("JSON 'alignmentlist' '%s' " % seqtype+
                               "'%s' from " % key_name+
                               "'%s' is not a " % json_source+
                               "%s" % str(val_type), 28)

def _GetAlnFromJSON(json_object, json_source):
    """Create alignments from a JSON object.

    Iterate the alignments in a JSON object and deliver OST alignments via the
    yield operator.
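
    The layout expected here (this is what the checks below enforce; names
    and sequences are just placeholders) looks roughly like::

      {"alignmentlist": [{"target":   {"name": "trg", "seqres": "ASDF-G"},
                          "template": {"name": "tpl", "seqres": "ASDFLG",
                                       "offset": 0}}]}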
    """
    # alignments are stored via the 'alignmentlist' key
    if 'alignmentlist' not in json_object:
        helper.MsgErrorAndExit("JSON object from '%s' does not " % json_source+
                               "provide an 'alignmentlist' key.", 21)
    # alignments come as a list, to enable hetero-oligomers
    if not type(json_object['alignmentlist']) is list:
        helper.MsgErrorAndExit("JSON object from '%s' does not " % json_source+
                               "provide a list behind 'alignmentlist'.", 24)
    # take the alignments apart, each alignment is a dictionary
    for json_aln in json_object['alignmentlist']:
        # json_aln needs to be a dictionary
        if not type(json_aln) is dict:
            helper.MsgErrorAndExit("JSON 'alignmentlist' member from "+
                                   "'%s' is not a ' " %  json_source+
                                   " dictionary: %s" % json_aln, 25)
        # an alignment has a 'target' and a 'template' dictionary
        # each of them has a 'name' and a 'seqres' pair
        for flav in ['target', 'template']:
            if flav not in json_aln:
                helper.MsgErrorAndExit("JSON 'alignmentlist' from "+
                                       "'%s' does not " % json_source+
                                       "provide a '%s' key." % flav, 22)
            # check sequence to be dictionary
            if not type(json_aln[flav]) is dict:
                helper.MsgErrorAndExit("JSON 'alignmentlist' '%s' from " % flav+
                                       "'%s' is not a " % json_source+
                                       "dictionary: %s" % json_aln[flav], 26)
            # check for keys needed by both sequences:
            for aln_key in ['name', 'seqres']:
                _CheckJSONAlnSeqKeyType(aln_key, str, json_aln, flav,
                                        json_source)
        _CheckJSONAlnSeqKeyType('offset', int, json_aln, 'template',
                                json_source)
        # create and yield alignment
        trg_name = str(json_aln['target']['name']).strip()
        trg_seq = str(json_aln['target']['seqres'])
        tpl_name = str(json_aln['template']['name']).strip()
        tpl_seq = str(json_aln['template']['seqres'])
        new_aln = seq.CreateAlignment(seq.CreateSequence(trg_name, trg_seq),
                                      seq.CreateSequence(tpl_name, tpl_seq))
        new_aln.SetSequenceRole(0, 'TARGET')
        new_aln.SetSequenceRole(1, 'TEMPLATE')
        new_aln.SetSequenceOffset(1, json_aln['template']['offset'])
        yield new_aln

def _GetJSONOBject(json_input):
    """Get a JSON object out of a string which may be an object or a path.

    If the input string starts with '{', we assume it's a JSON object. File
    names starting with '{' would be a bit weird.

    If we are looking at a file, check and load it.

    For a JSON object, check that everything is there. No checks for
    superfluous stuff.

    The return value is always a JSON object.
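
    Both forms of input are hence accepted, e.g. (file name hypothetical)::

      --json my_alignments.json
      --json '{"alignmentlist": [...]}'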
    """
    if json_input[0] != '{':
        helper.FileExists("JSON Alignment", 12, json_input)
        is_gz = helper.FileGzip("JSON alignment", 13, json_input)
        readfile = json_input
        if is_gz:
            unzip_file = _TmpForGZip(json_input, '.json', "JSON alignment")
            readfile = unzip_file.name
        try:
            jfh = open(readfile)
        except IOError as ioe:
            helper.MsgErrorAndExit("'--json' file '%s' " % json_input+
                                   "can not be processed: %s" % ioe.strerror,
                                   19)
        except:
            raise
        try:
            json_object = json.load(jfh)
        except json.JSONDecodeError as jde:
            helper.MsgErrorAndExit("'--json' file '%s' could " % json_input+
                                   "not be decoded into a JSON object: "+
                                   str(jde), 20)
        except:
            raise
        finally:
            jfh.close()
    else:
        try:
            json_object = json.loads(json_input)
        except ValueError as vae:
            helper.MsgErrorAndExit("'--json' string '%s' " % json_input+\
                                   "could not be decoded: %s" % str(vae), 23)
    return json_object

def _FetchAlnFromFile(seqfile, allow_multitemplate, format):
    """Read alignment from seqfile and return it."""
    argstr = "'--" + format + " " + seqfile + "'"
    helper.FileExists("Alignment", 12, seqfile)
    # checking if alignment file has 'gz' extension
    is_gz = helper.FileGzip("Alignment", 13, seqfile)
    # loading the alignment, switch for gzip
    readfile = seqfile
    if is_gz:
        unzip_file = _TmpForGZip(seqfile, '.fas', "Alignment")
        readfile = unzip_file.name
    try:
        aln = io.LoadAlignment(readfile, format=format)
    except Exception as exc: #pylint: disable=broad-except
        if str(exc) in ['Bad FASTA file: File is empty',
                        'Bad CLUSTAL file: File is empty']:
            helper.MsgErrorAndExit(argstr + " refers to an empty file or " +
                                   "it is in the wrong format.", 15)
        else:
            helper.MsgErrorAndExit(argstr + ": error when reading alignment "+
                                   "file: " + str(exc), 18)
    finally:
        if is_gz:
            unzip_file.close()
    # checking the alignment
    if aln.GetCount() == 1:
        helper.MsgErrorAndExit(argstr + " points to an alignment with only " +
                               "1 sequence.", 16)
    if aln.GetCount() > 2 and not allow_multitemplate:
        helper.MsgErrorAndExit(argstr + " points to an alignment with more " +
                               "than 2 sequences, which is not allowed.", 16)
    # identify target
    target_idx = -1
    sequences = [(s.name.strip(),s.string) for s in aln.sequences]
    for i,s in enumerate(sequences):
        if s[0].lower() in ['trg', 'target']:
            if target_idx >= 0:
                helper.MsgErrorAndExit(argstr + ": multiple targets found!", 17)
            target_idx = i
    # reshuffle
    if target_idx > 0:
        sequences.insert(0, sequences[target_idx])
        del sequences[target_idx+1]
    # generate alignment
    new_aln = seq.CreateAlignment()
    for s in sequences:
        new_aln.AddSequence(seq.CreateSequence(s[0], s[1]))
    new_aln.SetSequenceRole(0, 'TARGET')
    for i in range(1, new_aln.GetCount()):
        new_aln.SetSequenceRole(i, 'TEMPLATE')

    return new_aln

def _LoadPDB(filename):
    """Load PDB file from filename and return it."""
    argstr = "'--pdb " + filename + "'"
    helper.FileExists("PDB Structure", 32, filename)
    try:
        ent = io.LoadPDB(filename)
    except Exception as exc: #pylint: disable=broad-except
        helper.MsgErrorAndExit(argstr + ": failure to parse PDB file: " +
                               str(exc), 33)
    return ent

def _LoadEntity(filename):
    """Load generic structure file from filename and return it."""
    argstr = "'--entity " + filename + "'"
    helper.FileExists("Structure", 32, filename)
    try:
        ent = io.LoadEntity(filename)
    except Exception as exc: #pylint: disable=broad-except
        if str(exc).startswith('no suitable entity io handler found'):
            helper.MsgErrorAndExit(argstr + ": not a supported format " +
                                   str(exc), 34)
        else:
            helper.MsgErrorAndExit(argstr + ": failure to parse structure " +
                                   "file: " + str(exc), 33)
    return ent

def _FetchProfileFromFile(filename):
    """Load generic profile file from filename and return it."""
    argstr = "'--seqprof " + filename + "'"
    helper.FileExists("Profile", 51, filename)
    try:
        prof = io.LoadSequenceProfile(filename)
    except Exception as exc:
        helper.MsgErrorAndExit(argstr + ": failure to parse profile file: " +
                               str(exc), 52)        
    return prof

def _FetchPsipredFromFile(filename):
    """Load psipred prediction from filename and return it."""
    argstr = filename
    helper.FileExists("Profile", 51, filename)
    try:
        pred = loop.PsipredPrediction.FromHHM(filename)
    except Exception as exc:
        helper.MsgErrorAndExit(argstr + ": failure to parse psipred " +
                               "prediction: " + str(exc), 56)        
    return pred


def _GetChains(structures, structure_sources):
    """Get chain id to entity view (single chain) mapping (dict)."""
    # IDs: (file_base = base file name with no extensions)
    # - file_base.chain_name
    # - file_base (iff only one chain in file)
    # - chain_name (iff only one file)
    # - note: single entry with key 'UNIQUE' created if only one chain in total!
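    # example (hypothetical file/chain names): for inputs '1abc.pdb' (single
    # chain A) and '2xyz.pdb.gz' (chains A and B) the resulting keys would be
    # '1abc.A', '1abc', '2xyz.A' and '2xyz.B'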
    chain_entities = dict()
    single_file = len(structure_sources) == 1
    # parse structures
    for file_name, ent in zip(structure_sources, structures):
        # get pure file name with no extension
        file_base = os.path.basename(file_name)
        file_split = os.path.splitext(file_base)
        if file_split[1] == '.gz':
            file_base = os.path.splitext(file_split[0])[0]
        else:
            file_base = file_split[0]
        # get chainnames
        prot = ent.Select("peptide=True")
        single_chain = prot.chain_count == 1
        chain_names = [ch.name for ch in prot.chains]
        # fill dict
        if single_file and single_chain:
            chain_entities['UNIQUE'] = prot
        elif single_chain:
            chain_entities[file_base + '.' + chain_names[0]] = prot
            chain_entities[file_base] = prot
        else:
            for chain_name in chain_names:
                ch_ent = prot.Select("cname=" + chain_name)
                chain_entities[file_base + '.' + chain_name] = ch_ent
                if single_file:
                    chain_entities[chain_name] = ch_ent
    return chain_entities

def _AttachViewsToAln(aln, chain_entities):
    """Attach views to tpl. sequences in aln according to sequence names."""
    for i in range(1, aln.GetCount()):
        seq_name = aln.GetSequence(i).GetName()
        # extract offset
        my_split = seq_name.split('|')
        tpl_id = my_split[0].strip()
        if len(my_split) == 2 and my_split[1].strip().isdigit():
            # set offset
            tpl_offset = int(my_split[1].strip())
            # mismatch with existing one?
            old_offset = aln.GetSequenceOffset(i)
            if old_offset > 0 and old_offset != tpl_offset:
                helper.MsgErrorAndExit("Inconsistent offsets between seq. name"+
                                       " and seq. in alignment for " + seq_name,
                                       42)
            else:
                aln.SetSequenceOffset(i, tpl_offset)
        elif len(my_split) == 2 and not my_split[1].strip().isdigit():
            helper.MsgErrorAndExit("Non-integer offset defined in seq. name "+
                                   seq_name, 43)
        elif len(my_split) > 2:
            helper.MsgErrorAndExit("Too many '|' in seq. name " + seq_name, 44)
        # identify chain and attach view
        if len(chain_entities) == 1:
            aln.AttachView(i, chain_entities['UNIQUE'].CreateFullView())
        elif tpl_id in chain_entities:
            aln.AttachView(i, chain_entities[tpl_id].CreateFullView())
        else:
            helper.MsgErrorAndExit("Could not find chain with ID " + tpl_id +
                                   " (should be <FILE>.<CHAIN>) to attach to"+
                                   " sequence named " + seq_name, 45)

class PM3ArgumentParser(argparse.ArgumentParser):
    """
    This class is a child of :class:`argparse.ArgumentParser`. It provides a set
    of standard arguments which can be activated with :meth:`Add*` methods and
    then assembled with :meth:`AssembleParser`. This helps maintain a common
    naming scheme throughout all |project| actions. As a real extension, this
    subclass provides checking of input parameters on :meth:`Parse`. Besides
    this, everything you can do with a 'real' :class:`~argparse.ArgumentParser`
    instance is possible here.

    Attributes beyond :class:`argparse.ArgumentParser`:

    .. attribute:: action

      Indicates if the calling script is a |project| action.

      :type: :class:`bool`
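
    A minimal usage sketch for a |project| action script (the combination of
    option packs shown here is just an example)::

      parser = PM3ArgumentParser(__doc__, action=True)
      parser.AddAlignment()
      parser.AddStructure(attach_views=True)
      parser.AssembleParser()
      opts = parser.Parse()
      # opts.alignments and opts.structures are now loaded and checked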
    """
    def __init__(self, description, action=True):
        """
        Create a new instance of :class:`~pm3argparse.PM3ArgumentParser`.

        :param description: Help text for this script, handed down to
                            |descattr|_ of |argpinit|_.
        :type description: :class:`str`

        :param action: Indicates if the calling script is a |project| action.
                       This influences |progattr|_ of
                       :class:`~argparse.ArgumentParser` by clipping the
                       first 3 characters off the file name of the script. If
                       ``False``, default behaviour of
                       :class:`~argparse.ArgumentParser` kicks in.
        :type action: :class:`bool`

        :returns: :class:`argparse.ArgumentParser`.
        """
        prog = None
        if action:
            prog = os.path.basename(sys.argv[0])[3:]
        argparse.ArgumentParser.__init__(self, prog=prog,
                                         description=description,
                                         formatter_class=\
                                         argparse.RawDescriptionHelpFormatter)
        self.action = action
        self.activate = set()

    def _print_message(self, message, file=None):
        #pylint: disable=redefined-builtin
        """
        This is like a welcome message to the "country of bad style"... we are
        overwriting a "_" function from the parent-class. Those guys should not
        be used outside of the housing module, never... but here it is a single
        function to bend :mod:`argparse` to use :class:`ost.Logger`.
        """
        if message:
            no_nl_msg = message
            if message[-1] == '\n':
                no_nl_msg = message[:-1]
            if file is None or file is sys.stderr:
                ost.LogError(no_nl_msg)
            else:
                ost.LogScript(no_nl_msg)

    def Parse(self, args=None):
        """
        Parse an argument string. See :meth:`Add*` methods.

        Options/arguments added by default: ``-h/--help`` shows usage.

        General exit codes:

        * 1 - an unhandled exception was raised
        * 2 - arguments cannot be parsed or required arguments are missing

        :param args: The argument string. As default |sysargv|_ is used.
        :type args: :class:`list`

        :returns: Namespace filled with attributes (see :meth:`Add*` methods).
        """
        opts = PM3OptionsNamespace()
        self.parse_args(args=args, namespace=opts)

        opts.PostProcess(self.activate)
        return opts

    def AssembleParser(self):
        """
        When adding options via the :meth:`Add*` methods, call this after you
        are done. Everything before just tells the parser that it should
        contain those option sets but does not actually add anything.
        :meth:`AssembleParser` will put everything in place, in the right order
        and with the right constraints.
        """
        if 'ALIGNMENT' in self.activate:
            self._AssembleAlignment()
        if 'STRUCTURE' in self.activate:
            self._AssembleStructure()
        if 'PROFILE' in self.activate:
            self._AssembleProfile()
        if 'FRAGMENTS' in self.activate:
            self._AssembleFragments()

    def AddAlignment(self, allow_multitemplate=False):
        """Commandline options for alignments.

        Activate everything needed to load alignments to the argument parser.
        Command line arguments are then added in :meth:`AssembleParser` and the
        input is post processed and checked in :meth:`Parse`.

        :param allow_multitemplate: enable support for multitemplate alignments
        :type allow_multitemplate:  :class:`bool`

        Options/arguments added:

        * ``-f/--fasta <FILE>`` - Target-template alignment in FASTA format.
          Target sequence is either named "trg" or "target" or the first
          sequence is used. File can be plain or gzipped.

        * ``-c/--clustal <FILE>`` - Target-template alignment in CLUSTAL format.
          Target sequence is either named "trg" or "target" or the first
          sequence is used. File can be plain or gzipped.

        * ``-j/--json <OBJECT>|<FILE>`` - Alignments provided as JSON
          file/object. File can be plain or gzipped.

        See :ref:`here <promod-build-model>` for details on the file formats.

        Attributes added to the namespace returned by :meth:`Parse`:

        * :attr:`fasta` - filled with the input of the ``--fasta`` option, a
          :class:`list` of :class:`str` (filenames).

        * :attr:`clustal` - filled with the input of the ``--clustal`` option, a
          :class:`list` of :class:`str` (filenames).

        * :attr:`json` - filled with the input of the ``--json`` option, a
          :class:`list` of :class:`str`, where each string may be a filename
          or a JSON object string.

        * :attr:`alignments` - :class:`ost.AlignmentList`, same order as given.
          The first sequence of each alignment is the target sequence; if in
          doubt, check the sequence roles ``TARGET`` and ``TEMPLATE``.

        * :attr:`aln_sources` - :class:`list` of :class:`str` with the original
          source(s) of the alignment: may be filename(s) or JSON strings.

        Exit codes related to alignment input:

        * 12 - a given alignment file does not exist
        * 13 - never raised (parameter for checking gzip files)
        * 14 - gzip file cannot be opened
        * 15 - found an empty alignment file
        * 16 - unsupported number of sequences in alignment: only 1 sequence or
          (unless *allow_multitemplate* = True) more than 2 sequences
        * 17 - multiple target sequences found in alignment
        * 18 - error when reading fasta/clustal file
        * 19 - problem with a JSON formatted file handed over to ``--json``
        * 20 - JSON file could not be decoded into a JSON object
        * 21 - JSON object has no 'alignmentlist' key
        * 22 - JSON object has no 'target'/'template' in the 'alignmentlist'
        * 23 - JSON string could not be decoded
        * 24 - JSON object 'alignmentlist' does not point to a list
        * 25 - JSON object 'alignmentlist' member is not a dictionary
        * 26 - JSON object 'alignmentlist' 'target'/'template' does not point
          to a dictionary
        * 27 - JSON object 'alignmentlist' 'target'/'template' does not have
          a needed key
        * 28 - JSON object 'alignmentlist' 'target'/'template' has a value of
          wrong type
        """
        self.activate.add('ALIGNMENT')
        if allow_multitemplate:
            self.activate.add('ALLOW_MULTITEMPLATE')

    def AddStructure(self, attach_views=False):
        """Commandline options for structures.

        Activate everything needed to load structures to the argument parser.
        Command line arguments are then added in :meth:`AssembleParser` and the
        input is post processed and checked in :meth:`Parse`.

        :param attach_views: if True: attach views to alignments. Requires call
                             to :meth:`AddAlignment`. Chains for each sequence
                             are identified based on the sequence name of the
                             templates in the alignments (see 
                             :ref:`here <promod-build-model>` for details).
        :type attach_views:  :class:`bool`

        Options/arguments added:

        * ``-p/--pdb <FILE>`` - Structure in PDB format. File can be plain or
          gzipped.

        * ``-e/--entity <FILE>`` - Structure in any format readable by the
          :meth:`ost.io.LoadEntity` method. Format is chosen by file ending.
          Recognized File Extensions: .ent, .pdb, .ent.gz, .pdb.gz, .cif, .cif.gz.

        Notes:

        * exactly one type of input (``--pdb`` or ``--entity``) must be given

        * callable multiple times (structures appended in given order)

        Attributes added to the namespace returned by :meth:`Parse`:

        * :attr:`pdb` - filled with the input of the ``--pdb`` option, a
          :class:`list` of :class:`str` (filenames).

        * :attr:`entity` - filled with the input of the ``--entity`` option, a
          :class:`list` of :class:`str` (filenames).

        * :attr:`structures` - :class:`list` of :class:`ost.EntityHandle`, same
          order as given.

        * :attr:`structure_sources` - :class:`list` of :class:`str` with the
          original filenames of the structures.

        Exit codes related to structure input:

        * 32 - a given structure file does not exist
        * 33 - failure to read a given structure file
        * 34 - file ending is not a supported format

        Exit codes if *attach_views* = True:

        * 41 - attach_views used without adding alignments
        * 42 - inconsistent offsets between seq. name and seq. in alignment
        * 43 - non-integer offset defined in seq. name
        * 44 - too many "|" in seq. name
        * 45 - chain to attach to sequence could not be identified
        """
        self.activate.add('STRUCTURE')
        if attach_views:
            self.activate.add('ATTACH_VIEWS')

    def AddProfile(self):
        """Commandline options for profiles

        Activate everything needed to load profiles to the argument parser.
        Command line arguments are then added in :meth:`AssembleParser` and the
        input is post processed and checked in :meth:`Parse`.

        Options/arguments added:

        * ``-s/--seqprof <FILE>`` - Sequence profile in any format readable
          by the :meth:`ost.io.LoadSequenceProfile` method. Format is chosen by 
          file ending. Recognized file extensions: .hhm, .hhm.gz, .pssm, 
          .pssm.gz. Consider using
          :meth:`ost.bindings.hhblits.HHblits.A3MToProfile` if you have a file
          in a3m format at hand.

        Notes:

        * the profiles are mapped based on exact matches to the gapless
          target sequences, i.e. one profile is mapped to several chains in
          case of homo-oligomers

        * every profile must have a unique sequence to avoid ambiguities

        * all or nothing - you cannot provide profiles for only a subset of
          target sequences

        Attributes added to the namespace returned by :meth:`Parse`:

        * :attr:`profiles` - :class:`list` of :class:`ost.seq.ProfileHandle`, 
          ordered to match the target sequences.

        Exit codes related to profile input:

        * 51 - a given profile file does not exist
        * 52 - failure to read a given profile file 
        * 53 - a profile cannot be mapped to any target sequence
        * 54 - profile sequences are not unique
        * 55 - only subset of target sequences is covered by profile
        """
        self.activate.add('PROFILE')


    def AddFragments(self):
        """Commandline option for usage of Fragments

        Activate everything needed to setup 
        :class:`promod3.modelling.FraggerHandle` objects in the argument parser.
        Command line arguments are then added in :meth:`AssembleParser` and the
        input is post processed and checked in :meth:`Parse`.

        Options/arguments added:

        * ``-r/--use-fragments`` - Boolean flag indicating whether to set up
          fragger handles.

        Notes:

        * Fragger handles are set up to identify fragments in a
          :class:`promod3.loop.StructureDB`.

        * If no profiles are provided as additional argument 
          (``-s/--seqprof <FILE>``), fragments are identified based on BLOSUM62 
          sequence similarity.

        * If you provide profiles that are not in hhm format, fragments are 
          identified based on BLOSUM62 sequence similarity, sequence profile 
          scoring and structural profile scoring.

        * If you provide profiles in hhm format (optimal case), psipred 
          predictions are fetched from there and fragments are identified based
          on secondary structure agreement, secondary structure dependent
          torsion probabilities, sequence profile scoring and structure 
          profile scoring.

        Attributes added to the namespace returned by :meth:`Parse`:

        * :attr:`fragger_handles` - :class:`list` of 
          :class:`promod3.modelling.FraggerHandle`, ordered to match the target 
          sequences.

        Exit codes related to fragments input:

        * 56 - cannot read psipred prediction from hhm file
        """
        self.activate.add('FRAGMENTS')


    def _AssembleAlignment(self):
        """Actually add alignment arguments/options."""
        aln_grp = self.add_mutually_exclusive_group(required=True)
        # fasta input
        aln_grp.add_argument('-f', '--fasta', metavar=('<FILE>'),
                             help='Target-template alignment in FASTA format. '+
                             'Target sequence is either named "trg" or '+
                             '"target" or the first sequence is used. '+
                             'File can be plain or gzipped.',
                             action='append', default=list())
        # clustal input
        aln_grp.add_argument('-c', '--clustal', metavar=('<FILE>'),
                             help='Target-template alignment in CLUSTAL format. '+
                             'Target sequence is either named "trg" or '+
                             '"target" or the first sequence is used. '+
                             'File can be plain or gzipped.',
                             action='append', default=list())
        # JSON input
        aln_grp.add_argument('-j', '--json', metavar='<OBJECT>|<FILE>',
                             help='Alignments provided as JSON file/object. '+
                             'File can be plain or gzipped.',
                             action='append', default=list())

    def _AssembleStructure(self):
        """Actually add structure arguments/options."""
        aln_grp = self.add_mutually_exclusive_group(required=True)
        # pdb input
        aln_grp.add_argument('-p', '--pdb', metavar=('<FILE>'),
                             help='Structure in PDB format. '+
                             'File can be plain or gzipped.',
                             action='append', default=list())
        # any OST entity
        aln_grp.add_argument('-e', '--entity', metavar=('<FILE>'),
                             help="Structure in any format readable by OST's "+
                             "io.LoadEntity method. Format is chosen by file "+
                             "ending. Recognized File Extensions: .ent, .pdb, "+
                             ".ent.gz, .pdb.gz, .cif, .cif.gz.",
                             action='append', default=list())

    def _AssembleProfile(self):
        self.add_argument('-s', '--seqprof', metavar=('<FILE>'),
                          help="Sequence profile in any format readable by "+
                          "OST's io.LoadSequenceProfile method. Format is "+
                          "chosen by file ending. Recognized File Extensions: "+
                          ".hhm, .hhm.gz, .pssm, .pssm.gz", action='append',
                          default=list())

    def _AssembleFragments(self):
        self.add_argument('-r', '--use-fragments',
                          help="Use fragments instead of torsion angle "+
                          "based sampling for Monte Carlo approaches. "+
                          "For optimal performance you should provide "+
                          "sequence profiles in hhm format. (File "+
                          "extensions: .hhm or .hhm.gz). BUT: be aware of "+
                          "increased runtime.", action="store_true")

class PM3OptionsNamespace(object):
    # class will grow, so for the moment pylint is ignored
    #pylint: disable=too-few-public-methods
    """Output of :meth:`PM3ArgumentParser.Parse`.

    Like output of :meth:`argparse.ArgumentParser.parse_args` with additional
    functions for convenience.
    """
    def __init__(self):
        pass

    def PostProcess(self, activated):
        """Post processing of activated option packs."""
        self.allow_multitemplate = 'ALLOW_MULTITEMPLATE' in activated
        if 'ALIGNMENT' in activated:
            self._PostProcessAlignment()
        if 'STRUCTURE' in activated:
            self._PostProcessStructure()
        if 'ATTACH_VIEWS' in activated:
            self._AttachViews()
        if 'PROFILE' in activated:
            self._PostProcessProfile()
        if 'FRAGMENTS' in activated:
            self._PostProcessFragments()

    def _PostProcessAlignment(self):
        #pylint: disable=no-member
        #pylint: disable=attribute-defined-outside-init
        """Get alignments from command line input."""
        self.aln_sources = list()
        self.alignments = seq.AlignmentList()
        # parse fasta files
        for src in self.fasta:
            new_aln = _FetchAlnFromFile(src, self.allow_multitemplate, "fasta")
            self.alignments.append(new_aln)
            self.aln_sources.append(src)
        # parse clustal files
        for src in self.clustal:
            new_aln = _FetchAlnFromFile(src, self.allow_multitemplate, "clustal")
            self.alignments.append(new_aln)
            self.aln_sources.append(src)
        # parse JSON input
        for src in self.json:
            json_obj = _GetJSONOBject(src)
            for aln in _GetAlnFromJSON(json_obj, src):
                self.alignments.append(aln)
            self.aln_sources.append(src)

    def _PostProcessStructure(self):
        #pylint: disable=attribute-defined-outside-init
        """Get structures from command line input."""
        self.structures = list()
        self.structure_sources = list()
        # parse pdb files
        for src in self.pdb:
            self.structures.append(_LoadPDB(src))
            self.structure_sources.append(src)
        # parse generic structures
        for src in self.entity:
            self.structures.append(_LoadEntity(src))
            self.structure_sources.append(src)

    def _AttachViews(self):
        """Attach views to tpl. sequences according to sequence names."""
        if not (hasattr(self, 'structures') and hasattr(self, 'alignments')):
            helper.MsgErrorAndExit("Need to have structures and alignments to "+
                                   "attach views.", 41)
        # get chain id to entity view (single chain) mapping (dict)
        chain_entities = _GetChains(self.structures, self.structure_sources)
        # go through all templates in all alignments
        for aln in self.alignments:
            _AttachViewsToAln(aln, chain_entities)

    def _PostProcessProfile(self):
        """Get Profiles from command line input."""
        self.profiles = list()

        if len(self.seqprof) == 0:
            # no profiles provided, remember the all or nothing principle
            # so not having any profile is fine
            return

        self.loaded_profiles = list()
        for src in self.seqprof:
            self.loaded_profiles.append(_FetchProfileFromFile(src))

        prof_sequences = [p.sequence for p in self.loaded_profiles]

        # check uniqueness of loaded profiles
        if len(set(prof_sequences)) != len(prof_sequences):
            helper.MsgErrorAndExit("All sequence profiles must have unique " +
                                   "sequence.", 54)

        # map onto alignment target sequences
        trg_sequences = [aln.GetSequence(0).GetGaplessString() \
                         for aln in self.alignments]
        for s in trg_sequences:
            try:
                self.profiles.append(self.loaded_profiles[prof_sequences.index(s)])
            except Exception as exc:
                helper.MsgErrorAndExit("Could not find profile with sequence " +
                                       "that exactly matches trg seq: " + s, 55)

        # We found a profile for every target sequence. So if the number of
        # unique target sequences differs from the number of unique profile
        # sequences, we know that some profiles never got mapped.
        if len(set(trg_sequences)) != len(set(prof_sequences)):
            helper.MsgErrorAndExit("Could not map every profile to a target " +
                                   "sequence", 53)

    def _PostProcessFragments(self):

        self.fragger_handles = list()

        if not self.use_fragments:
            # no fragments requested, so let's just return
            return

        trg_sequences = [aln.GetSequence(0).GetGaplessString() \
                         for aln in self.alignments]

        # we only want to setup a Fragger for every unique target sequence
        unique_trg_sequences = list(set(trg_sequences))

        # already setup variables, fill later if required data is present
        profiles = [None] * len(unique_trg_sequences)
        psipred_predictions = [None] * len(unique_trg_sequences)
        ts_coil = None
        ts_helix = None
        ts_extended = None

        # the structure db is needed in any case; load it once and assign the
        # same instance to all fraggers to avoid excessive memory usage
        structure_db = loop.LoadStructureDB()

        # load the profiles
        if hasattr(self, "profiles") and len(self.profiles) > 0:
            profile_dict = dict()
            for p in self.loaded_profiles:
                profile_dict[p.sequence] = p
            # as we already mapped the profiles in _PostProcessProfile,
            # the following is guaranteed to find the right profile
            # for every unique target sequence
            for s_idx, s in enumerate(unique_trg_sequences):
                profiles[s_idx] = profile_dict[s]

            # For the psipred predictions we have to go back to the
            # input files. If they all end with .hhm or .hhm.gz we're ready to go
            file_endings_ok = True
            for src in self.seqprof:
                if not (src.endswith(".hhm") or src.endswith(".hhm.gz")):
                    file_endings_ok = False
                    break

            if file_endings_ok:
                # let's load the torsion samplers now as they are only required
                # if we also add psipred predictions
                ts_coil = loop.LoadTorsionSamplerCoil()
                ts_extended = loop.LoadTorsionSamplerExtended()
                ts_helix = loop.LoadTorsionSamplerHelical()

                # to get the right filenames we use the sequences of the 
                # loaded profiles that are in the same order as self.seqprof
                profile_sequences = [p.sequence for p in self.loaded_profiles]
                for s_idx, s in enumerate(unique_trg_sequences):
                    fn = self.seqprof[profile_sequences.index(s)]
                    psipred_predictions[s_idx] = _FetchPsipredFromFile(fn)
        
        # setup one fragger handle for each unique sequence
        fraggers = list()
        for i in range(len(unique_trg_sequences)):
            fraggers.append(modelling.FraggerHandle(unique_trg_sequences[i],
                                                    profile = profiles[i],
                                                    psipred_pred = psipred_predictions[i],
                                                    rmsd_thresh = 0.02,
                                                    structure_db = structure_db,
                                                    torsion_sampler_coil = ts_coil,
                                                    torsion_sampler_helix = ts_helix,
                                                    torsion_sampler_extended = ts_extended))
        # map them to the chains
        for s in trg_sequences:
            self.fragger_handles.append(fraggers[unique_trg_sequences.index(s)])


#  LocalWords:  param attr prog argparse ArgumentParser bool sys os init str
#  LocalWords:  progattr descattr argpinit argv formatter meth args namespace
#  LocalWords:  ArgumentDefaultsHelpFormatter sysargv AssembleParser fasta io
#  LocalWords:  metavar trg tpl FastA gzip tempfile ost promod aln stderr src
#  LocalWords:  AssembleTrgTplAln CreateSequence SetSequenceOffset LogError
#  LocalWords:  LogScript OptionsNamespace PostProcess AssembleAlignment JSON
#  LocalWords:  AddAlignment AlignmentList SEQNAME whitespaces nargs trgname
#  LocalWords:  PostProcessAlignment startswith seqfile elif MsgErrorAndExit
#  LocalWords:  len FileExists gz FileGzip readfile fh NamedTemporaryFile fas
#  LocalWords:  LoadAlignment exc GetCount fst GetSequence snd