File: utils.py

package info (click to toggle)

python-biopython 1.68%2Bdfsg-3

links: PTS, VCS
area: main
in suites: stretch
size: 46,860 kB
ctags: 13,237
sloc: python: 160,306; xml: 93,216; ansic: 9,118; sql: 1,208; makefile: 155; sh: 63

file content (57 lines) | stat: -rw-r--r-- 1,913 bytes

parent folder | download | duplicates (2)

# This code is part of the Biopython distribution and governed by its
# license.  Please see the LICENSE file that should have been included
# as part of this package.
#

"""Useful utilities for helping in parsing GenBank files.
"""


class FeatureValueCleaner(object):
    r"""Provide specialized capabilities for cleaning up values in features.

    This class is designed to provide a mechanism to clean up and process
    values in the key/value pairs of GenBank features. This is useful
    because in cases like::

         /translation="MED
         YDPWNLRFQSKYKSRDA"

    you'll end up with a value with \012s and spaces in it like::

        "MED\012 YDPWEL..."

    which you probably don't want.

    This cleaning needs to be done on a case by case basis since it is
    impossible to interpret whether you should be concatenating everything
    (as in translations), or combining things with spaces (as might be
    the case with /notes).
    """
    keys_to_process = ["translation"]

    def __init__(self, to_process=keys_to_process):
        """Initialize with the keys we should deal with.
        """
        self._to_process = to_process

    def clean_value(self, key_name, value):
        """Clean the specified value and return it.

        If the value is not specified to be dealt with, the original value
        will be returned.
        """
        if key_name in self._to_process:
            try:
                cleaner = getattr(self, "_clean_%s" % key_name)
                value = cleaner(value)
            except AttributeError:
                raise AssertionError("No function to clean key: %s"
                                     % key_name)
        return value

    def _clean_translation(self, value):
        """Concatenate a translation value to one long protein string.
        """
        translation_parts = value.split()
        return "".join(translation_parts)