#!/usr/bin/env python
"""
Additional functions for converting file etc.

@author M. Kösters
"""

# Python mzML module - pymzml
# Copyright (C) 2010-2019 M. Kösters, C. Fufezan
# The MIT License (MIT)

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

import gzip
import re

from pymzml.utils.GSGW import GSGW
import pymzml.regex_patterns as regex_patterns

# Opening tags of the two element types that get their own indexed block.
# The trailing space keeps "<spectrumList ...>" / "<chromatogramList ...>"
# from matching.
_OPENING_TAGS = ("<spectrum ", "<chromatogram ")

# NOTE(review): the id extraction below is a reconstruction; the original
# pattern (possibly one from ``regex_patterns``) is not visible here - confirm.
_ID_ATTRIBUTE = re.compile(r'\bid="([^"]*)"')
_TRAILING_DIGITS = re.compile(r"(\d+)$")


def _native_id_from_tag(tag_line):
    """
    Derive the index key for a ``<spectrum ...>``/``<chromatogram ...>`` line.

    Index keys are limited to ``idx_len`` characters (8 by default), so the
    full ``id`` attribute is not used verbatim when it ends in a number:
    the trailing digits are returned as ``int`` instead. Ids without trailing
    digits are returned unchanged as ``str``.

    NOTE(review): assumes the ``id`` attribute sits on the same line as the
    opening tag and that the writer accepts int and str keys - confirm.

    Arguments:
        tag_line (str): stripped line starting with an opening tag.

    Returns:
        int or str: key under which the block is indexed.

    Raises:
        ValueError: if the line carries no ``id="..."`` attribute.
    """
    id_match = _ID_ATTRIBUTE.search(tag_line)
    if id_match is None:
        raise ValueError("No id attribute found in tag: {0!r}".format(tag_line))
    native_id = id_match.group(1)
    digits = _TRAILING_DIGITS.search(native_id)
    return int(digits.group(1)) if digits else native_id


def _iter_blocks(lines):
    """
    Group mzML lines into blocks that end at each element's closing tag.

    Every line is accumulated until a line starting with ``</spectrum>`` or
    ``</chromatogram>`` is seen; the accumulated text is then emitted under
    the id taken from the most recent opening tag. Text left over after the
    last closing tag is emitted under the key ``"tail"``. Parsing is line
    oriented: tags are only recognised at the start of a (stripped) line.

    NOTE(review): text preceding the first opening tag is not split off and
    therefore becomes part of the first element's block - confirm this is
    what the reader of the indexed file expects.

    Arguments:
        lines (iterable of str): lines of an mzML document.

    Yields:
        tuple: ``(kind, identifier, text)`` with ``kind`` being one of
        ``"spectrum"``, ``"chromatogram"`` or ``"tail"``.

    Raises:
        ValueError: if a closing tag is met without a preceding opening tag
            or the opening tag has no id attribute.
    """
    pending = []
    native_id = None
    for line in lines:
        pending.append(line)
        stripped = line.strip()
        if stripped.startswith("</spectrum>"):
            kind = "spectrum"
        elif stripped.startswith("</chromatogram>"):
            kind = "chromatogram"
        else:
            if stripped.startswith(_OPENING_TAGS):
                native_id = _native_id_from_tag(stripped)
            continue
        if native_id is None:
            raise ValueError(
                "Closing tag without a preceding opening tag: {0!r}".format(stripped)
            )
        yield kind, native_id, "".join(pending)
        # Reset so a stale id can never be reused for the next element.
        pending = []
        native_id = None
    tail = "".join(pending)
    if tail:
        yield "tail", "tail", tail


def index_gzip(pathIn, pathOut, max_idx=10000, idx_len=8, verbose=False, comp_str=-1):
    """
    Convert an mzml file (can be gzipped) into an indexed, gzipped mzML file.

    The input is opened with ``gzip.open`` when its path ends with ``gz`` and
    with the builtin ``open`` when it ends with ``mzml`` (case-insensitive).

    Arguments:
        pathIn (str): path to an mzML input File.
        pathOut (str): path where the indexed gzip will be created.

    Keyword Arguments:
        max_idx (int): number of indexes which can be saved.
        idx_len (int): character len of one key
        verbose (boolean): print progress while parsing input.
        comp_str(int): compression strength of zlib compression, needs to be
            1 <= x <= 9 (the default -1 is passed through unchanged;
            presumably it selects the zlib default level - confirm)

    Raises:
        ValueError: if ``pathIn`` has neither a ``gz`` nor an ``mzml`` suffix.
    """
    if pathIn.endswith("gz"):
        file_open = gzip.open
    elif pathIn.lower().endswith("mzml"):
        file_open = open
    else:
        # Fail before the output file is created instead of hitting an
        # unbound local further down.
        raise ValueError(
            "Unsupported input file (expected *.mzML or *.gz): {0}".format(pathIn)
        )
    with GSGW(
        output_path=pathOut,
        max_idx=max_idx,
        max_idx_len=idx_len,
        max_offset_len=idx_len,
        comp_str=comp_str,
    ) as Writer:
        with file_open(pathIn, "rt") as Reader:
            for kind, native_id, data in _iter_blocks(Reader):
                Writer.add_data(data, native_id)
                if not verbose:
                    continue
                if kind == "spectrum":
                    print("NativeID : {0}".format(native_id), end="\r")
                elif kind == "chromatogram":
                    print("found chromatogram")
                    print("NativeID: {0}".format(native_id))
                else:
                    print("NativeID :", "tail")
        Writer.write_index()


def index(pathIn, pathOut, max_idx=10000, idx_len=8, verbose=False, comp_str=-1):
    """
    Convert a gzipped mzml file into an indexed, gzipped mzML file.

    Unlike :func:`index_gzip` the input is always opened with ``gzip.open``,
    regardless of its file name.

    Arguments:
        pathIn (str): path to input File.
        pathOut (str): path where output should be created.

    Keyword Arguments:
        max_idx (int): number of indexes which can be saved.
        idx_len (int): character len of one key
        verbose (boolean): print progress while parsing input.
        comp_str(int): compression strength of zlib compression, needs to be
            1 <= x <= 9 (the default -1 is passed through unchanged;
            presumably it selects the zlib default level - confirm)
    """
    with GSGW(
        output_path=pathOut,
        # Previously accepted but silently ignored.
        max_idx=max_idx,
        max_idx_len=idx_len,
        max_offset_len=idx_len,
        comp_str=comp_str,
    ) as Writer:
        with gzip.open(pathIn, "rt") as Reader:
            for kind, native_id, data in _iter_blocks(Reader):
                Writer.add_data(data, native_id)
                if not verbose:
                    continue
                # Spectra are intentionally not reported by this function.
                if kind == "chromatogram":
                    print("found chromo")
                    print("NativeID :", native_id, end="\r")
                elif kind == "tail":
                    print("NativeID :", "tail")
        Writer.write_index()


def make_obo_mapping(obo, reversed=False):
    """
    Build a mapping between term ids and term names from an OBO file.

    Each line starting with ``id: `` sets the current term id (its last
    whitespace separated token); each following line starting with
    ``name: `` stores the rest of that line as the name of the current term.
    ``name: `` lines met before any ``id: `` line are skipped.

    Arguments:
        obo (str): path to the OBO file.

    Keyword Arguments:
        reversed (boolean): if True, map names to ids instead of ids to names.

    Returns:
        dict: ``{id: name}`` or, if ``reversed`` is set, ``{name: id}``.
    """
    mapping = {}
    term_id = None
    # The OBO flat file format is specified as UTF-8; do not depend on the
    # platform default encoding.
    with open(obo, encoding="utf-8") as obo_file:
        for line in obo_file:
            if line.startswith("id: "):
                term_id = line.split()[-1]
            elif line.startswith("name: ") and term_id is not None:
                mapping[term_id] = " ".join(line.split()[1:])
    if reversed:
        mapping = {name: term for term, name in mapping.items()}
    return mapping


if __name__ == "__main__":
    print(__doc__)