1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59
|
# Author: Martin D. Smith
from __future__ import absolute_import
import os
import logging
# Module-level logger, named after this module via __name__.
log = logging.getLogger(__name__)
def fileType(fname):
    """Return the extension of fname without its surrounding dots.

    Two compound extensions are recognised:

    * a trailing ``.h5`` is combined with whatever extension precedes it
      (``movie.bax.h5`` gives ``bax.h5``; plain ``movie.h5`` gives ``h5``);
    * a trailing ``.index`` is combined with the preceding extension only
      when that extension is ``.contig`` (giving ``contig.index``).

    Any other name yields just its final extension, or an empty string when
    there is none.
    """
    stem, ext = os.path.splitext(fname)
    inner = os.path.splitext(stem)[1]
    is_compound = ext == '.h5' or (ext == '.index' and inner == '.contig')
    if is_compound:
        ext = inner + ext
    # Drop the leading dot (and any stray dots at either end).
    return ext.strip('.')
def getDataSetUuid(xmlfile):
    """
    Quickly retrieve the uuid from the root element of a dataset XML file,
    using a streaming parser to avoid loading the entire dataset into memory.

    Only the first "start" event is consumed, so parsing stops as soon as
    the root element's opening tag has been read.

    :param xmlfile: source handed directly to ``ElementTree.iterparse``
    :returns: the value of the root element's ``UniqueId`` attribute, or
              None if the attribute is absent or the parsing fails for any
              reason (missing file, malformed XML, ...).
    """
    try:
        # xml.etree.cElementTree was removed in Python 3.9. Without this
        # fallback the ImportError is swallowed by the broad handler below
        # and the function returns None for every input.
        try:
            import xml.etree.cElementTree as ET
        except ImportError:
            import xml.etree.ElementTree as ET
        for _event, element in ET.iterparse(xmlfile, events=("start",)):
            # The first "start" event belongs to the root element.
            return element.get("UniqueId")
    except Exception:
        # Deliberately best-effort: callers rely on None rather than an error.
        return None
    return None
def getDataSetMetaType(xmlfile):
    """
    Quickly retrieve the MetaType from the root element of a dataset XML file,
    using a streaming parser to avoid loading the entire dataset into memory.

    Only the first "start" event is consumed, so parsing stops as soon as
    the root element's opening tag has been read.

    :param xmlfile: source handed directly to ``ElementTree.iterparse``
    :returns: the value of the root element's ``MetaType`` attribute, or
              None if the attribute is absent or the parsing fails for any
              reason (missing file, malformed XML, ...).
    """
    try:
        # xml.etree.cElementTree was removed in Python 3.9. Without this
        # fallback the ImportError is swallowed by the broad handler below
        # and the function returns None for every input.
        try:
            import xml.etree.cElementTree as ET
        except ImportError:
            import xml.etree.ElementTree as ET
        for _event, element in ET.iterparse(xmlfile, events=("start",)):
            # The first "start" event belongs to the root element.
            return element.get("MetaType")
    except Exception:
        # Deliberately best-effort: callers rely on None rather than an error.
        return None
    return None
def loadMockCollectionMetadata():
    """
    Load the CollectionMetadata template from pbcore.data.datasets.

    The value returned by getMockCollectionMetadata() is passed to
    parseCollectionMetadata(), and the parsed result is returned. Both
    helpers are imported inside the function rather than at module level.
    """
    from pbcore.data.datasets import getMockCollectionMetadata
    from pbcore.io.dataset.DataSetReader import parseCollectionMetadata
    template = getMockCollectionMetadata()
    return parseCollectionMetadata(template)
|